def test_case1():
    """Check the summary for dropping null rows and encoding ``Sex``.

    Builds ``df2`` from the ``df1`` defined outside this function by keeping
    only the rows that contain no null cell and mapping the ``Sex`` column
    from ``'male'``/``'female'`` to ``0``/``1``.  The pair is then passed to
    ``PatternSynthesizer.check`` and ``checker.summary`` is printed between
    ANSI colour escape codes.
    """
    # Copy the filtered rows explicitly so the column assignment below writes
    # to an independent frame rather than to a slice derived from df1 (which
    # makes pandas emit SettingWithCopyWarning).
    df2 = df1[~df1.isnull().any(axis=1)].copy()
    df2['Sex'] = df2['Sex'].map({'male': 0, 'female': 1})

    checker = PatternSynthesizer(
        dispatch_gen(df1, "df1", 0, 0),
        dispatch_gen(df2, "df2", 0, 1),
        Info(None, None),
        [],
    )
    # The return value is not needed here; the summary is read off the checker.
    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
def test_groupby():
    """Check the summary for a group-by aggregation of ``df1``.

    ``df2`` holds the sum of ``Age`` per ``Name`` (computed on a copy of the
    ``df1`` defined outside this function), with the group key restored as a
    column and the rows sorted by ``Age`` in descending order.  The pair is
    passed to ``PatternSynthesizer.check`` and ``checker.summary`` is printed
    between ANSI colour escape codes.
    """
    age_per_name = df1.copy().groupby(['Name'])['Age'].sum().reset_index()
    df2 = age_per_name.sort_values('Age', ascending=False)

    source_var = dispatch_gen(df1, "df1", 0, 0)
    target_var = dispatch_gen(df2, "df2", 0, 1)
    checker = PatternSynthesizer(source_var, target_var, Info(None, None), [])
    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
def test_double():
    """Run the same check twice on one ``PatternSynthesizer`` instance.

    ``df2`` is a copy of the ``df1`` defined outside this function with the
    rows containing nulls dropped in place.  ``check`` is called two times on
    the same checker, and ``checker.summary`` is printed after each call
    between ANSI colour escape codes.
    """
    df2 = df1.copy()
    df2.dropna(inplace=True)

    source_var = dispatch_gen(df1, "df1", 0, 0)
    target_var = dispatch_gen(df2, "df2", 0, 1)
    checker = PatternSynthesizer(source_var, target_var, Info(None, None), [])

    # Two identical passes over the same checker object.
    for _ in range(2):
        checker.check(df1, df2)
        print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
def test_io():
    """Check a frame pair after a pickle round trip through ``test.dat``.

    The ``df1`` defined outside this function is pickled twice in one list
    (the second entry is the same object as the first), written to
    ``test.dat`` in the working directory, read back, and the two loaded
    frames are passed to ``PatternSynthesizer.check``.  The value returned by
    ``check`` is printed between ANSI colour escape codes.
    """
    # Use distinct local names for the loaded frames.  Rebinding ``df1``
    # inside the function would make it a local name for the whole body, so
    # reading the outer ``df1`` before the assignment raises
    # UnboundLocalError.
    source_frame = df1
    alias_frame = source_frame

    with open("test.dat", "wb") as f:
        pickle.dump([source_frame, alias_frame], f)
    with open("test.dat", "rb") as f:
        # Loading is limited to the file written just above.
        loaded = pickle.load(f)
    loaded_df1, loaded_df2 = loaded[0], loaded[1]

    checker = PatternSynthesizer(
        dispatch_gen(loaded_df1, "df1", 0, 0),
        dispatch_gen(loaded_df2, "df2", 0, 1),
        Info(None, None),
        [],
    )
    result = checker.check(loaded_df1, loaded_df2)
    print("df1", "->", "df2", "\033[96m", result, "\033[0m")
def test_case2():
    """Check the summary for column drops, null filling and type casts.

    Starting from a copy of the ``df1`` defined outside this function, ``df2``
    has the ``Sex`` and ``Name`` columns removed, nulls in ``Age`` replaced by
    ``0``, ``Age`` cast to ``str`` and ``Survived`` cast to ``int``.  The pair
    is passed to ``PatternSynthesizer.check`` and ``checker.summary`` is
    printed between ANSI colour escape codes.
    """
    df2 = df1.copy()
    # DataFrame.drop returns a new frame unless inplace=True; keep the result,
    # otherwise the columns are never removed.
    df2 = df2.drop(['Sex', 'Name'], axis=1)
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the selected column: the chained in-place form is not guaranteed to
    # write through to df2.
    df2['Age'] = df2['Age'].fillna(0)
    # Debug output: identities of the frame index and the column index.
    print(id(df2.index), id(df2['Age'].index))
    # df2['Age'] = df2['Age'].map(lambda x: x + 1)
    df2['Age'] = df2['Age'].astype(str)
    df2.Survived = df2.Survived.astype(int)

    checker = PatternSynthesizer(
        dispatch_gen(df1, "df1", 0, 0),
        dispatch_gen(df2, "df2", 0, 1),
        Info(None, None),
        [],
    )
    # The return value is not needed here; the summary is read off the checker.
    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
# Walk the entries of ``data_path`` in sorted order.  "info.json" and files
# ending in "_f.dat" are skipped; every other ".dat" file is unpickled, each
# entry is wrapped with ``dispatch_gen`` and the wrapped list is handed to
# ``handlecell``.
# NOTE(review): this loop was collapsed onto a single line; the nesting below
# is reconstructed from the statement order — confirm against the original.
for file in sorted(os.listdir(data_path)):
    myvars = []
    if file == "info.json" or file.endswith("_f.dat"):
        continue
    elif file.endswith(".dat"):
        with open(os.path.join(data_path, file), "rb") as f:
            try:
                # NOTE: unpickling can execute arbitrary code; only point
                # data_path at dumps from a trusted source.
                vars = pickle.load(f)
            except Exception as e:
                print_error("error when pickle from " + file)
                print_error(e)
                continue

        # The Info(...) call below reads vars[0], so an empty dump would
        # raise IndexError; report it and move on to the next file.
        if len(vars) == 0:
            print_error("no variables found in " + file)
            continue

        for i in range(len(vars)):
            try:
                # Each entry is indexed as (value, meta) with meta[2] used as
                # the variable name; meta[0] and meta[1] are forwarded
                # unchanged.  NOTE(review): presumably cell number and
                # position — confirm against the code that writes the dumps.
                myvars.append(
                    dispatch_gen(vars[i][0], vars[i][1][2], vars[i][1][0],
                                 vars[i][1][1]))
                # Short lists (at most 3 items) are additionally expanded
                # into one wrapped variable per element, named "name[j]".
                if isinstance(vars[i][0], list) and len(vars[i][0]) <= 3:
                    for j in range(len(vars[i][0])):
                        myvars.append(
                            dispatch_gen(vars[i][0][j],
                                         vars[i][1][2] + f"[{j}]",
                                         vars[i][1][0], vars[i][1][1]))
            except Exception as e:
                # Build the label defensively: the entry that failed may be
                # the very one whose name cannot be read.
                try:
                    failed_name = str(vars[i][1][2])
                except Exception:
                    failed_name = f"#{i}"
                print_error("error when dispatch var " + failed_name)
                print_error(e)

        # comments = static_comments[vars[0][1][0]] if vars[0][1][0] in static_comments.keys() else []
        comment, json_map = handlecell(myvars, 0, len(myvars) - 1,
                                       Info(info, vars[0][1][0]))
        # notebook.cells[code_indices[vars[0][1][0] - 1]].source = f"'''\n{comment}\n'''\n" + notebook.cells[code_indices[vars[0][1][0] - 1]].source