def test_nested_anno_dependencies(self):
    """Annotators named in ``dep_annos`` are applied, including nested ones.

    ``Nesting`` lists ``Nested`` (plus a ``Constant``) as a dependency and
    ``Nested`` lists a ``Constant`` of its own.  Only ``Nesting`` is added
    to the frame; after the pipegraph run all four columns are expected on
    ``df`` with their respective values.
    """

    class Nested(Annotator):
        columns = ["b"]

        def dep_annos(self):
            return [Constant("Nestedconst", 5)]

        def calc(self, df):
            return pd.Series([10] * len(df))

    class Nesting(Annotator):
        columns = ["a"]

        def dep_annos(self):
            return [Constant("Nestingconst", 5), Nested()]

        def calc(self, df):
            return pd.Series([15] * len(df))

    def load():
        return pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})

    outer_anno = Nesting()
    ddf = DelayedDataFrame("shu", load)
    ddf += outer_anno
    ddf.write()
    ppg.run_pipegraph()

    # Column name -> value every row of that column must hold.
    expected_by_column = {
        "a": 15,
        "b": 10,
        "Nestedconst": 5,
        "Nestingconst": 5,
    }
    for column, expected in expected_by_column.items():
        assert (ddf.df[column] == expected).all()
def test_write_mixed_manglers(self):
    """A second ``write`` with a different mangler raises JobContractError."""
    frame = pd.DataFrame({"A": [1, 2]})
    ddf = DelayedDataFrame("shu", lambda: frame)

    # First request: identity mangler.
    ddf.write(mangler_function=lambda df: df)

    def first_rows(df):
        return df.head()

    # Second request for the same output, but with another mangler.
    with pytest.raises(ppg.JobContractError):
        ddf.write(mangler_function=first_rows)
def test_write(self):
    """After a pipegraph run, the write job's first file holds the frame.

    The file is read back as tab-separated text and compared with the
    frame that was loaded.
    """
    expected = pd.DataFrame({"A": [1, 2]})
    ddf = DelayedDataFrame("shu", lambda: expected)

    write_job = ddf.write()[0]
    ppg.run_pipegraph()

    written = write_job.filenames[0]
    assert Path(written).exists()
    round_tripped = pd.read_csv(written, sep="\t")
    assert_frame_equal(round_tripped, expected)
def test_filtering_on_annotator_missing(self):
    """Filtering on a column nobody provides fails the run with a KeyError.

    The filter reads column ``"aaA"``, which neither the loaded frame nor
    any added annotator supplies.  The pipegraph run is expected to raise
    ``ppg.RuntimeError`` and the child's load job to carry the KeyError.
    """

    # NOTE(review): this annotator is defined but never attached to the
    # frame - presumably on purpose, given the test name; confirm.
    class A(Annotator):
        cache_name = "A"
        columns = ["aa"]

        def calc(self, df):
            row_count = len(df)
            alternating = ["a", "b"] * int(row_count / 2 + 1)
            return pd.DataFrame(
                {self.columns[0]: alternating[:row_count]},
                index=df.index,
            )

    def load():
        return pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})

    def keep_rows(df):
        return df["aaA"] == "a"

    parent = DelayedDataFrame("shu", load)
    child = parent.filter("sha", keep_rows)
    load_job = child.load()
    parent.write()
    print("run now")
    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()
    failure_text = repr(load_job.lfg.exception)
    assert "KeyError" in failure_text
def test_annotator_depending_on_actual_jobs(self):
    """An annotator can declare a pipegraph job via ``deps`` and use its output.

    ``TestAnno.deps`` returns a ``FileGeneratingJob`` that writes ``fileA``;
    ``calc`` reads that file and fills column ``C`` with its content.
    """

    def write_file_a():
        Path("fileA").write_text("hello")

    class TestAnno(Annotator):
        columns = ["C"]

        def deps(self, ddf):
            return [ppg.FileGeneratingJob("fileA", write_file_a)]

        def calc(self, df):
            content = Path("fileA").read_text()
            return pd.Series([content] * len(df))

    def load():
        return pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})

    ddf = DelayedDataFrame("shu", load)
    ddf.add_annotator(TestAnno())
    ddf.write()
    ppg.run_pipegraph()

    column_c = ddf.df["C"]
    assert (column_c == "hello").all()
def test_write_excel(self):
    """Writing to ``sha.xls`` yields a file that reads back as the frame.

    No pipegraph run is involved here: the result directory, ``df`` and the
    written file are all checked right after the corresponding call.
    """
    expected = pd.DataFrame({"A": [1, 2]})
    ddf = DelayedDataFrame("shu", lambda: expected, result_dir="sha")

    assert Path("sha").exists()
    assert_frame_equal(ddf.df, expected)
    assert ddf.non_annotator_columns == "A"

    # Element 1 of the write() result is used as the output path.
    excel_file = ddf.write("sha.xls")[1]
    assert excel_file.exists()
    round_tripped = pd.read_excel(excel_file)
    assert_frame_equal(round_tripped, expected)
def test_write(self):
    """A default ``write`` lands inside ``result_dir`` as tab-separated text.

    No pipegraph run is involved here: the result directory, ``df`` and the
    written file are all checked right after the corresponding call.
    """
    expected = pd.DataFrame({"A": [1, 2]})
    ddf = DelayedDataFrame("shu", lambda: expected, result_dir="sha")

    assert Path("sha").exists()
    assert_frame_equal(ddf.df, expected)
    assert ddf.non_annotator_columns == "A"

    # Element 1 of the write() result is used as the output path.
    out_file = ddf.write()[1]
    containing_dir = str(out_file.parent)
    assert "/sha" in containing_dir
    assert out_file.exists()
    round_tripped = pd.read_csv(out_file, sep="\t")
    assert_frame_equal(round_tripped, expected)
def test_write_excel2(self):
    """A 257-column frame written as ``sha.xls`` reads back as TSV.

    NOTE(review): the output is requested under an ``.xls`` name but read
    with ``read_csv(sep="\\t")`` - presumably ``write`` falls back to
    tab-separated text for frames this wide; confirm against ``write``.
    """
    wide_data = {"A%i" % i: [1, 1] for i in range(0, 257)}
    expected = pd.DataFrame(wide_data)
    ddf = DelayedDataFrame("shu", lambda: expected, result_dir="sha")

    out_file = ddf.write("sha.xls")[1]
    assert out_file.exists()
    round_tripped = pd.read_csv(out_file, sep="\t")
    assert_frame_equal(round_tripped, expected)
def test_write_mangle(self):
    """The mangler passed to ``write`` shapes what ends up in the file.

    The written file must equal the mangler applied to the source frame:
    column ``A`` dropped and only rows with ``B == "c"`` kept.
    """
    source = pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    ddf = DelayedDataFrame("shu", lambda: source)

    assert_frame_equal(ddf.df, source)
    assert (ddf.non_annotator_columns == ["A", "B"]).all()

    def mangle(df):
        without_a = df.drop("A", axis=1)
        return without_a[without_a.B == "c"]

    # Element 1 of the write() result is used as the output path.
    out_file = ddf.write("test.csv", mangle)[1]
    assert out_file.exists()
    round_tripped = pd.read_csv(out_file, sep="\t")
    assert_frame_equal(round_tripped, mangle(source))