def test_multiply_implicit(self):
    """Check ``multiply_column_by_row_instance`` against a hand-written table.

    The input lists (city, date) pairs. The method is called with a lambda
    reading ``date`` and a lambda reading ``city``; the result is compared,
    via ``delta``, with an expected table holding a ``KEY`` column and one
    ``<city>|<column>`` pair of columns per city. The expected ``jan`` row
    carries the text ``None`` in the ``B`` columns because the input has no
    (B, jan) pair.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    # Rows are joined with an explicit "\n" so the fixture does not depend
    # on how the source file is wrapped or indented (stripping spaces from
    # a multi-line literal fuses the rows if the line breaks are lost).
    text = "\n".join([
        "city\tdate",
        "A\tjan",
        "A\tfeb",
        "B\tfeb",
    ])
    table = TableFormula(text)
    assert len(table) == 3

    mul = table.multiply_column_by_row_instance(
        lambda v: v["date"], lambda v: v["city"])

    exp = TableFormula("\n".join([
        "KEY\tA|city\tA|date\tB|city\tB|date",
        "feb\tA\tfeb\tB\tfeb",
        "jan\tA\tjan\tNone\tNone",
    ]))

    # Sort both tables on KEY so the comparison ignores row order.
    exp.sort(lambda v: v["KEY"])
    mul.sort(lambda v: v["KEY"])

    delta = mul.delta(exp)
    if len(delta) > 0:
        # Log every reported difference before failing.
        for _ in delta:
            fLOG(_)
        # An explicit raise is not stripped by ``python -O`` the way a bare
        # ``assert False`` would be.
        raise AssertionError(
            "multiply_column_by_row_instance differs from the expected "
            "table (%d difference(s) logged)" % len(delta))
def test_multiply_implicit(self):
    """Compare ``multiply_column_by_row_instance`` output with a fixed table.

    Input: three (city, date) rows. The first lambda passed to the method
    reads ``date`` and the second reads ``city``. The expected table has a
    ``KEY`` column followed by ``A|city``, ``A|date``, ``B|city``,
    ``B|date``; its ``jan`` row holds ``None`` for both ``B`` columns.
    The test fails if ``mul.delta(exp)`` reports anything.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    # Row separators are written as "\n" escapes: the table text must not
    # rely on physical line breaks inside the literal, which are lost (and
    # the rows merged) as soon as the statement is reflowed.
    text = ("city\tdate\n"
            "A\tjan\n"
            "A\tfeb\n"
            "B\tfeb")
    table = TableFormula(text)
    assert len(table) == 3

    by_date = lambda v: v["date"]  # noqa: E731 - short key accessors
    by_city = lambda v: v["city"]  # noqa: E731
    mul = table.multiply_column_by_row_instance(by_date, by_city)

    expected_text = ("KEY\tA|city\tA|date\tB|city\tB|date\n"
                     "feb\tA\tfeb\tB\tfeb\n"
                     "jan\tA\tjan\tNone\tNone")
    exp = TableFormula(expected_text)

    # Same ordering on both sides before computing the difference.
    by_key = lambda v: v["KEY"]  # noqa: E731
    exp.sort(by_key)
    mul.sort(by_key)

    delta = mul.delta(exp)
    if len(delta) > 0:
        for _ in delta:
            fLOG(_)
        # Raise explicitly: ``assert False`` disappears under ``-O`` and
        # gives no hint about what went wrong.
        raise AssertionError(
            "unexpected differences between computed and expected tables: "
            "%d" % len(delta))
def test_TableFormulaCore(self):
    """Smoke-test a broad set of ``TableFormula`` operations.

    Each step calls one method and only checks that the result is
    non-empty (or positive for the aggregate); exact values are not
    verified. The statements are order-dependent: columns added early are
    present for later steps, and the table is sorted in place.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    fold = os.path.split(__file__)[0]
    assert TableFormula.delta is not None

    def as_tsv(compact):
        # Fixtures are written with spaces between cells and '#' between
        # rows; turn them into tab-separated, newline-delimited text.
        return compact.replace(" ", "\t").replace("#", "\n")

    # Table loaded from the comma-separated data file next to this test.
    data_file = os.path.join(fold, "data", "BNP.PA.txt")
    table = TableFormula(data_file, sep=",")
    table.sort(lambda v: v["Date"])
    assert len(table) > 0

    # Small in-memory table used by most of the remaining steps.
    table = TableFormula(as_tsv("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"))
    assert "d_a\td_b\td_c" in str(table)

    dist = table.get_distinct_values("name")
    assert len(dist) > 0

    table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.)
    assert len(table) > 0

    x = 1. / 3
    table.add_column_smooth("has_A_smooth", lambda v: v["has_A"],
                            [-1, 0, 1], [x, x, x])
    assert len(table) > 0

    fil = table.filter(lambda v: v["d_b"] == 2)
    # Check the filtered result itself (one fixture row has d_b == 2); the
    # original only re-checked ``table`` and left ``fil`` unverified.
    assert len(fil) > 0
    assert len(table) > 0

    rnd = table.random(5)
    assert len(rnd) > 0
    rnd = table.random(1, True)
    assert len(rnd) > 0

    fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4)
    assert len(fil) > 0

    total = table.aggregate(lambda v: v["d_c"])
    assert total > 0

    table.sort(lambda v: v["d_b"] + v["d_c"])
    assert len(table) > 0

    union = table.union(table)
    assert len(union) > len(table)

    # groupby with its default aggregation, then with ``max`` per column.
    group = table.groupby(lambda v: v["name"],
                          [lambda v: v["d_a"], lambda v: v["d_b"]],
                          ["name", "sum_d_a", "sum_d_b"])
    assert len(group) > 0

    groupmax = table.groupby(lambda v: v["name"],
                             [lambda v: v["d_a"], lambda v: v["d_b"]],
                             ["name", "max_d_a", "max_d_b"],
                             [max, max])
    assert len(groupmax) > 0

    # groupby with a two-argument aggregator and an extra d_b accessor
    # (presumably a weight, given the "weight" output column - confirm
    # against TableFormula.groupby).
    group = table.groupby(lambda v: v["name"],
                          [lambda v: v["d_a"]],
                          ["name", "weight", "sum_d_a"],
                          [lambda vec, w: sum(vec) / w],
                          lambda v: v["d_b"])

    innerjoin = table.innerjoin(group, lambda v: v["name"],
                                lambda v: v["name"], "group")
    assert len(innerjoin) > 0

    ext = table.extract_columns(["name", "d_a"])
    assert len(ext) > 0
    ext = table.remove_columns(["d_a"])
    assert len(ext) > 0

    d = table.todict(lambda v: v["name"], lambda v: v["d_b"], True)
    assert len(d) > 0
    d = table.select(lambda v: (v["name"], v["d_b"]))
    assert len(list(d)) > 0

    # Index on (name, d_a), then look entries up by that key.
    table.create_index(lambda v: (v["name"], v["d_a"]))
    row = table.get(('A', 1.1))
    assert row
    value = table.get(('A', 1.1), 2)
    assert value

    # Fresh table with a constant key column for the multiply step.
    table = TableFormula(as_tsv("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"))
    table.add_column("key_add", lambda v: "unique")
    mul = table.multiply_column_by_row_instance(
        lambda v: v["key_add"], lambda v: v["name"])
    assert len(mul) > 0

    # Column names carry sum_/len_/avg_ prefixes for groupby_implicit.
    table = TableFormula(
        as_tsv("key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"))
    gr = table.groupby_implicit(lambda v: v["key_name"])
    assert len(gr) > 0
def test_TableFormulaCore(self):
    """Exercise the core ``TableFormula`` API end to end.

    This is a smoke test: almost every call is followed by a "result is
    not empty" assertion rather than a value comparison. Steps must stay
    in this order because the table is modified as the test proceeds
    (added columns, in-place sorts, index creation).
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    # os.path.dirname(p) is the first element of os.path.split(p).
    fold = os.path.dirname(__file__)
    assert TableFormula.delta is not None

    # --- table read from a file ---------------------------------------
    # ``path`` rather than ``file`` to avoid shadowing a builtin name.
    path = os.path.join(fold, "data", "BNP.PA.txt")
    table = TableFormula(path, sep=",")
    table.sort(lambda v: v["Date"])
    assert len(table) > 0

    # --- table built from text ----------------------------------------
    # Spaces become tabs (cell separator) and '#' becomes a newline.
    table = TableFormula(
        "name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
            " ", "\t").replace("#", "\n"))
    assert "d_a\td_b\td_c" in str(table)
    dist = table.get_distinct_values("name")
    assert len(dist) > 0

    # --- derived columns ----------------------------------------------
    table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.)
    assert len(table) > 0
    x = 1. / 3
    table.add_column_smooth(
        "has_A_smooth", lambda v: v["has_A"], [-1, 0, 1], [x, x, x])
    assert len(table) > 0

    # --- filtering and sampling ---------------------------------------
    fil = table.filter(lambda v: v["d_b"] == 2)
    assert len(table) > 0
    # The filter output was previously never inspected; the fixture has a
    # row with d_b == 2, so the filtered table must not be empty.
    assert len(fil) > 0
    rnd = table.random(5)
    assert len(rnd) > 0
    rnd = table.random(1, True)
    assert len(rnd) > 0
    fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4)
    assert len(fil) > 0

    # --- aggregation, sorting, union ----------------------------------
    total = table.aggregate(lambda v: v["d_c"])
    assert total > 0
    table.sort(lambda v: v["d_b"] + v["d_c"])
    assert len(table) > 0
    union = table.union(table)
    assert len(union) > len(table)

    # --- groupby variants ---------------------------------------------
    name_of = lambda v: v["name"]  # noqa: E731 - shared group key
    group = table.groupby(
        name_of,
        [lambda v: v["d_a"], lambda v: v["d_b"]],
        ["name", "sum_d_a", "sum_d_b"])
    assert len(group) > 0
    groupmax = table.groupby(
        name_of,
        [lambda v: v["d_a"], lambda v: v["d_b"]],
        ["name", "max_d_a", "max_d_b"],
        [max, max])
    assert len(groupmax) > 0
    # NOTE(review): the last argument looks like a weight accessor feeding
    # the ``w`` parameter of the aggregator - confirm in TableFormula.
    group = table.groupby(
        name_of,
        [lambda v: v["d_a"]],
        ["name", "weight", "sum_d_a"],
        [lambda vec, w: sum(vec) / w],
        lambda v: v["d_b"])

    # --- join and column selection ------------------------------------
    innerjoin = table.innerjoin(group, name_of, name_of, "group")
    assert len(innerjoin) > 0
    ext = table.extract_columns(["name", "d_a"])
    assert len(ext) > 0
    ext = table.remove_columns(["d_a"])
    assert len(ext) > 0
    d = table.todict(name_of, lambda v: v["d_b"], True)
    assert len(d) > 0
    d = table.select(lambda v: (v["name"], v["d_b"]))
    assert len(list(d)) > 0

    # --- indexed lookup -----------------------------------------------
    table.create_index(lambda v: (v["name"], v["d_a"]))
    row = table.get(('A', 1.1))
    assert row
    value = table.get(('A', 1.1), 2)
    assert value

    # --- multiply on a constant key -----------------------------------
    table = TableFormula(
        "name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
            " ", "\t").replace("#", "\n"))
    table.add_column("key_add", lambda v: "unique")
    mul = table.multiply_column_by_row_instance(
        lambda v: v["key_add"], lambda v: v["name"])
    assert len(mul) > 0

    # --- implicit groupby ---------------------------------------------
    table = TableFormula(
        "key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
            " ", "\t").replace("#", "\n"))
    gr = table.groupby_implicit(lambda v: v["key_name"])
    assert len(gr) > 0