def test_get_aggregate_columns2(self):
    """After A joins B on aa/ba, B reports exactly one aggregate column, ``ba``."""

    def int_columns(*names):
        # Every column in this scenario is an integer column.
        return [Column(name=n, column_type=TypeEnum.int) for n in names]

    left = Table(
        table_name="A",
        columns=int_columns("aa", "b", "c"),
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    right = Table(
        table_name="B",
        columns=int_columns("ba", "e"),
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )

    left.join(to_table=right, from_table_key="aa", to_table_key="ba")

    # The joined table A is expected to expose four column names.
    self.assertEqual(len(left.column_names), 4)

    aggregates = right.get_aggregate_columns()
    self.assertEqual(1, len(aggregates))
    self.assertEqual(aggregates[0].name, "ba")
def setUp(self) -> None:
    """Create the client-owned table ``A`` with an int column ``a`` and a string column ``b``."""
    column_specs = (("a", TypeEnum.int), ("b", TypeEnum.string))
    columns = [
        Column(name=col_name, column_type=col_type)
        for col_name, col_type in column_specs
    ]
    self.table_a = Table(
        table_name="A",
        columns=columns,
        owner=CharacterEnum.client,
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
def setUp(self):
    """Build tables ``a``, ``b`` and ``c``: all carry name/id, ``c`` additionally has address."""

    def make_table(label, column_specs):
        # Fresh Column objects and fresh list arguments for every table.
        return Table(
            table_name=label,
            columns=[Column(name=n, column_type=t) for n, t in column_specs],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    name_and_id = (("name", TypeEnum.string), ("id", TypeEnum.int))

    self.a_table = make_table("a", name_and_id)
    self.b_table = make_table("b", name_and_id)
    self.c_table = make_table(
        "c", name_and_id + (("address", TypeEnum.string),)
    )
def test_simple_join2(self):
    """
    Merge two join conditions (aa = ab and ec = eb) over tables A, B and C
    and check that ``to_code`` returns a non-empty result.
    """
    data = [
        JoinData(left_key="aa", right_key="ab"),
        JoinData(left_key="ec", right_key="eb"),
    ]

    table_a = Table(
        table_name="A",
        columns=[
            Column(name="aa", column_type=TypeEnum.int),
            Column(name="b", column_type=TypeEnum.int),
            Column(name="c", column_type=TypeEnum.int),
        ],
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    table_b = Table(
        table_name="B",
        columns=[
            Column(name="ab", column_type=TypeEnum.int),
            Column(name="eb", column_type=TypeEnum.int),
        ],
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    table_c = Table(
        table_name="C",
        columns=[
            Column(name="ec", column_type=TypeEnum.int),
            Column(name="f", column_type=TypeEnum.int),
        ],
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    tables = [table_a, table_b, table_c]

    # Select node (columns ec, f) followed by the join node under test.
    root = SelectNode(tables=tables, annotation_name="demo")
    root.set_identifier_list([
        Identifier(tokens=[Token(None, "ec")]),
        Identifier(tokens=[Token(None, "f")]),
    ])
    root.next = JoinNode(join_list=data, tables=tables)
    root.next.prev = root
    root.next.merge()

    result = root.next.to_code(table_a.get_root())

    # assertGreater reports the actual length when the check fails,
    # unlike assertTrue on a pre-evaluated comparison.
    self.assertGreater(len(result), 0)
def test_simple_join1(self):
    """
    Merge a single join condition (aa = ba) between tables A and B and
    check that the first element returned by ``to_code`` contains the
    aggregate call on column aa.
    """
    data = [JoinData(left_key="aa", right_key="ba")]

    table_a = Table(
        table_name="A",
        columns=[
            Column(name="aa", column_type=TypeEnum.int),
            Column(name="b", column_type=TypeEnum.int),
            Column(name="c", column_type=TypeEnum.int),
        ],
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    table_b = Table(
        table_name="B",
        columns=[
            Column(name="ba", column_type=TypeEnum.int),
            Column(name="e", column_type=TypeEnum.int),
        ],
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )

    # Select node (columns b, c) followed by the join node under test.
    root = SelectNode(tables=[table_a, table_b], annotation_name="demo")
    root.set_identifier_list([
        Identifier(tokens=[Token(None, "b")]),
        Identifier(tokens=[Token(None, "c")]),
    ])
    root.next = JoinNode(join_list=data, tables=[table_a, table_b])
    root.next.prev = root
    root.next.merge()

    result = root.next.to_code(table_a.get_root())

    # assertIn shows both the expected snippet and the actual text on
    # failure, unlike assertTrue on a pre-evaluated membership test.
    self.assertIn('a.Aggregate({ "aa" });', result[0])
def test_equal(self):
    """Columns that list each other in ``related_columns`` must compare equal."""
    seed_a = Column(name="a", column_type=TypeEnum.int)
    seed_b = Column(name="b", column_type=TypeEnum.int)

    first_table = Table(
        columns=[seed_a],
        table_name="1",
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )
    second_table = Table(
        columns=[seed_b],
        table_name="2",
        data_sizes=[100],
        data_paths=[""],
        annotations=[],
    )

    # Work with the column objects as exposed by the tables, not the seeds.
    left = first_table.original_column_names[0]
    right = second_table.original_column_names[0]

    # Link the two columns in both directions.
    left.related_columns.append(right)
    right.related_columns.append(left)

    self.assertTrue(left == right)
def test_get_aggregate_columns(self):
    """
    Chain C -> A -> B (C joins A on e, A joins B on a) and check the
    aggregate columns reported by each table: B gives a and e, A gives e,
    C gives none.
    """

    def int_table(label, *column_names):
        # All columns in this scenario are integer columns.
        return Table(
            table_name=label,
            columns=[
                Column(name=c, column_type=TypeEnum.int) for c in column_names
            ],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    tbl_a = int_table("A", "a", "b", "c")
    tbl_b = int_table("B", "a", "e")
    tbl_c = int_table("C", "e", "f")

    tbl_a.join(tbl_b, "a", "a")
    tbl_c.join(tbl_a, "e", "e")

    self.assertEqual(len(tbl_a.column_names), 4)

    from_b = tbl_b.get_aggregate_columns()
    self.assertEqual(2, len(from_b))
    self.assertEqual(from_b[0].name, "a")
    self.assertEqual(from_b[1].name, "e")

    from_a = tbl_a.get_aggregate_columns()
    self.assertEqual(1, len(from_a))
    self.assertEqual(from_a[0].name, "e")

    from_c = tbl_c.get_aggregate_columns()
    self.assertEqual(0, len(from_c))
def test_is_cycle(self):
    """Joining 1 -> 2 -> 3 and then 3 -> 1 must raise."""

    def int_table(label, first, second):
        return FreeConnexTable(
            table_name=label,
            columns=[
                Column(name=first, column_type=TypeEnum.int),
                Column(name=second, column_type=TypeEnum.int),
            ],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    first_table = int_table("1", "a", "b")
    second_table = int_table("2", "b", "c")
    third_table = int_table("3", "c", "a")

    first_table.join(second_table, "b", "b")
    second_table.join(third_table, "c", "c")

    # The last join would link table 3 back to table 1.
    with self.assertRaises(Exception):
        third_table.join(first_table, "a", "a")
def test_get_aggregate_columns3(self):
    """
    A joins both B (on a) and C (on b); each of B and C must report
    exactly its own join column as the aggregate column.
    """

    def int_table(label, first, second):
        return Table(
            table_name=label,
            columns=[
                Column(name=first, column_type=TypeEnum.int),
                Column(name=second, column_type=TypeEnum.int),
            ],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    parent = int_table("A", "a", "b")
    child_b = int_table("B", "a", "c")
    child_c = int_table("C", "b", "d")

    parent.join(child_b, 'a', 'a')
    parent.join(child_c, 'b', 'b')

    aggregates_b = child_b.get_aggregate_columns()
    self.assertEqual(1, len(aggregates_b))
    self.assertEqual(aggregates_b[0].name, 'a')

    aggregates_c = child_c.get_aggregate_columns()
    self.assertEqual(1, len(aggregates_c))
    self.assertEqual(aggregates_c[0].name, "b")
def test_is_free_connex_join4(self):
    """
    Scenario "tree C" of join_tree.drawio.

    Table 1 joins tables 2 and 3; table 3 joins tables 4 and 5. With the
    output attributes b1, d1, e, f the tree is expected not to be
    free-connex, table 3 is expected first among the reported tables, and
    the tree must not be reported as cyclic.
    :return:
    """

    def new_table(label, *column_names):
        # All columns in this scenario are integer columns.
        return FreeConnexTable(
            table_name=label,
            columns=[
                Column(name=c, column_type=TypeEnum.int) for c in column_names
            ],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    # Replace the fixture tables with ones using distinct key names.
    self.table_1 = new_table("1", "a", "b1")
    self.table_2 = new_table("2", "a", "c")
    self.table_3 = new_table("3", "b2", "d1")
    self.table_4 = new_table("4", "d2", "f", "g")
    self.table_5 = new_table("5", "b3", "e")

    self.table_1.join(self.table_2, "a", "a")
    self.table_1.join(self.table_3, "b1", "b2")
    self.table_3.join(self.table_4, "d1", "d2")
    self.table_3.join(self.table_5, "b2", "b3")

    tree_height = self.table_1.get_height()
    is_free_connex, output_tables = self.table_1.is_free_connex(
        output_attrs=["b1", "d1", "e", "f"],
        non_output_attrs=["a", "c", "g"],
        height=tree_height,
    )

    self.assertFalse(is_free_connex)
    self.assertEqual(output_tables[0], self.table_3)
    self.assertFalse(self.table_1.is_cycle())
def setUp(self) -> None:
    """Create the five integer-column ``FreeConnexTable`` fixtures ``table_1`` .. ``table_5``."""

    def new_table(label, *column_names):
        # Fresh Column objects and fresh list arguments for every table.
        return FreeConnexTable(
            table_name=label,
            columns=[
                Column(name=c, column_type=TypeEnum.int) for c in column_names
            ],
            data_sizes=[100],
            data_paths=[""],
            annotations=[],
        )

    self.table_1 = new_table("1", "a", "b")
    self.table_2 = new_table("2", "a", "c")
    self.table_3 = new_table("3", "b", "d")
    self.table_4 = new_table("4", "d", "f", "g")
    self.table_5 = new_table("5", "b", "e")
def __to_code_util__(self, root: Table, from_key=None, to_key=None) -> List[str]:
    """
    Render code for the join tree below ``root``, children before parents.

    :param root: table currently being visited
    :param from_key: join key. From table's column name
    :param to_key: join key. To table's column name
    :return: list of generated code lines for this subtree
    """
    template = Template(self.open_template_file("join.template.j2"))

    # Post-order traversal: each child subtree is emitted ahead of this table.
    lines = []
    for edge in root.children:
        lines.extend(
            self.__to_code_util__(
                edge.to_table, edge.from_table_key, edge.to_table_key
            )
        )

    if root.parent:
        # Non-root table: join with the parent, and aggregate only when
        # there is at least one aggregate column.
        # TODO: a same-owner check (raising RuntimeError("Cannot semi join
        # by the same owner")) is disabled here until the original code
        # changes.
        join_columns = self.remove_duplicates(root.get_aggregate_columns())
        needs_aggregate = len(join_columns) > 0
        rendered = template.render(
            left_table=root.parent,
            right_table=root,
            aggregate=join_columns,
            left=from_key,
            right=to_key,
            should_aggregate=needs_aggregate,
            should_join=True,
        )
        lines.extend(rendered.split("\n"))
        return lines

    # Root table: no join; aggregate on the selected columns and reveal.
    group_by = self.__get_group_by__()
    select = self.__get_select__()
    if group_by:
        # GROUP BY identifiers take precedence over the SELECT list.
        selection_source, is_group_by = group_by, True
    elif select:
        selection_source, is_group_by = select, False
    else:
        raise SyntaxError("SQL Statement should have select statement")
    selections = [
        identifier.normalized
        for identifier in selection_source.identifier_list
    ]

    new_selections = self.__preprocess_selection__(
        selections=selections,
        columns=root.get_columns_after_aggregate(),
    )
    reveal_columns = self.remove_duplicates(
        [Column(name=s, column_type=TypeEnum.int) for s in new_selections]
    )
    needs_aggregate = len(reveal_columns) > 0

    rendered = template.render(
        left_table=root.parent,
        right_table=root,
        aggregate=reveal_columns,
        left=from_key,
        right=to_key,
        should_aggregate=needs_aggregate,
        should_join=False,
        reveal_table=root,
        should_reveal=True,
        is_group_by=is_group_by,
    )
    lines.extend(rendered.split("\n"))
    return lines