Exemple #1
0
    def test_get_aggregate_columns2(self):
        """Join A to B on differently named keys and inspect B's aggregates.

        After ``A.join(B)`` on ``aa`` / ``ba`` the test expects ``A`` to
        report four column names and ``B`` to report exactly one aggregate
        column, named ``ba``.
        """
        def int_column(name):
            return Column(name=name, column_type=TypeEnum.int)

        def build_table(name, column_names):
            return Table(table_name=name,
                         columns=[int_column(n) for n in column_names],
                         data_sizes=[100],
                         data_paths=[""],
                         annotations=[])

        left = build_table("A", ("aa", "b", "c"))
        right = build_table("B", ("ba", "e"))

        left.join(to_table=right, from_table_key="aa", to_table_key="ba")

        self.assertEqual(len(left.column_names), 4)

        aggregate_columns = right.get_aggregate_columns()
        self.assertEqual(1, len(aggregate_columns))
        self.assertEqual(aggregate_columns[0].name, "ba")
Exemple #2
0
 def setUp(self) -> None:
     """Create the ``table_a`` fixture: a client-owned table ``A``.

     The table has an int column ``a`` and a string column ``b``.
     """
     fixture_columns = [
         Column(name="a", column_type=TypeEnum.int),
         Column(name="b", column_type=TypeEnum.string),
     ]
     self.table_a = Table(table_name="A",
                          columns=fixture_columns,
                          owner=CharacterEnum.client,
                          data_sizes=[100],
                          data_paths=[""],
                          annotations=[])
Exemple #3
0
    def setUp(self):
        """Create the ``a_table``, ``b_table`` and ``c_table`` fixtures.

        Tables ``a`` and ``b`` each have a string ``name`` column and an int
        ``id`` column; table ``c`` additionally has a string ``address``
        column. Every table gets its own fresh ``Column`` instances.
        """
        def make_table(table_name, with_address=False):
            columns = [
                Column(name="name", column_type=TypeEnum.string),
                Column(name="id", column_type=TypeEnum.int),
            ]
            if with_address:
                columns.append(
                    Column(name="address", column_type=TypeEnum.string))
            return Table(table_name=table_name,
                         columns=columns,
                         data_sizes=[100],
                         data_paths=[""],
                         annotations=[])

        self.a_table = make_table("a")
        self.b_table = make_table("b")
        self.c_table = make_table("c", with_address=True)
Exemple #4
0
    def test_simple_join2(self):
        """Generate code for a two-join, three-table query and expect output.

        Builds tables ``A``, ``B`` and ``C``, a ``SelectNode`` selecting the
        identifiers ``ec`` and ``f``, and a ``JoinNode`` with the key pairs
        ``aa``/``ab`` and ``ec``/``eb``. After ``merge()`` the join node is
        rendered from ``table_a.get_root()`` and the result must be non-empty.
        """
        data = [JoinData(left_key="aa", right_key="ab"), JoinData(left_key="ec", right_key="eb")]
        table_a = Table(table_name="A",
                        columns=[Column(name="aa", column_type=TypeEnum.int),
                                 Column(name="b", column_type=TypeEnum.int),
                                 Column(name="c", column_type=TypeEnum.int)], data_sizes=[100], data_paths=[""],
                        annotations=[])

        table_b = Table(table_name="B",
                        columns=[Column(name="ab", column_type=TypeEnum.int),
                                 Column(name="eb", column_type=TypeEnum.int)], data_sizes=[100], data_paths=[""],
                        annotations=[])

        table_c = Table(table_name="C",
                        columns=[Column(name="ec", column_type=TypeEnum.int),
                                 Column(name="f", column_type=TypeEnum.int)], data_sizes=[100], data_paths=[""],
                        annotations=[])

        tables = [table_a, table_b, table_c]

        root = SelectNode(tables=tables, annotation_name="demo")
        root.set_identifier_list([Identifier(tokens=[Token(None, "ec")]), Identifier(tokens=[Token(None, "f")])])

        # Link the join node behind the select node in both directions.
        root.next = JoinNode(join_list=data, tables=tables)
        root.next.prev = root

        root.next.merge()
        result = root.next.to_code(table_a.get_root())
        # assertGreater reports the actual length on failure, unlike
        # assertTrue(len(result) > 0), which only reports "False is not true".
        self.assertGreater(len(result), 0)
Exemple #5
0
    def test_simple_join1(self):
        """Generate code for a single join and check the first emitted line.

        Builds tables ``A`` and ``B``, a ``SelectNode`` selecting the
        identifiers ``b`` and ``c``, and a ``JoinNode`` with the key pair
        ``aa``/``ba``. After ``merge()`` the join node is rendered from
        ``table_a.get_root()`` and the first element of the result must
        contain the aggregate statement on ``aa``.
        """
        data = [JoinData(left_key="aa", right_key="ba")]
        table_a = Table(table_name="A",
                        columns=[Column(name="aa", column_type=TypeEnum.int),
                                 Column(name="b", column_type=TypeEnum.int),
                                 Column(name="c", column_type=TypeEnum.int)], data_sizes=[100], data_paths=[""],
                        annotations=[])

        table_b = Table(table_name="B",
                        columns=[Column(name="ba", column_type=TypeEnum.int),
                                 Column(name="e", column_type=TypeEnum.int)], data_sizes=[100], data_paths=[""],
                        annotations=[])

        root = SelectNode(tables=[table_a, table_b], annotation_name="demo")
        root.set_identifier_list([Identifier(tokens=[Token(None, "b")]), Identifier(tokens=[Token(None, "c")])])

        # Link the join node behind the select node in both directions.
        root.next = JoinNode(join_list=data, tables=[table_a, table_b])
        root.next.prev = root

        root.next.merge()
        result = root.next.to_code(table_a.get_root())
        # assertIn shows both the expected fragment and the actual line on
        # failure, unlike assertTrue(... in ...).
        self.assertIn('a.Aggregate({ "aa" });', result[0])
Exemple #6
0
    def test_equal(self):
        """Columns of different tables compare equal once mutually related.

        Takes the first entry of each table's ``original_column_names``,
        appends each to the other's ``related_columns``, and expects ``==``
        between them to be true.
        """
        table_1 = Table(columns=[Column(name="a", column_type=TypeEnum.int)],
                        table_name="1",
                        data_sizes=[100],
                        data_paths=[""],
                        annotations=[])
        table_2 = Table(columns=[Column(name="b", column_type=TypeEnum.int)],
                        table_name="2",
                        data_sizes=[100],
                        data_paths=[""],
                        annotations=[])

        # Work with the column objects as exposed by the tables themselves.
        first = table_1.original_column_names[0]
        second = table_2.original_column_names[0]

        first.related_columns.append(second)
        second.related_columns.append(first)

        # Explicit ``==`` on purpose: the equality operator is what is tested.
        self.assertTrue(first == second)
Exemple #7
0
    def test_get_aggregate_columns(self):
        """Check aggregate columns of each table in a C -> A -> B join chain.

        ``A`` joins ``B`` on ``a`` and ``C`` joins ``A`` on ``e``. The test
        expects ``A`` to report four column names, ``B`` to aggregate on
        ``a`` and ``e`` (in that order), ``A`` to aggregate on ``e`` only,
        and ``C`` to have no aggregate columns.
        """
        def int_column(name):
            return Column(name=name, column_type=TypeEnum.int)

        def build_table(name, column_names):
            return Table(table_name=name,
                         columns=[int_column(n) for n in column_names],
                         data_sizes=[100],
                         data_paths=[""],
                         annotations=[])

        table_a = build_table("A", ("a", "b", "c"))
        table_b = build_table("B", ("a", "e"))
        table_c = build_table("C", ("e", "f"))

        table_a.join(table_b, "a", "a")
        table_c.join(table_a, "e", "e")

        self.assertEqual(len(table_a.column_names), 4)

        b_aggregates = table_b.get_aggregate_columns()
        self.assertEqual(2, len(b_aggregates))
        self.assertEqual(b_aggregates[0].name, "a")
        self.assertEqual(b_aggregates[1].name, "e")

        a_aggregates = table_a.get_aggregate_columns()
        self.assertEqual(1, len(a_aggregates))
        self.assertEqual(a_aggregates[0].name, "e")

        c_aggregates = table_c.get_aggregate_columns()
        self.assertEqual(0, len(c_aggregates))
Exemple #8
0
    def test_is_cycle(self):
        """Joining the last table back to the first must raise.

        Tables ``1``, ``2`` and ``3`` are joined 1 -> 2 on ``b`` and
        2 -> 3 on ``c``; the subsequent join 3 -> 1 on ``a`` is expected to
        raise an exception.
        """
        def build_table(name, column_names):
            return FreeConnexTable(table_name=name,
                                   columns=[
                                       Column(name=n,
                                              column_type=TypeEnum.int)
                                       for n in column_names
                                   ],
                                   data_sizes=[100],
                                   data_paths=[""],
                                   annotations=[])

        first = build_table("1", ("a", "b"))
        second = build_table("2", ("b", "c"))
        third = build_table("3", ("c", "a"))

        first.join(second, "b", "b")
        second.join(third, "c", "c")

        with self.assertRaises(Exception):
            third.join(first, "a", "a")
Exemple #9
0
    def test_get_aggregate_columns3(self):
        """Check aggregate columns of two tables joined from the same parent.

        ``A`` joins ``B`` on ``a`` and ``A`` joins ``C`` on ``b``. The test
        expects ``B`` to aggregate on ``a`` only and ``C`` on ``b`` only.
        """
        def int_column(name):
            return Column(name=name, column_type=TypeEnum.int)

        def build_table(name, column_names):
            return Table(table_name=name,
                         columns=[int_column(n) for n in column_names],
                         data_sizes=[100],
                         data_paths=[""],
                         annotations=[])

        parent = build_table("A", ("a", "b"))
        child_b = build_table("B", ("a", "c"))
        child_c = build_table("C", ("b", "d"))

        parent.join(child_b, 'a', 'a')
        parent.join(child_c, 'b', 'b')

        b_aggregates = child_b.get_aggregate_columns()
        self.assertEqual(1, len(b_aggregates))
        self.assertEqual(b_aggregates[0].name, 'a')

        c_aggregates = child_c.get_aggregate_columns()
        self.assertEqual(1, len(c_aggregates))
        self.assertEqual(c_aggregates[0].name, "b")
Exemple #10
0
    def test_is_free_connex_join4(self):
        """
        Expect a five-table join tree to be reported as not free-connex.

        The original note points at tree C in ``exaample/join_tree.drawio``
        (directory name possibly a typo for ``example`` -- verify).

        Joins: 1 -> 2 on ``a``, 1 -> 3 on ``b1``/``b2``, 3 -> 4 on
        ``d1``/``d2``, 3 -> 5 on ``b2``/``b3``. The first returned output
        table must be table 3, and table 1 must not report a cycle.
        :return:
        """
        def build_table(name, column_names):
            return FreeConnexTable(table_name=name,
                                   columns=[
                                       Column(name=n,
                                              column_type=TypeEnum.int)
                                       for n in column_names
                                   ],
                                   data_sizes=[100],
                                   data_paths=[""],
                                   annotations=[])

        # The tables are stored on the test instance, replacing any fixtures
        # of the same name.
        self.table_1 = build_table("1", ("a", "b1"))
        self.table_2 = build_table("2", ("a", "c"))
        self.table_3 = build_table("3", ("b2", "d1"))
        self.table_4 = build_table("4", ("d2", "f", "g"))
        self.table_5 = build_table("5", ("b3", "e"))

        self.table_1.join(self.table_2, "a", "a")
        self.table_1.join(self.table_3, "b1", "b2")
        self.table_3.join(self.table_4, "d1", "d2")
        self.table_3.join(self.table_5, "b2", "b3")

        tree_height = self.table_1.get_height()

        free_connex, tables_out = self.table_1.is_free_connex(
            output_attrs=["b1", "d1", "e", "f"],
            non_output_attrs=["a", "c", "g"],
            height=tree_height)

        self.assertFalse(free_connex)
        self.assertEqual(tables_out[0], self.table_3)
        self.assertFalse(self.table_1.is_cycle())
Exemple #11
0
    def setUp(self) -> None:
        """Create the five ``FreeConnexTable`` fixtures ``table_1``..``table_5``.

        All columns are int-typed. Column names per table: 1 = a, b;
        2 = a, c; 3 = b, d; 4 = d, f, g; 5 = b, e. No joins are made here.
        """
        def build_table(name, column_names):
            return FreeConnexTable(table_name=name,
                                   columns=[
                                       Column(name=n,
                                              column_type=TypeEnum.int)
                                       for n in column_names
                                   ],
                                   data_sizes=[100],
                                   data_paths=[""],
                                   annotations=[])

        self.table_1 = build_table("1", ("a", "b"))
        self.table_2 = build_table("2", ("a", "c"))
        self.table_3 = build_table("3", ("b", "d"))
        self.table_4 = build_table("4", ("d", "f", "g"))
        self.table_5 = build_table("5", ("b", "e"))
Exemple #12
0
    def __to_code_util__(self,
                         root: Table,
                         from_key=None,
                         to_key=None) -> List[str]:
        """
        Generate code for the join tree rooted at ``root``, children first
        (post-order), rendering ``join.template.j2`` once per visited table.

        A table that has a parent is rendered as a join with that parent; the
        table without a parent is rendered as the final reveal step, using
        the group-by identifiers when present and the select identifiers
        otherwise.
        :param root: current table
        :param from_key: join key. From table's column name
        :param to_key: join key. To table's column name
        :return: list of generated code lines
        :raises SyntaxError: when the parentless table is reached and neither
            a group-by nor a select statement is available
        """
        lines = []
        # The template is loaded before descending into the children.
        join_template = Template(self.open_template_file("join.template.j2"))
        for edge in root.children:
            lines.extend(
                self.__to_code_util__(edge.to_table, edge.from_table_key,
                                      edge.to_table_key))

        if root.parent:
            # Table with a parent: always join, and aggregate only when
            # there is at least one aggregate column.
            # NOTE: a check rejecting a semi join between tables of the same
            # owner used to live here and is currently disabled.
            aggregate_columns = self.remove_duplicates(
                root.get_aggregate_columns())
            output = join_template.render(
                left_table=root.parent,
                right_table=root,
                aggregate=aggregate_columns,
                left=from_key,
                right=to_key,
                should_aggregate=len(aggregate_columns) > 0,
                should_join=True)
            lines.extend(output.split("\n"))
            return lines

        # Table without a parent: pick the identifiers to reveal.
        group_by = self.__get_group_by__()
        select = self.__get_select__()
        if group_by:
            source, is_group_by = group_by, True
        elif select:
            source, is_group_by = select, False
        else:
            raise SyntaxError("SQL Statement should have select statement")
        selections = [
            identifier.normalized for identifier in source.identifier_list
        ]

        new_selections = self.__preprocess_selection__(
            selections=selections,
            columns=root.get_columns_after_aggregate())
        # Every selected name is wrapped as an int-typed column for the
        # template.
        aggregate_columns = self.remove_duplicates([
            Column(name=selection, column_type=TypeEnum.int)
            for selection in new_selections
        ])

        output = join_template.render(
            left_table=root.parent,
            right_table=root,
            aggregate=aggregate_columns,
            left=from_key,
            right=to_key,
            should_aggregate=len(aggregate_columns) > 0,
            should_join=False,
            reveal_table=root,
            should_reveal=True,
            is_group_by=is_group_by)
        lines.extend(output.split("\n"))
        return lines