Beispiel #1
0
    def test_multiple_join_with_multiple_ON(self):
        """Parse a three-table join chain where every JOIN has its own ON.

        The expected tree passes the table1/table2 join as the first input
        of the join with table3; the WHERE predicate is attached to the
        select statement itself.
        """
        select_query = '''SELECT table1.a FROM table1 JOIN table2
            ON table1.a = table2.a JOIN table3
            ON table3.a = table1.a WHERE table1.a <= 5'''
        parsed_stmt = Parser().parse(select_query)[0]

        col_t1 = TupleValueExpression('a', 'table1')
        col_t2 = TupleValueExpression('a', 'table2')
        col_t3 = TupleValueExpression('a', 'table3')

        # table1 JOIN table2 ON table1.a = table2.a
        inner_predicate = ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, col_t1, col_t2)
        inner_join = TableRef(
            JoinNode(TableRef(TableInfo('table1')),
                     TableRef(TableInfo('table2')),
                     predicate=inner_predicate,
                     join_type=JoinType.INNER_JOIN))

        # (...) JOIN table3 ON table3.a = table1.a
        outer_predicate = ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, col_t3, col_t1)
        outer_join = TableRef(
            JoinNode(inner_join,
                     TableRef(TableInfo('table3')),
                     predicate=outer_predicate,
                     join_type=JoinType.INNER_JOIN))

        # WHERE table1.a <= 5
        where_predicate = ComparisonExpression(
            ExpressionType.COMPARE_LEQ, col_t1, ConstantValueExpression(5))

        expected_stmt = SelectStatement([col_t1], outer_join, where_predicate)
        self.assertEqual(parsed_stmt, expected_stmt)
Beispiel #2
0
    def test_should_return_sorted_frames(self):
        """
        Sorts three input batches by A ascending, then B descending, and
        compares the first three output batches with the expected ones.

        data (3 batches):
        'A' 'B' 'C'
        [1, 1, 1]
        ----------
        [1, 5, 6]
        [4, 7, 10]
        ----------
        [2, 9, 7]
        [4, 1, 2]
        [4, 2, 4]
        """
        columns = ['A', 'B', 'C']

        def to_batch(rows):
            # wrap a list of rows into a Batch with the shared column names
            return Batch(frames=pd.DataFrame(np.array(rows), columns=columns))

        batches = [
            to_batch([[1, 1, 1]]),
            to_batch([[1, 5, 6], [4, 7, 10]]),
            to_batch([[2, 9, 7], [4, 1, 2], [4, 2, 4]]),
        ]

        "query: .... ORDER BY A ASC, B DESC "

        sort_keys = [
            (TupleValueExpression(col_alias='A'), ParserOrderBySortType.ASC),
            (TupleValueExpression(col_alias='B'), ParserOrderBySortType.DESC),
        ]
        executor = OrderByExecutor(OrderByPlan(sort_keys))
        executor.append_child(DummyExecutor(batches))

        sorted_batches = list(executor.exec())
        """
           A  B   C
        0  1  5   6
        1  1  1   1
        2  2  9   7
        3  4  7  10
        4  4  2   4
        5  4  1   2
        """
        expected_batches = [
            to_batch([[1, 5, 6]]),
            to_batch([[1, 1, 1], [2, 9, 7]]),
            to_batch([[4, 7, 10], [4, 2, 4], [4, 1, 2]]),
        ]

        # compare batch by batch, in output order
        for position in range(3):
            self.assertEqual(expected_batches[position],
                             sorted_batches[position])
    def test_should_return_top_frames_after_sorting(self):
        """
        Checks that a limit of 2 applied to the sorted output keeps only
        the first two sorted rows.

        data (3 batches):
        'A' 'B' 'C'
        [1, 1, 1]
        ----------
        [1, 5, 6]
        [4, 7, 10]
        ----------
        [2, 9, 7]
        [4, 1, 2]
        [4, 2, 4]
        """
        columns = ['A', 'B', 'C']
        input_rows = [
            [[1, 1, 1]],
            [[1, 5, 6], [4, 7, 10]],
            [[2, 9, 7], [4, 1, 2], [4, 2, 4]],
        ]
        batches = [
            Batch(frames=pd.DataFrame(np.array(rows), columns=columns))
            for rows in input_rows
        ]

        "query: .... ORDER BY A ASC, B DESC limit 2"

        sort_keys = [
            (TupleValueExpression(col_alias='A'), ParserOrderBySortType.ASC),
            (TupleValueExpression(col_alias='B'), ParserOrderBySortType.DESC),
        ]
        orderby_executor = OrderByExecutor(OrderByPlan(sort_keys))
        orderby_executor.append_child(DummyExecutor(batches))
        sorted_batches = list(orderby_executor.exec())

        # feed the sorted batches through a LIMIT 2
        limit_plan = LimitPlan(ConstantValueExpression(2))
        limit_executor = LimitExecutor(limit_plan)
        limit_executor.append_child(DummyExecutor(sorted_batches))
        reduced_batches = list(limit_executor.exec())

        # merge everything into one batch
        aggregated_batch = Batch.concat(reduced_batches, copy=False)
        """
           A  B   C
        0  1  5   6
        1  1  1   1
        """

        expected_frame = pd.DataFrame(np.array([[1, 5, 6], [1, 1, 1]]),
                                      columns=columns)
        expected_batch = Batch(frames=expected_frame)

        self.assertEqual(expected_batch, aggregated_batch)
 def test_aggregation_max(self):
     """MAX aggregation over column 0 of the batch evaluates to 3."""
     max_column = TupleValueExpression(col_name=0)
     max_column.col_alias = 0
     max_expr = AggregationExpression(
         ExpressionType.AGGREGATION_MAX, None, max_column)

     frame = pd.DataFrame({0: [1, 2, 3], 1: [2, 3, 4], 2: [3, 4, 5]})
     result = max_expr.evaluate(Batch(frame), None)

     self.assertEqual(3, result.frames.iloc[0][0])
    def _bind_load_data_statement(self, node: LoadDataStatement):
        """Bind the target table and column list of a LOAD DATA statement.

        For loads whose file format is VIDEO, video metadata is created for
        the target table name before binding. The table reference is then
        bound; if it does not resolve to a table object the load is
        rejected. When the statement names no columns, one
        TupleValueExpression is built per column of the bound table. Every
        column expression is bound and the list is stored back on
        ``node.column_list``.

        Args:
            node: the LOAD DATA statement to bind; mutated in place.

        Raises:
            RuntimeError: if the target table has no bound table object.
        """
        table_ref = node.table_ref
        if node.file_options['file_format'] == FileFormatType.VIDEO:
            # Create a new metadata object
            create_video_metadata(table_ref.table.table_name)

        self.bind(table_ref)

        table_ref_obj = table_ref.table.table_obj
        if table_ref_obj is None:
            # Adjacent literals instead of a backslash continuation inside
            # the string: the continuation embedded the source indentation
            # in the logged/raised message.
            error = ('{} does not exist. Create the table using '
                     'CREATE TABLE.').format(table_ref.table.table_name)
            logger.error(error)
            raise RuntimeError(error)

        if node.column_list is not None:
            # the query specified columns explicitly: use them as given
            column_list = node.column_list
        else:
            # otherwise derive the column list from the table metadata
            column_list = [
                TupleValueExpression(
                    col_name=column.name,
                    table_alias=table_ref_obj.name.lower(),
                    col_object=column)
                for column in table_ref_obj.columns
            ]

        # bind the columns
        for expr in column_list:
            self.bind(expr)

        node.column_list = column_list
Beispiel #6
0
    def test_if_expr_tree_is_equal(self):
        """Identical, separately built expression trees compare equal."""

        def build_tree():
            # AVG(DATA) != 0, built from fresh nodes on every call
            column = TupleValueExpression(col_name='DATA')
            average = AggregationExpression(
                ExpressionType.AGGREGATION_AVG, None, column)
            return ComparisonExpression(
                ExpressionType.COMPARE_NEQ, average,
                ConstantValueExpression(0))

        first_tree = build_tree()
        second_tree = build_tree()

        self.assertEqual(first_tree, second_tree)
Beispiel #7
0
    def visitFullColumnName(self, ctx: evaql_parser.FullColumnNameContext):
        """Build a TupleValueExpression for ``col`` or ``table.col``.

        With exactly one dotted id, the visited uid becomes the table alias
        and the visited dotted id becomes the column name. With no dotted
        id, the visited uid is the column name. With more than one dotted
        id (e.g. ``a.b.c``) an error is logged and None is returned.
        """
        dotted_nodes = ctx.dottedId()
        visited_dotted = []
        if dotted_nodes:
            if len(dotted_nodes) != 1:
                logger.error("Only tablename.colname syntax supported")
                return
            visited_dotted = [self.visit(node) for node in dotted_nodes]

        leading_name = self.visit(ctx.uid())

        if not visited_dotted:
            return TupleValueExpression(col_name=leading_name)
        return TupleValueExpression(table_alias=leading_name,
                                    col_name=visited_dotted[0])
Beispiel #8
0
def extend_star(binder_context: StatementBinderContext) \
        -> List[TupleValueExpression]:
    """Return one TupleValueExpression per column known to the binder.

    Each (alias, column name) pair reported by the binder context becomes
    an expression carrying that table alias and column name.
    """
    alias_name_pairs = binder_context._get_all_alias_and_col_name()
    return [
        TupleValueExpression(col_name=name, table_alias=table_alias)
        for table_alias, name in alias_name_pairs
    ]
Beispiel #9
0
    def visitUidList(self, ctx: evaql_parser.UidListContext):
        """Visit every uid in the list and wrap each result in a
        TupleValueExpression, preserving order."""
        uid_count = len(ctx.uid())
        return [
            TupleValueExpression(self.visit(ctx.uid(position)))
            for position in range(uid_count)
        ]
Beispiel #10
0
    def test_join(self):
        """Parse a single inner join with an ON predicate."""
        select_query = '''SELECT table1.a FROM table1 JOIN table2
                    ON table1.a = table2.a; '''
        parsed_stmt = Parser().parse(select_query)[0]

        left_col = TupleValueExpression('a', 'table1')
        right_col = TupleValueExpression('a', 'table2')

        # table1 JOIN table2 ON table1.a = table2.a
        on_predicate = ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, left_col, right_col)
        join_node = JoinNode(TableRef(TableInfo('table1')),
                             TableRef(TableInfo('table2')),
                             predicate=on_predicate,
                             join_type=JoinType.INNER_JOIN)
        expected_stmt = SelectStatement([left_col], TableRef(join_node))

        self.assertEqual(parsed_stmt, expected_stmt)
Beispiel #11
0
    def test_insert_statement(self):
        """Parse an INSERT with two columns and two values."""
        insert_query = """INSERT INTO MyVideo (Frame_ID, Frame_Path)
                                    VALUES    (1, '/mnt/frames/1.png');
                        """
        eva_statement_list = Parser().parse(insert_query)

        # exactly one statement, and it is an INSERT
        self.assertIsInstance(eva_statement_list, list)
        self.assertEqual(len(eva_statement_list), 1)
        insert_stmt = eva_statement_list[0]
        self.assertEqual(insert_stmt.stmt_type, StatementType.INSERT)

        target_columns = [
            TupleValueExpression('Frame_ID'),
            TupleValueExpression('Frame_Path'),
        ]
        inserted_values = [
            ConstantValueExpression(1),
            ConstantValueExpression('/mnt/frames/1.png', ColumnType.TEXT),
        ]
        expected_stmt = InsertTableStatement(
            TableRef(TableInfo('MyVideo')), target_columns, inserted_values)

        self.assertEqual(insert_stmt, expected_stmt)
Beispiel #12
0
    def test_should_return_one_batch(self):
        """Batches read by CSVReader match create_dummy_csv_batches()."""
        column_list = [
            TupleValueExpression(col_name='id', table_alias='dummy'),
            TupleValueExpression(col_name='frame_id', table_alias='dummy'),
            TupleValueExpression(col_name='video_id', table_alias='dummy')
        ]

        # call the CSVReader
        csv_loader = CSVReader(file_url=os.path.join(PATH_PREFIX, 'dummy.csv'),
                               column_list=column_list,
                               batch_mem_size=NUM_FRAMES * FRAME_SIZE)

        # get the batches
        batches = list(csv_loader.read())
        expected = list(create_dummy_csv_batches())

        # assertTrue(batches, expected) treated `expected` as the failure
        # message and only checked that `batches` was non-empty; compare
        # the two lists for real.
        self.assertEqual(batches, expected)
Beispiel #13
0
    def visitSelectElements(self, ctx: evaql_parser.SelectElementsContext):
        """Return the select list for a SELECT clause.

        When ``ctx.star`` is set the list holds a single
        TupleValueExpression with column name '*'; otherwise it holds the
        visited result of every select element, in order.
        """
        if ctx.star:
            return [TupleValueExpression(col_name='*')]

        element_count = len(ctx.selectElement())
        return [
            self.visit(ctx.selectElement(position))
            for position in range(element_count)
        ]
Beispiel #14
0
    def test_should_call_csv_reader_and_storage_engine(self, write_mock):
        """Run LoadDataExecutor on a sample CSV and check its summary batch.

        Args:
            write_mock: mock injected by a patch decorator that is outside
                this block (presumably the storage engine's write --
                TODO confirm).
        """
        batch_frames = [list(range(5))] * 2

        # creates a dummy.csv
        create_sample_csv()

        try:
            file_path = 'dummy.csv'
            table_metainfo = 'info'
            batch_mem_size = 3000
            file_options = {}
            file_options['file_format'] = FileFormatType.CSV
            column_list = [
                TupleValueExpression(col_name='id', table_alias='dummy'),
                TupleValueExpression(col_name='frame_id',
                                     table_alias='dummy'),
                TupleValueExpression(col_name='video_id',
                                     table_alias='dummy')
            ]
            # lightweight stand-in exposing the attributes set below
            plan = type(
                "LoadDataPlan", (), {
                    'table_metainfo': table_metainfo,
                    'file_path': file_path,
                    'batch_mem_size': batch_mem_size,
                    'column_list': column_list,
                    'file_options': file_options
                })

            load_executor = LoadDataExecutor(plan)
            batch = next(load_executor.exec())
            # NOTE(review): `has_calls` is not a Mock assertion method; on a
            # Mock/MagicMock without a spec this attribute is auto-created
            # and the call verifies nothing. `assert_has_calls([...])` is
            # the real check, but the expected write arguments need to be
            # confirmed against the executor before enabling it.
            write_mock.has_calls(call(table_metainfo, batch_frames[0]),
                                 call(table_metainfo, batch_frames[1]))

            # Note: We call exec() from the child classes.
            self.assertEqual(
                batch,
                Batch(
                    pd.DataFrame([{
                        'CSV': file_path,
                        'Number of loaded frames': 20
                    }])))
        finally:
            # remove the dummy.csv even when an assertion above fails
            file_remove('dummy.csv')
Beispiel #15
0
    def test_short_circuiting_or_complete(self):
        """OR never evaluates its right side when the left is True on
        every row."""
        left_col = TupleValueExpression(col_name=0)
        left_col.col_alias = 0
        right_col = TupleValueExpression(col_name=1)
        right_col.col_alias = 1

        # columns 0 and 1 hold the same values, so this is True everywhere
        lhs = ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, left_col, right_col)
        rhs = Mock(spec=ComparisonExpression)
        or_expr = LogicalExpression(ExpressionType.LOGICAL_OR, lhs, rhs)

        tuples = Batch(pd.DataFrame(
            {0: [1, 2, 3], 1: [1, 2, 3]}))
        outcome = or_expr.evaluate(tuples).frames[0].tolist()

        self.assertEqual([True, True, True], outcome)
        rhs.evaluate.assert_not_called()
Beispiel #16
0
    def test_short_circuiting_or_partial(self):
        """OR evaluates its right side only for rows where the left side
        is False (rows 0 and 1 here)."""
        left_col = TupleValueExpression(col_name=0)
        left_col.col_alias = 0
        right_col = TupleValueExpression(col_name=1)
        right_col.col_alias = 1

        lhs = ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, left_col, right_col)

        # mocked right side: answers for the two rows it is asked about
        rhs_result = Mock(frames=[[True], [False]])
        rhs = Mock(spec=ComparisonExpression)
        rhs.evaluate = Mock(return_value=rhs_result)

        or_expr = LogicalExpression(ExpressionType.LOGICAL_OR, lhs, rhs)

        tuples = Batch(pd.DataFrame(
            {0: [1, 2, 3, 4], 1: [5, 6, 3, 4]}))
        outcome = or_expr.evaluate(tuples).frames[0].tolist()

        self.assertEqual([True, False, True, True], outcome)
        rhs.evaluate.assert_called_once_with(tuples, mask=[0, 1])
Beispiel #17
0
    def test_load_csv_data_statement(self):
        """Parse LOAD DATA ... WITH FORMAT CSV with an explicit column
        list."""
        load_data_query = """LOAD DATA INFILE 'data/meta.csv'
                             INTO
                             MyMeta (id, frame_id, video_id, label)
                             WITH FORMAT CSV;"""
        eva_statement_list = Parser().parse(load_data_query)

        # exactly one statement, and it is a LOAD DATA
        self.assertIsInstance(eva_statement_list, list)
        self.assertEqual(len(eva_statement_list), 1)
        load_data_stmt = eva_statement_list[0]
        self.assertEqual(load_data_stmt.stmt_type,
                         StatementType.LOAD_DATA)

        expected_columns = [
            TupleValueExpression(name)
            for name in ('id', 'frame_id', 'video_id', 'label')
        ]
        expected_options = {'file_format': FileFormatType.CSV}
        expected_stmt = LoadDataStatement(TableRef(TableInfo('MyMeta')),
                                          Path('data/meta.csv'),
                                          expected_columns,
                                          expected_options)

        self.assertEqual(load_data_stmt, expected_stmt)
Beispiel #18
0
 def test_lateral_join_with_where(self):
     """Parse a JOIN LATERAL against a function call.

     Note: despite the test name, the query below has no WHERE clause.
     """
     select_query = '''SELECT frame FROM MyVideo JOIN LATERAL
                         ObjectDet(frame);'''
     parsed_stmt = Parser().parse(select_query)[0]

     frame_col = TupleValueExpression('frame')
     detector_call = FunctionExpression(func=None,
                                        name='ObjectDet',
                                        children=[frame_col])
     lateral_join = JoinNode(TableRef(TableInfo('MyVideo')),
                             TableRef(detector_call),
                             join_type=JoinType.LATERAL_JOIN)
     expected_stmt = SelectStatement([frame_col], TableRef(lateral_join))

     self.assertEqual(parsed_stmt, expected_stmt)
Beispiel #19
0
    def test_should_return_false_for_unequal_expressions(self):
        """Expressions of different kinds or values never compare equal."""
        zero = ConstantValueExpression(0)
        one = ConstantValueExpression(1)
        func_expr = FunctionExpression(lambda x: x + 1, name='test')
        cmpr_exp = ComparisonExpression(
            ExpressionType.COMPARE_NEQ, zero, one)
        tuple_expr = TupleValueExpression(col_name='id')
        aggr_expr = AggregationExpression(
            ExpressionType.AGGREGATION_MAX, None, tuple_expr)
        logical_expr = LogicalExpression(
            ExpressionType.LOGICAL_OR, cmpr_exp, cmpr_exp)

        # checked in this order; the first equal pair fails the test
        unequal_pairs = [
            (zero, one),
            (cmpr_exp, zero),
            (func_expr, cmpr_exp),
            (tuple_expr, aggr_expr),
            (aggr_expr, tuple_expr),
            (tuple_expr, cmpr_exp),
            (logical_expr, cmpr_exp),
        ]
        for first, second in unequal_pairs:
            self.assertNotEqual(first, second)
Beispiel #20
0
 def test_masking(self):
     """evaluate() returns only the rows whose positions are in `mask`."""
     column_exprs = []
     for column_key in (0, 1, 2):
         expr = TupleValueExpression(col_name=column_key)
         expr.col_alias = column_key
         column_exprs.append(expr)
     first_col, second_col, third_col = column_exprs

     tuples = Batch(
         pd.DataFrame({
             0: [1, 2, 3, 4, 5, 6],
             1: [7, 8, 9, 10, 11, 12],
             2: [13, 14, 15, 16, 17, 18]
         }))

     # full mask: every row comes back
     every_row = first_col.evaluate(tuples, mask=[0, 1, 2, 3, 4, 5])
     self.assertEqual([1, 2, 3, 4, 5, 6], every_row.frames[0].tolist())

     # partial mask: rows 0, 2 and 4 only
     some_rows = second_col.evaluate(tuples, mask=[0, 2, 4])
     self.assertEqual([7, 9, 11], some_rows.frames[1].tolist())

     # empty mask: nothing comes back
     no_rows = third_col.evaluate(tuples, mask=[])
     self.assertEqual([], no_rows.frames[2].tolist())
Beispiel #21
0
 def _bind_tuple_expr(self, node: TupleValueExpression):
     """Resolve a column reference through the binder context.

     Stores '<table alias>.<lower-cased column name>' as the node's
     col_alias and the returned column object as its col_object.
     """
     binding = self._binder_context.get_binded_column(
         node.col_name, node.table_alias)
     resolved_alias, column_object = binding
     lowered_name = node.col_name.lower()
     node.col_alias = '{}.{}'.format(resolved_alias, lowered_name)
     node.col_object = column_object