def test_nested_select_statement(self):
    """A sub-select in FROM parses into a TableRef wrapping that select."""
    parser = Parser()

    # Case 1: simple column projection over an aliased sub-query.
    inner_sql = """SELECT CLASS FROM TAIPAI WHERE CLASS = 'VAN'"""
    outer_sql = """SELECT ID FROM ({}) AS T;""".format(inner_sql)
    inner_stmt = parser.parse(inner_sql)[0]
    outer_stmt = parser.parse(outer_sql)[0]

    self.assertEqual(outer_stmt.stmt_type, StatementType.SELECT)
    self.assertEqual(outer_stmt.target_list[0].col_name, 'ID')
    self.assertEqual(outer_stmt.from_table,
                     TableRef(inner_stmt, alias='T'))

    # Case 2: function calls in the sub-query and in the outer query.
    # The outer target list / WHERE must match those of the same query
    # written against a plain table.
    inner_sql = """SELECT Yolo(frame).bbox FROM autonomous_vehicle_1 WHERE Yolo(frame).label = 'vehicle'"""
    outer_sql = """SELECT Licence_plate(bbox) FROM ({}) AS T WHERE Is_suspicious(bbox) = 1 AND Licence_plate(bbox) = '12345'; """.format(inner_sql)
    flat_sql = """SELECT Licence_plate(bbox) FROM TAIPAI WHERE Is_suspicious(bbox) = 1 AND Licence_plate(bbox) = '12345'; """

    flat_stmt = parser.parse(flat_sql)[0]
    outer_stmt = parser.parse(outer_sql)[0]
    inner_stmt = parser.parse(inner_sql)[0]

    self.assertEqual(outer_stmt.from_table,
                     TableRef(inner_stmt, alias='T'))
    self.assertEqual(outer_stmt.where_clause, flat_stmt.where_clause)
    self.assertEqual(outer_stmt.target_list, flat_stmt.target_list)
def test_multiple_join_with_multiple_ON(self):
    """Two chained JOIN ... ON clauses parse into nested inner JoinNodes."""
    sql = '''SELECT table1.a FROM table1 JOIN table2 ON table1.a = table2.a JOIN table3 ON table3.a = table1.a WHERE table1.a <= 5'''
    parsed_stmt = Parser().parse(sql)[0]

    t1_a = TupleValueExpression('a', 'table1')
    t2_a = TupleValueExpression('a', 'table2')
    t3_a = TupleValueExpression('a', 'table3')

    # Inner-most join: table1 JOIN table2 ON table1.a = table2.a
    first_predicate = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL, t1_a, t2_a)
    first_join = TableRef(
        JoinNode(TableRef(TableInfo('table1')),
                 TableRef(TableInfo('table2')),
                 predicate=first_predicate,
                 join_type=JoinType.INNER_JOIN))

    # Outer join: (first join) JOIN table3 ON table3.a = table1.a
    second_predicate = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL, t3_a, t1_a)
    expected_from = TableRef(
        JoinNode(first_join,
                 TableRef(TableInfo('table3')),
                 predicate=second_predicate,
                 join_type=JoinType.INNER_JOIN))

    expected_where = ComparisonExpression(
        ExpressionType.COMPARE_LEQ, t1_a, ConstantValueExpression(5))

    expected_stmt = SelectStatement([t1_a], expected_from, expected_where)
    self.assertEqual(parsed_stmt, expected_stmt)
def test_lateral_join_with_where(self):
    """JOIN LATERAL on a function call parses into a LATERAL_JOIN node.

    NOTE(review): despite the test name, the query below carries no
    WHERE clause.
    """
    sql = '''SELECT frame FROM MyVideo JOIN LATERAL ObjectDet(frame);'''
    parsed_stmt = Parser().parse(sql)[0]

    frame_col = TupleValueExpression('frame')
    object_det = FunctionExpression(func=None,
                                    name='ObjectDet',
                                    children=[frame_col])
    lateral_join = JoinNode(TableRef(TableInfo('MyVideo')),
                            TableRef(object_det),
                            join_type=JoinType.LATERAL_JOIN)

    expected_stmt = SelectStatement([frame_col], TableRef(lateral_join))
    self.assertEqual(parsed_stmt, expected_stmt)
def visitInnerJoin(self, ctx: evaql_parser.InnerJoinContext):
    """Build a TableRef(JoinNode) for the joined source of a JOIN clause.

    The first JoinNode argument is passed as None here (presumably
    filled in by the caller -- confirm against the call site). A source
    that is a function expression produces a LATERAL join with no
    predicate; any other source must come with an ON clause and
    produces an INNER join using the visited expression as predicate.

    Raises:
        Exception: a non-function source was joined without ON.
    """
    joined = self.visit(ctx.tableSourceItemWithSample())

    # Function-expression sources need no ON predicate.
    if joined.is_func_expr():
        lateral = JoinNode(None, joined, join_type=JoinType.LATERAL_JOIN)
        return TableRef(lateral)

    if ctx.ON() is None:
        raise Exception(
            'ERROR: Syntax error: Join should specify the ON columns')

    predicate = self.visit(ctx.expression())
    inner = JoinNode(None,
                     joined,
                     predicate=predicate,
                     join_type=JoinType.INNER_JOIN)
    return TableRef(inner)
def test_should_return_false_for_unequal_plans_and_true_for_equal_plans(
        self):
    """Each logical plan equals itself and differs from its list neighbour."""
    create_plan = LogicalCreate(TableRef(TableInfo("video")), [MagicMock()])
    create_udf_plan = LogicalCreateUDF("udf", False, None, None, None)
    insert_plan = LogicalInsert(
        MagicMock(), 0, [MagicMock()], [MagicMock()])
    query_derived_plan = LogicalQueryDerivedGet(alias="T")
    load_plan = LogicalLoadData(
        MagicMock(), MagicMock(), MagicMock(), MagicMock())
    rename_plan = LogicalRename(TableRef(TableInfo("old")), TableInfo("new"))
    show_plan = LogicalShow(MagicMock())
    drop_plan = LogicalDrop([MagicMock()], True)
    get_plan = LogicalGet(MagicMock(), MagicMock(), MagicMock())
    sample_plan = LogicalSample(MagicMock())
    filter_plan = LogicalFilter(MagicMock())
    order_by_plan = LogicalOrderBy(MagicMock())
    union_plan = LogicalUnion(MagicMock())
    function_scan_plan = LogicalFunctionScan(MagicMock())
    join_plan = LogicalJoin(
        MagicMock(), MagicMock(), MagicMock(), MagicMock())

    create_plan.append_child(create_udf_plan)

    # Order matters: each entry is compared with the one before it.
    plans = [
        create_plan,
        create_udf_plan,
        insert_plan,
        query_derived_plan,
        load_plan,
        rename_plan,
        drop_plan,
        get_plan,
        sample_plan,
        filter_plan,
        order_by_plan,
        union_plan,
        function_scan_plan,
        join_plan,
        show_plan,
    ]

    for position, plan in enumerate(plans):
        self.assertEqual(plan, plan)
        if position >= 1:
            # compare against the previous plan in the list
            self.assertNotEqual(plans[position - 1], plan)
def visitInsertStatement(self, ctx: evaql_parser.InsertStatementContext):
    """Translate an INSERT parse-tree node into an InsertTableStatement.

    Walks the children after the leading INSERT INTO tokens and picks up
    the table name, the optional column list and the value list.

    Returns:
        An InsertTableStatement, or None if visiting any child raised
        an ordinary exception (best-effort parsing).
    """
    table_ref = None
    column_list = []
    value_list = []

    # The first two children will be INSERT INTO.
    # After that there are also terminal nodes for '(' and ')', which
    # carry no information and are skipped.
    for child in ctx.children[2:]:
        if isinstance(child, TerminalNode):
            continue
        try:
            rule_idx = child.getRuleIndex()

            if rule_idx == evaql_parser.RULE_tableName:
                table_ref = TableRef(self.visit(ctx.tableName()))

            elif rule_idx == evaql_parser.RULE_uidList:
                column_list = self.visit(ctx.uidList())

            elif rule_idx == evaql_parser.RULE_insertStatementValue:
                insrt_value = self.visit(ctx.insertStatementValue())
                # Support only (value1, value2, .... value n)
                value_list = insrt_value[0]
        except Exception:
            # Stop parsing, something bad happened. Catch Exception
            # rather than BaseException so that KeyboardInterrupt and
            # SystemExit still propagate instead of being turned into
            # a silent None.
            return None

    return InsertTableStatement(table_ref, column_list, value_list)
def test_visit_load_statement(self, mock_load, mock_visit):
    """visitLoadStatement visits its sub-rules in order and builds the stmt."""
    ctx = MagicMock()
    table = 'myVideo'
    path = MagicMock()
    path.value = 'video.mp4'
    column_list = None
    file_options = {'file_format': FileFormatType.VIDEO}

    # Map each sub-context handed to visit() onto the value it yields.
    visit_results = {
        ctx.fileName.return_value: path,
        ctx.tableName.return_value: table,
        ctx.fileOptions.return_value: file_options,
        ctx.uidList.return_value: column_list,
    }
    mock_visit.side_effect = lambda arg: visit_results[arg]

    ParserVisitor().visitLoadStatement(ctx)

    expected_calls = [
        call(ctx.fileName()),
        call(ctx.tableName()),
        call(ctx.fileOptions()),
        call(ctx.uidList()),
    ]
    mock_visit.assert_has_calls(expected_calls)
    mock_load.assert_called_once()
    mock_load.assert_called_with(TableRef(table), 'video.mp4',
                                 column_list, file_options)
def test_create_materialized_view_plan(self):
    """CreateMaterializedViewPlan exposes its operator type, view and columns."""
    view_ref = TableRef(TableInfo('dummy'))
    column_names = ['id', 'id2']

    view_plan = CreateMaterializedViewPlan(view_ref, column_names)

    self.assertEqual(view_plan.opr_type,
                     PlanOprType.CREATE_MATERIALIZED_VIEW)
    self.assertEqual(view_plan.view, view_ref)
    self.assertEqual(view_plan.columns, column_names)
def visitColumnCreateTable(self,
                           ctx: evaql_parser.ColumnCreateTableContext):
    """Translate a CREATE TABLE parse-tree node into a CreateTableStatement.

    Walks the children after the leading CREATE TABLE tokens and picks
    up the table name, the IF NOT EXISTS flag and the column
    definitions.

    Returns:
        A CreateTableStatement, or None if inspecting or visiting any
        child raised an ordinary exception (best-effort parsing).
    """
    table_ref = None
    if_not_exists = False
    create_definitions = []

    # first two children will be CREATE TABLE terminal token
    for child in ctx.children[2:]:
        try:
            rule_idx = child.getRuleIndex()

            if rule_idx == evaql_parser.RULE_tableName:
                table_ref = TableRef(self.visit(ctx.tableName()))

            elif rule_idx == evaql_parser.RULE_ifNotExists:
                if_not_exists = True

            elif rule_idx == evaql_parser.RULE_createDefinitions:
                create_definitions = self.visit(ctx.createDefinitions())
        except Exception:
            # Stop parsing, something bad happened. Catch Exception
            # rather than BaseException so that KeyboardInterrupt and
            # SystemExit still propagate instead of being turned into
            # a silent None.
            return None

    return CreateTableStatement(table_ref, if_not_exists,
                                create_definitions)
def test_join(self):
    """A single JOIN ... ON parses into one inner JoinNode."""
    sql = '''SELECT table1.a FROM table1 JOIN table2 ON table1.a = table2.a; '''
    parsed_stmt = Parser().parse(sql)[0]

    t1_a = TupleValueExpression('a', 'table1')
    t2_a = TupleValueExpression('a', 'table2')

    on_predicate = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL, t1_a, t2_a)
    join_node = JoinNode(TableRef(TableInfo('table1')),
                         TableRef(TableInfo('table2')),
                         predicate=on_predicate,
                         join_type=JoinType.INNER_JOIN)

    expected_stmt = SelectStatement([t1_a], TableRef(join_node))
    self.assertEqual(parsed_stmt, expected_stmt)
def test_drop_plan(self):
    """DropPlan keeps its table refs and reports the DROP operator type."""
    table_ref = TableRef(TableInfo('dummy'))
    CatalogManager().reset()

    drop_plan = DropPlan([table_ref], False)

    self.assertEqual(drop_plan.opr_type, PlanOprType.DROP)
    first_ref = drop_plan.table_refs[0]
    self.assertEqual(first_ref.table.table_name, "dummy")
def test_rename_plan(self):
    """RenamePlan keeps the old table ref and the new table info."""
    old_ref = TableRef(TableInfo("old"))
    new_info = TableInfo("new")
    CatalogManager().reset()

    rename_plan = RenamePlan(old_ref, new_info)

    self.assertEqual(rename_plan.opr_type, PlanOprType.RENAME)
    self.assertEqual(rename_plan.old_table.table.table_name, "old")
    self.assertEqual(rename_plan.new_name.table_name, "new")
def visitTableSourceItemWithSample(
        self, ctx: evaql_parser.TableSourceItemWithSampleContext):
    """Build a TableRef from a table source plus optional SAMPLE and AS parts.

    The sample frequency and the alias are None when the corresponding
    clause is absent from the parse tree.
    """
    source = self.visit(ctx.tableSourceItem())
    sample_freq = (self.visit(ctx.sampleClause())
                   if ctx.sampleClause() else None)
    alias = self.visit(ctx.uid()) if ctx.AS() else None
    return TableRef(source, alias, sample_freq)
def test_drop_statement(self):
    """DROP TABLE parses into a single DropTableStatement."""
    sql = "DROP TABLE student_info"
    expected_stmt = DropTableStatement(
        [TableRef(TableInfo('student_info'))], False)

    statements = Parser().parse(sql)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    parsed_stmt = statements[0]
    self.assertEqual(parsed_stmt.stmt_type, StatementType.DROP)
    self.assertEqual(parsed_stmt, expected_stmt)
def test_raises_mismatch_columns(self, mock_check):
    """Executor raises when the child projects 3 exprs for a 2-column view."""
    mock_check.return_value = False
    view_ref = TableRef(TableInfo('dummy'))
    view_plan = CreateMaterializedViewPlan(view_ref, ['id', 'id2'])

    # A sequential-scan child whose projection list reports length 3.
    scan_child = MagicMock()
    scan_child.node.opr_type = PlanOprType.SEQUENTIAL_SCAN
    scan_child.project_expr.__len__.return_value = 3

    with self.assertRaises(RuntimeError):
        view_executor = CreateMaterializedViewExecutor(view_plan)
        view_executor.append_child(scan_child)
        view_executor.exec()
def test_rename_statement(self):
    """RENAME TABLE parses into a single RenameTableStatement."""
    sql = "RENAME TABLE student TO student_info"
    expected_stmt = RenameTableStatement(
        TableRef(TableInfo('student')), TableInfo('student_info'))

    statements = Parser().parse(sql)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    parsed_stmt = statements[0]
    self.assertEqual(parsed_stmt.stmt_type, StatementType.RENAME)
    self.assertEqual(parsed_stmt, expected_stmt)
def test_table_ref(self):
    """TableInfo fields stay reachable through a statement's from_table."""
    info = TableInfo('TAIPAI', 'Schema', 'Database')
    stmt = SelectStatement()
    stmt.from_table = TableRef(info)

    self.assertEqual(stmt.from_table.table.table_name, 'TAIPAI')
    self.assertEqual(stmt.from_table.table.schema_name, 'Schema')
    self.assertEqual(stmt.from_table.table.database_name, 'Database')
def test_support_only_seq_scan(self, mock_check):
    """Executor raises for every child operator type except SEQUENTIAL_SCAN."""
    mock_check.return_value = False
    view_ref = TableRef(TableInfo('dummy'))
    view_plan = CreateMaterializedViewPlan(view_ref, ['id', 'id2'])

    unsupported_types = [
        opr_type for opr_type in PlanOprType
        if opr_type is not PlanOprType.SEQUENTIAL_SCAN
    ]
    for opr_type in unsupported_types:
        child = MagicMock()
        child.node.opr_type = opr_type
        with self.assertRaises(RuntimeError):
            view_executor = CreateMaterializedViewExecutor(view_plan)
            view_executor.append_child(child)
            view_executor.exec()
def test_materialized_view(self):
    """CREATE MATERIALIZED VIEW parses into the expected statement."""
    select_sql = '''SELECT id, FastRCNNObjectDetector(frame).labels FROM MyVideo WHERE id<5; '''
    view_sql = 'CREATE MATERIALIZED VIEW uadtrac_fastRCNN (id, labels) AS {}'\
        .format(select_sql)

    parser = Parser()
    parsed_view = parser.parse(view_sql)
    parsed_select = parser.parse(select_sql)

    # Column definitions carry only the name; other attributes are None.
    expected_columns = [
        ColumnDefinition('id', None, None, None),
        ColumnDefinition('labels', None, None, None),
    ]
    expected_stmt = CreateMaterializedViewStatement(
        TableRef(TableInfo('uadtrac_fastRCNN')),
        expected_columns,
        False,
        parsed_select[0])

    self.assertEqual(parsed_view[0], expected_stmt)
def test_create_plan(self):
    """CreatePlan exposes operator type, flag, table ref and column list."""
    table_ref = TableRef(TableInfo('dummy'))
    CatalogManager().reset()

    id_column = DataFrameColumn('id', ColumnType.INTEGER)
    name_column = DataFrameColumn('name', ColumnType.TEXT,
                                  array_dimensions=[50])

    create_plan = CreatePlan(table_ref, [id_column, name_column], False)

    self.assertEqual(create_plan.opr_type, PlanOprType.CREATE)
    self.assertEqual(create_plan.if_not_exists, False)
    self.assertEqual(create_plan.table_ref.table.table_name, "dummy")
    self.assertEqual(create_plan.column_list[0].name, "id")
    self.assertEqual(create_plan.column_list[1].name, "name")
def _bind_tableref(self, node: TableRef):
    """Bind a TableRef according to the kind of source it wraps.

    Handles, in this order: table atom, sub-select, join, function
    expression.

    Raises:
        ValueError: if the node is none of the kinds above.
    """
    if node.is_table_atom():
        # Table
        self._binder_context.add_table_alias(node.alias,
                                             node.table.table_name)
        bind_table_info(node.table)
        return

    if node.is_select():
        # Bind the sub-query against a fresh context, then switch back
        # to the enclosing context and register the derived table there.
        enclosing_context = self._binder_context
        self._binder_context = StatementBinderContext()
        self.bind(node.select_statement)
        self._binder_context = enclosing_context
        self._binder_context.add_derived_table_alias(
            node.alias, node.select_statement.target_list)
        return

    if node.is_join():
        self.bind(node.join_node.left)
        self.bind(node.join_node.right)
        if node.join_node.predicate:
            self.bind(node.join_node.predicate)
        return

    if node.is_func_expr():
        self.bind(node.func_expr)
        self._binder_context.add_derived_table_alias(
            node.func_expr.alias, [node.func_expr])
        return

    raise ValueError(f'Unsupported node {type(node)}')
def test_should_return_false_for_unequal_expression(self):
    """Statements of different kinds never compare equal."""
    table_ref = TableRef(TableInfo('MyVideo'))

    load_stmt = LoadDataStatement(table_ref,
                                  Path('data/video.mp4'),
                                  FileFormatType.VIDEO)
    insert_stmt = InsertTableStatement(table_ref)

    udf_inputs = [
        ColumnDefinition('frame', ColumnType.NDARRAY, NdArrayType.UINT8,
                         [3, 256, 256])
    ]
    udf_outputs = [
        ColumnDefinition('labels', ColumnType.NDARRAY, NdArrayType.STR,
                         [10])
    ]
    create_udf = CreateUDFStatement('udf', False, udf_inputs, udf_outputs,
                                    Path('data/fastrcnn.py'),
                                    'Classification')
    select_stmt = SelectStatement()

    # Checked pair by pair, including load/insert in both directions.
    self.assertNotEqual(load_stmt, insert_stmt)
    self.assertNotEqual(insert_stmt, load_stmt)
    self.assertNotEqual(create_udf, insert_stmt)
    self.assertNotEqual(select_stmt, create_udf)
def visitCreateMaterializedView(
        self, ctx: evaql_parser.CreateMaterializedViewContext):
    """Translate a CREATE MATERIALIZED VIEW node into its statement.

    Column definitions are created from the uid list with only the
    column name set; the remaining attributes are None.
    """
    view_ref = TableRef(self.visit(ctx.tableName()))
    if_not_exists = True if ctx.ifNotExists() else False

    # setting all other column definition attributes as None,
    # need to figure from query
    col_list = []
    for uid in self.visit(ctx.uidList()):
        col_list.append(ColumnDefinition(uid.col_name, None, None, None))

    select_stmt = self.visit(ctx.selectStatement())
    return CreateMaterializedViewStatement(view_ref, col_list,
                                           if_not_exists, select_stmt)
def test_load_video_data_statement(self):
    """LOAD DATA ... WITH FORMAT VIDEO parses into a LoadDataStatement."""
    sql = """LOAD DATA INFILE 'data/video.mp4' INTO MyVideo WITH FORMAT VIDEO;"""
    file_options = {'file_format': FileFormatType.VIDEO}
    column_list = None
    expected_stmt = LoadDataStatement(TableRef(TableInfo('MyVideo')),
                                      Path('data/video.mp4'),
                                      column_list,
                                      file_options)

    statements = Parser().parse(sql)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    parsed_stmt = statements[0]
    self.assertEqual(parsed_stmt.stmt_type, StatementType.LOAD_DATA)
    self.assertEqual(parsed_stmt, expected_stmt)
def test_insert_statement(self):
    """INSERT INTO ... VALUES parses into an InsertTableStatement."""
    sql = """INSERT INTO MyVideo (Frame_ID, Frame_Path) VALUES (1, '/mnt/frames/1.png'); """

    expected_columns = [
        TupleValueExpression('Frame_ID'),
        TupleValueExpression('Frame_Path'),
    ]
    expected_values = [
        ConstantValueExpression(1),
        ConstantValueExpression('/mnt/frames/1.png', ColumnType.TEXT),
    ]
    expected_stmt = InsertTableStatement(TableRef(TableInfo('MyVideo')),
                                         expected_columns,
                                         expected_values)

    statements = Parser().parse(sql)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    parsed_stmt = statements[0]
    self.assertEqual(parsed_stmt.stmt_type, StatementType.INSERT)
    self.assertEqual(parsed_stmt, expected_stmt)
def visitLoadStatement(self, ctx: evaql_parser.LoadStatementContext):
    """Translate a LOAD parse-tree node into a LoadDataStatement.

    Sub-rules are visited in the order: file name, table name, file
    options, uid list. When no file options are present the format
    defaults to VIDEO; when no uid list is present the column list is
    None.
    """
    file_path = self.visit(ctx.fileName()).value
    table = TableRef(self.visit(ctx.tableName()))

    if ctx.fileOptions():
        file_options = self.visit(ctx.fileOptions())
    else:
        # Set default for file_format as Video
        file_options = {'file_format': FileFormatType.VIDEO}

    # set default for column_list as None
    column_list = self.visit(ctx.uidList()) if ctx.uidList() else None

    return LoadDataStatement(table, file_path, column_list, file_options)
def test_load_csv_data_statement(self):
    """LOAD DATA with a column list and FORMAT CSV parses as expected."""
    sql = """LOAD DATA INFILE 'data/meta.csv' INTO MyMeta (id, frame_id, video_id, label) WITH FORMAT CSV;"""
    file_options = {'file_format': FileFormatType.CSV}
    expected_columns = [
        TupleValueExpression('id'),
        TupleValueExpression('frame_id'),
        TupleValueExpression('video_id'),
        TupleValueExpression('label'),
    ]
    expected_stmt = LoadDataStatement(TableRef(TableInfo('MyMeta')),
                                      Path('data/meta.csv'),
                                      expected_columns,
                                      file_options)

    statements = Parser().parse(sql)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    parsed_stmt = statements[0]
    self.assertEqual(parsed_stmt.stmt_type, StatementType.LOAD_DATA)
    self.assertEqual(parsed_stmt, expected_stmt)
def visitRenameTable(self, ctx: evaql_parser.RenameTableContext):
    """Translate a RENAME TABLE node into a RenameTableStatement.

    The old table name is wrapped in a TableRef; the new table name is
    passed through as returned by the visitor.
    """
    old_name = self.visit(ctx.oldtableName())
    old_table_ref = TableRef(old_name)
    new_table_name = self.visit(ctx.newtableName())
    return RenameTableStatement(old_table_ref, new_table_name)
def visitTables(self, ctx: evaql_parser.TablesContext):
    """Visit every child of the node and wrap each result in a TableRef.

    Returns:
        A list of TableRef objects, one per child, in child order.
    """
    return [TableRef(self.visit(child)) for child in ctx.children]