def test_join_tables_disambiguate_identifiers_in_condition(self):
    """An integration prefix inside a join condition is dropped in the plan.

    The query's join condition refers to ``int.tab1.column1``; the expected
    ``JoinStep`` carries ``tab1.column1`` instead.
    """
    join_condition = BinaryOperation(
        op='=',
        args=[
            Identifier('int.tab1.column1'),  # integration name included
            Identifier('tab2.column1'),
        ],
    )
    joined_tables = Join(
        left=Identifier('int.tab1'),
        right=Identifier('int.tab2'),
        condition=join_condition,
        join_type=JoinType.INNER_JOIN,
    )
    select_query = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
        from_table=joined_tables,
    )

    actual = plan_query(select_query, integrations=['int'])

    fetch_left = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab1')),
    )
    fetch_right = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab2')),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('tab1'),
            right=Identifier('tab2'),
            condition=BinaryOperation(
                op='=',
                args=[
                    Identifier('tab1.column1'),  # integration name gets stripped out
                    Identifier('tab2.column1'),
                ],
            ),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    project_step = ProjectStep(
        dataframe=Result(2),
        columns=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[fetch_left, fetch_right, join_step, project_step],
    )

    assert actual.steps == expected.steps
def test_plan_union_queries(self):
    """Plan a non-unique union of an integration select and a predictor join.

    The left branch is expected to become one fetch step; the right branch
    a fetch / apply-predictor / join / project sequence; a final
    ``UnionStep`` combines ``Result(0)`` and ``Result(4)``.
    """
    query1 = Select(
        targets=[
            Identifier('column1'),
            Constant(None, alias=Identifier('predicted')),
        ],
        from_table=Identifier('int.tab'),
        where=BinaryOperation('and', args=[
            BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
            BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
        ]),
    )

    query2 = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('pred.predicted', alias=Identifier('predicted')),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('mindsdb.pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
    )

    query = Union(left=query1, right=query2, unique=False)

    expected_plan = QueryPlan(
        steps=[
            # Query 1
            FetchDataframeStep(
                integration='int',
                query=Select(
                    targets=[
                        Identifier('tab.column1', alias=Identifier('column1')),
                        Constant(None, alias=Identifier('predicted')),
                    ],
                    from_table=Identifier('tab'),
                    where=BinaryOperation('and', args=[
                        BinaryOperation('=', args=[Identifier('tab.column1'),
                                                   Identifier('tab.column2')]),
                        BinaryOperation('>', args=[Identifier('tab.column3'),
                                                   Constant(0)]),
                    ]),
                ),
            ),
            # Query 2
            FetchDataframeStep(
                integration='int',
                query=Select(targets=[Star()], from_table=Identifier('tab1')),
            ),
            ApplyPredictorStep(namespace='mindsdb', dataframe=Result(1),
                               predictor=Identifier('pred')),
            JoinStep(
                left=Result(1),
                right=Result(2),
                query=Join(
                    left=Identifier('result_1', alias=Identifier('tab1')),
                    right=Identifier('result_2', alias=Identifier('pred')),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            ProjectStep(
                dataframe=Result(3),
                columns=[
                    Identifier('tab1.column1'),
                    Identifier('pred.predicted', alias=Identifier('predicted')),
                ],
            ),
            # Union
            UnionStep(left=Result(0), right=Result(4), unique=False),
        ],
    )

    plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb',
                      predictor_metadata={'pred': {}})

    # Compare the full lists: indexing over plan.steps alone would pass
    # silently when the planner emits fewer steps than expected.
    assert plan.steps == expected_plan.steps
def test_integration_name_is_case_insensitive(self):
    """Planning with integrations=['INT'] still resolves the 'int.tab' table."""
    select_query = Select(
        targets=[Identifier('column1')],
        from_table=Identifier('int.tab'),
        where=BinaryOperation('and', args=[
            BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
            BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
        ]),
    )

    # Expected query sent to the integration: columns qualified with 'tab'.
    qualified_filter = BinaryOperation('and', args=[
        BinaryOperation('=', args=[Identifier('tab.column1'), Identifier('tab.column2')]),
        BinaryOperation('>', args=[Identifier('tab.column3'), Constant(0)]),
    ])
    qualified_select = Select(
        targets=[Identifier('tab.column1', alias=Identifier('column1'))],
        from_table=Identifier('tab'),
        where=qualified_filter,
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=qualified_select)],
    )

    actual = plan_query(select_query, integrations=['INT'])

    assert actual.steps == expected.steps
def test_join_predictor_plan_default_namespace_predictor(self):
    """Join an integration table with a predictor named without a namespace.

    The predictor is written as plain ``pred``; the plan is requested with
    ``default_namespace='mindsdb'`` and the expected ``ApplyPredictorStep``
    uses namespace ``'mindsdb'``.
    """
    query = Select(
        targets=[Identifier('tab1.column1'), Identifier('pred.predicted')],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
    )

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(
                integration='int',
                query=Select(targets=[Star()], from_table=Identifier('tab1')),
            ),
            ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0),
                               predictor=Identifier('pred')),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('result_0', alias=Identifier('tab1')),
                    right=Identifier('result_1', alias=Identifier('pred')),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            ProjectStep(
                dataframe=Result(2),
                columns=[Identifier('tab1.column1'), Identifier('pred.predicted')],
            ),
        ],
    )

    plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb',
                      default_namespace='mindsdb', predictor_metadata={'pred': {}})

    # Whole-list comparison also catches a plan that is shorter than
    # expected, which a loop over range(len(plan.steps)) would not.
    assert plan.steps == expected_plan.steps
def test_integration_select_subquery_in_where(self):
    """A subquery used in an ``in`` filter stays inside the single fetch step.

    Both the outer column and the subquery's target are expected to be
    qualified with their own table names.
    """
    inner_query = Select(
        targets=[Identifier('column2')],
        from_table=Identifier('int.tab2'),
        parentheses=True,
    )
    outer_query = Select(
        targets=[Star()],
        from_table=Identifier('int.tab1'),
        where=BinaryOperation(op='in', args=(
            Identifier(parts=['column1']),
            inner_query,
        )),
    )

    expected_inner = Select(
        targets=[Identifier('tab2.column2', alias=Identifier('column2'))],
        from_table=Identifier('tab2'),
        parentheses=True,
    )
    expected_outer = Select(
        targets=[Star()],
        from_table=Identifier('tab1'),
        where=BinaryOperation(op='in', args=[
            Identifier('tab1.column1'),
            expected_inner,
        ]),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=expected_outer)],
    )

    actual = plan_query(outer_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_join_predictor_plan_order_by(self):
    """Filter, limit, offset and ordering of a predictor join go to the fetch step.

    The expected fetch query carries the ``where``, ``limit``, ``offset`` and
    ``order_by`` clauses; the remaining steps apply the predictor, join and
    project.
    """
    select_query = Select(
        targets=[Identifier('tab.column1'), Identifier('pred.predicted')],
        from_table=Join(
            left=Identifier('int.tab'),
            right=Identifier('mindsdb.pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
        where=BinaryOperation('=', args=[Identifier('tab.product_id'), Constant('x')]),
        limit=Constant(10),
        offset=Constant(15),
        order_by=[OrderBy(field=Identifier('tab.column1'))],
    )

    fetch_step = FetchDataframeStep(
        integration='int',
        query=Select(
            targets=[Star()],
            from_table=Identifier('tab'),
            where=BinaryOperation('=', args=[Identifier('tab.product_id'), Constant('x')]),
            limit=Constant(10),
            offset=Constant(15),
            order_by=[OrderBy(field=Identifier('tab.column1'))],
        ),
    )
    predict_step = ApplyPredictorStep(
        namespace='mindsdb',
        dataframe=Result(0),
        predictor=Identifier('pred'),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('result_0', alias=Identifier('tab')),
            right=Identifier('result_1', alias=Identifier('pred')),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    project_step = ProjectStep(
        dataframe=Result(2),
        columns=[Identifier('tab.column1'), Identifier('pred.predicted')],
    )
    expected = QueryPlan(
        steps=[fetch_step, predict_step, join_step, project_step],
    )

    actual = plan_query(
        select_query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    assert actual.steps == expected.steps
def test_integration_select_default_namespace(self):
    """Select from an unqualified table when a default namespace is given.

    The query reads from plain ``tab``; planning with
    ``default_namespace='int'`` is expected to yield one fetch step against
    integration ``'int'`` with the column identifiers qualified by ``tab``.
    """
    query = Select(
        targets=[
            Identifier('column1'),
            Constant(1),
            Function('database', args=[]),
        ],
        from_table=Identifier('tab'),
        where=BinaryOperation('and', args=[
            BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
            BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
        ]),
    )

    expected_plan = QueryPlan(
        integrations=['int'],
        default_namespace='int',
        steps=[
            FetchDataframeStep(
                integration='int',
                query=Select(
                    targets=[
                        Identifier('tab.column1', alias=Identifier('column1')),
                        Constant(1),
                        Function('database', args=[]),
                    ],
                    from_table=Identifier('tab'),
                    where=BinaryOperation('and', args=[
                        BinaryOperation('=', args=[Identifier('tab.column1'),
                                                   Identifier('tab.column2')]),
                        BinaryOperation('>', args=[Identifier('tab.column3'),
                                                   Constant(0)]),
                    ]),
                ),
                step_num=0,
                references=None,
            ),
        ],
    )

    plan = plan_query(query, integrations=['int'], default_namespace='int')

    # Compare complete step lists so a plan with missing steps fails;
    # an index loop over plan.steps would accept an empty plan.
    assert plan.steps == expected_plan.steps
def test_integration_select_plan_group_by(self):
    """GROUP BY and HAVING on one integration table stay in the fetch query.

    Targets, the aggregate argument, the group-by columns and the having
    condition are all expected to be qualified with ``tab``.
    """
    select_query = Select(
        targets=[
            Identifier('column1'),
            Identifier("column2"),
            Function(op="sum", args=[Identifier(parts=["column3"])],
                     alias=Identifier('total')),
        ],
        from_table=Identifier('int.tab'),
        group_by=[Identifier("column1"), Identifier("column2")],
        having=BinaryOperation('=', args=[Identifier("column1"), Constant(0)]),
    )

    qualified_targets = [
        Identifier('tab.column1', alias=Identifier('column1')),
        Identifier('tab.column2', alias=Identifier('column2')),
        Function(op="sum", args=[Identifier(parts=['tab', 'column3'])],
                 alias=Identifier('total')),
    ]
    qualified_select = Select(
        targets=qualified_targets,
        from_table=Identifier('tab'),
        group_by=[Identifier('tab.column1'), Identifier('tab.column2')],
        having=BinaryOperation('=', args=[Identifier('tab.column1'), Constant(0)]),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=qualified_select)],
    )

    actual = plan_query(select_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_integration_select_limit_offset(self):
    """LIMIT and OFFSET of a single-table select are kept in the fetch query."""
    select_query = Select(
        targets=[Identifier('column1')],
        from_table=Identifier('int.tab'),
        where=BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
        limit=Constant(10),
        offset=Constant(15),
    )

    qualified_select = Select(
        targets=[Identifier('tab.column1', alias=Identifier('column1'))],
        from_table=Identifier('tab'),
        where=BinaryOperation('=', args=[
            Identifier('tab.column1'),
            Identifier('tab.column2'),
        ]),
        limit=Constant(10),
        offset=Constant(15),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=qualified_select)],
    )

    actual = plan_query(select_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_predictors_select_plan(self):
    """Select from ``mindsdb.predictors`` with 'mindsdb' listed as an integration.

    The expected plan is a single fetch step against integration
    ``'mindsdb'`` whose column identifiers are qualified with ``predictors``;
    constants, the null constant and the function call are left as written.
    """
    query = Select(
        targets=[
            Identifier('column1'),
            Constant(1),
            NullConstant(),
            Function('database', args=[]),
        ],
        from_table=Identifier('mindsdb.predictors'),
        where=BinaryOperation(
            'and',
            args=[
                BinaryOperation(
                    '=',
                    args=[Identifier('column1'), Identifier('column2')]),
                BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
            ]))

    expected_plan = QueryPlan(
        integrations=['mindsdb'],
        steps=[
            FetchDataframeStep(
                integration='mindsdb',
                query=Select(
                    targets=[
                        Identifier('predictors.column1',
                                   alias=Identifier('column1')),
                        Constant(1),
                        NullConstant(),
                        Function('database', args=[]),
                    ],
                    from_table=Identifier('predictors'),
                    where=BinaryOperation(
                        'and',
                        args=[
                            BinaryOperation(
                                '=',
                                args=[
                                    Identifier('predictors.column1'),
                                    Identifier('predictors.column2'),
                                ]),
                            BinaryOperation(
                                '>',
                                args=[
                                    Identifier('predictors.column3'),
                                    Constant(0),
                                ]),
                        ])),
                step_num=0,
                references=None,
            ),
        ])

    plan = plan_query(query, integrations=['mindsdb'])

    # Full-list equality: a loop over range(len(plan.steps)) only checks a
    # prefix and would pass if the planner returned no steps at all.
    assert plan.steps == expected_plan.steps
def test_join_tables_plan_groupby(self):
    """GROUP BY / HAVING over a two-table join become separate plan steps.

    Expected sequence: two fetches, a join, a group-by step, a filter step
    for the having condition, and a final projection.
    """
    select_query = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Function('sum', args=[Identifier('tab2.column2')], alias=Identifier('total')),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
        group_by=[Identifier('tab1.column1'), Identifier('tab2.column1')],
        having=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Constant(0)]),
    )

    actual = plan_query(select_query, integrations=['int'])

    fetch_left = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab1')),
    )
    fetch_right = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab2')),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('tab1'),
            right=Identifier('tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    group_step = GroupByStep(
        dataframe=Result(2),
        targets=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Function('sum', args=[Identifier('tab2.column2')]),
        ],
        columns=[Identifier('tab1.column1'), Identifier('tab2.column1')],
    )
    having_step = FilterStep(
        dataframe=Result(3),
        query=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Constant(0)]),
    )
    project_step = ProjectStep(
        dataframe=Result(4),
        columns=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Function(op='sum', args=[Identifier('tab2.column2')],
                     alias=Identifier('total')),
        ],
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[fetch_left, fetch_right, join_step, group_step, having_step, project_step],
    )

    assert actual.steps == expected.steps
def get_integration_select_step(self, select):
    """Build the fetch step that runs ``select`` against its integration.

    The integration name and table are resolved from ``select.from_table``.
    A deep copy of ``select`` is passed through
    ``recursively_disambiguate_identifiers`` and wrapped in a
    ``FetchDataframeStep``; the object passed in is not the one stored on
    the step.

    :param select: select statement whose ``from_table`` names the source.
    :return: a ``FetchDataframeStep`` (not added to any plan here).
    """
    source_integration, source_table = (
        self.get_integration_path_from_identifier_or_error(select.from_table)
    )

    # Work on a copy so the rewrite below does not touch the caller's object.
    rewritten = copy.deepcopy(select)
    recursively_disambiguate_identifiers(rewritten, source_integration, source_table)

    step = FetchDataframeStep(integration=source_integration, query=rewritten)
    return step
def plan_integration_nested_select(self, select):
    """Add one fetch step for a select that contains nested selects.

    ``select`` is deep-copied; the innermost select of the copy (as found
    by ``get_deepest_select``) supplies the integration and table, and only
    that innermost select is passed to
    ``recursively_disambiguate_identifiers``. The whole copied statement
    becomes the query of the ``FetchDataframeStep``.

    :param select: outer select statement to plan.
    :return: whatever ``self.plan.add_step`` returns for the new step.
    """
    outer_copy = copy.deepcopy(select)
    innermost = get_deepest_select(outer_copy)

    source_integration, source_table = (
        self.get_integration_path_from_identifier_or_error(innermost.from_table)
    )
    recursively_disambiguate_identifiers(innermost, source_integration, source_table)

    fetch_step = FetchDataframeStep(integration=source_integration, query=outer_copy)
    return self.plan.add_step(fetch_step)
def test_integration_select_plan_star(self):
    """``SELECT *`` from 'int.tab' plans to one fetch step reading from 'tab'."""
    select_query = Select(targets=[Star()], from_table=Identifier('int.tab'))

    fetch_step = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab')),
    )
    expected = QueryPlan(integrations=['int'], steps=[fetch_step])

    actual = plan_query(select_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_join_tables_plan_order_by(self):
    """ORDER BY / LIMIT / OFFSET over a two-table join become separate steps.

    Expected sequence: two fetches, a join, an order-by step, a limit/offset
    step and a final projection.
    """
    select_query = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
        limit=Constant(10),
        offset=Constant(15),
        order_by=[OrderBy(field=Identifier('tab1.column1'))],
    )

    actual = plan_query(select_query, integrations=['int'])

    fetch_left = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab1')),
    )
    fetch_right = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab2')),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('tab1'),
            right=Identifier('tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    order_step = OrderByStep(
        dataframe=Result(2),
        order_by=[OrderBy(field=Identifier('tab1.column1'))],
    )
    limit_step = LimitOffsetStep(dataframe=Result(3), limit=10, offset=15)
    project_step = ProjectStep(
        dataframe=Result(4),
        columns=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[fetch_left, fetch_right, join_step, order_step, limit_step, project_step],
    )

    assert actual.steps == expected.steps
def test_integration_select_column_alias(self):
    """A user-supplied column alias is kept when the column gets table-qualified."""
    select_query = Select(
        targets=[Identifier('col1', alias=Identifier('column_alias'))],
        from_table=Identifier('int.tab'),
    )

    qualified_select = Select(
        targets=[
            Identifier(parts=['tab', 'col1'], alias=Identifier('column_alias')),
        ],
        from_table=Identifier(parts=['tab']),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=qualified_select)],
    )

    actual = plan_query(select_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_integration_select_table_alias_full_query(self):
    """A table alias from parsed SQL is kept on the table and used by the column.

    The statement is parsed with the 'sqlite' dialect; the expected fetch
    query reads from ``test_data.home_rentals`` aliased ``ta`` and selects
    ``ta.sqft`` aliased ``sqft``.
    """
    statement = 'select ta.sqft from int.test_data.home_rentals as ta'
    parsed_query = parse_sql(statement, dialect='sqlite')

    expected_select = Select(
        targets=[Identifier(parts=['ta', 'sqft'], alias=Identifier('sqft'))],
        from_table=Identifier(parts=['test_data', 'home_rentals'],
                              alias=Identifier('ta')),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=expected_select)],
    )

    actual = plan_query(parsed_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_integration_select_plan_complex_path(self):
    """A fully qualified column whose name contains spaces is planned correctly.

    The target is given as parts ``['int', 'tab', 'a column with spaces']``;
    the expected target drops the integration part and is aliased with the
    bare column name.
    """
    select_query = Select(
        targets=[Identifier(parts=['int', 'tab', 'a column with spaces'])],
        from_table=Identifier('int.tab'),
    )

    expected_target = Identifier(
        'tab.`a column with spaces`',
        alias=Identifier('a column with spaces'),
    )
    expected_select = Select(
        targets=[expected_target],
        from_table=Identifier('tab'),
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=expected_select)],
    )

    actual = plan_query(select_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_integration_select_subquery_in_from(self):
    """A select over an aliased subquery is sent as one fetch step.

    Only the inner select's target is expected to be table-qualified; the
    outer target stays ``column1``.
    """
    inner_query = Select(
        targets=[Identifier('column1')],
        from_table=Identifier('int.tab'),
        alias=Identifier('subquery'),
    )
    outer_query = Select(targets=[Identifier('column1')], from_table=inner_query)

    expected_inner = Select(
        targets=[Identifier('tab.column1', alias=Identifier('column1'))],
        from_table=Identifier('tab'),
        alias=Identifier('subquery'),
    )
    expected_outer = Select(
        targets=[Identifier('column1')],
        from_table=expected_inner,
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[FetchDataframeStep(integration='int', query=expected_outer)],
    )

    actual = plan_query(outer_query, integrations=['int'])

    assert actual.steps == expected.steps
def test_integration_select_3_level(self):
    """A three-part table path keeps its last two parts as the table name.

    ``xxx.yyy.zzz`` is planned with ``'xxx'`` as the only integration; the
    expected fetch step targets integration ``'xxx'``, reads from
    ``yyy.zzz`` and qualifies the filtered column as ``yyy.zzz.x``.
    """
    sql = "select * from xxx.yyy.zzz where x > 1"
    query = parse_sql(sql, dialect='mindsdb')

    # The expected plan lists the same integration the query is planned
    # against ('xxx'), so the fixture agrees with the plan_query call.
    expected_plan = QueryPlan(
        integrations=['xxx'],
        default_namespace='xxx',
        steps=[
            FetchDataframeStep(
                integration='xxx',
                query=Select(
                    targets=[Star()],
                    from_table=Identifier('yyy.zzz'),
                    where=BinaryOperation(op='>', args=[
                        Identifier('yyy.zzz.x'),
                        Constant(1),
                    ]),
                ),
            ),
        ],
    )

    plan = plan_query(query, integrations=['xxx'])

    assert plan.steps == expected_plan.steps
def test_join_tables_where_plan(self):
    """A WHERE clause over a two-table join becomes a filter step after the join.

    The expected fetch steps carry no filter; the full nested ``and``
    condition appears unchanged in a ``FilterStep`` placed between the join
    and the projection.
    """
    select_query = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
        where=BinaryOperation('and', args=[
            BinaryOperation('and', args=[
                BinaryOperation('=', args=[Identifier('tab1.column1'), Constant(1)]),
                BinaryOperation('=', args=[Identifier('tab2.column1'), Constant(0)]),
            ]),
            BinaryOperation('=', args=[Identifier('tab1.column3'),
                                       Identifier('tab2.column3')]),
        ]),
    )

    actual = plan_query(select_query, integrations=['int'])

    fetch_left = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab1')),
    )
    fetch_right = FetchDataframeStep(
        integration='int',
        query=Select(targets=[Star()], from_table=Identifier('tab2')),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('tab1'),
            right=Identifier('tab2'),
            condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                    Identifier('tab2.column1')]),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    filter_step = FilterStep(
        dataframe=Result(2),
        query=BinaryOperation('and', args=[
            BinaryOperation('and', args=[
                BinaryOperation('=', args=[Identifier('tab1.column1'), Constant(1)]),
                BinaryOperation('=', args=[Identifier('tab2.column1'), Constant(0)]),
            ]),
            BinaryOperation('=', args=[Identifier('tab1.column3'),
                                       Identifier('tab2.column3')]),
        ]),
    )
    project_step = ProjectStep(
        dataframe=Result(3),
        columns=[
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ],
    )
    expected = QueryPlan(
        integrations=['int'],
        steps=[fetch_left, fetch_right, join_step, filter_step, project_step],
    )

    assert actual.steps == expected.steps
def test_nested_select(self):
    """Plan an outer select wrapped around a predictor-join subquery.

    Two scenarios are checked, both parsed with the 'mindsdb' dialect:

    1. outer ``SELECT time ... limit 1`` over a subquery with ``limit 10``;
       the expected plan ends with a projection of ``time`` and a
       limit/offset step.
    2. outer ``SELECT `time` ... GROUP BY 1``; the expected plan ends with a
       group-by step.
    """
    # for tableau

    # Plain string literals: the SQL has no placeholders, so no f-prefix.
    sql = ''' SELECT time FROM ( select * from int.covid join mindsdb.pred limit 10 ) `Custom SQL Query` limit 1 '''

    query = parse_sql(sql, dialect='mindsdb')

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(integration='int',
                               query=parse_sql('select * from covid limit 10')),
            ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0),
                               predictor=Identifier('pred')),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('result_0', alias=Identifier('covid')),
                    right=Identifier('result_1', alias=Identifier('pred')),
                    join_type=JoinType.JOIN,
                ),
            ),
            ProjectStep(dataframe=Result(2), columns=[Star()]),
            ProjectStep(dataframe=Result(3), columns=[Identifier('time')],
                        ignore_doubles=True),
            LimitOffsetStep(dataframe=Result(4), limit=1),
        ],
    )

    plan = plan_query(
        query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    # Whole-list comparison: an index loop over plan.steps would not
    # notice a plan that stops short of the expected steps.
    assert plan.steps == expected_plan.steps

    sql = ''' SELECT `time` FROM ( select * from int.covid join mindsdb.pred ) `Custom SQL Query` GROUP BY 1 '''

    query = parse_sql(sql, dialect='mindsdb')

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(integration='int',
                               query=parse_sql('select * from covid')),
            ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0),
                               predictor=Identifier('pred')),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('result_0', alias=Identifier('covid')),
                    right=Identifier('result_1', alias=Identifier('pred')),
                    join_type=JoinType.JOIN,
                ),
            ),
            ProjectStep(dataframe=Result(2), columns=[Star()]),
            GroupByStep(dataframe=Result(3), columns=[Constant(1)],
                        targets=[Identifier('time')]),
        ],
    )

    plan = plan_query(
        query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    assert plan.steps == expected_plan.steps