def test_join_tables_plan_default_namespace(self):
    """Inner join of two unqualified tables planned with ``default_namespace='int'``.

    The expected plan fetches each table from the ``int`` integration,
    joins the two results on ``column1`` and projects the selected columns.
    """

    def selected_columns():
        # Fresh nodes on every call: the query and the expected plan must
        # never share AST objects.
        return [
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ]

    def inner_join():
        return Join(
            left=Identifier('tab1'),
            right=Identifier('tab2'),
            condition=BinaryOperation(
                op='=',
                args=[Identifier('tab1.column1'), Identifier('tab2.column1')],
            ),
            join_type=JoinType.INNER_JOIN,
        )

    def fetch_all(table_name):
        return FetchDataframeStep(
            integration='int',
            query=Select(targets=[Star()], from_table=Identifier(table_name)),
        )

    query = Select(targets=selected_columns(), from_table=inner_join())

    expected_plan = QueryPlan(
        integrations=['int'],
        default_namespace='int',
        steps=[
            fetch_all('tab1'),
            fetch_all('tab2'),
            JoinStep(left=Result(0), right=Result(1), query=inner_join()),
            ProjectStep(dataframe=Result(2), columns=selected_columns()),
        ],
    )

    plan = plan_query(query, integrations=['int'], default_namespace='int')

    assert plan.steps == expected_plan.steps
def test_join_predictor_plan_default_namespace_predictor(self):
    """Join ``int.tab1`` with an unqualified predictor while ``default_namespace`` is ``mindsdb``.

    The expected plan fetches the table, applies the predictor to the
    fetched dataframe, joins both results and projects the selected columns.
    """
    query = Select(
        targets=[Identifier('tab1.column1'), Identifier('pred.predicted')],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
    )

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(
                integration='int',
                query=Select(targets=[Star()], from_table=Identifier('tab1')),
            ),
            ApplyPredictorStep(
                namespace='mindsdb',
                dataframe=Result(0),
                predictor=Identifier('pred'),
            ),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('result_0', alias=Identifier('tab1')),
                    right=Identifier('result_1', alias=Identifier('pred')),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            ProjectStep(
                dataframe=Result(2),
                columns=[Identifier('tab1.column1'), Identifier('pred.predicted')],
            ),
        ],
    )

    plan = plan_query(
        query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    # Compare step by step so a failure points at the first differing step.
    for position, (actual, expected) in enumerate(zip(plan.steps, expected_plan.steps)):
        assert actual == expected, f'plan step {position} differs'
    # zip() stops at the shorter sequence, so the step counts must be checked
    # explicitly; otherwise a truncated (or empty) plan would pass.
    assert len(plan.steps) == len(expected_plan.steps)
def test_join_tables_disambiguate_identifiers_in_condition(self):
    """Join condition that names the integration on one of its identifiers.

    The expected ``JoinStep`` carries the condition with the integration
    prefix removed (``int.tab1.column1`` -> ``tab1.column1``).
    """

    def selected_columns():
        # New nodes per call so nothing is shared between query and expectation.
        return [
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ]

    def fetch_all(table_name):
        return FetchDataframeStep(
            integration='int',
            query=Select(targets=[Star()], from_table=Identifier(table_name)),
        )

    qualified_condition = BinaryOperation(
        op='=',
        args=[
            Identifier('int.tab1.column1'),  # integration name included
            Identifier('tab2.column1'),
        ],
    )
    query = Select(
        targets=selected_columns(),
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=qualified_condition,
            join_type=JoinType.INNER_JOIN,
        ),
    )

    plan = plan_query(query, integrations=['int'])

    stripped_condition = BinaryOperation(
        op='=',
        args=[
            Identifier('tab1.column1'),  # integration name gets stripped out
            Identifier('tab2.column1'),
        ],
    )
    expected_plan = QueryPlan(
        integrations=['int'],
        steps=[
            fetch_all('tab1'),
            fetch_all('tab2'),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('tab1'),
                    right=Identifier('tab2'),
                    condition=stripped_condition,
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            ProjectStep(dataframe=Result(2), columns=selected_columns()),
        ],
    )

    assert plan.steps == expected_plan.steps
def test_join_predictor_plan_order_by(self):
    """Table/predictor join carrying WHERE, ORDER BY, LIMIT and OFFSET.

    In the expected plan all four clauses appear on the query of the
    ``FetchDataframeStep``; the remaining steps apply the predictor, join
    the two results and project the selected columns.
    """

    def row_clauses():
        # Built anew on each call so the input query and the expected fetch
        # query hold distinct AST nodes.
        return dict(
            where=BinaryOperation(
                '=',
                args=[Identifier('tab.product_id'), Constant('x')],
            ),
            limit=Constant(10),
            offset=Constant(15),
            order_by=[OrderBy(field=Identifier('tab.column1'))],
        )

    def selected_columns():
        return [Identifier('tab.column1'), Identifier('pred.predicted')]

    query = Select(
        targets=selected_columns(),
        from_table=Join(
            left=Identifier('int.tab'),
            right=Identifier('mindsdb.pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
        **row_clauses(),
    )

    fetch_step = FetchDataframeStep(
        integration='int',
        query=Select(
            targets=[Star()],
            from_table=Identifier('tab'),
            **row_clauses(),
        ),
    )
    predict_step = ApplyPredictorStep(
        namespace='mindsdb',
        dataframe=Result(0),
        predictor=Identifier('pred'),
    )
    join_step = JoinStep(
        left=Result(0),
        right=Result(1),
        query=Join(
            left=Identifier('result_0', alias=Identifier('tab')),
            right=Identifier('result_1', alias=Identifier('pred')),
            join_type=JoinType.INNER_JOIN,
        ),
    )
    project_step = ProjectStep(dataframe=Result(2), columns=selected_columns())
    expected_plan = QueryPlan(
        steps=[fetch_step, predict_step, join_step, project_step],
    )

    plan = plan_query(
        query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    assert plan.steps == expected_plan.steps
def test_select_from_predictor_get_columns(self):
    """``SELECT ... FROM <predictor> WHERE 1 = 0`` is planned as a column lookup.

    The parsed query is first checked against the expected AST, then the
    plan is expected to consist of ``GetPredictorColumns`` followed by a
    projection of the requested column.
    """
    # Plain string literal: the statement has no placeholders, so an
    # f-string prefix would be misleading.
    sql = 'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0'
    query = parse_sql(sql, dialect='mindsdb')

    expected_query = Select(
        targets=[Identifier('GDP_per_capita_USD')],
        from_table=Identifier('hdi_predictor_external'),
        where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]),
    )
    assert query.to_tree() == expected_query.to_tree()

    expected_plan = QueryPlan(
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        steps=[
            GetPredictorColumns(
                namespace='mindsdb',
                predictor=Identifier('hdi_predictor_external'),
            ),
            ProjectStep(
                dataframe=Result(0),
                columns=[Identifier('GDP_per_capita_USD')],
            ),
        ],
    )

    plan = plan_query(
        query,
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'hdi_predictor_external': {}},
    )

    assert plan.steps == expected_plan.steps
def test_select_from_predictor_default_namespace(self):
    """Select from an unqualified predictor with equality filters on two columns.

    The expected plan applies the predictor to a single row built from the
    WHERE equalities (``x1 = 1``, ``x2 = '2'``) and then projects ``*``.
    """
    x1_filter = BinaryOperation(op='=', args=[Identifier('x1'), Constant(1)])
    x2_filter = BinaryOperation(op='=', args=[Identifier('x2'), Constant('2')])
    query = Select(
        targets=[Star()],
        from_table=Identifier('pred'),
        where=BinaryOperation(op='and', args=[x1_filter, x2_filter]),
    )

    predict_row_step = ApplyPredictorRowStep(
        namespace='mindsdb',
        predictor=Identifier('pred'),
        row_dict={'x1': 1, 'x2': '2'},
    )
    project_step = ProjectStep(dataframe=Result(0), columns=[Star()])
    expected_plan = QueryPlan(
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        steps=[predict_row_step, project_step],
    )

    plan = plan_query(
        query,
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    assert plan.steps == expected_plan.steps
def test_join_tables_plan_groupby(self):
    """Inner join of two integration tables with GROUP BY and HAVING.

    Expected step order: two fetches, join, group-by, filter (the HAVING
    condition) and finally the projection that carries the ``total`` alias.
    """

    def grouping_columns():
        # Fresh identifiers per call; the query and the expected plan must
        # not share node instances.
        return [Identifier('tab1.column1'), Identifier('tab2.column1')]

    def join_condition():
        return BinaryOperation(
            op='=',
            args=[Identifier('tab1.column1'), Identifier('tab2.column1')],
        )

    def having_condition():
        return BinaryOperation(
            op='=',
            args=[Identifier('tab1.column1'), Constant(0)],
        )

    def fetch_all(table_name):
        return FetchDataframeStep(
            integration='int',
            query=Select(targets=[Star()], from_table=Identifier(table_name)),
        )

    query = Select(
        targets=[
            *grouping_columns(),
            Function(
                'sum',
                args=[Identifier('tab2.column2')],
                alias=Identifier('total'),
            ),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=join_condition(),
            join_type=JoinType.INNER_JOIN,
        ),
        group_by=grouping_columns(),
        having=having_condition(),
    )

    plan = plan_query(query, integrations=['int'])

    expected_plan = QueryPlan(
        integrations=['int'],
        steps=[
            fetch_all('tab1'),
            fetch_all('tab2'),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('tab1'),
                    right=Identifier('tab2'),
                    condition=join_condition(),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            GroupByStep(
                dataframe=Result(2),
                # The aggregate is listed without its alias at this stage.
                targets=[
                    *grouping_columns(),
                    Function('sum', args=[Identifier('tab2.column2')]),
                ],
                columns=grouping_columns(),
            ),
            FilterStep(dataframe=Result(3), query=having_condition()),
            ProjectStep(
                dataframe=Result(4),
                columns=[
                    *grouping_columns(),
                    Function(
                        op='sum',
                        args=[Identifier('tab2.column2')],
                        alias=Identifier('total'),
                    ),
                ],
            ),
        ],
    )

    assert plan.steps == expected_plan.steps
def test_join_tables_plan_order_by(self):
    """Inner join of two integration tables with ORDER BY, LIMIT and OFFSET.

    Expected step order: two fetches, join, order-by, limit/offset, project.
    """

    def selected_columns():
        # Separate node instances for the query and for the expectation.
        return [
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ]

    def join_condition():
        return BinaryOperation(
            op='=',
            args=[Identifier('tab1.column1'), Identifier('tab2.column1')],
        )

    def ordering():
        return [OrderBy(field=Identifier('tab1.column1'))]

    def fetch_all(table_name):
        return FetchDataframeStep(
            integration='int',
            query=Select(targets=[Star()], from_table=Identifier(table_name)),
        )

    query = Select(
        targets=selected_columns(),
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=join_condition(),
            join_type=JoinType.INNER_JOIN,
        ),
        limit=Constant(10),
        offset=Constant(15),
        order_by=ordering(),
    )

    plan = plan_query(query, integrations=['int'])

    expected_plan = QueryPlan(
        integrations=['int'],
        steps=[
            fetch_all('tab1'),
            fetch_all('tab2'),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('tab1'),
                    right=Identifier('tab2'),
                    condition=join_condition(),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            OrderByStep(dataframe=Result(2), order_by=ordering()),
            LimitOffsetStep(dataframe=Result(3), limit=10, offset=15),
            ProjectStep(dataframe=Result(4), columns=selected_columns()),
        ],
    )

    assert plan.steps == expected_plan.steps
def test_plan_union_queries(self):
    """UNION ALL of a plain table select and a table/predictor join.

    The expected plan holds the steps of the left query first (a single
    fetch), then the steps of the right query (fetch, apply predictor,
    join, project) and finally a ``UnionStep`` over both results.
    """
    query1 = Select(
        targets=[
            Identifier('column1'),
            Constant(None, alias=Identifier('predicted')),
        ],
        from_table=Identifier('int.tab'),
        where=BinaryOperation('and', args=[
            BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
            BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
        ]),
    )
    query2 = Select(
        targets=[
            Identifier('tab1.column1'),
            Identifier('pred.predicted', alias=Identifier('predicted')),
        ],
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('mindsdb.pred'),
            join_type=JoinType.INNER_JOIN,
            implicit=True,
        ),
    )
    query = Union(left=query1, right=query2, unique=False)

    expected_plan = QueryPlan(
        steps=[
            # Query 1
            FetchDataframeStep(
                integration='int',
                query=Select(
                    targets=[
                        Identifier('tab.column1', alias=Identifier('column1')),
                        Constant(None, alias=Identifier('predicted')),
                    ],
                    from_table=Identifier('tab'),
                    where=BinaryOperation('and', args=[
                        BinaryOperation('=', args=[
                            Identifier('tab.column1'),
                            Identifier('tab.column2'),
                        ]),
                        BinaryOperation('>', args=[
                            Identifier('tab.column3'),
                            Constant(0),
                        ]),
                    ]),
                ),
            ),
            # Query 2
            FetchDataframeStep(
                integration='int',
                query=Select(targets=[Star()], from_table=Identifier('tab1')),
            ),
            ApplyPredictorStep(
                namespace='mindsdb',
                dataframe=Result(1),
                predictor=Identifier('pred'),
            ),
            JoinStep(
                left=Result(1),
                right=Result(2),
                query=Join(
                    left=Identifier('result_1', alias=Identifier('tab1')),
                    right=Identifier('result_2', alias=Identifier('pred')),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            ProjectStep(
                dataframe=Result(3),
                columns=[
                    Identifier('tab1.column1'),
                    Identifier('pred.predicted', alias=Identifier('predicted')),
                ],
            ),
            # Union
            UnionStep(left=Result(0), right=Result(4), unique=False),
        ],
    )

    plan = plan_query(
        query,
        integrations=['int'],
        predictor_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    # Compare step by step so a failure points at the first differing step.
    for position, (actual, expected) in enumerate(zip(plan.steps, expected_plan.steps)):
        assert actual == expected, f'plan step {position} differs'
    # zip() stops at the shorter sequence, so the step counts must be checked
    # explicitly; otherwise a truncated (or empty) plan would pass.
    assert len(plan.steps) == len(expected_plan.steps)
def result(self):
    """Return a ``Result`` that refers to this step by its step number.

    Raises:
        PlanningException: if ``step_num`` is ``None``, i.e. there is no
            step number to reference.
    """
    if self.step_num is not None:
        return Result(self.step_num)

    raise PlanningException(
        f'Can\'t reference a step with no assigned step number. Tried to reference: {type(self)}'
    )
def test_join_tables_where_plan(self):
    """Inner join of two integration tables with a WHERE clause over both tables.

    In the expected plan both tables are fetched without any filter and the
    complete WHERE expression appears as a ``FilterStep`` after the join.
    """

    def selected_columns():
        # Each call yields new nodes so the query and the expected plan stay
        # independent of one another.
        return [
            Identifier('tab1.column1'),
            Identifier('tab2.column1'),
            Identifier('tab2.column2'),
        ]

    def join_condition():
        return BinaryOperation(
            op='=',
            args=[Identifier('tab1.column1'), Identifier('tab2.column1')],
        )

    def where_condition():
        constant_filters = BinaryOperation('and', args=[
            BinaryOperation('=', args=[Identifier('tab1.column1'), Constant(1)]),
            BinaryOperation('=', args=[Identifier('tab2.column1'), Constant(0)]),
        ])
        cross_table_filter = BinaryOperation(
            '=',
            args=[Identifier('tab1.column3'), Identifier('tab2.column3')],
        )
        return BinaryOperation('and', args=[constant_filters, cross_table_filter])

    def fetch_all(table_name):
        return FetchDataframeStep(
            integration='int',
            query=Select(targets=[Star()], from_table=Identifier(table_name)),
        )

    query = Select(
        targets=selected_columns(),
        from_table=Join(
            left=Identifier('int.tab1'),
            right=Identifier('int.tab2'),
            condition=join_condition(),
            join_type=JoinType.INNER_JOIN,
        ),
        where=where_condition(),
    )

    plan = plan_query(query, integrations=['int'])

    expected_plan = QueryPlan(
        integrations=['int'],
        steps=[
            fetch_all('tab1'),
            fetch_all('tab2'),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('tab1'),
                    right=Identifier('tab2'),
                    condition=join_condition(),
                    join_type=JoinType.INNER_JOIN,
                ),
            ),
            FilterStep(dataframe=Result(2), query=where_condition()),
            ProjectStep(dataframe=Result(3), columns=selected_columns()),
        ],
    )

    assert plan.steps == expected_plan.steps
def test_nested_select(self):
    """Outer select wrapped around a sub-select that joins a table with a predictor.

    Two scenarios are covered (the query shape is the one used for tableau):

    * outer ``LIMIT`` - the expected plan ends with a projection of ``time``
      (``ignore_doubles=True``) followed by a ``LimitOffsetStep``;
    * outer ``GROUP BY 1`` - the expected plan ends with a ``GroupByStep``.
    """
    planner_options = dict(
        integrations=['int'],
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        predictor_metadata={'pred': {}},
    )

    def assert_same_steps(plan, expected_plan):
        # Compare step by step so a failure points at the first differing step.
        for position, (actual, expected) in enumerate(zip(plan.steps, expected_plan.steps)):
            assert actual == expected, f'plan step {position} differs'
        # zip() stops at the shorter sequence, so the step counts must be
        # checked explicitly; otherwise a truncated plan would pass.
        assert len(plan.steps) == len(expected_plan.steps)

    def predictor_join_steps():
        # Steps 1-3 are identical in both scenarios; build new objects per call.
        return [
            ApplyPredictorStep(
                namespace='mindsdb',
                dataframe=Result(0),
                predictor=Identifier('pred'),
            ),
            JoinStep(
                left=Result(0),
                right=Result(1),
                query=Join(
                    left=Identifier('result_0', alias=Identifier('covid')),
                    right=Identifier('result_1', alias=Identifier('pred')),
                    join_type=JoinType.JOIN,
                ),
            ),
            ProjectStep(dataframe=Result(2), columns=[Star()]),
        ]

    # for tableau
    sql = '''
        SELECT time
        FROM (
           select * from int.covid
           join mindsdb.pred
           limit 10
        ) `Custom SQL Query`
        limit 1
    '''
    query = parse_sql(sql, dialect='mindsdb')

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(
                integration='int',
                query=parse_sql('select * from covid limit 10'),
            ),
            *predictor_join_steps(),
            ProjectStep(
                dataframe=Result(3),
                columns=[Identifier('time')],
                ignore_doubles=True,
            ),
            LimitOffsetStep(dataframe=Result(4), limit=1),
        ],
    )

    plan = plan_query(query, **planner_options)
    assert_same_steps(plan, expected_plan)

    sql = '''
        SELECT `time`
        FROM (
           select * from int.covid
           join mindsdb.pred
        ) `Custom SQL Query`
        GROUP BY 1
    '''
    query = parse_sql(sql, dialect='mindsdb')

    expected_plan = QueryPlan(
        default_namespace='mindsdb',
        steps=[
            FetchDataframeStep(
                integration='int',
                query=parse_sql('select * from covid'),
            ),
            *predictor_join_steps(),
            GroupByStep(
                dataframe=Result(3),
                columns=[Constant(1)],
                targets=[Identifier('time')],
            ),
        ],
    )

    plan = plan_query(query, **planner_options)
    assert_same_steps(plan, expected_plan)