예제 #1
0
    def test_select_from_predictor_get_columns(self):
        sql = f'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0'
        query = parse_sql(sql, dialect='mindsdb')

        expected_query = Select(
            targets=[Identifier('GDP_per_capita_USD')],
            from_table=Identifier('hdi_predictor_external'),
            where=BinaryOperation(op="=", args=[Constant(1),
                                                Constant(0)]))
        assert query.to_tree() == expected_query.to_tree()

        expected_plan = QueryPlan(
            predictor_namespace='mindsdb',
            default_namespace='mindsdb',
            steps=[
                GetPredictorColumns(
                    namespace='mindsdb',
                    predictor=Identifier('hdi_predictor_external')),
                ProjectStep(dataframe=Result(0),
                            columns=[Identifier('GDP_per_capita_USD')]),
            ],
        )

        plan = plan_query(query,
                          predictor_namespace='mindsdb',
                          default_namespace='mindsdb',
                          predictor_metadata={'hdi_predictor_external': {}})

        assert plan.steps == expected_plan.steps
예제 #2
0
    def test_join_tables_disambiguate_identifiers_in_condition(self):
        query = Select(targets=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')],
                       from_table=Join(left=Identifier('int.tab1'),
                                       right=Identifier('int.tab2'),
                                       condition=BinaryOperation(op='=', args=[Identifier('int.tab1.column1'), # integration name included
                                                                               Identifier('tab2.column1')]),
                                       join_type=JoinType.INNER_JOIN
                                       )
                       )
        plan = plan_query(query, integrations=['int'])
        expected_plan = QueryPlan(integrations=['int'],
                                  steps=[
                                      FetchDataframeStep(integration='int',
                                                         query=Select(
                                                             targets=[Star()],
                                                             from_table=Identifier('tab1')),
                                                         ),
                                      FetchDataframeStep(integration='int',
                                                         query=Select(targets=[Star()],
                                                                      from_table=Identifier('tab2')),
                                                         ),
                                      JoinStep(left=Result(0), right=Result(1),
                                               query=Join(left=Identifier('tab1'),
                                                          right=Identifier('tab2'),
                                                          condition=BinaryOperation(op='=',
                                                                                    args=[Identifier('tab1.column1'), # integration name gets stripped out
                                                                                          Identifier('tab2.column1')]),
                                                          join_type=JoinType.INNER_JOIN
                                                          )),
                                      ProjectStep(dataframe=Result(2),
                                                  columns=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')]),
                                  ],
                                  )

        assert plan.steps == expected_plan.steps
예제 #3
0
    def test_select_from_predictor_default_namespace(self):
        query = Select(
            targets=[Star()],
            from_table=Identifier('pred'),
            where=BinaryOperation(
                op='and',
                args=[
                    BinaryOperation(op='=',
                                    args=[Identifier('x1'),
                                          Constant(1)]),
                    BinaryOperation(op='=',
                                    args=[Identifier('x2'),
                                          Constant('2')])
                ],
            ))
        expected_plan = QueryPlan(
            predictor_namespace='mindsdb',
            default_namespace='mindsdb',
            steps=[
                ApplyPredictorRowStep(namespace='mindsdb',
                                      predictor=Identifier('pred'),
                                      row_dict={
                                          'x1': 1,
                                          'x2': '2'
                                      }),
                ProjectStep(dataframe=Result(0), columns=[Star()]),
            ],
        )

        plan = plan_query(query,
                          predictor_namespace='mindsdb',
                          default_namespace='mindsdb',
                          predictor_metadata={'pred': {}})

        assert plan.steps == expected_plan.steps
예제 #4
0
    def test_join_tables_plan_default_namespace(self):
        query = Select(targets=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')],
                       from_table=Join(left=Identifier('tab1'),
                                       right=Identifier('tab2'),
                                       condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Identifier('tab2.column1')]),
                                       join_type=JoinType.INNER_JOIN
                                       )
                )
        expected_plan = QueryPlan(integrations=['int'],
                                  default_namespace='int',
                                  steps = [
                                      FetchDataframeStep(integration='int',
                                                         query=Select(
                                                             targets=[Star()],
                                                             from_table=Identifier('tab1')),
                                                         ),
                                      FetchDataframeStep(integration='int',
                                                         query=Select(targets=[Star()],
                                                                      from_table=Identifier('tab2')),
                                                         ),
                                      JoinStep(left=Result(0), right=Result(1),
                                               query=Join(left=Identifier('tab1'),
                                                          right=Identifier('tab2'),
                                                          condition=BinaryOperation(op='=',
                                                                                    args=[Identifier('tab1.column1'),
                                                                                          Identifier('tab2.column1')]),
                                                          join_type=JoinType.INNER_JOIN
                                                          )),
                                      ProjectStep(dataframe=Result(2),
                                                  columns=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')]),
                                  ],
        )
        plan = plan_query(query, integrations=['int'], default_namespace='int')

        assert plan.steps == expected_plan.steps
예제 #5
0
    def test_join_predictor_plan_default_namespace_predictor(self):
        query = Select(targets=[Identifier('tab1.column1'), Identifier('pred.predicted')],
                       from_table=Join(left=Identifier('int.tab1'),
                                       right=Identifier('pred'),
                                       join_type=JoinType.INNER_JOIN,
                                       implicit=True)
                       )
        expected_plan = QueryPlan(
            default_namespace='mindsdb',
            steps=[
                FetchDataframeStep(integration='int',
                                   query=Select(targets=[Star()],
                                                from_table=Identifier('tab1')),
                                   ),
                ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0), predictor=Identifier('pred')),
                JoinStep(left=Result(0), right=Result(1),
                         query=Join(left=Identifier('result_0', alias=Identifier('tab1')),
                                    right=Identifier('result_1', alias=Identifier('pred')),
                                    join_type=JoinType.INNER_JOIN)),
                ProjectStep(dataframe=Result(2), columns=[Identifier('tab1.column1'), Identifier('pred.predicted')]),
            ],
        )
        plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb', default_namespace='mindsdb', predictor_metadata={'pred': {}})

        for i in range(len(plan.steps)):
            assert plan.steps[i] == expected_plan.steps[i]
예제 #6
0
    def test_join_predictor_plan_order_by(self):
        query = Select(targets=[Identifier('tab.column1'), Identifier('pred.predicted')],
                       from_table=Join(left=Identifier('int.tab'),
                                       right=Identifier('mindsdb.pred'),
                                       join_type=JoinType.INNER_JOIN,
                                       implicit=True),
                       where=BinaryOperation('=', args=[Identifier('tab.product_id'), Constant('x')]),
                       limit=Constant(10),
                       offset=Constant(15),
                       order_by=[OrderBy(field=Identifier('tab.column1'))]
                       )

        expected_plan = QueryPlan(
            steps=[
                FetchDataframeStep(integration='int',
                                   query=Select(targets=[Star()],
                                                from_table=Identifier('tab'),
                                                where=BinaryOperation('=', args=[Identifier('tab.product_id'), Constant('x')]),
                                                limit=Constant(10),
                                                offset=Constant(15),
                                                order_by=[OrderBy(field=Identifier('tab.column1'))],
                                                ),
                                   ),
                ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0), predictor=Identifier('pred')),
                JoinStep(left=Result(0), right=Result(1),
                         query=Join(left=Identifier('result_0', alias=Identifier('tab')),
                                    right=Identifier('result_1', alias=Identifier('pred')),
                                    join_type=JoinType.INNER_JOIN)),
                ProjectStep(dataframe=Result(2), columns=[Identifier('tab.column1'), Identifier('pred.predicted')]),
            ],
        )
        plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb', predictor_metadata={'pred': {}})

        assert plan.steps == expected_plan.steps
예제 #7
0
    def test_plan_union_queries(self):
        query1 = Select(targets=[Identifier('column1'), Constant(None, alias=Identifier('predicted'))],
                       from_table=Identifier('int.tab'),
                       where=BinaryOperation('and', args=[
                           BinaryOperation('=', args=[Identifier('column1'), Identifier('column2')]),
                           BinaryOperation('>', args=[Identifier('column3'), Constant(0)]),
                       ]))

        query2 = Select(
            targets=[Identifier('tab1.column1'), Identifier('pred.predicted', alias=Identifier('predicted'))],
            from_table=Join(left=Identifier('int.tab1'),
                            right=Identifier('mindsdb.pred'),
                            join_type=JoinType.INNER_JOIN,
                            implicit=True)
            )

        query = Union(left=query1, right=query2, unique=False)
        expected_plan = QueryPlan(
            steps=[
                # Query 1
                FetchDataframeStep(integration='int',
                                   query=Select(targets=[Identifier('tab.column1', alias=Identifier('column1')),
                                                         Constant(None, alias=Identifier('predicted'))],
                                                from_table=Identifier('tab'),
                                                where=BinaryOperation('and', args=[
                                                    BinaryOperation('=',
                                                                    args=[Identifier('tab.column1'),
                                                                          Identifier('tab.column2')]),
                                                    BinaryOperation('>',
                                                                    args=[Identifier('tab.column3'),
                                                                          Constant(0)]),
                                                ])
                                                )),
                # Query 2
                FetchDataframeStep(integration='int',
                                   query=Select(
                                       targets=[Star()],
                                       from_table=Identifier('tab1')),
                                   ),
                ApplyPredictorStep(namespace='mindsdb', dataframe=Result(1), predictor=Identifier('pred')),
                JoinStep(left=Result(1), right=Result(2),
                         query=Join(left=Identifier('result_1', alias=Identifier('tab1')),
                                    right=Identifier('result_2', alias=Identifier('pred')),
                                    join_type=JoinType.INNER_JOIN)),
                ProjectStep(dataframe=Result(3), columns=[Identifier('tab1.column1'), Identifier('pred.predicted', alias=Identifier('predicted'))]),

                # Union
                UnionStep(left=Result(0), right=Result(4), unique=False),

            ],
        )

        plan = plan_query(query, integrations=['int'], predictor_namespace='mindsdb', predictor_metadata={'pred': {}})

        for i in range(len(plan.steps)):
            assert plan.steps[i] == expected_plan.steps[i]
예제 #8
0
 def test_join_tables_plan_groupby(self):
     query = Select(targets=[
         Identifier('tab1.column1'),
         Identifier('tab2.column1'),
         Function('sum', args=[Identifier('tab2.column2')], alias=Identifier('total'))],
         from_table=Join(left=Identifier('int.tab1'),
                         right=Identifier('int.tab2'),
                         condition=BinaryOperation(op='=',
                                                   args=[Identifier('tab1.column1'), Identifier('tab2.column1')]),
                         join_type=JoinType.INNER_JOIN
                         ),
         group_by=[Identifier('tab1.column1'), Identifier('tab2.column1')],
         having=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Constant(0)])
     )
     plan = plan_query(query, integrations=['int'])
     expected_plan = QueryPlan(integrations=['int'],
                               steps = [
                                   FetchDataframeStep(integration='int',
                                                      query=Select(
                                                          targets=[Star()],
                                                          from_table=Identifier('tab1')),
                                                      ),
                                   FetchDataframeStep(integration='int',
                                                      query=Select(targets=[Star()],
                                                                   from_table=Identifier('tab2')),
                                                      ),
                                   JoinStep(left=Result(0), right=Result(1),
                                            query=Join(left=Identifier('tab1'),
                                                       right=Identifier('tab2'),
                                                       condition=BinaryOperation(op='=',
                                                                                 args=[Identifier('tab1.column1'),
                                                                                       Identifier('tab2.column1')]),
                                                       join_type=JoinType.INNER_JOIN
                                                       )),
                                   GroupByStep(dataframe=Result(2),
                                               targets=[Identifier('tab1.column1'),
                                                         Identifier('tab2.column1'),
                                                         Function('sum', args=[Identifier('tab2.column2')])],
                                               columns=[Identifier('tab1.column1'), Identifier('tab2.column1')]),
                                   FilterStep(dataframe=Result(3), query=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Constant(0)])),
                                   ProjectStep(dataframe=Result(4),
                                               columns=[Identifier('tab1.column1'), Identifier('tab2.column1'),
                                                        Function(op='sum', args=[Identifier('tab2.column2')], alias=Identifier('total'))]),
                               ],
                               )
     assert plan.steps == expected_plan.steps
예제 #9
0
    def plan_project(self, query, dataframe, ignore_doubles=False):
        out_identifiers = []

        for target in query.targets:
            if isinstance(target, Identifier) \
                    or isinstance(target, Star) \
                    or isinstance(target, Function) \
                    or isinstance(target, Constant):
                out_identifiers.append(target)
            else:
                new_identifier = Identifier(str(target.to_string(alias=False)),
                                            alias=target.alias)
                out_identifiers.append(new_identifier)
        return self.plan.add_step(
            ProjectStep(dataframe=dataframe,
                        columns=out_identifiers,
                        ignore_doubles=ignore_doubles))
예제 #10
0
    def test_join_tables_plan_order_by(self):
        query = Select(targets=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')],
                       from_table=Join(left=Identifier('int.tab1'),
                                       right=Identifier('int.tab2'),
                                       condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'), Identifier('tab2.column1')]),
                                       join_type=JoinType.INNER_JOIN
                                       ),
                       limit=Constant(10),
                       offset=Constant(15),
                       order_by=[OrderBy(field=Identifier('tab1.column1'))],
                )
        plan = plan_query(query, integrations=['int'])
        expected_plan = QueryPlan(integrations=['int'],
                                  steps = [
                                      FetchDataframeStep(integration='int',
                                                         query=Select(
                                                             targets=[Star()],
                                                             from_table=Identifier('tab1')),
                                                         ),
                                      FetchDataframeStep(integration='int',
                                                         query=Select(targets=[Star()],
                                                                      from_table=Identifier('tab2')),
                                                         ),
                                      JoinStep(left=Result(0), right=Result(1),
                                               query=Join(left=Identifier('tab1'),
                                                          right=Identifier('tab2'),
                                                          condition=BinaryOperation(op='=',
                                                                                    args=[Identifier('tab1.column1'),
                                                                                          Identifier('tab2.column1')]),
                                                          join_type=JoinType.INNER_JOIN
                                                          )),
                                      OrderByStep(dataframe=Result(2), order_by=[OrderBy(field=Identifier('tab1.column1'))]),
                                      LimitOffsetStep(dataframe=Result(3), limit=10, offset=15),
                                      ProjectStep(dataframe=Result(4),
                                                  columns=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')]),
                                  ],
                                  )

        assert plan.steps == expected_plan.steps
예제 #11
0
    def test_join_tables_where_plan(self):
        query = Select(targets=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')],
                       from_table=Join(left=Identifier('int.tab1'),
                                       right=Identifier('int.tab2'),
                                       condition=BinaryOperation(op='=', args=[Identifier('tab1.column1'),
                                                                               Identifier('tab2.column1')]),
                                       join_type=JoinType.INNER_JOIN
                                       ),
                       where=BinaryOperation('and',
                                             args=[
                                                 BinaryOperation('and',
                                                                 args=[
                                                                     BinaryOperation('=',
                                                                                     args=[Identifier('tab1.column1'),
                                                                                           Constant(1)]),
                                                                     BinaryOperation('=',
                                                                                     args=[Identifier('tab2.column1'),
                                                                                           Constant(0)]),

                                                                 ]
                                                                 ),
                                                 BinaryOperation('=',
                                                                 args=[Identifier('tab1.column3'),
                                                                       Identifier('tab2.column3')]),
                                             ]
                                             )
                       )

        plan = plan_query(query, integrations=['int'])
        expected_plan = QueryPlan(integrations=['int'],
                                  steps=[
                                      FetchDataframeStep(integration='int',
                                                         query=Select(
                                                             targets=[Star()],
                                                             from_table=Identifier('tab1'),
                                                         ),

                                                         ),
                                      FetchDataframeStep(integration='int',
                                                         query=Select(targets=[Star()],
                                                                      from_table=Identifier('tab2'),
                                                                      ),
                                                         ),
                                      JoinStep(left=Result(0), right=Result(1),
                                               query=Join(left=Identifier('tab1'),
                                                          right=Identifier('tab2'),
                                                          condition=BinaryOperation(op='=',
                                                                                    args=[Identifier('tab1.column1'),
                                                                                          Identifier('tab2.column1')]),
                                                          join_type=JoinType.INNER_JOIN
                                                          )),
                                      FilterStep(dataframe=Result(2),
                                                 query=BinaryOperation('and',
                                                                       args=[
                                                                           BinaryOperation('and',
                                                                                           args=[
                                                                                               BinaryOperation('=',
                                                                                                               args=[
                                                                                                                   Identifier(
                                                                                                                       'tab1.column1'),
                                                                                                                   Constant(
                                                                                                                       1)]),
                                                                                               BinaryOperation('=',
                                                                                                               args=[
                                                                                                                   Identifier(
                                                                                                                       'tab2.column1'),
                                                                                                                   Constant(
                                                                                                                       0)]),

                                                                                           ]
                                                                                           ),
                                                                           BinaryOperation('=',
                                                                                           args=[Identifier(
                                                                                               'tab1.column3'),
                                                                                                 Identifier(
                                                                                                     'tab2.column3')]),
                                                                       ]
                                                                       )),
                                      ProjectStep(dataframe=Result(3),
                                                  columns=[Identifier('tab1.column1'), Identifier('tab2.column1'), Identifier('tab2.column2')]),
                                  ],
                                  )

        assert plan.steps == expected_plan.steps
예제 #12
0
    def test_nested_select(self):
        # for tableau

        sql = f'''
            SELECT time
            FROM ( 
               select * from int.covid 
               join mindsdb.pred 
               limit 10
            ) `Custom SQL Query`
            limit 1
         '''

        query = parse_sql(sql, dialect='mindsdb')

        expected_plan = QueryPlan(
            default_namespace='mindsdb',
            steps=[
                FetchDataframeStep(integration='int',
                                   query=parse_sql('select * from covid limit 10')),
                ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0), predictor=Identifier('pred')),
                JoinStep(left=Result(0), right=Result(1),
                         query=Join(left=Identifier('result_0', alias=Identifier('covid')),
                                    right=Identifier('result_1', alias=Identifier('pred')),
                                    join_type=JoinType.JOIN)),
                ProjectStep(dataframe=Result(2), columns=[Star()]),
                ProjectStep(dataframe=Result(3), columns=[Identifier('time')], ignore_doubles=True),
                LimitOffsetStep(dataframe=Result(4), limit=1)
            ],
        )

        plan = plan_query(
            query,
            integrations=['int'],
            predictor_namespace='mindsdb',
            default_namespace='mindsdb',
            predictor_metadata={'pred': {}}
        )
        for i in range(len(plan.steps)):

            assert plan.steps[i] == expected_plan.steps[i]

        sql = f'''
                 SELECT `time`
                 FROM (
                   select * from int.covid
                   join mindsdb.pred
                 ) `Custom SQL Query`
                GROUP BY 1
            '''

        query = parse_sql(sql, dialect='mindsdb')

        expected_plan = QueryPlan(
            default_namespace='mindsdb',
            steps=[
                FetchDataframeStep(integration='int',
                                   query=parse_sql('select * from covid')),
                ApplyPredictorStep(namespace='mindsdb', dataframe=Result(0), predictor=Identifier('pred')),
                JoinStep(left=Result(0), right=Result(1),
                         query=Join(left=Identifier('result_0', alias=Identifier('covid')),
                                    right=Identifier('result_1', alias=Identifier('pred')),
                                    join_type=JoinType.JOIN)),
                ProjectStep(dataframe=Result(2), columns=[Star()]),
                GroupByStep(dataframe=Result(3), columns=[Constant(1)], targets=[Identifier('time')])
            ],
        )

        plan = plan_query(
            query,
            integrations=['int'],
            predictor_namespace='mindsdb',
            default_namespace='mindsdb',
            predictor_metadata={'pred': {}}
        )
        for i in range(len(plan.steps)):
            assert plan.steps[i] == expected_plan.steps[i]