    def test_select_from_engines(self, dialect):
        sql = 'select * from engines'
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[Star()],
                              from_table=Identifier.from_path_str('engines'))
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
    def test_select_status(self, dialect):
        sql = 'select status from mindsdb.predictors'
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(
            targets=[Identifier.from_path_str("status")],
            from_table=Identifier.from_path_str('mindsdb.predictors'))
        assert ast.to_tree() == expected_ast.to_tree()
        # assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
    def test_select_from_view_kw(self, dialect):
        for table in ['view.t', 'views.t']:
            sql = f'select * from {table}'

            ast = parse_sql(sql, dialect=dialect)
            expected_ast = Select(targets=[Star()],
                                  from_table=Identifier.from_path_str(table))
            assert ast.to_tree() == expected_ast.to_tree()
            assert str(ast) == str(expected_ast)
    def test_not_in(self, dialect):
        sql = f"""SELECT column1 NOT   IN column2"""
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='not in',
                            args=(Identifier.from_path_str("column1"),
                                  Identifier.from_path_str("column2")))
        ], )

        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
    def test_select_function_no_args(self, dialect):
        sql = 'SELECT database() FROM tab'
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(
            targets=[Function(op='database', args=[])],
            from_table=Identifier.from_path_str('tab'),
        )

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
    def test_is_false(self, dialect):
        sql = "SELECT col1 FROM t1 WHERE col1 IS FALSE"
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[Identifier.from_path_str("col1")],
                              from_table=Identifier.from_path_str('t1'),
                              where=BinaryOperation(
                                  'is',
                                  args=(Identifier.from_path_str('col1'),
                                        Constant(False))))
        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
    def test_operation_converts_to_lowercase(self, dialect):
        sql = 'SELECT column1 IS column2 FROM tab'
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='is',
                            args=(Identifier.from_path_str('column1'),
                                  Identifier.from_path_str('column2'))),
        ],
                              from_table=Identifier.from_path_str('tab'))

        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
    def test_between(self, dialect):
        sql = "SELECT col1 FROM t1 WHERE col1 BETWEEN a AND b"
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(
            targets=[Identifier.from_path_str("col1")],
            from_table=Identifier.from_path_str('t1'),
            where=BetweenOperation(args=(Identifier.from_path_str('col1'),
                                         Identifier.from_path_str('a'),
                                         Identifier.from_path_str('b'))))

        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
    def test_unary_is_special_values(self, dialect):
        args = [('NULL', NullConstant()), ('TRUE', Constant(value=True)),
                ('FALSE', Constant(value=False))]
        for sql_arg, python_obj in args:
            sql = f"""SELECT column1 IS {sql_arg}"""
            ast = parse_sql(sql, dialect=dialect)

            expected_ast = Select(targets=[
                BinaryOperation(op='IS',
                                args=(Identifier.from_path_str("column1"),
                                      python_obj))
            ], )

            assert str(ast).lower() == sql.lower()
            assert ast.to_tree() == expected_ast.to_tree()
    def test_select_dquote_alias(self, dialect):
        sql = """
            select
              a as "database"      
            from information_schema.tables "database"
        """
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(
            targets=[Identifier('a', alias=Identifier('database'))],
            from_table=Identifier(parts=['information_schema', 'tables'],
                                  alias=Identifier('database')),
        )

        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
    def test_select_function_one_arg(self, dialect):
        funcs = ['sum', 'min', 'max', 'some_custom_function']
        for func in funcs:
            sql = f'SELECT {func}(column) FROM tab'
            ast = parse_sql(sql, dialect=dialect)

            expected_ast = Select(
                targets=[
                    Function(op=func,
                             args=(Identifier.from_path_str('column'), ))
                ],
                from_table=Identifier.from_path_str('tab'),
            )

            assert str(ast).lower() == sql.lower()
            assert str(ast) == str(expected_ast)
            assert ast.to_tree() == expected_ast.to_tree()
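
These parser tests all exercise the same round-trip contract: parse_sql produces an AST whose string rendering parses back to an equal tree. Below is a minimal sketch of that property, assuming the top-level parse_sql export and the mindsdb dialect; the helper name assert_roundtrip is invented for illustration.

from mindsdb_sql import parse_sql

def assert_roundtrip(sql, dialect='mindsdb'):
    # Parse, render back to SQL, and re-parse: both trees should be equal.
    ast = parse_sql(sql, dialect=dialect)
    reparsed = parse_sql(str(ast), dialect=dialect)
    assert ast.to_tree() == reparsed.to_tree()

assert_roundtrip('SELECT sum(column1) FROM tab')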
Example #12
    def plan_join_two_tables(self, join):
        select_left_step = self.plan_integration_select(
            Select(targets=[Star()], from_table=join.left))
        select_right_step = self.plan_integration_select(
            Select(targets=[Star()], from_table=join.right))

        left_integration_name, left_table = self.get_integration_path_from_identifier_or_error(
            join.left)
        right_integration_name, right_table = self.get_integration_path_from_identifier_or_error(
            join.right)

        left_table_path = left_table.to_string(alias=False)
        right_table_path = right_table.to_string(alias=False)

        new_condition_args = []
        for arg in join.condition.args:
            if isinstance(arg, Identifier):
                if left_table_path in arg.parts:
                    new_condition_args.append(
                        disambiguate_integration_column_identifier(
                            arg, left_integration_name, left_table))
                elif right_table_path in arg.parts:
                    new_condition_args.append(
                        disambiguate_integration_column_identifier(
                            arg, right_integration_name, right_table))
                else:
                    raise PlanningException(
                        f'Wrong table or no source table in join condition for column: {str(arg)}'
                    )
            else:
                new_condition_args.append(arg)
        new_join = copy.deepcopy(join)
        new_join.condition.args = new_condition_args
        new_join.left = Identifier(left_table_path, alias=left_table.alias)
        new_join.right = Identifier(right_table_path, alias=right_table.alias)

        # FIXME: INFORMATION_SCHEMA with condition
        # clear join condition for INFORMATION_SCHEMA
        if right_integration_name == 'INFORMATION_SCHEMA':
            new_join.condition = None

        return self.plan.add_step(
            JoinStep(left=select_left_step.result,
                     right=select_right_step.result,
                     query=new_join))
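
A note on the condition rewrite above: before the JoinStep is added, each column in the join condition is matched to its source table and fully qualified. Here is a hedged before/after sketch built only from the AST classes used in this file; the integration and table names (int1, orders, customers) are invented.

from mindsdb_sql.parser.ast import BinaryOperation, Identifier

# Condition as the user wrote it: table-qualified but integration-less columns.
condition = BinaryOperation(op='=', args=[
    Identifier.from_path_str('orders.customer_id'),
    Identifier.from_path_str('customers.id'),
])
# Roughly what the disambiguation loop produces: each side prefixed with the
# integration resolved for its table.
qualified = BinaryOperation(op='=', args=[
    Identifier.from_path_str('int1.orders.customer_id'),
    Identifier.from_path_str('int1.customers.id'),
])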
    def test_select_binary_operations(self, dialect):
        for op in [
                '+', '-', '/', '*', '%', '=', '!=', '>', '<', '>=', '<=', 'is',
                'IS NOT', 'like', 'in', 'and', 'or', '||'
        ]:
            sql = f'SELECT column1 {op.upper()} column2 FROM tab'
            ast = parse_sql(sql, dialect=dialect)

            expected_ast = Select(targets=[
                BinaryOperation(op=op,
                                args=(Identifier.from_path_str('column1'),
                                      Identifier.from_path_str('column2'))),
            ],
                                  from_table=Identifier.from_path_str('tab'))

            assert str(ast).lower() == sql.lower()
            assert str(ast) == str(expected_ast)
            assert ast.to_tree() == expected_ast.to_tree()
    def test_operator_chained_and(self, dialect):
        sql = f"""SELECT column1 AND column2 AND column3"""
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='AND',
                            args=(
                                BinaryOperation(
                                    op='and',
                                    args=(
                                        Identifier.from_path_str("column1"),
                                        Identifier.from_path_str("column2"))),
                                Identifier.from_path_str("column3"),
                            ))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
    def test_operator_precedence_sum_mult_parentheses(self, dialect):
        sql = 'SELECT (column1 + column2) * column3'
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(
                op='*',
                args=(
                    BinaryOperation(op='+',
                                    args=(Identifier.from_path_str('column1'),
                                          Identifier.from_path_str('column2')),
                                    parentheses=True),
                    Identifier.from_path_str('column3'),
                ),
            )
        ])

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
Example #16
    def plan_fetch_timeseries_partitions(self, query, table,
                                         predictor_group_by_names):
        targets = [Identifier(column) for column in predictor_group_by_names]

        query = Select(
            distinct=True,
            targets=targets,
            from_table=table,
            where=query.where,
        )
        select_step = self.plan_integration_select(query)
        return select_step
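
plan_fetch_timeseries_partitions boils down to a DISTINCT select over the predictor's grouping columns, reusing the caller's WHERE clause. A minimal sketch of the query shape it builds; the table and column names (int1.sales, state) are invented.

from mindsdb_sql.parser.ast import Select, Identifier

partition_query = Select(
    distinct=True,
    targets=[Identifier('state')],
    from_table=Identifier.from_path_str('int1.sales'),
)
print(str(partition_query))  # roughly: SELECT DISTINCT state FROM int1.sales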
    def test_select_variable_complex(self):
        sql = """SELECT * FROM tab1 WHERE column1 in (SELECT column2 + @variable FROM t2)"""
        ast = parse_sql(sql, dialect='mysql')
        expected_ast = Select(targets=[Star()],
                              from_table=Identifier('tab1'),
                              where=BinaryOperation(
                                  op='in',
                                  args=(Identifier('column1'),
                                        Select(targets=[
                                            BinaryOperation(
                                                op='+',
                                                args=[
                                                    Identifier('column2'),
                                                    Variable('variable')
                                                ])
                                        ],
                                               from_table=Identifier('t2'),
                                               parentheses=True))))

        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
    def test_where_and_or_precedence(self, dialect):
        sql = "SELECT col1 FROM tab WHERE col1 AND col2 OR col3"
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[Identifier.from_path_str('col1')],
                              from_table=Identifier.from_path_str('tab'),
                              where=BinaryOperation(
                                  op='or',
                                  args=(
                                      BinaryOperation(
                                          op='and',
                                          args=(
                                              Identifier.from_path_str('col1'),
                                              Identifier.from_path_str('col2'),
                                          )),
                                      Identifier.from_path_str('col3'),
                                  )))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()

        sql = "SELECT col1 FROM tab WHERE col1 = 1 AND col2 = 1 OR col3 = 1"
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(
            targets=[Identifier.from_path_str('col1')],
            from_table=Identifier.from_path_str('tab'),
            where=BinaryOperation(
                op='or',
                args=(
                    BinaryOperation(
                        op='and',
                        args=(
                            BinaryOperation(
                                op='=',
                                args=(
                                    Identifier.from_path_str('col1'),
                                    Constant(1),
                                )),
                            BinaryOperation(
                                op='=',
                                args=(
                                    Identifier.from_path_str('col2'),
                                    Constant(1),
                                )),
                        )),
                    BinaryOperation(op='=',
                                    args=(
                                        Identifier.from_path_str('col3'),
                                        Constant(1),
                                    )),
                )))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
Example #19
    def plan_predictor(self, query, table, predictor_namespace, predictor):
        integration_select_step = self.plan_integration_select(
            Select(
                targets=[Star()],
                from_table=table,
                where=query.where,
                group_by=query.group_by,
                having=query.having,
                order_by=query.order_by,
                limit=query.limit,
                offset=query.offset,
            ))
        predictor_step = self.plan.add_step(
            ApplyPredictorStep(namespace=predictor_namespace,
                               dataframe=integration_select_step.result,
                               predictor=predictor))

        return {'predictor': predictor_step, 'data': integration_select_step}
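
plan_predictor builds a two-step plan: push the fully filtered SELECT * down to the integration, then apply the predictor to its result. A hedged sketch of the pushed-down query, assuming the mindsdb dialect; the table and filter are invented.

from mindsdb_sql import parse_sql
from mindsdb_sql.parser.ast import Select, Star, Identifier

outer = parse_sql('SELECT * FROM tab WHERE x > 1 LIMIT 10', dialect='mindsdb')
pushed_down = Select(
    targets=[Star()],
    from_table=Identifier('tab'),
    where=outer.where,
    limit=outer.limit,
)
print(str(pushed_down))  # roughly: SELECT * FROM tab WHERE x > 1 LIMIT 10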
    def test_select_variable(self):
        sql = 'SELECT @version'
        ast = parse_sql(sql, dialect='mysql')
        expected_ast = Select(targets=[Variable('version')])
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)

        sql = 'SELECT @@version'
        ast = parse_sql(sql, dialect='mysql')
        expected_ast = Select(
            targets=[Variable('version', is_system_var=True)])
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
    def test_operator_precedence_or_and(self, dialect):
        sql = 'SELECT column1 OR column2 AND column3'
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='or',
                            args=(
                                Identifier.from_path_str('column1'),
                                BinaryOperation(
                                    op='and',
                                    args=(
                                        Identifier.from_path_str('column2'),
                                        Identifier.from_path_str('column3')))))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast == expected_ast
        assert ast.to_tree() == expected_ast.to_tree()

        sql = 'SELECT column1 AND column2 OR column3'
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='or',
                            args=(
                                BinaryOperation(
                                    op='and',
                                    args=(
                                        Identifier.from_path_str('column1'),
                                        Identifier.from_path_str('column2'))),
                                Identifier.from_path_str('column3'),
                            ))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast == expected_ast
        assert ast.to_tree() == expected_ast.to_tree()
Example #22
    def plan_timeseries_predictor(self, query, table, predictor_namespace,
                                  predictor):
        predictor_name = predictor.to_string(alias=False).lower()
        # to original case
        predictor_name = self.predictor_names[predictor_name]

        predictor_time_column_name = self.predictor_metadata[predictor_name][
            'order_by_column']
        predictor_group_by_names = self.predictor_metadata[predictor_name][
            'group_by_columns']
        if predictor_group_by_names is None:
            predictor_group_by_names = []
        predictor_window = self.predictor_metadata[predictor_name]['window']

        if query.order_by:
            raise PlanningException(
                f'Can\'t provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}'
            )

        saved_limit = query.limit

        if query.group_by or query.having or query.offset:
            raise PlanningException(
                f'Unsupported query to timeseries predictor: {str(query)}')

        allowed_columns = [predictor_time_column_name.lower()]
        if len(predictor_group_by_names) > 0:
            allowed_columns += [i.lower() for i in predictor_group_by_names]
        validate_ts_where_condition(query.where,
                                    allowed_columns=allowed_columns)

        time_filter = find_time_filter(
            query.where, time_column_name=predictor_time_column_name)

        order_by = [
            OrderBy(Identifier(parts=[predictor_time_column_name]),
                    direction='DESC')
        ]

        preparation_where = copy.deepcopy(query.where)

        # add {order_by_field} is not null
        def add_order_not_null(condition):
            order_field_not_null = BinaryOperation(
                op='is not',
                args=[
                    Identifier(parts=[predictor_time_column_name]),
                    NullConstant()
                ])
            if condition is not None:
                condition = BinaryOperation(
                    op='and', args=[condition, order_field_not_null])
            else:
                condition = order_field_not_null
            return condition

        preparation_where2 = copy.deepcopy(preparation_where)
        preparation_where = add_order_not_null(preparation_where)

        # Obtain integration selects
        if isinstance(time_filter, BetweenOperation):
            between_from = time_filter.args[1]
            preparation_time_filter = BinaryOperation(
                '<',
                args=[Identifier(predictor_time_column_name), between_from])
            preparation_where2 = replace_time_filter(preparation_where2,
                                                     time_filter,
                                                     preparation_time_filter)
            integration_select_1 = Select(
                targets=[Star()],
                from_table=table,
                where=add_order_not_null(preparation_where2),
                order_by=order_by,
                limit=Constant(predictor_window))

            integration_select_2 = Select(targets=[Star()],
                                          from_table=table,
                                          where=preparation_where,
                                          order_by=order_by)

            integration_selects = [integration_select_1, integration_select_2]
        elif isinstance(
                time_filter, BinaryOperation
        ) and time_filter.op == '>' and time_filter.args[1] == Latest():
            integration_select = Select(
                targets=[Star()],
                from_table=table,
                where=preparation_where,
                order_by=order_by,
                limit=Constant(predictor_window),
            )
            integration_select.where = find_and_remove_time_filter(
                integration_select.where, time_filter)
            integration_selects = [integration_select]

        elif isinstance(time_filter,
                        BinaryOperation) and time_filter.op in ('>', '>='):
            time_filter_date = time_filter.args[1]
            preparation_time_filter_op = {'>': '<=', '>=': '<'}[time_filter.op]

            preparation_time_filter = BinaryOperation(
                preparation_time_filter_op,
                args=[
                    Identifier(predictor_time_column_name), time_filter_date
                ])
            preparation_where2 = replace_time_filter(preparation_where2,
                                                     time_filter,
                                                     preparation_time_filter)
            integration_select_1 = Select(
                targets=[Star()],
                from_table=table,
                where=add_order_not_null(preparation_where2),
                order_by=order_by,
                limit=Constant(predictor_window))

            integration_select_2 = Select(targets=[Star()],
                                          from_table=table,
                                          where=preparation_where,
                                          order_by=order_by)

            integration_selects = [integration_select_1, integration_select_2]
        else:
            integration_select = Select(
                targets=[Star()],
                from_table=table,
                where=preparation_where,
                order_by=order_by,
            )
            integration_selects = [integration_select]

        if len(predictor_group_by_names) == 0:
            # ts query without grouping
            # one or multistep
            if len(integration_selects) == 1:
                select_partition_step = self.get_integration_select_step(
                    integration_selects[0])
            else:
                select_partition_step = MultipleSteps(steps=[
                    self.get_integration_select_step(s)
                    for s in integration_selects
                ],
                                                      reduce='union')

            # fetch data step
            data_step = self.plan.add_step(select_partition_step)
        else:
            # inject $var into the queries
            for integration_select in integration_selects:
                condition = integration_select.where
                for num, column in enumerate(predictor_group_by_names):
                    cond = BinaryOperation(
                        '=',
                        args=[Identifier(column),
                              Constant(f'$var[{column}]')])

                    # join to main condition
                    if condition is None:
                        condition = cond
                    else:
                        condition = BinaryOperation('and',
                                                    args=[condition, cond])

                integration_select.where = condition
            # one or multistep
            if len(integration_selects) == 1:
                select_partition_step = self.get_integration_select_step(
                    integration_selects[0])
            else:
                select_partition_step = MultipleSteps(steps=[
                    self.get_integration_select_step(s)
                    for s in integration_selects
                ],
                                                      reduce='union')

            # get grouping values
            no_time_filter_query = copy.deepcopy(query)
            no_time_filter_query.where = find_and_remove_time_filter(
                no_time_filter_query.where, time_filter)
            select_partitions_step = self.plan_fetch_timeseries_partitions(
                no_time_filter_query, table, predictor_group_by_names)

            # sub-query by every grouping value
            map_reduce_step = self.plan.add_step(
                MapReduceStep(values=select_partitions_step.result,
                              reduce='union',
                              step=select_partition_step))
            data_step = map_reduce_step

        predictor_step = self.plan.add_step(
            ApplyTimeseriesPredictorStep(
                output_time_filter=time_filter,
                namespace=predictor_namespace,
                dataframe=data_step.result,
                predictor=predictor,
            ))

        return {
            'predictor': predictor_step,
            'data': data_step,
            'saved_limit': saved_limit,
        }
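
A recurring building block above is add_order_not_null, which ANDs an IS NOT NULL guard on the order column onto whatever WHERE already exists, so rows without a time value are excluded before ordering. A standalone sketch of the same transformation; the column name pickup_time is invented.

from mindsdb_sql.parser.ast import BinaryOperation, Identifier, NullConstant

def with_order_not_null(condition, time_col='pickup_time'):
    # Build "<time_col> IS NOT NULL" and AND it with any existing condition.
    guard = BinaryOperation(op='is not',
                            args=[Identifier(time_col), NullConstant()])
    if condition is None:
        return guard
    return BinaryOperation(op='and', args=[condition, guard])

print(str(with_order_not_null(None)))  # roughly: pickup_time IS NOT NULL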
Example #23
    def execute_step(self, step, steps_data):
        if type(step) == GetPredictorColumns:
            predictor_name = step.predictor.parts[-1]
            dn = self.datahub.get(self.mindsdb_database_name)
            columns = dn.get_table_columns(predictor_name)
            columns = [(column_name, column_name) for column_name in columns]
            data = {
                'values': [],
                'columns': {
                    (self.mindsdb_database_name, predictor_name, predictor_name):
                    columns
                },
                'tables':
                [(self.mindsdb_database_name, predictor_name, predictor_name)]
            }
        elif type(step) == GetTableColumns:
            table = step.table
            dn = self.datahub.get(step.namespace)
            ds_query = Select(from_table=Identifier(table), targets=[Star()])
            dso, _ = dn.data_store.create_datasource(
                dn.integration_name, {'query': ds_query.to_string()})

            columns = dso.get_columns()
            cols = []
            for col in columns:
                if not isinstance(col, dict):
                    col = {'name': col, 'type': 'str'}
                cols.append(col)

            table_alias = (self.database, table, table)

            data = {
                'values': [],
                'columns': {
                    table_alias: cols
                },
                'tables': [table_alias]
            }
        elif type(step) == FetchDataframeStep:
            data = self._fetch_dataframe_step(step)
        elif type(step) == UnionStep:
            raise ErNotSupportedYet('Union step is not implemented')
            # TODO add union support
            # left_data = steps_data[step.left.step_num]
            # right_data = steps_data[step.right.step_num]
            # data = left_data + right_data
        elif type(step) == MapReduceStep:
            try:
                if step.reduce != 'union':
                    raise Exception(
                        f'Unknown MapReduceStep type: {step.reduce}')

                step_data = steps_data[step.values.step_num]
                vars = []
                step_data_values = step_data['values']
                for row in step_data_values:
                    var_group = {}
                    vars.append(var_group)
                    for row_data in row.values():
                        for name, value in row_data.items():
                            if name[0] != '__mindsdb_row_id':
                                var_group[name[1] or name[0]] = value

                data = {'values': [], 'columns': {}, 'tables': []}
                substep = step.step
                if type(substep) == FetchDataframeStep:
                    query = substep.query
                    for var_group in vars:
                        markQueryVar(query.where)
                        for name, value in var_group.items():
                            replaceQueryVar(query.where, value, name)
                        sub_data = self._fetch_dataframe_step(substep)
                        if len(data['columns']) == 0:
                            data['columns'] = sub_data['columns']
                        if len(data['tables']) == 0:
                            data['tables'] = sub_data['tables']
                        data['values'].extend(sub_data['values'])
                        unmarkQueryVar(query.where)
                elif type(substep) == MultipleSteps:
                    data = self._multiple_steps_reduce(substep, vars)
                else:
                    raise Exception(f'Unknown step type: {step.step}')
            except Exception as e:
                raise SqlApiException(f'error in map reduce step: {e}') from e
        elif type(step) == MultipleSteps:
            if step.reduce != 'union':
                raise Exception(
                    f"Only MultipleSteps with type = 'union' is supported. Got '{step.type}'"
                )
            data = None
            for substep in step.steps:
                subdata = self.execute_step(substep, steps_data)
                if data is None:
                    data = subdata
                else:
                    data['values'].extend(subdata['values'])
        elif type(step) == ApplyPredictorRowStep:
            try:
                predictor = '.'.join(step.predictor.parts)
                dn = self.datahub.get(self.mindsdb_database_name)
                where_data = step.row_dict

                data = dn.select(
                    table=predictor,
                    columns=None,
                    where_data=where_data,
                    integration_name=self.session.integration,
                    integration_type=self.session.integration_type)

                data = [{(key, key): value
                         for key, value in row.items()} for row in data]

                table_name = get_preditor_alias(step, self.database)
                values = [{table_name: x} for x in data]
                columns = {table_name: []}
                if len(data) > 0:
                    row = data[0]
                    columns[table_name] = list(row.keys())
                # TODO else

                data = {
                    'values': values,
                    'columns': columns,
                    'tables': [table_name]
                }
            except Exception as e:
                raise SqlApiException(
                    f'error in apply predictor row step: {e}') from e
        elif type(step) in (ApplyPredictorStep, ApplyTimeseriesPredictorStep):
            try:
                dn = self.datahub.get(self.mindsdb_database_name)
                predictor = '.'.join(step.predictor.parts)
                where_data = []
                for row in steps_data[step.dataframe.step_num]['values']:
                    new_row = {}
                    for table_name in row:
                        keys_intersection = set(new_row) & set(row[table_name])
                        if len(keys_intersection) > 0:
                            raise Exception(
                                f'The predictor got two identical keys from different datasources: {keys_intersection}'
                            )
                        new_row.update(row[table_name])
                    where_data.append(new_row)

                where_data = [{key[1]: value
                               for key, value in row.items()}
                              for row in where_data]

                is_timeseries = self.planner.predictor_metadata[predictor][
                    'timeseries']
                _mdb_make_predictions = None
                if is_timeseries:
                    if 'LATEST' in self.query_str:
                        _mdb_make_predictions = False
                    else:
                        _mdb_make_predictions = True
                    for row in where_data:
                        if '__mdb_make_predictions' not in row:
                            row['__mdb_make_predictions'] = _mdb_make_predictions

                for row in where_data:
                    for key in row:
                        if isinstance(row[key], datetime.date):
                            row[key] = str(row[key])

                data = dn.select(
                    table=predictor,
                    columns=None,
                    where_data=where_data,
                    integration_name=self.session.integration,
                    integration_type=self.session.integration_type)

                # if is_timeseries:
                #     if 'LATEST' not in self.raw:
                #         # remove additional records from predictor results:
                #         # first 'window_size' and last 'horizon' records
                #         # otherwise there are many unexpected rows in the prediction result:
                #         # ----------------------------------------------------------------------------------------
                #         # mysql> SELECT tb.time, tb.state, tb.pnew_case, tb.new_case from
                #         # MYSQL_LOCAL.test_data.covid AS
                #         # ta JOIN mindsdb.covid_hor3 AS tb
                #         # WHERE ta.state = "CA" AND ta.time BETWEEN "2020-10-19" AND "2020-10-20";
                #         # ----------------------------------------------------------------------------------------
                #         # +------------+-------+-----------+----------+
                #         # | time       | state | pnew_case | new_case |
                #         # +------------+-------+-----------+----------+
                #         # | 2020-10-09 | CA    | 0         | 2862     |
                #         # | 2020-10-10 | CA    | 0         | 2979     |
                #         # | 2020-10-11 | CA    | 0         | 3075     |
                #         # | 2020-10-12 | CA    | 0         | 3329     |
                #         # | 2020-10-13 | CA    | 0         | 2666     |
                #         # | 2020-10-14 | CA    | 0         | 2378     |
                #         # | 2020-10-15 | CA    | 0         | 3449     |
                #         # | 2020-10-16 | CA    | 0         | 3803     |
                #         # | 2020-10-17 | CA    | 0         | 4170     |
                #         # | 2020-10-18 | CA    | 0         | 3806     |
                #         # | 2020-10-19 | CA    | 0         | 3286     |
                #         # | 2020-10-20 | CA    | 0         | 3474     |
                #         # | 2020-10-21 | CA    | 0         | 3474     |
                #         # | 2020-10-22 | CA    | 0         | 3474     |
                #         # +------------+-------+-----------+----------+
                #         # 14 rows in set (2.52 sec)

                #         window_size = predictor_metadata[predictor]['window']
                #         horizon = predictor_metadata[predictor]['horizon']
                #         if len(data) >= (window_size + horizon):
                #             data = data[window_size:]
                #             if len(data) > horizon and horizon > 1:
                #                 data = data[:-horizon + 1]
                data = [{(key, key): value
                         for key, value in row.items()} for row in data]

                table_name = get_preditor_alias(step, self.database)
                values = [{table_name: x} for x in data]
                columns = {table_name: []}
                if len(data) > 0:
                    row = data[0]
                    columns[table_name] = list(row.keys())
                # TODO else

                data = {
                    'values': values,
                    'columns': columns,
                    'tables': [table_name]
                }
            except Exception as e:
                raise SqlApiException(
                    f'error in apply predictor step: {e}') from e
        elif type(step) == JoinStep:
            try:
                left_data = steps_data[step.left.step_num]
                right_data = steps_data[step.right.step_num]

                # FIXME https://github.com/mindsdb/mindsdb_sql/issues/136
                # is_timeseries = False
                # if True in [type(step) == ApplyTimeseriesPredictorStep for step in plan.steps]:
                #     right_data = steps_data[step.left.step_num]
                #     left_data = steps_data[step.right.step_num]
                #     is_timeseries = True

                if step.query.condition is not None:
                    raise Exception(
                        'Only JOIN without a condition is supported at the moment')
                if step.query.join_type.upper() not in ('LEFT JOIN', 'JOIN'):
                    raise Exception(
                        'Only JOIN and LEFT JOIN are supported at the moment')
                if (len(left_data['tables']) != 1
                        or len(right_data['tables']) != 1
                        or left_data['tables'][0] == right_data['tables'][0]):
                    raise Exception(
                        'Only a JOIN of two different tables is supported at the moment'
                    )

                data = {
                    'values': [],
                    'columns': {},
                    'tables':
                    list(set(left_data['tables'] + right_data['tables']))
                }

                for data_part in [left_data, right_data]:
                    for table_name in data_part['columns']:
                        if table_name not in data['columns']:
                            data['columns'][table_name] = data_part['columns'][
                                table_name]
                        else:
                            data['columns'][table_name].extend(
                                data_part['columns'][table_name])
                for table_name in data['columns']:
                    data['columns'][table_name] = list(
                        set(data['columns'][table_name]))

                left_key = left_data['tables'][0]
                right_key = right_data['tables'][0]

                left_columns_map = {}
                left_columns_map_reverse = {}
                for i, column_name in enumerate(
                        left_data['columns'][left_key]):
                    left_columns_map[f'a{i}'] = column_name
                    left_columns_map_reverse[column_name] = f'a{i}'

                right_columns_map = {}
                right_columns_map_reverse = {}
                for i, column_name in enumerate(
                        right_data['columns'][right_key]):
                    right_columns_map[f'b{i}'] = column_name
                    right_columns_map_reverse[column_name] = f'b{i}'

                left_df_data = []
                for row in left_data['values']:
                    row = row[left_key]
                    left_df_data.append({
                        left_columns_map_reverse[key]: value
                        for key, value in row.items()
                    })

                right_df_data = []
                for row in right_data['values']:
                    row = row[right_key]
                    right_df_data.append({
                        right_columns_map_reverse[key]: value
                        for key, value in row.items()
                    })

                df_a = pd.DataFrame(left_df_data)
                df_b = pd.DataFrame(right_df_data)

                a_name = f'a{round(time.time() * 1000)}'
                b_name = f'b{round(time.time() * 1000)}'
                con = duckdb.connect(database=':memory:')
                con.register(a_name, df_a)
                con.register(b_name, df_b)
                resp_df = con.execute(f"""
                    SELECT * FROM {a_name} as ta full join {b_name} as tb
                    ON ta.{left_columns_map_reverse[('__mindsdb_row_id', '__mindsdb_row_id')]}
                     = tb.{right_columns_map_reverse[('__mindsdb_row_id', '__mindsdb_row_id')]}
                """).fetchdf()
                con.unregister(a_name)
                con.unregister(b_name)
                con.close()
                resp_df = resp_df.where(pd.notnull(resp_df), None)
                resp_dict = resp_df.to_dict(orient='records')

                for row in resp_dict:
                    new_row = {left_key: {}, right_key: {}}
                    for key, value in row.items():
                        if key.startswith('a'):
                            new_row[left_key][left_columns_map[key]] = value
                        else:
                            new_row[right_key][right_columns_map[key]] = value
                    data['values'].append(new_row)

                # remove all records with empty predictor data from the join result,
                # otherwise empty records appear in the final result:
                # +------------+------------+-------+-----------+----------+
                # | time       | time       | state | pnew_case | new_case |
                # +------------+------------+-------+-----------+----------+
                # | 2020-10-21 | 2020-10-24 | CA    | 0.0       | 5945.0   |
                # | 2020-10-22 | 2020-10-23 | CA    | 0.0       | 6141.0   |
                # | 2020-10-23 | 2020-10-22 | CA    | 0.0       | 2940.0   |
                # | 2020-10-24 | 2020-10-21 | CA    | 0.0       | 3707.0   |
                # | NULL       | 2020-10-20 | NULL  | nan       | nan      |
                # | NULL       | 2020-10-19 | NULL  | nan       | nan      |
                # | NULL       | 2020-10-18 | NULL  | nan       | nan      |
                # | NULL       | 2020-10-17 | NULL  | nan       | nan      |
                # | NULL       | 2020-10-16 | NULL  | nan       | nan      |
                # +------------+------------+-------+-----------+----------+
                # 9 rows in set (2.07 sec)

                # if is_timeseries:
                #     data_values = []
                #     for row in data['values']:
                #         for key in row:
                #             if 'mindsdb' in key:
                #                 if not is_empty_prediction_row(row[key]):
                #                     data_values.append(row)
                #                     break
                #     data['values'] = data_values
            except Exception as e:
                raise SqlApiException(f'error in join step: {e}') from e

        elif type(step) == FilterStep:
            raise ErNotSupportedYet('FilterStep is not implemented')
        # elif type(step) == ApplyTimeseriesPredictorStep:
        #     raise Exception('ApplyTimeseriesPredictorStep is not implemented')
        elif type(step) == LimitOffsetStep:
            try:
                step_data = steps_data[step.dataframe.step_num]
                data = {
                    'values': step_data['values'].copy(),
                    'columns': step_data['columns'].copy(),
                    'tables': step_data['tables'].copy()
                }
                if isinstance(step.offset, Constant) and isinstance(
                        step.offset.value, int):
                    data['values'] = data['values'][step.offset.value:]
                if isinstance(step.limit, Constant) and isinstance(
                        step.limit.value, int):
                    data['values'] = data['values'][:step.limit.value]
            except Exception as e:
                raise SqlApiException(
                    f'error in limit offset step: {e}') from e
        elif type(step) == ProjectStep:
            try:
                step_data = steps_data[step.dataframe.step_num]
                columns_list = []
                for column_identifier in step.columns:
                    table_name = None
                    if type(column_identifier) == Star:
                        for table_name, table_columns_list in step_data[
                                'columns'].items():
                            for column in table_columns_list:
                                columns_list.append(
                                    Column(database=table_name[0],
                                           table_name=table_name[1],
                                           table_alias=table_name[2],
                                           name=column[0],
                                           alias=column[1]))
                    elif type(column_identifier) == Identifier:
                        column_name_parts = column_identifier.parts
                        column_alias = None if column_identifier.alias is None else '.'.join(
                            column_identifier.alias.parts)
                        if len(column_name_parts) > 2:
                            raise Exception(
                                f'Column name must contain no more than 2 parts. Got name: {column_identifier}'
                            )
                        elif len(column_name_parts) == 1:
                            column_name = column_name_parts[0]

                            appropriate_table = None
                            if len(step_data['tables']) == 1:
                                appropriate_table = step_data['tables'][0]
                            else:
                                for table_name, table_columns in step_data[
                                        'columns'].items():
                                    table_column_names_list = [
                                        x[1] or x[0] for x in table_columns
                                    ]
                                    column_exists = get_column_in_case(
                                        table_column_names_list, column_name)
                                    if column_exists:
                                        if appropriate_table is not None:
                                            raise Exception(
                                                f'Found multiple appropriate tables for column {column_name}'
                                            )
                                        else:
                                            appropriate_table = table_name
                            if appropriate_table is None:
                                # it is probably a constant
                                # FIXME https://github.com/mindsdb/mindsdb_sql/issues/133
                                # column_name = column_name.strip("'")
                                # name_or_alias = column_alias or column_name
                                # column_alias = name_or_alias
                                # for row in step_data['values']:
                                #     for table in row:
                                #         row[table][(column_name, name_or_alias)] = row[table][(column_name, column_name)]
                                # appropriate_table = step_data['tables'][0]
                                # FIXME: must be exception
                                columns_list.append(
                                    Column(database=appropriate_table[0],
                                           table_name=appropriate_table[1],
                                           table_alias=appropriate_table[2],
                                           name=column_alias))
                            else:
                                columns_list.append(
                                    Column(database=appropriate_table[0],
                                           table_name=appropriate_table[1],
                                           table_alias=appropriate_table[2],
                                           name=column_name,
                                           alias=column_alias))  # column_name
                        elif len(column_name_parts) == 2:
                            table_name_or_alias = column_name_parts[0]
                            column_name = column_name_parts[1]

                            appropriate_table = None
                            for table_name, table_columns in step_data[
                                    'columns'].items():
                                table_column_names_list = [
                                    x[1] or x[0] for x in table_columns
                                ]
                                checking_table_name_or_alias = table_name[
                                    2] or table_name[1]
                                if table_name_or_alias.lower(
                                ) == checking_table_name_or_alias.lower():
                                    column_exists = get_column_in_case(
                                        table_column_names_list, column_name)
                                    if column_exists:
                                        appropriate_table = table_name
                                        break
                                    else:
                                        raise Exception(
                                            f'Can not find column "{column_name}" in table "{table_name}"'
                                        )
                            if appropriate_table is None:
                                raise Exception(
                                    f'Cannot find an appropriate table for column {column_name}'
                                )

                            columns_to_copy = None
                            table_column_names_list = [
                                x[1] or x[0] for x in table_columns
                            ]
                            checking_name = get_column_in_case(
                                table_column_names_list, column_name)
                            for column in step_data['columns'][
                                    appropriate_table]:
                                if column[0] == checking_name and (
                                        column[1] is None
                                        or column[1] == checking_name):
                                    columns_to_copy = column
                                    break
                            else:
                                raise Exception(
                                    f'Cannot find an appropriate column in data: {(column_name, column_alias)}'
                                )

                            for row in step_data['values']:
                                row[appropriate_table][(
                                    column_name, column_alias
                                )] = row[appropriate_table][columns_to_copy]

                            columns_list.append(
                                Column(database=appropriate_table[0],
                                       table_name=appropriate_table[1],
                                       table_alias=appropriate_table[2],
                                       name=column_name,
                                       alias=column_alias))
                        else:
                            raise Exception('Undefined column name')
                    else:
                        raise Exception(
                            f'Unexpected column name type: {column_identifier}'
                        )

                self.columns_list = columns_list
                data = step_data
            except Exception as e:
                raise SqlApiException(f'error on project step: {e}') from e
        else:
            raise SqlApiException(f'Unknown planner step: {step}')
        return data
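
For reference, the JoinStep branch above offloads the actual join to duckdb: each side becomes a pandas dataframe registered as an in-memory table, and the two are full-joined on the synthetic __mindsdb_row_id. A stripped-down sketch of that pattern; the dataframes and their column names are invented for illustration.

import duckdb
import pandas as pd

# The executor first renames columns to a0..aN / b0..bN; the distinct names
# here stand in for that, avoiding duplicate column names in the join output.
df_a = pd.DataFrame({'a0': [1, 2], 'a1': ['x', 'y']})   # a0 = row id
df_b = pd.DataFrame({'b0': [2, 3], 'b1': [10, 20]})     # b0 = row id

con = duckdb.connect(database=':memory:')
con.register('ta', df_a)
con.register('tb', df_b)
resp_df = con.execute(
    'SELECT * FROM ta FULL JOIN tb ON ta.a0 = tb.b0').fetchdf()
con.close()

# NULL-pad unmatched cells, as the executor does before re-keying the rows.
resp_df = resp_df.where(pd.notnull(resp_df), None)
print(resp_df.to_dict(orient='records'))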