Exemple #1
0
    def execute_steps(self, params=None):
        """Bind execution parameters (if any) and plan the planner's query.

        Args:
            params: optional sequence of values for the statement's
                placeholders; its length must equal ``len(statement.params)``.

        Returns:
            The result of ``self.plan_query(query)`` for Select, Union,
            CreateTable and Insert queries; an empty list for anything else.

        Raises:
            PlanningException: if parameters are supplied but the planner has
                no statement, or if the parameter count does not match.
        """
        statement = self.planner.statement

        if statement is None:
            # Without a statement there is nothing to bind parameters to.
            if params is not None:
                raise PlanningException("Can't execute statement")
            statement = Statement()

        query = self.planner.query

        if params is not None:
            if len(params) != len(statement.params):
                raise PlanningException("Count of execution parameters don't match prepared statement")

            # Substitute the values and store the filled query on the planner.
            query = utils.fill_query_params(query, params)
            self.planner.query = query

        # prevent from second execution
        statement.params = None

        plannable_types = (ast.Select, ast.Union, ast.CreateTable, ast.Insert)
        if not isinstance(query, plannable_types):
            return []
        return self.plan_query(query)
Exemple #2
0
def recursively_disambiguate_identifiers_in_select(select, integration_name,
                                                   table):
    """Rewrite, in place, the identifiers of ``select`` relative to ``table``.

    Targets, WHERE, GROUP BY, HAVING and ORDER BY are each replaced on
    ``select`` by disambiguated copies; an Identifier ``from_table`` is
    replaced by ``table`` itself.

    Raises:
        PlanningException: if WHERE is neither a BinaryOperation nor a
            BetweenOperation, or if HAVING is not a BinaryOperation.
    """
    select.targets = disambiguate_select_targets(select.targets,
                                                 integration_name, table)

    if select.from_table and isinstance(select.from_table, Identifier):
        select.from_table = table

    if select.where:
        if not isinstance(select.where, (BinaryOperation, BetweenOperation)):
            raise PlanningException(
                f'Unsupported where clause {type(select.where)}, only BinaryOperation is supported now.'
            )

        # Work on a copy so the original condition object is left untouched.
        where_copy = copy.deepcopy(select.where)
        recursively_disambiguate_identifiers_in_op(where_copy,
                                                   integration_name, table)
        select.where = where_copy

    if select.group_by:
        # Only Identifier entries are rewritten; everything else is kept.
        select.group_by = [
            disambiguate_integration_column_identifier(
                entry, integration_name, table)
            if isinstance(entry, Identifier) else entry
            for entry in copy.deepcopy(select.group_by)
        ]

    if select.having:
        if not isinstance(select.having, BinaryOperation):
            raise PlanningException(
                f'Unsupported having clause {type(select.having)}, only BinaryOperation is supported now.'
            )

        having_copy = copy.deepcopy(select.having)
        recursively_disambiguate_identifiers_in_op(having_copy,
                                                   integration_name, table)
        select.having = having_copy

    if select.order_by:
        rewritten_order = []
        for original_item in select.order_by:
            item_copy = copy.deepcopy(original_item)
            item_copy.field = disambiguate_integration_column_identifier(
                item_copy.field, integration_name, table)
            rewritten_order.append(item_copy)
        select.order_by = rewritten_order
Exemple #3
0
def get_integration_path_from_identifier(identifier):
    """Split a table identifier into its integration name and remaining path.

    The first element of ``identifier.parts`` is taken as the integration
    name. The returned identifier is a deep copy of ``identifier`` whose
    ``parts`` hold everything after that first element.

    Returns:
        Tuple ``(integration_name, identifier_without_integration)``.

    Raises:
        PlanningException: if the identifier has exactly one part (no
            integration given) or more than four parts.
    """
    parts = identifier.parts
    integration_name = parts[0]
    part_count = len(parts)

    if part_count == 1:
        raise PlanningException(
            f'No integration specified for table: {str(identifier)}')
    if part_count > 4:
        raise PlanningException(
            f'Too many parts (dots) in table identifier: {str(identifier)}')

    # The caller's identifier is never mutated; only the copy is trimmed.
    stripped_identifier = copy.deepcopy(identifier)
    stripped_identifier.parts = parts[1:]

    return integration_name, stripped_identifier
Exemple #4
0
    def get_statement_info(self):
        """Describe the columns and parameters of the planner's statement.

        Returns:
            A dict with two keys: ``'parameters'`` (one entry per item in
            ``statement.params``, always named ``'?'`` with type ``'str'``)
            and ``'columns'`` (one entry per item in ``statement.columns``).

        Raises:
            PlanningException: if the planner has no statement.
        """
        statement = self.planner.statement

        if statement is None:
            raise PlanningException('Statement is not prepared')

        def describe_column(column):
            # Columns without a table report None for table name and ds.
            table_name, datasource = None, None
            if column.table is not None:
                table_name = column.table.name
                datasource = column.table.ds
            return {
                'alias': column.alias,
                'type': column.type,
                'name': column.name,
                'table_name': table_name,
                # The table name is also reported as the table alias.
                'table_alias': table_name,
                'ds': datasource,
            }

        columns_result = [describe_column(col) for col in statement.columns]

        placeholder = '?'
        parameters = [
            {'alias': placeholder, 'type': 'str', 'name': placeholder}
            for _ in statement.params
        ]

        return {
            'parameters': parameters,
            'columns': columns_result
        }
Exemple #5
0
def disambiguate_integration_column_identifier(identifier,
                                               integration_name,
                                               table,
                                               initial_name_as_alias=False):
    """Removes integration name from column if it's present, adds table path if it's absent

    Args:
        identifier: column identifier to normalise (not modified).
        integration_name: leading part to strip when the column starts with it.
        table: table the column must belong to; its alias is used as the
            table reference when set, otherwise its ``parts``.
        initial_name_as_alias: when the column has no alias of its own, alias
            the result with the last part of the resulting path.

    Returns:
        A new Identifier carrying the normalised parts.

    Raises:
        PlanningException: if a multi-part column does not start with the
            table reference or has no part left after it.
    """
    if table.alias:
        column_table_ref = [table.alias.to_string(alias=False)]
    else:
        column_table_ref = table.parts

    parts = list(identifier.parts)

    # Strip the integration prefix, but never from a bare column name.
    if len(parts) > 1 and parts[0] == integration_name:
        parts = parts[1:]

    if len(parts) > 1:
        ref_length = len(column_table_ref)
        if len(parts) <= ref_length or parts[:ref_length] != column_table_ref:
            raise PlanningException(
                f'Tried to query column {identifier.to_tree()} from integration {integration_name} table {column_table_ref}, but a different table name has been specified.'
            )
    elif len(parts) == 1:
        # Bare column name: qualify it with the table reference.
        parts = column_table_ref + parts

    result = Identifier(parts=parts)
    if identifier.alias:
        result.alias = identifier.alias
    elif initial_name_as_alias:
        result.alias = Identifier(parts[-1])

    return result
Exemple #6
0
    def plan_select_from_predictor(self, select):
        """Plan a SELECT whose FROM clause is a predictor.

        A ``WHERE 1 = 0`` query is planned as a GetPredictorColumns step.
        Any other query must have a WHERE made of ``column = constant`` pairs,
        which become the row passed to an ApplyPredictorRowStep.

        Returns:
            Tuple ``(predictor_step, project_step)``.

        Raises:
            PlanningException: on an unsupported select target, on GROUP BY or
                HAVING, or when the WHERE clause is missing.
        """
        predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
            select.from_table, self.default_namespace)

        columns_probe = BinaryOperation('=', args=[Constant(1), Constant(0)])

        if select.where == columns_probe:
            # Hardcoded mysql way of getting predictor columns
            step = GetPredictorColumns(namespace=predictor_namespace,
                                       predictor=predictor)
        else:
            for target in select.targets:
                if isinstance(target, Identifier):
                    # The disambiguated identifier is not used afterwards;
                    # the call is kept so any error it raises still surfaces.
                    disambiguate_predictor_column_identifier(target, predictor)
                elif type(target) not in (Star, Constant):
                    raise PlanningException(
                        f'Unknown select target {type(target)}')

            if select.group_by or select.having:
                raise PlanningException(
                    f'Unsupported operation when querying predictor. Only WHERE is allowed and required.'
                )

            where_clause = select.where
            if not where_clause:
                raise PlanningException(
                    f'WHERE clause required when selecting from predictor')

            # Filled in place with {column name: constant value}.
            row_dict = {}
            recursively_extract_column_values(where_clause, row_dict,
                                              predictor)

            step = ApplyPredictorRowStep(namespace=predictor_namespace,
                                         predictor=predictor,
                                         row_dict=row_dict)

        predictor_step = self.plan.add_step(step)
        project_step = self.plan_project(select, predictor_step.result)
        return predictor_step, project_step
Exemple #7
0
def recursively_disambiguate_identifiers(obj, integration_name, table):
    """Dispatch in-place identifier disambiguation by the type of ``obj``.

    Operations and Selects are handed to their dedicated helpers.

    Raises:
        PlanningException: if ``obj`` is neither an Operation nor a Select.
    """
    if isinstance(obj, Operation):
        recursively_disambiguate_identifiers_in_op(obj, integration_name,
                                                   table)
        return

    if isinstance(obj, Select):
        recursively_disambiguate_identifiers_in_select(obj, integration_name,
                                                       table)
        return

    raise PlanningException(
        f'Unsupported object for disambiguation {type(obj)}')
Exemple #8
0
def validate_ts_where_condition(op, allowed_columns, allow_and=True):
    """Error if the where condition contains an operation or column that is not allowed.

    Args:
        op: condition to validate; a falsy value is accepted as "no filter".
        allowed_columns: lower-case column names that may be filtered on.
        allow_and: whether ``and`` is accepted at this level. Nested
            operations are always validated with ``and`` allowed.

    Raises:
        PlanningException: if ``op.op`` is not an allowed operation, or an
            Identifier argument's last part (lower-cased) is not in
            ``allowed_columns``.
    """
    if not op:
        return

    allowed_ops = [
        name
        for name in ('and', '>', '>=', '=', '<', '<=', 'between', 'in')
        if allow_and or name != 'and'
    ]
    if op.op not in allowed_ops:
        raise PlanningException(
            f'For time series predictors only the following operations are allowed in WHERE: {str(allowed_ops)}, found instead: {str(op)}.')

    for arg in op.args:
        if isinstance(arg, Identifier) and arg.parts[-1].lower() not in allowed_columns:
            raise PlanningException(
                f'For time series predictor only the following columns are allowed in WHERE: {str(allowed_columns)}, found instead: {str(arg)}.')

    # Only the first two operands are descended into.
    for position in (0, 1):
        operand = op.args[position]
        if isinstance(operand, Operation):
            validate_ts_where_condition(operand, allowed_columns, allow_and=True)
Exemple #9
0
def get_predictor_namespace_and_name_from_identifier(identifier,
                                                     default_namespace):
    """Split a predictor identifier into its namespace and remaining name.

    A single-part identifier is placed in ``default_namespace``; otherwise
    the first part is the namespace. The returned identifier is a deep copy
    of ``identifier`` with the namespace part removed.

    Returns:
        Tuple ``(namespace, identifier_without_namespace)``.

    Raises:
        PlanningException: if the identifier has one part and no default
            namespace is given, or if it has more than four parts.
    """
    parts = identifier.parts
    part_count = len(parts)

    if part_count > 4:
        raise PlanningException(
            f'Too many parts (dots) in predictor identifier: {str(identifier)}'
        )

    if part_count == 1:
        if not default_namespace:
            raise PlanningException(
                f'No predictor name specified for predictor: {str(identifier)}'
            )
        namespace = default_namespace
        name_parts = [parts[0]]
    else:
        namespace = parts[0]
        name_parts = parts[1:]

    # The caller's identifier is left as is; only the copy is trimmed.
    predictor_identifier = copy.deepcopy(identifier)
    predictor_identifier.parts = name_parts
    return namespace, predictor_identifier
Exemple #10
0
def find_time_filter(op, time_column_name):
    """Return the sub-condition of ``op`` that filters on the time column.

    ``and`` nodes are searched on both sides. Any other operation is returned
    if either of its first two arguments is an Identifier whose last part
    equals ``time_column_name`` (case-insensitively).

    Returns:
        The matching operation, or None when there is none.

    Raises:
        PlanningException: if both sides of an ``and`` yield a time filter.
    """
    if not op:
        return

    if op.op == 'and':
        first = find_time_filter(op.args[0], time_column_name)
        second = find_time_filter(op.args[1], time_column_name)
        if first and second:
            raise PlanningException('Can provide only one filter by predictor order_by column, found two')

        return first or second

    def is_time_column(arg):
        return (isinstance(arg, Identifier)
                and arg.parts[-1].lower() == time_column_name.lower())

    if is_time_column(op.args[0]) or is_time_column(op.args[1]):
        return op
Exemple #11
0
def recursively_extract_column_values(op, row_dict, predictor):
    """Collect ``column = constant`` pairs from a WHERE clause into ``row_dict``.

    Only ``=`` comparisons joined by ``and`` are accepted. ``row_dict`` is
    filled in place, keyed by the string form of the disambiguated column.

    Raises:
        PlanningException: on any other operation, on a comparison that is
            not ``Identifier = Constant``, or when a column gets two values.
    """
    if not isinstance(op, BinaryOperation) or op.op not in ('=', 'and'):
        raise PlanningException(
            f'Only \'and\' and \'=\' operations allowed in WHERE clause, found: {op.to_tree()}'
        )

    if op.op == 'and':
        recursively_extract_column_values(op.args[0], row_dict, predictor)
        recursively_extract_column_values(op.args[1], row_dict, predictor)
        return

    column = op.args[0]
    value = op.args[1]

    if not isinstance(column, Identifier) or not isinstance(value, Constant):
        raise PlanningException(
            f'The WHERE clause for selecting from a predictor'
            f' must contain pairs \'Identifier(...) = Constant(...)\','
            f' found instead: {column.to_tree()}, {value.to_tree()}')

    column = disambiguate_predictor_column_identifier(column, predictor)
    key = str(column)

    if key in row_dict:
        raise PlanningException(f'Multiple values provided for {key}')
    row_dict[key] = value.value
Exemple #12
0
    def plan_select(self, query, integration=None):
        """Route a SELECT to the planner matching the type of its FROM clause.

        Args:
            query: the select to plan.
            integration: optional integration name; only forwarded to
                ``plan_join``.

        Returns:
            Whatever the chosen planning method returns.

        Raises:
            PlanningException: if ``query.from_table`` is not an Identifier,
                a Select or a Join.
        """
        source = query.from_table

        if isinstance(source, Identifier):
            # A plain name is either a predictor or an integration table.
            planner = (self.plan_select_from_predictor
                       if self.is_predictor(source)
                       else self.plan_integration_select)
            return planner(query)

        if isinstance(source, Select):
            return self.plan_nested_select(query)

        if isinstance(source, Join):
            return self.plan_join(query, integration=integration)

        raise PlanningException(
            f'Unsupported from_table {type(source)}')
Exemple #13
0
    def plan_insert(self, query):
        """Plan an ``INSERT ... SELECT`` as a select followed by InsertToTable.

        The first part of the target table's name is passed to
        ``plan_select`` as the integration.

        Raises:
            PlanningException: if the insert has no source select.
        """
        source_select = query.from_select
        if source_select is None:
            raise PlanningException(f'Support only insert from select')

        target_table = query.table

        # plan sub-select first
        select_step = self.plan_select(source_select,
                                       integration=target_table.parts[0])

        insert_step = InsertToTable(
            table=target_table,
            dataframe=select_step,
        )
        self.plan.add_step(insert_step)
Exemple #14
0
    def from_query(self, query=None):
        """Build the plan for ``query`` (default: ``self.query``).

        Returns:
            ``self.plan`` after the matching planning method has run.

        Raises:
            PlanningException: if the query is not a Select, Union,
                CreateTable or Insert.
        """
        if query is None:
            query = self.query

        # Checked in order; the first matching type wins.
        handlers = (
            (Select, self.plan_select),
            (Union, self.plan_union),
            (CreateTable, self.plan_create_table),
            (Insert, self.plan_insert),
        )

        for query_type, handler in handlers:
            if isinstance(query, query_type):
                handler(query)
                break
        else:
            raise PlanningException(f'Unsupported query type {type(query)}')

        return self.plan
Exemple #15
0
    def plan_create_table(self, query):
        """Plan ``CREATE TABLE ... AS SELECT`` as a select plus SaveToTable.

        The first part of the new table's name is passed to ``plan_select``
        as the integration.

        Raises:
            PlanningException: if the statement has no source select.
        """
        source_select = query.from_select
        if source_select is None:
            raise PlanningException(
                f'Not implemented "create table": {query.to_string()}')

        target_name = query.name

        select_step = self.plan_select(source_select,
                                       integration=target_name.parts[0])

        # create table step
        save_step = SaveToTable(
            table=target_name,
            dataframe=select_step,
            is_replace=query.is_replace,
        )
        self.plan.add_step(save_step)
Exemple #16
0
    def plan_join_two_tables(self, join):
        """Plan a join of two integration tables.

        Each side is fetched with its own ``SELECT *`` step, then a JoinStep
        combines the two results using a copy of ``join`` whose condition
        arguments and table references have the integration name removed.

        Returns:
            The step returned by ``self.plan.add_step`` for the JoinStep.

        Raises:
            PlanningException: if an Identifier in the join condition
                mentions neither table.
        """
        left_fetch_step = self.plan_integration_select(
            Select(targets=[Star()], from_table=join.left))
        right_fetch_step = self.plan_integration_select(
            Select(targets=[Star()], from_table=join.right))

        left_integration_name, left_table = self.get_integration_path_from_identifier_or_error(
            join.left)
        right_integration_name, right_table = self.get_integration_path_from_identifier_or_error(
            join.right)

        left_table_path = left_table.to_string(alias=False)
        right_table_path = right_table.to_string(alias=False)

        def qualify(arg):
            # Non-identifier arguments pass through unchanged.
            if not isinstance(arg, Identifier):
                return arg
            if left_table_path in arg.parts:
                return disambiguate_integration_column_identifier(
                    arg, left_integration_name, left_table)
            if right_table_path in arg.parts:
                return disambiguate_integration_column_identifier(
                    arg, right_integration_name, right_table)
            raise PlanningException(
                f'Wrong table or no source table in join condition for column: {str(arg)}'
            )

        qualified_args = [qualify(arg) for arg in join.condition.args]

        # The incoming join object is not modified; only its copy is.
        rewritten_join = copy.deepcopy(join)
        rewritten_join.condition.args = qualified_args
        rewritten_join.left = Identifier(left_table_path,
                                         alias=left_table.alias)
        rewritten_join.right = Identifier(right_table_path,
                                          alias=right_table.alias)

        # FIXME: INFORMATION_SCHEMA with condition
        # clear join condition for INFORMATION_SCHEMA
        if right_integration_name == 'INFORMATION_SCHEMA':
            rewritten_join.condition = None

        join_step = JoinStep(left=left_fetch_step.result,
                             right=right_fetch_step.result,
                             query=rewritten_join)
        return self.plan.add_step(join_step)
Exemple #17
0
def disambiguate_select_targets(targets, integration_name, table):
    """Return a new list of select targets rewritten relative to ``table``.

    Identifiers are disambiguated and aliased with their initial name when
    they have no alias. Star, Constant and NullConstant targets are kept
    unchanged. Operations and Selects are deep-copied and rewritten.

    Raises:
        PlanningException: for any other kind of target.
    """

    def rewrite(target):
        if isinstance(target, Identifier):
            return disambiguate_integration_column_identifier(
                target,
                integration_name,
                table,
                initial_name_as_alias=True)
        if type(target) in (Star, Constant, NullConstant):
            return target
        if isinstance(target, (Operation, Select)):
            # Rewrite a copy so the caller's target stays untouched.
            target_copy = copy.deepcopy(target)
            recursively_disambiguate_identifiers(target_copy,
                                                 integration_name, table)
            return target_copy
        raise PlanningException(f'Unknown select target {type(target)}')

    return [rewrite(target) for target in targets]
Exemple #18
0
 def get_integration_path_from_identifier_or_error(self,
                                                   identifier,
                                                   recurse=True):
     """Resolve ``identifier`` to ``(integration_name, table)``.

     If resolution fails and a default namespace is configured, the lookup
     is retried once with the default namespace prepended to the parts.

     Args:
         identifier: table identifier to resolve.
         recurse: whether the default-namespace retry is allowed.

     Raises:
         PlanningException: if the identifier cannot be split or its
             integration is unknown, and no retry is possible.
     """
     try:
         integration_name, table = get_integration_path_from_identifier(
             identifier)
         if integration_name.lower() not in self.integrations:
             raise PlanningException(
                 f'Unknown integration {integration_name} for table {str(identifier)}. Available integrations: {", ".join(self.integrations)}'
             )
         return integration_name, table
     except PlanningException:
         if recurse and self.default_namespace:
             # Retry once, treating the name as relative to the default
             # namespace; recurse=False stops a second retry.
             prefixed = copy.deepcopy(identifier)
             prefixed.parts = [
                 self.default_namespace, *identifier.parts
             ]
             return self.get_integration_path_from_identifier_or_error(
                 prefixed, recurse=False)
         raise
Exemple #19
0
def recursively_check_join_identifiers_for_ambiguity(item,
                                                     aliased_fields=None):
    """Reject single-part identifiers found anywhere inside ``item``.

    Operations are checked through their ``args``, OrderBy through its
    ``field``, and lists element by element. A single-part identifier is
    accepted only when its name is listed in ``aliased_fields``.

    Raises:
        PlanningException: on a single-part identifier that is not an alias.
    """
    if item is None:
        return

    if isinstance(item, Identifier):
        if len(item.parts) != 1:
            return
        if aliased_fields is not None and item.parts[0] in aliased_fields:
            # is alias
            return
        raise PlanningException(
            f'Ambigous identifier {str(item)}, provide table name for operations on a join.'
        )

    if isinstance(item, Operation):
        recursively_check_join_identifiers_for_ambiguity(
            item.args, aliased_fields=aliased_fields)
        return

    if isinstance(item, OrderBy):
        recursively_check_join_identifiers_for_ambiguity(
            item.field, aliased_fields=aliased_fields)
        return

    if isinstance(item, list):
        for element in item:
            recursively_check_join_identifiers_for_ambiguity(
                element, aliased_fields=aliased_fields)
Exemple #20
0
    def plan_join(self, query, integration=None):
        """Plan a SELECT whose FROM clause is a join.

        Two shapes are supported: a table joined with a predictor, and two
        tables joined with each other. The left side may also be a nested
        select over a single table (dbt-style query); that select then
        replaces ``query`` as the source of WHERE, GROUP BY and the other
        clauses.

        Args:
            query: select whose ``from_table`` is the join to plan.
            integration: optional integration name, prepended to the nested
                select's table when that table's first part is not a known
                integration.

        Returns:
            The result of ``self.plan_project(query, last_step.result)``.

        Raises:
            PlanningException: if the nested left select does not read from a
                plain table, if both sides are predictors, or if either side
                is not an Identifier. The ambiguity checks may raise it too.
        """
        join = query.from_table
        join_left = join.left
        join_right = join.right

        if isinstance(join_left, Select):
            # dbt query.
            # TODO support complex query. Only one table is supported at the moment.
            if not isinstance(join_left.from_table, Identifier):
                raise PlanningException(
                    f'Statement not supported: {query.to_string()}')

            # move properties to upper query
            query = join_left

            if query.from_table.alias is not None:
                table_alias = [query.from_table.alias.parts[0]]
            else:
                table_alias = query.from_table.parts

            def add_aliases(node, is_table, **kwargs):
                if not is_table and isinstance(node, Identifier):
                    if len(node.parts) == 1:
                        # add table alias to field
                        node.parts = table_alias + node.parts

            query_traversal(query.where, add_aliases)

            if isinstance(query.from_table, Identifier):
                # DBT workaround: allow use tables without integration.
                #   if table.part[0] not in integration - take integration name from create table command
                if (integration is not None and query.from_table.parts[0]
                        not in self.integrations):
                    # add integration name to table
                    query.from_table.parts.insert(0, integration)

            join_left = join_left.from_table

        aliased_fields = self.get_aliased_fields(query.targets)

        # GROUP BY and ORDER BY may refer to target aliases; WHERE and HAVING
        # are checked without that allowance.
        recursively_check_join_identifiers_for_ambiguity(query.where)
        recursively_check_join_identifiers_for_ambiguity(
            query.group_by, aliased_fields=aliased_fields)
        recursively_check_join_identifiers_for_ambiguity(query.having)
        recursively_check_join_identifiers_for_ambiguity(
            query.order_by, aliased_fields=aliased_fields)

        if isinstance(join_left, Identifier) and isinstance(
                join_right, Identifier):
            if self.is_predictor(join_left) and self.is_predictor(join_right):
                # Name both operands. The message used to be built from the
                # first two parts of the left identifier only, which ignored
                # the right operand and failed on a one-part identifier.
                raise PlanningException(
                    f'Can\'t join two predictors {str(join_left)} and {str(join_right)}'
                )

            predictor_namespace = None
            predictor = None
            table = None
            predictor_is_left = False
            if self.is_predictor(join_left):
                predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
                    join_left, self.default_namespace)
                predictor_is_left = True
            else:
                table = join_left

            if self.is_predictor(join_right):
                predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
                    join_right, self.default_namespace)
            else:
                table = join_right

            last_step = None
            if predictor:
                # One argument is a table, another is a predictor
                # Apply mindsdb model to result of last dataframe fetch
                # Then join results of applying mindsdb with table

                predictor_name = self.predictor_names[predictor.to_string(
                    alias=False).lower()]
                if self.predictor_metadata[predictor_name].get('timeseries'):
                    predictor_steps = self.plan_timeseries_predictor(
                        query, table, predictor_namespace, predictor)
                else:
                    predictor_steps = self.plan_predictor(
                        query, table, predictor_namespace, predictor)

                # add join
                # Update reference
                _, table = self.get_integration_path_from_identifier_or_error(
                    table)
                table_alias = table.alias or Identifier(
                    table.to_string(alias=False).replace('.', '_'))

                left = Identifier(
                    predictor_steps['predictor'].result.ref_name,
                    alias=predictor.alias
                    or Identifier(predictor.to_string(alias=False)))
                right = Identifier(predictor_steps['data'].result.ref_name,
                                   alias=table_alias)

                if not predictor_is_left:
                    # swap join
                    left, right = right, left
                new_join = Join(left=left,
                                right=right,
                                join_type=join.join_type)

                # The step inputs are swapped the same way as the identifiers
                # above so both agree on which side is which.
                left = predictor_steps['predictor'].result
                right = predictor_steps['data'].result
                if not predictor_is_left:
                    # swap join
                    left, right = right, left

                last_step = self.plan.add_step(
                    JoinStep(left=left, right=right, query=new_join))

                # limit from timeseries
                if predictor_steps.get('saved_limit'):
                    last_step = self.plan.add_step(
                        LimitOffsetStep(dataframe=last_step.result,
                                        limit=predictor_steps['saved_limit']))

            else:
                # Both arguments are tables, join results of 2 dataframe fetches

                join_step = self.plan_join_two_tables(join)
                last_step = join_step
                if query.where:
                    # FIXME: INFORMATION_SCHEMA with Where
                    right_integration_name, _ = self.get_integration_path_from_identifier_or_error(
                        join.right)
                    if right_integration_name == 'INFORMATION_SCHEMA':
                        # No filter step is added for INFORMATION_SCHEMA.
                        ...
                    else:
                        last_step = self.plan.add_step(
                            FilterStep(dataframe=last_step.result,
                                       query=query.where))

                if query.group_by:
                    # Targets are passed to the group-by step without aliases.
                    group_by_targets = []
                    for t in query.targets:
                        target_copy = copy.deepcopy(t)
                        target_copy.alias = None
                        group_by_targets.append(target_copy)
                    last_step = self.plan.add_step(
                        GroupByStep(dataframe=last_step.result,
                                    columns=query.group_by,
                                    targets=group_by_targets))

                if query.having:
                    last_step = self.plan.add_step(
                        FilterStep(dataframe=last_step.result,
                                   query=query.having))

                if query.order_by:
                    last_step = self.plan.add_step(
                        OrderByStep(dataframe=last_step.result,
                                    order_by=query.order_by))

                if query.limit is not None or query.offset is not None:
                    limit = query.limit.value if query.limit is not None else None
                    offset = query.offset.value if query.offset is not None else None
                    last_step = self.plan.add_step(
                        LimitOffsetStep(dataframe=last_step.result,
                                        limit=limit,
                                        offset=offset))

        else:
            raise PlanningException(
                f'Join of unsupported objects, currently only tables and predictors can be joined.'
            )
        return self.plan_project(query, last_step.result)
Exemple #21
0
    def plan_timeseries_predictor(self, query, table, predictor_namespace,
                                  predictor):
        predictor_name = predictor.to_string(alias=False).lower()
        # to original case
        predictor_name = self.predictor_names[predictor_name]

        predictor_time_column_name = self.predictor_metadata[predictor_name][
            'order_by_column']
        predictor_group_by_names = self.predictor_metadata[predictor_name][
            'group_by_columns']
        if predictor_group_by_names is None:
            predictor_group_by_names = []
        predictor_window = self.predictor_metadata[predictor_name]['window']

        if query.order_by:
            raise PlanningException(
                f'Can\'t provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}'
            )

        saved_limit = query.limit

        if query.group_by or query.having or query.offset:
            raise PlanningException(
                f'Unsupported query to timeseries predictor: {str(query)}')

        allowed_columns = [predictor_time_column_name.lower()]
        if len(predictor_group_by_names) > 0:
            allowed_columns += [i.lower() for i in predictor_group_by_names]
        validate_ts_where_condition(query.where,
                                    allowed_columns=allowed_columns)

        time_filter = find_time_filter(
            query.where, time_column_name=predictor_time_column_name)

        order_by = [
            OrderBy(Identifier(parts=[predictor_time_column_name]),
                    direction='DESC')
        ]

        preparation_where = copy.deepcopy(query.where)

        # add {order_by_field} is not null
        def add_order_not_null(condition):
            order_field_not_null = BinaryOperation(
                op='is not',
                args=[
                    Identifier(parts=[predictor_time_column_name]),
                    NullConstant()
                ])
            if condition is not None:
                condition = BinaryOperation(
                    op='and', args=[condition, order_field_not_null])
            else:
                condition = order_field_not_null
            return condition

        preparation_where2 = copy.deepcopy(preparation_where)
        preparation_where = add_order_not_null(preparation_where)

        # Obtain integration selects
        if isinstance(time_filter, BetweenOperation):
            between_from = time_filter.args[1]
            preparation_time_filter = BinaryOperation(
                '<',
                args=[Identifier(predictor_time_column_name), between_from])
            preparation_where2 = replace_time_filter(preparation_where2,
                                                     time_filter,
                                                     preparation_time_filter)
            integration_select_1 = Select(
                targets=[Star()],
                from_table=table,
                where=add_order_not_null(preparation_where2),
                order_by=order_by,
                limit=Constant(predictor_window))

            integration_select_2 = Select(targets=[Star()],
                                          from_table=table,
                                          where=preparation_where,
                                          order_by=order_by)

            integration_selects = [integration_select_1, integration_select_2]
        elif isinstance(
                time_filter, BinaryOperation
        ) and time_filter.op == '>' and time_filter.args[1] == Latest():
            integration_select = Select(
                targets=[Star()],
                from_table=table,
                where=preparation_where,
                order_by=order_by,
                limit=Constant(predictor_window),
            )
            integration_select.where = find_and_remove_time_filter(
                integration_select.where, time_filter)
            integration_selects = [integration_select]

        elif isinstance(time_filter,
                        BinaryOperation) and time_filter.op in ('>', '>='):
            time_filter_date = time_filter.args[1]
            preparation_time_filter_op = {'>': '<=', '>=': '<'}[time_filter.op]

            preparation_time_filter = BinaryOperation(
                preparation_time_filter_op,
                args=[
                    Identifier(predictor_time_column_name), time_filter_date
                ])
            preparation_where2 = replace_time_filter(preparation_where2,
                                                     time_filter,
                                                     preparation_time_filter)
            integration_select_1 = Select(
                targets=[Star()],
                from_table=table,
                where=add_order_not_null(preparation_where2),
                order_by=order_by,
                limit=Constant(predictor_window))

            integration_select_2 = Select(targets=[Star()],
                                          from_table=table,
                                          where=preparation_where,
                                          order_by=order_by)

            integration_selects = [integration_select_1, integration_select_2]
        else:
            integration_select = Select(
                targets=[Star()],
                from_table=table,
                where=preparation_where,
                order_by=order_by,
            )
            integration_selects = [integration_select]

        if len(predictor_group_by_names) == 0:
            # ts query without grouping
            # one or multistep
            if len(integration_selects) == 1:
                select_partition_step = self.get_integration_select_step(
                    integration_selects[0])
            else:
                select_partition_step = MultipleSteps(steps=[
                    self.get_integration_select_step(s)
                    for s in integration_selects
                ],
                                                      reduce='union')

            # fetch data step
            data_step = self.plan.add_step(select_partition_step)
        else:
            # inject $var to queries
            for integration_select in integration_selects:
                condition = integration_select.where
                for num, column in enumerate(predictor_group_by_names):
                    cond = BinaryOperation(
                        '=',
                        args=[Identifier(column),
                              Constant(f'$var[{column}]')])

                    # join to main condition
                    if condition is None:
                        condition = cond
                    else:
                        condition = BinaryOperation('and',
                                                    args=[condition, cond])

                integration_select.where = condition
            # one or multistep
            if len(integration_selects) == 1:
                select_partition_step = self.get_integration_select_step(
                    integration_selects[0])
            else:
                select_partition_step = MultipleSteps(steps=[
                    self.get_integration_select_step(s)
                    for s in integration_selects
                ],
                                                      reduce='union')

            # get groping values
            no_time_filter_query = copy.deepcopy(query)
            no_time_filter_query.where = find_and_remove_time_filter(
                no_time_filter_query.where, time_filter)
            select_partitions_step = self.plan_fetch_timeseries_partitions(
                no_time_filter_query, table, predictor_group_by_names)

            # sub-query by every grouping value
            map_reduce_step = self.plan.add_step(
                MapReduceStep(values=select_partitions_step.result,
                              reduce='union',
                              step=select_partition_step))
            data_step = map_reduce_step

        predictor_step = self.plan.add_step(
            ApplyTimeseriesPredictorStep(
                output_time_filter=time_filter,
                namespace=predictor_namespace,
                dataframe=data_step.result,
                predictor=predictor,
            ))

        return {
            'predictor': predictor_step,
            'data': data_step,
            'saved_limit': saved_limit,
        }
Exemple #22
0
 def result(self):
     """Return a ``Result`` built from this step's ``step_num``.

     Raises:
         PlanningException: if ``step_num`` is still ``None``, i.e. the
             step has no number that a ``Result`` could point at.
     """
     if self.step_num is not None:
         return Result(self.step_num)

     raise PlanningException(
         f'Can\'t reference a step with no assigned step number. Tried to reference: {type(self)}'
     )
Exemple #23
0
    def prepare_select(self, query):
        """Resolve first-level tables and result columns of a select query.

        This is a generator. For every first-level table whose columns are
        needed it yields a ``GetPredictorColumns`` or ``GetTableColumns`` step
        and, once resumed, reads ``step.result_data`` from that same step
        object. NOTE(review): the caller is presumably expected to execute the
        step and fill ``result_data`` before resuming - confirm with caller.

        Results are stored on ``self.planner.statement``:

        * ``tables_lvl1`` - tables found in the first-level ``FROM``;
        * ``tables_map`` - the same tables indexed by each of their ``keys``;
        * ``columns`` - the final list of result columns (``*`` expanded).

        Args:
            query: the select query node to analyse.

        Raises:
            PlanningException: when the query has more than one predictor,
                a predictor is not the last table or is not at the first
                level, ``*`` is used without any known table, a table's
                columns are unknown when expanding ``*``, or a column is
                missing from its resolved table.
        """

        stmt = self.planner.statement

        # get all predictors
        query_predictors = []

        def find_predictors(node, is_table, **kwargs):
            if is_table and isinstance(node, ast.Identifier):
                if self.planner.is_predictor(node):
                    query_predictors.append(node)

        utils.query_traversal(query, find_predictors)

        # only 1 predictor is allowed
        if len(query_predictors) > 1:
            raise PlanningException(f'To many predictors in query: {len(query_predictors)}')

        # === get all tables from 1st level of query ===
        stmt.tables_map = {}
        stmt.tables_lvl1 = []
        if query.from_table is not None:

            if isinstance(query.from_table, ast.Join):
                # get all tables
                join_tables = utils.convert_join_to_list(query.from_table)
            else:
                join_tables = [dict(table=query.from_table)]

            if isinstance(query.from_table, ast.Select):
                # nested select, get only last select
                join_tables = [
                    dict(
                        table=utils.get_deepest_select(query.from_table).from_table
                    )
                ]

            for i, join_table in enumerate(join_tables):
                table = join_table['table']
                # anything that is not an Identifier is an unknown table:
                # it is not added to the lookup lists
                if isinstance(table, ast.Identifier):
                    tbl = self.table_from_identifier(table)

                    if tbl.is_predictor:
                        # Is the last table?
                        if i + 1 < len(join_tables):
                            raise PlanningException('Predictor must be last table in query')

                    stmt.tables_lvl1.append(tbl)
                    for key in tbl.keys:
                        stmt.tables_map[key] = tbl

        # is there any predictors at other levels?
        lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
        if len(query_predictors) != len(lvl1_predictors):
            raise PlanningException('Predictor is not at first level')

        # === get targets ===
        columns = []
        # set when columns of every first-level table must be fetched:
        # a star target or an identifier whose table could not be resolved
        get_all_tables = False
        for t in query.targets:

            column = Column(t)

            # column alias
            alias = None
            if t.alias is not None:
                alias = to_string(t.alias)

            if isinstance(t, ast.Star):
                if len(stmt.tables_lvl1) == 0:
                    # if "from" is empty we can't make plan
                    raise PlanningException("Can't find table")

                column.is_star = True
                get_all_tables = True

            elif isinstance(t, ast.Identifier):
                if alias is None:
                    alias = t.parts[-1]

                table = self.get_table_of_column(t)
                if table is None:
                    # table is not known
                    get_all_tables = True
                else:
                    column.table = table

            elif isinstance(t, ast.Constant):
                if alias is None:
                    alias = str(t.value)
                column.type = self.get_type_of_var(t.value)
            elif isinstance(t, ast.Function):
                # mysql function
                if t.op == 'connection_id':
                    column.type = 'integer'
                else:
                    column.type = 'str'
            else:
                # TODO go down into lower level.
                #  It can be function, operation, select.
                #  But now show it as string

                # TODO add several known types for function, i.e ABS-int

                # TODO TypeCast - as casted type
                column.type = 'str'

            if alias is not None:
                column.alias = alias
            columns.append(column)

        # === get columns from tables ===
        request_tables = set()
        for column in columns:
            if column.table is not None:
                request_tables.add(column.table.name)

        for table in stmt.tables_lvl1:
            if get_all_tables or table.name in request_tables:
                if table.is_predictor:
                    step = steps.GetPredictorColumns(namespace=table.ds, predictor=table.node)
                else:
                    step = steps.GetTableColumns(namespace=table.ds, table=table.name)
                # hand the step out; result_data is read from it after resuming
                yield step

                if step.result_data is not None:
                    # save results

                    if len(step.result_data['tables']) > 0:
                        table_info = step.result_data['tables'][0]
                        columns_info = step.result_data['columns'][table_info]

                        table.columns = []
                        table.ds = table_info[0]
                        for col in columns_info:
                            if isinstance(col, tuple):
                                # is predictor
                                col = dict(name=col[0], type='str')
                            table.columns.append(
                                Column(
                                    name=col['name'],
                                    type=col['type'],
                                )
                            )

                    # map by names
                    # Guard: with an empty 'tables' result the columns may
                    # still be None; iterating None would raise TypeError
                    # instead of the PlanningException raised further below.
                    if table.columns is not None:
                        table.columns_map = {
                            i.name.upper(): i
                            for i in table.columns
                        }

        # === create columns list ===
        columns_result = []
        for i, column in enumerate(columns):
            if column.is_star:
                # add data from all tables
                for table in stmt.tables_lvl1:
                    if table.columns is None:
                        raise PlanningException(f'Table is not found {table.name}')

                    for col in table.columns:
                        # col = {name: 'col', type: 'str'}
                        column2 = Column(table=table, name=col.name)
                        column2.alias = col.name
                        column2.type = col.type

                        columns_result.append(column2)

                # to next column
                continue

            elif column.name is not None:
                # is Identifier
                col_name = column.name.upper()
                if column.table is not None:
                    table = column.table
                    if table.columns_map is not None:
                        if col_name in table.columns_map:
                            column.type = table.columns_map[col_name].type
                        else:
                            raise PlanningException(f'Column not found {col_name}')

                else:
                    # table is not found, looking for in all tables;
                    # the first table that has the column wins
                    for table in stmt.tables_lvl1:
                        if table.columns_map is not None:
                            col = table.columns_map.get(col_name)
                            if col is not None:
                                column.type = col.type
                                column.table = table
                                break

            # forcing alias
            if column.alias is None:
                column.alias = f'column_{i}'

            # forcing type
            if column.type is None:
                column.type = 'str'

            columns_result.append(column)

        # save columns
        stmt.columns = columns_result