def execute_steps(self, params=None):
    """Execute a previously prepared statement, optionally filling placeholders.

    :param params: values for the prepared statement's placeholders, or None
    :returns: the planned steps for plannable queries, otherwise an empty list
    :raises PlanningException: when params are given without a prepared
        statement, or the parameter count does not match the statement
    """
    prepared = self.planner.statement
    if prepared is None:
        # Nothing was prepared: executing with parameters is impossible.
        if params is not None:
            raise PlanningException("Can't execute statement")
        prepared = Statement()

    query = self.planner.query
    if params is not None:
        # Substitute the prepared statement's placeholders with real values.
        if len(params) != len(prepared.params):
            raise PlanningException("Count of execution parameters don't match prepared statement")
        query = utils.fill_query_params(query, params)
        self.planner.query = query

    # Drop the params so the statement cannot be executed a second time.
    prepared.params = None

    plannable_types = (ast.Select, ast.Union, ast.CreateTable, ast.Insert)
    if isinstance(query, plannable_types):
        return self.plan_query(query)
    return []
def recursively_disambiguate_identifiers_in_select(select, integration_name, table):
    """Rewrite all identifiers inside *select* in place: strip the integration
    prefix and qualify bare column names with the table path.

    Only BinaryOperation/BetweenOperation are accepted as WHERE, and only
    BinaryOperation as HAVING; anything else raises PlanningException.
    """
    select.targets = disambiguate_select_targets(select.targets, integration_name, table)
    if select.from_table:
        # Replace a plain identifier FROM with the already-resolved table node.
        if isinstance(select.from_table, Identifier):
            select.from_table = table
    if select.where:
        if not isinstance(select.where, BinaryOperation) and not isinstance(
                select.where, BetweenOperation):
            raise PlanningException(
                f'Unsupported where clause {type(select.where)}, only BinaryOperation is supported now.'
            )
        # Rewrite a deep copy, then assign back.
        where = copy.deepcopy(select.where)
        recursively_disambiguate_identifiers_in_op(where, integration_name, table)
        select.where = where
    if select.group_by:
        group_by = copy.deepcopy(select.group_by)
        group_by2 = []
        for field in group_by:
            # Only identifiers need qualification; other expressions pass through.
            if isinstance(field, Identifier):
                field = disambiguate_integration_column_identifier(
                    field, integration_name, table)
            group_by2.append(field)
        select.group_by = group_by2
    if select.having:
        if not isinstance(select.having, BinaryOperation):
            raise PlanningException(
                f'Unsupported having clause {type(select.having)}, only BinaryOperation is supported now.'
            )
        having = copy.deepcopy(select.having)
        recursively_disambiguate_identifiers_in_op(having, integration_name, table)
        select.having = having
    if select.order_by:
        order_by = []
        for order_by_item in select.order_by:
            # Qualify each ORDER BY field on a copied item.
            new_order_item = copy.deepcopy(order_by_item)
            new_order_item.field = disambiguate_integration_column_identifier(
                new_order_item.field, integration_name, table)
            order_by.append(new_order_item)
        select.order_by = order_by
def get_integration_path_from_identifier(identifier):
    """Split ``integration.table[...]`` into its integration name and the
    remaining table identifier.

    :param identifier: identifier whose first part names the integration
    :returns: tuple ``(integration_name, identifier_without_integration)``
    :raises PlanningException: for a single-part identifier (no integration)
        or one with more than four parts
    """
    part_count = len(identifier.parts)
    if part_count == 1:
        raise PlanningException(
            f'No integration specified for table: {str(identifier)}')
    if part_count > 4:
        raise PlanningException(
            f'Too many parts (dots) in table identifier: {str(identifier)}')
    integration_name, *remaining = identifier.parts
    trimmed = copy.deepcopy(identifier)
    trimmed.parts = remaining
    return integration_name, trimmed
def get_statement_info(self):
    """Describe the prepared statement: its output columns and placeholders.

    :returns: dict with 'parameters' (one '?' entry per placeholder) and
        'columns' (alias/type/name/table info per output column)
    :raises PlanningException: when no statement has been prepared
    """
    stmt = self.planner.statement
    if stmt is None:
        raise PlanningException('Statement is not prepared')

    columns_result = []
    for column in stmt.columns:
        source_table = column.table
        table_name = source_table.name if source_table is not None else None
        table_ds = source_table.ds if source_table is not None else None
        columns_result.append(dict(
            alias=column.alias,
            type=column.type,
            name=column.name,
            table_name=table_name,
            table_alias=table_name,
            ds=table_ds,
        ))

    # Every placeholder is reported as a string-typed '?' parameter.
    parameters = [dict(alias='?', type='str', name='?') for _ in stmt.params]

    return {
        'parameters': parameters,
        'columns': columns_result,
    }
def disambiguate_integration_column_identifier(identifier, integration_name, table, initial_name_as_alias=False):
    """Removes integration name from column if it's present, adds table path if it's absent"""
    # Reference path of the table: its alias when set, otherwise its parts.
    column_table_ref = [table.alias.to_string(
        alias=False)] if table.alias else table.parts
    parts = list(identifier.parts)
    if len(parts) > 1:
        # Strip a leading integration prefix, e.g. int1.tbl.col -> tbl.col.
        if parts[0] == integration_name:
            parts = parts[1:]
    if len(parts) > 1:
        # Still qualified: the prefix before the column name must match the
        # table path exactly, otherwise the column targets a different table.
        if (len(parts) <= len(column_table_ref)
                or parts[:len(column_table_ref)] != column_table_ref):
            raise PlanningException(
                f'Tried to query column {identifier.to_tree()} from integration {integration_name} table {column_table_ref}, but a different table name has been specified.'
            )
    elif len(parts) == 1:
        # if parts[0] != column_table_ref:
        # Bare column name: prepend the table path.
        parts = column_table_ref + parts
    new_identifier = Identifier(parts=parts)
    if identifier.alias:
        # Keep an explicit alias from the original identifier.
        new_identifier.alias = identifier.alias
    elif initial_name_as_alias:
        # Otherwise optionally preserve the short column name as the alias.
        new_identifier.alias = Identifier(parts[-1])
    return new_identifier
def plan_select_from_predictor(self, select):
    """Plan ``SELECT ... FROM predictor WHERE col = val AND ...``.

    Returns a tuple ``(predictor_step, project_step)``.
    """
    predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
        select.from_table, self.default_namespace)
    # Relies on AST equality: WHERE 1 = 0 is the probe clients send for columns.
    if select.where == BinaryOperation('=', args=[Constant(1), Constant(0)]):
        # Hardcoded mysql way of getting predictor columns
        predictor_step = self.plan.add_step(
            GetPredictorColumns(namespace=predictor_namespace,
                                predictor=predictor))
    else:
        # NOTE(review): this loop validates the target types (raising on
        # unknown ones) but its result list is never used afterwards.
        new_query_targets = []
        for target in select.targets:
            if isinstance(target, Identifier):
                new_query_targets.append(
                    disambiguate_predictor_column_identifier(
                        target, predictor))
            elif type(target) in (Star, Constant):
                new_query_targets.append(target)
            else:
                raise PlanningException(
                    f'Unknown select target {type(target)}')
        if select.group_by or select.having:
            raise PlanningException(
                f'Unsupported operation when querying predictor. Only WHERE is allowed and required.'
            )
        # Collect column -> value pairs from the WHERE clause into one row.
        row_dict = {}
        where_clause = select.where
        if not where_clause:
            raise PlanningException(
                f'WHERE clause required when selecting from predictor')
        recursively_extract_column_values(where_clause, row_dict, predictor)
        predictor_step = self.plan.add_step(
            ApplyPredictorRowStep(namespace=predictor_namespace,
                                  predictor=predictor,
                                  row_dict=row_dict))
    project_step = self.plan_project(select, predictor_step.result)
    return predictor_step, project_step
def recursively_disambiguate_identifiers(obj, integration_name, table):
    """Dispatch identifier disambiguation to the op- or select-specific helper.

    :raises PlanningException: for any object that is neither an Operation
        nor a Select
    """
    if isinstance(obj, Operation):
        recursively_disambiguate_identifiers_in_op(obj, integration_name, table)
        return
    if isinstance(obj, Select):
        recursively_disambiguate_identifiers_in_select(obj, integration_name, table)
        return
    raise PlanningException(
        f'Unsupported object for disambiguation {type(obj)}')
def validate_ts_where_condition(op, allowed_columns, allow_and=True):
    """Error if the where condition contains invalid ops, is nested or filters on some column that's not time or partition"""
    if not op:
        return
    allowed_ops = ['and', '>', '>=', '=', '<', '<=', 'between', 'in']
    if not allow_and:
        allowed_ops.remove('and')
    if op.op not in allowed_ops:
        raise PlanningException(
            f'For time series predictors only the following operations are allowed in WHERE: {str(allowed_ops)}, found instead: {str(op)}.')

    # Every identifier mentioned must be the time column or a group-by column.
    for operand in op.args:
        if not isinstance(operand, Identifier):
            continue
        if operand.parts[-1].lower() not in allowed_columns:
            raise PlanningException(
                f'For time series predictor only the following columns are allowed in WHERE: {str(allowed_columns)}, found instead: {str(operand)}.')

    # Recurse into nested operations on either side of the condition.
    for side in (op.args[0], op.args[1]):
        if isinstance(side, Operation):
            validate_ts_where_condition(side, allowed_columns, allow_and=True)
def get_predictor_namespace_and_name_from_identifier(identifier, default_namespace):
    """Split a predictor identifier into (namespace, predictor identifier).

    A bare predictor name falls back to *default_namespace*.

    :raises PlanningException: when a bare name has no default namespace, or
        the identifier has more than four parts
    """
    parts = identifier.parts
    if len(parts) == 1:
        # Bare predictor name: take the namespace from the default.
        if not default_namespace:
            raise PlanningException(
                f'No predictor name specified for predictor: {str(identifier)}'
            )
        namespace = default_namespace
        new_parts = [parts[0]]
    elif len(parts) > 4:
        raise PlanningException(
            f'Too many parts (dots) in predictor identifier: {str(identifier)}'
        )
    else:
        namespace = parts[0]
        new_parts = parts[1:]
    new_identifier = copy.deepcopy(identifier)
    new_identifier.parts = new_parts
    return namespace, new_identifier
def find_time_filter(op, time_column_name):
    """Return the single condition on the time column inside *op*, or None.

    :raises PlanningException: when two time-column filters are found
    """
    if not op:
        return

    def mentions_time_column(node):
        # True when the node is an identifier whose last part is the time column.
        return (isinstance(node, Identifier)
                and node.parts[-1].lower() == time_column_name.lower())

    if op.op == 'and':
        # Search both sides; only one may contain a time-column filter.
        left = find_time_filter(op.args[0], time_column_name)
        right = find_time_filter(op.args[1], time_column_name)
        if left and right:
            raise PlanningException('Can provide only one filter by predictor order_by column, found two')
        return left or right
    if mentions_time_column(op.args[0]) or mentions_time_column(op.args[1]):
        return op
def recursively_extract_column_values(op, row_dict, predictor):
    """Fill *row_dict* with column -> value pairs from an AND-ed chain of
    ``Identifier = Constant`` conditions.

    :raises PlanningException: on any other operation, a non-constant value,
        or a column given twice
    """
    if isinstance(op, BinaryOperation) and op.op == 'and':
        # Walk both sides of the conjunction.
        recursively_extract_column_values(op.args[0], row_dict, predictor)
        recursively_extract_column_values(op.args[1], row_dict, predictor)
    elif isinstance(op, BinaryOperation) and op.op == '=':
        column_ident, const_value = op.args[0], op.args[1]
        if not (isinstance(column_ident, Identifier) and isinstance(const_value, Constant)):
            raise PlanningException(
                f'The WHERE clause for selecting from a predictor'
                f' must contain pairs \'Identifier(...) = Constant(...)\','
                f' found instead: {column_ident.to_tree()}, {const_value.to_tree()}')
        column_ident = disambiguate_predictor_column_identifier(column_ident, predictor)
        key = str(column_ident)
        if key in row_dict:
            raise PlanningException(f'Multiple values provided for {key}')
        row_dict[key] = const_value.value
    else:
        raise PlanningException(
            f'Only \'and\' and \'=\' operations allowed in WHERE clause, found: {op.to_tree()}'
        )
def plan_select(self, query, integration=None):
    """Route a SELECT to the planner matching the shape of its FROM clause.

    :raises PlanningException: for an unsupported FROM clause type
    """
    source = query.from_table
    if isinstance(source, Identifier):
        # A plain table name: either a predictor or an integration table.
        if self.is_predictor(source):
            return self.plan_select_from_predictor(query)
        return self.plan_integration_select(query)
    if isinstance(source, Select):
        return self.plan_nested_select(query)
    if isinstance(source, Join):
        return self.plan_join(query, integration=integration)
    raise PlanningException(
        f'Unsupported from_table {type(source)}')
def plan_insert(self, query):
    """Plan ``INSERT ... FROM SELECT``: run the sub-select, then insert its rows.

    :raises PlanningException: when the INSERT has no source select
    """
    if query.from_select is None:
        raise PlanningException(f'Support only insert from select')
    # The target table's first identifier part names the integration
    # the sub-select should run against.
    integration_name = query.table.parts[0]
    select_step = self.plan_select(query.from_select, integration=integration_name)
    self.plan.add_step(InsertToTable(
        table=query.table,
        dataframe=select_step,
    ))
def from_query(self, query=None):
    """Build an execution plan for *query* (defaults to the planner's own query).

    :returns: the populated plan
    :raises PlanningException: for an unsupported query type
    """
    if query is None:
        query = self.query

    # Dispatch table: first matching AST type wins.
    handlers = (
        (Select, self.plan_select),
        (Union, self.plan_union),
        (CreateTable, self.plan_create_table),
        (Insert, self.plan_insert),
    )
    for node_type, handler in handlers:
        if isinstance(query, node_type):
            handler(query)
            break
    else:
        raise PlanningException(f'Unsupported query type {type(query)}')

    return self.plan
def plan_create_table(self, query):
    """Plan ``CREATE TABLE ... FROM SELECT`` (plain CREATE TABLE is unsupported).

    :raises PlanningException: when the statement has no source select
    """
    if query.from_select is None:
        raise PlanningException(
            f'Not implemented "create table": {query.to_string()}')
    # The new table's first identifier part names the integration to read from.
    integration_name = query.name.parts[0]
    select_step = self.plan_select(query.from_select, integration=integration_name)
    # Save the sub-select's result into the target table.
    self.plan.add_step(SaveToTable(
        table=query.name,
        dataframe=select_step,
        is_replace=query.is_replace,
    ))
def plan_join_two_tables(self, join):
    """Plan a join of two plain integration tables: fetch each side with a
    ``SELECT *``, rewrite the join condition's identifiers, then join the
    two result sets."""
    # Fetch all rows of each side.
    select_left_step = self.plan_integration_select(
        Select(targets=[Star()], from_table=join.left))
    select_right_step = self.plan_integration_select(
        Select(targets=[Star()], from_table=join.right))
    left_integration_name, left_table = self.get_integration_path_from_identifier_or_error(
        join.left)
    right_integration_name, right_table = self.get_integration_path_from_identifier_or_error(
        join.right)
    left_table_path = left_table.to_string(alias=False)
    right_table_path = right_table.to_string(alias=False)
    # Qualify each identifier in the join condition against the table whose
    # path appears among its parts.
    new_condition_args = []
    for arg in join.condition.args:
        if isinstance(arg, Identifier):
            if left_table_path in arg.parts:
                new_condition_args.append(
                    disambiguate_integration_column_identifier(
                        arg, left_integration_name, left_table))
            elif right_table_path in arg.parts:
                new_condition_args.append(
                    disambiguate_integration_column_identifier(
                        arg, right_integration_name, right_table))
            else:
                raise PlanningException(
                    f'Wrong table or no source table in join condition for column: {str(arg)}'
                )
        else:
            # Non-identifier operands (e.g. constants) pass through unchanged.
            new_condition_args.append(arg)
    new_join = copy.deepcopy(join)
    new_join.condition.args = new_condition_args
    new_join.left = Identifier(left_table_path, alias=left_table.alias)
    new_join.right = Identifier(right_table_path, alias=right_table.alias)
    # FIXME: INFORMATION_SCHEMA with condition
    # clear join condition for INFORMATION_SCHEMA
    if right_integration_name == 'INFORMATION_SCHEMA':
        new_join.condition = None
    return self.plan.add_step(
        JoinStep(left=select_left_step.result,
                 right=select_right_step.result,
                 query=new_join))
def disambiguate_select_targets(targets, integration_name, table):
    """Return a new list of select targets with identifiers qualified
    against *table* and the integration prefix stripped.

    :raises PlanningException: for unknown target types
    """
    new_targets = []
    for target in targets:
        if isinstance(target, Identifier):
            fixed = disambiguate_integration_column_identifier(
                target, integration_name, table, initial_name_as_alias=True)
        elif type(target) in (Star, Constant, NullConstant):
            # Literals and '*' pass through untouched (exact-type check on purpose).
            fixed = target
        elif isinstance(target, Operation) or isinstance(target, Select):
            # Rewrite a deep copy of compound targets recursively.
            fixed = copy.deepcopy(target)
            recursively_disambiguate_identifiers(fixed, integration_name, table)
        else:
            raise PlanningException(f'Unknown select target {type(target)}')
        new_targets.append(fixed)
    return new_targets
def get_integration_path_from_identifier_or_error(self, identifier, recurse=True):
    """Resolve *identifier* to (integration_name, table identifier), verifying
    the integration is known.

    On failure (no integration part, or unknown integration) it retries once
    with the default namespace prepended; with ``recurse=False`` (or no
    default namespace) it re-raises instead.
    """
    try:
        integration_name, table = get_integration_path_from_identifier(
            identifier)
        if not integration_name.lower() in self.integrations:
            raise PlanningException(
                f'Unknown integration {integration_name} for table {str(identifier)}. Available integrations: {", ".join(self.integrations)}'
            )
    except PlanningException:
        # Either the identifier had no integration part or the integration is
        # unknown — retry once with the default namespace prepended.
        if not recurse or not self.default_namespace:
            raise
        else:
            new_identifier = copy.deepcopy(identifier)
            new_identifier.parts = [
                self.default_namespace, *identifier.parts
            ]
            # recurse=False prevents an infinite retry loop.
            return self.get_integration_path_from_identifier_or_error(
                new_identifier, recurse=False)
    return integration_name, table
def recursively_check_join_identifiers_for_ambiguity(item, aliased_fields=None):
    """Raise if any unqualified (single-part) identifier appears in a join
    query, unless it is a known alias from *aliased_fields*."""
    if item is None:
        return
    if isinstance(item, Identifier):
        if len(item.parts) != 1:
            return  # fully qualified — fine
        if aliased_fields is not None and item.parts[0] in aliased_fields:
            # is alias
            return
        raise PlanningException(
            f'Ambigous identifier {str(item)}, provide table name for operations on a join.'
        )
    if isinstance(item, Operation):
        recursively_check_join_identifiers_for_ambiguity(
            item.args, aliased_fields=aliased_fields)
    elif isinstance(item, OrderBy):
        recursively_check_join_identifiers_for_ambiguity(
            item.field, aliased_fields=aliased_fields)
    elif isinstance(item, list):
        for element in item:
            recursively_check_join_identifiers_for_ambiguity(
                element, aliased_fields=aliased_fields)
def plan_join(self, query, integration=None):
    """Plan a query whose FROM clause is a join.

    Supports table-with-predictor joins (the predictor is applied to the
    fetched data) and table-with-table joins (two fetches joined, then
    WHERE/GROUP BY/HAVING/ORDER BY/LIMIT applied).

    :param query: the SELECT whose from_table is a Join
    :param integration: fallback integration name (DBT workaround)
    :returns: the final projection step
    :raises PlanningException: for unsupported join shapes
    """
    join = query.from_table
    join_left = join.left
    join_right = join.right

    if isinstance(join_left, Select):
        # dbt query.
        # TODO support complex query. Only one table is supported at the moment.
        if not isinstance(join_left.from_table, Identifier):
            raise PlanningException(
                f'Statement not supported: {query.to_string()}')
        # move properties to upper query
        query = join_left
        if query.from_table.alias is not None:
            table_alias = [query.from_table.alias.parts[0]]
        else:
            table_alias = query.from_table.parts

        def add_aliases(node, is_table, **kwargs):
            # Qualify bare field names with the table alias.
            if not is_table and isinstance(node, Identifier):
                if len(node.parts) == 1:
                    # add table alias to field
                    node.parts = table_alias + node.parts

        query_traversal(query.where, add_aliases)
        if isinstance(query.from_table, Identifier):
            # DBT workaround: allow use tables without integration.
            # if table.part[0] not in integration - take integration name from create table command
            if (integration is not None
                    and query.from_table.parts[0] not in self.integrations):
                # add integration name to table
                query.from_table.parts.insert(0, integration)
        join_left = join_left.from_table

    aliased_fields = self.get_aliased_fields(query.targets)
    # Reject unqualified column references anywhere in the join query.
    recursively_check_join_identifiers_for_ambiguity(query.where)
    recursively_check_join_identifiers_for_ambiguity(
        query.group_by, aliased_fields=aliased_fields)
    recursively_check_join_identifiers_for_ambiguity(query.having)
    recursively_check_join_identifiers_for_ambiguity(
        query.order_by, aliased_fields=aliased_fields)

    if isinstance(join_left, Identifier) and isinstance(
            join_right, Identifier):
        if self.is_predictor(join_left) and self.is_predictor(join_right):
            # FIX: previously formatted join_left.parts[0] and
            # join_left.parts[1] — the second operand must name the
            # right-hand predictor (and parts[1] could raise IndexError).
            raise PlanningException(
                f'Can\'t join two predictors {str(join_left.parts[0])} and {str(join_right.parts[0])}'
            )

        predictor_namespace = None
        predictor = None
        table = None
        predictor_is_left = False
        if self.is_predictor(join_left):
            predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
                join_left, self.default_namespace)
            predictor_is_left = True
        else:
            table = join_left
        if self.is_predictor(join_right):
            predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
                join_right, self.default_namespace)
        else:
            table = join_right

        last_step = None
        if predictor:
            # One argument is a table, another is a predictor
            # Apply mindsdb model to result of last dataframe fetch
            # Then join results of applying mindsdb with table
            predictor_name = self.predictor_names[predictor.to_string(
                alias=False).lower()]
            if self.predictor_metadata[predictor_name].get('timeseries'):
                predictor_steps = self.plan_timeseries_predictor(
                    query, table, predictor_namespace, predictor)
            else:
                predictor_steps = self.plan_predictor(
                    query, table, predictor_namespace, predictor)
            # add join
            # Update reference
            _, table = self.get_integration_path_from_identifier_or_error(
                table)
            table_alias = table.alias or Identifier(
                table.to_string(alias=False).replace('.', '_'))
            left = Identifier(
                predictor_steps['predictor'].result.ref_name,
                alias=predictor.alias or Identifier(predictor.to_string(alias=False)))
            right = Identifier(predictor_steps['data'].result.ref_name,
                               alias=table_alias)
            if not predictor_is_left:
                # swap join
                left, right = right, left
            new_join = Join(left=left, right=right, join_type=join.join_type)
            left = predictor_steps['predictor'].result
            right = predictor_steps['data'].result
            if not predictor_is_left:
                # swap join
                left, right = right, left
            last_step = self.plan.add_step(
                JoinStep(left=left, right=right, query=new_join))
            # limit from timeseries
            if predictor_steps.get('saved_limit'):
                last_step = self.plan.add_step(
                    LimitOffsetStep(dataframe=last_step.result,
                                    limit=predictor_steps['saved_limit']))
        else:
            # Both arguments are tables, join results of 2 dataframe fetches
            join_step = self.plan_join_two_tables(join)
            last_step = join_step
            if query.where:
                # FIXME: INFORMATION_SCHEMA with Where
                right_integration_name, _ = self.get_integration_path_from_identifier_or_error(
                    join.right)
                if right_integration_name == 'INFORMATION_SCHEMA':
                    # Filtering INFORMATION_SCHEMA is deliberately skipped (see FIXME).
                    ...
                else:
                    last_step = self.plan.add_step(
                        FilterStep(dataframe=last_step.result,
                                   query=query.where))
            if query.group_by:
                # Group by uses the targets with aliases stripped.
                group_by_targets = []
                for t in query.targets:
                    target_copy = copy.deepcopy(t)
                    target_copy.alias = None
                    group_by_targets.append(target_copy)
                last_step = self.plan.add_step(
                    GroupByStep(dataframe=last_step.result,
                                columns=query.group_by,
                                targets=group_by_targets))
            if query.having:
                last_step = self.plan.add_step(
                    FilterStep(dataframe=last_step.result,
                               query=query.having))
            if query.order_by:
                last_step = self.plan.add_step(
                    OrderByStep(dataframe=last_step.result,
                                order_by=query.order_by))
            if query.limit is not None or query.offset is not None:
                limit = query.limit.value if query.limit is not None else None
                offset = query.offset.value if query.offset is not None else None
                last_step = self.plan.add_step(
                    LimitOffsetStep(dataframe=last_step.result,
                                    limit=limit,
                                    offset=offset))
    else:
        raise PlanningException(
            f'Join of unsupported objects, currently only tables and predictors can be joined.'
        )
    return self.plan_project(query, last_step.result)
def plan_timeseries_predictor(self, query, table, predictor_namespace, predictor):
    """Plan fetching input data for a time-series predictor and applying it.

    Returns a dict with 'predictor' (apply step), 'data' (data-fetch step)
    and 'saved_limit' (the user's LIMIT, applied later by the caller).
    """
    predictor_name = predictor.to_string(alias=False).lower()
    # to original case
    predictor_name = self.predictor_names[predictor_name]
    predictor_time_column_name = self.predictor_metadata[predictor_name][
        'order_by_column']
    predictor_group_by_names = self.predictor_metadata[predictor_name][
        'group_by_columns']
    if predictor_group_by_names is None:
        predictor_group_by_names = []
    predictor_window = self.predictor_metadata[predictor_name]['window']
    if query.order_by:
        raise PlanningException(
            f'Can\'t provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}'
        )
    # LIMIT is deferred: the caller applies it after the predictor join.
    saved_limit = query.limit
    if query.group_by or query.having or query.offset:
        raise PlanningException(
            f'Unsupported query to timeseries predictor: {str(query)}')
    # WHERE may only reference the time column and the group-by columns.
    allowed_columns = [predictor_time_column_name.lower()]
    if len(predictor_group_by_names) > 0:
        allowed_columns += [i.lower() for i in predictor_group_by_names]
    validate_ts_where_condition(query.where, allowed_columns=allowed_columns)
    time_filter = find_time_filter(
        query.where, time_column_name=predictor_time_column_name)
    # Always fetch newest-first on the predictor's time column.
    order_by = [
        OrderBy(Identifier(parts=[predictor_time_column_name]),
                direction='DESC')
    ]
    preparation_where = copy.deepcopy(query.where)

    # add {order_by_field} is not null
    def add_order_not_null(condition):
        order_field_not_null = BinaryOperation(
            op='is not',
            args=[
                Identifier(parts=[predictor_time_column_name]),
                NullConstant()
            ])
        if condition is not None:
            condition = BinaryOperation(
                op='and', args=[condition, order_field_not_null])
        else:
            condition = order_field_not_null
        return condition

    preparation_where2 = copy.deepcopy(preparation_where)
    preparation_where = add_order_not_null(preparation_where)

    # Obtain integration selects
    # Each time-filter shape needs its own fetch strategy: BETWEEN and
    # '>'/'>=' need an extra window of history before the range start.
    if isinstance(time_filter, BetweenOperation):
        between_from = time_filter.args[1]
        preparation_time_filter = BinaryOperation(
            '<', args=[Identifier(predictor_time_column_name), between_from])
        preparation_where2 = replace_time_filter(preparation_where2,
                                                 time_filter,
                                                 preparation_time_filter)
        # History strictly before the range, limited to the predictor window.
        integration_select_1 = Select(
            targets=[Star()],
            from_table=table,
            where=add_order_not_null(preparation_where2),
            order_by=order_by,
            limit=Constant(predictor_window))
        integration_select_2 = Select(targets=[Star()],
                                      from_table=table,
                                      where=preparation_where,
                                      order_by=order_by)
        integration_selects = [integration_select_1, integration_select_2]
    elif isinstance(
            time_filter, BinaryOperation
    ) and time_filter.op == '>' and time_filter.args[1] == Latest():
        # "> LATEST": just the last window rows; the time filter is removed
        # from the fetch and kept only as the predictor's output filter.
        integration_select = Select(
            targets=[Star()],
            from_table=table,
            where=preparation_where,
            order_by=order_by,
            limit=Constant(predictor_window),
        )
        integration_select.where = find_and_remove_time_filter(
            integration_select.where, time_filter)
        integration_selects = [integration_select]
    elif isinstance(time_filter,
                    BinaryOperation) and time_filter.op in ('>', '>='):
        time_filter_date = time_filter.args[1]
        # Complementary operator fetches the history before the cut-off.
        preparation_time_filter_op = {'>': '<=', '>=': '<'}[time_filter.op]
        preparation_time_filter = BinaryOperation(
            preparation_time_filter_op,
            args=[
                Identifier(predictor_time_column_name), time_filter_date
            ])
        preparation_where2 = replace_time_filter(preparation_where2,
                                                 time_filter,
                                                 preparation_time_filter)
        integration_select_1 = Select(
            targets=[Star()],
            from_table=table,
            where=add_order_not_null(preparation_where2),
            order_by=order_by,
            limit=Constant(predictor_window))
        integration_select_2 = Select(targets=[Star()],
                                      from_table=table,
                                      where=preparation_where,
                                      order_by=order_by)
        integration_selects = [integration_select_1, integration_select_2]
    else:
        # No (or other) time filter: fetch everything matching WHERE.
        integration_select = Select(
            targets=[Star()],
            from_table=table,
            where=preparation_where,
            order_by=order_by,
        )
        integration_selects = [integration_select]

    if len(predictor_group_by_names) == 0:
        # ts query without grouping
        # one or multistep
        if len(integration_selects) == 1:
            select_partition_step = self.get_integration_select_step(
                integration_selects[0])
        else:
            select_partition_step = MultipleSteps(steps=[
                self.get_integration_select_step(s)
                for s in integration_selects
            ], reduce='union')
        # fetch data step
        data_step = self.plan.add_step(select_partition_step)
    else:
        # inject $var to queries
        # Each group-by column becomes an '= $var[col]' placeholder filled per
        # partition by the map-reduce step below.
        for integration_select in integration_selects:
            condition = integration_select.where
            for num, column in enumerate(predictor_group_by_names):
                cond = BinaryOperation(
                    '=',
                    args=[Identifier(column),
                          Constant(f'$var[{column}]')])
                # join to main condition
                if condition is None:
                    condition = cond
                else:
                    condition = BinaryOperation('and',
                                                args=[condition, cond])
            integration_select.where = condition
        # one or multistep
        if len(integration_selects) == 1:
            select_partition_step = self.get_integration_select_step(
                integration_selects[0])
        else:
            select_partition_step = MultipleSteps(steps=[
                self.get_integration_select_step(s)
                for s in integration_selects
            ], reduce='union')
        # get groping values
        no_time_filter_query = copy.deepcopy(query)
        no_time_filter_query.where = find_and_remove_time_filter(
            no_time_filter_query.where, time_filter)
        select_partitions_step = self.plan_fetch_timeseries_partitions(
            no_time_filter_query, table, predictor_group_by_names)
        # sub-query by every grouping value
        map_reduce_step = self.plan.add_step(
            MapReduceStep(values=select_partitions_step.result,
                          reduce='union',
                          step=select_partition_step))
        data_step = map_reduce_step
    predictor_step = self.plan.add_step(
        ApplyTimeseriesPredictorStep(
            output_time_filter=time_filter,
            namespace=predictor_namespace,
            dataframe=data_step.result,
            predictor=predictor,
        ))
    return {
        'predictor': predictor_step,
        'data': data_step,
        'saved_limit': saved_limit,
    }
def result(self):
    """Return a Result reference to this step's output.

    :raises PlanningException: when the step has no assigned number yet
    """
    step_num = self.step_num
    if step_num is None:
        raise PlanningException(
            f'Can\'t reference a step with no assigned step number. Tried to reference: {type(self)}'
        )
    return Result(step_num)
def prepare_select(self, query):
    # prepare select with or without predictor
    # Generator: yields GetPredictorColumns / GetTableColumns steps whose
    # `result_data` the caller fills in before the next iteration; the final
    # column descriptions are stored on the prepared statement.
    stmt = self.planner.statement

    # get all predictors
    query_predictors = []

    def find_predictors(node, is_table, **kwargs):
        if is_table and isinstance(node, ast.Identifier):
            if self.planner.is_predictor(node):
                query_predictors.append(node)

    utils.query_traversal(query, find_predictors)

    # only 1 predictor is allowed
    # NOTE(review): message has a typo ("To many") — left unchanged here.
    if len(query_predictors) > 1:
        raise PlanningException(f'To many predictors in query: {len(query_predictors)}')

    # === get all tables from 1st level of query ===
    stmt.tables_map = {}
    stmt.tables_lvl1 = []
    if query.from_table is not None:
        if isinstance(query.from_table, ast.Join):
            # get all tables
            join_tables = utils.convert_join_to_list(query.from_table)
        else:
            join_tables = [dict(table=query.from_table)]
            if isinstance(query.from_table, ast.Select):
                # nested select, get only last select
                join_tables = [
                    dict(
                        table=utils.get_deepest_select(query.from_table).from_table
                    )
                ]
        for i, join_table in enumerate(join_tables):
            table = join_table['table']
            if isinstance(table, ast.Identifier):
                tbl = self.table_from_identifier(table)
                if tbl.is_predictor:
                    # Is the last table?
                    if i + 1 < len(join_tables):
                        raise PlanningException(f'Predictor must be last table in query')
                stmt.tables_lvl1.append(tbl)
                # Register every lookup key for this table.
                for key in tbl.keys:
                    stmt.tables_map[key] = tbl
            else:
                # don't add unknown table to looking list
                continue

    # is there any predictors at other levels?
    lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
    if len(query_predictors) != len(lvl1_predictors):
        raise PlanningException('Predictor is not at first level')

    # === get targets ===
    columns = []
    get_all_tables = False
    for t in query.targets:
        column = Column(t)
        # column alias
        alias = None
        if t.alias is not None:
            alias = to_string(t.alias)
        if isinstance(t, ast.Star):
            if len(stmt.tables_lvl1) == 0:
                # if "from" is emtpy we can't make plan
                raise PlanningException("Can't find table")
            column.is_star = True
            get_all_tables = True
        elif isinstance(t, ast.Identifier):
            if alias is None:
                alias = t.parts[-1]
            table = self.get_table_of_column(t)
            if table is None:
                # table is not known
                get_all_tables = True
            else:
                column.table = table
        elif isinstance(t, ast.Constant):
            if alias is None:
                alias = str(t.value)
            column.type = self.get_type_of_var(t.value)
        elif isinstance(t, ast.Function):
            # mysql function
            if t.op == 'connection_id':
                column.type = 'integer'
            else:
                column.type = 'str'
        else:
            # TODO go down into lower level.
            # It can be function, operation, select.
            # But now show it as string
            # TODO add several known types for function, i.e ABS-int
            # TODO TypeCast - as casted type
            column.type = 'str'
        if alias is not None:
            column.alias = alias
        columns.append(column)

    # === get columns from tables ===
    # Only fetch schemas for tables actually referenced (or all of them when
    # a '*' or an unresolved column forces it).
    request_tables = set()
    for column in columns:
        if column.table is not None:
            request_tables.add(column.table.name)

    for table in stmt.tables_lvl1:
        if get_all_tables or table.name in request_tables:
            if table.is_predictor:
                step = steps.GetPredictorColumns(namespace=table.ds,
                                                 predictor=table.node)
            else:
                step = steps.GetTableColumns(namespace=table.ds,
                                             table=table.name)
            # Hand the step to the caller; it runs it and sets result_data.
            yield step
            if step.result_data is not None:
                # save results
                if len(step.result_data['tables']) > 0:
                    table_info = step.result_data['tables'][0]
                    columns_info = step.result_data['columns'][table_info]
                    table.columns = []
                    table.ds = table_info[0]
                    for col in columns_info:
                        if isinstance(col, tuple):
                            # is predictor
                            col = dict(name=col[0], type='str')
                        table.columns.append(
                            Column(
                                name=col['name'],
                                type=col['type'],
                            )
                        )
                    # map by names
                    table.columns_map = {
                        i.name.upper(): i
                        for i in table.columns
                    }

    # === create columns list ===
    columns_result = []
    for i, column in enumerate(columns):
        if column.is_star:
            # add data from all tables
            for table in stmt.tables_lvl1:
                if table.columns is None:
                    raise PlanningException(f'Table is not found {table.name}')
                for col in table.columns:
                    # col = {name: 'col', type: 'str'}
                    column2 = Column(table=table, name=col.name)
                    column2.alias = col.name
                    column2.type = col.type
                    columns_result.append(column2)
            # to next column
            continue
        elif column.name is not None:
            # is Identifier
            col_name = column.name.upper()
            if column.table is not None:
                table = column.table
                if table.columns_map is not None:
                    if col_name in table.columns_map:
                        column.type = table.columns_map[col_name].type
                    else:
                        # print(col_name, table.name, query.to_string())
                        # continue
                        raise PlanningException(f'Column not found {col_name}')
            else:
                # table is not found, looking for in all tables
                for table in stmt.tables_lvl1:
                    if table.columns_map is not None:
                        col = table.columns_map.get(col_name)
                        if col is not None:
                            column.type = col.type
                            column.table = table
                            break

        # forcing alias
        if column.alias is None:
            column.alias = f'column_{i}'
        # forcing type
        if column.type is None:
            column.type = 'str'
        columns_result.append(column)

    # save columns
    stmt.columns = columns_result