def make_full_transform_query(aggregation_queries, dataset, aggregation_params, transform_params, encoding_feature=False): is_hdfs = 'hiveTableName' in dataset.get_config().get('params').keys() inner = SelectQuery() if is_hdfs: inner.select_from('_'.join(dataset.name.split('.'))) else: inner.select_from(dataset) if aggregation_params.is_rolling_window(): inner.select(Column('*')) else: inner.distinct() #TODO why?! -> avoid dupplicate for key in aggregation_params.get_effective_keys(): inner.select(Column(key)) prefilter = _make_prefilter(aggregation_params, transform_params) inner.where(prefilter) outer = SelectQuery() outer.select_from(inner, alias='inner') if aggregation_params.is_rolling_window(): outer.select(Column('*', 'inner')) else: for col in aggregation_params.get_effective_keys(): #+ feature_names: outer.select(Column(col, 'inner')) reverse_mapping_dict = {} for idx, agg_query in enumerate(aggregation_queries): agg_query.alias( agg_query.get_alias() or 'cte_' + str(idx)) #TODO remove, make sure they have ids outer.with_cte(agg_query) join_cond = Expression() for key in aggregation_params.get_effective_keys(): join_cond = join_cond.and_( Column(key, 'inner').eq_null_unsafe( Column(key, agg_query.get_alias()))) outer.join(agg_query.get_alias(), JoinTypes.LEFT, join_cond) for idx2, col in enumerate(agg_query.get_columns_alias()): if encoding_feature: if aggregation_params.feature_name_mapping.get(col): new_alias = '{}_{}_{}'.format( aggregation_params.feature_name_mapping.get(col), idx, idx2) outer.select(Column(col, agg_query.get_alias()), new_alias) reverse_mapping_dict[new_alias] = col else: outer.select(Column(col, agg_query.get_alias())) return dialectHandler(dataset).convertToSQL(outer), reverse_mapping_dict
toSQL(sb, Dialects.MYSQL) # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE # Group by and limit sb = SelectQuery() sb.select_from('myTable') sb.group_by(Column('groupCol')) sb.limit(1000) toSQL(sb, Dialects.MYSQL) # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE # Where clause and select distinct sb = SelectQuery() sb.distinct() sb.select_from('myTable') sb.where(Column('wherecol').le(Constant(33))) toSQL(sb, Dialects.MYSQL) # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE # Having clause sb = SelectQuery() sb.select_from('myTable') sb.having(Column('havingcol').ge(Constant(33))) toSQL(sb, Dialects.MYSQL) # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE # CTE