def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]: """ The representation of the datasource containing only the required data to render the provided slices. Used to reduce the payload when loading a dashboard. """ data = self.data metric_names = set() column_names = set() for slc in slices: form_data = slc.form_data # pull out all required metrics from the form_data for param in METRIC_FORM_DATA_PARAMS: for metric in utils.get_iterable(form_data.get(param) or []): metric_names.add(utils.get_metric_name(metric)) if utils.is_adhoc_metric(metric): column_names.add((metric.get("column") or {}).get("column_name")) # pull out all required columns from the form_data for filter_ in form_data.get("adhoc_filters") or []: if filter_["clause"] == "WHERE" and filter_.get("subject"): column_names.add(filter_.get("subject")) for param in COLUMN_FORM_DATA_PARAMS: for column in utils.get_iterable(form_data.get(param) or []): column_names.add(column) filtered_metrics = [ metric for metric in data["metrics"] if metric["metric_name"] in metric_names ] filtered_columns = [ column for column in data["columns"] if column["column_name"] in column_names ] del data["description"] data.update({"metrics": filtered_metrics}) data.update({"columns": filtered_columns}) verbose_map = {"__timestamp": "Time"} verbose_map.update({ metric["metric_name"]: metric["verbose_name"] or metric["metric_name"] for metric in filtered_metrics }) verbose_map.update({ column["column_name"]: column["verbose_name"] or column["column_name"] for column in filtered_columns }) data["verbose_map"] = verbose_map return data
def _get_timeseries_orderby(self, timeseries_limit_metric, metrics_dict, cols): if utils.is_adhoc_metric(timeseries_limit_metric): ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif timeseries_limit_metric in metrics_dict: timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric) ob = timeseries_limit_metric.get_sqla_col() else: raise Exception( _("Metric '%(metric)s' does not exist", metric=timeseries_limit_metric) ) return ob
def _get_timeseries_orderby(self, timeseries_limit_metric, metrics_dict, cols): if utils.is_adhoc_metric(timeseries_limit_metric): ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif timeseries_limit_metric in metrics_dict: timeseries_limit_metric = metrics_dict.get( timeseries_limit_metric, ) ob = timeseries_limit_metric.get_sqla_col() else: raise Exception(_("Metric '{}' is not valid".format(timeseries_limit_metric))) return ob
def _get_timeseries_orderby( self, timeseries_limit_metric: Metric, metrics_dict: Dict[str, SqlMetric], cols: Dict[str, Column], ) -> Optional[Column]: if utils.is_adhoc_metric(timeseries_limit_metric): assert isinstance(timeseries_limit_metric, dict) ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif (isinstance(timeseries_limit_metric, str) and timeseries_limit_metric in metrics_dict): ob = metrics_dict[timeseries_limit_metric].get_sqla_col() else: raise Exception( _("Metric '%(metric)s' does not exist", metric=timeseries_limit_metric)) return ob
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, order_desc=True, prequeries=None, is_prequery=False, ): """Querying any sqla table from this common interface""" template_kwargs = { 'from_dttm': from_dttm, 'groupby': groupby, 'metrics': metrics, 'row_limit': row_limit, 'to_dttm': to_dttm, 'filter': filter, 'columns': {col.column_name: col for col in self.columns}, } template_kwargs.update(self.template_params_dict) template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception( _('Datetime column not provided as part table configuration ' 'and is required by this type of chart')) if not groupby and not metrics and not columns: raise Exception(_('Empty query?')) metrics_exprs = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict.get(m).get_sqla_col()) else: raise Exception(_("Metric '{}' is not valid".format(m))) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column('COUNT(*)').label( db_engine_spec.make_label_compatible('count')) select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.get_sqla_col() inner = col.get_sqla_col(col.column_name + '__') groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].get_sqla_col()) metrics_exprs = [] if granularity: dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] # Use main dttm column to support index with secondary dttm columns if db_engine_spec.time_secondary_columns and \ self.main_dttm_col in self.dttm_cols and \ self.main_dttm_col != dttm_col.column_name: time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ['col', 'op']]): continue col = flt['col'] op = flt['op'] col_obj = cols.get(col) if col_obj: is_list_target = op in ('in', 'not in') eq = self.filter_values_handler( flt.get('val'), target_column_is_numeric=col_obj.is_num, is_list_target=is_list_target) if op in ('in', 'not in'): cond = col_obj.get_sqla_col().in_(eq) if '<NULL>' in eq: cond = or_(cond, col_obj.get_sqla_col() == None) # noqa if op == 'not in': cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = utils.string_to_num(flt['val']) if op == '==': where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == '!=': where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == '>': where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == '<': where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == '>=': where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == '<=': where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == 'LIKE': where_clause_and.append( col_obj.get_sqla_col().like(eq)) elif op == 'IS NULL': where_clause_and.append( col_obj.get_sqla_col() == None) # noqa elif op == 'IS NOT NULL': where_clause_and.append( col_obj.get_sqla_col() != None) # noqa if extras: where = extras.get('where') if where: where = template_processor.process_template(where) where_clause_and += [sa.text('({})'.format(where))] having = extras.get('having') if having: having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and not columns: orderby = [(main_metric_expr, not order_desc)] for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and \ timeseries_limit and groupby and not time_groupby_inline: if self.database.db_engine_spec.inner_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = main_metric_expr.label('mme_inner__') inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs) subq = subq.select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, ) subq = subq.where( and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: if utils.is_adhoc_metric(timeseries_limit_metric): ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif timeseries_limit_metric in metrics_dict: timeseries_limit_metric = metrics_dict.get( timeseries_limit_metric, ) ob = timeseries_limit_metric.get_sqla_col() else: raise Exception(_( "Metric '{}' is not valid".format(m))) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append(groupby_exprs[i] == column(gb + '__')) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: # run subquery to get top groups subquery_obj = { 'prequeries': prequeries, 'is_prequery': True, 'is_timeseries': False, 'row_limit': timeseries_limit, 'groupby': groupby, 'metrics': metrics, 'granularity': granularity, 'from_dttm': inner_from_dttm or from_dttm, 'to_dttm': inner_to_dttm or to_dttm, 'filter': filter, 'orderby': orderby, 'extras': extras, 'columns': columns, 'order_desc': True, } result = self.query(subquery_obj) cols = {col.column_name: col for col in self.columns} dimensions = [ c for c in result.df.columns if c not in metrics and c in cols ] top_groups = self._get_top_groups(result.df, dimensions) qry = qry.where(top_groups) return qry.select_from(tbl)
def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]: """ The representation of the datasource containing only the required data to render the provided slices. Used to reduce the payload when loading a dashboard. """ data = self.data metric_names = set() column_names = set() for slc in slices: form_data = slc.form_data # pull out all required metrics from the form_data for param in METRIC_FORM_DATA_PARAMS: for metric in utils.get_iterable(form_data.get(param) or []): metric_names.add(utils.get_metric_name(metric)) if utils.is_adhoc_metric(metric): column_names.add( (metric.get("column") or {}).get("column_name") ) # Columns used in query filters column_names.update( filter_["subject"] for filter_ in form_data.get("adhoc_filters") or [] if filter_.get("clause") == "WHERE" and filter_.get("subject") ) # columns used by Filter Box column_names.update( filter_config["column"] for filter_config in form_data.get("filter_configs") or [] if "column" in filter_config ) column_names.update( column for column in utils.get_iterable(form_data.get(param) or []) for param in COLUMN_FORM_DATA_PARAMS ) filtered_metrics = [ metric for metric in data["metrics"] if metric["metric_name"] in metric_names ] filtered_columns: List[Column] = [] column_types: Set[GenericDataType] = set() for column in data["columns"]: generic_type = column.get("type_generic") if generic_type is not None: column_types.add(generic_type) if column["column_name"] in column_names: filtered_columns.append(column) data["column_types"] = list(column_types) del data["description"] data.update({"metrics": filtered_metrics}) data.update({"columns": filtered_columns}) verbose_map = {"__timestamp": "Time"} verbose_map.update( { metric["metric_name"]: metric["verbose_name"] or metric["metric_name"] for metric in filtered_metrics } ) verbose_map.update( { column["column_name"]: column["verbose_name"] or column["column_name"] for column in filtered_columns } ) data["verbose_map"] = verbose_map return data
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, order_desc=True, ): """Querying any sqla table from this common interface""" template_kwargs = { "from_dttm": from_dttm, "groupby": groupby, "metrics": metrics, "row_limit": row_limit, "to_dttm": to_dttm, "filter": filter, "columns": {col.column_name: col for col in self.columns}, } template_kwargs.update(self.template_params_dict) extra_cache_keys: List[Any] = [] template_kwargs["extra_cache_keys"] = extra_cache_keys template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec prequeries: List[str] = [] orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) if not groupby and not metrics and not columns: raise Exception(_("Empty query?")) metrics_exprs = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict.get(m).get_sqla_col()) else: raise Exception( _("Metric '%(metric)s' does not exist", metric=m)) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr, label = literal_column("COUNT(*)"), "ccount" main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, label) select_exprs = [] groupby_exprs_sans_timestamp = OrderedDict() if groupby: select_exprs = [] for s in groupby: if s in cols: outer = cols[s].get_sqla_col() else: outer = literal_column(f"({s})") outer = self.make_sqla_column_compatible(outer, s) groupby_exprs_sans_timestamp[outer.name] = outer select_exprs.append(outer) elif columns: for s in columns: select_exprs.append( cols[s].get_sqla_col() if s in cols else self. make_sqla_column_compatible(literal_column(s))) metrics_exprs = [] groupby_exprs_with_timestamp = OrderedDict( groupby_exprs_sans_timestamp.items()) if granularity: dttm_col = cols[granularity] time_grain = extras.get("time_grain_sqla") time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs_with_timestamp[timestamp.name] = timestamp # Use main dttm column to support index with secondary dttm columns if (db_engine_spec.time_secondary_columns and self.main_dttm_col in self.dttm_cols and self.main_dttm_col != dttm_col.column_name): time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs labels_expected = [c._df_label_expected for c in select_exprs] select_exprs = db_engine_spec.make_select_compatible( groupby_exprs_with_timestamp.values(), select_exprs) qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if not columns: qry = qry.group_by(*groupby_exprs_with_timestamp.values()) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ["col", "op"]]): continue col = flt["col"] op = flt["op"] col_obj = cols.get(col) if col_obj: is_list_target = op in ("in", "not in") eq = self.filter_values_handler( flt.get("val"), target_column_is_numeric=col_obj.is_num, is_list_target=is_list_target, ) if op in ("in", "not in"): cond = col_obj.get_sqla_col().in_(eq) if "<NULL>" in eq: cond = or_(cond, col_obj.get_sqla_col() == None) # noqa if op == "not in": cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = utils.string_to_num(flt["val"]) if op == "==": where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == "!=": where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == ">": where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == "<": where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == ">=": where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == "<=": where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == "LIKE": where_clause_and.append( col_obj.get_sqla_col().like(eq)) elif op == "IS NULL": where_clause_and.append( col_obj.get_sqla_col() == None) # noqa elif op == "IS NOT NULL": where_clause_and.append( col_obj.get_sqla_col() != None) # noqa if extras: where = extras.get("where") if where: where = template_processor.process_template(where) where_clause_and += [sa.text("({})".format(where))] having = extras.get("having") if having: having = template_processor.process_template(having) having_clause_and += [sa.text("({})".format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and not columns: orderby = [(main_metric_expr, not order_desc)] for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and timeseries_limit and groupby and not time_groupby_inline: if self.database.db_engine_spec.allows_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, "mme_inner__") inner_groupby_exprs = [] inner_select_exprs = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): inner = self.make_sqla_column_compatible( gby_obj, gby_name + "__") inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs).select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm) subq = subq.where( and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: ob = self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): # in this case the column name, not the alias, needs to be # conditionally mutated, as it refers to the column alias in # the inner query col_name = db_engine_spec.make_label_compatible(gby_name + "__") on_clause.append(gby_obj == column(col_name)) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: if timeseries_limit_metric: orderby = [( self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols), False, )] # run prequery to get top groups prequery_obj = { "is_timeseries": False, "row_limit": timeseries_limit, "groupby": groupby, "metrics": metrics, "granularity": granularity, "from_dttm": inner_from_dttm or from_dttm, "to_dttm": inner_to_dttm or to_dttm, "filter": filter, "orderby": orderby, "extras": extras, "columns": columns, "order_desc": True, } result = self.query(prequery_obj) prequeries.append(result.query) dimensions = [ c for c in result.df.columns if c not in metrics and c in groupby_exprs_sans_timestamp ] top_groups = self._get_top_groups( result.df, dimensions, groupby_exprs_sans_timestamp) qry = qry.where(top_groups) return SqlaQuery( extra_cache_keys=extra_cache_keys, labels_expected=labels_expected, sqla_query=qry.select_from(tbl), prequeries=prequeries, )
def is_str_or_adhoc(metric: Metric) -> bool: return isinstance(metric, str) or is_adhoc_metric(metric)
def get_sqla_query( # sqla self, metrics: List[Metric], granularity: str, from_dttm: Optional[datetime], to_dttm: Optional[datetime], columns: Optional[List[str]] = None, groupby: Optional[List[str]] = None, filter: Optional[List[Dict[str, Any]]] = None, is_timeseries: bool = True, timeseries_limit: int = 15, timeseries_limit_metric: Optional[Metric] = None, row_limit: Optional[int] = None, inner_from_dttm: Optional[datetime] = None, inner_to_dttm: Optional[datetime] = None, orderby: Optional[List[Tuple[ColumnElement, bool]]] = None, extras: Optional[Dict[str, Any]] = None, order_desc: bool = True, ) -> SqlaQuery: """Querying any sqla table from this common interface""" template_kwargs = { "from_dttm": from_dttm, "groupby": groupby, "metrics": metrics, "row_limit": row_limit, "to_dttm": to_dttm, "filter": filter, "columns": {col.column_name: col for col in self.columns}, } is_sip_38 = is_feature_enabled("SIP_38_VIZ_REARCHITECTURE") template_kwargs.update(self.template_params_dict) extra_cache_keys: List[Any] = [] template_kwargs["extra_cache_keys"] = extra_cache_keys template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec prequeries: List[str] = [] orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols: Dict[str, Column] = {col.column_name: col for col in self.columns} metrics_dict: Dict[str, SqlMetric] = { m.metric_name: m for m in self.metrics } if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) if (not metrics and not columns and (is_sip_38 or (not is_sip_38 and not groupby))): raise Exception(_("Empty query?")) metrics_exprs: List[ColumnElement] = [] for m in metrics: if utils.is_adhoc_metric(m): assert isinstance(m, dict) metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif isinstance(m, str) and m in metrics_dict: metrics_exprs.append(metrics_dict[m].get_sqla_col()) else: raise Exception( _("Metric '%(metric)s' does not exist", metric=m)) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr, label = literal_column("COUNT(*)"), "ccount" main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, label) select_exprs: List[Column] = [] groupby_exprs_sans_timestamp: OrderedDict = OrderedDict() if (is_sip_38 and metrics and columns) or (not is_sip_38 and groupby): # dedup columns while preserving order columns_ = columns if is_sip_38 else groupby assert columns_ groupby = list(dict.fromkeys(columns_)) select_exprs = [] for s in groupby: if s in cols: outer = cols[s].get_sqla_col() else: outer = literal_column(f"({s})") outer = self.make_sqla_column_compatible(outer, s) groupby_exprs_sans_timestamp[outer.name] = outer select_exprs.append(outer) elif columns: for s in columns: select_exprs.append( cols[s].get_sqla_col() if s in cols else self. make_sqla_column_compatible(literal_column(s))) metrics_exprs = [] assert extras is not None time_range_endpoints = extras.get("time_range_endpoints") groupby_exprs_with_timestamp = OrderedDict( groupby_exprs_sans_timestamp.items()) if granularity: dttm_col = cols[granularity] time_grain = extras.get("time_grain_sqla") time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs_with_timestamp[timestamp.name] = timestamp # Use main dttm column to support index with secondary dttm columns. if (db_engine_spec.time_secondary_columns and self.main_dttm_col in self.dttm_cols and self.main_dttm_col != dttm_col.column_name): time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm, time_range_endpoints)) time_filters.append( dttm_col.get_time_filter(from_dttm, to_dttm, time_range_endpoints)) select_exprs += metrics_exprs labels_expected = [c._df_label_expected for c in select_exprs] select_exprs = db_engine_spec.make_select_compatible( groupby_exprs_with_timestamp.values(), select_exprs) qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if (is_sip_38 and metrics) or (not is_sip_38 and not columns): qry = qry.group_by(*groupby_exprs_with_timestamp.values()) where_clause_and = [] having_clause_and: List = [] for flt in filter: # type: ignore if not all([flt.get(s) for s in ["col", "op"]]): continue col = flt["col"] op = flt["op"].upper() col_obj = cols.get(col) if col_obj: is_list_target = op in ( utils.FilterOperator.IN.value, utils.FilterOperator.NOT_IN.value, ) eq = self.filter_values_handler( values=flt.get("val"), target_column_is_numeric=col_obj.is_numeric, is_list_target=is_list_target, ) if op in ( utils.FilterOperator.IN.value, utils.FilterOperator.NOT_IN.value, ): cond = col_obj.get_sqla_col().in_(eq) if isinstance(eq, str) and NULL_STRING in eq: cond = or_(cond, col_obj.get_sqla_col() is None) if op == utils.FilterOperator.NOT_IN.value: cond = ~cond where_clause_and.append(cond) else: if col_obj.is_numeric: eq = utils.cast_to_num(flt["val"]) if op == utils.FilterOperator.EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == utils.FilterOperator.NOT_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == utils.FilterOperator.GREATER_THAN.value: where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == utils.FilterOperator.LESS_THAN.value: where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == utils.FilterOperator.GREATER_THAN_OR_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == utils.FilterOperator.LESS_THAN_OR_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == utils.FilterOperator.LIKE.value: where_clause_and.append( col_obj.get_sqla_col().like(eq)) elif op == utils.FilterOperator.IS_NULL.value: where_clause_and.append(col_obj.get_sqla_col() == None) elif op == utils.FilterOperator.IS_NOT_NULL.value: where_clause_and.append(col_obj.get_sqla_col() != None) else: raise Exception( _("Invalid filter operation type: %(op)s", op=op)) if config["ENABLE_ROW_LEVEL_SECURITY"]: where_clause_and += self._get_sqla_row_level_filters( template_processor) if extras: where = extras.get("where") if where: where = template_processor.process_template(where) where_clause_and += [sa.text("({})".format(where))] having = extras.get("having") if having: having = template_processor.process_template(having) having_clause_and += [sa.text("({})".format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and ((is_sip_38 and metrics) or (not is_sip_38 and not columns)): orderby = [(main_metric_expr, not order_desc)] # To ensure correct handling of the ORDER BY labeling we need to reference the # metric instance if defined in the SELECT clause. metrics_exprs_by_label = {m._label: m for m in metrics_exprs} for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) elif col in cols: col = cols[col].get_sqla_col() if isinstance(col, Label) and col._label in metrics_exprs_by_label: col = metrics_exprs_by_label[col._label] qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if (is_timeseries and timeseries_limit and not time_groupby_inline and ((is_sip_38 and columns) or (not is_sip_38 and groupby))): if self.database.db_engine_spec.allows_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, "mme_inner__") inner_groupby_exprs = [] inner_select_exprs = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): inner = self.make_sqla_column_compatible( gby_obj, gby_name + "__") inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs).select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, time_range_endpoints, ) subq = subq.where( and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: ob = self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): # in this case the column name, not the alias, needs to be # conditionally mutated, as it refers to the column alias in # the inner query col_name = db_engine_spec.make_label_compatible(gby_name + "__") on_clause.append(gby_obj == column(col_name)) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: if timeseries_limit_metric: orderby = [( self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols), False, )] # run prequery to get top groups prequery_obj = { "is_timeseries": False, "row_limit": timeseries_limit, "metrics": metrics, "granularity": granularity, "from_dttm": inner_from_dttm or from_dttm, "to_dttm": inner_to_dttm or to_dttm, "filter": filter, "orderby": orderby, "extras": extras, "columns": columns, "order_desc": True, } if not is_sip_38: prequery_obj["groupby"] = groupby result = self.query(prequery_obj) prequeries.append(result.query) dimensions = [ c for c in result.df.columns if c not in metrics and c in groupby_exprs_sans_timestamp ] top_groups = self._get_top_groups( result.df, dimensions, groupby_exprs_sans_timestamp) qry = qry.where(top_groups) return SqlaQuery( extra_cache_keys=extra_cache_keys, labels_expected=labels_expected, sqla_query=qry.select_from(tbl), prequeries=prequeries, )
def __init__( self, datasource: Optional[DatasourceDict] = None, result_type: Optional[ChartDataResultType] = None, annotation_layers: Optional[List[Dict[str, Any]]] = None, applied_time_extras: Optional[Dict[str, str]] = None, apply_fetch_values_predicate: bool = False, granularity: Optional[str] = None, metrics: Optional[List[Union[Dict[str, Any], str]]] = None, groupby: Optional[List[str]] = None, filters: Optional[List[Dict[str, Any]]] = None, time_range: Optional[str] = None, time_shift: Optional[str] = None, is_timeseries: Optional[bool] = None, timeseries_limit: int = 0, row_limit: Optional[int] = None, row_offset: Optional[int] = None, timeseries_limit_metric: Optional[Metric] = None, order_desc: bool = True, extras: Optional[Dict[str, Any]] = None, columns: Optional[List[str]] = None, orderby: Optional[List[OrderBy]] = None, post_processing: Optional[List[Optional[Dict[str, Any]]]] = None, is_rowcount: bool = False, **kwargs: Any, ): columns = columns or [] groupby = groupby or [] extras = extras or {} annotation_layers = annotation_layers or [] self.is_rowcount = is_rowcount self.datasource = None if datasource: self.datasource = ConnectorRegistry.get_datasource( str(datasource["type"]), int(datasource["id"]), db.session) self.result_type = result_type self.apply_fetch_values_predicate = apply_fetch_values_predicate or False self.annotation_layers = [ layer for layer in annotation_layers # formula annotations don't affect the payload, hence can be dropped if layer["annotationType"] != "FORMULA" ] self.applied_time_extras = applied_time_extras or {} self.granularity = granularity self.from_dttm, self.to_dttm = get_since_until( relative_start=extras.get("relative_start", config["DEFAULT_RELATIVE_START_TIME"]), relative_end=extras.get("relative_end", config["DEFAULT_RELATIVE_END_TIME"]), time_range=time_range, time_shift=time_shift, ) # is_timeseries is True if time column is in either columns or groupby # (both are dimensions) self.is_timeseries = (is_timeseries if is_timeseries is not None else DTTM_ALIAS in columns + groupby) self.time_range = time_range self.time_shift = parse_human_timedelta(time_shift) self.post_processing = [ post_proc for post_proc in post_processing or [] if post_proc ] # Support metric reference/definition in the format of # 1. 'metric_name' - name of predefined metric # 2. { label: 'label_name' } - legacy format for a predefined metric # 3. { expressionType: 'SIMPLE' | 'SQL', ... } - adhoc metric self.metrics = metrics and [ x if isinstance(x, str) or is_adhoc_metric(x) else x["label"] # type: ignore for x in metrics ] self.row_limit = config["ROW_LIMIT"] if row_limit is None else row_limit self.row_offset = row_offset or 0 self.filter = filters or [] self.timeseries_limit = timeseries_limit self.timeseries_limit_metric = timeseries_limit_metric self.order_desc = order_desc self.extras = extras if config["SIP_15_ENABLED"]: self.extras["time_range_endpoints"] = get_time_range_endpoints( form_data=self.extras) self.columns = columns self.groupby = groupby or [] self.orderby = orderby or [] # rename deprecated fields for field in DEPRECATED_FIELDS: if field.old_name in kwargs: logger.warning( "The field `%s` is deprecated, please use `%s` instead.", field.old_name, field.new_name, ) value = kwargs[field.old_name] if value: if hasattr(self, field.new_name): logger.warning( "The field `%s` is already populated, " "replacing value with contents from `%s`.", field.new_name, field.old_name, ) setattr(self, field.new_name, value) # move deprecated extras fields to extras for field in DEPRECATED_EXTRAS_FIELDS: if field.old_name in kwargs: logger.warning( "The field `%s` is deprecated and should " "be passed to `extras` via the `%s` property.", field.old_name, field.new_name, ) value = kwargs[field.old_name] if value: if hasattr(self.extras, field.new_name): logger.warning( "The field `%s` is already populated in " "`extras`, replacing value with contents " "from `%s`.", field.new_name, field.old_name, ) self.extras[field.new_name] = value
def data_for_slices( # pylint: disable=too-many-locals self, slices: List[Slice]) -> Dict[str, Any]: """ The representation of the datasource containing only the required data to render the provided slices. Used to reduce the payload when loading a dashboard. """ data = self.data metric_names = set() column_names = set() for slc in slices: form_data = slc.form_data # pull out all required metrics from the form_data for metric_param in METRIC_FORM_DATA_PARAMS: for metric in utils.get_iterable( form_data.get(metric_param) or []): metric_names.add(utils.get_metric_name(metric)) if utils.is_adhoc_metric(metric): column_names.add((metric.get("column") or {}).get("column_name")) # Columns used in query filters column_names.update( filter_["subject"] for filter_ in form_data.get("adhoc_filters") or [] if filter_.get("clause") == "WHERE" and filter_.get("subject")) # columns used by Filter Box column_names.update( filter_config["column"] for filter_config in form_data.get("filter_configs") or [] if "column" in filter_config) # for legacy dashboard imports which have the wrong query_context in them try: query_context = slc.get_query_context() except DatasetNotFoundError: query_context = None # legacy charts don't have query_context charts if query_context: column_names.update([ utils.get_column_name(column) for query in query_context.queries for column in query.columns ] or []) else: _columns = [ utils.get_column_name(column) if utils.is_adhoc_column(column) else column for column_param in COLUMN_FORM_DATA_PARAMS for column in utils.get_iterable( form_data.get(column_param) or []) ] column_names.update(_columns) filtered_metrics = [ metric for metric in data["metrics"] if metric["metric_name"] in metric_names ] filtered_columns: List[Column] = [] column_types: Set[GenericDataType] = set() for column in data["columns"]: generic_type = column.get("type_generic") if generic_type is not None: column_types.add(generic_type) if column["column_name"] in column_names: filtered_columns.append(column) data["column_types"] = list(column_types) del data["description"] data.update({"metrics": filtered_metrics}) data.update({"columns": filtered_columns}) verbose_map = {"__timestamp": "Time"} verbose_map.update({ metric["metric_name"]: metric["verbose_name"] or metric["metric_name"] for metric in filtered_metrics }) verbose_map.update({ column["column_name"]: column["verbose_name"] or column["column_name"] for column in filtered_columns }) data["verbose_map"] = verbose_map return data
def test_is_adhoc_metric(): assert is_adhoc_metric(STR_METRIC) is False assert is_adhoc_metric(SIMPLE_SUM_ADHOC_METRIC) is True assert is_adhoc_metric(SQL_ADHOC_METRIC) is True
def __init__( # pylint: disable=too-many-arguments,too-many-locals self, query_context: "QueryContext", annotation_layers: Optional[List[Dict[str, Any]]] = None, applied_time_extras: Optional[Dict[str, str]] = None, apply_fetch_values_predicate: bool = False, columns: Optional[List[str]] = None, datasource: Optional[DatasourceDict] = None, extras: Optional[Dict[str, Any]] = None, filters: Optional[List[QueryObjectFilterClause]] = None, granularity: Optional[str] = None, is_rowcount: bool = False, is_timeseries: Optional[bool] = None, metrics: Optional[List[Metric]] = None, order_desc: bool = True, orderby: Optional[List[OrderBy]] = None, post_processing: Optional[List[Optional[Dict[str, Any]]]] = None, result_type: Optional[ChartDataResultType] = None, row_limit: Optional[int] = None, row_offset: Optional[int] = None, series_columns: Optional[List[str]] = None, series_limit: int = 0, series_limit_metric: Optional[Metric] = None, time_range: Optional[str] = None, time_shift: Optional[str] = None, **kwargs: Any, ): columns = columns or [] extras = extras or {} annotation_layers = annotation_layers or [] self.time_offsets = kwargs.get("time_offsets", []) self.inner_from_dttm = kwargs.get("inner_from_dttm") self.inner_to_dttm = kwargs.get("inner_to_dttm") if series_columns: self.series_columns = series_columns elif is_timeseries and metrics: self.series_columns = columns else: self.series_columns = [] self.is_rowcount = is_rowcount self.datasource = None if datasource: self.datasource = ConnectorRegistry.get_datasource( str(datasource["type"]), int(datasource["id"]), db.session) self.result_type = result_type or query_context.result_type self.apply_fetch_values_predicate = apply_fetch_values_predicate or False self.annotation_layers = [ layer for layer in annotation_layers # formula annotations don't affect the payload, hence can be dropped if layer["annotationType"] != "FORMULA" ] self.applied_time_extras = applied_time_extras or {} self.granularity = granularity self.from_dttm, self.to_dttm = get_since_until( relative_start=extras.get("relative_start", config["DEFAULT_RELATIVE_START_TIME"]), relative_end=extras.get("relative_end", config["DEFAULT_RELATIVE_END_TIME"]), time_range=time_range, time_shift=time_shift, ) # is_timeseries is True if time column is in either columns or groupby # (both are dimensions) self.is_timeseries = (is_timeseries if is_timeseries is not None else DTTM_ALIAS in columns) self.time_range = time_range self.time_shift = parse_human_timedelta(time_shift) self.post_processing = [ post_proc for post_proc in post_processing or [] if post_proc ] # Support metric reference/definition in the format of # 1. 'metric_name' - name of predefined metric # 2. { label: 'label_name' } - legacy format for a predefined metric # 3. { expressionType: 'SIMPLE' | 'SQL', ... } - adhoc metric self.metrics = metrics and [ x if isinstance(x, str) or is_adhoc_metric(x) else x["label"] # type: ignore for x in metrics ] default_row_limit = (config["SAMPLES_ROW_LIMIT"] if self.result_type == ChartDataResultType.SAMPLES else config["ROW_LIMIT"]) self.row_limit = apply_max_row_limit(row_limit or default_row_limit) self.row_offset = row_offset or 0 self.filter = filters or [] self.series_limit = series_limit self.series_limit_metric = series_limit_metric self.order_desc = order_desc self.extras = extras if config["SIP_15_ENABLED"]: self.extras["time_range_endpoints"] = get_time_range_endpoints( form_data=self.extras) self.columns = columns self.orderby = orderby or [] self._rename_deprecated_fields(kwargs) self._move_deprecated_extra_fields(kwargs)
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, order_desc=True, prequeries=None, is_prequery=False, ): """Querying any sqla table from this common interface""" template_kwargs = { 'from_dttm': from_dttm, 'groupby': groupby, 'metrics': metrics, 'row_limit': row_limit, 'to_dttm': to_dttm, 'filter': filter, 'columns': {col.column_name: col for col in self.columns}, } template_kwargs.update(self.template_params_dict) template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception(_( 'Datetime column not provided as part table configuration ' 'and is required by this type of chart')) if not groupby and not metrics and not columns: raise Exception(_('Empty query?')) metrics_exprs = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict.get(m).get_sqla_col()) else: raise Exception(_("Metric '{}' is not valid".format(m))) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column('COUNT(*)').label( db_engine_spec.make_label_compatible('count')) select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.get_sqla_col() inner = col.get_sqla_col(col.column_name + '__') groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].get_sqla_col()) metrics_exprs = [] if granularity: dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] # Use main dttm column to support index with secondary dttm columns if db_engine_spec.time_secondary_columns and \ self.main_dttm_col in self.dttm_cols and \ self.main_dttm_col != dttm_col.column_name: time_filters.append(cols[self.main_dttm_col]. get_time_filter(from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ['col', 'op']]): continue col = flt['col'] op = flt['op'] col_obj = cols.get(col) if col_obj: is_list_target = op in ('in', 'not in') eq = self.filter_values_handler( flt.get('val'), target_column_is_numeric=col_obj.is_num, is_list_target=is_list_target) if op in ('in', 'not in'): cond = col_obj.get_sqla_col().in_(eq) if '<NULL>' in eq: cond = or_(cond, col_obj.get_sqla_col() == None) # noqa if op == 'not in': cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = utils.string_to_num(flt['val']) if op == '==': where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == '!=': where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == '>': where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == '<': where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == '>=': where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == '<=': where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == 'LIKE': where_clause_and.append(col_obj.get_sqla_col().like(eq)) elif op == 'IS NULL': where_clause_and.append(col_obj.get_sqla_col() == None) # noqa elif op == 'IS NOT NULL': where_clause_and.append( col_obj.get_sqla_col() != None) # noqa if extras: where = extras.get('where') if where: where = template_processor.process_template(where) where_clause_and += [sa.text('({})'.format(where))] having = extras.get('having') if having: having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and not columns: orderby = [(main_metric_expr, not order_desc)] for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and \ timeseries_limit and groupby and not time_groupby_inline: if self.database.db_engine_spec.inner_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = main_metric_expr.label('mme_inner__') inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs) subq = subq.select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, ) subq = subq.where(and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: if utils.is_adhoc_metric(timeseries_limit_metric): ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif timeseries_limit_metric in metrics_dict: timeseries_limit_metric = metrics_dict.get( timeseries_limit_metric, ) ob = timeseries_limit_metric.get_sqla_col() else: raise Exception(_("Metric '{}' is not valid".format(m))) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append( groupby_exprs[i] == column(gb + '__')) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: # run subquery to get top groups subquery_obj = { 'prequeries': prequeries, 'is_prequery': True, 'is_timeseries': False, 'row_limit': timeseries_limit, 'groupby': groupby, 'metrics': metrics, 'granularity': granularity, 'from_dttm': inner_from_dttm or from_dttm, 'to_dttm': inner_to_dttm or to_dttm, 'filter': filter, 'orderby': orderby, 'extras': extras, 'columns': columns, 'order_desc': True, } result = self.query(subquery_obj) cols = {col.column_name: col for col in self.columns} dimensions = [ c for c in result.df.columns if c not in metrics and c in cols ] top_groups = self._get_top_groups(result.df, dimensions) qry = qry.where(top_groups) return qry.select_from(tbl)