def check_restricted_metrics(self, aggregations):
    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            'Access to the metrics denied: ' + ', '.join(rejected_metrics),
        )
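# Hedged usage sketch for check_restricted_metrics: `datasource` stands in
# for any object exposing the method above, and the aggregation specs are
# illustrative Druid aggregators shaped like the m.json_obj dicts that
# run_query collects. Names prefixed with _example_ are hypothetical.
def _example_check_restricted_metrics(datasource):
    aggregations = OrderedDict([
        ('count', {'type': 'count', 'name': 'count'}),
        ('sum__revenue', {'type': 'doubleSum',
                          'fieldName': 'revenue',
                          'name': 'sum__revenue'}),
    ])
    try:
        datasource.check_restricted_metrics(aggregations)
    except MetricPermException as e:
        # Raised when a restricted metric in `aggregations` is not covered
        # by the current user's 'metric_access' permission, e.g.
        # "Access to the metrics denied: sum__revenue"
        print(e)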
def get_dim_acl_where(self):
    # Dimension-level access validation: build SQLAlchemy WHERE clauses
    # restricting each filterable column to the values the current user
    # was granted through 'dim_access' permissions.
    cols = {col.column_name: col for col in self.columns
            if self.has_col_access(col)}
    dim_acslist = security.get_permission_view_by_permission('dim_access')
    dim_acl_map = {}
    acl_where_clause_and = []
    for ac in dim_acslist:
        # Permission view names are expected to look like
        # '<column>_<value>', e.g. 'country_US'.
        parts = ac.split('_')
        if (sm.has_access('dim_access', ac) and
                len(parts) > 1 and
                parts[0] in self.filterable_column_names):
            if parts[0] in dim_acl_map:
                dim_acl_map[parts[0]].append(parts[1])
            else:
                dim_acl_map[parts[0]] = [parts[1]]
    acl_filter = []
    for dim in dim_acl_map:
        acl_filter.append({
            'col': dim,
            'op': 'in',
            'val': dim_acl_map[dim],
        })
    for flt in acl_filter:
        if not all(flt.get(s) for s in ('col', 'op', 'val')):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        if col not in cols:
            raise Exception(
                "Column '{}' does not exist or is not accessible".format(col))
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                acl_where_clause_and.append(cond)
    return acl_where_clause_and
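# Hedged sketch of how get_dim_acl_where interprets 'dim_access'
# permission view names. Assumption (inferred from the split('_') logic
# above): each permission is named '<column>_<value>', and permissions
# sharing a column are merged into a single IN filter. Pure Python,
# runnable standalone; the permission names are made up.
def _example_dim_acl_mapping():
    perms = ['country_US', 'country_CN', 'platform_ios']
    dim_acl_map = {}
    for ac in perms:
        parts = ac.split('_')
        if len(parts) > 1:
            dim_acl_map.setdefault(parts[0], []).append(parts[1])
    # dim_acl_map == {'country': ['US', 'CN'], 'platform': ['ios']},
    # which get_dim_acl_where turns into SQLAlchemy IN clauses such as
    # table.c.country.in_(['US', 'CN']) and table.c.platform.in_(['ios'])
    return dim_acl_map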
def run_query(  # noqa / druid
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=None,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        orderby=None,
        extras=None,  # noqa
        select=None,  # noqa
        columns=None, phase=2, client=None, form_data=None,
        order_desc=True):
    """Runs a query against Druid and returns a dataframe.
    """
    # TODO refactor into using a TBD Query object
    client = client or self.cluster.get_pydruid_client()
    if not is_timeseries:
        granularity = 'all'
    inner_from_dttm = inner_from_dttm or from_dttm
    inner_to_dttm = inner_to_dttm or to_dttm

    # add tzinfo to native datetime with config
    from_dttm = from_dttm.replace(tzinfo=DRUID_TZ)
    to_dttm = to_dttm.replace(tzinfo=DRUID_TZ)
    timezone = from_dttm.tzname()

    query_str = ''
    metrics_dict = {m.metric_name: m for m in self.metrics}
    columns_dict = {c.column_name: c for c in self.columns}

    all_metrics, post_aggs = self._metrics_and_post_aggs(
        metrics, metrics_dict)

    aggregations = OrderedDict()
    for m in self.metrics:
        if m.metric_name in all_metrics:
            aggregations[m.metric_name] = m.json_obj

    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            'Access to the metrics denied: ' + ', '.join(rejected_metrics),
        )

    # the dimensions list with dimensionSpecs expanded
    dimensions = []
    groupby = [gb for gb in groupby if gb in columns_dict]
    for column_name in groupby:
        col = columns_dict.get(column_name)
        dim_spec = col.dimension_spec
        if dim_spec:
            dimensions.append(dim_spec)
        else:
            dimensions.append(column_name)
    qry = dict(
        datasource=self.datasource_name,
        dimensions=dimensions,
        aggregations=aggregations,
        granularity=DruidDatasource.granularity(
            granularity,
            timezone=timezone,
            origin=extras.get('druid_time_origin'),
        ),
        post_aggregations=post_aggs,
        intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
    )

    filters = DruidDatasource.get_filters(filter, self.num_cols)
    if filters:
        qry['filter'] = filters

    having_filters = self.get_having_filters(extras.get('having_druid'))
    if having_filters:
        qry['having'] = having_filters

    order_direction = 'descending' if order_desc else 'ascending'

    if len(groupby) == 0 and not having_filters:
        del qry['dimensions']
        client.timeseries(**qry)
    if (not having_filters and
            len(groupby) == 1 and
            order_desc and
            not isinstance(list(qry.get('dimensions'))[0], dict)):
        dim = list(qry.get('dimensions'))[0]
        if timeseries_limit_metric:
            order_by = timeseries_limit_metric
        else:
            order_by = list(qry['aggregations'].keys())[0]
        # Limit on the number of timeseries, doing a two-phases query
        pre_qry = deepcopy(qry)
        pre_qry['granularity'] = 'all'
        pre_qry['threshold'] = min(row_limit, timeseries_limit or row_limit)
        pre_qry['metric'] = order_by
        pre_qry['dimension'] = dim
        del pre_qry['dimensions']
        client.topn(**pre_qry)
        query_str += '// Two phase query\n// Phase 1\n'
        query_str += json.dumps(
            client.query_builder.last_query.query_dict, indent=2)
        query_str += '\n'
        if phase == 1:
            return query_str
        query_str += ("// Phase 2 (built based on phase one's results)\n")
        df = client.export_pandas()
        qry['filter'] = self._add_filter_from_pre_query_data(
            df, qry['dimensions'], filters)
        qry['threshold'] = timeseries_limit or 1000
        if row_limit and granularity == 'all':
            qry['threshold'] = row_limit
        qry['dimension'] = dim
        del qry['dimensions']
        qry['metric'] = list(qry['aggregations'].keys())[0]
        client.topn(**qry)
    elif len(groupby) > 1 or having_filters or not order_desc:
        # If grouping on multiple fields or using a having filter
        # we have to force a groupby query
        if timeseries_limit and is_timeseries:
            order_by = metrics[0] if metrics else self.metrics[0]
            if timeseries_limit_metric:
                order_by = timeseries_limit_metric
            # Limit on the number of timeseries, doing a two-phases query
            pre_qry = deepcopy(qry)
            pre_qry['granularity'] = 'all'
            pre_qry['limit_spec'] = {
                'type': 'default',
                'limit': min(timeseries_limit, row_limit),
                'intervals': (inner_from_dttm.isoformat() + '/' +
                              inner_to_dttm.isoformat()),
                'columns': [{
                    'dimension': order_by,
                    'direction': order_direction,
                }],
            }
            client.groupby(**pre_qry)
            query_str += '// Two phase query\n// Phase 1\n'
            query_str += json.dumps(
                client.query_builder.last_query.query_dict, indent=2)
            query_str += '\n'
            if phase == 1:
                return query_str
            query_str += (
                "// Phase 2 (built based on phase one's results)\n")
            df = client.export_pandas()
            qry['filter'] = self._add_filter_from_pre_query_data(
                df, qry['dimensions'], filters,
            )
        qry['limit_spec'] = None
        if row_limit:
            qry['limit_spec'] = {
                'type': 'default',
                'limit': row_limit,
                'columns': [{
                    'dimension': (metrics[0] if metrics else self.metrics[0]),
                    'direction': order_direction,
                }],
            }
        client.groupby(**qry)
    query_str += json.dumps(
        client.query_builder.last_query.query_dict, indent=2)
    return query_str
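# Hedged sketch of the filter that _add_filter_from_pre_query_data is
# assumed to build from the phase-1 results (the same logic appears
# inline in the older run_query variants below): each pre-query row
# becomes an AND of dimension equalities, the rows are OR'ed together,
# and the result is AND'ed with the original filters if any. Uses the
# same pydruid Dimension/Filter classes as the code below.
def _example_filter_from_pre_query(df, dims, orig_filters=None):
    from pydruid.utils.filters import Dimension, Filter
    row_filters = []
    for _, row in df.iterrows():
        fields = [Dimension(dim) == row[dim] for dim in dims]
        if len(fields) > 1:
            row_filters.append(Filter(type='and', fields=fields))
        elif fields:
            row_filters.append(fields[0])
    if not row_filters:
        return orig_filters
    ff = Filter(type='or', fields=row_filters)
    if not orig_filters:
        return ff
    return Filter(type='and', fields=[ff, orig_filters])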
def run_query(  # noqa / druid
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=None,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        orderby=None,
        extras=None,  # noqa
        select=None,  # noqa
        columns=None, phase=2, client=None, form_data=None):
    """Runs a query against Druid and returns a dataframe.
    """
    # TODO refactor into using a TBD Query object
    client = client or self.cluster.get_pydruid_client()
    if not is_timeseries:
        granularity = 'all'
    inner_from_dttm = inner_from_dttm or from_dttm
    inner_to_dttm = inner_to_dttm or to_dttm

    # add tzinfo to native datetime with config
    from_dttm = from_dttm.replace(tzinfo=DRUID_TZ)
    to_dttm = to_dttm.replace(tzinfo=DRUID_TZ)
    timezone = from_dttm.tzname()

    query_str = ""
    metrics_dict = {m.metric_name: m for m in self.metrics}
    columns_dict = {c.column_name: c for c in self.columns}

    all_metrics, post_aggs = self._metrics_and_post_aggs(
        metrics, metrics_dict)

    aggregations = OrderedDict()
    for m in self.metrics:
        if m.metric_name in all_metrics:
            aggregations[m.metric_name] = m.json_obj

    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            "Access to the metrics denied: " + ', '.join(rejected_metrics))

    # the dimensions list with dimensionSpecs expanded
    dimensions = []
    groupby = [gb for gb in groupby if gb in columns_dict]
    for column_name in groupby:
        col = columns_dict.get(column_name)
        dim_spec = col.dimension_spec
        if dim_spec:
            dimensions.append(dim_spec)
        else:
            dimensions.append(column_name)
    qry = dict(
        datasource=self.datasource_name,
        dimensions=dimensions,
        aggregations=aggregations,
        granularity=DruidDatasource.granularity(
            granularity,
            timezone=timezone,
            origin=extras.get('druid_time_origin'),
        ),
        post_aggregations=post_aggs,
        intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
    )

    filters = self.get_filters(filter)
    if filters:
        qry['filter'] = filters

    having_filters = self.get_having_filters(extras.get('having_druid'))
    if having_filters:
        qry['having'] = having_filters

    orig_filters = filters
    if len(groupby) == 0 and not having_filters:
        del qry['dimensions']
        client.timeseries(**qry)
    if not having_filters and len(groupby) == 1:
        qry['threshold'] = timeseries_limit or 1000
        if row_limit and granularity == 'all':
            qry['threshold'] = row_limit
        qry['dimension'] = list(qry.get('dimensions'))[0]
        del qry['dimensions']
        qry['metric'] = list(qry['aggregations'].keys())[0]
        client.topn(**qry)
    elif len(groupby) > 1 or having_filters:
        # If grouping on multiple fields or using a having filter
        # we have to force a groupby query
        if timeseries_limit and is_timeseries:
            order_by = metrics[0] if metrics else self.metrics[0]
            if timeseries_limit_metric:
                order_by = timeseries_limit_metric
            # Limit on the number of timeseries, doing a two-phases query
            pre_qry = deepcopy(qry)
            pre_qry['granularity'] = "all"
            pre_qry['limit_spec'] = {
                "type": "default",
                "limit": timeseries_limit,
                'intervals': (inner_from_dttm.isoformat() + '/' +
                              inner_to_dttm.isoformat()),
                "columns": [{
                    "dimension": order_by,
                    "direction": "descending",
                }],
            }
            client.groupby(**pre_qry)
            query_str += "// Two phase query\n// Phase 1\n"
            query_str += json.dumps(
                client.query_builder.last_query.query_dict, indent=2)
            query_str += "\n"
            if phase == 1:
                return query_str
            query_str += (
                "//\nPhase 2 (built based on phase one's results)\n")
            df = client.export_pandas()
            if df is not None and not df.empty:
                dims = qry['dimensions']
                filters = []
                for unused, row in df.iterrows():
                    fields = []
                    for dim in dims:
                        f = Dimension(dim) == row[dim]
                        fields.append(f)
                    if len(fields) > 1:
                        filt = Filter(type="and", fields=fields)
                        filters.append(filt)
                    elif fields:
                        filters.append(fields[0])
                if filters:
                    ff = Filter(type="or", fields=filters)
                    if not orig_filters:
                        qry['filter'] = ff
                    else:
                        qry['filter'] = Filter(
                            type="and", fields=[ff, orig_filters])
        qry['limit_spec'] = None
        if row_limit:
            qry['limit_spec'] = {
                "type": "default",
                "limit": row_limit,
                "columns": [{
                    "dimension": (
                        metrics[0] if metrics else self.metrics[0]),
                    "direction": "descending",
                }],
            }
        client.groupby(**qry)
    query_str += json.dumps(
        client.query_builder.last_query.query_dict, indent=2)
    return query_str
def run_query(  # noqa / druid
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=None,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        orderby=None,
        extras=None,  # noqa
        select=None,  # noqa
        columns=None, phase=2, client=None, form_data=None,
        order_desc=True):
    """Runs a query against Druid and returns a dataframe.
    """
    # TODO refactor into using a TBD Query object
    client = client or self.cluster.get_pydruid_client()
    if not is_timeseries:
        granularity = 'all'
    inner_from_dttm = inner_from_dttm or from_dttm
    inner_to_dttm = inner_to_dttm or to_dttm

    # add tzinfo to native datetime with config
    from_dttm = from_dttm.replace(tzinfo=DRUID_TZ)
    to_dttm = to_dttm.replace(tzinfo=DRUID_TZ)
    timezone = from_dttm.tzname()

    query_str = ""
    metrics_dict = {m.metric_name: m for m in self.metrics}
    columns_dict = {c.column_name: c for c in self.columns}

    all_metrics, post_aggs = self._metrics_and_post_aggs(
        metrics, metrics_dict)

    aggregations = OrderedDict()
    for m in self.metrics:
        if m.metric_name in all_metrics:
            aggregations[m.metric_name] = m.json_obj

    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            "Access to the metrics denied: " + ', '.join(rejected_metrics)
        )

    # the dimensions list with dimensionSpecs expanded
    dimensions = []
    groupby = [gb for gb in groupby if gb in columns_dict]
    for column_name in groupby:
        col = columns_dict.get(column_name)
        dim_spec = col.dimension_spec
        if dim_spec:
            dimensions.append(dim_spec)
        else:
            dimensions.append(column_name)
    qry = dict(
        datasource=self.datasource_name,
        dimensions=dimensions,
        aggregations=aggregations,
        granularity=DruidDatasource.granularity(
            granularity,
            timezone=timezone,
            origin=extras.get('druid_time_origin'),
        ),
        post_aggregations=post_aggs,
        intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
    )

    filters = self.get_filters(filter)
    if filters:
        qry['filter'] = filters

    having_filters = self.get_having_filters(extras.get('having_druid'))
    if having_filters:
        qry['having'] = having_filters

    order_direction = "descending" if order_desc else "ascending"

    if len(groupby) == 0 and not having_filters:
        del qry['dimensions']
        client.timeseries(**qry)
    if not having_filters and len(groupby) == 1 and order_desc:
        dim = list(qry.get('dimensions'))[0]
        if timeseries_limit_metric:
            order_by = timeseries_limit_metric
        else:
            order_by = list(qry['aggregations'].keys())[0]
        # Limit on the number of timeseries, doing a two-phases query
        pre_qry = deepcopy(qry)
        pre_qry['granularity'] = "all"
        pre_qry['threshold'] = min(row_limit, timeseries_limit or row_limit)
        pre_qry['metric'] = order_by
        pre_qry['dimension'] = dim
        del pre_qry['dimensions']
        client.topn(**pre_qry)
        query_str += "// Two phase query\n// Phase 1\n"
        query_str += json.dumps(
            client.query_builder.last_query.query_dict, indent=2)
        query_str += "\n"
        if phase == 1:
            return query_str
        query_str += (
            "//\nPhase 2 (built based on phase one's results)\n")
        df = client.export_pandas()
        qry['filter'] = self._add_filter_from_pre_query_data(
            df, qry['dimensions'], filters)
        qry['threshold'] = timeseries_limit or 1000
        if row_limit and granularity == 'all':
            qry['threshold'] = row_limit
        qry['dimension'] = dim
        del qry['dimensions']
        qry['metric'] = list(qry['aggregations'].keys())[0]
        client.topn(**qry)
    elif len(groupby) > 1 or having_filters or not order_desc:
        # If grouping on multiple fields or using a having filter
        # we have to force a groupby query
        if timeseries_limit and is_timeseries:
            order_by = metrics[0] if metrics else self.metrics[0]
            if timeseries_limit_metric:
                order_by = timeseries_limit_metric
            # Limit on the number of timeseries, doing a two-phases query
            pre_qry = deepcopy(qry)
            pre_qry['granularity'] = "all"
            pre_qry['limit_spec'] = {
                "type": "default",
                "limit": min(timeseries_limit, row_limit),
                'intervals': (
                    inner_from_dttm.isoformat() + '/' +
                    inner_to_dttm.isoformat()),
                "columns": [{
                    "dimension": order_by,
                    "direction": order_direction,
                }],
            }
            client.groupby(**pre_qry)
            query_str += "// Two phase query\n// Phase 1\n"
            query_str += json.dumps(
                client.query_builder.last_query.query_dict, indent=2)
            query_str += "\n"
            if phase == 1:
                return query_str
            query_str += (
                "//\nPhase 2 (built based on phase one's results)\n")
            df = client.export_pandas()
            qry['filter'] = self._add_filter_from_pre_query_data(
                df, qry['dimensions'], filters)
        qry['limit_spec'] = None
        if row_limit:
            qry['limit_spec'] = {
                "type": "default",
                "limit": row_limit,
                "columns": [{
                    "dimension": (
                        metrics[0] if metrics else self.metrics[0]),
                    "direction": order_direction,
                }],
            }
        client.groupby(**qry)
    query_str += json.dumps(
        client.query_builder.last_query.query_dict, indent=2)
    return query_str
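# Hedged sketch of the 'default' limitSpec dicts assembled in the groupby
# branches above; parameter values are illustrative. Phase 1 orders by
# the series-limit metric over the inner interval to pick the top groups,
# and phase 2 re-orders by the first metric under row_limit.
def _example_limit_spec(order_by='count', limit=100,
                        order_direction='descending'):
    return {
        'type': 'default',
        'limit': limit,
        'columns': [{
            'dimension': order_by,
            'direction': order_direction,
        }],
    }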
def run_query(  # noqa / druid
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=None,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        orderby=None,
        extras=None,  # noqa
        select=None,  # noqa
        columns=None, phase=2, client=None, form_data=None):
    """Runs a query against Druid and returns a dataframe.
    """
    # TODO refactor into using a TBD Query object
    client = client or self.cluster.get_pydruid_client()
    if not is_timeseries:
        granularity = 'all'
    inner_from_dttm = inner_from_dttm or from_dttm
    inner_to_dttm = inner_to_dttm or to_dttm

    # add tzinfo to native datetime with config
    from_dttm = from_dttm.replace(tzinfo=DRUID_TZ)
    to_dttm = to_dttm.replace(tzinfo=DRUID_TZ)
    timezone = from_dttm.tzname()

    query_str = ""
    metrics_dict = {m.metric_name: m for m in self.metrics}
    all_metrics = []
    post_aggs = {}

    columns_dict = {c.column_name: c for c in self.columns}

    def recursive_get_fields(_conf):
        _fields = _conf.get('fields', [])
        field_names = []
        for _f in _fields:
            _type = _f.get('type')
            if _type in ['fieldAccess', 'hyperUniqueCardinality']:
                field_names.append(_f.get('fieldName'))
            elif _type == 'arithmetic':
                field_names += recursive_get_fields(_f)
        return list(set(field_names))

    for metric_name in metrics:
        metric = metrics_dict[metric_name]
        if metric.metric_type != 'postagg':
            all_metrics.append(metric_name)
        else:
            mconf = metric.json_obj
            all_metrics += recursive_get_fields(mconf)
            all_metrics += mconf.get('fieldNames', [])
            if mconf.get('type') == 'javascript':
                post_aggs[metric_name] = JavascriptPostAggregator(
                    name=mconf.get('name', ''),
                    field_names=mconf.get('fieldNames', []),
                    function=mconf.get('function', ''))
            elif mconf.get('type') == 'quantile':
                post_aggs[metric_name] = Quantile(
                    mconf.get('name', ''),
                    mconf.get('probability', ''),
                )
            elif mconf.get('type') == 'quantiles':
                post_aggs[metric_name] = Quantiles(
                    mconf.get('name', ''),
                    mconf.get('probabilities', ''),
                )
            elif mconf.get('type') == 'fieldAccess':
                post_aggs[metric_name] = Field(mconf.get('name'))
            elif mconf.get('type') == 'constant':
                post_aggs[metric_name] = Const(
                    mconf.get('value'),
                    output_name=mconf.get('name', '')
                )
            elif mconf.get('type') == 'hyperUniqueCardinality':
                post_aggs[metric_name] = HyperUniqueCardinality(
                    mconf.get('name')
                )
            else:
                post_aggs[metric_name] = Postaggregator(
                    mconf.get('fn', "/"),
                    mconf.get('fields', []),
                    mconf.get('name', ''))

    aggregations = OrderedDict()
    for m in self.metrics:
        if m.metric_name in all_metrics:
            aggregations[m.metric_name] = m.json_obj

    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            "Access to the metrics denied: " + ', '.join(rejected_metrics)
        )

    # the dimensions list with dimensionSpecs expanded
    dimensions = []
    groupby = [gb for gb in groupby if gb in columns_dict]
    for column_name in groupby:
        col = columns_dict.get(column_name)
        dim_spec = col.dimension_spec
        if dim_spec:
            dimensions.append(dim_spec)
        else:
            dimensions.append(column_name)
    qry = dict(
        datasource=self.datasource_name,
        dimensions=dimensions,
        aggregations=aggregations,
        granularity=DruidDatasource.granularity(
            granularity,
            timezone=timezone,
            origin=extras.get('druid_time_origin'),
        ),
        post_aggregations=post_aggs,
        intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
    )

    filters = self.get_filters(filter)
    if filters:
        qry['filter'] = filters

    having_filters = self.get_having_filters(extras.get('having_druid'))
    if having_filters:
        qry['having'] = having_filters

    orig_filters = filters
    if len(groupby) == 0 and not having_filters:
        del qry['dimensions']
        client.timeseries(**qry)
    if not having_filters and len(groupby) == 1:
        qry['threshold'] = timeseries_limit or 1000
        if row_limit and granularity == 'all':
            qry['threshold'] = row_limit
        qry['dimension'] = list(qry.get('dimensions'))[0]
        del qry['dimensions']
        qry['metric'] = list(qry['aggregations'].keys())[0]
        client.topn(**qry)
    elif len(groupby) > 1 or having_filters:
        # If grouping on multiple fields or using a having filter
        # we have to force a groupby query
        if timeseries_limit and is_timeseries:
            order_by = metrics[0] if metrics else self.metrics[0]
            if timeseries_limit_metric:
                order_by = timeseries_limit_metric
            # Limit on the number of timeseries, doing a two-phases query
            pre_qry = deepcopy(qry)
            pre_qry['granularity'] = "all"
            pre_qry['limit_spec'] = {
                "type": "default",
                "limit": timeseries_limit,
                'intervals': (
                    inner_from_dttm.isoformat() + '/' +
                    inner_to_dttm.isoformat()),
                "columns": [{
                    "dimension": order_by,
                    "direction": "descending",
                }],
            }
            client.groupby(**pre_qry)
            query_str += "// Two phase query\n// Phase 1\n"
            query_str += json.dumps(
                client.query_builder.last_query.query_dict, indent=2)
            query_str += "\n"
            if phase == 1:
                return query_str
            query_str += (
                "//\nPhase 2 (built based on phase one's results)\n")
            df = client.export_pandas()
            if df is not None and not df.empty:
                dims = qry['dimensions']
                filters = []
                for unused, row in df.iterrows():
                    fields = []
                    for dim in dims:
                        f = Dimension(dim) == row[dim]
                        fields.append(f)
                    if len(fields) > 1:
                        filt = Filter(type="and", fields=fields)
                        filters.append(filt)
                    elif fields:
                        filters.append(fields[0])
                if filters:
                    ff = Filter(type="or", fields=filters)
                    if not orig_filters:
                        qry['filter'] = ff
                    else:
                        qry['filter'] = Filter(type="and", fields=[
                            ff, orig_filters])
        qry['limit_spec'] = None
        if row_limit:
            qry['limit_spec'] = {
                "type": "default",
                "limit": row_limit,
                "columns": [{
                    "dimension": (
                        metrics[0] if metrics else self.metrics[0]),
                    "direction": "descending",
                }],
            }
        client.groupby(**qry)
    query_str += json.dumps(
        client.query_builder.last_query.query_dict, indent=2)
    return query_str
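# Hedged, standalone restatement of the recursive_get_fields walker
# defined inline above, runnable outside the class for illustration.
# Given a nested 'arithmetic' post-aggregator config (Druid post-agg
# JSON shape), it returns the de-duplicated list of referenced
# aggregator field names; the sample config is made up.
def _example_recursive_get_fields(mconf=None):
    def recursive_get_fields(_conf):
        field_names = []
        for _f in _conf.get('fields', []):
            _type = _f.get('type')
            if _type in ('fieldAccess', 'hyperUniqueCardinality'):
                field_names.append(_f.get('fieldName'))
            elif _type == 'arithmetic':
                field_names += recursive_get_fields(_f)
        return list(set(field_names))

    mconf = mconf or {
        'type': 'arithmetic', 'fn': '/', 'name': 'avg__price',
        'fields': [
            {'type': 'fieldAccess', 'fieldName': 'sum__price'},
            {'type': 'arithmetic', 'fn': '+', 'fields': [
                {'type': 'fieldAccess', 'fieldName': 'count'},
                {'type': 'fieldAccess', 'fieldName': 'sum__price'},
            ]},
        ],
    }
    # Duplicates collapse via set(); result here is
    # ['count', 'sum__price'] in arbitrary order.
    return recursive_get_fields(mconf)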
def get_query_str(  # noqa / druid
        self, client, qry_start_dttm,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=None,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        orderby=None,
        extras=None,  # noqa
        select=None,  # noqa
        columns=None, phase=2):
    """Runs a query against Druid and returns a dataframe.

    This query interface is common to SqlAlchemy and Druid
    """
    # TODO refactor into using a TBD Query object
    if not is_timeseries:
        granularity = 'all'
    inner_from_dttm = inner_from_dttm or from_dttm
    inner_to_dttm = inner_to_dttm or to_dttm

    # add tzinfo to native datetime with config
    from_dttm = from_dttm.replace(tzinfo=DRUID_TZ)
    to_dttm = to_dttm.replace(tzinfo=DRUID_TZ)
    timezone = from_dttm.tzname()

    query_str = ""
    metrics_dict = {m.metric_name: m for m in self.metrics}
    all_metrics = []
    post_aggs = {}

    columns_dict = {c.column_name: c for c in self.columns}

    def recursive_get_fields(_conf):
        _fields = _conf.get('fields', [])
        field_names = []
        for _f in _fields:
            _type = _f.get('type')
            if _type in ['fieldAccess', 'hyperUniqueCardinality']:
                field_names.append(_f.get('fieldName'))
            elif _type == 'arithmetic':
                field_names += recursive_get_fields(_f)
        return list(set(field_names))

    for metric_name in metrics:
        metric = metrics_dict[metric_name]
        if metric.metric_type != 'postagg':
            all_metrics.append(metric_name)
        else:
            mconf = metric.json_obj
            all_metrics += recursive_get_fields(mconf)
            all_metrics += mconf.get('fieldNames', [])
            if mconf.get('type') == 'javascript':
                post_aggs[metric_name] = JavascriptPostAggregator(
                    name=mconf.get('name', ''),
                    field_names=mconf.get('fieldNames', []),
                    function=mconf.get('function', ''))
            elif mconf.get('type') == 'quantile':
                post_aggs[metric_name] = Quantile(
                    mconf.get('name', ''),
                    mconf.get('probability', ''),
                )
            elif mconf.get('type') == 'quantiles':
                post_aggs[metric_name] = Quantiles(
                    mconf.get('name', ''),
                    mconf.get('probabilities', ''),
                )
            elif mconf.get('type') == 'fieldAccess':
                post_aggs[metric_name] = Field(mconf.get('name'))
            elif mconf.get('type') == 'constant':
                post_aggs[metric_name] = Const(
                    mconf.get('value'),
                    output_name=mconf.get('name', ''))
            elif mconf.get('type') == 'hyperUniqueCardinality':
                post_aggs[metric_name] = HyperUniqueCardinality(
                    mconf.get('name'))
            else:
                post_aggs[metric_name] = Postaggregator(
                    mconf.get('fn', "/"),
                    mconf.get('fields', []),
                    mconf.get('name', ''))

    aggregations = OrderedDict()
    for m in self.metrics:
        if m.metric_name in all_metrics:
            aggregations[m.metric_name] = m.json_obj

    rejected_metrics = [
        m.metric_name for m in self.metrics
        if m.is_restricted and
        m.metric_name in aggregations.keys() and
        not sm.has_access('metric_access', m.perm)
    ]
    if rejected_metrics:
        raise MetricPermException(
            "Access to the metrics denied: " + ', '.join(rejected_metrics))

    # the dimensions list with dimensionSpecs expanded
    dimensions = []
    groupby = [gb for gb in groupby if gb in columns_dict]
    for column_name in groupby:
        col = columns_dict.get(column_name)
        dim_spec = col.dimension_spec
        if dim_spec:
            dimensions.append(dim_spec)
        else:
            dimensions.append(column_name)
    qry = dict(
        datasource=self.datasource_name,
        dimensions=dimensions,
        aggregations=aggregations,
        granularity=DruidDatasource.granularity(
            granularity,
            timezone=timezone,
            origin=extras.get('druid_time_origin'),
        ),
        post_aggregations=post_aggs,
        intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
    )

    filters = self.get_filters(filter)
    if filters:
        qry['filter'] = filters

    having_filters = self.get_having_filters(extras.get('having_druid'))
    if having_filters:
        qry['having'] = having_filters

    orig_filters = filters
    if len(groupby) == 0:
        del qry['dimensions']
        client.timeseries(**qry)
    if not having_filters and len(groupby) == 1:
        qry['threshold'] = timeseries_limit or 1000
        if row_limit and granularity == 'all':
            qry['threshold'] = row_limit
        qry['dimension'] = list(qry.get('dimensions'))[0]
        del qry['dimensions']
        qry['metric'] = list(qry['aggregations'].keys())[0]
        client.topn(**qry)
    elif len(groupby) > 1 or having_filters:
        # If grouping on multiple fields or using a having filter
        # we have to force a groupby query
        if timeseries_limit and is_timeseries:
            order_by = metrics[0] if metrics else self.metrics[0]
            if timeseries_limit_metric:
                order_by = timeseries_limit_metric
            # Limit on the number of timeseries, doing a two-phases query
            pre_qry = deepcopy(qry)
            pre_qry['granularity'] = "all"
            pre_qry['limit_spec'] = {
                "type": "default",
                "limit": timeseries_limit,
                'intervals': (inner_from_dttm.isoformat() + '/' +
                              inner_to_dttm.isoformat()),
                "columns": [{
                    "dimension": order_by,
                    "direction": "descending",
                }],
            }
            client.groupby(**pre_qry)
            query_str += "// Two phase query\n// Phase 1\n"
            query_str += json.dumps(
                client.query_builder.last_query.query_dict, indent=2)
            query_str += "\n"
            if phase == 1:
                return query_str
            query_str += (
                "//\nPhase 2 (built based on phase one's results)\n")
            df = client.export_pandas()
            if df is not None and not df.empty:
                dims = qry['dimensions']
                filters = []
                for unused, row in df.iterrows():
                    fields = []
                    for dim in dims:
                        f = Dimension(dim) == row[dim]
                        fields.append(f)
                    if len(fields) > 1:
                        filt = Filter(type="and", fields=fields)
                        filters.append(filt)
                    elif fields:
                        filters.append(fields[0])
                if filters:
                    ff = Filter(type="or", fields=filters)
                    if not orig_filters:
                        qry['filter'] = ff
                    else:
                        qry['filter'] = Filter(
                            type="and", fields=[ff, orig_filters])
        qry['limit_spec'] = None
        if row_limit:
            qry['limit_spec'] = {
                "type": "default",
                "limit": row_limit,
                "columns": [{
                    "dimension": (
                        metrics[0] if metrics else self.metrics[0]),
                    "direction": "descending",
                }],
            }
        client.groupby(**qry)
    query_str += json.dumps(
        client.query_builder.last_query.query_dict, indent=2)
    return query_str
def has_met_access(self, m):
    # A metric is accessible when it is unrestricted, or when the current
    # user holds the 'metric_access' permission on it.
    return not m.is_restricted or sm.has_access('metric_access', m.perm)
def has_col_access(self, c):
    # A column is accessible when it is unrestricted, or when the current
    # user holds the 'columns_access' permission on it.
    return not c.is_restricted or sm.has_access('columns_access', c.perm)
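# Hedged usage sketch for the two access helpers above: narrow a
# datasource's metric and column lists to what the current user may see.
# `datasource` is assumed to be an object exposing .metrics, .columns and
# the helpers as methods (get_dim_acl_where already calls
# self.has_col_access this way); the function name is hypothetical.
def _example_accessible_fields(datasource):
    metrics = [m for m in datasource.metrics
               if datasource.has_met_access(m)]
    columns = [c for c in datasource.columns
               if datasource.has_col_access(c)]
    return metrics, columns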