def execute_query(self):
    """Execute the report query and store the formatted result.

    Builds a date-grouped, annotated queryset from the mapper's query
    table, optionally ranks/limits and computes deltas, then stores the
    grouped data and ordered totals on ``self`` before formatting.

    Returns:
        (Dict): Dictionary response of query params, data, and total
    """
    data = []
    q_table = self._mapper.query_table
    # All ORM work must happen inside the tenant's schema context.
    with tenant_context(self.tenant):
        query = q_table.objects.filter(self.query_filter)
        query_data = query.annotate(**self.annotations)
        # Always group by date first, then any user-requested group-bys.
        query_group_by = ['date'] + self._get_group_by()
        query_order_by = [
            '-date',
        ]
        query_order_by.extend([self.order])
        annotations = self._mapper.report_type_map.get('annotations')
        query_data = query_data.values(*query_group_by).annotate(
            **annotations)
        if 'account' in query_group_by:
            # Prefer the human-readable alias; fall back to the raw account id.
            query_data = query_data.annotate(account_alias=Coalesce(
                F(self._mapper.provider_map.get('alias')), 'usage_account_id'))
        query_sum = self._build_sum(query)
        if self._limit:
            # Rank rows per date with a window function so the top-N
            # per day can be selected by _ranked_list.
            rank_order = getattr(F(self.order_field), self.order_direction)()
            rank_by_total = Window(expression=RowNumber(),
                                   partition_by=F('date'),
                                   order_by=rank_order)
            query_data = query_data.annotate(rank=rank_by_total)
            # Rank is a secondary sort key, right after '-date'.
            query_order_by.insert(1, 'rank')
            query_data = self._ranked_list(query_data)
        if self._delta:
            query_data = self.add_deltas(query_data, query_sum)
        is_csv_output = self.parameters.accept_type and 'text/csv' in self.parameters.accept_type
        query_data, query_group_by = self.strip_label_column_name(
            query_data, query_group_by)
        query_data = self.order_by(query_data, query_order_by)
        if is_csv_output:
            # CSV output stays flat; re-rank if a limit applies.
            if self._limit:
                data = self._ranked_list(list(query_data))
            else:
                data = list(query_data)
        else:
            # JSON output is nested by group-by keys (date handled separately
            # by _transform_data).
            groups = copy.deepcopy(query_group_by)
            groups.remove('date')
            data = self._apply_group_by(list(query_data), groups)
            data = self._transform_data(query_group_by, 0, data)
    # Order the totals: 'units' first, then annotation keys, then the rest.
    key_order = list(['units'] + list(annotations.keys()))
    ordered_total = {
        total_key: query_sum[total_key]
        for total_key in key_order if total_key in query_sum
    }
    ordered_total.update(query_sum)
    self.query_sum = ordered_total
    self.query_data = data
    return self._format_query_response()
def execute_individual_query(self, org_unit_applied=False):  # noqa: C901
    """Execute query and return provided data.

    Args:
        org_unit_applied (bool): when True, skip the rank/limit pass
            (ranking is handled by the org-unit aware caller).

    Returns:
        (Dict): Dictionary response of query params, data, and total
    """
    data = []
    # All ORM work must happen inside the tenant's schema context.
    with tenant_context(self.tenant):
        query_table = self.query_table
        LOG.debug(f"Using query table: {query_table}")
        tag_results = None
        query = query_table.objects.filter(self.query_filter)
        query_data = query.annotate(**self.annotations)
        # Always group by date first, then any user-requested group-bys.
        query_group_by = ["date"] + self._get_group_by()
        query_order_by = ["-date"]
        query_order_by.extend([self.order])
        # Deep-copy so popping count keys does not mutate the shared mapper.
        annotations = copy.deepcopy(self._mapper.report_type_map.get("annotations", {}))
        if not self.parameters.parameters.get("compute_count"):
            # Query parameter indicates count should be removed from DB queries
            annotations.pop("count", None)
            annotations.pop("count_units", None)
        query_data = query_data.values(*query_group_by).annotate(**annotations)
        if "account" in query_group_by:
            # Prefer the human-readable alias; fall back to the raw account id.
            query_data = query_data.annotate(
                account_alias=Coalesce(F(self._mapper.provider_map.get("alias")), "usage_account_id")
            )
        if self.parameters.parameters.get("check_tags"):
            tag_results = self._get_associated_tags(query_table, self.query_filter)
        query_sum = self._build_sum(query, annotations)
        if self._limit and not org_unit_applied:
            # Rank rows per date with a window function so the top-N
            # per day can be selected by _ranked_list.
            rank_order = getattr(F(self.order_field), self.order_direction)()
            rank_by_total = Window(expression=RowNumber(), partition_by=F("date"), order_by=rank_order)
            query_data = query_data.annotate(rank=rank_by_total)
            # Rank is a secondary sort key, right after "-date".
            query_order_by.insert(1, "rank")
            query_data = self._ranked_list(query_data)
        if self._delta:
            query_data = self.add_deltas(query_data, query_sum)
        is_csv_output = self.parameters.accept_type and "text/csv" in self.parameters.accept_type
        query_data = self.order_by(query_data, query_order_by)
        # Fetch the data (returning list(dict))
        query_results = list(query_data)
        # Resolve tag exists for unique account returned
        # if tag_results is not Falsey
        # Append the flag to the query result for the report
        if tag_results is not None:
            # Add the tag results to the report query result dicts
            for res in query_results:
                res["tags_exist"] = tag_results.get(res["account_alias"], False)
        if is_csv_output:
            # CSV output stays flat; re-rank if a limit applies.
            if self._limit:
                data = self._ranked_list(query_results)
            else:
                data = query_results
        else:
            # JSON output is nested by group-by keys (date handled separately
            # by _transform_data).
            groups = copy.deepcopy(query_group_by)
            groups.remove("date")
            data = self._apply_group_by(query_results, groups)
            data = self._transform_data(query_group_by, 0, data)
    # Order the totals: "units" first, then annotation keys, then the rest.
    key_order = list(["units"] + list(annotations.keys()))
    ordered_total = {total_key: query_sum[total_key] for total_key in key_order if total_key in query_sum}
    ordered_total.update(query_sum)
    query_sum = ordered_total
    query_data = data
    return query_data, query_sum
def execute_query(self):  # noqa: C901
    """Execute query and return provided data.

    Like the other handlers, but resolves cost/usage/count unit labels
    from the first result row (falling back to mapper defaults) and
    packs the totals via the mapper's PACK_DEFINITIONS.

    Returns:
        (Dict): Dictionary response of query params, data, and total
    """
    query_sum = self.initialize_totals()
    data = []
    q_table = self._mapper.query_table
    # All ORM work must happen inside the tenant's schema context.
    with tenant_context(self.tenant):
        query = q_table.objects.filter(self.query_filter)
        query_data = query.annotate(**self.annotations)
        group_by_value = self._get_group_by()
        # Always group by date first, then any user-requested group-bys.
        query_group_by = ["date"] + group_by_value
        query_order_by = ["-date"]
        query_order_by.extend([self.order])
        annotations = self._mapper.report_type_map.get("annotations")
        query_data = query_data.values(*query_group_by).annotate(
            **annotations)
        if "account" in query_group_by:
            # Prefer the human-readable alias; fall back to the raw account id.
            query_data = query_data.annotate(account_alias=Coalesce(
                F(self._mapper.provider_map.get("alias")), "usage_account_id"))
        if self._limit:
            # Rank rows per date with a window function so the top-N
            # per day can be selected by _ranked_list.
            rank_order = getattr(F(self.order_field), self.order_direction)()
            rank_by_total = Window(expression=RowNumber(),
                                   partition_by=F("date"),
                                   order_by=rank_order)
            query_data = query_data.annotate(rank=rank_by_total)
            # Rank is a secondary sort key, right after "-date".
            query_order_by.insert(1, "rank")
            query_data = self._ranked_list(query_data)
        if query.exists():
            # Overwrite the initialized totals with real aggregates.
            aggregates = self._mapper.report_type_map.get("aggregates")
            metric_sum = query.aggregate(**aggregates)
            query_sum = {key: metric_sum.get(key) for key in aggregates}
        if self._delta:
            query_data = self.add_deltas(query_data, query_sum)
        is_csv_output = self.parameters.accept_type and "text/csv" in self.parameters.accept_type
        query_data = self.order_by(query_data, query_order_by)
        # Unit labels default to the mapper fallbacks ...
        cost_units_value = self._mapper.report_type_map.get(
            "cost_units_fallback", "USD")
        usage_units_value = self._mapper.report_type_map.get(
            "usage_units_fallback")
        count_units_value = self._mapper.report_type_map.get(
            "count_units_fallback")
        # ... but are taken from the first result row when data exists.
        if query_data:
            cost_units_value = query_data[0].get("cost_units")
            if self._mapper.usage_units_key:
                usage_units_value = query_data[0].get("usage_units")
            if self._mapper.report_type_map.get("annotations", {}).get("count_units"):
                count_units_value = query_data[0].get("count_units")
        if is_csv_output:
            # CSV output stays flat; re-rank if a limit applies.
            if self._limit:
                data = self._ranked_list(list(query_data))
            else:
                data = list(query_data)
        else:
            # JSON output is nested by group-by keys (date handled separately
            # by _transform_data).
            groups = copy.deepcopy(query_group_by)
            groups.remove("date")
            data = self._apply_group_by(list(query_data), groups)
            data = self._transform_data(query_group_by, 0, data)
    # Attach the resolved unit labels to the totals.
    init_order_keys = []
    query_sum["cost_units"] = cost_units_value
    if self._mapper.usage_units_key and usage_units_value:
        init_order_keys = ["usage_units"]
        query_sum["usage_units"] = usage_units_value
    if self._mapper.report_type_map.get(
            "annotations", {}).get("count_units") and count_units_value:
        query_sum["count_units"] = count_units_value
    # Order the totals: unit keys first, then annotation keys, then the rest.
    key_order = list(init_order_keys + list(annotations.keys()))
    ordered_total = {
        total_key: query_sum[total_key]
        for total_key in key_order if total_key in query_sum
    }
    ordered_total.update(query_sum)
    self._pack_data_object(ordered_total, **self._mapper.PACK_DEFINITIONS)
    self.query_sum = ordered_total
    self.query_data = data
    return self._format_query_response()
def merge_m2m(self, data, field, prefetch):
    """Merge prefetched many-to-many objects into `data` rows.

    For each local row, attaches the list of related remote objects under
    the prefetch's `to_attr` (or field name). Rows with no matches get an
    empty list.

    :param data: list of dicts (local rows) to be updated in place
    :param field: the m2m field being prefetched
    :param prefetch: prefetch descriptor carrying the remote query
    :return: `data`, with the related-object lists merged in
    """
    # Strategy: pull out all my IDs, do a reverse filter on remote object.
    # e.g.: If prefetching User.groups, do
    #       Groups.filter(users__in=<user_ids>)
    ids = self._get_ids(data)

    base_qs = prefetch.query.queryset  # base queryset on remote model
    remote_pk_field = base_qs.model._meta.pk.attname  # get pk field name
    reverse_field = get_reverse_m2m_field_name(field)

    if reverse_field is None:
        # Note: We can't just reuse self.queryset here because it's
        # been sliced already.
        filters = {field.attname + '__isnull': False}
        qs = self.queryset.model.objects.filter(pk__in=ids, **filters)
        joins = list(qs.values_list(field.attname, self.pk_field))
    else:
        # Get reverse mapping (for User.groups, get Group.users)
        # Note: `qs` already has base filter applied on remote model.
        filters = {f'{reverse_field}__in': ids}
        if has_limits(base_qs):
            # remove limits, then use CTE + RowNumber
            # to re-introduce them using window functions
            base_qs = base_qs._clone()
            low, high = get_limits(base_qs)
            clear_limits(base_qs)
            order_by = base_qs.query.order_by
            if not order_by:
                # if there is no order, we need to use pk
                order_by = ['pk']
            cte = With(
                base_qs.annotate(
                    **{
                        '..row': Window(expression=RowNumber(),
                                        partition_by=[reverse_field],
                                        order_by=order_by)
                    }).filter(**filters))
            joins = cte.queryset().with_cte(cte).filter(**{
                '..row__lte': high,
                '..row__gt': low
            }).order_by(*order_by).distinct()
        else:
            # no limits, use simple filtering
            joins = base_qs.filter(**filters)
        joins = list(joins.values_list(remote_pk_field, reverse_field))

    # Fetch remote objects, as values.
    remote_ids = {o[0] for o in joins}
    query = prefetch.query._clone()
    # remove limits to get IDs without extra filtering issues
    if has_limits(query.queryset):
        clear_limits(query.queryset)
    remote_objects = query.get_ids(remote_ids).execute()
    id_map = self._make_id_map(remote_objects, pk_field=remote_pk_field)

    # Create mapping of local ID -> remote objects.
    to_attr = prefetch.to_attr or prefetch.field
    # BUG FIX: was `defaultdict(List)` — the `typing.List` alias is not a
    # callable factory (instantiating it raises TypeError), so the first
    # missing-key access crashed. The builtin `list` is the intended factory.
    object_map = defaultdict(list)
    for remote_id, local_id in joins:
        if remote_id in id_map:
            object_map[local_id].append(id_map[remote_id])

    # Merge into working data set.
    for row in data:
        row[to_attr] = object_map[row.get(self.pk_field, row['pk'])]
    return data
def execute_query(self):  # noqa: C901
    """Execute query and return provided data.

    OCP flavor: values are selected by a cluster-aware group-by and a
    `cluster_alias` annotation is added when grouping/filtering by cluster.

    Returns:
        (Dict): Dictionary response of query params, data, and total
    """
    query_sum = self.initialize_totals()
    data = []
    q_table = self._mapper.query_table
    # All ORM work must happen inside the tenant's schema context.
    with tenant_context(self.tenant):
        query = q_table.objects.filter(self.query_filter)
        query_data = query.annotate(**self.annotations)
        group_by_value = self._get_group_by()
        # Always group by date first, then any user-requested group-bys.
        query_group_by = ['date'] + group_by_value
        query_order_by = [
            '-date',
        ]
        query_order_by.extend([self.order])
        # Cluster-aware group-by used only for the DB values() selection.
        clustered_group_by = self._get_cluster_group_by(query_group_by)
        annotations = self._mapper.report_type_map.get('annotations')
        query_data = query_data.values(*clustered_group_by).annotate(
            **annotations)
        if 'account' in query_group_by:
            # Prefer the human-readable alias; fall back to the raw account id.
            query_data = query_data.annotate(account_alias=Coalesce(
                F(self._mapper.provider_map.get('alias')), 'usage_account_id'))
        elif 'cluster' in query_group_by or 'cluster' in self.query_filter:
            # Prefer the cluster alias; fall back to the cluster id.
            query_data = query_data.annotate(
                cluster_alias=Coalesce('cluster_alias', 'cluster_id'))
        if self._limit:
            # Rank rows per date with a window function so the top-N
            # per day can be selected by _ranked_list.
            rank_order = getattr(F(self.order_field), self.order_direction)()
            rank_by_total = Window(expression=RowNumber(),
                                   partition_by=F('date'),
                                   order_by=rank_order)
            query_data = query_data.annotate(rank=rank_by_total)
            # Rank is a secondary sort key, right after '-date'.
            query_order_by.insert(1, 'rank')
            query_data = self._ranked_list(query_data)
        if query.exists():
            # Overwrite the initialized totals with real aggregates.
            aggregates = self._mapper.report_type_map.get('aggregates')
            metric_sum = query.aggregate(**aggregates)
            query_sum = {key: metric_sum.get(key) for key in aggregates}
        if self._delta:
            query_data = self.add_deltas(query_data, query_sum)
        is_csv_output = self._accept_type and 'text/csv' in self._accept_type
        query_data, query_group_by = self.strip_label_column_name(
            query_data, query_group_by)
        query_data = self.order_by(query_data, query_order_by)
        # Unit labels default to the mapper fallbacks ...
        cost_units_value = self._mapper.report_type_map.get(
            'cost_units_fallback', 'USD')
        usage_units_value = self._mapper.report_type_map.get(
            'usage_units_fallback')
        count_units_value = self._mapper.report_type_map.get(
            'count_units_fallback')
        # ... but are taken from the first result row when data exists.
        if len(query_data) > 0:
            cost_units_value = query_data[0].get('cost_units')
            if self._mapper.usage_units_key:
                usage_units_value = query_data[0].get('usage_units')
            if self._mapper.report_type_map.get('annotations',
                                                {}).get('count_units'):
                count_units_value = query_data[0].get('count_units')
        if is_csv_output:
            # CSV output stays flat; re-rank if a limit applies.
            if self._limit:
                data = self._ranked_list(list(query_data))
            else:
                data = list(query_data)
        else:
            # JSON output is nested by group-by keys (date handled separately
            # by _transform_data).
            groups = copy.deepcopy(query_group_by)
            groups.remove('date')
            data = self._apply_group_by(list(query_data), groups)
            data = self._transform_data(query_group_by, 0, data)
    # Attach the resolved unit labels to the totals.
    init_order_keys = []
    query_sum['cost_units'] = cost_units_value
    if self._mapper.usage_units_key and usage_units_value:
        init_order_keys = ['usage_units']
        query_sum['usage_units'] = usage_units_value
    if self._mapper.report_type_map.get(
            'annotations', {}).get('count_units') and count_units_value:
        query_sum['count_units'] = count_units_value
    # Order the totals: unit keys first, then annotation keys, then the rest.
    key_order = list(init_order_keys + list(annotations.keys()))
    ordered_total = {
        total_key: query_sum[total_key]
        for total_key in key_order if total_key in query_sum
    }
    ordered_total.update(query_sum)
    self._pack_data_object(ordered_total, **self._mapper.PACK_DEFINITIONS)
    self.query_sum = ordered_total
    self.query_data = data
    return self._format_query_response()
def get_rank(self):
    """Return this canton's 1-based rank among scored cantons.

    Cantons without a score are excluded; rank is assigned by score in
    descending order via a window function. Returns None when this canton
    is not in the ranked set (e.g. its score is null).
    """
    score_order = [F('score').desc()]
    ranking = Window(expression=RowNumber(), order_by=score_order)
    scored = Canton.objects.exclude(score__isnull=True).annotate(rank=ranking)
    # Scan the ranked rows for our own pk; absent means unranked.
    return next((row.rank for row in scored if row.pk == self.pk), None)
def get_rank(self):
    """Return this country's 1-based rank among scored countries.

    Countries without a score are excluded; rank is assigned by score in
    descending order via a window function. Returns None when this country
    is not in the ranked set (e.g. its score is null).
    """
    score_order = [F('score').desc()]
    ranking = Window(expression=RowNumber(), order_by=score_order)
    scored = Country.objects.exclude(score__isnull=True).annotate(rank=ranking)
    # Scan the ranked rows for our own pk; absent means unranked.
    return next((row.rank for row in scored if row.pk == self.pk), None)
class OrgGraphViewSet(viewsets.ModelViewSet):
    """Paginated graph data for organizations with at least three publications.

    Each row exposes the organization's name and data score, plus a 1-based
    `index` assigned by data score ascending (a RowNumber window function).
    """

    # Only orgs flagged as having three or more publications are charted.
    orgs = Org.objects.filter(has_three_pubs=True)
    # NOTE(review): ascending order means index 1 is the LOWEST score —
    # presumably intentional for the graph's axis; confirm with the frontend.
    queryset = orgs.values('organization_name', 'data_score').annotate(
        index=Window(expression=RowNumber(), order_by=F('data_score').asc()))
    serializer_class = OrgGraphSerializer
    pagination_class = OrgPagination
class PersonGraphViewSet(viewsets.ModelViewSet):
    """Paginated graph data for people with at least three publications.

    Each row exposes the person's full name and data score, plus a 1-based
    `index` assigned by data score ascending (a RowNumber window function).
    """

    # Only authors flagged as having three or more publications are charted.
    authors = Person.objects.filter(has_three_pubs=True)
    # NOTE(review): ascending order means index 1 is the LOWEST score —
    # presumably intentional for the graph's axis; confirm with the frontend.
    queryset = authors.values('full_name', 'data_score').annotate(
        index=Window(expression=RowNumber(), order_by=F('data_score').asc()))
    serializer_class = PersonGraphSerializer
    pagination_class = PersonPagination
def get_neighbour_pks(model, pk, filterset=None, ordering=None):
    '''
    Given a model and pk that identify an object (model instance) will, given
    an ordering (defaulting to the model's ordering) and optionally a
    filterset (from url_filter), return a tuple that contains the PKs of the
    prior and next neighbour in the list, either of all objects by that
    ordering or of the filtered list (if a filterset is provided).

    On success returns a 4-tuple:
        (neighbour_prior_pk, neighbour_next_pk, row_number, total_count)
    On any failure (bad filterset, annotation clash, pk not found, query
    error) returns (None, None).

    :param model: The model the object is an instance of
    :param pk: The primary key of the model instance being considered
    :param filterset: An optional filterset (see https://github.com/miki725/django-url-filter)
    :param ordering: An optional ordering (otherwise default model ordering is used).
        See: https://docs.djangoproject.com/en/2.0/ref/models/options/#ordering
    '''
    # If a filterset is provided ensure it's of the same model as specified
    # (consistency).
    if filterset and not filterset.Meta.model == model:
        return (None, None)

    # Get the ordering list for the model (a list of fields).
    # See: https://docs.djangoproject.com/en/2.0/ref/models/options/#ordering
    if ordering is None:
        ordering = model._meta.ordering

    # Translate "-field"/"field" names into ORM ordering expressions.
    order_by = []
    for f in ordering:
        if f.startswith("-"):
            order_by.append(F(f[1:]).desc())
        else:
            order_by.append(F(f).asc())

    # A default order. We need an order or the window functions crash.
    if len(order_by) == 0:
        order_by = ['pk']

    # Define the window functions for each neighbour plus the row number.
    window_lag = Window(expression=Lag("pk"), order_by=order_by)
    window_lead = Window(expression=Lead("pk"), order_by=order_by)
    window_rownum = Window(expression=RowNumber(), order_by=order_by)

    # Get a queryset annotated with neighbours. If annotated attrs clash with
    # existing attrs an exception will be raised:
    # https://code.djangoproject.com/ticket/11256
    try:
        # Start with all objects
        qs = model.objects.all()

        # Now apply a filterset if we have one
        if filterset is not None:
            # We respect the filterset, BUT we need to wrap it inside a sub
            # query so that we can apply a DISTINCT ON pk to avoid duplicate
            # tuples that the window functions can introduce when we are
            # matching multiple remote objects. Alas that's what they do, so
            # we have to constrain it to one tuple per PK.
            #
            # FIXME: Aaargh this won't work for injecting the current PK into
            # the query! My desire is to make sure that the query results
            # include the provided pk. Needs testing in both cases. I can't
            # think of a way to do it alas. This is frustrating me. Problem is
            # across related object filters, or JOINS.
            # qs = filterset.filter() | (model.objects.filter(pk=pk).distinct() & filterset.filter())
            qs = qs.filter(pk__in=Subquery(
                filterset.filter().distinct('pk').order_by('pk').values('pk')))

        # Now order the objects properly
        qs = qs.order_by(*order_by)

        # Now annotate the queryset with the prior and next PKs
        qs = qs.annotate(neighbour_prior=window_lag,
                         neighbour_next=window_lead,
                         row_number=window_rownum)
    except Exception:
        # FIX: was a bare `except:` returning bare None — callers that unpack
        # the documented tuple would crash. Narrowed to Exception (so
        # KeyboardInterrupt/SystemExit propagate) and made the error value
        # consistent with every other failure path.
        return (None, None)

    # Finally we need some trickery alas to do a query on the queryset! We
    # can't add this WHERE as a filter because the LAG and LEAD window
    # functions fail then, they are empty because there is no lagger or
    # leader on the one-line result! So we have to run that query on the
    # whole table, then extract from the result the one line we want! Wish I
    # could find a way to do this in the Django ORM, not with a raw() call.

    # First we need the SQL from the existing query. Many on-line sources
    # recommend str(qs.query) but this does not return reliable SQL! A bug in
    # Django and much discussed:
    # https://code.djangoproject.com/ticket/30132
    # https://code.djangoproject.com/ticket/25705
    # https://code.djangoproject.com/ticket/25092
    # https://code.djangoproject.com/ticket/24991
    # https://code.djangoproject.com/ticket/17741
    #
    # But this, it seems, is the reliable method, which involves dipping into
    # Django's innards a little (the SQL compiler).
    sql, params = qs.query.get_compiler(using=qs.db).as_sql()

    # Now we wrap the SQL
    sql = "SELECT * FROM ({}) ao WHERE {}={}".format(sql, model._meta.pk.name, pk)

    # And create a new QuerySet
    ao = model.objects.raw(sql, params)

    try:
        # FIX: materialize the raw queryset once — the original evaluated it
        # up to three times (`if ao:`, `len(list(ao))`, `ao[0]`), re-running
        # the query each time.
        rows = list(ao)
        if len(rows) == 1:
            return (rows[0].neighbour_prior, rows[0].neighbour_next,
                    rows[0].row_number, qs.count())
        # Zero rows: the pk does not exist in the (filtered) set.
        # More than one row should be impossible (pk is unique); the original
        # raised ValueError here and immediately swallowed it, so we preserve
        # that observable behavior by returning the failure tuple.
        return (None, None)
    except Exception:
        # FIX: was a bare `except:` — narrowed to Exception.
        return (None, None)