Example #1
    def execute_query(self):
        """Execute query and return provided data.

        Returns:
            (Dict): Dictionary response of query params, data, and total

        """
        data = []

        q_table = self._mapper.query_table
        with tenant_context(self.tenant):
            query = q_table.objects.filter(self.query_filter)
            query_data = query.annotate(**self.annotations)
            query_group_by = ['date'] + self._get_group_by()
            query_order_by = [
                '-date',
            ]
            query_order_by.extend([self.order])

            annotations = self._mapper.report_type_map.get('annotations')
            query_data = query_data.values(*query_group_by).annotate(
                **annotations)

            if 'account' in query_group_by:
                query_data = query_data.annotate(account_alias=Coalesce(
                    F(self._mapper.provider_map.get('alias')),
                    'usage_account_id'))

            query_sum = self._build_sum(query)

            if self._limit:
                rank_order = getattr(F(self.order_field),
                                     self.order_direction)()
                rank_by_total = Window(expression=RowNumber(),
                                       partition_by=F('date'),
                                       order_by=rank_order)
                query_data = query_data.annotate(rank=rank_by_total)
                query_order_by.insert(1, 'rank')
                query_data = self._ranked_list(query_data)

            if self._delta:
                query_data = self.add_deltas(query_data, query_sum)

            is_csv_output = self.parameters.accept_type and 'text/csv' in self.parameters.accept_type

            query_data, query_group_by = self.strip_label_column_name(
                query_data, query_group_by)
            query_data = self.order_by(query_data, query_order_by)

            if is_csv_output:
                if self._limit:
                    data = self._ranked_list(list(query_data))
                else:
                    data = list(query_data)
            else:
                groups = copy.deepcopy(query_group_by)
                groups.remove('date')
                data = self._apply_group_by(list(query_data), groups)
                data = self._transform_data(query_group_by, 0, data)

        key_order = list(['units'] + list(annotations.keys()))
        ordered_total = {
            total_key: query_sum[total_key]
            for total_key in key_order if total_key in query_sum
        }
        ordered_total.update(query_sum)

        self.query_sum = ordered_total
        self.query_data = data
        return self._format_query_response()
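
A note on the _limit branch above: it numbers the rows within each date using RowNumber() and then trims each partition in Python via _ranked_list(). A minimal standalone sketch of that pattern, assuming a hypothetical DailySummary model with usage_start and cost fields:

from django.db.models import F, Window
from django.db.models.functions import RowNumber

def top_n_per_day(limit=5):
    # Number the rows within each day, most expensive first.
    # DailySummary, usage_start and cost are hypothetical stand-ins.
    ranked = DailySummary.objects.annotate(
        rank=Window(
            expression=RowNumber(),
            partition_by=F('usage_start'),
            order_by=F('cost').desc(),
        )
    )
    # Older Django versions cannot filter() on a window annotation, so the
    # trimming happens in Python, much as _ranked_list() does above.
    return [row for row in ranked.values('usage_start', 'cost', 'rank')
            if row['rank'] <= limit]
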
Example #2
    def execute_individual_query(self, org_unit_applied=False):  # noqa: C901
        """Execute query and return provided data.

        Returns:
            (Dict): Dictionary response of query params, data, and total

        """
        data = []

        with tenant_context(self.tenant):
            query_table = self.query_table
            LOG.debug(f"Using query table: {query_table}")
            tag_results = None
            query = query_table.objects.filter(self.query_filter)
            query_data = query.annotate(**self.annotations)
            query_group_by = ["date"] + self._get_group_by()
            query_order_by = ["-date"]
            query_order_by.extend([self.order])

            annotations = copy.deepcopy(self._mapper.report_type_map.get("annotations", {}))
            if not self.parameters.parameters.get("compute_count"):
                # Query parameter indicates count should be removed from DB queries
                annotations.pop("count", None)
                annotations.pop("count_units", None)

            query_data = query_data.values(*query_group_by).annotate(**annotations)

            if "account" in query_group_by:
                query_data = query_data.annotate(
                    account_alias=Coalesce(F(self._mapper.provider_map.get("alias")), "usage_account_id")
                )

                if self.parameters.parameters.get("check_tags"):
                    tag_results = self._get_associated_tags(query_table, self.query_filter)

            query_sum = self._build_sum(query, annotations)

            if self._limit and not org_unit_applied:
                rank_order = getattr(F(self.order_field), self.order_direction)()
                rank_by_total = Window(expression=RowNumber(), partition_by=F("date"), order_by=rank_order)
                query_data = query_data.annotate(rank=rank_by_total)
                query_order_by.insert(1, "rank")
                query_data = self._ranked_list(query_data)

            if self._delta:
                query_data = self.add_deltas(query_data, query_sum)

            is_csv_output = self.parameters.accept_type and "text/csv" in self.parameters.accept_type

            query_data = self.order_by(query_data, query_order_by)

            # Fetch the data (returning list(dict))
            query_results = list(query_data)

            # If tag lookups were requested (tag_results is not None), flag each
            # returned account with whether it has any associated tags so the
            # report can surface that information.
            if tag_results is not None:
                # Add the tag results to the report query result dicts
                for res in query_results:
                    res["tags_exist"] = tag_results.get(res["account_alias"], False)

            if is_csv_output:
                if self._limit:
                    data = self._ranked_list(query_results)
                else:
                    data = query_results
            else:
                groups = copy.deepcopy(query_group_by)
                groups.remove("date")
                data = self._apply_group_by(query_results, groups)
                data = self._transform_data(query_group_by, 0, data)

        key_order = list(["units"] + list(annotations.keys()))
        ordered_total = {total_key: query_sum[total_key] for total_key in key_order if total_key in query_sum}
        ordered_total.update(query_sum)

        query_sum = ordered_total
        query_data = data
        return query_data, query_sum
Example #3
    def execute_query(self):  # noqa: C901
        """Execute query and return provided data.

        Returns:
            (Dict): Dictionary response of query params, data, and total

        """
        query_sum = self.initialize_totals()
        data = []

        q_table = self._mapper.query_table
        with tenant_context(self.tenant):
            query = q_table.objects.filter(self.query_filter)
            query_data = query.annotate(**self.annotations)
            group_by_value = self._get_group_by()
            query_group_by = ["date"] + group_by_value
            query_order_by = ["-date"]
            query_order_by.extend([self.order])

            annotations = self._mapper.report_type_map.get("annotations")
            query_data = query_data.values(*query_group_by).annotate(
                **annotations)

            if "account" in query_group_by:
                query_data = query_data.annotate(account_alias=Coalesce(
                    F(self._mapper.provider_map.get("alias")),
                    "usage_account_id"))

            if self._limit:
                rank_order = getattr(F(self.order_field),
                                     self.order_direction)()
                rank_by_total = Window(expression=RowNumber(),
                                       partition_by=F("date"),
                                       order_by=rank_order)
                query_data = query_data.annotate(rank=rank_by_total)
                query_order_by.insert(1, "rank")
                query_data = self._ranked_list(query_data)

            if query.exists():
                aggregates = self._mapper.report_type_map.get("aggregates")
                metric_sum = query.aggregate(**aggregates)
                query_sum = {key: metric_sum.get(key) for key in aggregates}

            if self._delta:
                query_data = self.add_deltas(query_data, query_sum)

            is_csv_output = self.parameters.accept_type and "text/csv" in self.parameters.accept_type

            query_data = self.order_by(query_data, query_order_by)
            cost_units_value = self._mapper.report_type_map.get(
                "cost_units_fallback", "USD")
            usage_units_value = self._mapper.report_type_map.get(
                "usage_units_fallback")
            count_units_value = self._mapper.report_type_map.get(
                "count_units_fallback")
            if query_data:
                cost_units_value = query_data[0].get("cost_units")
                if self._mapper.usage_units_key:
                    usage_units_value = query_data[0].get("usage_units")
                if self._mapper.report_type_map.get("annotations",
                                                    {}).get("count_units"):
                    count_units_value = query_data[0].get("count_units")

            if is_csv_output:
                if self._limit:
                    data = self._ranked_list(list(query_data))
                else:
                    data = list(query_data)
            else:
                groups = copy.deepcopy(query_group_by)
                groups.remove("date")
                data = self._apply_group_by(list(query_data), groups)
                data = self._transform_data(query_group_by, 0, data)

        init_order_keys = []
        query_sum["cost_units"] = cost_units_value
        if self._mapper.usage_units_key and usage_units_value:
            init_order_keys = ["usage_units"]
            query_sum["usage_units"] = usage_units_value
        if self._mapper.report_type_map.get(
                "annotations", {}).get("count_units") and count_units_value:
            query_sum["count_units"] = count_units_value
        key_order = list(init_order_keys + list(annotations.keys()))
        ordered_total = {
            total_key: query_sum[total_key]
            for total_key in key_order if total_key in query_sum
        }
        ordered_total.update(query_sum)
        self._pack_data_object(ordered_total, **self._mapper.PACK_DEFINITIONS)

        self.query_sum = ordered_total
        self.query_data = data
        return self._format_query_response()
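
The totals in Example 3 come from a single aggregate() call driven by a dict of aggregate expressions. A minimal sketch of that step on its own; the aggregates dict below is a hypothetical stand-in for report_type_map["aggregates"]:

from django.db.models import Max, Sum

def build_totals(query):
    # Hypothetical aggregates; the real dict comes from report_type_map["aggregates"].
    aggregates = {
        "cost": Sum("unblended_cost"),
        "usage": Sum("usage_amount"),
        "cost_units": Max("currency_code"),
    }
    if not query.exists():
        return {}
    metric_sum = query.aggregate(**aggregates)
    return {key: metric_sum.get(key) for key in aggregates}
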
Example #4
    def merge_m2m(self, data, field, prefetch):
        # Strategy: pull out all my IDs, do a reverse filter on remote object.
        # e.g.: If prefetching User.groups, do
        #       Groups.filter(users__in=<user_ids>)

        ids = self._get_ids(data)

        base_qs = prefetch.query.queryset  # base queryset on remote model
        remote_pk_field = base_qs.model._meta.pk.attname  # get pk field name
        reverse_field = get_reverse_m2m_field_name(field)

        if reverse_field is None:
            # Note: We can't just reuse self.queryset here because it's
            #       been sliced already.
            filters = {field.attname + '__isnull': False}
            qs = self.queryset.model.objects.filter(pk__in=ids, **filters)
            joins = list(qs.values_list(field.attname, self.pk_field))
        else:
            # Get reverse mapping (for User.groups, get Group.users)
            # Note: `qs` already has base filter applied on remote model.
            filters = {f'{reverse_field}__in': ids}
            if has_limits(base_qs):
                # remove limits, then use CTE + RowNumber
                # to re-introduce them using window functions
                base_qs = base_qs._clone()
                low, high = get_limits(base_qs)
                clear_limits(base_qs)
                order_by = base_qs.query.order_by
                if not order_by:
                    # if there is no order, we need to use pk
                    order_by = ['pk']
                cte = With(
                    base_qs.annotate(
                        **{
                            '..row':
                            Window(expression=RowNumber(),
                                   partition_by=[reverse_field],
                                   order_by=order_by)
                        }).filter(**filters))
                joins = cte.queryset().with_cte(cte).filter(**{
                    '..row__lte': high,
                    '..row__gt': low
                }).order_by(*order_by).distinct()
            else:
                # no limits, use simple filtering
                joins = base_qs.filter(**filters)

            joins = list(joins.values_list(remote_pk_field, reverse_field))

        # Fetch remote objects, as values.
        remote_ids = set([o[0] for o in joins])

        query = prefetch.query._clone()
        # remove limits to get IDs without extra filtering issues
        if has_limits(query.queryset):
            clear_limits(query.queryset)

        remote_objects = query.get_ids(remote_ids).execute()
        id_map = self._make_id_map(remote_objects, pk_field=remote_pk_field)

        # Create mapping of local ID -> remote objects
        to_attr = prefetch.to_attr or prefetch.field
        object_map = defaultdict(list)
        for remote_id, local_id in joins:
            if remote_id in id_map:
                object_map[local_id].append(id_map[remote_id])

        # Merge into working data set.
        for row in data:
            row[to_attr] = object_map[row.get(self.pk_field, row['pk'])]

        return data
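
The interesting part of merge_m2m is the has_limits branch: the slice is stripped from the remote queryset and re-applied per parent row with RowNumber() inside a common table expression (django-cte's With), which is what the '..row' annotation does above. A stripped-down sketch of the same idea under assumed names (a Group model with an M2M field users pointing back at the parent model):

from django.db.models import F, Window
from django.db.models.functions import RowNumber
from django_cte import With

def first_groups_per_user(user_ids, per_user=3):
    # Number the matching groups separately for each user.
    # Group and its users field are assumptions for this sketch.
    cte = With(
        Group.objects.filter(users__in=user_ids).annotate(
            row=Window(
                expression=RowNumber(),
                partition_by=[F('users')],
                order_by=F('name').asc(),
            )
        )
    )
    # Re-introduce the per-user limit as a WHERE on the window column.
    return cte.queryset().with_cte(cte).filter(row__lte=per_user)
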
Example #5
    def execute_query(self):  # noqa: C901
        """Execute query and return provided data.

        Returns:
            (Dict): Dictionary response of query params, data, and total

        """
        query_sum = self.initialize_totals()
        data = []

        q_table = self._mapper.query_table
        with tenant_context(self.tenant):
            query = q_table.objects.filter(self.query_filter)
            query_data = query.annotate(**self.annotations)
            group_by_value = self._get_group_by()
            query_group_by = ['date'] + group_by_value
            query_order_by = [
                '-date',
            ]
            query_order_by.extend([self.order])
            clustered_group_by = self._get_cluster_group_by(query_group_by)

            annotations = self._mapper.report_type_map.get('annotations')
            query_data = query_data.values(*clustered_group_by).annotate(
                **annotations)

            if 'account' in query_group_by:
                query_data = query_data.annotate(account_alias=Coalesce(
                    F(self._mapper.provider_map.get('alias')),
                    'usage_account_id'))
            elif 'cluster' in query_group_by or 'cluster' in self.query_filter:
                query_data = query_data.annotate(
                    cluster_alias=Coalesce('cluster_alias', 'cluster_id'))

            if self._limit:
                rank_order = getattr(F(self.order_field),
                                     self.order_direction)()
                rank_by_total = Window(expression=RowNumber(),
                                       partition_by=F('date'),
                                       order_by=rank_order)
                query_data = query_data.annotate(rank=rank_by_total)
                query_order_by.insert(1, 'rank')
                query_data = self._ranked_list(query_data)

            if query.exists():
                aggregates = self._mapper.report_type_map.get('aggregates')
                metric_sum = query.aggregate(**aggregates)
                query_sum = {key: metric_sum.get(key) for key in aggregates}

            if self._delta:
                query_data = self.add_deltas(query_data, query_sum)

            is_csv_output = self._accept_type and 'text/csv' in self._accept_type

            query_data, query_group_by = self.strip_label_column_name(
                query_data, query_group_by)
            query_data = self.order_by(query_data, query_order_by)
            cost_units_value = self._mapper.report_type_map.get(
                'cost_units_fallback', 'USD')
            usage_units_value = self._mapper.report_type_map.get(
                'usage_units_fallback')
            count_units_value = self._mapper.report_type_map.get(
                'count_units_fallback')
            if len(query_data) > 0:
                cost_units_value = query_data[0].get('cost_units')
                if self._mapper.usage_units_key:
                    usage_units_value = query_data[0].get('usage_units')
                if self._mapper.report_type_map.get('annotations',
                                                    {}).get('count_units'):
                    count_units_value = query_data[0].get('count_units')

            if is_csv_output:
                if self._limit:
                    data = self._ranked_list(list(query_data))
                else:
                    data = list(query_data)
            else:
                groups = copy.deepcopy(query_group_by)
                groups.remove('date')
                data = self._apply_group_by(list(query_data), groups)
                data = self._transform_data(query_group_by, 0, data)

        init_order_keys = []
        query_sum['cost_units'] = cost_units_value
        if self._mapper.usage_units_key and usage_units_value:
            init_order_keys = ['usage_units']
            query_sum['usage_units'] = usage_units_value
        if self._mapper.report_type_map.get(
                'annotations', {}).get('count_units') and count_units_value:
            query_sum['count_units'] = count_units_value
        key_order = list(init_order_keys + list(annotations.keys()))
        ordered_total = {
            total_key: query_sum[total_key]
            for total_key in key_order if total_key in query_sum
        }
        ordered_total.update(query_sum)
        self._pack_data_object(ordered_total, **self._mapper.PACK_DEFINITIONS)

        self.query_sum = ordered_total
        self.query_data = data
        return self._format_query_response()
Example #6
    def get_rank(self):
        ranked_cantons = Canton.objects.exclude(score__isnull=True).annotate(
            rank=Window(expression=RowNumber(), order_by=[F('score').desc()]))
        for canton in ranked_cantons:
            if canton.pk == self.pk:
                return canton.rank
        return None
Example #7
    def get_rank(self):
        ranked_countries = Country.objects.exclude(score__isnull=True).annotate(
            rank=Window(expression=RowNumber(), order_by=[F('score').desc()]))
        for country in ranked_countries:
            if country.pk == self.pk:
                return country.rank
        return None
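
The Python loop in both get_rank helpers exists because older Django versions refuse to filter() on a window annotation. On Django 4.2+, which applies such a filter only after the window has been computed, the lookup can stay in the database; a hedged sketch against the same Canton model:

from django.db.models import F, Window
from django.db.models.functions import RowNumber

def get_rank(self):
    # Sketch only: requires Django >= 4.2, where filtering on a window
    # annotation is evaluated against the already-computed window results.
    ranked = (
        Canton.objects.exclude(score__isnull=True)
        .annotate(rank=Window(expression=RowNumber(), order_by=[F('score').desc()]))
        .filter(pk=self.pk)
        .values_list('rank', flat=True)
    )
    return ranked.first()  # None when this canton has no score
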
Example #8
class OrgGraphViewSet(viewsets.ModelViewSet):
    orgs = Org.objects.filter(has_three_pubs=True)
    queryset = orgs.values('organization_name', 'data_score').annotate(
        index=Window(expression=RowNumber(), order_by=F('data_score').asc()))
    serializer_class = OrgGraphSerializer
    pagination_class = OrgPagination
Example #9
class PersonGraphViewSet(viewsets.ModelViewSet):
    authors = Person.objects.filter(has_three_pubs=True)
    queryset = authors.values('full_name', 'data_score').annotate(
        index=Window(expression=RowNumber(), order_by=F('data_score').asc()))
    serializer_class = PersonGraphSerializer
    pagination_class = PersonPagination
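
Neither ViewSet shows its serializer. Because the queryset is built with .values(...).annotate(...) and therefore yields dicts, a plausible minimal serializer would simply declare the selected columns plus the window-annotated index. The class below is an assumption, not the project's actual serializer:

from rest_framework import serializers

class PersonGraphSerializer(serializers.Serializer):
    # Field names taken from the .values() call on the queryset above.
    full_name = serializers.CharField()
    data_score = serializers.FloatField()
    # Filled in by the Window(RowNumber()) annotation.
    index = serializers.IntegerField(read_only=True)
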
Example #10
def get_neighbour_pks(model, pk, filterset=None, ordering=None):
    '''
    Given a model and a pk that identify an object (model instance), an ordering
    (defaulting to the model's ordering) and optionally a filterset (from url_filter),
    return a tuple with the PKs of the prior and next neighbours in the list, either
    of all objects in that ordering or of the filtered list (if a filterset is provided).
    On success the tuple also carries the object's row number and the total count.

    :param model:        The model the object is an instance of
    :param pk:           The primary key of the model instance being considered
    :param filterset:    An optional filterset (see https://github.com/miki725/django-url-filter)
    :param ordering:     An optional ordering (otherwise the default model ordering is used). See: https://docs.djangoproject.com/en/2.0/ref/models/options/#ordering
    '''
    # If a filterset is provided ensure it's of the same model as specified (consistency).
    if filterset and not filterset.Meta.model == model:
        return (None, None)

    # Get the ordering list for the model (a list of field names).
    # See: https://docs.djangoproject.com/en/2.0/ref/models/options/#ordering
    if ordering is None:
        ordering = model._meta.ordering

    order_by = []
    for f in ordering:
        if f.startswith("-"):
            order_by.append(F(f[1:]).desc())
        else:
            order_by.append(F(f).asc())

    # A default order. We need an order or the window functions crash
    if len(order_by) == 0:
        order_by = ['pk']

    # Define the window functions for each neighbour
    window_lag = Window(expression=Lag("pk"), order_by=order_by)
    window_lead = Window(expression=Lead("pk"), order_by=order_by)
    window_rownnum = Window(expression=RowNumber(), order_by=order_by)

    # Get a queryset annotated with neighbours. If annotated attrs clash with existing attrs an exception
    # will be raised: https://code.djangoproject.com/ticket/11256
    try:
        # Start with all objects
        qs = model.objects.all()

        # Now apply a filterset if we have one
        if filterset is not None:
            # We respect the filterset, BUT we need to wrap it inside a subquery so
            # that we can apply a DISTINCT ON pk and avoid the duplicate tuples the
            # window functions can introduce when we match multiple remote objects.
            # So we constrain the result to one tuple per PK.
            #
            # FIXME: This does not guarantee that the provided pk appears in the query
            # results; I have not found a way to inject it when the filterset spans
            # related-object filters (JOINs). Needs testing in both cases.
            # qs = filterset.filter() | (model.objects.filter(pk=pk).distinct() & filterset.filter())
            qs = qs.filter(pk__in=Subquery(filterset.filter().distinct(
                'pk').order_by('pk').values('pk')))

        # Now order the objects properly
        qs = qs.order_by(*order_by)

        # Now annotate the queryset with the prior and next PKs
        qs = qs.annotate(neighbour_prior=window_lag,
                         neighbour_next=window_lead,
                         row_number=window_rownnum)
    except Exception:
        return (None, None)

    # Finally, a bit of trickery is needed to query the queryset itself. We can't add
    # this WHERE as a filter() because the LAG and LEAD window functions would then be
    # evaluated over the single-row result and come back empty (there is no preceding
    # or following row). So we run the window query over the whole table and extract
    # the one row we want. I wish I could find a way to do this in the Django ORM
    # rather than with a raw() call.

    # First we need the SQL from the existing query. Many on-line sources seem to recommend
    # str(qs.query) but this does not return reliable SQL! A bug in Django and much discussed:
    #    https://code.djangoproject.com/ticket/30132
    #    https://code.djangoproject.com/ticket/25705
    #    https://code.djangoproject.com/ticket/25092
    #    https://code.djangoproject.com/ticket/24991
    #    https://code.djangoproject.com/ticket/17741
    #
    # But this, it seems, is the reliable method; it involves dipping a little into
    # Django's internals (the SQL compiler).
    sql, params = qs.query.get_compiler(using=qs.db).as_sql()

    # Now we wrap the SQL
    sql = "SELECT * FROM ({}) ao WHERE {}={}".format(sql, model._meta.pk.name,
                                                     pk)

    # And create a new QuerySet
    ao = model.objects.raw(sql, params)

    try:
        if ao:
            if len(list(ao)) == 1:
                return (ao[0].neighbour_prior, ao[0].neighbour_next,
                        ao[0].row_number, qs.count())
            else:
                raise ValueError(
                    "Query error: object appears more than once in neighbour hunt."
                )
        else:
            return (None, None)  # Means the pk does not exist
    except Exception:
        return (None, None)
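
A short usage sketch for get_neighbour_pks; the Article model and the view are hypothetical:

from django.shortcuts import get_object_or_404, render

def article_detail(request, pk):
    # get_neighbour_pks returns at least (prior, next); on success it also
    # carries the row number and total count, which are ignored here.
    result = get_neighbour_pks(Article, pk, ordering=['-published', 'title'])
    prior_pk, next_pk = result[0], result[1]
    context = {
        'article': get_object_or_404(Article, pk=pk),
        'prior_pk': prior_pk,
        'next_pk': next_pk,
    }
    return render(request, 'article_detail.html', context)
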