Example #1
0
def _location_queryset_helper(domain, location_pks):
    """Return a SQLLocation queryset limited to fixture rows for `domain`.

    Wraps the `get_location_fixture_ids` SQL function in a CTE and joins
    it back to `SQLLocation`, annotating each location with its `path`
    (array of ancestor ids) and `depth` from the fixture function.
    """
    ids_cte = With(raw_cte_sql(
        """
        SELECT "id", "path", "depth"
        FROM get_location_fixture_ids(%s::TEXT, %s)
        """,
        [domain, location_pks],
        {"id": int_field, "path": int_array, "depth": int_field},
    ))

    locations = ids_cte.join(SQLLocation.objects.all(), id=ids_cte.col.id)
    locations = locations.annotate(
        path=ids_cte.col.path,
        depth=ids_cte.col.depth,
    )
    return locations.with_cte(ids_cte).prefetch_related('location_type', 'parent')
Example #2
0
    def get_ancestors(self, node, ascending=False, include_self=False):
        """Query node ancestors

        :param node: A model instance or a QuerySet or Q object querying
        the adjacency list model. If a QuerySet, it should query a
        single value with something like `.values('id')`. If Q the
        `include_self` argument will be ignored.
        :param ascending: Order of results. The default (`False`) gets
        results in descending order (root ancestor first, immediate
        parent last).
        :param include_self: Include `node` itself in the result.
        Ignored when `node` is a Q object.
        :returns: A `QuerySet` instance.
        """
        # Build the anchor criteria for the recursive CTE.
        if isinstance(node, Q):
            criteria = node
        elif isinstance(node, QuerySet):
            if _is_empty(node):
                return self.none()
            ordered = node.order_by()
            if include_self:
                criteria = Q(id__in=ordered)
            else:
                criteria = Q(id__in=ordered.values("parent_id"))
        else:
            if include_self:
                criteria = Q(id=node.id)
            else:
                criteria = Q(id=node.parent_id)

        def ancestors_query(cte):
            # Anchor: matched rows at depth 0. Recursive step: join each
            # row to its parent, incrementing depth per generation.
            base = self.filter(criteria).order_by().annotate(
                _depth=Value(0, output_field=field),
            )
            step = cte.join(
                self.all().order_by(),
                id=cte.col.parent_id,
            ).annotate(
                _depth=cte.col._depth + Value(1, output_field=field),
            )
            return base.union(step)

        cte = With.recursive(ancestors_query)
        direction = "" if ascending else "-"
        return cte.queryset().with_cte(cte).order_by(direction + "_depth")
Example #3
0
    def get_descendants(self, node, include_self=False):
        """Query node descendants

        :param node: A model instance or a QuerySet or Q object querying
        the adjacency list model. If a QuerySet, it should query a
        single value with something like `.values('id')`. If Q the
        `include_self` argument will be ignored.
        :param include_self: Include `node` itself in the result.
        Ignored when `node` is a Q object.
        :returns: A `QuerySet` instance.
        """
        ordering_col = self.model.ordering_col_attr

        # Dedup is needed when the criteria may match both a parent and
        # one of its descendants (Q object or QuerySet input), since the
        # recursion would then visit some rows more than once.
        discard_dups = False
        if isinstance(node, Q):
            where = node
            discard_dups = True
        elif include_self:
            if isinstance(node, QuerySet):
                if _is_empty(node):
                    return self.none()
                where = Q(id__in=node.order_by())
                discard_dups = True
            else:
                where = Q(id=node.id)
        elif isinstance(node, QuerySet):
            if _is_empty(node):
                return self.none()
            where = Q(parent_id__in=node.order_by())
            discard_dups = True
        else:
            where = Q(parent_id=node.id)

        def make_cte_query(cte):
            # Anchor: rows matching `where`, each starting a path array
            # of its own ordering value. Recursive step: join children
            # and append the child's ordering value to the parent's
            # accumulated path (used for depth-first result ordering).
            return self.filter(where).order_by().annotate(
                _cte_ordering=str_array(ordering_col),
            ).union(
                cte.join(
                    self.all().order_by(),
                    parent_id=cte.col.id,
                ).annotate(
                    _cte_ordering=array_append(
                        cte.col._cte_ordering,
                        F(ordering_col),
                    )
                ),
                all=True,
            )
        cte = With.recursive(make_cte_query)
        query = cte.queryset().with_cte(cte)

        if discard_dups:
            # Remove duplicates when the supplied Queryset or Q object
            # may contain/match both parents and children. For a given
            # id, retain the row with the longest path. TODO remove this
            # and ensure duplicates do not matter or the criteria never
            # matches both parents and children in all calling code.
            xdups = With(
                cte.queryset().annotate(
                    max_len=array_length(
                        F("_cte_ordering"),
                        output_field=field
                    ),
                ).distinct("id").order_by(
                    "id",
                    "-max_len",
                ).values(
                    "id",
                    "_cte_ordering",
                ),
                name="xdups"
            )
            query = query.annotate(
                _exclude_dups=Exists(SubQueryset(xdups.queryset().filter(
                    id=OuterRef("id"),
                    _cte_ordering=OuterRef("_cte_ordering"),
                )))
            ).filter(_exclude_dups=True).with_cte(xdups)

        # Depth-first ordering by the accumulated path array.
        return query.order_by(cte.col._cte_ordering)
    def get_descendants(self, node, include_self=False):
        """Query node descendants

        :param node: A model instance or a QuerySet or Q object querying
        the adjacency list model. If a QuerySet, it should query a
        single value with something like `.values('id')`. If Q the
        `include_self` argument will be ignored.
        :returns: A `QuerySet` instance.
        """
        ordering_col = self.model.ordering_col_attr

        # Q objects and QuerySets may match both a parent and its child,
        # which makes the recursion produce duplicate rows.
        discard_dups = False
        if isinstance(node, Q):
            where = node
            discard_dups = True
        elif isinstance(node, QuerySet):
            if _is_empty(node):
                return self.none()
            if include_self:
                where = Q(id__in=node.order_by())
            else:
                where = Q(parent_id__in=node.order_by())
            discard_dups = True
        elif include_self:
            where = Q(id=node.id)
        else:
            where = Q(parent_id=node.id)

        def make_cte_query(cte):
            # Anchor rows start a path array; the recursive step appends
            # each child's ordering value to its parent's path.
            initial = self.filter(where).order_by().annotate(
                _cte_ordering=str_array(ordering_col),
            )
            recursive = cte.join(
                self.all().order_by(),
                parent_id=cte.col.id,
            ).annotate(
                _cte_ordering=array_append(
                    cte.col._cte_ordering,
                    F(ordering_col),
                ),
            )
            return initial.union(recursive, all=True)

        cte = With.recursive(make_cte_query)
        query = cte.queryset().with_cte(cte)

        if discard_dups:
            # Remove duplicates when the supplied Queryset or Q object
            # may contain/match both parents and children. For a given
            # id, retain the row with the longest path. TODO remove this
            # and ensure duplicates do not matter or the criteria never
            # matches both parents and children in all calling code.
            xdups = With(
                cte.queryset().annotate(
                    max_len=array_length(
                        F("_cte_ordering"),
                        output_field=field,
                    ),
                ).distinct("id").order_by(
                    "id",
                    "-max_len",
                ).values(
                    "id",
                    "_cte_ordering",
                ),
                name="xdups",
            )
            longest_path = xdups.queryset().filter(
                id=OuterRef("id"),
                _cte_ordering=OuterRef("_cte_ordering"),
            )
            query = query.annotate(
                _exclude_dups=Exists(longest_path),
            ).filter(_exclude_dups=True).with_cte(xdups)

        # Depth-first ordering by the accumulated path array.
        return query.order_by(cte.col._cte_ordering)
Example #5
0
 def build(self):
     """Build a CTE selecting `self.columns` for every node in the
     trees matched by `self.query`.

     The CTE is named ``tree_cte`` and spans all nodes sharing a
     ``tree_id`` with the query's rows, not just the matched rows.
     """
     tree_ids = self.query.values('tree_id')
     return With(ContentNode.objects.filter(tree_id__in=tree_ids).values(
         *set(self.columns)),
                 name='tree_cte')
Example #6
0
def get_recursive_subgroups(user_group: UserGroup) -> "QuerySet[UserGroup]":
    """Return `user_group` and every group reachable below it.

    Walks the supergroup relation in reverse with a recursive CTE:
    the anchor is `user_group` itself, and each step adds groups whose
    `direct_supergroups` include a group already collected.
    """
    def subgroup_ids(cte):
        anchor = UserGroup.objects.filter(id=user_group.id).values("id")
        step = cte.join(UserGroup, direct_supergroups=cte.col.id).values("id")
        return anchor.union(step)

    cte = With.recursive(subgroup_ids)
    return cte.join(UserGroup, id=cte.col.id).with_cte(cte)
Example #7
0
def get_recursive_membership_groups(
        user_profile: UserProfile) -> "QuerySet[UserGroup]":
    """Return every group `user_profile` belongs to, directly or via
    subgroup membership.

    The anchor is the user's direct groups; each recursive step adds
    groups whose `direct_subgroups` include a group already collected.
    """
    def membership_ids(cte):
        anchor = user_profile.direct_groups.values("id")
        step = cte.join(UserGroup, direct_subgroups=cte.col.id).values("id")
        return anchor.union(step)

    cte = With.recursive(membership_ids)
    return cte.join(UserGroup, id=cte.col.id).with_cte(cte)
Example #8
0
    def handle(self, *args, **options):
        """Mark invalid ContentNodes as incomplete (``complete=False``).

        Runs a sequence of bulk checks: blank titles, missing licenses,
        blank custom-license descriptions, blank copyright holders,
        file resources without primary files, exercises without (valid)
        assessment items, and exercises with missing or malformed
        mastery models. Each pass logs the number of rows updated and
        its elapsed time.
        """
        start = time.time()

        # Mark invalid titles
        titlestart = time.time()
        logging.info('Marking blank titles...')
        count = ContentNode.objects.exclude(complete=False).filter(title='').order_by().update(complete=False)
        logging.info('Marked {} invalid titles (finished in {})'.format(count, time.time() - titlestart))

        # Mark invalid licenses
        # NOTE(review): exclude(complete=False, kind_id=TOPIC) skips only rows
        # matching BOTH conditions (already-incomplete topics) — confirm that
        # complete topics are intentionally still license-checked here.
        licensestart = time.time()
        logging.info('Marking blank licenses...')
        count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC).filter(license__isnull=True).order_by().update(complete=False)
        logging.info('Marked {} invalid licenses (finished in {})'.format(count, time.time() - licensestart))

        licensestart = time.time()
        logging.info('Marking blank license descriptions...')
        custom_licenses = list(License.objects.filter(is_custom=True).values_list("pk", flat=True))
        count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC)\
            .filter(license_id__in=custom_licenses).filter(Q(license_description__isnull=True) | Q(license_description=''))\
            .order_by().update(complete=False)
        logging.info('Marked {} invalid license descriptions (finished in {})'.format(count, time.time() - licensestart))

        licensestart = time.time()
        logging.info('Marking blank copyright holders...')
        copyright_licenses = list(License.objects.filter(copyright_holder_required=True).values_list("pk", flat=True))
        count = ContentNode.objects.exclude(complete=False, kind_id=content_kinds.TOPIC)\
            .filter(license_id__in=copyright_licenses).filter(Q(copyright_holder__isnull=True) | Q(copyright_holder=''))\
            .order_by().update(complete=False)
        logging.info('Marked {} invalid copyright holders (finished in {})'.format(count, time.time() - licensestart))

        # Mark invalid file resources: LEFT OUTER join against primary files,
        # then keep nodes with no matching file row (NULL join column).
        resourcestart = time.time()
        logging.info('Marking file resources...')
        file_check_query = With(File.objects.filter(preset__supplementary=False).values("contentnode_id").order_by(), name="t_file")

        query = file_check_query.join(ContentNode, id=file_check_query.col.contentnode_id, _join_type=LOUTER)\
            .with_cte(file_check_query) \
            .annotate(t_contentnode_id=file_check_query.col.contentnode_id) \
            .exclude(kind_id=content_kinds.TOPIC) \
            .exclude(kind_id=content_kinds.EXERCISE) \
            .exclude(complete=False) \
            .filter(t_contentnode_id__isnull=True)\
            .order_by()
        count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)
        logging.info('Marked {} invalid file resources (finished in {})'.format(count, time.time() - resourcestart))

        # Mark exercises without any assessment items (same LEFT OUTER
        # join + NULL-column pattern as the file check above).
        exercisestart = time.time()
        logging.info('Marking exercises...')

        has_questions_query = With(AssessmentItem.objects.all().values("contentnode_id").order_by(), name="t_assessmentitem")

        query = has_questions_query.join(ContentNode, id=has_questions_query.col.contentnode_id, _join_type=LOUTER)\
            .with_cte(has_questions_query) \
            .annotate(t_contentnode_id=has_questions_query.col.contentnode_id) \
            .filter(kind_id=content_kinds.EXERCISE) \
            .exclude(complete=False) \
            .filter(t_contentnode_id__isnull=True)\
            .order_by()
        exercisestart = time.time()
        count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)

        logging.info('Marked {} questionless exercises (finished in {})'.format(count, time.time() - exercisestart))

        # Mark exercises that have invalid assessment items. This is an
        # INNER join: a match means the node has at least one bad item.
        exercisestart = time.time()

        exercise_check_query = With(AssessmentItem.objects.exclude(type=exercises.PERSEUS_QUESTION)
                                    .filter(
                                        Q(question='') |
                                        Q(answers='[]') |
                                        # hack to check if no correct answers
                                        (~Q(type=exercises.INPUT_QUESTION) & ~Q(answers__iregex=r'"correct":\s*true'))).order_by(), name="t_assessmentitem")

        # Fix: join on exercise_check_query's own column. The original joined
        # on has_questions_query.col.contentnode_id, which referenced the wrong
        # CTE object and only worked because both CTEs share the same name.
        query = exercise_check_query.join(ContentNode, id=exercise_check_query.col.contentnode_id)\
            .with_cte(exercise_check_query) \
            .annotate(t_contentnode_id=exercise_check_query.col.contentnode_id) \
            .filter(kind_id=content_kinds.EXERCISE) \
            .exclude(complete=False) \
            .order_by()

        count = ContentNode.objects.filter(id__in=query.order_by().values_list('id', flat=True)).update(complete=False)

        logging.info('Marked {} invalid exercises (finished in {})'.format(count, time.time() - exercisestart))

        exercisestart = time.time()
        logging.info('Marking mastery_model less exercises...')
        count = ContentNode.objects.exclude(complete=False).filter(kind_id=content_kinds.EXERCISE).filter(~Q(extra_fields__has_key='mastery_model'))\
            .order_by().update(complete=False)
        logging.info('Marked {} mastery_model less exercises(finished in {})'.format(count, time.time() - exercisestart))

        exercisestart = time.time()
        logging.info('Marking bad mastery model exercises...')
        count = ContentNode.objects.exclude(complete=False).filter(kind_id=content_kinds.EXERCISE)\
            .filter(Q(extra_fields__mastery_model=exercises.M_OF_N) & (~Q(extra_fields__has_key='m') | ~Q(extra_fields__has_key='n')))\
            .order_by().update(complete=False)
        logging.info('Marked {} bad mastery model exercises (finished in {})'.format(count, time.time() - exercisestart))

        logging.info('Mark incomplete command completed in {}s'.format(time.time() - start))
Example #9
0
    def test_named_ctes(self):
        """Verify explicitly-named CTEs compose: two recursive CTEs
        (paths, group membership) feed a third aggregating CTE, and all
        three attach to one queryset via chained `with_cte` calls.
        """
        def make_paths_cte(paths):
            # Roots anchor the recursion; each step extends the
            # space-separated path string with the child's name.
            return Region.objects.filter(parent__isnull=True).values(
                "name",
                path=F("name"),
            ).union(
                paths.join(Region, parent=paths.col.name).values(
                    "name",
                    path=Concat(
                        paths.col.path,
                        Value(" "),
                        F("name"),
                        output_field=text_field,
                    ),
                ),
                all=True,
            )

        paths = With.recursive(make_paths_cte, name="region_paths")

        def make_groups_cte(groups):
            # Maps every region to each of its ancestors (including
            # itself), so ancestor totals can include descendants.
            return paths.join(Region, name=paths.col.name).values(
                "name",
                parent_path=paths.col.path,
                parent_name=F("name"),
            ).union(
                groups.join(Region, parent=groups.col.name).values(
                    "name",
                    parent_path=groups.col.parent_path,
                    parent_name=groups.col.parent_name,
                ),
                all=True,
            )

        groups = With.recursive(make_groups_cte, name="region_groups")

        region_totals = With(
            groups.join(Order, region_id=groups.col.name).values(
                name=groups.col.parent_name,
                path=groups.col.parent_path,
            ).annotate(
                orders_count=Count("id"),
                region_total=Sum("amount"),
            ),
            name="region_totals",
        )

        regions = (
            region_totals.join(Region, name=region_totals.col.name).with_cte(
                paths).with_cte(groups).with_cte(region_totals).annotate(
                    path=region_totals.col.path,
                    # count of orders in this region and all subregions
                    orders_count=region_totals.col.orders_count,
                    # sum of order amounts in this region and all subregions
                    region_total=region_totals.col.region_total,
                ).order_by("path"))
        # (removed leftover debug `print(regions.query)`)

        data = [(r.name, r.orders_count, r.region_total) for r in regions]
        self.assertEqual(data, [
            ('proxima centauri', 4, 2033),
            ('proxima centauri b', 3, 33),
            ('sun', 18, 1374),
            ('earth', 7, 132),
            ('moon', 3, 6),
            ('mars', 3, 123),
            ('mercury', 3, 33),
            ('venus', 4, 86),
        ])
Example #10
0
    def merge_m2m(self, data, field, prefetch):
        """Merge prefetched many-to-many related objects into `data`.

        For each row in `data`, attaches the list of related remote
        objects under `prefetch.to_attr` (or `prefetch.field`). When the
        prefetch queryset is sliced, the limits are re-applied per local
        object using a CTE + RowNumber window so each local row gets its
        own window of related rows.
        """
        # Strategy: pull out all my IDs, do a reverse filter on remote object.
        # e.g.: If prefetching User.groups, do
        #       Groups.filter(users__in=<user_ids>)

        ids = self._get_ids(data)

        base_qs = prefetch.query.queryset  # base queryset on remote model
        remote_pk_field = base_qs.model._meta.pk.attname  # get pk field name
        reverse_field = get_reverse_m2m_field_name(field)

        if reverse_field is None:
            # Note: We can't just reuse self.queryset here because it's
            #       been sliced already.
            filters = {field.attname + '__isnull': False}
            qs = self.queryset.model.objects.filter(pk__in=ids, **filters)
            joins = list(qs.values_list(field.attname, self.pk_field))
        else:
            # Get reverse mapping (for User.groups, get Group.users)
            # Note: `qs` already has base filter applied on remote model.
            filters = {f'{reverse_field}__in': ids}
            if has_limits(base_qs):
                # remove limits, then use CTE + RowNumber
                # to re-introduce them using window functions
                base_qs = base_qs._clone()
                low, high = get_limits(base_qs)
                clear_limits(base_qs)
                order_by = base_qs.query.order_by
                if not order_by:
                    # if there is no order, we need to use pk
                    order_by = ['pk']
                cte = With(
                    base_qs.annotate(
                        **{
                            '..row':
                            Window(expression=RowNumber(),
                                   partition_by=[reverse_field],
                                   order_by=order_by)
                        }).filter(**filters))
                # rows low+1..high within each partition reproduce the
                # original slice [low:high] per local object
                joins = cte.queryset().with_cte(cte).filter(**{
                    '..row__lte': high,
                    '..row__gt': low
                }).order_by(*order_by).distinct()
            else:
                # no limits, use simple filtering
                joins = base_qs.filter(**filters)

            joins = list(joins.values_list(remote_pk_field, reverse_field))

        # Fetch remote objects, as values.
        remote_ids = {o[0] for o in joins}

        query = prefetch.query._clone()
        # remove limits to get IDs without extra filtering issues
        if has_limits(query.queryset):
            clear_limits(query.queryset)

        remote_objects = query.get_ids(remote_ids).execute()
        id_map = self._make_id_map(remote_objects, pk_field=remote_pk_field)

        # Create mapping of local ID -> remote objects
        # Fix: was `defaultdict(List)` — the typing alias is not a valid
        # factory and raises TypeError on first missing-key access.
        to_attr = prefetch.to_attr or prefetch.field
        object_map = defaultdict(list)
        for remote_id, local_id in joins:
            if remote_id in id_map:
                object_map[local_id].append(id_map[remote_id])

        # Merge into working data set.
        for row in data:
            row[to_attr] = object_map[row.get(self.pk_field, row['pk'])]

        return data
Example #11
0
def disaster_filter_function(filters: dict, download_type: str,
                             values: List[str]):
    """Build the download queryset for disaster spending by award.

    Aggregates FinancialAccountsByAwards dollar amounts per award in a
    CTE, joins the totals onto AwardSearchView, applies the award-level
    filters, and annotates the aggregations for `download_type`.
    """
    aggregation_mapping = {
        "disaster_recipient": _disaster_recipient_aggregations
    }

    def_codes = filters["def_codes"]
    query = filters.get("query")
    award_type_codes = filters.get("award_type_codes")

    # Keep awards that have at least one nonzero dollar figure.
    award_filters = [
        ~Q(total_loan_value=0)
        | ~Q(total_obligation_by_award=0)
        | ~Q(total_outlay_by_award=0)
    ]
    if query:
        query_text = query["text"]
        text_match = Q()
        for field in query["fields"]:
            text_match |= Q(**{f"{field}__icontains": query_text})
        award_filters.append(text_match)
    if award_type_codes:
        award_filters.append(Q(type__in=award_type_codes))

    faba_filters = [
        filter_by_defc_closed_periods(),
        Q(disaster_emergency_fund__code__in=def_codes),
    ]

    # Outlay = gross outlays plus both downward-adjustment refund columns,
    # counted only in the latest closed periods.
    outlay_components = (
        Coalesce(F("gross_outlay_amount_by_award_cpe"), 0)
        + Coalesce(F("ussgl487200_down_adj_pri_ppaid_undel_orders_oblig_refund_cpe"), 0)
        + Coalesce(F("ussgl497200_down_adj_pri_paid_deliv_orders_oblig_refund_cpe"), 0)
    )
    dollar_annotations = {
        "inner_obligation": Coalesce(Sum("transaction_obligated_amount"), 0),
        "inner_outlay": Coalesce(
            Sum(
                Case(
                    When(
                        filter_by_latest_closed_periods(),
                        then=outlay_components,
                    ),
                    default=Value(0),
                )
            ),
            0,
        ),
    }

    # Per-award dollar totals, dropping awards where both figures are zero.
    cte = With(
        FinancialAccountsByAwards.objects
        .filter(*faba_filters)
        .values("award_id")
        .annotate(**dollar_annotations)
        .exclude(inner_obligation=0, inner_outlay=0)
    )

    results = (
        cte.join(AwardSearchView, award_id=cte.col.award_id)
        .with_cte(cte)
        .annotate(
            total_obligation_by_award=cte.col.inner_obligation,
            total_outlay_by_award=cte.col.inner_outlay,
        )
        .filter(*award_filters)
        .values(*values)
        .annotate(**aggregation_mapping[download_type]())
    )
    return results
Example #12
0
    def construct_loan_queryset(self, faba_grouping_column, base_model, base_model_column):
        """Build loan aggregations grouped by `faba_grouping_column`.

        Returns a Bunch holding the joined queryset plus column
        references for award count, obligation, outlay, and face value
        of loans, so callers can annotate/select them as needed.

        :param faba_grouping_column: column name (str) or expression to
            group FinancialAccountsByAwards rows by.
        :param base_model: model to join the aggregates onto.
        :param base_model_column: column on `base_model` matching the
            grouping key.
        """
        # Accept either a column name or a ready-made expression.
        grouping_key = F(faba_grouping_column) if isinstance(faba_grouping_column, str) else faba_grouping_column

        # CTE 1: raw FABA rows for loans in provided DEFC / closed
        # submissions, with submission metadata needed downstream.
        base_values = With(
            FinancialAccountsByAwards.objects.filter(
                Q(award__type__in=loan_type_mapping), self.all_closed_defc_submissions, self.is_in_provided_def_codes,
            )
            .annotate(
                grouping_key=grouping_key,
                total_loan_value=F("award__total_loan_value"),
                reporting_fiscal_year=F("submission__reporting_fiscal_year"),
                reporting_fiscal_period=F("submission__reporting_fiscal_period"),
                quarter_format_flag=F("submission__quarter_format_flag"),
            )
            .filter(grouping_key__isnull=False)
            .values(
                "grouping_key",
                "financial_accounts_by_awards_id",
                "award_id",
                "transaction_obligated_amount",
                "gross_outlay_amount_by_award_cpe",
                "reporting_fiscal_year",
                "reporting_fiscal_period",
                "quarter_format_flag",
                "total_loan_value",
            ),
            "base_values",
        )

        # Outlays only count in each fiscal year's final submission
        # period; build an OR filter matching those periods.
        q = Q()
        for sub in final_submissions_for_all_fy():
            q |= (
                Q(reporting_fiscal_year=sub.fiscal_year)
                & Q(quarter_format_flag=sub.is_quarter)
                & Q(reporting_fiscal_period=sub.fiscal_period)
            )

        # CTE 2: obligation/outlay dollar totals per grouping key.
        aggregate_faba = With(
            base_values.queryset()
            .values("grouping_key")
            .annotate(
                obligation=Coalesce(Sum("transaction_obligated_amount"), 0),
                outlay=Coalesce(Sum(Case(When(q, then=F("gross_outlay_amount_by_award_cpe")), default=Value(0),)), 0,),
            )
            .values("grouping_key", "obligation", "outlay"),
            "aggregate_faba",
        )

        # CTE 3: distinct (group, award) pairs so each award is counted
        # once per group regardless of how many FABA rows it has.
        distinct_awards = With(
            base_values.queryset().values("grouping_key", "award_id", "total_loan_value").distinct(), "distinct_awards",
        )

        # CTE 4: award counts and loan face values per grouping key.
        aggregate_awards = With(
            distinct_awards.queryset()
            .values("grouping_key")
            .annotate(award_count=Count("award_id"), face_value_of_loan=Coalesce(Sum("total_loan_value"), 0))
            .values("grouping_key", "award_count", "face_value_of_loan"),
            "aggregate_awards",
        )

        # Join both aggregate CTEs onto the base model and attach all
        # four CTEs to the final queryset.
        return Bunch(
            award_count_column=aggregate_awards.col.award_count,
            obligation_column=aggregate_faba.col.obligation,
            outlay_column=aggregate_faba.col.outlay,
            face_value_of_loan_column=aggregate_awards.col.face_value_of_loan,
            queryset=aggregate_awards.join(
                aggregate_faba.join(base_model, **{base_model_column: aggregate_faba.col.grouping_key}),
                **{base_model_column: aggregate_awards.col.grouping_key},
            )
            .with_cte(base_values)
            .with_cte(aggregate_faba)
            .with_cte(distinct_awards)
            .with_cte(aggregate_awards),
        )