Exemple #1
0
    def _filter_request(self, request: request.Request,
                        queryset: QuerySet) -> QuerySet:
        """Narrow ``queryset`` using the GET parameters of ``request``.

        Each parameter is applied only when present and non-empty:

        * ``id`` / ``uuid`` -- comma-separated values matched with ``__in``.
        * ``search`` -- space-separated terms. A ``has:<key>`` term requires
          ``<key>`` to exist in ``properties``; terms with any other
          ``<matcher>:`` prefix are ignored. Terms without a colon are joined
          back with spaces and matched case-insensitively against
          ``properties`` and ``persondistinctid__distinct_id``.
        * ``cohort`` -- matched against ``cohort__id``.
        * ``properties`` -- JSON text wrapped in a ``Filter`` and converted
          with ``properties_to_Q`` for ``self.team_id``.

        The returned queryset always prefetches ``persondistinctid_set`` into
        the ``distinct_ids_cache`` attribute.
        """
        params = request.GET

        if params.get("id"):
            queryset = queryset.filter(id__in=params["id"].split(","))
        if params.get("uuid"):
            queryset = queryset.filter(uuid__in=params["uuid"].split(","))

        if params.get("search"):
            contains = []
            for part in params["search"].split(" "):
                if ":" not in part:
                    contains.append(part)
                    continue
                # Split on the first colon only. A plain split(":") yields
                # three or more pieces for a term such as "has:a:b" and the
                # two-name unpacking would raise ValueError.
                matcher, key = part.split(":", 1)
                if matcher == "has":
                    # Matches for example has:email or has:name
                    queryset = queryset.filter(properties__has_key=key)
            needle = " ".join(contains)
            queryset = queryset.filter(
                Q(properties__icontains=needle)
                | Q(persondistinctid__distinct_id__icontains=needle)
            ).distinct("id")

        if params.get("cohort"):
            queryset = queryset.filter(cohort__id=params["cohort"])

        if params.get("properties"):
            # Named property_filter so the ``filter`` builtin is not shadowed.
            property_filter = Filter(
                data={"properties": json.loads(params["properties"])})
            queryset = queryset.filter(
                properties_to_Q(property_filter.properties,
                                team_id=self.team_id))

        queryset = queryset.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
        return queryset
Exemple #2
0
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Return the actions of ``team_id`` ordered by descending id.

    The ``steps`` relation is prefetched ordered by ``id``. When ``params``
    holds a truthy ``include_count`` entry, every row is additionally
    annotated with ``count``.
    """
    wants_count = params.get("include_count")
    if wants_count:
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = ActionStep.objects.order_by("id")
    with_steps = queryset.prefetch_related(
        Prefetch("steps", queryset=ordered_steps))
    team_actions = with_steps.filter(team_id=team_id)
    return team_actions.order_by("-id")
Exemple #3
0
    def _filter_request(self, request: request.Request, queryset: QuerySet,
                        team: Team) -> QuerySet:
        """Apply the GET parameters of ``request`` as filters on ``queryset``.

        Handles ``id`` and ``uuid`` (comma-separated lists), ``search``
        (space-separated terms), ``cohort`` and ``properties`` (JSON text
        evaluated for ``team``). The result always prefetches
        ``persondistinctid_set`` into ``distinct_ids_cache``.
        """
        params = request.GET

        id_list = params.get("id")
        if id_list:
            queryset = queryset.filter(id__in=id_list.split(","))

        uuid_list = params.get("uuid")
        if uuid_list:
            queryset = queryset.filter(uuid__in=uuid_list.split(","))

        search = params.get("search")
        if search:
            free_text = []
            for term in search.split(" "):
                if ":" not in term:
                    free_text.append(term)
                    continue
                # For a term containing a colon, the segment after the first
                # colon must be a key of ``properties``.
                wanted_key = term.split(":")[1]
                queryset = queryset.filter(properties__has_key=wanted_key)
            needle = " ".join(free_text)
            text_match = (
                Q(properties__icontains=needle)
                | Q(persondistinctid__distinct_id__icontains=needle)
            )
            queryset = queryset.filter(text_match).distinct("id")

        cohort_id = params.get("cohort")
        if cohort_id:
            queryset = queryset.filter(cohort__id=cohort_id)

        raw_properties = params.get("properties")
        if raw_properties:
            property_filter = Filter(
                data={"properties": json.loads(raw_properties)})
            queryset = queryset.filter(
                property_filter.properties_to_Q(team_id=team.pk))

        return queryset.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
Exemple #4
0
def prefetch_league_tournament_related(qs: QuerySet) -> QuerySet:
    """Add the ``tournament`` prefetch lookups to ``qs``.

    The lookups cover participants, rounds, matches, seats and results below
    ``tournament``. Player and pool-user relations are loaded through
    ``USER_QUERY_SET``; two of the deck relations are loaded through a
    ``PoolDeck`` queryset limited with ``only()``. The order of the lookups is
    kept exactly as listed.
    """

    def users(lookup):
        # Prefetch a user relation through the shared user queryset.
        return Prefetch(lookup, queryset=USER_QUERY_SET)

    def slim_decks(lookup):
        # Build a fresh deck queryset per call, limited to the listed fields.
        decks = PoolDeck.objects.all().only(
            'id', 'name', 'created_at', 'pool_id')
        return Prefetch(lookup, queryset=decks)

    lookups = (
        # participants
        'tournament__participants',
        slim_decks('tournament__participants__deck'),
        users('tournament__participants__player'),
        'tournament__participants__deck__pool',
        users('tournament__participants__deck__pool__user'),
        # rounds, matches and seats
        'tournament__rounds',
        'tournament__rounds__matches',
        'tournament__rounds__matches__seats',
        'tournament__rounds__matches__seats__participant',
        users('tournament__rounds__matches__seats__participant__player'),
        slim_decks('tournament__rounds__matches__seats__participant__deck'),
        users(
            'tournament__rounds__matches__seats__participant__deck__pool__user'
        ),
        'tournament__rounds__matches__seats__result',
        'tournament__rounds__matches__result',
        # results
        'tournament__results',
        'tournament__results__participant',
        users('tournament__results__participant__player'),
        'tournament__results__participant__deck',
        'tournament__results__participant__deck__pool',
        users('tournament__results__participant__deck__pool__user'),
    )
    return qs.prefetch_related(*lookups)
Exemple #5
0
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Return the actions of ``team_id`` ordered by descending id.

    When ``params`` has a truthy ``TREND_FILTER_TYPE_ACTIONS`` entry, the
    JSON text under ``"actions"`` is parsed, wrapped in a ``Filter``, and the
    queryset is limited to the ids of the resulting actions. A truthy
    ``include_count`` adds a ``count`` annotation. ``steps`` are prefetched
    ordered by ``id``.
    """
    if params.get(TREND_FILTER_TYPE_ACTIONS):
        requested = json.loads(params.get("actions", "[]"))
        selected = Filter({"actions": requested}).actions
        queryset = queryset.filter(pk__in=[entry.id for entry in selected])

    if params.get("include_count"):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = ActionStep.objects.order_by("id")
    queryset = queryset.prefetch_related(Prefetch("steps", queryset=ordered_steps))
    team_actions = queryset.filter(team_id=team_id)
    return team_actions.order_by("-id")
Exemple #6
0
def optimize_query(query: QuerySet, meta: FieldMeta) -> QuerySet:
    """Limit columns and preload relations on ``query`` according to ``meta``.

    ``get_only_cols`` supplies the arguments for ``only()`` and
    ``get_related_cols`` those for ``select_related()`` and
    ``prefetch_related()``. A step is skipped when its list is empty.
    """
    target_model = query.model
    columns = get_only_cols(target_model, meta.sections)
    joined, prefetched = get_related_cols(target_model, meta.sub_fields)

    optimized = query
    if columns:
        optimized = optimized.only(*columns)
    if joined:
        optimized = optimized.select_related(*joined)
    if prefetched:
        optimized = optimized.prefetch_related(*prefetched)
    return optimized
Exemple #7
0
    def prefetch_current_user_permissions(self, queryset: models.QuerySet):
        """Prefetch ``permission_group__permissions`` for the request's user.

        Permissions of the user named ``settings.ANONYMOUS_USER_NAME`` are
        always included. For a non-anonymous user, permissions attached to
        that user or to any of the user's groups are included as well.
        """
        current_user = self.request.user
        visible = models.Q(user__username=settings.ANONYMOUS_USER_NAME)

        if not current_user.is_anonymous:
            own = models.Q(user=current_user)
            through_groups = models.Q(group__in=current_user.groups.all())
            visible = visible | (own | through_groups)

        permissions = self.qs_permission_model.filter(visible)
        lookup = models.Prefetch("permission_group__permissions",
                                 queryset=permissions)
        return queryset.prefetch_related(lookup)
Exemple #8
0
    def query_optimization(self, source: QuerySet, fields, prefix=''):
        """Add prefetches to ``source`` for the requested ``fields``.

        Args:
            source: Queryset that every prefetch lookup is attached to. The
                same queryset is threaded through the recursive calls, so all
                lookups must be spelled relative to it.
            fields: Mapping of field name to a dict with at least ``'fields'``
                (the nested selection) and ``'kwargs'`` (passed to the nested
                node's ``filter``).
            prefix: Lookup path, ending in ``'__'``, that leads from
                ``source`` to the object this node describes. Empty at the
                top level.

        Returns:
            ``source`` with the prefetches applied.
        """
        self.check_if_requesting_missing_fields(fields)
        for field_name, data in fields.items():
            lookup_path = prefix + field_name

            # Explicit prefetch: lookups declared on the field itself.
            fields_to_prefetch = getattr(self, field_name).prefetch
            for declared in fields_to_prefetch or ():
                source = source.prefetch_related(prefix + declared)

            # Related prefetch: only fields backed by a nested node.
            node = self.get_node_for(field_name)
            if not node:
                continue

            if fields_to_prefetch is None:
                nested_queryset = node.filter(node.Meta.source,
                                              **data['kwargs'])
                source = source.prefetch_related(
                    Prefetch(lookup_path, queryset=nested_queryset))

            # Carry the accumulated prefix down. Passing only
            # ``field_name + '__'`` would drop the ancestors' path, and
            # lookups three or more levels deep would no longer be rooted at
            # ``source``.
            source = node.query_optimization(source,
                                             data['fields'],
                                             prefix=lookup_path + '__')

        return source
    def filter_queryset(self, request: Request, queryset: QuerySet,
                        view: GenericViewSet):
        """Trim and preload ``queryset`` to match the view's serializer fields.

        Nothing is changed unless the request method is ``GET`` and the view's
        serializer class derives from ``FlexFieldsSerializerMixin``.

        The view may set these attributes (defaults in parentheses):

        * ``auto_remove_fields_from_query`` (``True``) -- restrict the query
          with ``only()`` to ``required_query_fields`` plus every serializer
          field that resolves to a non-relation or many-to-one model field.
        * ``auto_select_related_on_query`` (``True``) -- for expanded
          serializer fields, ``select_related()`` the many-to-one relations
          and ``prefetch_related()`` the other relations.
        * ``required_query_fields`` (``[]``) -- extra names always passed to
          ``only()``.

        Returns:
            The adjusted queryset, or ``queryset`` unchanged.
        """
        if (not issubclass(view.get_serializer_class(),
                           FlexFieldsSerializerMixin)
                or request.method != "GET"):
            return queryset

        auto_remove_fields_from_query = getattr(
            view, "auto_remove_fields_from_query", True)
        auto_select_related_on_query = getattr(
            view, "auto_select_related_on_query", True)
        required_query_fields = list(
            getattr(view, "required_query_fields", []))

        # The serializer is expected to be a FlexFieldsSerializerMixin here.
        serializer = view.get_serializer(
            context=view.get_serializer_context())
        serializer.apply_flex_fields()

        # Resolve every serializer field to its model field exactly once and
        # keep the pair, so the derived lists below need no further lookups.
        resolved = []
        for field in serializer.fields.values():
            model_field = self._get_field(field.source, queryset.model)
            if model_field:
                resolved.append((field, model_field))

        model_fields = [model_field for _, model_field in resolved]
        nested_model_fields = [
            model_field for field, model_field in resolved
            if field.field_name in serializer.expanded_fields
        ]

        if auto_remove_fields_from_query:
            queryset = queryset.only(*(required_query_fields + [
                model_field.name for model_field in model_fields
                if not model_field.is_relation or model_field.many_to_one
            ]))

        if auto_select_related_on_query and nested_model_fields:
            relations = [
                model_field for model_field in nested_model_fields
                if model_field.is_relation
            ]
            to_join = [f.name for f in relations if f.many_to_one]
            to_prefetch = [f.name for f in relations if not f.many_to_one]

            # select_related() without arguments makes Django follow every
            # non-null foreign key, so it is only called with names to join.
            if to_join:
                queryset = queryset.select_related(*to_join)
            if to_prefetch:
                queryset = queryset.prefetch_related(*to_prefetch)

        return queryset
Exemple #10
0
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Return the actions of ``team_id``, highest id first.

    A truthy ``TREND_FILTER_TYPE_ACTIONS`` entry in ``params`` limits the
    result to the actions produced by a ``Filter`` built from the JSON text
    under ``'actions'``. A truthy ``include_count`` adds a ``count``
    annotation. ``steps`` are prefetched ordered by ``id``.
    """
    if params.get(TREND_FILTER_TYPE_ACTIONS):
        payload = json.loads(params.get('actions', '[]'))
        action_filter = Filter({'actions': payload})
        wanted_pks = [action.id for action in action_filter.actions]
        queryset = queryset.filter(pk__in=wanted_pks)

    if params.get('include_count'):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    steps_by_id = Prefetch('steps', queryset=ActionStep.objects.order_by('id'))
    queryset = queryset.prefetch_related(steps_by_id)
    queryset = queryset.filter(team_id=team_id)
    return queryset.order_by('-id')
Exemple #11
0
    def identifiers_dataframe(self, qs: QuerySet) -> pd.DataFrame:
        """
        Build a dataframe of HERO and PubMed identifier IDs per reference.

        Identifier databases other than HERO and PubMed are not exported; a
        warning is logged when any are present.

        Args:
            qs (QuerySet): A queryset

        Returns:
            pd.DataFrame: A pandas dataframe with a ``reference_id`` column
            plus ``hero_id`` and ``pubmed_id`` columns
        """
        qs = qs.prefetch_related("identifiers")

        # (identifier database, output column), in export order
        exported = (
            (constants.HERO, "hero_id"),
            (constants.PUBMED, "pubmed_id"),
        )

        # warn about identifier databases this export does not capture
        present = set(
            qs.values_list("identifiers__database", flat=True).distinct())
        diff = present - {None, constants.HERO, constants.PUBMED}
        if diff:
            logging.warning(
                f"Missing some identifier IDs from id export: {diff}")

        # collect the IDs per reference, one database at a time
        data = defaultdict(dict)
        for database, column in exported:
            rows = qs.filter(identifiers__database=database).values_list(
                "id", "identifiers__unique_id")
            for hawc_id, unique_id in rows:
                data[hawc_id][column] = int(unique_id)

        # create a dataframe
        df = pd.DataFrame.from_dict(data, orient="index")
        df = df.reset_index()
        df = df.rename(columns={"index": "reference_id"})

        # set missing columns
        for column in ("hero_id", "pubmed_id"):
            if column not in df.columns:
                df[column] = None

        return df
Exemple #12
0
    def _filter_request(self, request: request.Request, queryset: QuerySet, team: Team) -> QuerySet:
        """Apply the ``id``, ``search`` and ``cohort`` GET parameters to ``queryset``.

        ``id`` is a comma-separated list. ``search`` is split on spaces: a
        token containing ``:`` requires the segment after its first colon to
        be a key of ``properties``, and the remaining tokens are re-joined
        and matched case-insensitively against ``properties``. ``cohort`` is
        delegated to ``self._filter_cohort``. The result always prefetches
        ``persondistinctid_set`` into ``distinct_ids_cache``.
        """
        params = request.GET

        id_list = params.get('id')
        if id_list:
            queryset = queryset.filter(id__in=id_list.split(','))

        search = params.get('search')
        if search:
            free_text = []
            for token in search.split(' '):
                if ':' not in token:
                    free_text.append(token)
                    continue
                queryset = queryset.filter(properties__has_key=token.split(':')[1])
            queryset = queryset.filter(properties__icontains=' '.join(free_text))

        if params.get('cohort'):
            queryset = self._filter_cohort(request, queryset, team)

        return queryset.prefetch_related(Prefetch('persondistinctid_set', to_attr='distinct_ids_cache'))
Exemple #13
0
    def decorate_queryset(
        cls,
        feature_type: FeatureType,
        queryset: models.QuerySet,
        output_crs: CRS,
        **params,
    ) -> models.QuerySet:
        """Prepare ``queryset`` for rendering ``feature_type``.

        Relations reported by ``cls._get_prefetch_related`` are prefetched,
        and the query is limited to ``pk`` plus the ORM fields of the feature
        type's elements. Extra ``params`` are accepted but not used here.
        """
        # Prefetch the relations up front rather than fetching them later.
        prefetches = cls._get_prefetch_related(feature_type, output_crs)
        if prefetches:
            queryset = queryset.prefetch_related(*prefetches)

        # Request only the columns that are actually shown.
        shown = []
        for element in feature_type.xsd_type.elements:
            if element.is_many and not element.is_array:
                # Skip M2M elements; array fields are kept.
                continue
            shown.append(element.orm_field)

        return queryset.only("pk", *shown)
Exemple #14
0
    def _filter_request(self, request: request.Request, queryset: QuerySet,
                        team: Team) -> QuerySet:
        """Apply the request's query parameters as filters on ``queryset``.

        Handles ``id`` (comma-separated), ``search`` (space-separated terms),
        ``cohort``, ``properties`` (JSON text evaluated for ``team``) and
        ``category``. A category of ``identified`` keeps rows with
        ``is_identified=True`` and ``anonymous`` excludes them. The result
        always prefetches ``persondistinctid_set`` into
        ``distinct_ids_cache``.
        """
        params = request.GET

        id_list = params.get("id")
        if id_list:
            queryset = queryset.filter(id__in=id_list.split(","))

        search = params.get("search")
        if search:
            free_text = []
            for term in search.split(" "):
                if ":" not in term:
                    free_text.append(term)
                    continue
                # The segment after the first colon must be a properties key.
                queryset = queryset.filter(
                    properties__has_key=term.split(":")[1])
            needle = " ".join(free_text)
            text_match = (
                Q(properties__icontains=needle)
                | Q(persondistinctid__distinct_id__icontains=needle)
            )
            queryset = queryset.filter(text_match).distinct("id")

        cohort_id = params.get("cohort")
        if cohort_id:
            queryset = queryset.filter(cohort__id=cohort_id)

        raw_properties = params.get("properties")
        if raw_properties:
            property_filter = Filter(
                data={"properties": json.loads(raw_properties)})
            queryset = queryset.filter(
                property_filter.properties_to_Q(team_id=team.pk))

        category = request.query_params.get("category")
        if category == "identified":
            queryset = queryset.filter(is_identified=True)
        elif category == "anonymous":
            queryset = queryset.exclude(is_identified=True)

        return queryset.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
Exemple #15
0
    def prefetch(self, queryset: models.QuerySet) -> models.QuerySet:
        """Recursively prefetch joins to speed up the database query.

        Builds a queryset over ``self.model`` with this join's filters and
        sort order applied, preloads the related fields that are not handled
        by a nested join, and attaches it to ``queryset`` as a ``Prefetch``
        on ``self.field``. Every nested join in ``self.joins`` is then
        applied to the result in turn.

        Args:
            queryset: Queryset to attach the prefetch to.

        Returns:
            A queryset with this join and all nested joins prefetched.
        """
        subquery = self.model.objects.all()

        if self.filters:
            combined = reduce(operator.and_, [f.get() for f in self.filters])
            subquery = subquery.filter(combined)

        if self.sort:
            subquery = subquery.order_by(*self.sort)

        # Fields covered by a nested join get their own Prefetch below.
        one_fields = [f for f in self._one_fields if f not in self.joins]
        many_fields = [f for f in self._many_fields if f not in self.joins]

        # select_related() without arguments makes Django follow every
        # non-null foreign key, so it is only called when there are fields.
        if one_fields:
            subquery = subquery.select_related(*one_fields)
        if many_fields:
            subquery = subquery.prefetch_related(*many_fields)

        new = queryset.prefetch_related(
            models.Prefetch(self.field, queryset=subquery))

        # Recursively prefetch inner joins
        for j in self.joins.values():
            new = j.prefetch(new)

        return new
Exemple #16
0
 def filter_user_not_in(self, queryset: QuerySet, name, value):
     """Prefetch ``users`` on ``queryset`` without the user whose pk is ``value``.

     Only the prefetched ``users`` are restricted; the rows of ``queryset``
     are returned unfiltered. ``name`` is not used.
     """
     other_users = models.User.objects.exclude(pk=value)
     users_lookup = Prefetch('users', queryset=other_users)
     return queryset.prefetch_related(users_lookup)
Exemple #17
0
 def prefetch_related_fields(qs: QuerySet):
     """Return ``qs`` with its commonly used relations prefetched."""
     lookups = ("source", "object__type", "parent_object", "problem_type")
     return qs.prefetch_related(*lookups)
Exemple #18
0
    def bulk_updater(self,
                     queryset: QuerySet,
                     update_fields: Optional[Set[str]] = None,
                     return_pks: bool = False,
                     local_only: bool = False,
                     querysize: Optional[int] = None) -> Optional[Set[Any]]:
        """
        Update local computed fields and descent in the dependency tree by calling
        ``update_dependent`` for dependent models.

        This method does the local field updates on `queryset`:

            - eval local `MRO` of computed fields
            - expand `update_fields`
            - apply optional `select_related` and `prefetch_related` rules to `queryset`
            - walk all records and recalculate fields in `update_fields`
            - aggregate changeset and save as batched `bulk_update` to the database

        By default this method triggers the update of dependent models by calling
        ``update_dependent`` with `update_fields` (next level of tree traversal).
        This can be suppressed by setting `local_only=True`.

        If `return_pks` is set, the method returns a set of altered pks of `queryset`.

        Args:
            queryset: Records whose computed fields should be recalculated.
            update_fields: Optional set of field names used to determine the
                local MRO. When non-empty, the set is extended IN PLACE with
                the computed fields of that MRO.
            return_pks: Return the set of pks of changed records instead of ``None``.
            local_only: Skip the ``update_dependent`` call for dependent models.
            querysize: Optional value handed to ``get_querysize`` to determine
                the slice size used while iterating `queryset`.

        Returns:
            The set of pks of records whose computed values changed when
            `return_pks` is set, else ``None``.
        """
        model: Type[Model] = queryset.model

        # distinct issue workaround
        # the workaround is needed for already sliced/distinct querysets coming from outside
        # TODO: distinct is a major query perf smell, and is in fact only needed on back relations
        #       may need some rework in _querysets_for_update
        #       ideally we find a way to avoid it for forward relations
        #       also see #101
        if queryset.query.can_filter() and not queryset.query.distinct_fields:
            queryset = queryset.distinct()
        else:
            # cannot apply distinct() here, re-select the same rows by pk instead
            queryset = model.objects.filter(
                pk__in=subquery_pk(queryset, queryset.db))

        # correct update_fields by local mro
        mro = self.get_local_mro(model, update_fields)
        fields: Any = set(
            mro)  # FIXME: narrow type once issue in django-stubs is resolved
        if update_fields:
            # note: this mutates the set passed in by the caller
            update_fields.update(fields)

        select = self.get_select_related(model, fields)
        prefetch = self.get_prefetch_related(model, fields)
        if select:
            queryset = queryset.select_related(*select)
        if prefetch:
            queryset = queryset.prefetch_related(*prefetch)

        # pks of records with at least one changed computed field
        pks = []
        if fields:
            q_size = self.get_querysize(model, fields, querysize)
            # records pending a write in the current batch
            change: List[Model] = []
            for elem in slice_iterator(queryset, q_size):
                # note on the loop: while it is technically not needed to batch things here,
                # we still prebatch to not cause memory issues for very big querysets
                has_changed = False
                # recompute in mro order, a record is only written if a value differs
                for comp_field in mro:
                    new_value = self._compute(elem, model, comp_field)
                    if new_value != getattr(elem, comp_field):
                        has_changed = True
                        setattr(elem, comp_field, new_value)
                if has_changed:
                    change.append(elem)
                    pks.append(elem.pk)
                # flush once a full batch has accumulated
                if len(change) >= self._batchsize:
                    self._update(queryset, change, fields)
                    change = []
            # flush the remaining partial batch
            if change:
                self._update(queryset, change, fields)

        # trigger dependent comp field updates from changed records
        # other than before we exit the update tree early, if we have no changes at all
        # also cuts the update tree for recursive deps (tree-like)
        if not local_only and pks:
            self.update_dependent(model.objects.filter(pk__in=pks),
                                  model,
                                  fields,
                                  update_local=False)
        return set(pks) if return_pks else None
Exemple #19
0
    def __prefetch(query: QuerySet, prefetch: bool) -> QuerySet:
        """Return ``query``, with the category relations prefetched on request.

        When ``prefetch`` is falsy the queryset is returned untouched.
        """
        if not prefetch:
            return query
        return query.prefetch_related("categories", "leaf_category")