Example #1
def _projects_filter_for_non_privileged_users(user: User,
                                              queryset: QuerySet,
                                              project_relation: str,
                                              action: str = 'create'):
    if not user.is_anonymous and (user.is_admin or user.is_superuser):
        return queryset.distinct()

    # Construct the public projects filter field lookup.
    project_filter = project_relation + 'is_public'

    # Split the queryset in two: one where the related Projects are public
    # and one where they are private.
    public_objects = queryset.filter(**{project_filter: True}).distinct()
    private_objects = queryset.filter(**{project_filter: False}).distinct()

    # In case of an anonymous user, only return the public objects
    if user.is_anonymous:
        unpublished_exclude = project_relation + 'publishingstatus__status'
        queryset = public_objects.exclude(
            **{
                unpublished_exclude: PublishingStatus.STATUS_UNPUBLISHED
            }).distinct()

    # Otherwise, check to which objects the user has (change) permission
    elif private_objects.exists():
        include_user_owned = hasattr(queryset.model, 'user')
        if action == 'list':
            # The view permission is new; previously only the change
            # permission existed. To avoid adding view permissions for all
            # existing objects, we also check whether the user has change
            # permission, which implies view permission.
            change_permission = type(
                private_objects[0])._meta.db_table.replace('_', '.change_')
            change_filter = user.get_permission_filter(change_permission,
                                                       project_relation,
                                                       include_user_owned)
            change_objects = private_objects.filter(change_filter).distinct()

            # Check if user has view permission on the queryset
            view_permission = change_permission.replace('.change_', '.view_')
            view_filter = user.get_permission_filter(view_permission,
                                                     project_relation,
                                                     include_user_owned)
            view_objects = private_objects.filter(view_filter).distinct()

            private_objects = (change_objects | view_objects).distinct()
        else:
            permission = type(private_objects[0])._meta.db_table.replace(
                '_', '.change_')
            filter_ = user.get_permission_filter(permission, project_relation,
                                                 include_user_owned)
            private_objects = private_objects.filter(filter_).distinct()

        queryset = public_objects | private_objects

    return queryset.distinct()
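
A minimal usage sketch with hypothetical names (`request`, `Result`): in the lookups above, `project_relation` is a prefix that already ends in `'__'` (e.g. `'project__'`), which is why plain concatenation yields valid lookups such as `project__is_public`.

# Hypothetical call site: restrict Result objects to what request.user may see.
visible = _projects_filter_for_non_privileged_users(
    request.user, Result.objects.all(), 'project__', action='list')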
Example #2
    def _filter_queryset(self, queryset: models.QuerySet, prefix: str):
        """Internal logic to filter a queryset on request-parameters for a temporal slice."""
        if not self.is_versioned or self.slice_value == "*":
            # allow this method to be called unconditionally on objects,
            # and allow ?geldigOp=* to return all data
            return queryset

        # Either take a given ?geldigOp=yyyy-mm-dd OR ?geldigOp=NOW()
        slice_value = self.slice_value or now()
        range_fields = self.slice_range_fields or self.default_range_fields

        if range_fields is not None:
            # start <= value AND (end IS NULL OR value < end)
            start, end = map(to_snake_case, range_fields)
            return queryset.filter(
                Q(**{f"{prefix}{start}__lte": slice_value})
                & (
                    Q(**{f"{prefix}{end}__isnull": True})
                    | Q(**{f"{prefix}{end}__gt": slice_value})
                )
            ).order_by(f"-{prefix}{start}")
        else:
            # Last attempt to get only the current temporal record: order by sequence.
            # This does SELECT DISTINCT ON(identifier) ... ORDER BY identifier, sequence DESC.
            # An alternative would be something like `HAVING sequence = MAX(SELECT sequence FROM ..)`
            identifier = self.table_schema.identifier[0]  # from ["identificatie", "volgnummer"]
            sequence_name = self.table_schema.temporal.identifier
            return queryset.distinct(
                *queryset.query.distinct_fields, f"{prefix}{identifier}"
            ).order_by(f"{prefix}{identifier}", f"-{prefix}{sequence_name}")
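
The fallback branch leans on PostgreSQL's DISTINCT ON; a sketch of what it reduces to for an identifier pair like `["identificatie", "volgnummer"]` and an empty prefix:

# Keep only the row with the highest volgnummer per identificatie:
queryset.distinct("identificatie").order_by("identificatie", "-volgnummer")
# SQL: SELECT DISTINCT ON (identificatie) ... ORDER BY identificatie, volgnummer DESC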
Example #3
def get_fields_by_rain_date(date: date,
                            fields_qs: Optional[QuerySet] = None) -> QuerySet:

    if fields_qs is None:
        fields_qs = get_fields()

    fields_qs = fields_qs.filter(rain__date__gte=date)

    return fields_qs.distinct()
Example #4
def get_fields_by_rain_quantity(quantity: float,
                                fields_qs: Optional[QuerySet] = None) -> QuerySet:

    if fields_qs is None:
        fields_qs = get_fields()

    fields_qs = fields_qs.filter(rain__quantity__gte=quantity)

    return fields_qs.distinct()
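
Both helpers accept and return a QuerySet, so they chain; a sketch assuming `get_fields()` from the snippets above (the rain quantity unit is not stated in the source):

from datetime import date

# Fields with at least 5.0 units of rain recorded on or after 2024-01-01.
qs = get_fields_by_rain_date(date(2024, 1, 1))
qs = get_fields_by_rain_quantity(5.0, fields_qs=qs)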
Example #5
    def export_raw_accesslogs_to_stream_lowlevel(self, stream: IO,
                                                 queryset: QuerySet):
        text_id_to_text = {
            dt['id']: dt['text']
            for dt in DimensionText.objects.all().values('id', 'text')
        }
        rt_to_dimensions = {
            rt.pk: rt.dimensions_sorted
            for rt in ReportType.objects.filter(pk__in=queryset.distinct(
                'report_type_id').values('report_type_id'))
        }
        # get all field names for the CSV
        field_name_map = {(f'{dim}__{attr}' if attr else dim): dim
                          for dim, attr in self.implicit_dims.items()}
        field_name_map.update(
            {f'target__{attr}': attr
             for attr in self.title_attrs})
        field_names = list(field_name_map.values())
        for tr, dims in rt_to_dimensions.items():
            field_names += [
                dim.short_name for dim in dims
                if dim.short_name not in field_names
            ]
        field_names.append('value')
        # values that will be retrieved from the accesslogs
        values = ['value', 'report_type_id']
        values += list(field_name_map.keys())
        values += [f'dim{i+1}' for i in range(7)]
        # create the writer
        writer = csv.DictWriter(stream, field_names)
        writer.writeheader()
        # write the records
        rec_num = 0  # keep the final store_progress() safe on an empty queryset
        for rec_num, log in enumerate(
                queryset.values(*values).iterator()):  # type: int, dict
            record = {
                attr_out: log.get(attr_in)
                for attr_in, attr_out in field_name_map.items()
            }
            record['value'] = log['value']
            record['date'] = log['date']
            for i, dim in enumerate(rt_to_dimensions[log['report_type_id']]):
                value = log.get(f'dim{i+1}')
                if dim.type == dim.TYPE_TEXT:
                    record[dim.short_name] = text_id_to_text.get(value, value)
                else:
                    record[dim.short_name] = value
            writer.writerow(record)
            if rec_num % 999 == 0:
                self.store_progress(rec_num + 1)
        self.store_progress(rec_num + 1)
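
A usage sketch; `task` stands for whatever object carries this method in the source project, and `AccessLog` is an assumed model name:

import io

# Export every access log row into an in-memory CSV buffer.
buffer = io.StringIO()
task.export_raw_accesslogs_to_stream_lowlevel(buffer, AccessLog.objects.all())
csv_text = buffer.getvalue()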
Example #6
    def build_queryset(self, queryset: models.QuerySet) -> models.QuerySet:
        if self.query:
            queryset = self.query.apply_to_queryset(queryset)
        if self.order_by:
            queryset = queryset.order_by(*self.order_by)
        if self.distinct:
            queryset = queryset.distinct(*self.distinct)
        if self.prefetch_trees:
            select_related = []
            for prefetch_tree in self.prefetch_trees:
                tree_select_related = self._flatten_prefetch_tree(
                    prefetch_tree=prefetch_tree)
                if tree_select_related is not None:
                    select_related += tree_select_related
            queryset = queryset.select_related(
                *[sr for sr in select_related if sr is not None])
        return queryset
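
A sketch of driving such a specification object, with `spec` and `Author` as hypothetical names; note that `distinct(*fields)` maps to DISTINCT ON and is PostgreSQL-only:

# Hypothetical spec exposing the attributes build_queryset() reads.
spec.order_by = ['name']
spec.distinct = ['name']   # SELECT DISTINCT ON ("name") ... -- PostgreSQL only
qs = spec.build_queryset(Author.objects.all())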
Example #7
    def filter_list(queryset: QuerySet, request: Request, name_filter: str,
                    name_field: str) -> QuerySet:
        items_selected = request.query_params.getlist(
            '{name_filter}[]'.format(name_filter=name_filter))
        items_selected = [name.upper() for name in items_selected]

        if len(items_selected) > 0:
            if json.loads(
                    request.query_params.get(
                        '{name_filter}Exclude'.format(name_filter=name_filter),
                        'false')):
                queryset = queryset.exclude(
                    **{
                        '{name_field}__in'.format(name_field=name_field):
                        items_selected
                    })
            else:
                queryset = queryset.filter(
                    **{
                        '{name_field}__in'.format(name_field=name_field):
                        items_selected
                    })

        return queryset.distinct()
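
A usage sketch with a hypothetical `Item` model: a query string like `?color[]=red&color[]=blue&colorExclude=true` excludes items whose `color` is RED or BLUE, since the selected values are upper-cased before filtering.

qs = filter_list(Item.objects.all(), request, 'color', 'color')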
Example #8
    def filter_queryset(
        self, queryset: QuerySet, feature_type: FeatureType
    ) -> QuerySet:
        """Apply the filters and lookups to the queryset.

        :param queryset: The queryset to filter.
        :param feature_type: The feature type that the queryset originated from.
        """
        if self.is_empty:
            return queryset.none()

        if self.extra_lookups:
            # Each time an expression node calls add_extra_lookup(),
            # the parent should have used apply_extra_lookups()
            raise RuntimeError("apply_extra_lookups() was not called")

        # All are applied at once.
        if self.annotations:
            queryset = queryset.annotate(**self.annotations)

        # copy, so that += below does not extend self.lookups in place
        lookups = list(self.lookups)
        try:
            lookups += self.typed_lookups[feature_type.name]
        except KeyError:
            pass

        if lookups:
            queryset = queryset.filter(*lookups)

        if self.ordering:
            queryset = queryset.order_by(*self.ordering)

        if self.distinct:
            queryset = queryset.distinct()

        return queryset
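
A usage sketch, assuming `fes_filter` is an instance of this class and that `feature_type.model` points at the underlying Django model; the try/except above is shorthand for `self.typed_lookups.get(feature_type.name, [])`.

# Annotations, lookups, ordering and DISTINCT are applied in a single pass.
qs = fes_filter.filter_queryset(feature_type.model.objects.all(), feature_type)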
Example #9
def build_queryset(queryset: QuerySet,
                   params: QueryDict) -> Tuple[QuerySet, bool]:

    filtered = False

    name = params.get('name', None)
    if name is not None:
        queryset = queryset.filter(name__icontains=name)
        filtered = True

    last_name = params.get('last_name', None)
    if last_name is not None:
        queryset = queryset.filter(last_name__icontains=last_name)
        filtered = True

    naming = params.get('naming', None)
    if naming == SubjectSegment.NAMING_NAMED:
        queryset = queryset.exclude(name='', last_name='')
        filtered = True
    elif naming == SubjectSegment.NAMING_UNNAMED:
        queryset = queryset.filter(name='', last_name='')
        filtered = True

    tasks = params.getlist('tasks', None)
    if tasks is not None and len(tasks):
        queryset = queryset.filter(faces__task__in=tasks)
        filtered = True

    tasks_tags = params.getlist('tasks_tags', None)
    if tasks_tags is not None and len(tasks_tags):
        queryset = queryset.filter(faces__task__tags__in=tasks_tags)
        filtered = True

    min_time = params.get('min_time', None)
    if min_time is not None:
        queryset = queryset.filter(faces__created_at__time__gte=min_time)
        filtered = True

    max_time = params.get('max_time', None)
    if max_time is not None:
        queryset = queryset.filter(faces__created_at__time__lte=max_time)
        filtered = True

    min_date = params.get('min_date', None)
    if min_date is not None:
        queryset = queryset.filter(faces__created_at__date__gte=min_date)
        filtered = True

    max_date = params.get('max_date', None)
    if max_date is not None:
        queryset = queryset.filter(faces__created_at__date__lte=max_date)
        filtered = True

    max_age = params.get('max_age', None)
    if max_age is not None:
        try:
            max_age = int(max_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(
                birthdate__gt=Subject.birthdate_from_age(max_age))
            filtered = True

    min_age = params.get('min_age', None)
    if min_age is not None:
        try:
            min_age = int(min_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(
                birthdate__lt=Subject.birthdate_from_age(min_age))
            filtered = True

    min_pred_age = params.get('min_pred_age', None)
    if min_pred_age is not None:
        try:
            min_pred_age = int(min_pred_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(pred_age__gte=min_pred_age)
            filtered = True

    max_pred_age = params.get('max_pred_age', None)
    if max_pred_age is not None:
        try:
            max_pred_age = int(max_pred_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(pred_age__lte=max_pred_age)
            filtered = True

    sex = params.get('sex', None)
    if sex is not None:
        queryset = queryset.filter(sex=sex)
        filtered = True

    pred_sex = params.get('pred_sex', None)
    if pred_sex is not None:
        queryset = queryset.filter(pred_sex=pred_sex)
        filtered = True

    skin = params.get('skin', None)
    if skin is not None:
        queryset = queryset.filter(skin=skin)
        filtered = True

    order_by = params.get('order_by', None)
    if order_by is not None:
        queryset = queryset.order_by(order_by)

    if filtered:
        queryset = queryset.distinct()

    return queryset, filtered
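
A usage sketch; `Subject` comes from the snippet itself (`Subject.birthdate_from_age`), while the parameter values are made up:

from django.http import QueryDict

params = QueryDict('name=ann&max_age=30&order_by=name')
qs, filtered = build_queryset(Subject.objects.all(), params)
# filtered is True here, so the result was de-duplicated with .distinct()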
Example #10
    def export_raw_accesslogs_to_stream_lowlevel(self, stream: IO,
                                                 queryset: QuerySet):
        start = monotonic()
        text_id_to_text = {
            dt['id']: dt['text']
            for dt in DimensionText.objects.all().values('id', 'text')
        }
        logger.debug('Finished loading text remaps: %.2f s',
                     monotonic() - start)
        rt_to_dimensions = {
            rt.pk: rt.dimensions_sorted
            for rt in ReportType.objects.filter(pk__in=queryset.distinct(
                'report_type_id').values('report_type_id'))
        }
        logger.debug('Finished loading report_types and dimensions: %.2f s',
                     monotonic() - start)
        # get all field names for the CSV
        field_name_map = {(f'{dim}__{attr}' if attr else dim): dim
                          for dim, attr in self.implicit_dims.items()}
        field_name_map.update(
            {f'target__{attr}': attr
             for attr in self.title_attrs})
        field_names = list(field_name_map.values())
        for tr, dims in rt_to_dimensions.items():
            field_names += [
                dim.short_name for dim in dims
                if dim.short_name not in field_names
            ]
        field_names.append('value')
        logger.debug('Finished preparing field names: %.2f s',
                     monotonic() - start)
        # values that will be retrieved from the accesslogs
        values = ['value', 'report_type_id']
        values += list(field_name_map.keys())
        values += [f'dim{i+1}' for i in range(7)]
        # create the writer
        writer = csv.DictWriter(stream, field_names)
        writer.writeheader()
        logger.debug('Finished preparing CSV writer: %.2f s',
                     monotonic() - start)
        # write the records
        rec_num = 0
        with cachalot_disabled(True):
            # disable cachalot for this query because it returns a potentially huge number of records
            # and would clog the cache
            for rec_num, log in enumerate(
                    queryset.values(*values).iterator()):  # type: int, dict
                record = {
                    attr_out: log.get(attr_in)
                    for attr_in, attr_out in field_name_map.items()
                }
                record['value'] = log['value']
                record['date'] = log['date']
                for i, dim in enumerate(
                        rt_to_dimensions[log['report_type_id']]):
                    value = log.get(f'dim{i+1}')
                    if dim.type == dim.TYPE_TEXT:
                        record[dim.short_name] = text_id_to_text.get(
                            value, value)
                    else:
                        record[dim.short_name] = value
                writer.writerow(record)
                if rec_num % 999 == 0:
                    self.store_progress(rec_num + 1)
                if rec_num % 99999 == 0:
                    logger.debug('Stored %d records: %.2f s', rec_num,
                                 monotonic() - start)

        self.store_progress(rec_num + 1)
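
This variant adds timing logs and bypasses the django-cachalot query cache for the large streaming read; a sketch of that pattern in isolation, with `AccessLog` as an assumed model name:

from cachalot.api import cachalot_disabled

# A one-off export should not flood the query cache, so bypass it while
# streaming rows with .iterator(); True disables caching for all queries,
# as in the export above.
with cachalot_disabled(True):
    for row in AccessLog.objects.values('value', 'date').iterator():
        ...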
Example #11
    def bulk_updater(self,
                     queryset: QuerySet,
                     update_fields: Optional[Set[str]] = None,
                     return_pks: bool = False,
                     local_only: bool = False,
                     querysize: Optional[int] = None) -> Optional[Set[Any]]:
        """
        Update local computed fields and descend into the dependency tree by
        calling ``update_dependent`` for dependent models.

        This method does the local field updates on `queryset`:

            - eval local `MRO` of computed fields
            - expand `update_fields`
            - apply optional `select_related` and `prefetch_related` rules to `queryset`
            - walk all records and recalculate fields in `update_fields`
            - aggregate changeset and save as batched `bulk_update` to the database

        By default this method triggers the update of dependent models by calling
        ``update_dependent`` with `update_fields` (next level of tree traversal).
        This can be suppressed by setting `local_only=True`.

        If `return_pks` is set, the method returns a set of altered pks of `queryset`.
        """
        model: Type[Model] = queryset.model

        # distinct issue workaround
        # the workaround is needed for already sliced/distinct querysets coming from outside
        # TODO: distinct is a major query perf smell, and is in fact only needed on back relations
        #       may need some rework in _querysets_for_update
        #       ideally we find a way to avoid it for forward relations
        #       also see #101
        if queryset.query.can_filter() and not queryset.query.distinct_fields:
            queryset = queryset.distinct()
        else:
            queryset = model.objects.filter(
                pk__in=subquery_pk(queryset, queryset.db))

        # correct update_fields by local mro
        mro = self.get_local_mro(model, update_fields)
        fields: Any = set(
            mro)  # FIXME: narrow type once issue in django-stubs is resolved
        if update_fields:
            update_fields.update(fields)

        select = self.get_select_related(model, fields)
        prefetch = self.get_prefetch_related(model, fields)
        if select:
            queryset = queryset.select_related(*select)
        if prefetch:
            queryset = queryset.prefetch_related(*prefetch)

        pks = []
        if fields:
            q_size = self.get_querysize(model, fields, querysize)
            change: List[Model] = []
            for elem in slice_iterator(queryset, q_size):
                # note on the loop: while it is technically not needed to batch things here,
                # we still prebatch to not cause memory issues for very big querysets
                has_changed = False
                for comp_field in mro:
                    new_value = self._compute(elem, model, comp_field)
                    if new_value != getattr(elem, comp_field):
                        has_changed = True
                        setattr(elem, comp_field, new_value)
                if has_changed:
                    change.append(elem)
                    pks.append(elem.pk)
                if len(change) >= self._batchsize:
                    self._update(queryset, change, fields)
                    change = []
            if change:
                self._update(queryset, change, fields)

        # trigger dependent computed field updates from changed records
        # unlike before, we exit the update tree early if there were no changes
        # at all, which also cuts the update tree for recursive deps (tree-like)
        if not local_only and pks:
            self.update_dependent(model.objects.filter(pk__in=pks),
                                  model,
                                  fields,
                                  update_local=False)
        return set(pks) if return_pks else None
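
A usage sketch for the method above; `resolver`, `Item`, `dirty_pks` and the field name `total` are hypothetical, while the keyword names come from the signature:

# Recompute computed fields on a subset of rows, propagate the changes to
# dependent models, and collect the pks of rows that actually changed.
changed = resolver.bulk_updater(
    Item.objects.filter(pk__in=dirty_pks),
    update_fields={'total'},
    return_pks=True,
)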