def _projects_filter_for_non_privileged_users(user: User, queryset: QuerySet,
                                              project_relation: str, action: str = 'create'):
    if not user.is_anonymous and (user.is_admin or user.is_superuser):
        return queryset.distinct()

    # Construct the public projects filter field lookup.
    project_filter = project_relation + 'is_public'

    # Filter the object list into two querysets;
    # One where the related Projects are public and one where they are private
    public_objects = queryset.filter(**{project_filter: True}).distinct()
    private_objects = queryset.filter(**{project_filter: False}).distinct()

    # In case of an anonymous user, only return the public objects
    if user.is_anonymous:
        unpublished_exclude = project_relation + 'publishingstatus__status'
        queryset = public_objects.exclude(
            **{unpublished_exclude: PublishingStatus.STATUS_UNPUBLISHED}).distinct()

    # Otherwise, check to which objects the user has (change) permission
    elif private_objects.exists():
        include_user_owned = hasattr(queryset.model, 'user')
        if action == 'list':
            # The view permission is new, and previously only the change
            # permission existed. To avoid adding new view permissions for
            # all the objects, we also check if a user has change
            # permissions, which implicitly implies view permissions.
            change_permission = type(
                private_objects[0])._meta.db_table.replace('_', '.change_')
            change_filter = user.get_permission_filter(change_permission,
                                                       project_relation,
                                                       include_user_owned)
            change_objects = private_objects.filter(change_filter).distinct()

            # Check if user has view permission on the queryset
            view_permission = change_permission.replace('.change_', '.view_')
            view_filter = user.get_permission_filter(view_permission,
                                                     project_relation,
                                                     include_user_owned)
            view_objects = private_objects.filter(view_filter).distinct()

            private_objects = (change_objects | view_objects).distinct()
        else:
            permission = type(private_objects[0])._meta.db_table.replace(
                '_', '.change_')
            filter_ = user.get_permission_filter(permission, project_relation,
                                                 include_user_owned)
            private_objects = private_objects.filter(filter_).distinct()

        queryset = public_objects | private_objects

    return queryset.distinct()
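# Illustrative usage sketch (assumption, not from the source): filtering a viewset's
# queryset for the requesting user. ``Update`` and the ``'project__'`` relation prefix
# are made-up names for the example; the prefix must end in '__' so that the lookup
# becomes 'project__is_public'.
#
#   visible = _projects_filter_for_non_privileged_users(
#       request.user, Update.objects.all(), 'project__', action='list')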
def _filter_queryset(self, queryset: models.QuerySet, prefix: str):
    """Internal logic to filter a queryset on request-parameters for a temporal slice."""
    if not self.is_versioned or self.slice_value == "*":
        # allow this method to be called unconditionally on objects,
        # and allow ?geldigOp=* to return all data
        return queryset

    # Either take a given ?geldigOp=yyyy-mm-dd OR ?geldigOp=NOW()
    slice_value = self.slice_value or now()
    range_fields = self.slice_range_fields or self.default_range_fields

    if range_fields is not None:
        # start <= value AND (end IS NULL OR value < end)
        start, end = map(to_snake_case, range_fields)
        return queryset.filter(
            Q(**{f"{prefix}{start}__lte": slice_value})
            & (
                Q(**{f"{prefix}{end}__isnull": True})
                | Q(**{f"{prefix}{end}__gt": slice_value})
            )
        ).order_by(f"-{prefix}{start}")
    else:
        # Last attempt to get only the current temporal record; order by sequence.
        # does SELECT DISTINCT ON(identifier) ... ORDER BY identifier, sequence DESC
        # Alternative would be something like `HAVING sequence = MAX(SELECT sequence FROM ..)`
        identifier = self.table_schema.identifier[0]  # from ["identificatie", "volgnummer"]
        sequence_name = self.table_schema.temporal.identifier
        return queryset.distinct(
            *queryset.query.distinct_fields, f"{prefix}{identifier}"
        ).order_by(f"{prefix}{identifier}", f"-{prefix}{sequence_name}")
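# Illustrative sketch (assumption, not from the source): with range fields named
# ["beginGeldigheid", "eindGeldigheid"], an empty prefix and ?geldigOp=2021-01-01,
# the method above effectively builds:
#
#   queryset.filter(
#       Q(begin_geldigheid__lte=date(2021, 1, 1))
#       & (Q(eind_geldigheid__isnull=True) | Q(eind_geldigheid__gt=date(2021, 1, 1)))
#   ).order_by("-begin_geldigheid")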
def get_fields_by_rain_date(date: date, fields_qs: QuerySet = None) -> QuerySet:
    if fields_qs is None:
        fields_qs = get_fields()
    fields_qs = fields_qs.filter(rain__date__gte=date)
    return fields_qs.distinct()
def get_fields_by_rain_quantity(quantity: float, fields_qs: QuerySet = None) -> QuerySet:
    if fields_qs is None:
        fields_qs = get_fields()
    fields_qs = fields_qs.filter(rain__quantity__gte=quantity)
    return fields_qs.distinct()
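# Illustrative sketch (assumption, not from the source): the two helpers above compose,
# since each accepts and returns a queryset. For example, fields with at least 10 mm of
# rain recorded since 2023-06-01:
#
#   fields = get_fields_by_rain_quantity(
#       10.0, fields_qs=get_fields_by_rain_date(date(2023, 6, 1)))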
def export_raw_accesslogs_to_stream_lowlevel(self, stream: IO, queryset: QuerySet):
    text_id_to_text = {
        dt['id']: dt['text']
        for dt in DimensionText.objects.all().values('id', 'text')
    }
    rt_to_dimensions = {
        rt.pk: rt.dimensions_sorted
        for rt in ReportType.objects.filter(pk__in=queryset.distinct(
            'report_type_id').values('report_type_id'))
    }
    # get all field names for the CSV
    field_name_map = {(f'{dim}__{attr}' if attr else dim): dim
                      for dim, attr in self.implicit_dims.items()}
    field_name_map.update(
        {f'target__{attr}': attr for attr in self.title_attrs})
    field_names = list(field_name_map.values())
    for tr, dims in rt_to_dimensions.items():
        field_names += [
            dim.short_name for dim in dims
            if dim.short_name not in field_names
        ]
    field_names.append('value')
    # values that will be retrieved from the accesslogs
    values = ['value', 'report_type_id']
    values += list(field_name_map.keys())
    values += [f'dim{i+1}' for i in range(7)]
    # create the writer
    writer = csv.DictWriter(stream, field_names)
    writer.writeheader()
    # write the records
    rec_num = 0  # keep defined even if the queryset is empty
    for rec_num, log in enumerate(
            queryset.values(*values).iterator()):  # type: int, dict
        record = {
            attr_out: log.get(attr_in)
            for attr_in, attr_out in field_name_map.items()
        }
        record['value'] = log['value']
        record['date'] = log['date']
        for i, dim in enumerate(rt_to_dimensions[log['report_type_id']]):
            value = log.get(f'dim{i+1}')
            if dim.type == dim.TYPE_TEXT:
                record[dim.short_name] = text_id_to_text.get(value, value)
            else:
                record[dim.short_name] = value
        writer.writerow(record)
        if rec_num % 999 == 0:
            self.store_progress(rec_num + 1)
    self.store_progress(rec_num + 1)
def build_queryset(self, queryset: models.QuerySet) -> models.QuerySet:
    if self.query:
        queryset = self.query.apply_to_queryset(queryset)
    if self.order_by:
        queryset = queryset.order_by(*self.order_by)
    if self.distinct:
        queryset = queryset.distinct(*self.distinct)
    if self.prefetch_trees:
        select_related = []
        for prefetch_tree in self.prefetch_trees:
            tree_select_related = self._flatten_prefetch_tree(
                prefetch_tree=prefetch_tree)
            if tree_select_related is not None:
                select_related += tree_select_related
        queryset = queryset.select_related(
            *[sr for sr in select_related if sr is not None])
    return queryset
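# Note (illustrative, not from the source): passing field names to .distinct() emits
# SELECT DISTINCT ON (...) and is PostgreSQL-only; Django also requires order_by() to
# start with the same fields, in the same order. The field names below are made up:
#
#   queryset.distinct('owner_id').order_by('owner_id', '-created_at')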
def filter_list(queryset: QuerySet, request: Request, name_filter: str,
                name_field: str) -> QuerySet:
    items_selected = request.query_params.getlist(
        '{name_filter}[]'.format(name_filter=name_filter))
    items_selected = [name.upper() for name in items_selected]
    if len(items_selected) > 0:
        if json.loads(
                request.query_params.get(
                    '{name_filter}Exclude'.format(name_filter=name_filter),
                    'false')):
            queryset = queryset.exclude(
                **{
                    '{name_field}__in'.format(name_field=name_field):
                    items_selected
                })
        else:
            queryset = queryset.filter(
                **{
                    '{name_field}__in'.format(name_field=name_field):
                    items_selected
                })
    return queryset.distinct()
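# Illustrative sketch (assumption, not from the source): a request such as
# ?country[]=nl&country[]=de&countryExclude=true handled with
#
#   queryset = filter_list(queryset, request, 'country', 'country_code')
#
# uppercases the values and excludes rows whose country_code is in ['NL', 'DE'];
# without the Exclude flag it filters to those rows instead.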
def filter_queryset(
    self, queryset: QuerySet, feature_type: FeatureType
) -> QuerySet:
    """Apply the filters and lookups to the queryset.

    :param queryset: The queryset to filter.
    :param feature_type: The feature type that the queryset originated from.
    """
    if self.is_empty:
        return queryset.none()

    if self.extra_lookups:
        # Each time an expression node calls add_extra_lookup(),
        # the parent should have used apply_extra_lookups()
        raise RuntimeError("apply_extra_lookups() was not called")

    # All are applied at once.
    if self.annotations:
        queryset = queryset.annotate(**self.annotations)

    lookups = self.lookups
    try:
        lookups += self.typed_lookups[feature_type.name]
    except KeyError:
        pass

    if lookups:
        queryset = queryset.filter(*lookups)
    if self.ordering:
        queryset = queryset.order_by(*self.ordering)
    if self.distinct:
        queryset = queryset.distinct()
    return queryset
def build_queryset(queryset: QuerySet, params: QueryDict) -> Tuple[QuerySet, bool]:
    filtered = False

    name = params.get('name', None)
    if name is not None:
        queryset = queryset.filter(name__icontains=name)
        filtered = True

    last_name = params.get('last_name', None)
    if last_name is not None:
        queryset = queryset.filter(last_name__icontains=last_name)
        filtered = True

    naming = params.get('naming', None)
    if naming == SubjectSegment.NAMING_NAMED:
        queryset = queryset.exclude(name='', last_name='')
        filtered = True
    elif naming == SubjectSegment.NAMING_UNNAMED:
        queryset = queryset.filter(name='', last_name='')
        filtered = True

    tasks = params.getlist('tasks', None)
    if tasks is not None and len(tasks):
        queryset = queryset.filter(faces__task__in=tasks)
        filtered = True

    tasks_tags = params.getlist('tasks_tags', None)
    if tasks_tags is not None and len(tasks_tags):
        queryset = queryset.filter(faces__task__tags__in=tasks_tags)
        filtered = True

    min_time = params.get('min_time', None)
    if min_time is not None:
        queryset = queryset.filter(faces__created_at__time__gte=min_time)
        filtered = True

    max_time = params.get('max_time', None)
    if max_time is not None:
        queryset = queryset.filter(faces__created_at__time__lte=max_time)
        filtered = True

    min_date = params.get('min_date', None)
    if min_date is not None:
        queryset = queryset.filter(faces__created_at__date__gte=min_date)
        filtered = True

    max_date = params.get('max_date', None)
    if max_date is not None:
        queryset = queryset.filter(faces__created_at__date__lte=max_date)
        filtered = True

    max_age = params.get('max_age', None)
    if max_age is not None:
        try:
            max_age = int(max_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(
                birthdate__gt=Subject.birthdate_from_age(max_age))
            filtered = True

    min_age = params.get('min_age', None)
    if min_age is not None:
        try:
            min_age = int(min_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(
                birthdate__lt=Subject.birthdate_from_age(min_age))
            filtered = True

    min_pred_age = params.get('min_pred_age', None)
    if min_pred_age is not None:
        try:
            min_pred_age = int(min_pred_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(pred_age__gte=min_pred_age)
            filtered = True

    max_pred_age = params.get('max_pred_age', None)
    if max_pred_age is not None:
        try:
            max_pred_age = int(max_pred_age)
        except ValueError:
            pass
        else:
            queryset = queryset.filter(pred_age__lte=max_pred_age)
            filtered = True

    sex = params.get('sex', None)
    if sex is not None:
        queryset = queryset.filter(sex=sex)
        filtered = True

    pred_sex = params.get('pred_sex', None)
    if pred_sex is not None:
        queryset = queryset.filter(pred_sex=pred_sex)
        filtered = True

    skin = params.get('skin', None)
    if skin is not None:
        queryset = queryset.filter(skin=skin)
        filtered = True

    order_by = params.get('order_by', None)
    if order_by is not None:
        queryset = queryset.order_by(order_by)

    if filtered:
        queryset = queryset.distinct()

    return queryset, filtered
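# Illustrative sketch (assumption, not from the source): the final distinct() matters
# because several filters join across the ``faces`` relation, so a subject with two
# matching faces would otherwise be returned twice, e.g.:
#
#   qs, filtered = build_queryset(Subject.objects.all(),
#                                 QueryDict('min_date=2023-01-01&sex=F'))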
def export_raw_accesslogs_to_stream_lowlevel(self, stream: IO, queryset: QuerySet):
    start = monotonic()
    text_id_to_text = {
        dt['id']: dt['text']
        for dt in DimensionText.objects.all().values('id', 'text')
    }
    logger.debug('Finished loading text remaps: %.2f s', monotonic() - start)
    rt_to_dimensions = {
        rt.pk: rt.dimensions_sorted
        for rt in ReportType.objects.filter(pk__in=queryset.distinct(
            'report_type_id').values('report_type_id'))
    }
    logger.debug('Finished loading report_types and dimensions: %.2f s',
                 monotonic() - start)
    # get all field names for the CSV
    field_name_map = {(f'{dim}__{attr}' if attr else dim): dim
                      for dim, attr in self.implicit_dims.items()}
    field_name_map.update(
        {f'target__{attr}': attr for attr in self.title_attrs})
    field_names = list(field_name_map.values())
    for tr, dims in rt_to_dimensions.items():
        field_names += [
            dim.short_name for dim in dims
            if dim.short_name not in field_names
        ]
    field_names.append('value')
    logger.debug('Finished preparing field names: %.2f s', monotonic() - start)
    # values that will be retrieved from the accesslogs
    values = ['value', 'report_type_id']
    values += list(field_name_map.keys())
    values += [f'dim{i+1}' for i in range(7)]
    # create the writer
    writer = csv.DictWriter(stream, field_names)
    writer.writeheader()
    logger.debug('Finished preparing CSV writer: %.2f s', monotonic() - start)
    # write the records
    rec_num = 0
    with cachalot_disabled(True):
        # disable cachalot for this query because it returns a potentially huge
        # number of records and would clog the cache
        for rec_num, log in enumerate(
                queryset.values(*values).iterator()):  # type: int, dict
            record = {
                attr_out: log.get(attr_in)
                for attr_in, attr_out in field_name_map.items()
            }
            record['value'] = log['value']
            record['date'] = log['date']
            for i, dim in enumerate(
                    rt_to_dimensions[log['report_type_id']]):
                value = log.get(f'dim{i+1}')
                if dim.type == dim.TYPE_TEXT:
                    record[dim.short_name] = text_id_to_text.get(
                        value, value)
                else:
                    record[dim.short_name] = value
            writer.writerow(record)
            if rec_num % 999 == 0:
                self.store_progress(rec_num + 1)
            if rec_num % 99999 == 0:
                logger.debug('Stored %d records: %.2f s', rec_num,
                             monotonic() - start)
    self.store_progress(rec_num + 1)
def bulk_updater(self,
                 queryset: QuerySet,
                 update_fields: Optional[Set[str]] = None,
                 return_pks: bool = False,
                 local_only: bool = False,
                 querysize: Optional[int] = None) -> Optional[Set[Any]]:
    """
    Update local computed fields and descend in the dependency tree by calling
    ``update_dependent`` for dependent models.

    This method does the local field updates on `queryset`:

    - eval local `MRO` of computed fields
    - expand `update_fields`
    - apply optional `select_related` and `prefetch_related` rules to `queryset`
    - walk all records and recalculate fields in `update_fields`
    - aggregate changeset and save as batched `bulk_update` to the database

    By default this method triggers the update of dependent models by calling
    ``update_dependent`` with `update_fields` (next level of tree traversal).
    This can be suppressed by setting `local_only=True`.

    If `return_pks` is set, the method returns a set of altered pks of `queryset`.
    """
    model: Type[Model] = queryset.model

    # distinct issue workaround
    # the workaround is needed for already sliced/distinct querysets coming from outside
    # TODO: distinct is a major query perf smell, and is in fact only needed on back relations
    #       may need some rework in _querysets_for_update
    #       ideally we find a way to avoid it for forward relations
    #       also see #101
    if queryset.query.can_filter() and not queryset.query.distinct_fields:
        queryset = queryset.distinct()
    else:
        queryset = model.objects.filter(
            pk__in=subquery_pk(queryset, queryset.db))

    # correct update_fields by local mro
    mro = self.get_local_mro(model, update_fields)
    fields: Any = set(mro)  # FIXME: narrow type once issue in django-stubs is resolved
    if update_fields:
        update_fields.update(fields)

    select = self.get_select_related(model, fields)
    prefetch = self.get_prefetch_related(model, fields)
    if select:
        queryset = queryset.select_related(*select)
    if prefetch:
        queryset = queryset.prefetch_related(*prefetch)

    pks = []
    if fields:
        q_size = self.get_querysize(model, fields, querysize)
        change: List[Model] = []
        for elem in slice_iterator(queryset, q_size):
            # note on the loop: while it is technically not needed to batch things here,
            # we still prebatch to not cause memory issues for very big querysets
            has_changed = False
            for comp_field in mro:
                new_value = self._compute(elem, model, comp_field)
                if new_value != getattr(elem, comp_field):
                    has_changed = True
                    setattr(elem, comp_field, new_value)
            if has_changed:
                change.append(elem)
                pks.append(elem.pk)
            if len(change) >= self._batchsize:
                self._update(queryset, change, fields)
                change = []
        if change:
            self._update(queryset, change, fields)

    # trigger dependent comp field updates from changed records
    # other than before we exit the update tree early, if we have no changes at all
    # also cuts the update tree for recursive deps (tree-like)
    if not local_only and pks:
        self.update_dependent(
            model.objects.filter(pk__in=pks), model, fields, update_local=False)
    return set(pks) if return_pks else None
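# Illustrative sketch (assumption, not from the source): a direct call on a resolver
# instance, recomputing a hypothetical computed field 'total' for a filtered queryset
# and collecting the pks of records that actually changed. ``resolver``, ``Order`` and
# 'total' are made-up names for the example:
#
#   changed_pks = resolver.bulk_updater(Order.objects.filter(status='open'),
#                                       update_fields={'total'}, return_pks=True)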