class Meta:
    indexes = [
        models.Index(fields=['book']),
        models.Index(fields=['user']),
        models.Index(fields=['rating']),
        models.Index(models.Func('rating', function='FLOOR'), name='floor_rating_idx'),
        models.Index(fields=['predicted_rating']),
        models.Index(models.Func('predicted_rating', function='FLOOR'),
                     name='floor_predicted_rating_idx'),
        models.Index(fields=['saved']),
        models.Index(fields=['blocked']),
        models.Index(fields=['last_updated']),
    ]
    constraints = [
        models.UniqueConstraint(fields=['book', 'user'], name='OneRatingPerBookAndUser'),
        models.CheckConstraint(check=models.Q(rating__gte=1), name='RatingAtLeast1'),
        models.CheckConstraint(check=models.Q(rating__lte=10), name='RatingAtMost10'),
        models.CheckConstraint(
            check=models.Q(original_rating__gte=models.F('original_min')),
            name='OriginalRatingAtLeastMin'),
        models.CheckConstraint(
            check=models.Q(original_rating__lte=models.F('original_max')),
            name='OriginalRatingAtMostMax'),
    ]
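# Aside (not in the original snippet): the expression-based indexes above
# (models.Index(models.Func(...), name=...)) require Django 3.2+. For context,
# a minimal sketch of the enclosing model -- every field definition below is
# inferred from the indexes and constraints, so treat it as hypothetical:
class Rating(models.Model):  # hypothetical name
    book = models.ForeignKey('Book', on_delete=models.CASCADE)
    user = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    rating = models.FloatField()
    predicted_rating = models.FloatField(null=True)
    original_rating = models.FloatField()
    original_min = models.FloatField()
    original_max = models.FloatField()
    saved = models.BooleanField(default=False)
    blocked = models.BooleanField(default=False)
    last_updated = models.DateTimeField(auto_now=True)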
def calculate_avg_distance_school(self):
    # MiniBatchKMeans comes from scikit-learn:
    # from sklearn.cluster import MiniBatchKMeans
    schools_count = self.schools.count()
    # ST_X/ST_Y are PostGIS functions extracting a point's coordinates.
    schools_point = self.schools.annotate(
        lon=models.Func(models.F('geopoint'), function='ST_X',
                        output_field=models.FloatField()),
        lat=models.Func(models.F('geopoint'), function='ST_Y',
                        output_field=models.FloatField()),
    ).values_list('lat', 'lon')
    if schools_count < 2:
        return None
    elif schools_count < 5000:
        return self._calculate_batch_avg_distance_school(schools_point)
    else:
        # Too many points: cluster them first and average over the centroids.
        kmeans = MiniBatchKMeans(n_clusters=5000, batch_size=250,
                                 random_state=0).fit(schools_point)
        return self._calculate_batch_avg_distance_school(kmeans.cluster_centers_)
def get_passed_law_project_ratio(self):
    avg_passing_time = LawProject.get_avg_passing_time()
    # Count projects proposed long enough ago to have had a chance to pass:
    # exclude those that are both unpassed and newer than
    # CURDATE() - INTERVAL avg_passing_time DAY (MySQL syntax).
    proposed_count = self.law_projects.exclude(
        date_passed__isnull=True,
        date__gt=models.Func(
            models.Func(function='CURDATE'),
            models.Func(avg_passing_time,
                        template='INTERVAL %(expressions)s DAY'),
            function='DATE_SUB',
        ),
    ).count()
    if proposed_count:
        return float(self.get_passed_law_project_count()) / proposed_count * 100.0
    return 0.0
def get_amount(self):
    zero = decimal.Decimal('0.00')
    total = self.items.aggregate(
        sum=models.Sum(
            (models.F('price') + models.F('vat')
             - models.Func(models.F('discount'), function='ABS'))
            * models.F('quantity'),
            output_field=models.DecimalField(),
        ),
    )['sum']
    return max(total or zero, zero)
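# Aside (not in the original): since Django 2.2 the ABS call above can use the
# built-in database function instead of a raw models.Func. A minimal sketch of
# that variant -- the method name is hypothetical:
from django.db.models.functions import Abs

def get_amount_with_abs(self):  # hypothetical variant of get_amount()
    zero = decimal.Decimal('0.00')
    total = self.items.aggregate(
        sum=models.Sum(
            (models.F('price') + models.F('vat') - Abs('discount'))
            * models.F('quantity'),
            output_field=models.DecimalField(),
        ),
    )['sum']
    return max(total or zero, zero)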
def test_serialize_complex_func_index(self):
    index = models.Index(
        models.Func("rating", function="ABS"),
        models.Case(
            models.When(name="special", then=models.Value("X")),
            default=models.Value("other"),
        ),
        models.ExpressionWrapper(
            models.F("pages"),
            output_field=models.IntegerField(),
        ),
        models.OrderBy(models.F("name").desc()),
        name="complex_func_index",
    )
    string, imports = MigrationWriter.serialize(index)
    self.assertEqual(
        string,
        "models.Index(models.Func('rating', function='ABS'), "
        "models.Case(models.When(name='special', then=models.Value('X')), "
        "default=models.Value('other')), "
        "models.ExpressionWrapper("
        "models.F('pages'), output_field=models.IntegerField()), "
        "models.OrderBy(models.OrderBy(models.F('name'), descending=True)), "
        "name='complex_func_index')",
    )
    self.assertEqual(imports, {"from django.db import models"})
def order_queryset_by_z_coord_desc(queryset, geometry_field="location"):
    """Order a queryset by the Z coordinate of its point geometry, descending."""
    return queryset.annotate(
        z_coord=models.ExpressionWrapper(
            models.Func(geometry_field, function="ST_Z"),  # PostGIS ST_Z
            output_field=models.FloatField(),
        )
    ).order_by("-z_coord")
def with_status(self):
    duplicates_subquery = (
        self.values("period", "form", "org_unit")
        .annotate(ids=ArrayAgg("id"))
        .annotate(c=models.Func("ids", models.Value(1), function="array_length"))
        .filter(form__in=Form.objects.filter(single_per_period=True))
        .filter(c__gt=1)
        .annotate(id=models.Func("ids", function="unnest"))
        .values("id")
    )
    return self.annotate(
        status=models.Case(
            models.When(id__in=duplicates_subquery,
                        then=models.Value(Instance.STATUS_DUPLICATED)),
            models.When(last_export_success_at__isnull=False,
                        then=models.Value(Instance.STATUS_EXPORTED)),
            default=models.Value(Instance.STATUS_READY),
            output_field=models.CharField(),
        )
    )
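# Aside (not in the original): with_status() reads as a custom QuerySet method,
# so callers can chain on the annotation it adds -- a hypothetical usage sketch,
# assuming the queryset is wired up as Instance's default manager:
duplicated = Instance.objects.with_status().filter(
    status=Instance.STATUS_DUPLICATED,
)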
def with_weekend_sales(self):
    # week_day lookup: 1 = Sunday, 7 = Saturday, so [7, 1] selects weekends.
    subquery = Order.objects.filter(
        created_at__year=models.OuterRef('year'),
        created_at__month=models.OuterRef('month'),
        created_at__week_day__in=[7, 1],
    ).values_list(
        models.Func('lines__gross_amount', function='SUM'),
    )
    return self.annotate(weekend_sales=models.Subquery(subquery))
@classmethod
def get_avg_passing_time(cls):
    # DATEDIFF is MySQL-specific: returns (date_passed - date) in days.
    avg = cls.objects.filter(date_passed__isnull=False).aggregate(
        avg_passing_time=models.Avg(
            models.Func(
                models.F('date_passed'),
                models.F('date'),
                function='DATEDIFF',
            )
        )
    )
    # aggregate() always includes the key, but its value is None when there
    # are no rows, so fall back with `or` rather than dict.get()'s default.
    return avg['avg_passing_time'] or 0.0
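# Aside (not in the original): a portable sketch of the same average that
# avoids the MySQL-only DATEDIFF, assuming `date_passed` and `date` are
# DateFields and a backend with native duration support (e.g. PostgreSQL);
# the method name is hypothetical:
@classmethod
def get_avg_passing_time_portable(cls):
    avg = cls.objects.filter(date_passed__isnull=False).aggregate(
        avg_passing_time=models.Avg(
            models.ExpressionWrapper(
                models.F('date_passed') - models.F('date'),
                output_field=models.DurationField(),
            )
        )
    )
    delta = avg['avg_passing_time']  # a timedelta, or None with no rows
    return delta.days if delta is not None else 0.0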
def test_grouping_by_annotations_with_array_field_param(self):
    value = models.Value([1], output_field=ArrayField(models.IntegerField()))
    self.assertEqual(
        NullableIntegerArrayModel.objects.annotate(
            array_length=models.Func(value, 1, function='ARRAY_LENGTH'),
        ).values('array_length').annotate(
            count=models.Count('pk'),
        ).get()['array_length'],
        1,
    )
class RestScheduler(models.Model):
    id = models.AutoField(primary_key=True)
    uuid = models.UUIDField(max_length=64, unique=True, auto_created=True,
                            editable=False)
    name = models.CharField(max_length=100, unique=True)
    frequency = models.CharField(max_length=20, choices=FREQUENCIES,
                                 default=FREQUENCIES[0][0])
    every = models.IntegerField(default=1)
    maxRuns = models.IntegerField(default=0)
    schedulerType = models.CharField(max_length=10, choices=SCHDULER_TYPE,
                                     default=SCHDULER_TYPE[0][1])
    # models.Func is a query expression, not a model field; a CharField is
    # needed here to store the function name.
    function = models.CharField(max_length=32, null=True, blank=True)
    method = models.CharField(max_length=10, choices=REST_METHODS,
                              default=REST_METHODS[0][0])
    URI = models.CharField(max_length=256, null=True, blank=True)
    requestBody = models.TextField(max_length=2048, null=True, blank=True)
    exprn = models.CharField(max_length=32, null=True, blank=True)
    args = models.CharField(max_length=1024, null=True, blank=True)
    kwargs = models.CharField(max_length=1024, null=True, blank=True)
    lastRunTime = models.DateTimeField(null=True, blank=True)
    nextRunTime = models.DateTimeField(null=True, blank=True)
    updated = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ap_rest_scheduler"

    def save(self, *args, **kwargs):
        if not self.uuid:
            self.uuid = uuid.uuid4()
        self.nextRunTime = api.createScheduler(self.toDict()).get('nextRunTime', None)
        super(RestScheduler, self).save(*args, **kwargs)

    def toDict(self):
        data = {}
        data['uuid'] = self.uuid
        data['name'] = self.name
        data['frequency'] = self.frequency
        data['schedulerType'] = self.schedulerType
        data['method'] = self.method
        data['every'] = self.every
        data['uri'] = self.URI
        data['requestBody'] = self.requestBody
        data['exprn'] = self.exprn
        return data

    def delete(self, using=None):
        api.deleteScheduler(str(self.uuid))
        super(RestScheduler, self).delete(using)
class OptimizationMethod(models.Model):
    id = models.AutoField(primary_key=True)
    name = models.CharField(max_length=30)
    # models.Func is a query expression, not a model field; store the formula
    # as text instead (max_length chosen to match `body` below).
    formula = models.CharField(max_length=250, default='')
    domain_minimum = models.FloatField(default=0)
    domain_maximum = models.FloatField(default=0)
    body = models.CharField(max_length=250, default='')

    def __str__(self):
        return self.name
def get_queryset(self):
    ctype = ContentType.objects.get_for_model(self.model)
    transaction_subquery = Transaction.objects.filter(
        entity_type__pk=ctype.id,
        entity_id=models.OuterRef('pk'),
    ).values('entity_id').annotate(total=models.Sum('amount')).values('total')
    return super(ExpenseManager, self).get_queryset().annotate(
        payed_amount=models.functions.Coalesce(
            models.Func(models.Subquery(transaction_subquery), function='ABS'),
            0,
        )
    )
def _count_by_project_date_qs(qs):
    data = defaultdict(list)
    for project, count, date in (
        qs.order_by('project', 'created_at__date')
        .values('project', 'created_at__date')
        .annotate(count=models.Count('id', distinct=True))
        .values_list(
            'project',
            'count',
            models.Func(models.F('created_at__date'), function='DATE'),
        )
    ):
        data[project].append({
            'date': date and date.strftime('%Y-%m-%d'),
            'count': count,
        })
    return data
def remove_duplicate_results(apps, schema_editor):
    # We get the model from the versioned app registry;
    # if we directly import it, it'll be the wrong version
    # see https://docs.djangoproject.com/en/1.11/ref/migration-operations/#django.db.migrations.operations.RunPython
    VariantSearchResults = apps.get_model("seqr", "VariantSearchResults")
    db_alias = schema_editor.connection.alias
    duplicate_agg = (
        VariantSearchResults.objects.using(db_alias)
        .values('search_hash')
        .annotate(sorts=ArrayAgg('sort'))
        .annotate(c=models.Func('sorts', models.Value(1), function='array_length'))
        .filter(c__gt=1)
    )
    for dup_searches in duplicate_agg:
        for i, sort in enumerate(dup_searches['sorts'][1:]):
            result = VariantSearchResults.objects.using(db_alias).get(
                search_hash=dup_searches['search_hash'], sort=sort)
            result.search_hash = '{}_{}'.format(result.search_hash, i)
            result.save()
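# Aside (not in the original): a data-migration function like the one above is
# wired up with migrations.RunPython -- a minimal sketch; the dependency name
# is a placeholder for the real predecessor migration:
from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [
        ('seqr', '0001_initial'),  # hypothetical
    ]
    operations = [
        migrations.RunPython(remove_duplicate_results,
                             reverse_code=migrations.RunPython.noop),
    ]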
def handle(self, *args, **options):
    """Runs the digest for tickets."""
    user_digests = {}
    self.request_host = options.get('request_host')
    tickets = Ticket.objects.filter(
        models.Q(sent_notifications_at__gte=models.Func(function='now')) |
        models.Q(sent_notifications_at=None))
    self.stdout.write('Processing {} tickets.'.format(tickets.count()))
    for ticket in tickets:
        digest = self.digest(ticket)
        if not digest:
            continue
        for user in (list(ticket.users.all()) + [ticket.requester]):
            upcoming_in = 'never'
            user_digests[user.id] = user_digests.get(user.id) or {
                'request_host': self.request_host,
                'site_name': settings.SITE_NAME,
                'user': user,
                'digests': [],
                'upcoming': set(),
            }
            user_digests[user.id]['digests'] += digest
            if ticket.deadline_at and user != ticket.requester:
                upcoming_in = (ticket.deadline_at - datetime.utcnow()).days
                if upcoming_in in self.UPCOMING_DAYS_LEFT:
                    user_digests[user.id]['upcoming'].add(ticket)
        ticket.sent_notifications_at = datetime.utcnow()
        ticket.save()
    if user_digests:
        status, count = self.email(user_digests)
    else:
        status, count = True, 0
    color = self.style.SUCCESS if status else self.style.ERROR
    return color('Sent {} notifications.'.format(count))
def get_projects_summary(self, request, pk=None, version=None):
    projects = Project.get_for_member(request.user)

    # Lead stats
    leads = Lead.objects.filter(project__in=projects)
    total_leads_tagged_count = leads.annotate(
        entries_count=models.Count('entry'),
    ).filter(entries_count__gt=0).count()
    total_leads_tagged_and_controlled_count = leads.annotate(
        entries_count=models.Count('entry'),
        controlled_entries_count=models.Count(
            'entry', filter=models.Q(entry__controlled=True),
        ),
    ).filter(
        entries_count__gt=0,
        entries_count=models.F('controlled_entries_count'),
    ).count()

    # Entries activity
    recent_projects_id = list(
        projects.annotate(
            entries_count=Cast(
                KeyTextTransform('entries_activity', 'stats_cache'),
                models.IntegerField(),
            ),
        ).filter(entries_count__gt=0)
        .order_by('-entries_count')
        .values_list('id', flat=True)[:3]
    )
    recent_entries = Entry.objects.filter(
        project__in=recent_projects_id,
        created_at__gte=(timezone.now() + relativedelta(months=-3)),
    )
    recent_entries_activity = {
        'projects': (
            recent_entries.order_by().values('project')
            .annotate(count=models.Count('*'))
            .filter(count__gt=0)
            .values('count', id=models.F('project'), title=models.F('project__title'))
        ),
        'activities': (
            recent_entries.order_by('project', 'created_at__date')
            .values('project', 'created_at__date')
            .annotate(count=models.Count('*'))
            .values('project', 'count',
                    date=models.Func(models.F('created_at__date'), function='DATE'))
        ),
    }
    return response.Response({
        'projects_count': projects.count(),
        'total_leads_count': leads.count(),
        'total_leads_tagged_count': total_leads_tagged_count,
        'total_leads_tagged_and_controlled_count': total_leads_tagged_and_controlled_count,
        'recent_entries_activity': recent_entries_activity,
    })
def overview(self, request, pk=None):
    def _get_projects_per_enum_field(projects, EnumType, enum_field):
        return [
            {
                enum_field: enum_field_value,
                f'{enum_field}_display': EnumType(int(enum_field_value)).label,
                'count': count,
            }
            for enum_field_value, count in (
                projects.order_by().values(enum_field)
                .annotate(count=models.Count('id'))
                .values_list(enum_field, 'count')
                .order_by(enum_field)
            )
        ]

    projects = self.get_projects()
    projects_unnest_tags = (
        projects
        # XXX: Without the cast, Django throws "'int' is not iterable".
        .annotate(secondary_sector=Cast(
            models.Func(models.F('secondary_sectors'), function='UNNEST'),
            output_field=models.CharField(),
        ))
    )
    target_total = projects.aggregate(target_total=models.Sum('target_total'))['target_total']
    return Response({
        'total_ongoing_projects': projects.filter(status=Statuses.ONGOING).count(),
        'ns_with_ongoing_activities': (
            projects.filter(status=Statuses.ONGOING)
            .order_by('reporting_ns').values('reporting_ns').distinct().count()
        ),
        'target_total': target_total,
        'projects_per_sector': _get_projects_per_enum_field(
            projects, Sectors, 'primary_sector'),
        'projects_per_programme_type': _get_projects_per_enum_field(
            projects, ProgrammeTypes, 'programme_type'),
        'projects_per_secondary_sectors': _get_projects_per_enum_field(
            projects_unnest_tags, SectorTags, 'secondary_sector'),
    })
def _fetch_all(self):
    from kra.models import ResourceUsage

    super()._fetch_all()

    step = self._hints.get('_prefetch_resource_usage_buckets_step', None)
    if step is None:
        return

    workloads = self._result_cache
    container_ids = itertools.chain.from_iterable(
        itertools.chain.from_iterable(
            (c.id for c in pod.container_set.all())
            for pod in wl.pod_set.all()
        )
        for wl in workloads
    )
    # TimescaleDB's time_bucket() groups measurements into fixed-size buckets;
    # the max per bucket is then taken for each resource metric.
    qs = (
        ResourceUsage.objects.filter(container_id__in=container_ids)
        .annotate(
            ts=models.Func(
                models.Value(f'{step} seconds'),
                'measured_at',
                function='time_bucket',
                output_field=models.DateTimeField(),
            ),
        )
        .values('container_id', 'ts')
        .order_by('container_id', 'ts')
        .annotate(
            memory_mi=models.Max('memory_mi'),
            cpu_m_seconds=models.Max('cpu_m_seconds'),
        )
    )
    buckets_by_container_id = defaultdict(list)
    for b in qs:
        buckets_by_container_id[b.pop('container_id')].append(b)
    for wl in workloads:
        for pod in wl.pod_set.all():
            for c in pod.container_set.all():
                c.resource_usage_buckets = buckets_by_container_id[c.id]
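# Aside (not in the original): the step is read from the queryset's _hints
# dict, so the custom manager presumably exposes something like the
# hypothetical helper below to switch bucketing on; a sketch only:
def prefetch_resource_usage_buckets(qs, step_seconds):
    qs = qs.all()  # clone so the hint does not leak into the caller's queryset
    qs._hints['_prefetch_resource_usage_buckets_step'] = step_seconds
    return qs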
@classmethod
def provision(cls, user, password, flow_add, up_user=None):
    # TODO: there is a bug here -- it is unclear how to lock the rows so the
    # correct maximum port is read; the current implementation breaks under
    # heavy concurrency.
    from .services import change, add
    try:
        # Existing user: top up their quota.
        ssuser = cls.objects.get(user=user)
        ssuser.password = password
        ssuser.flow_limit += flow_add
        ssuser.flow_remaining += flow_add
        change(ssuser.port, password, ssuser.flow_limit)
        ssuser.save()
    except cls.DoesNotExist:
        # First-time user: create a record on the next free port. aggregate()
        # is needed here; annotating MAX(port) onto every row and calling
        # .get() raises MultipleObjectsReturned once there is more than one user.
        current_port = cls.objects.aggregate(
            max_port=models.Max('port'))['max_port']
        ssuser = SSUser()
        ssuser.user = user
        ssuser.port = current_port + 1 if current_port else 10000
        ssuser.flow_limit = flow_add
        ssuser.used = 0
        ssuser.flow_remaining = flow_add
        ssuser.up_user = up_user
        add(ssuser.port, password, ssuser.flow_limit)
        ssuser.save()
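# Aside (not in the original): one way to address the concurrency TODO above,
# assuming a backend with row locking (e.g. PostgreSQL/MySQL InnoDB) --
# serialize port allocation inside a transaction with select_for_update().
# A sketch under those assumptions, not a drop-in fix:
from django.db import transaction

def provision_locked(cls, user, password, flow_add, up_user=None):  # hypothetical
    with transaction.atomic():
        last = cls.objects.select_for_update().order_by('-port').first()
        next_port = last.port + 1 if last else 10000
        ...  # create and save the SSUser on next_port as above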
def with_end_date(self):
    """
    Returns a :class:`QuerySet` where the :attr:`validity_end` date and the
    :attr:`valid_between` date range have been annotated onto the query.

    The resulting annotations can be queried on like fully materialised
    fields. E.g. it is possible to filter on the `valid_between` field.

    .. code-block:: python

        Model.objects.with_end_date().filter(
            valid_between__contains=date.today(),
        )
    """
    # Models with a single validity date always represent some feature of a
    # "parent model" and are only live for as long as that model is live.
    # The `over_field` is the field on this model that is a foreign key to
    # the "parent model". E.g. for a description it is the described model.
    over_field = self.model._meta.get_field(self.model.validity_over)

    # When we are working out the validity of the next model, only models
    # for the same "parent model" are considered. So this partition selects
    # only the models that match on the same parent fields.
    partition = [
        models.F(f"{over_field.name}__{field}")
        for field in over_field.related_model.identifying_fields
    ]

    # To work out the end date efficiently, an SQL window expression is used.
    # The rule for models with only a validity start date is that they are
    # valid up until the next model takes over. So this is the same as
    # ordering the models by their start dates and then taking the start
    # date of the model that appears after this one.
    window = expressions.Window(
        expression=aggregates.Max("validity_start"),
        partition_by=partition,
        order_by=models.F("validity_start").asc(),
        frame=expressions.RowRange(start=0, end=1),
    )

    # If the value returned by the window expression is the same as the
    # model's own start date, that means there was no future model with a
    # later start date. Hence, this model is at the moment valid for
    # unlimited time. NULLIF returns NULL if the two values match. A day has
    # to be subtracted from the final result because the end date is one day
    # before the next start date.
    end_date_field = functions.Cast(
        functions.NullIf(window, models.F("validity_start")) - timedelta(days=1),
        models.DateField(),
    )

    # To allow the resulting field to be queried, this must be done as part
    # of a Common Table Expression (CTE) because window expressions cannot
    # appear in a WHERE clause.
    #
    # The end date and the start date are combined together into a single
    # DATERANGE field to allow using __contains operators.
    with_dates_added = With(
        self.annotate(
            validity_end=end_date_field,
            valid_between=models.Func(
                models.F("validity_start"),
                models.F("validity_end"),
                expressions.Value("[]"),
                function="DATERANGE",
                output_field=TaricDateRangeField(),
            ),
        ),
    )

    return (
        with_dates_added.join(self.model, pk=with_dates_added.col.pk)
        .with_cte(with_dates_added)
        .annotate(
            validity_end=with_dates_added.col.validity_end,
            valid_between=with_dates_added.col.valid_between,
        )
    )
@classmethod
def get_recent_activities(cls, user):
    from entry.models import Entry
    from lead.models import Lead
    from quality_assurance.models import EntryReviewComment

    project_qs = cls.get_for_member(user)
    created_by_expression = models.functions.Coalesce(
        models.Func(
            models.Value(' '),
            models.F('created_by_id__first_name'),
            models.F('created_by_id__last_name'),
            function='CONCAT_WS',
        ),
        models.F('created_by_id__email'),
        output_field=models.CharField(),
    )

    leads_qs = Lead.objects.filter(project__in=project_qs).values_list(
        'id', 'created_at', 'project_id', 'project__title', 'created_by_id',
        'created_by__profile__display_picture__file',
        models.Value('lead', output_field=models.CharField()),
        created_by_expression,
    )
    entry_qs = Entry.objects.filter(project__in=project_qs).values_list(
        'id', 'created_at', 'project_id', 'project__title', 'created_by_id',
        'created_by__profile__display_picture__file',
        models.Value('entry', output_field=models.CharField()),
        created_by_expression,
    )
    entry_comment_qs = EntryReviewComment.objects.filter(
        entry__project__in=project_qs,
    ).values_list(
        'id', 'created_at', 'entry__project_id', 'entry__project__title',
        'created_by_id', 'created_by__profile__display_picture__file',
        models.Value('entry-comment', output_field=models.CharField()),
        created_by_expression,
    ).distinct('id')

    def _get_activities():
        return list(
            leads_qs.union(entry_qs).union(entry_comment_qs)
            .order_by('-created_at')[:30]
        )

    activities = cache.get_or_set(
        CacheKey.RECENT_ACTIVITIES_KEY_FORMAT.format(user.pk),
        _get_activities,
        60 * 5,  # 5 min
    )
    return [
        {
            field: item[index]
            for index, field in enumerate([
                'id', 'created_at', 'project', 'project_display_name',
                'created_by', 'created_by_display_picture', 'type',
                'created_by_display_name',
            ])
        }
        for item in activities
    ]
def result_stdout_raw_handle(self, enforce_max_bytes=True):
    """
    This method returns a file-like object ready to be read which
    contains all stdout for the Job.

    If the size of the file is greater than 1048576 bytes, a
    StdoutMaxBytesExceeded exception will be raised.
    """
    max_supported = 1048576

    if enforce_max_bytes:
        # If enforce_max_bytes is True, we're not grabbing the whole file,
        # just the first <settings.STDOUT_MAX_BYTES_DISPLAY> bytes;
        # in this scenario, it's probably safe to use a StringIO.
        fd = StringIO()
    else:
        # If enforce_max_bytes = False, that means they're downloading
        # the entire file. To avoid ballooning memory, let's write the
        # stdout content to a temporary disk location.
        if not os.path.exists(settings.JOBOUTPUT_ROOT):
            os.makedirs(settings.JOBOUTPUT_ROOT)
        fd = tempfile.NamedTemporaryFile(
            prefix='{}-{}-'.format(self.model_to_str(), self.pk),
            suffix='.out',
            dir=settings.JOBOUTPUT_ROOT,
        )

    # Note: the code in this block _intentionally_ does not use the
    # Django ORM because of the potential size (many MB+) of
    # `main_jobevent.stdout`; we *do not* want to generate queries
    # here that construct model objects by fetching large gobs of
    # data (and potentially ballooning memory usage); instead, we
    # just want to write concatenated values of a certain column
    # (`stdout`) directly to a file.
    with connection.cursor() as cursor:
        if enforce_max_bytes:
            # detect the length of all stdout for this Job, and
            # if it exceeds settings.STDOUT_MAX_BYTES_DISPLAY bytes,
            # don't bother actually fetching the data
            total = self.get_event_queryset().aggregate(
                total=models.Sum(models.Func(models.F('stdout'),
                                             function='LENGTH'))
            )['total']
            if total > max_supported:
                raise StdoutMaxBytesExceeded(total, max_supported)

        cursor.copy_expert(
            "copy (select stdout from {} where {}={} order by start_line) to stdout"
            .format(self._meta.db_table + 'event', self.event_parent_key, self.id),
            fd,
        )

        if hasattr(fd, 'name'):
            # If we're dealing with a physical file, use `sed` to clean
            # up escaped line sequences.
            fd.flush()
            subprocess.Popen(
                "sed -i 's/\\\\r\\\\n/\\n/g' {}".format(fd.name),
                shell=True,
            ).wait()
            return open(fd.name, 'r')
        else:
            # If we're dealing with an in-memory string buffer, use
            # string.replace() instead.
            fd = StringIO(fd.getvalue().replace('\\r\\n', '\n'))
            return fd