def get_field_values(params):
    """Return distinct values for one meeting field, with the meeting ids sharing each value.

    Expects ``params`` to provide ``meeting_key`` (a key present in the
    module-level ``field_keys``) and optionally ``root_server_id`` to scope
    the search to a single root server.  The mapping from key to model field
    comes from the module-level ``meeting_field_map``.
    """
    root_server_id = params.get('root_server_id')
    meeting_key = params.get('meeting_key')
    # Only live meetings participate in field-value aggregation.
    meeting_qs = Meeting.objects.filter(deleted=False, published=True)
    if root_server_id:
        meeting_qs = meeting_qs.filter(root_server_id=root_server_id)
    if meeting_key in field_keys:
        model_field = meeting_field_map.get(meeting_key)[0]
        if isinstance(model_field, tuple):
            # This means we have a m2m field. At the time of this writing, the only
            # m2m field we have is formats. In this case, we want to get the distinct
            # list of format_ids for all meetings, and then get all meetings that have
            # the same formats in the "ids" Array
            # e.g. 'meetinginfo.formats.name' -> 'meetinginfo__formats__id'
            id_field = '__'.join(model_field[0].split('.')[0:-1]) + '__id'
            meeting_qs = meeting_qs.annotate(**{model_field[1]: ArrayAgg(id_field)})
            # Window partitions by the aggregated format-id array so that every
            # row carries the full list of meeting ids sharing that array.
            meeting_qs = meeting_qs.annotate(
                ids=Window(
                    expression=ArrayAgg('id'),
                    partition_by=[F(model_field[1])]
                )
            )
            meeting_qs = meeting_qs.values(model_field[1], 'ids')
            meeting_qs = meeting_qs.distinct()
        else:
            if model_field:
                # Convert "dotted" field path to Django's "__" lookup syntax,
                # then group by the field and collect matching meeting ids.
                model_field = model_field.replace('.', '__')
                meeting_qs = meeting_qs.values(model_field)
                meeting_qs = meeting_qs.annotate(ids=ArrayAgg('id'))
    return meeting_qs
class CatalogueItemsInstanceViewSet(viewsets.ModelViewSet):
    """
    API endpoint to list catalogue items for consumption by data flow.
    """

    # Shared column list for the three unioned sources below.  Column order
    # matters: UNION matches columns positionally, so each source must emit
    # these fields in exactly this order.
    fields = [
        "id",
        "name",
        "short_description",
        "description",
        "published",
        "created_date",
        "published_at",
        "information_asset_owner",
        "information_asset_manager",
        "enquiries_contact",
        "licence",
        "slug",
        "purpose",
        "source_tags",
        "draft",
        "personal_data",
        "retention_policy",
        "eligibility_criteria",
    ]
    # Union of datasets, reference datasets and visualisations, each annotated
    # so all three expose the same set of columns.  _static_* helpers supply
    # constant placeholders for fields a given model does not have.
    queryset = (DataSet.objects.live().annotate(
        purpose=models.F("type")).annotate(source_tags=ArrayAgg(
            "tags__name",
            filter=models.Q(tags__type=TagType.SOURCE),
            distinct=True,
        )).annotate(draft=_static_bool(None)).values(*fields).union(
            ReferenceDataset.objects.live(
            ).annotate(personal_data=_static_char(None)).annotate(
                retention_policy=_static_char(None)).annotate(
                    eligibility_criteria=_static_char(None)).annotate(
                        purpose=_static_int(DataSetType.REFERENCE)).annotate(
                            source_tags=ArrayAgg(
                                "tags__name",
                                filter=models.Q(tags__type=TagType.SOURCE),
                                distinct=True,
                            )).annotate(draft=F("is_draft")).
            # Reference datasets are keyed by "uuid" rather than "id".
            values(*_replace(fields, "id", "uuid"))).union(
                VisualisationCatalogueItem.objects.live().annotate(
                    purpose=_static_int(DataSetType.VISUALISATION)).annotate(
                        source_tags=ArrayAgg(
                            "tags__name",
                            filter=models.Q(tags__type=TagType.SOURCE),
                            distinct=True,
                        )).annotate(draft=_static_bool(None)).values(
                            *fields))).order_by("created_date")
    serializer_class = CatalogueItemSerializer
    # PageNumberPagination is used instead of CursorPagination
    # as filters cannot be applied to a union-ed queryset.
    pagination_class = PageNumberPagination
def get_queryset(self):
    """Build the filtered, grouped inventory queryset for a team.

    Requires a ``team`` query param; optional comma-separated params
    (``process_types``, ``product_types``, ``category_types``, ``tags``)
    narrow the result.  Unless ``aggregate_products=true``, one row is
    returned per (process type, product type) pair.

    Raises:
        serializers.ValidationError: if ``team`` is missing.
    """
    def _csv_param(name):
        # Parse a comma-separated query param into a list of strings,
        # or None when the param is absent.  Replaces four copy-pasted
        # get/strip/split blocks from the original implementation.
        raw = self.request.query_params.get(name, None)
        if raw is None:
            return None
        return raw.strip().split(',')

    queryset = Item.active_objects.filter(creating_task__is_trashed=False)

    team = self.request.query_params.get('team', None)
    if team is None:
        raise serializers.ValidationError('Request must include "team" query param')
    # filter by team
    queryset = queryset.filter(team_inventory=team)

    process_ids = _csv_param('process_types')
    if process_ids is not None:
        queryset = queryset.filter(creating_task__process_type__in=process_ids)

    product_ids = _csv_param('product_types')
    if product_ids is not None:
        queryset = queryset.filter(creating_task__product_type__in=product_ids)

    category_codes = _csv_param('category_types')
    if category_codes is not None:
        queryset = queryset.filter(creating_task__process_type__category__in=category_codes)

    tag_names = _csv_param('tags')
    if tag_names is not None:
        # Match either the process type's or the product type's tags.
        queryset = queryset.filter(creating_task__process_type__tags__name__in=tag_names) | \
            queryset.filter(creating_task__product_type__tags__name__in=tag_names)

    aggregate_products = self.request.query_params.get('aggregate_products', None)

    queryset_values = [
        'creating_task__process_type',
        'creating_task__process_type__name',
        'creating_task__process_type__unit',
        'creating_task__process_type__code',
        'creating_task__process_type__icon',
        'creating_task__process_type__category',
        'team_inventory'
    ]
    ordering_values = ['creating_task__process_type__name']
    # Unless aggregate product param is true, return a separate row for each product type
    if not aggregate_products or aggregate_products.lower() != 'true':
        queryset_values.append('creating_task__product_type')
        ordering_values.append('creating_task__product_type__name')
    return queryset.values(*queryset_values).annotate(
        product_type_ids=ArrayAgg('creating_task__product_type'),
        product_type_names=ArrayAgg('creating_task__product_type__name'),
        product_type_codes=ArrayAgg('creating_task__product_type__code'),
    ).order_by(*ordering_values)
def _annotate_tags(datasets):
    """
    Attach aggregated tag-id annotations for source and topic tags.

    @param datasets: django queryset
    @return: queryset annotated with ``source_tag_ids`` and ``topic_tag_ids``
    """
    # Both aggregations are independent, so a single annotate() call suffices.
    return datasets.annotate(
        source_tag_ids=ArrayAgg(
            "tags", filter=Q(tags__type=TagType.SOURCE), distinct=True),
        topic_tag_ids=ArrayAgg(
            "tags", filter=Q(tags__type=TagType.TOPIC), distinct=True),
    )
def cv_results_(self):
    """Reconstruct a scikit-learn style ``cv_results_`` dict from stored GridSearch rows.

    Loads the GridSearch identified by ``self._uuid``, aggregates per-candidate
    fit/score times and per-scorer train/test scores via ArrayAgg, then folds
    them into mean/std (and per-split) arrays.

    Raises:
        NotFittedError: if the grid search has no stored results yet.
    """
    import numpy as np
    from django.contrib.postgres.aggregates.general import ArrayAgg
    from AnyTimeGridSearchCV.grids.models import GridSearch
    gridsearch = GridSearch.objects.get(uuid=self._uuid)
    if not gridsearch.results.exists():
        raise NotFittedError(
            "This %(name)s instance is not fitted yet. Call 'fit' with "
            "appropriate arguments before using this method." %
            {'name': type(self.estimator).__name__})
    self.n_candidates = gridsearch.results.count()
    # Number of CV splits is taken from the first stored score row;
    # assumes every result has the same split count — TODO confirm.
    self.n_splits = len(
        gridsearch.results.all().first().scores.all().first().test_scores)
    scorers = gridsearch.results.values_list('scores__scorer', flat=True).distinct()
    # Base dict: one array entry per candidate for times and params.
    cv_results_ = gridsearch.results.all().aggregate(
        fit_time=ArrayAgg('fit_time'),
        params=ArrayAgg('params'),
        score_time=ArrayAgg('score_time'))
    # Per-scorer train/test score arrays, one aggregate query per scorer.
    scorer_dicts = {
        scorer: gridsearch.results.filter(scores__scorer=scorer).aggregate(
            train=ArrayAgg('scores__train_scores'),
            test=ArrayAgg('scores__test_scores'))
        for scorer in scorers
    }

    def _store(key_name, array, weights=None, splits=False, rank=False):
        # Reshape flat aggregate into (n_candidates, n_splits) and record
        # per-split, mean and std entries under scikit-learn's key names.
        array = np.array(array, dtype=np.float64).reshape(self.n_candidates,
                                                          self.n_splits)
        if splits:
            for idx, scores in enumerate(array.T):
                cv_results_['split%d_%s' % (idx, key_name)] = scores
        array_means = np.average(array, axis=1, weights=weights)
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis])**2,
                       axis=1,
                       weights=weights))
        cv_results_['mean_%s' % key_name] = array_means
        cv_results_['std_%s' % key_name] = array_stds

    for operation_time in ['fit_time', 'score_time']:
        _store(operation_time, cv_results_[operation_time])
        # Raw flat arrays are replaced by the mean_/std_ entries above.
        del cv_results_[operation_time]
    for scorer in scorer_dicts:
        for _set in ['train', 'test']:
            _store('%s_%s' % (_set, scorer),
                   scorer_dicts[scorer][_set],
                   splits=True,
                   weights=self.test_sample_counts if self.iid else None)
    return cv_results_
def get(self, request, *args, **kwargs):
    """Return all Modelo rows (optionally capped by the Max-Objects header)
    with their distinct fuels, gearboxes and drivetrains aggregated per row.
    """
    raw_limit = request.headers.get("Max-Objects")
    # No header -> no slice (``[:None]`` is a no-op limit).
    max_objects = int(request.headers["Max-Objects"]) if raw_limit else None

    limited = Modelo.objects.all()[:max_objects]
    annotated = limited.values().annotate(
        combustibles=ArrayAgg('combustibles', distinct=True),
        cajas_cambios=ArrayAgg('cajas_cambios', distinct=True),
        tracciones=ArrayAgg('tracciones', distinct=True))

    return JsonResponse({"results": list(annotated)})
class CatalogueItemsInstanceViewSet(viewsets.ModelViewSet):
    """
    API endpoint to list catalogue items for consumption by data flow.
    """

    # Shared column list for the three unioned sources below.  UNION matches
    # columns positionally, so each source must emit these fields in this
    # exact order.
    fields = [
        'id',
        'name',
        'short_description',
        'description',
        'published',
        'created_date',
        'published_at',
        'information_asset_owner',
        'information_asset_manager',
        'enquiries_contact',
        'licence',
        'purpose',
        'source_tags',
        'personal_data',
        'retention_policy',
        'eligibility_criteria',
    ]
    # Union of datasets, reference datasets and visualisations; _static_*
    # helpers supply constant placeholders for fields a model lacks.
    queryset = (DataSet.objects.live().annotate(
        purpose=models.F('type')).annotate(source_tags=ArrayAgg(
            'tags__name',
            filter=models.Q(tags__type=TagType.SOURCE.value),
            distinct=True,
        )).values(*fields).union(
            ReferenceDataset.objects.live().annotate(
                personal_data=_static_char(None)).annotate(
                    retention_policy=_static_char(None)).annotate(
                        eligibility_criteria=_static_char(None)).annotate(
                            purpose=_static_int(DataSetType.REFERENCE.value)).
            annotate(source_tags=ArrayAgg(
                'tags__name',
                filter=models.Q(tags__type=TagType.SOURCE.value),
                distinct=True,
            # Reference datasets are keyed by "uuid" rather than "id".
            )).values(*_replace(fields, 'id', 'uuid'))).union(
                VisualisationCatalogueItem.objects.live().annotate(
                    purpose=_static_int(DataSetType.VISUALISATION.value)).
                annotate(source_tags=ArrayAgg(
                    'tags__name',
                    filter=models.Q(tags__type=TagType.SOURCE.value),
                    distinct=True,
                )).values(*fields))).order_by('created_date')
    serializer_class = CatalogueItemSerializer
    # PageNumberPagination is used instead of CursorPagination
    # as filters cannot be applied to a union-ed queryset.
    pagination_class = PageNumberPagination
def get_queryset(self):
    """Group responses by question/answer, with counts and distinct response ids.

    An optional ``q`` query parameter restricts the grouping to a single
    question.  Returns a values queryset ordered by question.
    """
    q_query = self.request.query_params.get('q', None)
    queryset = Response.objects.exclude(a__isnull=True)
    # The original duplicated the full select/values/annotate chain in both
    # branches; only the optional question filter actually differs.
    if q_query is not None:
        queryset = queryset.filter(q=q_query)
    return queryset.select_related().values(
        'q', 'a', 'q__question', 'a__answer').annotate(
            total=Count('a')).annotate(
                id_list=ArrayAgg('id', distinct=True)).order_by('q')
def filter_duplicate_states(unmatched_states):
    """
    Takes a QuerySet of -States and flags then separates exact duplicates.

    This method returns two items:
        - list of IDs of unique -States + IDs for representative -States of duplicates
        - count of duplicates that were filtered out of the original set

    Sets of IDs for duplicate -States are captured in lists. The list being
    returned is created by taking one member of each of the duplicate sets.
    The IDs that were not taken are used to flag the corresponding -States
    with DATA_STATE_DELETE.

    :param unmatched_states: QS
    :return: canonical_state_ids, duplicate_count
    """
    ids_grouped_by_hash = unmatched_states.\
        values('hash_object').\
        annotate(duplicate_sets=ArrayAgg('id')).\
        values_list('duplicate_sets', flat=True)

    # For consistency, take the first member of each of the duplicate sets.
    # NOTE: .pop() mutates the lists cached on the queryset, so the flatten
    # below sees only the non-canonical (duplicate) ids.
    canonical_state_ids = [
        ids.pop(ids.index(min(ids))) for ids in ids_grouped_by_hash
    ]

    # Fix: reduce() without an initializer raises TypeError on an empty
    # sequence; supplying [] makes an empty queryset yield zero duplicates.
    duplicate_state_ids = reduce(lambda x, y: x + y, ids_grouped_by_hash, [])

    duplicate_count = unmatched_states.filter(
        pk__in=duplicate_state_ids).update(data_state=DATA_STATE_DELETE)

    return canonical_state_ids, duplicate_count
def loyal_customers(request):
    """Report the "loyal" band of repeat customers (3+ orders).

    Customers are ranked by their average time between orders (descending)
    and the 20%-50% slice of that ranking is reported, together with the
    overall order count.
    """
    orders_count = Order.objects.count()
    customers = Order.objects.all()\
        .values('customer')\
        .annotate(total=Count('customer'))\
        .annotate(orders=ArrayAgg('created_at'))\
        .filter(total__gt=2)

    # Attach each customer's average gap between consecutive orders.
    # (The original also accumulated these averages into an unused list —
    # dead code, removed.)
    for customer in customers:
        orders = sorted(customer['orders'], reverse=True)
        timedeltas = [orders[i - 1] - orders[i] for i in range(1, len(orders))]
        customer.update({'average_between_orders': average_timedelta(timedeltas)})

    # Keep the middle band of the ranking: from 20% to 50%.
    customers = sorted(customers,
                       key=lambda x: x['average_between_orders'],
                       reverse=True)[int(len(customers) * 0.2):int(len(customers) * 0.5)]

    stats = {
        'tag': 'loyal_customers',
        'amount': sum(c['total'] for c in customers),
        'total': orders_count,
        'customers': [c['customer'] for c in customers]
    }
    return JsonResponse(stats)
def at_risk_repeat_customers(request):
    """Report repeat customers whose re-order cadence is faster than average,
    i.e. customers we risk losing if they stop ordering at their usual pace.
    """
    orders_count = Order.objects.count()
    repeat_customers = Order.objects.all()\
        .values('customer')\
        .annotate(total=Count('customer'))\
        .annotate(orders=ArrayAgg('created_at'))\
        .filter(total__gt=1)

    per_customer_averages = []
    for entry in repeat_customers:
        # Average gap between consecutive orders for this customer.
        ordered_dates = sorted(entry['orders'], reverse=True)
        gaps = [ordered_dates[i - 1] - ordered_dates[i]
                for i in range(1, len(ordered_dates))]
        avg_gap = average_timedelta(gaps)
        entry.update({'average_between_orders': avg_gap})
        per_customer_averages.append(avg_gap)

    # "At risk" = orders more frequently than the population average.
    total_average = average_timedelta(per_customer_averages)
    at_risk = [entry for entry in repeat_customers
               if entry['average_between_orders'] < total_average]

    stats = {
        'tips': "Make sure they don't leave you! Send them coupon codes, 'We miss you' emails, win-back surveys",
        'tag': 'at_risk_repeat_customers',
        'amount': sum([c['total'] for c in at_risk]),
        'total': orders_count,
        'customers': [c['customer'] for c in at_risk]
    }
    return JsonResponse(stats)
def check_api_limits(at_datetime):
    """Collect successful request timestamps per contributor and check each
    contributor against the API rate limit at ``at_datetime``.
    """
    # Only 2xx responses count toward a contributor's usage.
    successful = RequestLog.objects.filter(
        response_code__gte=200, response_code__lte=299)
    grouped = successful.annotate(
        contributor=F('user__contributor__id')).values(
            'contributor').annotate(log_dates=ArrayAgg('created_at'))
    for entry in grouped:
        check_contributor_api_limit(at_datetime, entry)
def optimize_node_queryset(self, queryset):
    """Annotate a node queryset with everything the serializer needs in one pass.

    Adds contributor flags, wiki-addon presence, the parent node's guid and
    aggregated tag names as annotations (plus root/subjects prefetches) so
    the serializer avoids per-row queries.
    """
    auth = get_user_auth(self.request)
    admin_scope = has_admin_scope(self.request)
    abstract_node_contenttype_id = ContentType.objects.get_for_model(
        AbstractNode).id
    # Guid of the parent node, correlated via the NodeRelation subquery below.
    guid = Guid.objects.filter(
        content_type_id=abstract_node_contenttype_id,
        object_id=OuterRef('parent_id'))
    # Direct (non-link) parent relation for each node in the outer queryset.
    parent = NodeRelation.objects.annotate(
        parent__id=Subquery(guid.values('_id')[:1])).filter(
            child=OuterRef('pk'), is_node_link=False)
    wiki_addon = WikiNodeSettings.objects.filter(owner=OuterRef('pk'),
                                                 deleted=False)
    # The requesting user's contributor row (if any) for each node.
    contribs = Contributor.objects.filter(user=auth.user, node=OuterRef('pk'))
    return queryset.prefetch_related('root').prefetch_related(
        'subjects').annotate(
            user_is_contrib=Exists(contribs),
            contrib_read=Subquery(contribs.values('read')[:1]),
            contrib_write=Subquery(contribs.values('write')[:1]),
            contrib_admin=Subquery(contribs.values('admin')[:1]),
            has_wiki_addon=Exists(wiki_addon),
            annotated_parent_id=Subquery(parent.values('parent__id')[:1],
                                         output_field=CharField()),
            annotated_tags=ArrayAgg('tags__name'),
            has_admin_scope=Value(admin_scope, output_field=BooleanField()),
        )
def get_loaded_projects_for_user(user, fields=None):
    """Return the loaded projects visible to ``user``, tagged with their datastore.

    Projects with no elasticsearch index are checked against the mongo
    datastore and tagged ``datastore_type == 'mongo'``; projects with any
    index are tagged ``'es'``.

    :param user: Django user; staff additionally see projects without
        ``disable_staff_access``.
    :param fields: optional field names passed to ``.only()``.
    :return: list of Project instances (mongo projects first).
    """
    projects = Project.objects.all()
    collaborators_filter = Q(projectcollaborator__user=user)
    if user.is_staff:
        projects = projects.filter(collaborators_filter | Q(disable_staff_access=False))
    else:
        projects = projects.filter(collaborators_filter)
    if fields:
        projects = projects.only(*fields)
    projects = projects.annotate(
        es_indices=ArrayAgg('vcffile__elasticsearch_index'))

    # Projects with no elasticsearch index at all are served from mongo.
    # (Fix: `is None` instead of `== None`.)
    mongo_projects = [
        pr for pr in projects
        if all(es_index is None for es_index in pr.es_indices)
    ]
    if mongo_projects:
        loaded_mogo_project_ids = get_mongo_project_datastore(
        ).all_loaded_projects()
        # Fix: the original assigned the result of filter(), a lazy iterator
        # in Python 3; the tagging loop below exhausted it and the final
        # `mongo_projects + es_projects` then raised TypeError. Materialize
        # the filtered result as a list instead.
        mongo_projects = [
            p for p in mongo_projects
            if p.project_id in loaded_mogo_project_ids
        ]
        for project in mongo_projects:
            project.datastore_type = 'mongo'

    es_projects = [pr for pr in projects if any(pr.es_indices)]
    for project in es_projects:
        project.datastore_type = 'es'

    return mongo_projects + es_projects
def getIngredientsForBatches(request):
    """Compute the ingredient amounts needed by a team's in-progress batches.

    For each in-progress batch, the recipe's per-ingredient amount is scaled
    by batch size; needs are summed per ingredient product and compared to
    current inventory.  Returns serialized
    ``{product_id, amount_needed, amount_in_inventory}`` rows.
    """
    team_id = request.query_params.get('team')
    team = Team.objects.get(pk=team_id)

    # Each batch row carries "<ingredient product id>|<scaled amount>" strings:
    # amount is scaled by batch amount / recipe default batch size.
    b = Batch.objects.filter(status='i', product__team=team)\
        .annotate(ingredients_list=ArrayAgg(
            Concat(
                F('active_recipe__ingredients__product__id'),
                Value('|'),
                ExpressionWrapper(
                    F('amount') * F('active_recipe__ingredients__amount') / F('active_recipe__default_batch_size'),
                    output_field=DecimalField()),
                output_field=CharField())))

    # for each batch, add the ingredient amount to its spot in the ingredient
    # amount map.  (Fix: the map was initialized twice in the original; the
    # first assignment was dead code.)
    ingredient_amount_map = {}
    for x in b:
        for ing in x.ingredients_list:
            parts = ing.split('|')
            product_id = int(parts[0])
            amount = float(parts[1])
            ingredient_amount_map[product_id] = ingredient_amount_map.get(product_id, 0) + amount

    ing_list = []
    for product_id, amount_needed in ingredient_amount_map.items():
        qs = annotateProductWithInventory(Product.objects.filter(pk=product_id))
        amount_used = amountUsedOfProduct(product_id)
        inventory_amount = qs[0].received_amount_total - amount_used + qs[0].completed_amount
        ing_list.append({
            'product_id': product_id,
            'amount_needed': amount_needed,
            'amount_in_inventory': inventory_amount
        })

    serializer = IngredientAmountSerializer(ing_list, many=True)
    return Response(serializer.data)
def matching_criteria_columns(self, request, pk=None):
    """
    Retrieve all matching criteria columns for an org.
    ---
    response_serializer: OrganizationUsersSerializer
    parameter_strategy: replace
    parameters:
        - name: pk
          type: integer
          description: Organization ID (primary key)
          required: true
          paramType: path
    """
    try:
        org = Organization.objects.get(pk=pk)
    except ObjectDoesNotExist:
        # Unknown organization -> structured 404 payload.
        return JsonResponse(
            {
                'status': 'error',
                'message': 'Could not retrieve organization at pk = ' + str(pk)
            },
            status=status.HTTP_404_NOT_FOUND)

    # {table_name: [column_name, ...]} for the org's matching-criteria columns.
    criteria_columns = org.column_set.filter(is_matching_criteria=True)
    names_by_table = criteria_columns.values('table_name').annotate(
        column_names=ArrayAgg('column_name')).values_list(
            'table_name', 'column_names')

    return JsonResponse(dict(names_by_table))
def get_annotation(self, cte):
    """Aggregate the pks of CTE rows that satisfy the ancestor condition
    (non-matching rows contribute NULL) into a char-typed array annotation.
    """
    condition = WhenQ(*self.build_ancestor_condition(cte))
    matching_pk = Case(
        When(condition=condition, then=cte.col.pk),
        default=Value(None),
    )
    return ArrayAgg(matching_pk, output_field=CharField())
def inclusive_match_and_merge(unmatched_state_ids, org, StateClass):
    """
    Takes a list of unmatched_state_ids, combines matches of the corresponding
    -States, and returns a set of IDs of the remaining -States.

    :param unmatched_states_ids: list
    :param org: Organization object
    :param StateClass: PropertyState or TaxLotState
    :return: promoted_ids: list
    """
    column_names = matching_criteria_column_names(org.id, StateClass.__name__)

    # IDs of -States with all matching criteria equal to None are intially promoted
    # as they're not eligible for matching.
    promoted_ids = list(
        StateClass.objects.filter(pk__in=unmatched_state_ids,
                                  **empty_criteria_filter(
                                      StateClass,
                                      column_names)).values_list('id',
                                                                 flat=True))

    # Update the list of IDs whose states haven't been checked for matches.
    unmatched_state_ids = list(set(unmatched_state_ids) - set(promoted_ids))

    # Group IDs by -States that match each other
    matched_id_groups = StateClass.objects.\
        filter(id__in=unmatched_state_ids).\
        values(*column_names).\
        annotate(matched_ids=ArrayAgg('id')).\
        values_list('matched_ids', flat=True)

    # Collapse groups of matches found in the previous step into 1 -State per group
    merges_within_file = 0
    priorities = Column.retrieve_priorities(org)
    for ids in matched_id_groups:
        if len(ids) == 1:
            # If there's only 1, no merging is needed, so just promote the ID.
            promoted_ids += ids
        else:
            # Order descending by id, then pop() from the end — so the merge
            # seed is the lowest (oldest) id and newer states are merged onto
            # it one at a time.
            states = [
                s for s in StateClass.objects.filter(pk__in=ids).order_by('-id')
            ]
            merge_state = states.pop()
            merges_within_file += len(states)

            while len(states) > 0:
                newer_state = states.pop()
                merge_state = save_state_match(merge_state, newer_state, priorities)

            promoted_ids.append(merge_state.id)

    # Flag the soon to be promoted ID -States as having gone through matching
    StateClass.objects.filter(pk__in=promoted_ids).update(
        data_state=DATA_STATE_MATCHING)

    return promoted_ids, merges_within_file
def qs(self):
    """Base queryset annotated with the (distinct, ordered) ids of visible
    projects; rows with no visible project are dropped.  Only ``id`` and
    ``centroid`` are loaded.
    """
    project_qs = self.get_project_queryset()
    # Narrow the project queryset when a nested project filter was supplied.
    nested_filter = self.data.get('project_filter')
    if nested_filter:
        project_qs = ProjectGqlFilterSet(
            data=nested_filter, queryset=project_qs, request=self.request).qs
    aggregated = super().qs.annotate(
        projects_id=ArrayAgg(
            'project',
            distinct=True,
            ordering='project',
            filter=models.Q(project__in=project_qs),
        ),
    )
    return aggregated.filter(projects_id__isnull=False).only('id', 'centroid')
def batch_load_fn(self, keys):
    """Dataloader: for each project key, resolve the list of regions that
    contain it (or None if the project matched no region).
    """
    regions = self.get_region_queryset().filter(project__in=keys).annotate(
        projects_id=ArrayAgg('project', filter=models.Q(project__in=keys)),
    ).defer('geo_options')

    # Invert region -> [project ids] into project id -> [regions].
    regions_by_project = defaultdict(list)
    for region in regions.all():
        for project_id in region.projects_id:
            regions_by_project[project_id].append(region)

    return Promise.resolve([regions_by_project.get(key) for key in keys])
def ns_ongoing_projects_stats(self, request, pk=None):
    """Per-national-society statistics over ongoing projects.

    For every country with at least one reported project: project count,
    summed targets and budgets, the distinct operation types (with display
    labels), and a per-primary-sector breakdown.
    """
    projects = self.get_projects()
    # Correlated subquery base: projects reported by the outer Country row.
    ref_projects = projects.filter(reporting_ns=models.OuterRef('pk'))

    # Build {reporting_ns -> [{sector, display, count}, ...]} in one query.
    project_per_sector = defaultdict(list)
    for reporting_ns, primary_sector, count in (
        projects.order_by('reporting_ns', 'primary_sector')
        .values('reporting_ns', 'primary_sector')
        .annotate(count=models.Count('id'))
        .values_list('reporting_ns', 'primary_sector', 'count')
    ):
        project_per_sector[reporting_ns].append({
            'primary_sector': primary_sector,
            'primary_sector_display': Sectors(primary_sector).label,
            'count': count,
        })

    return Response({
        'results': [
            {
                **ns_data,
                'projects_per_sector': project_per_sector.get(ns_data['id']),
                'operation_types_display': [
                    OperationTypes(operation_type).label
                    for operation_type in ns_data['operation_types']
                ]
            }
            for ns_data in Country.objects.annotate(
                ongoing_projects=Coalesce(models.Subquery(
                    ref_projects.values('reporting_ns').annotate(
                        count=models.Count('id')).values('count')[:1],
                    output_field=models.IntegerField(),
                ), 0),
                target_total=Coalesce(models.Subquery(
                    ref_projects.values('reporting_ns').annotate(
                        target_total=models.Sum('target_total')).values('target_total')[:1],
                    output_field=models.IntegerField(),
                ), 0),
                budget_amount_total=Coalesce(models.Subquery(
                    ref_projects.values('reporting_ns').annotate(
                        budget_amount_total=models.Sum('budget_amount')).values('budget_amount_total')[:1],
                    output_field=models.IntegerField(),
                ), 0),
                operation_types=Coalesce(models.Subquery(
                    ref_projects.values('reporting_ns').annotate(
                        operation_types=ArrayAgg('operation_type', distinct=True)).values('operation_types')[:1],
                    output_field=ArrayField(models.IntegerField()),
                ), []),
            ).filter(ongoing_projects__gt=0).order_by('id').values(
                # Fix: 'iso3' was listed twice in the original values() call.
                'id', 'name', 'iso3', 'society_name',
                'ongoing_projects', 'target_total',
                'budget_amount_total', 'operation_types',
            )
        ]
    })
def user_batch_load_fn(objs, field):
    """Dataloader: resolve the User reachable from each object through the
    reverse relation ``<model name lowercased>_<field>``, batching one query
    per concrete model type.
    """
    # Group primary keys by concrete model so each model gets one query.
    pks_by_model = defaultdict(list)
    for obj in objs:
        pks_by_model[type(obj)].append(obj.pk)

    user_by_related_id = {}
    for model, keys in pks_by_model.items():
        related_users = User.objects\
            .annotate(related_id=F(f'{model.__name__.lower()}_{field}__pk'))\
            .filter(related_id__in=keys)\
            .annotate(related_ids=ArrayAgg(F('related_id')))
        for user in related_users.all():
            for related_id in user.related_ids:
                user_by_related_id[related_id] = user

    return Promise.resolve([user_by_related_id.get(obj.id) for obj in objs])
def get_queryset(self):
    """Contacts grouped by company with aggregated investment/IRR ranges and
    the collected names and emails per company.

    Optional ``investment_size`` and ``target_return`` query params narrow
    the underlying contact queryset; any failure falls back to an empty
    queryset.
    """
    try:
        user = self.request.user
        creator = user.customer_profile
        team = creator.team
        investment_size = self.request.query_params.get(
            'investment_size', None)
        target_return = self.request.query_params.get(
            'target_return', None)
        queryset = filter_contact_queryset(team, creator, investment_size,
                                           target_return)
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:  # TODO: break down the exceptions and log them
        queryset = Contact.objects.none()
    return queryset.values('company').annotate(
        minimum_investment_size=Min('minimum_investment_size'),
        maximum_investment_size=Max('maximum_investment_size'),
        minimum_irr_return=Min('minimum_irr_return'),
        # Fix: was Min(...) — copy-paste bug reporting the smallest IRR
        # as the maximum.
        maximum_irr_return=Max('maximum_irr_return'),
        first_names=ArrayAgg('first_name'),
        last_names=ArrayAgg('last_name'),
        emails=ArrayAgg('email'),
    )
def _merge_matches_across_cycles(matching_views, org_id, given_state_id, StateClass): """ This is a helper method for match_merge_link(). Given a QS of matching -Views, group them by Cycle. Merge the corresponding -States of each group with priority given based on most recent AuditLog. If the given -View/-State has matches in its own Cycle, AuditLogs are still used to determine merge order, but overarching precedence is given to the provided -View's -State. The count of merges as well as the target -State ID is returned. The target -State ID is either the given -State ID or the merged -State ID of merges involving the given -State ID. """ # Group matching -Views by Cycle and capture state_ids to be merged # For the purpose of merging, we only care if match_count is greater than 1. states_to_merge = matching_views.values('cycle_id').\ annotate(state_ids=ArrayAgg('state_id'), match_count=Count('id')).\ filter(match_count__gt=1).\ values_list('state_ids', flat=True) target_state_id = given_state_id count = 0 for state_ids in states_to_merge: ordered_ids = list( StateClass.objects.filter( id__in=state_ids).order_by('updated').values_list('id', flat=True)) if given_state_id in ordered_ids: # If the given -State ID is included, give it precedence and # capture resulting merged_state ID to be returned ordered_ids.remove(given_state_id) ordered_ids.append(given_state_id) merged_state = merge_states_with_views(ordered_ids, org_id, 'System Match', StateClass) target_state_id = merged_state.id else: merge_states_with_views(ordered_ids, org_id, 'System Match', StateClass) count += len(ordered_ids) return count, target_state_id
def whole_org_match_merge(org_id):
    """
    Scope: all PropertyViews and TaxLotViews for an Org.

    Algorithm:
        - Start with PropertyViews then repeat for TaxLotViews
        - For each Cycle,
            - Looking at the corresponding -States attached to these -Views,...
            - Disregard/ignore any -States where all matching criteria is None
              (likely a subquery or extra exclude).
            - Group together IDs of -States that match each other.
            - For each group of size larger than 1, run manual merging logic so
              that there's only one record left but make the -AuditLog a
              "System Match".

    :return: summary dict of merged counts and new merged-state ids, keyed
        by state class name.
    """
    summary = {
        'PropertyState': {
            'merged_count': 0,
            'new_merged_state_ids': []
        },
        'TaxLotState': {
            'merged_count': 0,
            'new_merged_state_ids': []
        },
    }
    for StateClass in (PropertyState, TaxLotState):
        ViewClass = PropertyView if StateClass == PropertyState else TaxLotView
        column_names = matching_criteria_column_names(org_id,
                                                      StateClass.__name__)
        cycle_ids = Cycle.objects.filter(organization_id=org_id).values_list('id', flat=True)
        for cycle_id in cycle_ids:
            existing_cycle_views = ViewClass.objects.filter(cycle_id=cycle_id)
            # Group by the matching-criteria columns; only groups with more
            # than one member need merging.
            # NOTE(review): empty_criteria_filter is called here as
            # (org_id, StateClass) but elsewhere in this file as
            # (StateClass, column_names) — confirm the expected signature.
            matched_id_groups = StateClass.objects.\
                filter(id__in=Subquery(existing_cycle_views.values('state_id'))).\
                exclude(**empty_criteria_filter(org_id, StateClass)).\
                values(*column_names).\
                annotate(matched_ids=ArrayAgg('id'), matched_count=Count('id')).\
                values_list('matched_ids', flat=True).\
                filter(matched_count__gt=1)

            for state_ids in matched_id_groups:
                state_ids.sort()  # Ensures priority given to most recently uploaded record
                merged_state = merge_states_with_views(state_ids, org_id,
                                                       'System Match',
                                                       StateClass)
                summary[StateClass.__name__]['merged_count'] += len(state_ids)
                summary[StateClass.__name__]['new_merged_state_ids'].append(merged_state.id)

    return summary
def survey(request, survey_name):
    """Render the survey overview page with date range, household sizes and
    control/non-control survey sizes for the named dataset.

    NOTE(review): the q1 aggregates (``survey_date__min`` etc.) are None for
    a dataset with no answers, which would make ``.date()`` raise — presumably
    callers only request existing surveys; confirm.
    """
    survey = KoboData.objects.filter(dataset_name=survey_name)
    village_geojson = survey_villages(survey_name)
    # Aggregate facts in one query: date range, average household size
    # (+1 for the respondent), districts and landscapes.
    q1 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name)\
        .annotate(num_hh=Count('answerhhmembers')+1)\
        .aggregate(Max('survey_date'), Min('survey_date'), Avg('num_hh'),
                   districts=StringAgg('district', delimiter=",", distinct=True),
                   landscape=ArrayAgg('landscape', distinct=True))
    # Answer counts grouped by control-group flag, ordered so the
    # non-control bucket comes first.
    q2 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name).\
        values('hh_type_control')\
        .annotate(num_hh=Count('answer_id'))\
        .order_by('hh_type_control')
    if len(q2) >= 2:
        survey_size_control = q2[1]["num_hh"]
        survey_size = q2[0]["num_hh"] + q2[1]["num_hh"]
    elif len(q2) == 1:
        # Only one bucket present: treated as no control group.
        survey_size_control = 0
        survey_size = q2[0]["num_hh"] + 0
    else:
        survey_size = 0
        survey_size_control = 0
    survey_facts = {'start_date': q1["survey_date__min"].date(),
                    'end_date': q1["survey_date__max"].date(),
                    'survey_size': survey_size,
                    'survey_size_control': survey_size_control,
                    'avg_hh_size': round(q1["num_hh__avg"], 2),
                    'districts': q1["districts"],
                    # Only the first landscape is displayed.
                    'landscape': q1["landscape"][0]}
    # TODO review return values, can be better structured
    return render(request, 'bns_survey.html',
                  {'survey': survey,
                   'surveys': [survey],
                   'landscape_geojson': '{"type" : "FeatureCollection", "features" :[]}',
                   'village_geojson': village_geojson,
                   'survey_name': survey_name,
                   'survey_facts': survey_facts})
def matching_criteria_columns(self, request, pk=None):
    """
    Retrieve all matching criteria columns for an org.

    :param pk: Organization primary key.
    :return: JsonResponse mapping each table name to the list of its
        matching-criteria column names, or a 404 error payload when the
        organization does not exist.
    """
    try:
        org = Organization.objects.get(pk=pk)
    except ObjectDoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'Could not retrieve organization at pk = ' + str(pk)
            },
            status=status.HTTP_404_NOT_FOUND)

    # {table_name: [column_name, ...]} for the org's matching-criteria columns.
    matching_criteria_column_names = dict(
        org.column_set.filter(
            is_matching_criteria=True).values('table_name').annotate(
                column_names=ArrayAgg('column_name')).values_list(
                    'table_name', 'column_names'))

    return JsonResponse(matching_criteria_column_names)
def _load_period_ipstat(fill_date: date, period: timedelta):
    """Per-IP session stats for the window ending at the end of ``fill_date``.

    Groups leads by IP/geo/provider and annotates session count, total
    session time, abandoned-session count, average session start time, the
    distinct owning user ids and the device count.

    NOTE(review): ``timezone.make_aware`` is applied to a ``datetime.time``
    (not a datetime) before ``datetime.combine`` — confirm this produces the
    intended aware end-of-day timestamp.
    """
    end_time = timezone.make_aware(
        time(hour=23, minute=59, second=59, microsecond=999999))
    end_date = datetime.combine(fill_date, end_time)
    # Window spans `period` back from the end of fill_date, snapped to
    # the start of that day.
    start_date = (end_date - period).replace(hour=0,
                                             minute=0,
                                             second=0,
                                             microsecond=0)
    return Lead.objects \
        .filter(session_started__range=(start_date, end_date)) \
        .filter(ip_addr__isnull=False) \
        .values('ip_addr', 'geo', 'geo__country', 'geo__postal_code', 'provider') \
        .annotate(s_cnt=Count('id')) \
        .annotate(s_time=Sum(ExpressionWrapper(
            Coalesce('created', 'last_event_time') - F('session_started'),
            output_field=DurationField()))) \
        .annotate(s0_cnt=Count(Case(
            When(created__isnull=True, then=F('id')),
            default=None,
            output_field=UUIDField()))) \
        .annotate(s_beg=Cast(Avg(
            Cast(F('session_started'), output_field=TimeField())
        ), output_field=TimeField())) \
        .annotate(user_ids=ArrayAgg('pixel__project__user__id', distinct=True))\
        .annotate(cnt_dev=Count('device_id'))
def get_queryset(self):
    """Public shops annotated with the aggregated slugs of their products."""
    public_shops = Shop.objects.public_shops()
    return public_shops.annotate(products=ArrayAgg('ShopProduct__Slug'))
def get_search_results(params):
    """Build a Meeting queryset from BMLT-style search request parameters.

    Parses pagination, weekday/venue/service/format/root-server filters
    (positive ids include, negative ids exclude), key/value field matches,
    time and duration windows, full-text search, and geographic search, then
    applies them to the base queryset of non-deleted published meetings.

    Returns the (possibly sliced) queryset; callers stream it with
    ``.iterator()``, which is why no ``prefetch_related`` is applied.
    """
    page_size = params.get('page_size')
    page_size = abs(int(page_size)) if page_size is not None else page_size
    page_num = params.get('page_num')
    page_num = abs(int(page_num)) if page_num is not None else page_num
    meeting_ids = params.getlist('meeting_ids[]', [])
    meeting_ids = [int(m) for m in meeting_ids]
    # Each multi-valued param accepts either a single scalar ("weekdays") or
    # a repeated list form ("weekdays[]"). Positive values are includes,
    # negative values are excludes.
    weekdays = params.get('weekdays')
    weekdays = params.getlist('weekdays[]', []) if weekdays is None else [weekdays]
    weekdays = [int(w) for w in weekdays]
    weekdays_include = [w for w in weekdays if w > 0]
    weekdays_exclude = [abs(w) for w in weekdays if w < 0]
    venue_types = params.get('venue_types')
    venue_types = params.getlist('venue_types[]', []) if venue_types is None else [venue_types]
    venue_types = [int(vt) for vt in venue_types]
    venue_types_include = [vt for vt in venue_types if vt > 0]
    # BUG FIX: previously the negative values themselves were kept, so the
    # exclude filter below could never match a real (positive) venue_type id.
    # Take abs() like every other include/exclude parameter here.
    venue_types_exclude = [abs(vt) for vt in venue_types if vt < 0]
    services = params.get('services')
    services = params.getlist('services[]', []) if services is None else [services]
    services = [int(s) for s in services]
    services_include = [s for s in services if s > 0]
    services_exclude = [abs(s) for s in services if s < 0]
    recursive = params.get('recursive', None) == '1'
    if recursive:
        # Expand service bodies to include their descendants.
        services_include.extend(get_child_service_bodies(services_include))
        services_exclude.extend(get_child_service_bodies(services_exclude))
    formats = params.get('formats')
    formats = params.getlist('formats[]', []) if formats is None else [formats]
    formats = [int(f) for f in formats]
    formats_include = [f for f in formats if f > 0]
    formats_exclude = [abs(f) for f in formats if f < 0]
    formats_comparison_operator = 'AND' if params.get('formats_comparison_operator') != 'OR' else 'OR'
    root_server_ids = params.get('root_server_ids')
    root_server_ids = params.getlist('root_server_ids[]', []) if root_server_ids is None else [root_server_ids]
    root_server_ids = [int(rs) for rs in root_server_ids]
    root_server_ids_include = [rs for rs in root_server_ids if rs > 0]
    root_server_ids_exclude = [abs(rs) for rs in root_server_ids if rs < 0]
    meeting_key = params.get('meeting_key')
    meeting_key_value = params.get('meeting_key_value')
    data_field_keys = extract_specific_keys_param(params)
    starts_after = parse_time_params(params.get('StartsAfterH'), params.get('StartsAfterM'))
    starts_before = parse_time_params(params.get('StartsBeforeH'), params.get('StartsBeforeM'))
    ends_before = parse_time_params(params.get('EndsBeforeH'), params.get('EndsBeforeM'))
    min_duration = parse_timedelta_params(params.get('MinDurationH'), params.get('MinDurationM'))
    max_duration = parse_timedelta_params(params.get('MaxDurationH'), params.get('MaxDurationM'))
    long_val = params.get('long_val')
    lat_val = params.get('lat_val')
    geo_width = params.get('geo_width')
    geo_width_km = params.get('geo_width_km')
    sort_results_by_distance = params.get('sort_results_by_distance', None) == '1'
    search_string = params.get('SearchString')
    search_string_is_address = params.get('StringSearchIsAnAddress', None) == '1'
    search_string_radius = params.get('SearchStringRadius')
    search_string_all = params.get('SearchStringAll', None) == '1'
    search_string_exact = params.get('SearchStringExact', None) == '1'
    sort_keys = extract_specific_keys_param(params, 'sort_keys', distance_field_keys)
    if not sort_keys:
        # default sort order
        sort_keys = ['lang_enum', 'weekday_tinyint', 'start_time', 'id_bigint']

    meeting_qs = Meeting.objects.filter(deleted=False, published=True)
    # Only select_related the relations the response will actually touch.
    if data_field_keys:
        select_related_fields = ['meetinginfo']
        if 'service_body_bigint' in data_field_keys:
            select_related_fields.append('service_body')
        meeting_qs = meeting_qs.select_related(*select_related_fields)
    else:
        meeting_qs = meeting_qs.select_related('meetinginfo', 'service_body', 'root_server')
    if meeting_ids:
        meeting_qs = meeting_qs.filter(pk__in=meeting_ids)
    if weekdays_include:
        meeting_qs = meeting_qs.filter(weekday__in=weekdays_include)
    if weekdays_exclude:
        meeting_qs = meeting_qs.exclude(weekday__in=weekdays_exclude)
    if venue_types_include:
        meeting_qs = meeting_qs.filter(venue_type__in=venue_types_include)
    if venue_types_exclude:
        meeting_qs = meeting_qs.exclude(venue_type__in=venue_types_exclude)
    if services_include:
        meeting_qs = meeting_qs.filter(service_body_id__in=services_include)
    if services_exclude:
        meeting_qs = meeting_qs.exclude(service_body_id__in=services_exclude)
    if formats_include:
        if formats_comparison_operator == 'AND':
            # Chained m2m filters: each must match, i.e. meeting has ALL formats.
            for id in formats_include:
                meeting_qs = meeting_qs.filter(models.Q(formats__id=id))
        else:
            # OR the conditions together: meeting has ANY of the formats.
            condition = None
            for id in formats_include:
                if condition is None:
                    condition = models.Q(formats__id=id)
                else:
                    condition |= models.Q(formats__id=id)
            meeting_qs = meeting_qs.filter(condition)
    if formats_exclude:
        for id in formats_exclude:
            meeting_qs = meeting_qs.filter(~models.Q(formats__id=id))
    if root_server_ids_include:
        meeting_qs = meeting_qs.filter(root_server_id__in=root_server_ids_include)
    if root_server_ids_exclude:
        meeting_qs = meeting_qs.exclude(root_server_id__in=root_server_ids_exclude)
    if meeting_key and meeting_key_value:
        # Exact-match search on a single mapped model field.
        if meeting_key in valid_meeting_search_keys:
            model_field = meeting_field_map.get(meeting_key)[0]
            if isinstance(model_field, tuple):
                model_field = model_field[0]
            if model_field:
                model_field = model_field.replace('.', '__')
                meeting_qs = meeting_qs.filter(**{model_field: meeting_key_value})
    if starts_after:
        meeting_qs = meeting_qs.filter(start_time__gt=starts_after)
    if starts_before:
        meeting_qs = meeting_qs.filter(start_time__lt=starts_before)
    if ends_before:
        # end_time is computed in the database as start_time + duration.
        exp = models.F('start_time') + models.F('duration')
        exp_wrapper = models.ExpressionWrapper(exp, output_field=models.TimeField())
        meeting_qs = meeting_qs.annotate(end_time=exp_wrapper)
        meeting_qs = meeting_qs.filter(end_time__lt=ends_before)
    if min_duration:
        meeting_qs = meeting_qs.filter(duration__gte=min_duration)
    if max_duration:
        meeting_qs = meeting_qs.filter(duration__lte=max_duration)
    if search_string and not search_string_is_address:
        vector_fields = (
            'name',
            'meetinginfo__location_text',
            'meetinginfo__location_info',
            'meetinginfo__location_street',
            'meetinginfo__location_city_subsection',
            'meetinginfo__location_neighborhood',
            'meetinginfo__location_municipality',
            'meetinginfo__location_sub_province',
            'meetinginfo__location_province',
            'meetinginfo__location_postal_code_1',
            'meetinginfo__location_nation',
            'meetinginfo__comments',
        )
        if search_string_exact:
            # Exact mode: plain substring match over the concatenated fields.
            meeting_qs = meeting_qs.annotate(fields=Concat(*vector_fields, output_field=models.TextField()))
            meeting_qs = meeting_qs.filter(fields__icontains=search_string)
        else:
            vector = SearchVector(*vector_fields)
            meeting_qs = meeting_qs.annotate(search=vector)
            if search_string_all:
                meeting_qs = meeting_qs.filter(search=search_string)
            else:
                # OR together per-word full-text queries; bare numbers are
                # treated as meeting ids, and short words / "the" are skipped.
                meeting_ids = []
                query = None
                for word in search_string.replace(',', ' ').split():
                    if word.isdigit():
                        meeting_ids.append(int(word))
                        continue
                    elif len(word) < 3 or word.lower() == 'the':
                        continue
                    q = SearchQuery(word)
                    query = q if not query else query | q
                q = None
                if query:
                    q = models.Q(search=query)
                if meeting_ids:
                    if not q:
                        q = models.Q(id=meeting_ids.pop())
                    for meeting_id in meeting_ids:
                        q = q | models.Q(id=meeting_id)
                if q:
                    meeting_qs = meeting_qs.filter(q)
    is_geo = (long_val and lat_val and (geo_width or geo_width_km or set(data_field_keys).intersection(distance_field_keys))) or (search_string and search_string_is_address)
    if is_geo:
        # Get latitude and longitude values, either directly from the request
        # or by geocoding the search string as an address.
        try:
            # get_nearest is either False (radius search) or the number of
            # nearest meetings to return.
            get_nearest = False
            if search_string and search_string_is_address:
                get_nearest = 10
                if search_string_radius:
                    search_string_radius = int(search_string_radius)
                    if search_string_radius < 0:
                        # Negative radius means "N nearest" instead of miles/km.
                        get_nearest = abs(search_string_radius)
                latitude, longitude = address_to_coordinates(search_string)
            else:
                latitude = float(lat_val)
                longitude = float(long_val)
                if geo_width is not None:
                    geo_width = float(geo_width)
                    if geo_width < 0:
                        get_nearest = abs(int(geo_width))
                elif geo_width_km is not None:
                    geo_width_km = float(geo_width_km)
                    if geo_width_km < 0:
                        get_nearest = abs(int(geo_width_km))
                else:
                    # No width given: effectively unbounded nearest search,
                    # used for distance-annotated field output.
                    get_nearest = sys.maxsize
            point = Point(x=longitude, y=latitude, srid=4326)
        except Exception as e:
            # Bad coordinates / failed geocode return an empty result set
            # rather than erroring; anything else propagates.
            if isinstance(e, ValueError) or isinstance(e, GeocodeAPIException):
                meeting_qs = meeting_qs.filter(pk=-1)
            else:
                raise
        else:
            meeting_qs = meeting_qs.annotate(distance=Distance('point', point))
            if get_nearest:
                qs = meeting_qs.order_by('distance').values_list('id')
                meeting_ids = [m[0] for m in qs[:get_nearest]]
                meeting_qs = meeting_qs.filter(id__in=meeting_ids)
            else:
                d = geo_width if geo_width is not None else geo_width_km
                d = D(mi=d) if geo_width is not None else D(km=d)
                meeting_qs = meeting_qs.filter(point__distance_lte=(point, d))
            if sort_results_by_distance:
                meeting_qs = meeting_qs.order_by('distance')
    if data_field_keys:
        # Restrict the columns fetched to the requested fields; tuple-mapped
        # (m2m) fields become ArrayAgg annotations instead.
        values = []
        for key in data_field_keys:
            model_field = meeting_field_map.get(key)[0]
            if key in distance_field_keys:
                continue
            elif isinstance(model_field, tuple):
                field_name = model_field[0].replace('.', '__')
                agg_name = model_field[1]
                meeting_qs = meeting_qs.annotate(**{agg_name: ArrayAgg(field_name)})
            elif not callable(model_field):
                model_field = model_field.replace('.', '__')
                values.append(model_field)
        meeting_qs = meeting_qs.only(*values)
    if sort_keys and not sort_results_by_distance:
        values = []
        for key in sort_keys:
            model_field = meeting_field_map.get(key)[0]
            if model_field:
                if isinstance(model_field, tuple):
                    continue  # no sorting by many to many relationships
                model_field = model_field.replace('.', '__')
                values.append(model_field)
        meeting_qs = meeting_qs.order_by(*values)
    if page_size is not None and page_num is not None:
        # Slice for pagination (page_num is 1-based).
        offset = page_size * (page_num - 1)
        limit = offset + page_size
        meeting_qs = meeting_qs[offset:limit]
    # We can't do prefetch related because we use .iterator() to stream results from the db
    # return meeting_qs.prefetch_related('formats')
    return meeting_qs