def __init__(self, user_pk, foia_pks):
    super(ExportCsv, self).__init__(
        user_pk, ''.join(str(pk) for pk in foia_pks[:100]))
    self.foias = (FOIARequest.objects.filter(pk__in=foia_pks).select_related(
        'composer__user',
        'agency__jurisdiction__parent',
    ).only(
        'composer__user__username',
        'title',
        'status',
        'slug',
        'agency__jurisdiction__name',
        'agency__jurisdiction__slug',
        'agency__jurisdiction__id',
        'agency__jurisdiction__parent__name',
        'agency__jurisdiction__parent__id',
        'agency__name',
        'agency__id',
        'date_followup',
        'date_estimate',
        'embargo',
        'composer__requested_docs',
    ).annotate(
        days_since_submitted=ExtractDay(
            Cast(Now() - F('composer__datetime_submitted'), DurationField())),
        days_since_updated=ExtractDay(
            Cast(Now() - F('datetime_updated'), DurationField())),
        project_names=StringAgg('projects__title', ',', distinct=True),
        tag_names=StringAgg('tags__name', ',', distinct=True),
    ))
def get_generic_values(doc: Document) -> Dict[str, Any]:
    # If changing keys of the returned dictionary, please change the field code
    # constants in apps/rawdb/field_value_tables.py accordingly
    # (_FIELD_CODE_CLUSTER_ID and others)
    document_qs = Document.all_objects.filter(pk=doc.pk) \
        .annotate(cluster_id=Max('documentcluster'),
                  parties=StringAgg('textunit__partyusage__party__name',
                                    delimiter=', ',
                                    distinct=True),
                  min_date=Min('textunit__dateusage__date'),
                  max_date=Max('textunit__dateusage__date'))

    # the Document may have been deleted by this point
    if not document_qs.exists():
        raise Document.DoesNotExist

    values = document_qs.values('cluster_id', 'parties', 'min_date',
                                'max_date').first()  # type: Dict[str, Any]

    max_currency = CurrencyUsage.objects.filter(text_unit__document_id=doc.id) \
        .order_by('-amount').values('currency', 'amount').first()  # type: Dict[str, Any]

    values['max_currency'] = max_currency
    values['max_currency_name'] = max_currency['currency'] if max_currency else None
    values['max_currency_amount'] = max_currency['amount'] if max_currency else None

    return values
def get_values(self, doc: Document, text: str) \
        -> List[Tuple[Any, Optional[int], Optional[int]]]:
    v = PartyUsage.objects.filter(text_unit__document_id=doc.id) \
        .aggregate(value=StringAgg('party__name', delimiter=', ', distinct=True))
    # aggregate() always returns a dict, so test the aggregated value itself
    if v['value']:
        return [(v['value'], None, None)]
    return []
def get_value(self, log, field: DocumentField, doc: Document,
              cur_field_code_to_value: Dict[str, Any]) -> Optional[FieldValueDTO]:
    v = PartyUsage.objects.filter(text_unit__document_id=doc.id) \
        .aggregate(value=StringAgg('party__name', delimiter=', ', distinct=True))
    # aggregate() always returns a dict, so test the aggregated value itself
    if not v['value']:
        return None
    value = TypedField.by(field).field_value_python_to_json(v['value'])
    return FieldValueDTO(field_value=value)
def benefits(obj_list):
    """
    Return a string of all Benefit names related to the Salaries
    whose ids are passed in obj_list.
    """
    return Salary.objects.filter(id__in=obj_list).aggregate(
        list=StringAgg('benefits__name', distinct=True, delimiter=', '))['list']
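# A minimal sketch (assumed, not taken from the original source) of the models
# that the 'benefits__name' lookup above implies: a Salary with a many-to-many
# relation named 'benefits' to a Benefit model that has a 'name' field.
from django.db import models


class Benefit(models.Model):
    name = models.CharField(max_length=255)


class Salary(models.Model):
    # illustrative field only; the real model surely has more
    amount = models.DecimalField(max_digits=12, decimal_places=2)
    benefits = models.ManyToManyField(Benefit, related_name='salaries')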
def documents(self, request, **kwargs):
    """
    Get list of project documents
    """
    project = self.get_object()

    # if project.is_generic() and (not project.projectclustering_set.exists()
    #         or not project.projectclustering_set.last().completed):
    #     return Response('Project documents clustering is not completed.', status=500)
    if project.type.is_generic() and (
            project.projectclustering_set.exists()
            and not project.projectclustering_set.last().completed):
        return Response('Project documents clustering is not completed.',
                        status=500)

    qs = list(
        Document.objects.filter(project=project).values('id', 'name').annotate(
            cluster_id=Max('documentcluster'),
            parties=StringAgg('textunit__partyusage__party__name',
                              delimiter=', ', distinct=True),
            min_date=Min('textunit__dateusage__date'),
            # max_currency=Max('textunit__currencyusage__amount'),
            max_date=Max('textunit__dateusage__date')).order_by(
                'cluster_id', 'name'))

    # get max currency amount and currency itself
    # as it's hard to get currency itself like USD
    # along with amount in previous query
    currencies_qs = CurrencyUsage.objects \
        .filter(text_unit__document__project=project) \
        .values('text_unit__document__name', 'currency') \
        .annotate(max_currency=Max('amount')).order_by('max_currency')
    max_currencies = {
        i['text_unit__document__name']: {
            'max_currency': i['max_currency'],
            'max_currency_str': '{} {}'.format(
                i['currency'],
                int(i['max_currency'])
                if int(i['max_currency']) == i['max_currency']
                else i['max_currency'])
        }
        for i in currencies_qs
    }

    # join two queries results
    for item in qs:
        if item['name'] in max_currencies:
            item.update(max_currencies[item['name']])
        else:
            item.update({'max_currency': None, 'max_currency_str': None})

    return Response(qs)
def get_generic_values(self, doc: Document,
                       generic_values_to_fill: FieldSpec = None) -> Dict[str, Any]:
    # If changing keys of the returned dictionary, please change the field code
    # constants in apps/rawdb/field_value_tables.py accordingly
    # (FIELD_CODE_CLUSTER_ID and others)
    document_qs = Document.all_objects.filter(pk=doc.pk)

    annotations = dict()
    if DocumentGenericField.cluster_id.specified_in(generic_values_to_fill):
        annotations['cluster_id'] = Max('documentcluster')
    if generic_values_to_fill is True:
        annotations['parties'] = StringAgg('textunit__partyusage__party__name',
                                           delimiter=', ', distinct=True)
        annotations['min_date'] = Min('textunit__dateusage__date')
        annotations['max_date'] = Max('textunit__dateusage__date')

    # the Document may have been deleted by this point
    if not document_qs.exists():
        raise Document.DoesNotExist

    values = {}
    if annotations:
        values = document_qs.annotate(**annotations).values(
            *annotations.keys()).first()  # type: Dict[str, Any]

    if generic_values_to_fill is True:
        # max_currency = CurrencyUsage.objects.filter(text_unit__document_id=doc.pk) \
        #     .order_by('-amount').values('currency', 'amount').first()  # type: Dict[str, Any]
        # use raw sql as the query above sometimes hangs up to 1 minute
        max_currency_sql = '''
            SELECT c.currency, MAX(c.amount) amount
            FROM extract_currencyusage c
            INNER JOIN document_textunit dtu ON c.text_unit_id = dtu.id
            WHERE dtu.document_id = {}
            GROUP BY c.currency
            ORDER BY amount DESC
            LIMIT 1;'''.format(doc.pk)
        with connection.cursor() as cursor:
            cursor.execute(max_currency_sql)
            max_currency = dictfetchone(cursor)

        values['max_currency'] = max_currency
        values['max_currency_name'] = max_currency['currency'] if max_currency else None
        values['max_currency_amount'] = max_currency['amount'] if max_currency else None

    return values
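# dictfetchone() is called above but not defined in this snippet. A minimal
# sketch of what it presumably does, modeled on the dictfetchall() helper from
# the Django docs: return the next row of the cursor as a dict keyed by column
# name, or None when the result set is empty.
def dictfetchone(cursor):
    row = cursor.fetchone()
    if row is None:
        return None
    columns = [col[0] for col in cursor.description]
    return dict(zip(columns, row))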
def cache_generic_values(self):
    document_qs = Document.objects.filter(pk=self.pk) \
        .annotate(cluster_id=Max('documentcluster'),
                  parties=StringAgg('textunit__partyusage__party__name',
                                    delimiter=', ',
                                    distinct=True),
                  max_currency_amount=Max('textunit__currencyusage__amount'),
                  max_currency_name=Max('textunit__currencyusage__currency'),
                  min_date=Min('textunit__dateusage__date'),
                  max_date=Max('textunit__dateusage__date'))
    values = document_qs.values('cluster_id', 'parties',
                                'max_currency_amount', 'max_currency_name',
                                'min_date', 'max_date').first()
    self.generic_data = values
    self.save(update_fields=['generic_data'])
def survey(request, survey_name):
    survey = KoboData.objects.filter(dataset_name=survey_name)
    village_geojson = survey_villages(survey_name)

    q1 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name) \
        .annotate(num_hh=Count('answerhhmembers') + 1) \
        .aggregate(Max('survey_date'), Min('survey_date'), Avg('num_hh'),
                   districts=StringAgg('district', delimiter=",", distinct=True),
                   landscape=ArrayAgg('landscape', distinct=True))

    q2 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name) \
        .values('hh_type_control') \
        .annotate(num_hh=Count('answer_id')) \
        .order_by('hh_type_control')

    if len(q2) >= 2:
        survey_size_control = q2[1]["num_hh"]
        survey_size = q2[0]["num_hh"] + q2[1]["num_hh"]
    elif len(q2) == 1:
        survey_size_control = 0
        survey_size = q2[0]["num_hh"]
    else:
        survey_size = 0
        survey_size_control = 0

    survey_facts = {'start_date': q1["survey_date__min"].date(),
                    'end_date': q1["survey_date__max"].date(),
                    'survey_size': survey_size,
                    'survey_size_control': survey_size_control,
                    'avg_hh_size': round(q1["num_hh__avg"], 2),
                    'districts': q1["districts"],
                    'landscape': q1["landscape"][0]}

    # TODO review return values, can be better structured
    return render(request, 'bns_survey.html',
                  {'survey': survey,
                   'surveys': [survey],
                   'landscape_geojson': '{"type" : "FeatureCollection", "features" :[]}',
                   'village_geojson': village_geojson,
                   'survey_name': survey_name,
                   'survey_facts': survey_facts})
def get_context_data(self, *, object_list=None, **kwargs):
    fromdate = self.request.session['fromdate']
    todate = self.request.session['todate']

    # the labels below are Russian: 'Товар' = product, 'Заказ' = order,
    # 'Цена' = price, 'Дата' = date, 'Количество' = quantity
    orders_set_sorted = OrderItem.objects.all() \
        .annotate(name=Concat(Value('Товар - '), 'product_name',
                              output_field=CharField())) \
        .annotate(concat=Concat(Value('Заказ - '), 'order__number',
                                Value(', Цена - '), 'product_price',
                                Value(', Дата - '), 'order__create_date',
                                Value(', Количество - '), 'amount',
                                output_field=CharField())) \
        .filter(order__create_date__gte=fromdate, order__create_date__lte=todate)

    queryset = orders_set_sorted.values('name').order_by('-name') \
        .annotate(concat_=StringAgg('concat', delimiter='; '),
                  customer_name=F('name'))

    top_amount = queryset.values('name') \
        .annotate(all_amount=ExpressionWrapper(Sum("amount"),
                                               output_field=CharField())) \
        .aggregate(Max('all_amount'))['all_amount__max']

    top_product = queryset.values('name') \
        .annotate(sum_num=ExpressionWrapper(F("amount") * F('product_price'),
                                            output_field=DecimalField()))
    top_product_max = top_product.order_by('order__id').aggregate(
        Max('sum_num'))['sum_num__max']

    queries = connection.queries
    reset_queries()

    context = {"items": queryset[:100],
               "queries": queries,
               'sum_num__max': top_product_max,
               'all_amount__max': top_amount}
    return context
def format_items_iterator(analysis, sample_ids, items):
    """
    A few things are done in JS formatters, e.g. changing -1 to missing
    values ('?' in the grid) and tags.

    We can't just add tags via the node queryset (in the monkey patch func
    above) as we'd get an issue with the tacked-on zygosity columns etc.
    not being in GROUP BY or an aggregate func. So, just patch items via
    this iterator.
    """
    SAMPLE_FIELDS = ["allele_depth", "allele_frequency", "read_depth",
                     "genotype_quality", "phred_likelihood"]

    variant_tags_qs = Variant.objects.filter(varianttag__analysis=analysis)
    variant_tags_qs = variant_tags_qs.annotate(
        tags=StringAgg("varianttag__tag", delimiter=', ', distinct=True))
    variant_tags = dict(variant_tags_qs.values_list("id", "tags"))

    for item in items:
        for sample_id in sample_ids:
            for f in SAMPLE_FIELDS:
                sample_field = f"sample_{sample_id}_ov_{f}"
                val = item.get(sample_field)
                if val and val == -1:
                    item[sample_field] = "."

        variant_id = item["id"]
        tags = variant_tags.get(variant_id)
        if tags:
            item["tags"] = tags

        yield item
def validate(self, data):
    room_id = data.get('room_id', None)
    user_id = data.get('user_id', None)
    if room_id is None and user_id is None:
        raise ValidationError({
            'room_id': 'One of these fields is required: room_id, user_id.'
        })
    if room_id:
        self.context['room'] = Room.objects.filter(
            room_id=room_id, owner=self.context['user']).first()
        if not self.context['room']:
            raise ValidationError({'room_id': 'Room was not found.'})
    if user_id:
        if user_id == self.context['user'].id:
            raise ValidationError(
                {'user_id': 'You can\'t send a message to yourself.'})
        user = User.objects.filter(id=user_id).first()
        if not user:
            raise ValidationError({'user_id': 'User was not found.'})
        room_ids = RoomMember.objects.filter(
            user=self.context['user']).values_list('room_id', flat=True)
        # look for an existing room whose members are exactly this pair of users
        room_exist = RoomMember.objects.filter(room_id__in=room_ids) \
            .values('room_id') \
            .annotate(item=StringAgg(Cast('user_id', ModelCharField()), ',',
                                     ordering=('user_id',))) \
            .filter(item=','.join(sorted([str(user_id),
                                          str(self.context['user'].id)])))
        if len(room_exist):
            print('+++++++++++')
    return data
def query(request):
    delimiter = request.GET.get("delimiter", ',')
    queryset = Address.objects.values("customer__name").annotate(
        phones=StringAgg('phone', delimiter),
    )
    return HttpResponse(queryset)
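# Hedged usage sketch (hypothetical data, not from the original source):
# grouping Address rows by customer__name and string-aggregating their phone
# numbers yields one row per customer, e.g.
#   <QuerySet [{'customer__name': 'Acme', 'phones': '555-0100,555-0199'}, ...]>
# A JSON-friendly variant of the view above, assuming the same Address model
# and `from django.http import JsonResponse`:
def query_json(request):
    delimiter = request.GET.get("delimiter", ',')
    queryset = Address.objects.values("customer__name").annotate(
        phones=StringAgg('phone', delimiter),
    )
    return JsonResponse({'results': list(queryset)})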
def render_to_response(self, context, **kwargs):
    """Allow CSV responses"""
    wants_csv = self.request.GET.get('content_type') == 'csv'
    has_perm = self.request.user.has_perm('foia.export_csv')
    if wants_csv and has_perm:
        pseudo_buffer = Echo()
        fields = (
            (lambda f: f.user.username, 'User'),
            (lambda f: f.title, 'Title'),
            (lambda f: f.get_status_display(), 'Status'),
            (lambda f: settings.MUCKROCK_URL + f.get_absolute_url(), 'URL'),
            (lambda f: f.jurisdiction.name, 'Jurisdiction'),
            (lambda f: f.jurisdiction.pk, 'Jurisdiction ID'),
            (
                lambda f: f.jurisdiction.get_level_display(),
                'Jurisdiction Level',
            ),
            (
                lambda f: f.jurisdiction.parent.name
                if f.jurisdiction.level == 'l' else f.jurisdiction.name,
                'Jurisdiction State',
            ),
            (lambda f: f.agency.name if f.agency else '', 'Agency'),
            (lambda f: f.agency.pk if f.agency else '', 'Agency ID'),
            (lambda f: f.date_followup, 'Followup Date'),
            (lambda f: f.date_estimate, 'Estimated Completion Date'),
            (lambda f: f.composer.requested_docs, 'Requested Documents'),
            (lambda f: f.current_tracking_id(), 'Tracking Number'),
            (lambda f: f.embargo, 'Embargo'),
            (lambda f: f.days_since_submitted, 'Days since submitted'),
            (lambda f: f.days_since_updated, 'Days since updated'),
            (lambda f: f.project_names, 'Projects'),
            (lambda f: f.tag_names, 'Tags'),
        )
        foias = (context['paginator'].object_list.select_related(
            None).select_related(
                'composer__user',
                'agency__jurisdiction',
            ).prefetch_related('tracking_ids').only(
                'composer__user__username',
                'title',
                'status',
                'slug',
                'agency__jurisdiction__name',
                'agency__jurisdiction__slug',
                'agency__jurisdiction__id',
                'agency__name',
                'agency__id',
                'date_followup',
                'date_estimate',
                'embargo',
                'composer__requested_docs',
            ).annotate(
                days_since_submitted=ExtractDay(
                    Cast(Now() - F('composer__datetime_submitted'),
                         DurationField())),
                days_since_updated=ExtractDay(
                    Cast(Now() - F('datetime_updated'), DurationField())),
                project_names=StringAgg('projects__title', ',', distinct=True),
                tag_names=StringAgg('tags__name', ',', distinct=True),
            ))
        writer = csv.writer(pseudo_buffer)
        response = StreamingHttpResponse(
            chain(
                [writer.writerow(f[1] for f in fields)],
                (writer.writerow(f[0](foia) for f in fields)
                 for foia in foias),
            ),
            content_type='text/csv',
        )
        response['Content-Disposition'] = 'attachment; filename="requests.csv"'
        return response
    else:
        return super(RequestList, self).render_to_response(context, **kwargs)
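# Echo() is used above but not defined in this snippet. A minimal sketch of
# the usual implementation, as shown in the Django docs for streaming large
# CSV files: a write-only pseudo-buffer whose write() simply returns the value
# instead of storing it, so each csv.writer row can be streamed straight into
# the StreamingHttpResponse generator.
class Echo:
    """Implements just the write method of the file-like interface."""

    def write(self, value):
        return value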
def groups(self):
    return self.groupmember_set.aggregate(groups=StringAgg(
        "group__name",
        delimiter=", ",
    ))["groups"]
def export_metadata(ds, bins):
    name = ds.name
    dataset_location = ds.location
    dataset_depth = ds.depth

    bqs = bins
    qs = bqs.values('id', 'pid', 'sample_time', 'location', 'ml_analyzed',
                    'cruise', 'cast', 'niskin', 'depth', 'instrument__number',
                    'skip', 'sample_type', 'n_images',
                    'tags__name').order_by('pid', 'tags__name')

    # fetch all tags and compute number of tag columns
    tags_by_id = defaultdict(list)
    n_tag_cols = 0
    for item in qs:
        tag_name = item['tags__name']
        if tag_name:
            id = item['id']
            tags = tags_by_id[id]
            tags.append(tag_name)
            if len(tags) > n_tag_cols:
                n_tag_cols = len(tags)

    # fetch all comment summaries
    comment_summary_by_id = \
        dict(bqs.filter(comments__isnull=False).values_list('id')
             .annotate(comment_summary=StringAgg('comments__content',
                                                 delimiter='; ',
                                                 ordering='comments__timestamp')))

    # fetch selected metadata fields
    # PMTtriggerSelection_DAQ_MCConly
    trigger_selection_key = 'PMTtriggerSelection_DAQ_MCConly'
    id2md = bqs.filter(
        metadata_json__contains=trigger_selection_key).values_list(
            'id', 'metadata_json')
    trigger_selection_by_id = dict([
        (id, json.loads(md).get(trigger_selection_key)) for id, md in id2md
    ])

    # now construct the dataframe
    r = defaultdict(list)
    r.update({'dataset': name})

    # fast way to remove duplicates
    prev_pid = None

    for item in qs:
        if item['pid'] == prev_pid:
            continue
        prev_pid = item['pid']

        def add(field, rename=None):
            if rename is not None:
                r[rename].append(item[field])
            else:
                r[field].append(item[field])

        add('pid')
        add('sample_time')
        add('instrument__number', rename='ifcb')
        add('ml_analyzed')

        if item['location'] is not None:
            r['latitude'].append(item['location'].y)
            r['longitude'].append(item['location'].x)
        elif dataset_location is not None:
            r['latitude'].append(dataset_location.y)
            r['longitude'].append(dataset_location.x)
        else:
            r['latitude'].append(np.nan)
            r['longitude'].append(np.nan)

        if item['depth'] is not None:
            add('depth')
        elif dataset_depth is not None:
            r['depth'].append(dataset_depth)
        else:
            r['depth'].append(np.nan)

        add('cruise')
        add('cast')
        add('niskin')
        add('sample_type')
        add('n_images')

        tag_names = tags_by_id[item['id']]
        for i in range(n_tag_cols):
            v = tag_names[i] if i < len(tag_names) else ''
            r[f'tag{i+1}'].append(v)

        r['comment_summary'].append(comment_summary_by_id.get(item['id'], ''))
        r['trigger_selection'].append(
            trigger_selection_by_id.get(item['id'], ''))
        r['skip'].append(1 if item['skip'] else 0)

    df = pd.DataFrame(r)
    return df
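# Hedged usage sketch (hypothetical caller, not from the original source):
# export_metadata() returns a pandas DataFrame, so a caller can persist it
# however it likes, for example as CSV.
def export_metadata_csv(ds, bins, path='metadata.csv'):
    df = export_metadata(ds, bins)
    df.to_csv(path, index=False)
    return path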
def export_csv(foia_pks, user_pk):
    """Export a csv of the selected FOIA requests"""
    fields = (
        (lambda f: f.user.username, 'User'),
        (lambda f: f.title, 'Title'),
        (lambda f: f.get_status_display(), 'Status'),
        (lambda f: settings.MUCKROCK_URL + f.get_absolute_url(), 'URL'),
        (lambda f: f.jurisdiction.name, 'Jurisdiction'),
        (lambda f: f.jurisdiction.pk, 'Jurisdiction ID'),
        (
            lambda f: f.jurisdiction.get_level_display(),
            'Jurisdiction Level',
        ),
        (
            lambda f: f.jurisdiction.parent.name
            if f.jurisdiction.level == 'l' else f.jurisdiction.name,
            'Jurisdiction State',
        ),
        (lambda f: f.agency.name if f.agency else '', 'Agency'),
        (lambda f: f.agency.pk if f.agency else '', 'Agency ID'),
        (lambda f: f.date_followup, 'Followup Date'),
        (lambda f: f.date_estimate, 'Estimated Completion Date'),
        (lambda f: f.composer.requested_docs, 'Requested Documents'),
        (lambda f: f.current_tracking_id(), 'Tracking Number'),
        (lambda f: f.embargo, 'Embargo'),
        (lambda f: f.days_since_submitted, 'Days since submitted'),
        (lambda f: f.days_since_updated, 'Days since updated'),
        (lambda f: f.project_names, 'Projects'),
        (lambda f: f.tag_names, 'Tags'),
        (lambda f: f.price, 'Price'),
        (lambda f: f.composer.datetime_submitted, 'Date Submitted'),
        (lambda f: f.date_due, 'Date Due'),
        (lambda f: f.datetime_done, 'Date Done'),
    )
    foias = (FOIARequest.objects.filter(pk__in=foia_pks).select_related(
        'composer__user',
        'agency__jurisdiction__parent',
    ).only(
        'composer__user__username',
        'title',
        'status',
        'slug',
        'agency__jurisdiction__name',
        'agency__jurisdiction__slug',
        'agency__jurisdiction__id',
        'agency__jurisdiction__parent__name',
        'agency__jurisdiction__parent__id',
        'agency__name',
        'agency__id',
        'date_followup',
        'date_estimate',
        'embargo',
        'composer__requested_docs',
    ).annotate(
        days_since_submitted=ExtractDay(
            Cast(Now() - F('composer__datetime_submitted'), DurationField())),
        days_since_updated=ExtractDay(
            Cast(Now() - F('datetime_updated'), DurationField())),
        project_names=StringAgg('projects__title', ',', distinct=True),
        tag_names=StringAgg('tags__name', ',', distinct=True),
    ))
    conn = S3Connection(
        settings.AWS_ACCESS_KEY_ID,
        settings.AWS_SECRET_ACCESS_KEY,
    )
    bucket = conn.get_bucket(settings.AWS_STORAGE_BUCKET_NAME)
    today = date.today()
    file_key = 'exported_csv/{y:4d}/{m:02d}/{d:02d}/{md5}/requests.csv'.format(
        y=today.year,
        m=today.month,
        d=today.day,
        md5=md5('{}{}'.format(
            int(timestamp()),
            ''.join(str(pk) for pk in foia_pks[:100]),
        )).hexdigest(),
    )
    key = bucket.new_key(file_key)
    with smart_open(key, 'wb') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(f[1] for f in fields)
        for foia in foias.iterator():
            writer.writerow(f[0](foia) for f in fields)
    key.set_acl('public-read')
    notification = TemplateEmail(
        user=User.objects.get(pk=user_pk),
        extra_context={'file': file_key},
        text_template='message/notification/csv_export.txt',
        html_template='message/notification/csv_export.html',
        subject='Your CSV Export',
    )
    notification.send(fail_silently=False)
def query(request):
    queryset = Address.objects.values("customer__name").annotate(
        phones=StringAgg('phone', delimiter="'"),
    )
    return HttpResponse(queryset)