Example #1
0
 def __init__(self, user_pk, foia_pks):
     """Set up the CSV export task for one user and a set of FOIA requests.

     ``user_pk`` identifies the requesting user; ``foia_pks`` lists the
     FOIARequest primary keys to export.  Only the first 100 pks are folded
     into the identifier passed to the parent task.
     """
     pk_digest = ''.join(str(pk) for pk in foia_pks[:100])
     super(ExportCsv, self).__init__(user_pk, pk_digest)
     # Build the export queryset step by step: restrict fetched columns
     # with .only() and precompute the derived per-request values the
     # export needs as annotations.
     queryset = FOIARequest.objects.filter(pk__in=foia_pks)
     queryset = queryset.select_related(
         'composer__user',
         'agency__jurisdiction__parent',
     )
     queryset = queryset.only(
         'composer__user__username',
         'title',
         'status',
         'slug',
         'agency__jurisdiction__name',
         'agency__jurisdiction__slug',
         'agency__jurisdiction__id',
         'agency__jurisdiction__parent__name',
         'agency__jurisdiction__parent__id',
         'agency__name',
         'agency__id',
         'date_followup',
         'date_estimate',
         'embargo',
         'composer__requested_docs',
     )
     self.foias = queryset.annotate(
         days_since_submitted=ExtractDay(
             Cast(Now() - F('composer__datetime_submitted'),
                  DurationField())),
         days_since_updated=ExtractDay(
             Cast(Now() - F('datetime_updated'), DurationField())),
         project_names=StringAgg('projects__title', ',', distinct=True),
         tag_names=StringAgg('tags__name', ',', distinct=True),
     )
Example #2
0
def get_generic_values(doc: Document) -> Dict[str, Any]:
    """Collect cluster id, party names, date range and max currency for *doc*.

    If changing keys of the returned dictionary - please change field code
    constants in apps/rawdb/field_value_tables.py accordingly
    (_FIELD_CODE_CLUSTER_ID and others).

    Raises Document.DoesNotExist if the document was removed concurrently.
    """
    annotated = Document.all_objects.filter(pk=doc.pk).annotate(
        cluster_id=Max('documentcluster'),
        parties=StringAgg('textunit__partyusage__party__name',
                          delimiter=', ',
                          distinct=True),
        min_date=Min('textunit__dateusage__date'),
        max_date=Max('textunit__dateusage__date'))

    # The document may have been deleted between the caller's lookup and now.
    if not annotated.exists():
        raise Document.DoesNotExist

    values = annotated.values(
        'cluster_id', 'parties', 'min_date',
        'max_date').first()  # type: Dict[str, Any]

    # Highest single currency usage, keeping the currency code with it.
    top_currency = (CurrencyUsage.objects
                    .filter(text_unit__document_id=doc.id)
                    .order_by('-amount')
                    .values('currency', 'amount')
                    .first())  # type: Dict[str, Any]
    values['max_currency'] = top_currency
    values['max_currency_name'] = \
        top_currency['currency'] if top_currency else None
    values['max_currency_amount'] = \
        top_currency['amount'] if top_currency else None

    return values
 def get_values(self, doc: Document, text: str) -> List[Tuple[Any, Optional[int], Optional[int]]]:
     """Return the aggregated party names of *doc* as a single value tuple.

     Returns ``[(names, None, None)]`` where *names* is the comma-separated
     string of distinct party names, or ``[]`` when the document has no
     party usages.

     Fix: ``QuerySet.aggregate`` always returns a dict, so the previous
     ``if v`` test was always true and the ``return []`` branch was dead
     code; test the aggregated value itself instead so a document without
     parties yields an empty list rather than ``[(None, None, None)]``.
     """
     v = PartyUsage.objects.filter(text_unit__document_id=doc.id) \
         .aggregate(value=StringAgg('party__name', delimiter=', ', distinct=True))
     if v['value'] is not None:
         return [(v['value'], None, None)]
     return []
Example #4
0
 def get_value(
         self, log, field: DocumentField, doc: Document,
         cur_field_code_to_value: Dict[str,
                                       Any]) -> Optional[FieldValueDTO]:
     """Aggregate distinct party names of *doc* into a FieldValueDTO.

     Returns ``None`` when the document has no party usages.

     Fix: ``QuerySet.aggregate`` always returns a dict, so the previous
     ``if v`` check was always true and a DTO wrapping ``None`` was
     returned for documents without parties; check the aggregated value
     itself and bail out before converting it.
     """
     v = PartyUsage.objects.filter(text_unit__document_id=doc.id) \
         .aggregate(value=StringAgg('party__name', delimiter=', ', distinct=True))
     if v['value'] is None:
         return None
     value = TypedField.by(field).field_value_python_to_json(v['value'])
     return FieldValueDTO(field_value=value)
Example #5
0
def benefits(obj_list):
    """
    Return a comma-separated string of all Benefit names related to the
    Salaries whose ids are passed in obj_list.
    """
    aggregated = Salary.objects.filter(id__in=obj_list).aggregate(
        list=StringAgg('benefits__name', distinct=True, delimiter=', '))
    return aggregated['list']
Example #6
0
    def documents(self, request, **kwargs):
        """
        Get list of project documents
        """
        project = self.get_object()

        # Generic projects must finish clustering before their documents
        # can be listed.
        clustering = project.projectclustering_set
        if project.type.is_generic() and (
                clustering.exists() and not clustering.last().completed):
            return Response('Project documents clustering is not completed.',
                            status=500)

        documents = list(
            Document.objects.filter(project=project)
            .values('id', 'name')
            .annotate(
                cluster_id=Max('documentcluster'),
                parties=StringAgg('textunit__partyusage__party__name',
                                  delimiter=', ',
                                  distinct=True),
                min_date=Min('textunit__dateusage__date'),
                max_date=Max('textunit__dateusage__date'))
            .order_by('cluster_id', 'name'))

        # Max currency amount per (document, currency) pair, queried
        # separately because it is hard to get the currency code itself
        # (like USD) along with the amount in the query above.
        currency_rows = CurrencyUsage.objects \
            .filter(text_unit__document__project=project) \
            .values('text_unit__document__name', 'currency') \
            .annotate(max_currency=Max('amount')).order_by('max_currency')

        def _format_amount(currency, amount):
            # Render whole amounts without a trailing ".0".
            shown = int(amount) if int(amount) == amount else amount
            return '{} {}'.format(currency, shown)

        max_currencies = {}
        for row in currency_rows:
            doc_name = row['text_unit__document__name']
            max_currencies[doc_name] = {
                'max_currency': row['max_currency'],
                'max_currency_str': _format_amount(row['currency'],
                                                   row['max_currency']),
            }

        # Join the two query results on document name.
        for entry in documents:
            entry.update(max_currencies.get(
                entry['name'],
                {'max_currency': None, 'max_currency_str': None}))

        return Response(documents)
    def get_generic_values(self, doc: Document, generic_values_to_fill: FieldSpec = None) \
            -> Dict[str, Any]:
        """Compute generic summary values for *doc* (cluster id, parties,
        date range, max currency), restricted by *generic_values_to_fill*.

        If changing keys of the returned dictionary - please change field
        code constants in apps/rawdb/field_value_tables.py accordingly
        (FIELD_CODE_CLUSTER_ID and others).

        Raises Document.DoesNotExist if the document was removed
        concurrently.
        """
        document_qs = Document.all_objects.filter(pk=doc.pk)

        annotations = dict()

        if DocumentGenericField.cluster_id.specified_in(
                generic_values_to_fill):
            annotations['cluster_id'] = Max('documentcluster')

        if generic_values_to_fill is True:
            annotations['parties'] = StringAgg(
                'textunit__partyusage__party__name',
                delimiter=', ',
                distinct=True)
            annotations['min_date'] = Min('textunit__dateusage__date')
            annotations['max_date'] = Max('textunit__dateusage__date')

        # The document may have been removed between the caller's lookup
        # and now.
        if not document_qs.exists():
            raise Document.DoesNotExist

        values = {}
        if annotations:
            values = document_qs.annotate(**annotations).values(
                *annotations.keys()).first()  # type: Dict[str, Any]

        if generic_values_to_fill is True:
            # Use raw SQL because the equivalent ORM query
            # (CurrencyUsage...order_by('-amount')...first()) sometimes
            # hangs for up to a minute.
            # Fix: pass the document id as a bound query parameter instead
            # of interpolating it into the SQL string with str.format() --
            # parameterization is the safe default even for a pk.
            max_currency_sql = '''
                SELECT c.currency, MAX(c.amount) amount
                FROM extract_currencyusage c
                INNER JOIN document_textunit dtu ON c.text_unit_id = dtu.id
                WHERE dtu.document_id = %s
                GROUP BY c.currency ORDER BY amount DESC limit 1;'''
            with connection.cursor() as cursor:
                cursor.execute(max_currency_sql, [doc.pk])
                max_currency = dictfetchone(cursor)

                values['max_currency'] = max_currency
                values['max_currency_name'] = max_currency[
                    'currency'] if max_currency else None
                values['max_currency_amount'] = max_currency[
                    'amount'] if max_currency else None

        return values
 def cache_generic_values(self):
     """Recompute this document's generic summary values and persist them
     into the ``generic_data`` field."""
     summary = (Document.objects.filter(pk=self.pk)
                .annotate(
                    cluster_id=Max('documentcluster'),
                    parties=StringAgg('textunit__partyusage__party__name',
                                      delimiter=', ',
                                      distinct=True),
                    max_currency_amount=Max(
                        'textunit__currencyusage__amount'),
                    max_currency_name=Max(
                        'textunit__currencyusage__currency'),
                    min_date=Min('textunit__dateusage__date'),
                    max_date=Max('textunit__dateusage__date'))
                .values('cluster_id', 'parties', 'max_currency_amount',
                        'max_currency_name', 'min_date', 'max_date')
                .first())
     self.generic_data = summary
     self.save(update_fields=['generic_data'])
Example #9
0
def survey(request, survey_name):
    """Render the overview page for one survey with aggregate facts."""
    survey = KoboData.objects.filter(dataset_name=survey_name)
    village_geojson = survey_villages(survey_name)

    # Survey-level facts: date range, average household size, districts
    # and landscapes covered.  num_hh counts household members plus the
    # respondent.
    q1 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name)\
        .annotate(num_hh=Count('answerhhmembers')+1)\
        .aggregate(Max('survey_date'),
                   Min('survey_date'),
                   Avg('num_hh'),
                   districts=StringAgg('district', delimiter=",", distinct=True),
                   landscape=ArrayAgg('landscape', distinct=True))

    # Household counts grouped by the control/treatment flag, ordered so
    # the non-control group comes first.
    q2 = Answer.objects.filter(dataset_uuid__dataset_name=survey_name).\
        values('hh_type_control')\
        .annotate(num_hh=Count('answer_id'))\
        .order_by('hh_type_control')

    group_counts = [row["num_hh"] for row in q2]
    survey_size = sum(group_counts[:2])
    survey_size_control = group_counts[1] if len(group_counts) >= 2 else 0

    survey_facts = {
        'start_date': q1["survey_date__min"].date(),
        'end_date': q1["survey_date__max"].date(),
        'survey_size': survey_size,
        'survey_size_control': survey_size_control,
        'avg_hh_size': round(q1["num_hh__avg"], 2),
        'districts': q1["districts"],
        'landscape': q1["landscape"][0]
    }

    # TODO review return values,  can be better structured
    return render(request, 'bns_survey.html', {'survey': survey,
                                               'surveys': [survey],
                                               'landscape_geojson': '{"type" : "FeatureCollection", "features" :[]}',
                                               'village_geojson': village_geojson,
                                               'survey_name': survey_name,
                                               'survey_facts': survey_facts})
    def get_context_data(self, *, object_list=None, **kwargs):
        """Build the report context for order items in a date range.

        Reads ``fromdate``/``todate`` from the session, groups order items
        by a display name derived from the product name, and computes the
        maximum total amount and the maximum single-item sum.
        """
        fromdate = self.request.session['fromdate']
        todate = self.request.session['todate']

        # Annotate each order item with a display name and a concatenated
        # description line (order number, price, date, amount), then keep
        # only items whose order date falls within the requested range.
        orders_set_sorted = OrderItem.objects.all() \
            .annotate(name=Concat(Value('Товар - '), 'product_name', output_field=CharField())) \
            .annotate(concat=Concat(Value('Заказ -  '), 'order__number',
                                    Value(',  Цена -  '), 'product_price',
                                    Value(',  Дата -  '), 'order__create_date',
                                    Value(',  Количество -  '), 'amount',
                                    output_field=CharField())) \
            .filter(order__create_date__gte=fromdate, order__create_date__lte=todate)

        # Group by display name and join the description lines per group.
        queryset = orders_set_sorted.values('name').order_by('-name') \
            .annotate(
            concat_=StringAgg('concat', delimiter='; '),
            customer_name=F('name')
        )
        # Largest per-product total amount across all groups.
        top_amount = queryset.values('name') \
            .annotate(all_amount=ExpressionWrapper(Sum("amount"), output_field=CharField())) \
            .aggregate(Max('all_amount'))['all_amount__max']

        top_product = queryset.values('name') \
            .annotate(sum_num=ExpressionWrapper(F("amount") * F('product_price'), output_field=DecimalField()))

        # NOTE(review): ordering a values('name') queryset by 'order__id'
        # changes the GROUP BY Django emits — verify the intended grouping.
        top_product_max = top_product.order_by('order__id').aggregate(
            Max('sum_num'))['sum_num__max']

        # NOTE(review): connection.queries is only populated with DEBUG
        # enabled; this looks like diagnostic output — confirm it is wanted
        # in the final context.
        queries = connection.queries
        reset_queries()
        context = {
            "items": queryset[:100],
            "queries": queries,
            'sum_num__max': top_product_max,
            'all_amount__max': top_amount
        }
        return context
Example #11
0
def format_items_iterator(analysis, sample_ids, items):
    """Yield grid items with -1 sample values replaced by "." and variant
    tags attached.

    Tags can't be added via the node queryset (monkey patch func above) as
    we'd get an issue with tacked-on zygosity columns etc not being in
    GROUP BY or an aggregate func — so items are patched while iterating.
    """
    SAMPLE_FIELDS = ["allele_depth", "allele_frequency", "read_depth", "genotype_quality", "phred_likelihood"]

    tagged_qs = Variant.objects.filter(varianttag__analysis=analysis)
    tagged_qs = tagged_qs.annotate(
        tags=StringAgg("varianttag__tag", delimiter=', ', distinct=True))
    variant_tags = dict(tagged_qs.values_list("id", "tags"))

    # Precompute the per-sample column names once instead of per item.
    sample_columns = [f"sample_{sample_id}_ov_{f}"
                      for sample_id in sample_ids
                      for f in SAMPLE_FIELDS]

    for item in items:
        # -1 is the sentinel for a missing sample value; show "." instead.
        for column in sample_columns:
            if item.get(column) == -1:
                item[column] = "."

        tags = variant_tags.get(item["id"])
        if tags:
            item["tags"] = tags
        yield item
Example #12
0
    def validate(self, data):
        """Validate a message payload addressed either to a room or a user.

        Exactly one of ``room_id`` / ``user_id`` must be provided.  For a
        room, the room must exist and be owned by the current user (stored
        into ``self.context['room']``).  For a user, the target must exist
        and differ from the sender; then the sender's rooms are checked for
        an existing two-member room with exactly this user pair.
        """
        room_id = data.get('room_id', None)
        user_id = data.get('user_id', None)

        if room_id is None and user_id is None:
            raise ValidationError({
                'room_id':
                'One of this fields:room_id, user_id is required.'
            })

        if room_id:
            self.context['room'] = Room.objects.filter(
                room_id=room_id, owner=self.context['user']).first()
            if not self.context['room']:
                raise ValidationError({'room_id': 'Room was not found.'})

        if user_id:
            if user_id == self.context['user'].id:
                raise ValidationError(
                    {'user_id': 'You can\'t send message to yourself.'})

            user = User.objects.filter(id=user_id).first()
            if not user:
                raise ValidationError({'user_id': 'User was not found.'})

            # Look for a room whose sorted, comma-joined member ids match
            # exactly the (sender, target) pair.
            room_ids = RoomMember.objects.filter(
                user=self.context['user']).values_list('room_id', flat=True)
            room_exist = RoomMember.objects.filter(room_id__in=room_ids) \
                .values('room_id') \
                .annotate(item=StringAgg(Cast('user_id', ModelCharField()), ',', ordering=('user_id',))) \
                .filter(item=','.join(sorted([str(user_id), str(self.context['user'].id)])))

            # NOTE(review): leftover debug output — the existing-room check
            # currently has no effect beyond this print; confirm whether it
            # should store the room or raise instead.
            if len(room_exist):
                print('+++++++++++')

        return data
Example #13
0
def query(request):
    """Return customer names with their phone numbers aggregated into one
    string, joined by the delimiter from the query string (default ',')."""
    delimiter = request.GET.get("delimiter", ',')
    grouped = Address.objects.values("customer__name").annotate(
        phones=StringAgg('phone', delimiter))
    return HttpResponse(grouped)
Example #14
0
    def render_to_response(self, context, **kwargs):
        """Allow CSV responses.

        When the client asks for ``content_type=csv`` and holds the
        ``foia.export_csv`` permission, stream the current request list as
        a CSV attachment; otherwise fall through to the normal template
        response.
        """

        wants_csv = self.request.GET.get('content_type') == 'csv'
        has_perm = self.request.user.has_perm('foia.export_csv')
        if wants_csv and has_perm:
            # Echo is presumably a write-through pseudo-buffer so csv.writer
            # output can be streamed row by row — confirm against its class.
            psuedo_buffer = Echo()
            # (accessor, CSV column header) pairs, written in this order.
            fields = (
                (lambda f: f.user.username, 'User'),
                (lambda f: f.title, 'Title'),
                (lambda f: f.get_status_display(), 'Status'),
                (lambda f: settings.MUCKROCK_URL + f.get_absolute_url(),
                 'URL'),
                (lambda f: f.jurisdiction.name, 'Jurisdiction'),
                (lambda f: f.jurisdiction.pk, 'Jurisdiction ID'),
                (
                    lambda f: f.jurisdiction.get_level_display(),
                    'Jurisdiction Level',
                ),
                (
                    lambda f: f.jurisdiction.parent.name
                    if f.jurisdiction.level == 'l' else f.jurisdiction.name,
                    'Jurisdiction State',
                ),
                (lambda f: f.agency.name if f.agency else '', 'Agency'),
                (lambda f: f.agency.pk if f.agency else '', 'Agency ID'),
                (lambda f: f.date_followup, 'Followup Date'),
                (lambda f: f.date_estimate, 'Estimated Completion Date'),
                (lambda f: f.composer.requested_docs, 'Requested Documents'),
                (lambda f: f.current_tracking_id(), 'Tracking Number'),
                (lambda f: f.embargo, 'Embargo'),
                (lambda f: f.days_since_submitted, 'Days since submitted'),
                (lambda f: f.days_since_updated, 'Days since updated'),
                (lambda f: f.project_names, 'Projects'),
                (lambda f: f.tag_names, 'Tags'),
            )
            # Reset any inherited select_related, then narrow the query to
            # exactly the columns and annotations the accessors above use.
            foias = (context['paginator'].object_list.select_related(
                None).select_related(
                    'composer__user',
                    'agency__jurisdiction',
                ).prefetch_related('tracking_ids', ).only(
                    'composer__user__username',
                    'title',
                    'status',
                    'slug',
                    'agency__jurisdiction__name',
                    'agency__jurisdiction__slug',
                    'agency__jurisdiction__id',
                    'agency__name',
                    'agency__id',
                    'date_followup',
                    'date_estimate',
                    'embargo',
                    'composer__requested_docs',
                ).annotate(
                    # Whole days elapsed since submission / last update.
                    days_since_submitted=ExtractDay(
                        Cast(Now() - F('composer__datetime_submitted'),
                             DurationField())),
                    days_since_updated=ExtractDay(
                        Cast(Now() - F('datetime_updated'), DurationField())),
                    project_names=StringAgg('projects__title',
                                            ',',
                                            distinct=True),
                    tag_names=StringAgg('tags__name', ',', distinct=True),
                ))
            writer = csv.writer(psuedo_buffer)
            # Stream the header row followed by one row per request so the
            # whole CSV never has to be held in memory.
            response = StreamingHttpResponse(
                chain(
                    [writer.writerow(f[1] for f in fields)],
                    (writer.writerow(f[0](foia) for f in fields)
                     for foia in foias),
                ),
                content_type='text/csv',
            )
            response[
                'Content-Disposition'] = 'attachment; filename="requests.csv"'
            return response
        else:
            return super(RequestList,
                         self).render_to_response(context, **kwargs)
Example #15
0
 def groups(self):
     """Return the names of all groups this member belongs to, joined
     with ", "."""
     aggregated = self.groupmember_set.aggregate(
         groups=StringAgg("group__name", delimiter=", "))
     return aggregated["groups"]
Example #16
0
def export_metadata(ds, bins):
    """Build a pandas DataFrame of per-bin metadata for dataset *ds*.

    *bins* is a queryset of bins; one output row is produced per distinct
    pid, with tag columns (tag1..tagN), a comment summary, the trigger
    selection metadata field, and location/depth falling back to the
    dataset-level values (then NaN).
    """
    name = ds.name
    dataset_location = ds.location
    dataset_depth = ds.depth
    bqs = bins
    # The tags__name join produces one row per (bin, tag) pair, so the same
    # pid can repeat; ordering by pid lets the loop below de-duplicate.
    qs = bqs.values('id', 'pid', 'sample_time', 'location', 'ml_analyzed',
                    'cruise', 'cast', 'niskin', 'depth', 'instrument__number',
                    'skip', 'sample_type', 'n_images',
                    'tags__name').order_by('pid', 'tags__name')
    # fetch all tags and compute number of tag columns
    tags_by_id = defaultdict(list)
    n_tag_cols = 0
    for item in qs:
        tag_name = item['tags__name']
        if tag_name:
            id = item['id']
            tags = tags_by_id[id]
            tags.append(tag_name)
            if len(tags) > n_tag_cols:
                n_tag_cols = len(tags)
    # fetch all comment summaries (bin id -> '; '-joined comments in
    # timestamp order)
    comment_summary_by_id = \
        dict(bqs.filter(comments__isnull=False).values_list('id') \
             .annotate(comment_summary=StringAgg('comments__content', delimiter='; ', ordering='comments__timestamp')))
    # fetch selected metadata fields
    # PMTtriggerSelection_DAQ_MCConly
    trigger_selection_key = 'PMTtriggerSelection_DAQ_MCConly'
    id2md = bqs.filter(
        metadata_json__contains=trigger_selection_key).values_list(
            'id', 'metadata_json')
    trigger_selection_by_id = dict([
        (id, json.loads(md).get(trigger_selection_key)) for id, md in id2md
    ])
    # now construct the dataframe
    r = defaultdict(list)
    r.update({'dataset': name})
    # fast way to remove duplicates: the queryset is ordered by pid, so
    # repeated pids (one per extra tag) are adjacent and can be skipped
    prev_pid = None
    for item in qs:
        if item['pid'] == prev_pid:
            continue
        prev_pid = item['pid']

        # Append item[field] to the output column (optionally renamed).
        def add(field, rename=None):
            if rename is not None:
                r[rename].append(item[field])
            else:
                r[field].append(item[field])

        add('pid')
        add('sample_time')
        add('instrument__number', rename='ifcb')
        add('ml_analyzed')
        # Location: bin value, else dataset value, else NaN.
        if item['location'] is not None:
            r['latitude'].append(item['location'].y)
            r['longitude'].append(item['location'].x)
        elif dataset_location is not None:
            r['latitude'].append(dataset_location.y)
            r['longitude'].append(dataset_location.x)
        else:
            r['latitude'].append(np.nan)
            r['longitude'].append(np.nan)
        # Depth: same fallback chain as location.
        if item['depth'] is not None:
            add('depth')
        elif dataset_depth is not None:
            r['depth'].append(dataset_depth)
        else:
            r['depth'].append(np.nan)
        add('cruise')
        add('cast')
        add('niskin')
        add('sample_type')
        add('n_images')
        # Pad the tag columns so every row has exactly n_tag_cols entries.
        tag_names = tags_by_id[item['id']]
        for i in range(n_tag_cols):
            v = tag_names[i] if i < len(tag_names) else ''
            r[f'tag{i+1}'].append(v)
        r['comment_summary'].append(comment_summary_by_id.get(item['id'], ''))
        r['trigger_selection'].append(
            trigger_selection_by_id.get(item['id'], ''))
        r['skip'].append(1 if item['skip'] else 0)

    df = pd.DataFrame(r)

    return df
Example #17
0
def export_csv(foia_pks, user_pk):
    """Export a csv of the selected FOIA requests.

    Writes the CSV to a public S3 key derived from today's date and a hash
    of the request pks, then emails the requesting user a link to it.

    Fix: ``hashlib.md5`` requires bytes on Python 3, so the formatted hash
    input is now encoded before hashing (previously a str was passed,
    which raises TypeError).
    """
    # (accessor, CSV column header) pairs, written in this order.
    fields = (
        (lambda f: f.user.username, 'User'),
        (lambda f: f.title, 'Title'),
        (lambda f: f.get_status_display(), 'Status'),
        (lambda f: settings.MUCKROCK_URL + f.get_absolute_url(), 'URL'),
        (lambda f: f.jurisdiction.name, 'Jurisdiction'),
        (lambda f: f.jurisdiction.pk, 'Jurisdiction ID'),
        (
            lambda f: f.jurisdiction.get_level_display(),
            'Jurisdiction Level',
        ),
        (
            lambda f: f.jurisdiction.parent.name
            if f.jurisdiction.level == 'l' else f.jurisdiction.name,
            'Jurisdiction State',
        ),
        (lambda f: f.agency.name if f.agency else '', 'Agency'),
        (lambda f: f.agency.pk if f.agency else '', 'Agency ID'),
        (lambda f: f.date_followup, 'Followup Date'),
        (lambda f: f.date_estimate, 'Estimated Completion Date'),
        (lambda f: f.composer.requested_docs, 'Requested Documents'),
        (lambda f: f.current_tracking_id(), 'Tracking Number'),
        (lambda f: f.embargo, 'Embargo'),
        (lambda f: f.days_since_submitted, 'Days since submitted'),
        (lambda f: f.days_since_updated, 'Days since updated'),
        (lambda f: f.project_names, 'Projects'),
        (lambda f: f.tag_names, 'Tags'),
        (lambda f: f.price, 'Price'),
        (lambda f: f.composer.datetime_submitted, 'Date Submitted'),
        (lambda f: f.date_due, 'Date Due'),
        (lambda f: f.datetime_done, 'Date Done'),
    )
    # Narrow the query to exactly the columns and annotations the
    # accessors above need.
    foias = (FOIARequest.objects.filter(pk__in=foia_pks).select_related(
        'composer__user',
        'agency__jurisdiction__parent',
    ).only(
        'composer__user__username',
        'title',
        'status',
        'slug',
        'agency__jurisdiction__name',
        'agency__jurisdiction__slug',
        'agency__jurisdiction__id',
        'agency__jurisdiction__parent__name',
        'agency__jurisdiction__parent__id',
        'agency__name',
        'agency__id',
        'date_followup',
        'date_estimate',
        'embargo',
        'composer__requested_docs',
    ).annotate(
        # Whole days elapsed since submission / last update.
        days_since_submitted=ExtractDay(
            Cast(Now() - F('composer__datetime_submitted'), DurationField())),
        days_since_updated=ExtractDay(
            Cast(Now() - F('datetime_updated'), DurationField())),
        project_names=StringAgg('projects__title', ',', distinct=True),
        tag_names=StringAgg('tags__name', ',', distinct=True),
    ))
    conn = S3Connection(
        settings.AWS_ACCESS_KEY_ID,
        settings.AWS_SECRET_ACCESS_KEY,
    )
    bucket = conn.get_bucket(settings.AWS_STORAGE_BUCKET_NAME)
    today = date.today()
    # Key is date-partitioned plus an md5 of (current time, first 100 pks)
    # so concurrent exports don't collide.
    file_key = 'exported_csv/{y:4d}/{m:02d}/{d:02d}/{md5}/requests.csv'.format(
        y=today.year,
        m=today.month,
        d=today.day,
        md5=md5('{}{}'.format(
            int(timestamp()),
            ''.join(str(pk) for pk in foia_pks[:100]),
        ).encode('utf-8')).hexdigest(),
    )
    key = bucket.new_key(file_key)
    with smart_open(key, 'wb') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(f[1] for f in fields)
        # iterator() avoids caching the whole queryset in memory.
        for foia in foias.iterator():
            writer.writerow(f[0](foia) for f in fields)
    key.set_acl('public-read')

    notification = TemplateEmail(
        user=User.objects.get(pk=user_pk),
        extra_context={'file': file_key},
        text_template='message/notification/csv_export.txt',
        html_template='message/notification/csv_export.html',
        subject='Your CSV Export',
    )
    notification.send(fail_silently=False)
Example #18
0
def query(request):
    """Return customer names with their phone numbers aggregated into one
    string, joined by a single-quote delimiter."""
    grouped = Address.objects.values("customer__name").annotate(
        phones=StringAgg('phone', delimiter="'"))
    return HttpResponse(grouped)