Exemple #1
0
 def extract_query_filter_params(cls, request) -> dict:
     """
     Build the query filter parameters for an export from the GET params of `request`.

     The resulting dict is passed on to celery, so it has to be serialization
     friendly - this is why the helpers are called with `str_date=True` and
     `use_ids=True`.

     :param request: request whose `GET` params describe the filter
     :return: date filter params extended with the access log attribute params
     """
     get_params = request.GET
     filter_params = date_filter_from_params(get_params, str_date=True)
     attr_params = extract_accesslog_attr_query_params(
         get_params, dimensions=CSVExport.implicit_dims, use_ids=True
     )
     filter_params.update(attr_params)
     return filter_params
Exemple #2
0
 def title_count(self, request, organization_pk):
     """
     Respond with the number of distinct titles for each platform.

     The `PlatformTitle` records are restricted by the organization filter
     derived from `organization_pk` and the requesting user and by the date
     filter taken from the GET params.
     """
     org_part = organization_filter_from_org_id(organization_pk, request.user)
     date_part = date_filter_from_params(request.GET)
     per_platform = (
         PlatformTitle.objects
         .filter(**org_part, **date_part)
         .values('platform')
         .annotate(title_count=Count('title', distinct=True))
     )
     return Response(per_platform)
Exemple #3
0
 def platforms(self, request, pk, organization_pk):
     """
     Respond with the serialized platforms which have access logs for title `pk`.

     A 404 is produced when the title does not exist. Both the organization
     filter and the date filter are applied on the related access logs, hence
     the 'accesslog__' prefix on their keys.
     """
     title = get_object_or_404(Title.objects.all(), pk=pk)
     org_part = organization_filter_from_org_id(organization_pk, self.request.user)
     date_part = date_filter_from_params(self.request.GET, key_start='accesslog__')
     accesslog_org_part = extend_query_filter(org_part, 'accesslog__')
     # distinct() because the join to access logs yields one row per matching log
     matching = Platform.objects.filter(
         accesslog__target=title, **date_part, **accesslog_org_part
     ).distinct()
     serializer = PlatformSerializer(matching, many=True)
     return Response(serializer.data)
Exemple #4
0
 def get_queryset(self, request, organization_pk):
     """
     Return access logs grouped by platform and annotated with the interest params.

     The report type and the annotation params both come from
     `self.get_report_type_and_filters()`.
     """
     org_part = organization_filter_from_org_id(organization_pk, request.user)
     date_part = date_filter_from_params(request.GET)
     interest_rt, interest_annot_params = self.get_report_type_and_filters()
     # NOTE(review): an older comment at this place stated that access logs are
     # intentionally NOT filtered by report type (so that other report types could
     # "connect" platforms and titles for the title count), but the query below does
     # filter by `report_type=interest_rt` - confirm which of the two is intended
     filtered = AccessLog.objects.filter(**org_part, **date_part, report_type=interest_rt)
     return filtered.values('platform').annotate(**interest_annot_params)
Exemple #5
0
 def construct_query(self, report_type, params):
     """
     Put together the base query for `report_type` filtered according to `params`.

     :param report_type: `ReportType`, `ReportDataView` or a falsy value
     :param params: dict-like object with the requested filter values
     :return: tuple `(query, dim_raw_name_to_name)`; the second item maps the
              names used in the query (`dim1`, `dim2`, ...) to the names used
              for input/output
     """
     query_params = {}
     # only a genuine ReportType is used as a direct filter
     if report_type and isinstance(report_type, ReportType):
         query_params['report_type'] = report_type
     query_params['metric__active'] = True
     # implicit dimensions - added if the params contain a value for them
     implicit_part = extract_accesslog_attr_query_params(params, dimensions=self.implicit_dims)
     query_params.update(implicit_part)
     # extra dimensions of the report type - added to the filter if requested
     dim_raw_name_to_name = {}
     if report_type:
         for position, dimension in enumerate(report_type.dimensions_sorted, start=1):
             raw_name = f'dim{position}'
             short_name = dimension.short_name
             dim_raw_name_to_name[raw_name] = short_name
             requested = params.get(short_name)
             if not requested:
                 continue
             if dimension.type == dimension.TYPE_TEXT:
                 # text dimensions are stored as references to DimensionText
                 try:
                     requested = DimensionText.objects.get(dimension=dimension,
                                                           text=requested).pk
                 except DimensionText.DoesNotExist:
                     # the value stays as it was - it will probably lead to empty result
                     pass
             query_params[raw_name] = requested
     # the query name of a dimension may differ from its i/o name - remember the mapping
     name_pairs = (
         (self.prim_dim_name, self.io_prim_dim_name),
         (self.sec_dim_name, self.io_sec_dim_name),
     )
     for query_name, io_name in name_pairs:
         if query_name != io_name:
             dim_raw_name_to_name[query_name] = io_name
     # extra filters registered for the primary and secondary dimension
     for io_name in (self.io_prim_dim_name, self.io_sec_dim_name):
         make_extra = self.extra_query_params.get(io_name)
         if make_extra:
             query_params.update(make_extra(report_type))
     # date filter
     query_params.update(date_filter_from_params(params))
     # a ReportDataView supplies its own base queryset
     if report_type and isinstance(report_type, ReportDataView):
         base = report_type.logdata_qs()
     else:
         base = AccessLog.objects
     return base.filter(**query_params), dim_raw_name_to_name
Exemple #6
0
 def title_count_detail(self, request, organization_pk, pk):
     """
     Respond with `{'title_count': N}` - the number of distinct titles on platform `pk`.

     The count is 0 when no `PlatformTitle` record matches the organization
     and date filters.
     """
     org_part = organization_filter_from_org_id(organization_pk, request.user)
     date_part = date_filter_from_params(request.GET)
     per_platform = (
         PlatformTitle.objects
         .filter(platform_id=pk, **org_part, **date_part)
         .values('platform')
         .annotate(title_count=Count('title', distinct=True))
     )
     try:
         row = per_platform.get()
     except PlatformTitle.DoesNotExist:
         # nothing matched, so there is nothing to count
         title_count = 0
     else:
         title_count = row.get('title_count', 0)
     return Response({'title_count': title_count})
Exemple #7
0
 def get_queryset(self):
     """
     Should return only titles for specific organization and platform

     Stores the organization and date filters on `self` (`org_filter`,
     `date_filter`) and then calls the hooks `_before_queryset`, `_extra_filters`,
     `_extra_accesslog_filters`, `_annotations` and `_postprocess` in this order.
     """
     self.org_filter = organization_filter_from_org_id(
         self.kwargs.get('organization_pk'), self.request.user)
     self.date_filter = date_filter_from_params(self.request.GET)
     # hook to be run before the queryset gets created
     self._before_queryset()
     # conditions on the title itself - each word of the search has to match some attr
     title_conditions = []
     search_text = self.request.query_params.get('q')
     if search_text:
         for word in search_text.split():
             title_conditions.append(
                 Q(name__ilike=word) | Q(isbn__ilike=word) | Q(issn__ilike=word) |
                 Q(doi__ilike=word)
             )
     pub_type = self.request.query_params.get('pub_type')
     if pub_type:
         title_conditions.append(Q(pub_type=pub_type))
     # this has to be evaluated before _extra_accesslog_filters as that method
     # might depend on it
     extra_filters = self._extra_filters()
     # conditions on accesslogs
     accesslog_conditions = dict(self._extra_accesslog_filters())
     titles = Title.objects.all()
     if accesslog_conditions:
         # there are some accesslog filters, so the relevant accesslogs
         # have to be added to the queryset
         accesslog_conditions.update(
             **extend_query_filter(self.date_filter, 'accesslog__'))
         relevant = FilteredRelation('accesslog', condition=Q(**accesslog_conditions))
         titles = titles.annotate(relevant_accesslogs=relevant)
     # Joining platformtitle and accesslog in one query is problematic - there are
     # usually several platformtitles, so the join multiplies the joined accesslogs
     # and thus the sums as well. Therefore the titles are preselected first and
     # their distinct IDs serve as a base for the query with the sums.
     # As a side effect, the query is also faster ;)
     preselected = Title.objects.filter(
         *title_conditions,
         **extend_query_filter(self.date_filter, 'platformtitle__'),
         **extend_query_filter(self.org_filter, 'platformtitle__'),
         **extra_filters,
     ).distinct()
     result = titles.filter(pk__in=preselected)
     annotations = self._annotations()
     if annotations:
         result = result.annotate(**annotations)
     return self._postprocess(result)
Exemple #8
0
 def interest(self, request, pk):
     """
     Respond with a summary of the interest access logs of organization `pk`.

     The response has the keys `interest_sum`, `min_date`, `max_date` and `days`.
     When there is a `max_date`, it is replaced by `month_end(max_date)` and `days`
     is the inclusive number of days from `min_date` to it; otherwise `days` is 0.
     """
     org_part = organization_filter_from_org_id(pk, request.user)
     date_part = date_filter_from_params(request.GET)
     interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
     logs = AccessLog.objects.filter(report_type=interest_rt, **org_part, **date_part)
     summary = logs.aggregate(
         interest_sum=Sum('value'), min_date=Min('date'), max_date=Max('date')
     )
     last_date = summary['max_date']
     if last_date:
         # only do the date math when there is a date at all
         last_date = month_end(last_date)
         summary['max_date'] = last_date
         summary['days'] = (last_date - summary['min_date']).days + 1
     else:
         summary['days'] = 0
     return Response(summary)
Exemple #9
0
    def get_queryset(self):
        """
        Should return only platforms for the requested organization.
        Should include title_count which counts titles on the platform

        :raises ValueError: when the 'interest' report type does not exist
        """
        try:
            interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
        except ReportType.DoesNotExist:
            raise ValueError('No interest report type exists')
        org_filter = organization_filter_from_org_id(
            self.kwargs.get('organization_pk'), self.request.user)
        # filter selecting the suitable access logs
        count_filter = extend_query_filter(org_filter, 'accesslog__')
        # the prefilter starts as a copy of the count filter
        # (i.e. without the interest_group filter)
        prefilter = dict(count_filter)
        # one annotation for each of the interest groups
        interest_annot_params = interest_annotation_params(count_filter, interest_rt)
        # date filters go to both the count filter and the prefilter
        date_filter_params = date_filter_from_params(self.request.GET, key_start='accesslog__')
        if date_filter_params:
            for target in (count_filter, prefilter):
                target.update(date_filter_params)

        title_count = Count('accesslog__target', distinct=True, filter=Q(**count_filter))
        platforms = Platform.objects.filter(**org_filter)
        if self.lookup_field in self.kwargs:
            # filtering by ID, so only one object is obtained - here the prefilter
            # is dropped so that it is possible to get data for platforms that are not
            # connected to the organization; the interest is added in get_object
            return platforms.annotate(title_count=title_count, **interest_annot_params)
        # not filtering by ID, so a list is returned and the interest is added here;
        # only platforms that have some data for the organization at hand are included
        result = platforms.filter(**prefilter).annotate(
            title_count=title_count, **interest_annot_params
        )
        # creates the interest dict attr from the interest annotations
        extract_interests_from_objects(interest_rt, result)
        return result
Exemple #10
0
    def title_interest_histogram(self, request, pk):
        """
        Respond with a histogram of titles by their total interest for organization `pk`.

        The interest of each title (access log `target`) is summed up and the titles
        are counted per bin. Bins come from `self.histogram_bins` (inclusive
        `(start, end)` pairs); a sum that does not fit into any of them gets a bin
        computed from its order of magnitude, e.g. 1001-2000, ..., 9001-10000,
        10001-20000.

        The response is a list of dicts with the keys `count`, `start`, `end` and
        `name`, sorted by bin.
        """
        org_filter = organization_filter_from_org_id(pk, request.user)
        date_filter = date_filter_from_params(request.GET)
        interest_rt = ReportType.objects.get(short_name='interest',
                                             source__isnull=True)
        query = AccessLog.objects\
            .filter(report_type=interest_rt, **org_filter, **date_filter) \
            .values('target') \
            .annotate(interest_sum=Coalesce(Sum('value'), 0)) \
            .values('interest_sum')
        # number of titles for each distinct interest sum
        counter = Counter(rec['interest_sum'] for rec in query)
        # here we bin it according to self.histogram_bins
        bin_counter = Counter()
        for hits, count in counter.items():
            for start, end in self.histogram_bins:
                if start <= hits <= end:
                    bin_counter[(start, end)] += count
                    break
            else:
                digits = len(str(hits)) - 1
                unit = 10**digits
                if unit > 1 and hits == unit:
                    # an exact power of ten is the upper bound of the last bin of the
                    # lower order of magnitude (10000 belongs to 9001-10000); without
                    # this it would get a bin 1-10000 overlapping all the others
                    unit //= 10
                start = unit * ((hits - 1) // unit)
                end = start + unit
                start += 1
                bin_counter[(start, end)] += count

        # objects to return
        def name(a, b):
            if a == b:
                return str(a)
            return f'{a}-{b}'

        data = [{
            'count': count,
            'start': start,
            'end': end,
            'name': name(start, end)
        } for (start, end), count in sorted(bin_counter.items())]
        return Response(data)
Exemple #11
0
    def construct_query(self, report_type, params):
        """
        Put together the `AccessLog` query for `report_type` filtered according to `params`.

        Side effects visible in this method:
        - `self.used_report_type` is replaced by the `report_type` value found in the
          query params when `replace_report_type_with_materialized` reports a change
        - `self.reported_metrics` is set (a dict of metrics keyed by pk) when
          `report_type` is truthy and metric is neither the primary nor the secondary
          dimension

        :param report_type: report type (possibly a `ReportDataView`) or a falsy value
        :param params: dict-like object with the requested filter values
        :return: tuple `(query, dim_raw_name_to_name)`; the second item maps the names
                 used in the query (`dim1`, `dim2`, ...) to the names used for i/o
        """
        if report_type:
            # note that the filter uses self.used_report_type, not the argument itself
            query_params = {'report_type': self.used_report_type, 'metric__active': True}
        else:
            query_params = {'metric__active': True}
        # go over implicit dimensions and add them to the query if GET params are given for this
        query_params.update(
            extract_accesslog_attr_query_params(params, dimensions=self.implicit_dims)
        )
        # now go over the extra dimensions and add them to filter if requested
        dim_raw_name_to_name = {}
        if report_type:
            for i, dim in enumerate(report_type.dimensions_sorted):
                # dimensions are numbered from 1 in the query (dim1, dim2, ...)
                dim_raw_name = 'dim{}'.format(i + 1)
                dim_name = dim.short_name
                dim_raw_name_to_name[dim_raw_name] = dim_name
                value = params.get(dim_name)
                if value:
                    if dim.type == dim.TYPE_TEXT:
                        # for text dimensions the filter uses the pk of the DimensionText
                        try:
                            value = DimensionText.objects.get(dimension=dim, text=value).pk
                        except DimensionText.DoesNotExist:
                            # we leave the value as it is - it will probably lead to empty result
                            pass
                    query_params[dim_raw_name] = value
        # remap dimension names if the query dim name is different from the i/o one
        if self.prim_dim_name != self.io_prim_dim_name:
            dim_raw_name_to_name[self.prim_dim_name] = self.io_prim_dim_name
        if self.sec_dim_name != self.io_sec_dim_name:
            dim_raw_name_to_name[self.sec_dim_name] = self.io_sec_dim_name
        # add extra filters if requested
        # (the values in extra_query_params are callables taking the report type)
        prim_extra = self.extra_query_params.get(self.io_prim_dim_name)
        if prim_extra:
            query_params.update(prim_extra(report_type))
        sec_extra = self.extra_query_params.get(self.io_sec_dim_name)
        if sec_extra:
            query_params.update(sec_extra(report_type))
        # add filter for dates
        query_params.update(date_filter_from_params(params))

        # maybe use materialized report if available
        extra_dims = {self.prim_dim_name}
        if self.sec_dim_name:
            extra_dims.add(self.sec_dim_name)
        # NOTE(review): presumably this modifies query_params in place (the new report
        # type is read back from it below) and returns a truthy value on change - confirm
        rt_change = replace_report_type_with_materialized(
            query_params, other_used_dimensions=extra_dims
        )
        # we preserve the original value of used_report_type for use in later code
        original_used_report_type = self.used_report_type
        if rt_change:
            self.used_report_type = query_params.get('report_type')

        # construct the query
        query = AccessLog.objects.filter(**query_params)
        if report_type and isinstance(report_type, ReportDataView):
            # a ReportDataView brings its own additional accesslog filters
            query = query.filter(**report_type.accesslog_filters)

        # filter to only interest metrics if metric neither primary nor secondary dim
        if report_type and self.prim_dim_name != 'metric' and self.sec_dim_name != 'metric':
            # Rationale: summing up different metrics together does not make much sense
            # for example Total_Item_Requests and Unique_Item_Requests are dependent on each
            # other and in fact the latter is a subset of the former. Thus we only use the
            # metrics that define interest for computation if metric itself is not the primary
            # or secondary dimension
            # Technical note: putting the filter into the query leads to a very slow response
            # (2500 ms instead of 60 ms is a test case) - this is why we get the pks of the metrics
            # first and then use the "in" filter.
            self.reported_metrics = {
                im.pk: im for im in original_used_report_type.interest_metrics.order_by()
            }
            if self.reported_metrics:
                query = query.filter(metric_id__in=self.reported_metrics.keys())
            else:
                # if there are no interest metrics associated with the report_type
                # we need to tell the user that all possible metrics were used
                used_metric_ids = {rec['metric_id'] for rec in query.values('metric_id').distinct()}
                self.reported_metrics = {
                    im.pk: im for im in Metric.objects.filter(pk__in=used_metric_ids)
                }
        return query, dim_raw_name_to_name
Exemple #12
0
 def extract_query_filter_params(cls, request) -> dict:
     """
     Build the query filter parameters from the GET params of `request`.

     :param request: request whose `GET` params describe the filter
     :return: date filter params extended with the access log attribute params
              for the implicit dimensions of `cls`
     """
     get_params = request.GET
     filter_params = date_filter_from_params(get_params)
     attr_params = extract_accesslog_attr_query_params(
         get_params, dimensions=cls.implicit_dims
     )
     filter_params.update(attr_params)
     return filter_params