def extract_query_filter_params(cls, request) -> dict:
    """
    Build the access log filter parameters from the GET parameters of `request`.

    The parameters are later used with celery, so they have to be serialization
    friendly. This is why the helpers are called with `str_date=True` and
    `use_ids=True`.

    :param request: request whose `GET` mapping holds the filter values
    :return: dict combining the date filter and the implicit dimension filters
    """
    get_params = request.GET
    filter_params = date_filter_from_params(get_params, str_date=True)
    attr_filter = extract_accesslog_attr_query_params(
        get_params, dimensions=CSVExport.implicit_dims, use_ids=True
    )
    filter_params.update(attr_filter)
    return filter_params
def title_count(self, request, organization_pk):
    """
    Respond with the number of distinct titles per platform.

    The `PlatformTitle` records are restricted by the organization filter derived
    from `organization_pk` and the requesting user, and by the date filter taken
    from the GET parameters.
    """
    org_filter = organization_filter_from_org_id(organization_pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    platform_titles = PlatformTitle.objects.filter(**org_filter, **date_filter)
    # one row per platform with the distinct title count attached
    per_platform = platform_titles.values('platform').annotate(
        title_count=Count('title', distinct=True)
    )
    return Response(per_platform)
def platforms(self, request, pk, organization_pk):
    """
    Respond with the serialized platforms that have access logs for the title `pk`.

    Responds with 404 (via `get_object_or_404`) if the title does not exist. The
    access logs are further restricted by the organization filter and by the date
    filter from the GET parameters, both applied through the `accesslog__` relation.
    """
    title = get_object_or_404(Title.objects.all(), pk=pk)
    org_filter = organization_filter_from_org_id(organization_pk, self.request.user)
    date_filter = date_filter_from_params(self.request.GET, key_start='accesslog__')
    accesslog_org_filter = extend_query_filter(org_filter, 'accesslog__')
    matching = Platform.objects.filter(
        accesslog__target=title, **date_filter, **accesslog_org_filter
    )
    # the join through accesslog may repeat platforms, hence distinct()
    serializer = PlatformSerializer(matching.distinct(), many=True)
    return Response(serializer.data)
def get_queryset(self, request, organization_pk):
    """
    Return a per-platform queryset of access logs with the annotations supplied
    by `self.get_report_type_and_filters()`.

    Access logs are restricted by the organization filter, the date filter from
    the GET parameters and the report type returned by
    `get_report_type_and_filters()`.
    """
    org_filter = organization_filter_from_org_id(organization_pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    interest_rt, interest_annot_params = self.get_report_type_and_filters()
    # NOTE(review): the query is restricted to `interest_rt`; access logs of other
    # report types are not considered here
    accesslogs = AccessLog.objects.filter(
        **org_filter, **date_filter, report_type=interest_rt
    )
    return accesslogs.values('platform').annotate(**interest_annot_params)
def construct_query(self, report_type, params):
    """
    Build the base access log query for `report_type` filtered by `params`.

    :param report_type: `ReportType`, `ReportDataView` or a false value
    :param params: mapping with the requested filter values (e.g. GET params)
    :return: tuple `(query, dim_raw_name_to_name)` where the second item maps
             query dimension names (`dim1`, `dim2`, ...) to their i/o names
    """
    # only a real ReportType is used as a filter value; anything else
    # just limits the query to active metrics
    if report_type and isinstance(report_type, ReportType):
        query_params = {'report_type': report_type, 'metric__active': True}
    else:
        query_params = {'metric__active': True}
    # implicit dimensions - added when the corresponding params are given
    query_params.update(
        extract_accesslog_attr_query_params(params, dimensions=self.implicit_dims)
    )
    # extra dimensions defined by the report type
    dim_raw_name_to_name = {}
    if report_type:
        for position, dim in enumerate(report_type.dimensions_sorted, start=1):
            raw_name = f'dim{position}'
            short_name = dim.short_name
            dim_raw_name_to_name[raw_name] = short_name
            value = params.get(short_name)
            if not value:
                continue
            if dim.type == dim.TYPE_TEXT:
                # text dimensions are filtered by the pk of the matching text
                try:
                    value = DimensionText.objects.get(dimension=dim, text=value).pk
                except DimensionText.DoesNotExist:
                    # the value is left as it is - it will probably lead
                    # to an empty result
                    pass
            query_params[raw_name] = value
    # remap dimension names where the query name differs from the i/o one
    if self.prim_dim_name != self.io_prim_dim_name:
        dim_raw_name_to_name[self.prim_dim_name] = self.io_prim_dim_name
    if self.sec_dim_name != self.io_sec_dim_name:
        dim_raw_name_to_name[self.sec_dim_name] = self.io_sec_dim_name
    # extra filters registered for the primary and then the secondary dimension
    for io_dim_name in (self.io_prim_dim_name, self.io_sec_dim_name):
        make_extra = self.extra_query_params.get(io_dim_name)
        if make_extra:
            query_params.update(make_extra(report_type))
    # date filters
    query_params.update(date_filter_from_params(params))
    # report data views supply their own base queryset
    if report_type and isinstance(report_type, ReportDataView):
        base = report_type.logdata_qs()
    else:
        base = AccessLog.objects
    return base.filter(**query_params), dim_raw_name_to_name
def title_count_detail(self, request, organization_pk, pk):
    """
    Respond with `{'title_count': N}` - the number of distinct titles on
    the platform `pk`.

    The `PlatformTitle` records are restricted by the organization filter and by
    the date filter from the GET parameters. When no record matches, the count
    is 0.
    """
    org_filter = organization_filter_from_org_id(organization_pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    counts = (
        PlatformTitle.objects.filter(platform_id=pk, **org_filter, **date_filter)
        .values('platform')
        .annotate(title_count=Count('title', distinct=True))
    )
    try:
        row = counts.get()
    except PlatformTitle.DoesNotExist:
        # nothing matched the filters
        return Response({'title_count': 0})
    return Response({'title_count': row.get('title_count', 0)})
def get_queryset(self):
    """
    Should return only titles for specific organization and platform

    Stores the organization and date filters on `self` (`org_filter`,
    `date_filter`) and calls the `_before_queryset`, `_extra_filters`,
    `_extra_accesslog_filters`, `_annotations` and `_postprocess` hooks
    in this order.
    """
    self.org_filter = organization_filter_from_org_id(
        self.kwargs.get('organization_pk'), self.request.user
    )
    self.date_filter = date_filter_from_params(self.request.GET)
    # hook run before any part of the queryset is created
    self._before_queryset()
    # filters for the title itself - every word of `q` has to match one of
    # the name/isbn/issn/doi attributes
    search_filters = []
    search_text = self.request.query_params.get('q')
    if search_text:
        search_filters = [
            Q(name__ilike=word)
            | Q(isbn__ilike=word)
            | Q(issn__ilike=word)
            | Q(doi__ilike=word)
            for word in search_text.split()
        ]
    pub_type = self.request.query_params.get('pub_type')
    if pub_type:
        search_filters.append(Q(pub_type=pub_type))
    # evaluated here because it might be important for _extra_accesslog_filters
    extra_filters = self._extra_filters()
    # filters for access logs
    accesslog_filter = dict(self._extra_accesslog_filters())
    title_qs = Title.objects.all()
    if accesslog_filter:
        # there are access log filters, so the relevant access logs have to be
        # attached to the queryset
        accesslog_filter.update(**extend_query_filter(self.date_filter, 'accesslog__'))
        relevant = FilteredRelation('accesslog', condition=Q(**accesslog_filter))
        title_qs = title_qs.annotate(relevant_accesslogs=relevant)
    # Joining platformtitle and accesslog together is problematic - there are
    # usually several platformtitles, so the join multiplies the joined access
    # logs and with them the sums. The titles are therefore preselected and
    # their distinct IDs used as the base of the query containing the sums.
    # As a side effect, the query is also faster.
    matching_titles = Title.objects.filter(
        *search_filters,
        **extend_query_filter(self.date_filter, 'platformtitle__'),
        **extend_query_filter(self.org_filter, 'platformtitle__'),
        **extra_filters,
    ).distinct()
    result = title_qs.filter(pk__in=matching_titles)
    annotations = self._annotations()
    if annotations:
        result = result.annotate(**annotations)
    return self._postprocess(result)
def interest(self, request, pk):
    """
    Respond with the interest summary of the organization `pk`.

    The response contains `interest_sum`, `min_date`, `max_date` (moved by
    `month_end`) and `days` - the number of days between `min_date` and the
    adjusted `max_date`, both included, or 0 when there is no data.
    """
    org_filter = organization_filter_from_org_id(pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
    interest_logs = AccessLog.objects.filter(
        report_type=interest_rt, **org_filter, **date_filter
    )
    data = interest_logs.aggregate(
        interest_sum=Sum('value'), min_date=Min('date'), max_date=Max('date')
    )
    last_date = data['max_date']
    if not last_date:
        # no dates to do the math with
        data['days'] = 0
    else:
        data['max_date'] = month_end(last_date)
        data['days'] = (data['max_date'] - data['min_date']).days + 1
    return Response(data)
def get_queryset(self):
    """
    Should return only platforms for the requested organization.
    Should include title_count which counts titles on the platform

    Raises `ValueError` when the `interest` report type does not exist.
    """
    try:
        interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
    except ReportType.DoesNotExist:
        raise ValueError('No interest report type exists')
    org_filter = organization_filter_from_org_id(
        self.kwargs.get('organization_pk'), self.request.user
    )
    # filter selecting the suitable access logs
    count_filter = extend_query_filter(org_filter, 'accesslog__')
    # the prefilter starts as a copy of the count filter
    prefilter = dict(count_filter)
    # annotation parameters for the interest groups
    # NOTE(review): built before the date filters are merged into count_filter;
    # whether the date restriction reaches these annotations depends on how
    # interest_annotation_params uses the dict - confirm
    interest_annot_params = interest_annotation_params(count_filter, interest_rt)
    # date filters are added to both filters
    date_filter_params = date_filter_from_params(self.request.GET, key_start='accesslog__')
    if date_filter_params:
        count_filter.update(date_filter_params)
        prefilter.update(date_filter_params)
    is_list = self.lookup_field not in self.kwargs
    platform_qs = Platform.objects.filter(**org_filter)
    if is_list:
        # getting a list - only platforms matching the prefilter are included
        platform_qs = platform_qs.filter(**prefilter)
    # when filtering by ID the prefilter is not applied (org_filter still is)
    title_count = Count('accesslog__target', distinct=True, filter=Q(**count_filter))
    result = platform_qs.annotate(title_count=title_count, **interest_annot_params)
    if is_list:
        # for a list the interest is extracted here, otherwise it is done
        # in get_object
        extract_interests_from_objects(interest_rt, result)
    return result
def title_interest_histogram(self, request, pk):
    """
    Respond with a histogram of titles binned by their summed interest.

    Interest is summed per access log target for the organization `pk` and the
    date filter from the GET parameters. Sums are placed into the bins from
    `self.histogram_bins` (inclusive `(start, end)` pairs); sums outside of all
    those bins get a computed bin, see `fallback_bin` below.

    :return: Response with a list of dicts with the keys `count`, `start`, `end`
             and `name`, sorted by `(start, end)`
    """
    org_filter = organization_filter_from_org_id(pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
    query = (
        AccessLog.objects.filter(report_type=interest_rt, **org_filter, **date_filter)
        .values('target')
        .annotate(interest_sum=Coalesce(Sum('value'), 0))
        .values('interest_sum')
    )
    # number of titles for each distinct interest sum
    counter = Counter(rec['interest_sum'] for rec in query)

    def fallback_bin(hits):
        """
        Return the `(start, end)` bin for `hits` not covered by the fixed bins.

        The bin width is the largest power of ten below `hits`, so the bins go
        1001-2000, ..., 9001-10000, 10001-20000, ...
        The width is derived from `hits - 1` so that an exact power of ten falls
        into the bin which ends with it (10000 -> 9001-10000) and not into
        a bin like 1-10000 which would overlap all the smaller ones.
        Assumes integer sums - TODO confirm for non-integer `value` fields.
        """
        # max() keeps hits == 0 at one digit -> bin (0, 0)
        digits = len(str(max(hits - 1, 0))) - 1
        unit = 10 ** digits
        start = unit * ((hits - 1) // unit)
        return start + 1, start + unit

    # here we bin it according to self.histogram_bins
    bin_counter = Counter()
    for hits, count in counter.items():
        for start, end in self.histogram_bins:
            if start <= hits <= end:
                bin_counter[(start, end)] += count
                break
        else:
            bin_counter[fallback_bin(hits)] += count

    # objects to return
    def name(a, b):
        if a == b:
            return str(a)
        return f'{a}-{b}'

    data = [
        {'count': count, 'start': start, 'end': end, 'name': name(start, end)}
        for (start, end), count in sorted(bin_counter.items())
    ]
    return Response(data)
def construct_query(self, report_type, params):
    """
    Build the access log query for `report_type` filtered by `params`.

    Side effects: `self.used_report_type` is replaced by the report type from
    the query parameters when `replace_report_type_with_materialized` reports
    a change, and `self.reported_metrics` is set when metric is neither
    the primary nor the secondary dimension.

    :param report_type: report type or `ReportDataView` (or a false value)
    :param params: mapping with the requested filter values (e.g. GET params)
    :return: tuple `(query, dim_raw_name_to_name)` where the second item maps
             query dimension names (`dim1`, `dim2`, ...) to their i/o names
    """
    query_params = (
        {'report_type': self.used_report_type, 'metric__active': True}
        if report_type
        else {'metric__active': True}
    )
    # implicit dimensions - added when the corresponding params are given
    query_params.update(
        extract_accesslog_attr_query_params(params, dimensions=self.implicit_dims)
    )
    # extra dimensions defined by the report type
    dim_raw_name_to_name = {}
    if report_type:
        for position, dim in enumerate(report_type.dimensions_sorted, start=1):
            raw_name = f'dim{position}'
            short_name = dim.short_name
            dim_raw_name_to_name[raw_name] = short_name
            value = params.get(short_name)
            if not value:
                continue
            if dim.type == dim.TYPE_TEXT:
                # text dimensions are filtered by the pk of the matching text
                try:
                    value = DimensionText.objects.get(dimension=dim, text=value).pk
                except DimensionText.DoesNotExist:
                    # the value is left as it is - it will probably lead
                    # to an empty result
                    pass
            query_params[raw_name] = value
    # remap dimension names where the query name differs from the i/o one
    if self.prim_dim_name != self.io_prim_dim_name:
        dim_raw_name_to_name[self.prim_dim_name] = self.io_prim_dim_name
    if self.sec_dim_name != self.io_sec_dim_name:
        dim_raw_name_to_name[self.sec_dim_name] = self.io_sec_dim_name
    # extra filters registered for the primary and then the secondary dimension
    for io_dim_name in (self.io_prim_dim_name, self.io_sec_dim_name):
        make_extra = self.extra_query_params.get(io_dim_name)
        if make_extra:
            query_params.update(make_extra(report_type))
    # date filters
    query_params.update(date_filter_from_params(params))
    # a materialized report may be used if available
    extra_dims = {self.prim_dim_name}
    if self.sec_dim_name:
        extra_dims.add(self.sec_dim_name)
    rt_change = replace_report_type_with_materialized(
        query_params, other_used_dimensions=extra_dims
    )
    # the original used_report_type is kept for the interest metrics lookup below
    original_used_report_type = self.used_report_type
    if rt_change:
        self.used_report_type = query_params.get('report_type')
    # the query itself
    query = AccessLog.objects.filter(**query_params)
    if report_type and isinstance(report_type, ReportDataView):
        query = query.filter(**report_type.accesslog_filters)
    if not report_type or 'metric' in (self.prim_dim_name, self.sec_dim_name):
        return query, dim_raw_name_to_name
    # Metric is neither the primary nor the secondary dimension, so only interest
    # metrics are used.
    # Rationale: summing up different metrics together does not make much sense -
    # for example Total_Item_Requests and Unique_Item_Requests depend on each
    # other and the latter is in fact a subset of the former.
    # Technical note: putting the filter into the query leads to a very slow
    # response (2500 ms instead of 60 ms in a test case) - this is why the pks of
    # the metrics are fetched first and the "in" filter is used.
    interest_metrics = original_used_report_type.interest_metrics.order_by()
    self.reported_metrics = {metric.pk: metric for metric in interest_metrics}
    if self.reported_metrics:
        query = query.filter(metric_id__in=self.reported_metrics.keys())
    else:
        # no interest metrics are associated with the report type, so the user
        # has to be told that all possible metrics were used
        used_metric_ids = {
            rec['metric_id'] for rec in query.values('metric_id').distinct()
        }
        self.reported_metrics = {
            metric.pk: metric for metric in Metric.objects.filter(pk__in=used_metric_ids)
        }
    return query, dim_raw_name_to_name
def extract_query_filter_params(cls, request) -> dict:
    """
    Build the access log filter parameters from the GET parameters of `request`.

    :param request: request whose `GET` mapping holds the filter values
    :return: dict combining the date filter with the filters for
             the implicit dimensions of `cls`
    """
    get_params = request.GET
    filter_params = date_filter_from_params(get_params)
    attr_filter = extract_accesslog_attr_query_params(
        get_params, dimensions=cls.implicit_dims
    )
    filter_params.update(attr_filter)
    return filter_params