def retry(self):
    attempt = self.credentials.fetch_report(
        counter_report=self.counter_report,
        start_date=self.start_date,
        end_date=month_end(self.end_date),
    )
    attempt.queue_previous = self
    attempt.save()
    return attempt

def get_data_with_months(self, dim, filter_params, success_metric):
    if dim not in self.attr_to_query_param_map:
        return HttpResponseBadRequest('unsupported dimension: "{}"'.format(dim))
    # we use 2 separate fields for dim in order to preserve both the ID of the
    # related field and its text value
    values = self.attr_to_query_param_map[dim]
    months = SushiFetchAttempt.objects.aggregate(start=Min('start_date'), end=Max('end_date'))
    start = month_start(months['start'])
    end = month_end(months['end'])
    cur_date = start
    output = []
    while cur_date < end:
        # now get the output
        for rec in (
            SushiFetchAttempt.objects.filter(**filter_params)
            .filter(start_date__lte=cur_date, end_date__gte=cur_date)
            .values(*values)
            .annotate(
                success_count=Count('pk', filter=Q(**{success_metric: True})),
                failure_count=Count('pk', filter=Q(**{success_metric: False})),
            )
        ):
            cur_date_str = '-'.join(str(cur_date).split('-')[:2])
            rec['month'] = cur_date_str[2:]
            rec['month_id'] = cur_date_str
            output.append(rec)
        cur_date = month_start(cur_date + timedelta(days=32))
    return output

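# --- illustrative sketch, not part of the original code ---
# get_data_with_months() above walks month by month via
# `month_start(cur_date + timedelta(days=32))`: adding 32 days always lands somewhere in
# the following month and month_start() snaps back to its first day. The demo below shows
# the idiom with a simplified, assumed month_start(); the `_demo` function is hypothetical.
def _demo_month_stepping():
    from datetime import date, timedelta

    def month_start(d):  # assumed behaviour of the real helper
        return d.replace(day=1)

    cur, months = date(2019, 11, 1), []
    while cur < date(2020, 3, 1):
        months.append(cur)
        cur = month_start(cur + timedelta(days=32))
    # months == [2019-11-01, 2019-12-01, 2020-01-01, 2020-02-01]
    return months
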
def perform_create(self, serializer: SushiFetchAttemptSerializer):
    serializer.validated_data['in_progress'] = True
    serializer.validated_data['end_date'] = month_end(serializer.validated_data['end_date'])
    super().perform_create(serializer)
    attempt = serializer.instance
    run_sushi_fetch_attempt_task.apply_async(args=(attempt.pk,), countdown=1)

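# --- illustrative note, not part of the original code ---
# `run_sushi_fetch_attempt_task` is only referenced in perform_create() above; a task of
# roughly this (assumed) shape is expected - registered with Celery and receiving the
# attempt's primary key:
#
#     @shared_task
#     def run_sushi_fetch_attempt_task(attempt_id: int):
#         attempt = SushiFetchAttempt.objects.get(pk=attempt_id)
#         ...  # perform the actual fetching
#
# apply_async(..., countdown=1) schedules the task to run about a second later; a short
# countdown is a common way of giving the creating transaction time to commit before the
# worker loads the object.
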
def process_fetch_units(fetch_units: [FetchUnit], start_date: date, end_date: date,
                        conflict_ok='skip', conflict_error='smart', sleep_time=0,
                        use_lock=True):
    while fetch_units and start_date >= end_date:
        new_fetch_units = []
        platform = fetch_units[0].credentials.platform
        logger.debug('Processing %d fetch units for platform %s, %s',
                     len(fetch_units), platform, start_date)
        for fetch_unit in fetch_units:  # type: FetchUnit
            end = month_end(start_date)
            # deal with a possible conflict
            conflict = fetch_unit.find_conflicting(start_date, end)
            if conflict:
                action = conflict_ok if (conflict.contains_data or conflict.queued) \
                    else conflict_error
                if action == 'smart':
                    # smart means that we use the retry timeout of the conflicting attempt
                    # to decide what to do
                    action = smart_decide_conflict_action(conflict)
                if action == 'stop':
                    logger.debug('Stopping on existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
                    continue
                elif action == 'skip':
                    logger.debug('Skipping on existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
                    new_fetch_units.append(fetch_unit)
                    continue
                else:
                    logger.debug('Continuing regardless of existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
            # download the data
            fetch_unit.sleep()
            attempt = fetch_unit.download(start_date, end, use_lock=use_lock)
            if attempt.contains_data or attempt.queued:
                new_fetch_units.append(fetch_unit)
            else:
                go_on = False
                # no data in this attempt, we must analyze it further
                if attempt.error_code:
                    error_meaning = attempt.error_explanation()
                    if error_meaning.setup_ok:
                        # this means we can go on - the credentials, etc. are OK
                        go_on = True
                if go_on:
                    new_fetch_units.append(fetch_unit)
                else:
                    logger.info('Unsuccessful fetch, stopping: %s, %s: %s',
                                platform, fetch_unit.credentials.organization, start_date)
            # sleep, but only if this is not the last unit in the list - it would not
            # make sense to wait just before finishing
            if fetch_unit is not fetch_units[-1]:
                sleep(sleep_time)
        fetch_units = new_fetch_units
        if fetch_units:
            sleep(sleep_time)  # we will do one more round, so we need to sleep
        # step one month back - subtracting 20 days from the first day of a month always
        # lands in the previous month and month_start() snaps to its first day
        start_date = month_start(start_date - timedelta(days=20))
    logger.debug('Finished processing')

def handle(self, *args, **options):
    self.sleep_time = options['sleep'] / 1000
    args = {}
    if options['organization']:
        args['organization__internal_id'] = options['organization']
    if options['platform']:
        args['platform__short_name'] = options['platform']
    if options['version']:
        args['counter_version'] = int(options['version'])
    credentials = list(SushiCredentials.objects.filter(**args))
    cr_args = {'active': True}
    if options['version']:
        cr_args['counter_version'] = int(options['version'])
    if options['report']:
        cr_args['code'] = options['report']
    # now fetch all possible combinations
    start_date = month_start(parse_date(options['start_date']))
    end_date = month_end(parse_date(options['end_date']))
    # we divide the requests into groups by platform and counter version combination
    # and then process each group in a separate thread
    platform_counter_v_to_requests = {}
    for cred in credentials:
        crs = list(cred.active_counter_reports.filter(**cr_args))
        for cr in crs:
            key = (cred.platform_id, cred.counter_version)
            # check if we already have a successful attempt and skip it if we do
            success_req_for_skip = {'download_success': True, 'processing_success': True} \
                if not options['skip_on_unsuccess'] else {}
            existing = SushiFetchAttempt.objects.filter(
                credentials=cred,
                counter_report=cr,
                start_date=start_date,
                end_date=end_date,
                **success_req_for_skip,
            ).exists()
            if existing:
                self.stderr.write(self.style.SUCCESS(f'Skipping existing {cred}, {cr}'))
            else:
                if key not in platform_counter_v_to_requests:
                    platform_counter_v_to_requests[key] = []
                platform_counter_v_to_requests[key].append((cred, cr, start_date, end_date))
    if not platform_counter_v_to_requests:
        self.stderr.write(self.style.WARNING('No matching reports found!'))
        return
    # let's create some threads and use them to process individual platforms
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for result in executor.map(self.download_list,
                                   list(platform_counter_v_to_requests.items())):
            pass

def interest(self, request, pk):
    org_filter = organization_filter_from_org_id(pk, request.user)
    date_filter = date_filter_from_params(request.GET)
    interest_rt = ReportType.objects.get(short_name='interest', source__isnull=True)
    data = (
        AccessLog.objects
        .filter(report_type=interest_rt, **org_filter, **date_filter)
        .aggregate(interest_sum=Sum('value'), min_date=Min('date'), max_date=Max('date'))
    )
    if data['max_date']:
        # the date might be None and then we do not want to do the math ;)
        data['max_date'] = month_end(data['max_date'])
        data['days'] = (data['max_date'] - data['min_date']).days + 1
    else:
        data['days'] = 0
    return Response(data)

def get_queryset(self):
    org_perm_args = (
        Q(organization__in=self.request.user.accessible_organizations())
        | Q(organization__isnull=True)
    )
    query_args = []
    exclude_args = []
    for param in ('platform', 'organization'):
        value = self.request.query_params.get(param)
        if value:
            if value == 'null':
                # only those with the value equal to None are requested
                query_args.append(Q(**{param + '__isnull': True}))
            else:
                # we filter to include those with the specified value or with this value null
                query_args.append(Q(**{param + '_id': value}) | Q(**{param + '__isnull': True}))
    start_date = self.request.query_params.get('start_date')
    end_date = self.request.query_params.get('end_date')
    if start_date:
        exclude_args.append(Q(end_date__lte=parse_month(start_date)))
    if end_date:
        exclude_args.append(Q(start_date__gte=month_end(parse_month(end_date))))
    if len(exclude_args) > 1:
        # we have more than one arg, so we need to "OR" them together
        exclude_args = [reduce(operator.or_, exclude_args)]
    qs = (
        Annotation.objects.filter(org_perm_args)
        .filter(*query_args)
        .exclude(*exclude_args)
        .select_related('organization', 'platform')
        .order_by('pk')
    )
    # work out the access level - can the current user edit each annotation?
    org_to_level = {}
    user = self.request.user
    for annot in qs:  # type: Annotation
        if not annot.organization_id:
            user_org_level = (UL_CONS_STAFF
                              if user.is_superuser or user.is_from_master_organization
                              else UL_NORMAL)
        else:
            if annot.organization_id not in org_to_level:
                org_to_level[annot.organization_id] = \
                    self.request.user.organization_relationship(annot.organization_id)
            user_org_level = org_to_level[annot.organization_id]
        annot.can_edit = user_org_level >= annot.owner_level
    return qs

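# --- illustrative sketch, not part of the original code ---
# get_queryset() above folds several Q objects into one with reduce(operator.or_, ...),
# so a single exclude() call drops rows matching any of the date conditions. A
# self-contained demonstration of the idiom (the `_demo` function is hypothetical):
def _demo_or_q_objects():
    import operator
    from functools import reduce

    from django.db.models import Q

    filters = [Q(end_date__lte='2019-01-01'), Q(start_date__gte='2019-12-31')]
    combined = reduce(operator.or_, filters)  # same as filters[0] | filters[1]
    # usage: Annotation.objects.exclude(combined)
    return combined
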
def test_month_end(self, value, result):
    assert month_end(value) == result

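# --- illustrative sketch, not part of the original code ---
# test_month_end() above is clearly parametrized with (value, result) pairs that are not
# shown here. The expected contract, judging from how month_end() is used throughout, is
# "return the last day of the month the given date falls into". The implementation and
# example values below are assumptions for illustration only.
def _demo_month_end_contract():
    from datetime import date, timedelta

    def month_end(d):  # assumed implementation of the real helper
        first_of_next = (d.replace(day=1) + timedelta(days=32)).replace(day=1)
        return first_of_next - timedelta(days=1)

    assert month_end(date(2020, 1, 15)) == date(2020, 1, 31)
    assert month_end(date(2020, 2, 1)) == date(2020, 2, 29)  # leap year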