def __init__(
    self,
    cohort,
    metric,
    individual=False,
    aggregate=True,
    aggregate_sum=True,
    aggregate_average=False,
    aggregate_std_deviation=False,
    *args,
    **kwargs
):
    """
    Build an aggregate report that wraps one MultiProjectMetricReport child.

    Parameters:
        cohort                  : handed unchanged to the child report
        metric                  : object exposing a `data` attribute; that data
                                  is serialized with stringify and given to the
                                  parent constructor as `parameters`.  The
                                  metric itself is also handed to the child
        individual              : stored on the instance as-is
        aggregate               : stored on the instance as-is
        aggregate_sum           : stored on the instance as-is
        aggregate_average       : stored on the instance as-is
        aggregate_std_deviation : stored on the instance as-is
        *args, **kwargs         : forwarded to the parent constructor
    """
    serialized_metric = stringify(metric.data)
    super(AggregateReport, self).__init__(
        parameters=serialized_metric, *args, **kwargs
    )

    # output options, kept exactly as passed in
    self.individual = individual
    self.aggregate = aggregate
    self.aggregate_sum = aggregate_sum
    self.aggregate_average = aggregate_average
    self.aggregate_std_deviation = aggregate_std_deviation

    # self.name is read only after the parent constructor has run
    # (presumably that is where it gets set -- confirm against the parent)
    child_report = MultiProjectMetricReport(cohort, metric, name=self.name)
    self.children = [child_report]
def create_reports_for_missed_days(cls, report, session, no_more_than=365):
    """
    Find which runs of a recurrent report were missed and create one report
    for each of those runs.

    For every missed day, the parent's parameters are parsed again and the
    metric's start_date is set to the day before the missed day, end_date is
    set to the missed day, 'recurrent' is set to False and 'public' is copied
    from the parent report.

    Parameters:
        report          : the parent recurrent report
        session         : a database session to the wikimetrics database
        no_more_than    : stop after this many reports were created,
                          defaults to 365.  The limit is checked after each
                          report is yielded.

    Returns:
        A generator yielding one instance of cls per missed run of the parent
        report passed in.  The days come from cls.days_missed(report, session).
        A day whose report can not be created is logged and skipped.
    """
    # get the days the report needs to be run for
    days_missed = cls.days_missed(report, session)
    reports_created = 0

    for day in days_missed:
        # reset on every iteration so the error handler below never reads an
        # unbound name (first day) or the previous day's parameters
        parameters = None
        try:
            # get the report parameters
            parameters = json.loads(report.parameters)

            # update the date parameters for this run of the metric
            metric = parameters['metric']
            # TODO: all metrics need to have an 'end_date' parameter in order
            # to run recurrently.  This should be at least less hardcoded if
            # not more elegant
            metric['start_date'] = day - timedelta(days=1)
            metric['end_date'] = day

            # without this, reports would recur infinitely
            parameters['recurrent'] = False
            parameters['public'] = report.public

            new_run = cls(
                parameters,
                user_id=report.user_id,
                recurrent_parent_id=report.id,
                created=day,  # See constructor of RunReport
            )
            reports_created += 1
        except Exception:
            # don't need to roll back session because it's just a query
            if parameters is None:
                logged_parameters = '<not parsed>'
            else:
                logged_parameters = stringify(parameters)
            # format_exc() takes no exception argument: it formats the
            # exception currently being handled
            task_logger.error('Problem creating child report: {}, params: {}'.format(
                traceback.format_exc(),
                logged_parameters
            ))
            continue

        yield new_run
        if reports_created >= no_more_than:
            break
def __init__(self,
             user_id=None,
             status=celery.states.PENDING,
             name=None,
             queue_result_key=None,
             children=None,
             public=False,
             parameters={},
             recurrent=False,
             recurrent_parent_id=None,
             created=None,
             store=False,
             persistent_id=None):
    """
    Set up the in-memory report and, when asked to, persist it.

    Parameters:
        user_id             : owner of the report.  When falsy, the id of the
                              authenticated current_user is used if available
        status              : initial status, defaults to celery's PENDING
        name                : report name
        queue_result_key    : stored on the instance as-is
        children            : list of child reports, defaults to a new empty
                              list
        public              : stored on the instance and on the stored record
        parameters          : serialized with stringify and utf-8 encoded for
                              the stored record.  The default dict is only
                              read, never mutated, by this constructor
        recurrent           : only used for the stored record
        recurrent_parent_id : only used for the stored record
        created             : creation time for the stored record, defaults
                              to datetime.now()
        store               : when True (and persistent_id is None) a
                              ReportStore record is added and committed
        persistent_id       : id of an already stored record; when given,
                              nothing new is stored

    Raises:
        Whatever the database session raises while storing; the session is
        rolled back before the error is re-raised.
    """
    if children is None:
        children = []

    self.user_id = user_id
    if not self.user_id:
        try:
            if current_user.is_authenticated():
                self.user_id = current_user.id
        except RuntimeError:
            # nothing to worry about, just using current_user outside
            # of a web context.  This should only happen during testing
            pass

    self.status = status
    self.name = name
    self.queue_result_key = queue_result_key
    self.children = children
    self.public = public
    self.store = store
    self.persistent_id = persistent_id
    # only filled in from the stored record below; the `created` argument is
    # not copied onto the instance when nothing is stored
    self.created = None

    if self.store is True and self.persistent_id is None:
        # store report to database
        # note that queue_result_key is always empty at this stage
        pj = ReportStore(user_id=self.user_id,
                         status=self.status,
                         show_in_ui=self.show_in_ui,
                         parameters=stringify(parameters).encode('utf-8'),
                         public=self.public,
                         recurrent=recurrent,
                         recurrent_parent_id=recurrent_parent_id,
                         created=created or datetime.now())

        # Get the session before entering the try block.  If get_session()
        # fails there is nothing to roll back, and calling rollback() on an
        # unbound name would raise NameError and hide the real error.
        session = db.get_session()
        try:
            session.add(pj)
            session.commit()
            self.persistent_id = pj.id
            self.created = pj.created
            # second commit: the name is set after the first commit has run
            pj.name = self.name or str(self)
            session.commit()
        except:
            # deliberately catches everything: always roll back, then re-raise
            session.rollback()
            raise
def create_reports_for_missed_days(cls, report, session, no_more_than=365):
    """
    Find which runs of a recurrent report were missed and create one report
    for each of those runs.

    Each created run gets a fresh copy of the parent's parameters in which
    the metric's start_date is the day before the missed day, end_date is the
    missed day, 'recurrent' is False and 'public' mirrors the parent report.

    Parameters:
        report          : the parent recurrent report
        session         : a database session to the wikimetrics database
        no_more_than    : stop after this many reports were created,
                          defaults to 365.  The limit is checked after each
                          report is yielded.

    Returns:
        A generator yielding one instance of cls per missed run of the parent
        report passed in.  The days come from cls.days_missed(report, session).
        A day whose report can not be created is logged and skipped.
    """
    # get the days the report needs to be run for
    days_missed = cls.days_missed(report, session)
    reports_created = 0

    for day in days_missed:
        # start every iteration with a known value, so the except block can
        # tell "parameters never parsed" apart from a real parameters dict
        parameters = None
        try:
            # get the report parameters
            parameters = json.loads(report.parameters)

            # update the date parameters for this run of the metric
            metric = parameters['metric']
            # TODO: all metrics need to have an 'end_date' parameter in order
            # to run recurrently.  This should be at least less hardcoded if
            # not more elegant
            metric['start_date'] = day - timedelta(days=1)
            metric['end_date'] = day

            # without this, reports would recur infinitely
            parameters['recurrent'] = False
            parameters['public'] = report.public

            new_run = cls(
                parameters,
                user_id=report.user_id,
                recurrent_parent_id=report.id,
                created=day,  # See constructor of RunReport
            )
            reports_created += 1
        except Exception:
            # don't need to roll back session because it's just a query
            params_for_log = (
                stringify(parameters) if parameters is not None
                else '<not parsed>'
            )
            # format_exc() formats the exception being handled; its first
            # argument is a frame limit, not an exception instance
            task_logger.error(
                'Problem creating child report: {}, params: {}'.format(
                    traceback.format_exc(), params_for_log))
            continue

        yield new_run
        if reports_created >= no_more_than:
            break
def __init__(self, metric, cohort):
    """
    Record the validation state and display names of a metric / cohort pair.

    Parameters
        metric  : an instance of a wikimetrics.metrics.metric.Metric; its
                  data, validate() result and label are used
        cohort  : an instance of a wikimetrics.models.cohort.Cohort; its
                  validated flag and name are used
    """
    serialized_metric = stringify(metric.data)
    super(ValidateReport, self).__init__(parameters=serialized_metric)

    # validity of each side first, then the names of each side
    self.metric_valid, self.cohort_valid = metric.validate(), cohort.validated
    self.metric_label, self.cohort_name = metric.label, cohort.name
def add_runs_to_report(self, index):
    # Build successful child runs for self.reports[index] and commit them.
    # A run is created for each day offset in [0, self.no_more_than + 10)
    # that is not listed in self.missed_by_index[index] and is smaller than
    # self.ago_by_index[index].
    # NOTE: self.p is updated in place on every iteration.
    params = self.p
    runs = []
    self.report_runs = runs

    for offset in range(self.no_more_than + 10):
        run_day = self.today - timedelta(days=offset)

        metric_params = params['metric']
        metric_params['start_date'] = run_day - timedelta(days=1)
        metric_params['end_date'] = run_day
        params['recurrent'] = False
        serialized = stringify(params)

        # skip days flagged as missed, and days at or past the age limit
        if (offset in self.missed_by_index[index]
                or offset >= self.ago_by_index[index]):
            continue

        runs.append(ReportStore(
            recurrent_parent_id=self.reports[index].id,
            created=run_day,
            status='SUCCESS',
            parameters=serialized,
            user_id=self.uid,
        ))

    self.session.add_all(self.report_runs)
    self.session.commit()
def test_better_encoder_datetime(self):
    # stringify must emit both the keyword name and the formatted datetime.
    # Plain decimal literals are used on purpose: a leading zero makes an
    # octal literal in Python 2 and is a syntax error in Python 3.
    result = stringify(date_time=datetime.datetime(2013, 6, 1, 2, 3, 4))
    assert_true('"date_time"' in result)
    assert_true('2013-06-01 02:03:04' in result)
def test_better_encoder_date(self):
    # stringify must emit both the keyword name and the formatted date.
    # Plain decimal literals are used on purpose: a leading zero makes an
    # octal literal in Python 2 and is a syntax error in Python 3.
    result = stringify(date_not_date_time=datetime.date(2013, 6, 1))
    assert_true('"date_not_date_time"' in result)
    assert_true('2013-06-01' in result)
def setUp(self):
    # Fixture: three recurrent parent reports of different ages, plus the
    # child runs that already succeeded for each of them.
    DatabaseTest.setUp(self)

    # turn off the scheduler for this test
    self.save_schedule = queue.conf['CELERYBEAT_SCHEDULE']
    queue.conf['CELERYBEAT_SCHEDULE'] = {}

    self.common_cohort_1()

    uid = self.owner_user_id
    self.today = strip_time(datetime.today())
    ago_25 = self.today - timedelta(days=25)
    ago_35 = self.today - timedelta(days=35)
    ago_05 = self.today - timedelta(days=5)

    p = {
        'metric': {
            'start_date': ago_05,
            'end_date': self.today,
            'name': 'NamespaceEdits',
        },
        'recurrent': True,
        'cohort': {'id': self.cohort.id, 'name': self.cohort.name},
        'name': 'test-recurrent-reports',
    }
    ps = stringify(p)

    # parents, in order: created 25, 35 and 5 days ago
    self.reports = [
        PersistentReport(
            recurrent=True, created=created_on, parameters=ps, user_id=uid
        )
        for created_on in (ago_25, ago_35, ago_05)
    ]
    self.session.add_all(self.reports)
    self.session.commit()

    total_days = 35
    # (index into self.reports, day offsets without a run, runs exist only
    #  for offsets below this bound).  The second parent has no real bound,
    #  so it uses the size of the loop itself.
    run_specs = (
        (0, (1, 2, 11), 26),
        (1, (1, 2, 11, 31, 33), total_days),
        (2, (1, 2), 6),
    )

    self.report_runs = []
    for d in range(total_days):
        day = self.today - timedelta(days=d)
        p['metric']['start_date'] = day - timedelta(days=1)
        p['metric']['end_date'] = day
        p['recurrent'] = False
        ps = stringify(p)

        for parent_index, missed_offsets, bound in run_specs:
            if d in missed_offsets or d >= bound:
                continue
            self.report_runs.append(PersistentReport(
                recurrent_parent_id=self.reports[parent_index].id,
                created=day,
                status='SUCCESS',
                parameters=ps,
                user_id=uid,
            ))

    self.session.add_all(self.report_runs)
    self.session.commit()
def test_better_encoder_decimal(self):
    # stringify must accept a Decimal and emit its keyword name.
    # The Decimal is built from a string: Decimal(6.01) would carry the
    # binary float's representation error instead of being exactly 6.01.
    string = stringify(deci=decimal.Decimal('6.01'))
    assert_true('"deci"' in string)
def test_better_encoder_default(self):
    # a plain string value must pass through stringify: check that both the
    # keyword name and the value itself show up in the output
    result = stringify(normal='hello world')
    assert_true('"normal"' in result)
    # the original second assertion looked for 'normal' again, which the
    # check above already implies; the value is what was left unverified
    assert_true('hello world' in result)
def test_better_encoder_decimal(self):
    # stringify must emit both the keyword name and the decimal value.
    # The Decimal is built from a string so it is exactly 6.01: Decimal(6.01)
    # carries the binary float's representation error, which makes the
    # '6.01' check depend on how the encoder happens to round the value.
    result = stringify(deci=decimal.Decimal('6.01'))
    assert_true('"deci"' in result)
    assert_true('6.01' in result)
def test_better_encoder_datetime(self):
    # stringify must emit both the keyword name and the formatted datetime.
    # Plain decimal literals are used on purpose: a leading zero makes an
    # octal literal in Python 2 and is a syntax error in Python 3.
    result = stringify(date_time=datetime(2013, 6, 1, 2, 3, 4))
    assert_true('"date_time"' in result)
    assert_true('2013-06-01 02:03:04' in result)
def test_better_encoder_date(self):
    # stringify must emit both the keyword name and the formatted date.
    # Plain decimal literals are used on purpose: a leading zero makes an
    # octal literal in Python 2 and is a syntax error in Python 3.
    result = stringify(date_not_date_time=date(2013, 6, 1))
    assert_true('"date_not_date_time"' in result)
    assert_true('2013-06-01' in result)
def test_better_encoder_default(self):
    # a plain string value must pass through stringify with its keyword
    # name present in the output
    serialized = stringify(normal='hello world')
    key_is_present = '"normal"' in serialized
    assert_true(key_is_present)
def test_better_encoder_date(self):
    # stringify must accept a date and emit its keyword name.
    # Plain decimal literals are used on purpose: a leading zero makes an
    # octal literal in Python 2 and is a syntax error in Python 3.
    string = stringify(date_not_date_time=datetime.date(2013, 6, 1))
    assert_true('"date_not_date_time"' in string)
def setUp(self):
    # Fixture: four recurrent parent reports whose ages come from
    # self.ago_by_index.  Child runs are not created here.
    DatabaseTest.setUp(self)

    # turn off the scheduler for this test
    self.save_schedule = queue.conf['CELERYBEAT_SCHEDULE']
    queue.conf['CELERYBEAT_SCHEDULE'] = {}

    self.common_cohort_1()

    self.uid = self.owner_user_id
    self.today = strip_time(datetime.today())

    # NOTE: running with 20 just makes the tests run faster, but any value > 11 works
    self.no_more_than = 20
    limit = self.no_more_than

    # per parent report index: day offsets for which no run exists
    self.missed_by_index = {
        0: [1, 2, 11],
        1: [1, 2, 11, limit + 1, limit + 3],
        2: [1, 2],
        3: range(0, limit + 10),
    }
    # per parent report index: the age bound, in days, used for that report
    self.ago_by_index = {
        0: 26,
        1: limit + 10,
        2: 6,
        3: limit + 10,
    }

    # creation date of each parent: (bound - 1) days before today
    age = dict(
        (i, self.today - timedelta(days=v - 1))
        for i, v in self.ago_by_index.items()
    )

    metric_params = {
        'start_date': age[0],
        'end_date': self.today,
        'name': 'NamespaceEdits',
    }
    cohort_params = {
        'id': self.cohort.id,
        'name': self.cohort.name,
    }
    self.p = {
        'metric': metric_params,
        'recurrent': True,
        'cohort': cohort_params,
        'name': 'test-recurrent-reports',
    }
    ps = stringify(self.p)

    self.reports = [
        ReportStore(
            recurrent=True,
            created=age[i],
            parameters=ps,
            user_id=self.uid,
        )
        for i in range(4)
    ]
    self.session.add_all(self.reports)
    self.session.commit()