def test_first_run_of_recurrent_report_happens(self):
    """
    A recurrent report must trigger its first child run as soon as it is
    scheduled, instead of waiting for the scheduler to pick it up
    """
    cohort = {
        'id': self.cohort.id,
        'name': self.cohort.name,
    }
    metric = {
        'name': 'NamespaceEdits',
        'namespaces': [0, 1, 2],
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-03 00:00:00',
        'individualResults': True,
        'aggregateResults': False,
        'aggregateSum': False,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': cohort,
        'metric': metric,
        'recurrent': True,
        'public': True,
    }

    recurrent_report = RunReport(parameters, user_id=self.owner_user_id)
    # swap the child runner for a mock on this instance only, so the test
    # can observe whether running the task invokes it
    recurrent_report._run_child_report = MagicMock()
    recurrent_report.task.delay(recurrent_report)

    recurrent_report._run_child_report.assert_called_once_with()
def test_invalid_metric(self):
    """
    Feed parse_request a NamespaceEdits metric whose 'namespaces' value is
    free text instead of a list of namespaces.

    NOTE(review): the body asserts nothing; the expected failure is
    presumably declared outside this function (e.g. a decorator) - confirm
    """
    report = RunReport()
    malformed_request = [{
        'name': 'Edits - test',
        'cohort': {
            'id': self.test_cohort_id,
        },
        'metric': {
            'name': 'NamespaceEdits',
            'namespaces': 'blah blah',
        },
    }]
    report.parse_request(malformed_request)
def test_run_report_finish(self):
    """
    finish() called with a single aggregate result should expose that
    result under the report's own result_key
    """
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': {
            'name': 'NamespaceEdits',
        },
    }
    report = RunReport(parameters, user_id=self.owner_user_id)

    finished = report.finish(['aggregate_result'])

    assert_equals(finished[report.result_key], 'aggregate_result')
def test_run_report_finish(self):
    """
    Finishing a report with one aggregate result must return a structure
    in which result_key points at that very result
    """
    cohort = {'id': self.cohort.id, 'name': self.cohort.name}
    metric = {'name': 'NamespaceEdits'}
    report = RunReport(
        {'name': 'Edits - test', 'cohort': cohort, 'metric': metric},
        user_id=self.owner_user_id,
    )

    outcome = report.finish(['aggregate_result'])
    stored_under_key = outcome[report.result_key]

    assert_equals(stored_under_key, 'aggregate_result')
def test_aggregated_response_bytes_added(self):
    """
    Run a BytesAdded report with individual results and a sum aggregate,
    then check one user's net_sum and the summed positive_only_sum
    """
    metric = {
        'name': 'BytesAdded',
        'namespaces': [0, 1, 2],
        'start_date': '2013-06-01',
        'end_date': '2013-09-01',
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    desired_responses = [{
        'name': 'Edits - test',
        'cohort': {
            'id': self.test_cohort_id,
        },
        'metric': metric,
    }]

    report = RunReport(desired_responses, user_id=self.test_user_id)
    results = report.task.delay(report).get()

    # the results are stored under the result_key of the first child report
    stored_child = (
        self.session.query(PersistentReport)
        .filter(PersistentReport.id == report.children[0].persistent_id)
        .one()
    )
    results = results[stored_child.result_key]

    individual = results[Aggregation.IND][0][self.test_mediawiki_user_id]
    assert_equals(individual['net_sum'], 6)
    assert_equals(results[Aggregation.SUM]['positive_only_sum'], 150)
def test_days_missed_0(self):
    """
    days_missed for the first report fixture should be exactly the days
    1, 2 and 11 days before self.today
    """
    missed_days = RunReport.days_missed(self.reports[0], self.session)

    expected_days = set(
        self.today - timedelta(days=days_ago)
        for days_ago in (1, 2, 11)
    )
    assert_equals(missed_days, expected_days)
def test_basic_response(self):
    """
    Run a NamespaceEdits report with individual results only and check
    the number of edits reported for the first editor
    """
    metric = {
        'name': 'NamespaceEdits',
        'namespaces': [0, 1, 2],
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-02 00:00:00',
        'individualResults': True,
        'aggregateResults': False,
        'aggregateSum': False,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
    }

    report = RunReport(parameters, user_id=self.owner_user_id)
    results = report.task.delay(report).get()
    self.session.commit()

    stored_report = self.session.query(ReportStore).get(report.persistent_id)
    results = results[stored_report.result_key]

    # TODO: figure out why one of the resulting wiki_user_ids is None here
    edits_of_first_editor = results[Aggregation.IND][self.editor(0)]['edits']
    assert_equals(edits_of_first_editor, 2)
def test_empty_response(self):
    """
    Build a RunReport from an empty parameters dict.

    Submitting the form without cohorts / metrics is meant to be stopped
    client side; server side, an exception is expected whenever the
    RunReport object cannot be created.

    NOTE(review): nothing in this body catches or asserts that exception,
    presumably it is declared outside this function - confirm
    """
    empty_parameters = {}
    RunReport(empty_parameters, user_id=self.owner_user_id)
def test_aggregated_response_bytes_added(self):
    """
    Run a BytesAdded report on namespace 0 with individual results and a
    sum aggregate, then check the first editor's net_sum and the summed
    positive_only_sum
    """
    metric = {
        'name': 'BytesAdded',
        'namespaces': [0],
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-03 00:00:00',
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
    }

    report = RunReport(parameters, user_id=self.owner_user_id)
    results = report.task.delay(report).get()
    self.session.commit()

    stored_report = self.session.query(ReportStore).get(report.persistent_id)
    results = results[stored_report.result_key]

    individual = results[Aggregation.IND][self.editor(0)]
    assert_equals(individual['net_sum'], -90)
    assert_equals(results[Aggregation.SUM]['positive_only_sum'], 140)
def inject_and_fetch_recurrent_run(self):
    """
    Run a recurrent NamespaceEdits report, execute the scheduler code once
    and return every stored report whose recurrent parent is that report
    """
    metric = {
        'name': 'NamespaceEdits',
        'namespaces': [0, 1, 2],
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-03 00:00:00',
        'individualResults': True,
        'aggregateResults': False,
        'aggregateSum': False,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
        'recurrent': True,
    }

    parent = RunReport(parameters, user_id=self.owner_user_id)
    parent.task.delay(parent).get()
    self.session.commit()

    # executing directly the code that will be run by the scheduler
    recurring_reports()

    children_of_parent = self.session.query(ReportStore).filter(
        ReportStore.recurrent_parent_id == parent.persistent_id
    )
    return children_of_parent.all()
def test_raises_invalid_cohort_for_any_metric(self):
    """
    Creating a report on a cohort that is not validated must raise
    InvalidCohort for every metric that is shown in the UI
    """
    self.cohort.validated = False
    self.session.commit()

    # items() instead of iteritems(): works on both python 2 and python 3
    for name, metric in metric_classes.items():
        if not metric.show_in_ui:
            continue

        parameters = {
            'name': '{0} - test'.format(name),
            'cohort': {
                'id': self.cohort.id,
                'name': self.cohort.name,
            },
            'metric': {
                'name': name,
                'namespaces': [0, 1, 2],
                'start_date': '2013-01-01 00:00:00',
                'end_date': '2013-01-02 00:00:00',
                'individualResults': True,
                'aggregateResults': False,
                'aggregateSum': False,
                'aggregateAverage': False,
                'aggregateStandardDeviation': False,
            },
        }
        try:
            RunReport(parameters, user_id=self.owner_user_id)
        except InvalidCohort:
            # expected outcome, move on to the next metric
            continue

        # only reached when the constructor did not raise; name the metric
        # so the failure is actionable
        assert_true(
            False,
            'RunReport did not raise InvalidCohort for metric {0}'.format(name),
        )
def test_basic_response(self):
    """
    Run a NamespaceEdits report with individual results only and check
    the number of edits reported for the test mediawiki user
    """
    metric = {
        'name': 'NamespaceEdits',
        'namespaces': [0, 1, 2],
        'start_date': '2013-06-01',
        'end_date': '2013-09-01',
        'individualResults': True,
        'aggregateResults': False,
        'aggregateSum': False,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    desired_responses = [{
        'name': 'Edits - test',
        'cohort': {
            'id': self.test_cohort_id,
        },
        'metric': metric,
    }]

    report = RunReport(desired_responses, user_id=self.test_user_id)
    results = report.task.delay(report).get()

    # the results are stored under the result_key of the first child report
    stored_child = (
        self.session.query(PersistentReport)
        .filter(PersistentReport.id == report.children[0].persistent_id)
        .one()
    )
    results = results[stored_child.result_key]

    # TODO: figure out why one of the resulting wiki_user_ids is None here
    individual = results[Aggregation.IND][0][self.test_mediawiki_user_id]
    assert_equals(individual['edits'], 2)
def test_user_id_assigned_properly(self):
    """
    After running a report and the scheduler code, no stored report
    should be left without a user_id
    """
    metric = {
        'name': 'BytesAdded',
        'namespaces': '0,1,2',
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-02 00:00:00',
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Bytes - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
    }

    report = RunReport(parameters, user_id=self.owner_user_id)
    report.task.delay(report).get()
    self.session.commit()

    # executing directly the code that will be run by the scheduler
    recurring_reports()

    # make sure all report nodes have a user_id
    # NOTE(review): "== None" is presumably a column comparison handled by
    # the ORM, so it must not be turned into "is None" - confirm
    without_user = self.session.query(func.count(ReportStore)).filter(
        ReportStore.user_id == None
    )
    no_user_id = without_user.one()[0]
    assert_equals(no_user_id, 0)
def test_days_missed_2(self):
    """
    After adding runs to the third report fixture, days_missed should
    match the offsets listed in self.missed_by_index[2]
    """
    self.add_runs_to_report(2)

    missed_days = RunReport.days_missed(self.reports[2], self.session)

    expected_days = set(
        self.today - timedelta(days=days_ago)
        for days_ago in self.missed_by_index[2]
    )
    assert_equals(set(missed_days), expected_days)
def test_create_reports_for_missed_days_2(self):
    """
    The runs created for the third report fixture should be dated
    one and two days before self.today, and nothing else
    """
    created_runs = RunReport.create_reports_for_missed_days(
        self.reports[2], self.session
    )
    new_runs = list(created_runs)

    creation_dates = set(run.created for run in new_runs)
    expected_dates = set(
        self.today - timedelta(days=days_ago)
        for days_ago in (1, 2)
    )
    assert_equals(creation_dates, expected_dates)
def test_rerun(self):
    """
    Rerunning a report should produce a dict result and must leave the
    number of stored reports unchanged
    """
    stored_before = len(self.session.query(ReportStore).all())

    result = RunReport.rerun(self.reports[0]).get()
    assert_equals(type(result), dict)

    stored_after = len(self.session.query(ReportStore).all())
    assert_equals(stored_before, stored_after)
def test_days_missed_1(self):
    """
    days_missed for the second report fixture should be exactly the days
    1, 2 and 11 days before self.today
    """
    missed_days = RunReport.days_missed(self.reports[1], self.session)

    # NOTE: search stops at 30 days, so it doesn't matter that
    # the 31 and 33 days-ago runs were missed
    expected_days = set(
        self.today - timedelta(days=days_ago)
        for days_ago in (1, 2, 11)
    )
    assert_equals(missed_days, expected_days)
def test_create_reports_for_missed_days_2(self):
    """
    After adding runs to the third report fixture, the runs created for
    its missed days should be dated per self.missed_by_index[2]
    """
    self.add_runs_to_report(2)

    created_runs = RunReport.create_reports_for_missed_days(
        self.reports[2],
        self.session,
        no_more_than=self.no_more_than,
    )
    new_runs = list(created_runs)

    creation_dates = set(run.created for run in new_runs)
    expected_dates = set(
        self.today - timedelta(days=days_ago)
        for days_ago in self.missed_by_index[2]
    )
    assert_equals(creation_dates, expected_dates)
def test_days_missed_2_with_cleanup(self):
    """
    Runs that were put back to pending should be reported as missed days,
    on top of the days listed in self.missed_by_index[2]
    """
    self.add_runs_to_report(2)

    # change some reports back to pending to make sure they're cleaned up
    # (explicit loop: map() is lazy on python 3 and would never call
    # make_pending when its result is discarded)
    runs_made_pending = self.report_runs[0::2]
    for run in runs_made_pending:
        make_pending(run)
    self.session.commit()

    missed_days = RunReport.days_missed(self.reports[2], self.session)

    expected_days = [
        self.today - timedelta(days=days_ago)
        for days_ago in self.missed_by_index[2]
    ]
    expected_days += [run.created for run in runs_made_pending]
    assert_equals(set(missed_days), set(expected_days))
def test_run_report_repr(self):
    """
    The string form of a RunReport should mention 'RunReport'
    """
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': {
            'name': 'NamespaceEdits',
        },
    }
    report = RunReport(parameters, user_id=self.owner_user_id)

    assert_true('RunReport' in str(report))
def test_lots_of_concurrent_requests(self):
    """
    Launch the same BytesAdded report several times before collecting any
    result, and require every run to produce the expected sum
    """
    metric = {
        'name': 'BytesAdded',
        'namespaces': '0,1,2',
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-02 00:00:00',
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
    }

    # NOTE: you can make this loop as much as you'd like if celery
    # is allowed enough concurrent workers, set via CELERYD_CONCURRENCY
    trials = 3
    launched = []
    for _ in range(trials):
        report = RunReport(parameters, user_id=self.owner_user_id)
        launched.append((report, report.task.delay(report)))

    successes = 0
    for report, pending_result in launched:
        try:
            results = pending_result.get()
            self.session.commit()
            stored_report = self.session.query(ReportStore).get(
                report.persistent_id
            )
            results = results[stored_report.result_key]
            if results[Aggregation.SUM]['positive_only_sum'] == 140:
                successes += 1
        except SoftTimeLimitExceeded:
            # a timeout only costs a success, it does not abort the loop
            print('Timeout expired during this task.')
        except Exception:
            print('An exception occurred during this task.')
            raise

    print('Successes: {0}'.format(successes))
    assert_true(successes == trials, 'all of the trials must succeed')
def recurring_reports(report_id=None):
    """
    Create and launch the runs that recurrent reports have missed.

    Parameters
        report_id   : when given, only the recurrent report with this id
                      is considered; otherwise all recurrent reports are

    Nothing is launched while any replication lag is detected.  Failures
    are logged through task_logger and never propagate to the caller.
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import ReportStore, RunReport

    lag_service = ReplicationLagService()
    if lag_service.is_any_lagged():
        task_logger.warning(
            'Replication lag detected. '
            'Hence, skipping creating new recurring reports.'
        )
        return

    try:
        session = db.get_session()
        recurrent = session.query(ReportStore).filter(ReportStore.recurrent)
        if report_id is not None:
            recurrent = recurrent.filter(ReportStore.id == report_id)

        for report in recurrent.all():
            # each report is handled in its own try block so that one
            # failing report does not prevent the others from running
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))

                # only pass the limit along when it is configured (truthy)
                limit = queue.conf.get('MAX_INSTANCES_PER_RECURRENT_REPORT')
                limit_kwargs = {'no_more_than': limit} if limit else {}

                missed_runs = RunReport.create_reports_for_missed_days(
                    report,
                    session,
                    **limit_kwargs
                )
                for missed_run in missed_runs:
                    missed_run.task.delay(missed_run)

            except Exception:
                task_logger.error('Problem running recurring report "{}": {}'.format(
                    report, traceback.format_exc()
                ))

    except Exception:
        task_logger.error('Problem running recurring reports: {}'.format(
            traceback.format_exc()
        ))
def test_wiki_cohort_runs(self):
    """
    Run a NamespaceEdits report on a wiki cohort that was handed all the
    users of self.cohort, and check individual and summed edits
    """
    self.create_wiki_cohort()

    # give the WikiCohort all the users of self.cohort, for easy testing
    reassign_users = WikiUserStore.__table__.update().values(
        validating_cohort=self.basic_wiki_cohort.id
    ).where(
        WikiUserStore.validating_cohort == self.cohort.id
    )
    self.session.execute(reassign_users)

    reassign_memberships = CohortWikiUserStore.__table__.update().values(
        cohort_id=self.basic_wiki_cohort.id
    ).where(
        CohortWikiUserStore.cohort_id == self.cohort.id
    )
    self.session.execute(reassign_memberships)

    # from here on the wiki cohort is the cohort under test
    self.cohort = self.basic_wiki_cohort

    metric = {
        'name': 'NamespaceEdits',
        'namespaces': [0, 1, 2],
        'start_date': '2013-01-01 00:00:00',
        'end_date': '2013-01-02 00:00:00',
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': False,
        'aggregateStandardDeviation': False,
    }
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': metric,
    }

    report = RunReport(parameters, user_id=self.owner_user_id)
    results = report.task.delay(report).get()
    self.session.commit()

    stored_report = self.session.query(ReportStore).get(report.persistent_id)
    results = results[stored_report.result_key]

    assert_equals(results[Aggregation.IND][self.editor(0)]['edits'], 2)
    assert_equals(results[Aggregation.SUM]['edits'], 4)
def test_aggregated_response_namespace_edits_with_timeseries(self):
    """
    Run a monthly timeseries NamespaceEdits report and check that the
    first period is keyed by the requested start date, both in the
    individual and in the summed results, and check the summed values
    """
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': {
            'name': 'NamespaceEdits',
            'namespaces': [0, 1, 2],
            'start_date': '2013-01-01 00:20:00',
            'end_date': '2013-03-01 00:00:00',
            'timeseries': TimeseriesChoices.MONTH,
            'individualResults': True,
            'aggregateResults': True,
            'aggregateSum': True,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        },
    }

    jr = RunReport(parameters, user_id=self.owner_user_id)
    results = jr.task.delay(jr).get()
    self.session.commit()

    result_key = self.session.query(ReportStore) \
        .get(jr.persistent_id) \
        .result_key
    results = results[result_key]

    individual_edits = results[Aggregation.IND][self.editor(0)]['edits']
    summed_edits = results[Aggregation.SUM]['edits']

    # list(...) before indexing: items() is only subscriptable where it
    # returns a list, wrapping it keeps the lookup valid everywhere
    # NOTE(review): this relies on the mappings keeping their periods in
    # order - confirm the result type guarantees that
    first_individual_period = list(individual_edits.items())[0][0]
    assert_equals(first_individual_period, '2013-01-01 00:20:00')

    first_summed_period = list(summed_edits.items())[0][0]
    assert_equals(first_summed_period, '2013-01-01 00:20:00')

    assert_equals(summed_edits['2013-01-01 00:20:00'], 8)
    assert_equals(summed_edits['2013-02-01 00:00:00'], 2)
def test_many_parallel_runs(self):
    """
    A recurrent report created self.total_runs days ago should end up
    with self.total_runs successful runs once the scheduler code ran
    """
    parameters = {
        'name': 'Edits - test',
        'cohort': {
            'id': self.cohort.id,
            'name': self.cohort.name,
        },
        'metric': {
            'name': 'NamespaceEdits',
            'namespaces': [0, 1, 2],
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-03 00:00:00',
            'individualResults': True,
            'aggregateResults': False,
            'aggregateSum': False,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        },
        'recurrent': True,
    }

    # back-date the report so there are self.total_runs days to catch up on
    jr = RunReport(
        parameters,
        user_id=self.owner_user_id,
        created=datetime.today() - timedelta(days=self.total_runs),
    )
    jr.task.delay(jr).get()
    self.session.commit()

    # executing directly the code that will be run by the scheduler
    recurring_reports()

    recurrent_runs = self.session.query(ReportStore) \
        .filter(ReportStore.recurrent_parent_id == jr.persistent_id) \
        .all()

    # a real list, so that len() works whether or not filter() is lazy
    successful_runs = [
        run for run in recurrent_runs if run.status == 'SUCCESS'
    ]
    # make sure every one of the back-dated days produced a successful run
    assert_equal(len(successful_runs), self.total_runs)
def recurring_reports():
    """
    Create and launch the runs that every recurrent PersistentReport has
    missed.

    Failures are logged through task_logger and never propagate to the
    caller: a problem with one report is logged and the remaining reports
    are still processed.
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import PersistentReport, RunReport

    try:
        session = db.get_session()
        recurrent_reports = session.query(PersistentReport) \
            .filter(PersistentReport.recurrent) \
            .all()

        for report in recurrent_reports:
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))
                days_to_run = RunReport.create_reports_for_missed_days(report, session)
                for day_to_run in days_to_run:
                    day_to_run.task.delay(day_to_run)
            # "as" form: accepted by python 2.6+ and required by python 3
            except Exception as e:
                task_logger.error('Problem running recurring report "{0}": {1}'.format(
                    report, e
                ))
    except Exception as e:
        task_logger.error('Problem running recurring reports: {0}'.format(e))
def test_invalid_metric(self):
    """
    A NamespaceEdits report with an unparseable start_date should finish
    with a FAILURE entry saying that the report was incorrectly configured
    """
    jr = RunReport(
        {
            'name': 'Edits - test',
            'cohort': {
                'id': self.cohort.id,
            },
            'metric': {
                'name': 'NamespaceEdits',
                'start_date': 'blah',
            },
        },
        user_id=self.owner_user_id,
    )

    results = jr.task.delay(jr).get()
    # call form of print: same output for a single argument and valid
    # on both python 2 and python 3
    print(results)
    self.session.commit()

    result_key = self.session.query(ReportStore) \
        .get(jr.persistent_id) \
        .result_key

    failure_message = results[result_key]['FAILURE']
    assert_true('Edits was incorrectly configured' in failure_message)
def recurring_reports(report_id=None):
    """
    Create and launch the runs that recurrent reports have missed.

    Parameters
        report_id   : when given, only the recurrent report with this id
                      is considered; otherwise all recurrent reports are

    Failures are logged through task_logger and never propagate to the
    caller: a problem with one report is logged and the remaining reports
    are still processed.
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import ReportStore, RunReport

    try:
        session = db.get_session()
        # no dangling line continuation before the "if" below
        query = session.query(ReportStore).filter(ReportStore.recurrent)
        if report_id is not None:
            query = query.filter(ReportStore.id == report_id)

        for report in query.all():
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))
                days_to_run = RunReport.create_reports_for_missed_days(report, session)
                for day_to_run in days_to_run:
                    day_to_run.task.delay(day_to_run)
            # "as" form: accepted by python 2.6+ and required by python 3
            except Exception as e:
                task_logger.error('Problem running recurring report "{0}": {1}'.format(
                    report, e
                ))
    except Exception as e:
        task_logger.error('Problem running recurring reports: {0}'.format(e))
def reports_request():
    """
    On GET, renders the page used to kick off a new report.
    On any other method, launches one report per entry of the submitted
    'responses' form field and redirects to the reports index.
    """
    if request.method == 'GET':
        return render_template('report.html')

    desired_responses = json.loads(request.form['responses'])
    recurrent = json.loads(request.form.get('recurrent', 'false'))

    for parameters in desired_responses:
        parameters['recurrent'] = recurrent
        # NOTE: this is not a mistake.  Currently recurrent => public on creation
        parameters['public'] = recurrent

        # Encode cohort description for the case it contains special characters
        cohort = parameters['cohort']
        description = cohort.get('description')
        if description is not None:
            cohort['description'] = description.encode('utf-8')

        report = RunReport(parameters, user_id=current_user.id)
        report.task.delay(report)

    return json_redirect(url_for('reports_index'))
def rerun_report(report_id):
    """
    Look up the stored report with the given id, hand it to
    RunReport.rerun and answer with a confirmation message.

    NOTE(review): an unknown report_id is not handled here, the lookup
    result is passed to rerun as is - confirm this is acceptable
    """
    stored_report = db.get_session().query(ReportStore).get(report_id)
    RunReport.rerun(stored_report)
    return json_response(message='Report scheduled for rerun')
def test_empty_response(self):
    """
    Running a report built from an empty list of responses should give
    back an empty list
    """
    # TODO: handle case where user tries to submit form with no cohorts / metrics
    no_responses = []
    report = RunReport(no_responses, user_id=self.test_user_id)

    outcome = report.task.delay(report).get()

    assert_equals(outcome, [])
def test_run_report_finish(self):
    """
    Finishing a report that has no results should store 'Finished' under
    the report's result_key
    """
    report = RunReport([])

    outcome = report.finish([])

    assert_equals(outcome[report.result_key], 'Finished')
def test_run_report_repr(self):
    """
    The string form of a RunReport should mention 'RunReport'
    """
    report = RunReport([])
    assert_true('RunReport' in str(report))
def test_create_reports_for_missed_days_3(self):
    """
    For the fourth report fixture, the number of runs created for missed
    days should be exactly the no_more_than limit that was passed in
    """
    self.add_runs_to_report(3)

    created_runs = RunReport.create_reports_for_missed_days(
        self.reports[3],
        self.session,
        no_more_than=self.no_more_than,
    )
    new_runs = list(created_runs)

    assert_equals(len(new_runs), self.no_more_than)
def test_invalid_report(self):
    """
    Build a RunReport from an empty parameters dict.

    NOTE(review): the body asserts nothing; the expected failure is
    presumably declared outside this function (e.g. a decorator) - confirm
    """
    empty_parameters = {}
    RunReport(empty_parameters)