def test_daily_report_without_previous_results_with_explode_by(self):
    """Exploded daily report writes one TSV per (editor, wiki) pair."""
    def fetchall_callback():
        # Single result row: 2015-01-01 with value 1.
        return [[datetime(2015, 1, 1), str(1)]]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    # Save and restore the real connect: the previous bare module
    # assignment leaked the MagicMock into every subsequent test.
    original_connect = pymysql.connect
    pymysql.connect = MagicMock(return_value=connection_mock)
    try:
        config_path = os.path.join(self.config_folder, 'reportupdater_test4.yaml')
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder
        )
    finally:
        pymysql.connect = original_connect
    output_folder = os.path.join(self.output_folder, 'reportupdater_test4')
    self.paths_to_clean.extend([output_folder])
    output_filenames = [
        'visualeditor/wiki1.tsv',
        'visualeditor/wiki2.tsv',
        'visualeditor/wiki3.tsv',
        'wikitext/wiki1.tsv',
        'wikitext/wiki2.tsv',
        'wikitext/wiki3.tsv',
    ]
    for output_filename in output_filenames:
        output_path = os.path.join(output_folder, output_filename)
        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        # Header plus exactly one data row.
        self.assertEqual(len(output_lines), 2)
        self.assertEqual(output_lines[0], 'date\tvalue\n')
        self.assertEqual(output_lines[1], '2015-01-01\t1\n')
def test_daily_timeboxed_script_report_without_previous_results(self):
    """Script-based daily timeboxed report yields one row per consecutive day."""
    config_path = os.path.join(self.config_folder, 'reportupdater_test5.yaml')
    history_path = 'test/fixtures/reportupdater_test5.history'
    reportupdater.run(
        config_path=config_path,
        query_folder=self.query_folder,
        output_folder=self.output_folder,
        history_path=history_path
    )
    output_path = os.path.join(self.output_folder, 'reportupdater_test5.tsv')
    self.paths_to_clean.extend([output_path, history_path])
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent dates.
    expected_date = datetime(2015, 1, 1)
    for line in output_lines:
        date_str, value = line.strip().split('\t')
        expected_date_str = expected_date.strftime(DATE_FORMAT)
        self.assertEqual(date_str, expected_date_str)
        # `unicode` does not exist in Python 3; text read from io.open
        # is `str` (matches the sibling daily-script test).
        self.assertEqual(type(value), str)
        expected_date += relativedelta(days=+1)
def test_daily_report_without_previous_results_with_explode_by(self, *_):
    """An exploded-by config produces one TSV per editor/wiki combination."""
    def fetchall_callback():
        # Every call yields the same single row for 2015-01-01.
        return [[datetime(2015, 1, 1), str(1)]]
    mock_connection = ConnectionMock(None, fetchall_callback, ['date', 'value'])
    with mock.patch('pymysql.connect', return_value=mock_connection):
        reportupdater.run(
            config_path=os.path.join(self.config_folder, 'reportupdater_test4.yaml'),
            query_folder=self.query_folder,
            output_folder=self.output_folder)
    report_folder = os.path.join(self.output_folder, 'reportupdater_test4')
    self.paths_to_clean.extend([report_folder])
    expected_files = [
        'visualeditor/wiki1.tsv',
        'visualeditor/wiki2.tsv',
        'visualeditor/wiki3.tsv',
        'wikitext/wiki1.tsv',
        'wikitext/wiki2.tsv',
        'wikitext/wiki3.tsv',
    ]
    for relative_path in expected_files:
        report_path = os.path.join(report_folder, relative_path)
        self.assertTrue(os.path.exists(report_path))
        with io.open(report_path, 'r', encoding='utf-8') as report_file:
            lines = report_file.readlines()
        # Each exploded report holds the header plus exactly one data row.
        self.assertEqual(len(lines), 2)
        self.assertEqual(lines, ['date\tvalue\n', '2015-01-01\t1\n'])
def test_when_current_exec_time_and_last_exec_time_are_within_the_same_day(self):
    """No re-run happens when the last execution was earlier the same day."""
    last_exec_time = datetime(2015, 1, 2, 3, 4, 5)
    self.write_time_to_history(last_exec_time)
    # Save and restore utcnow: a bare module-attribute assignment would
    # leave the MagicMock in place for every later test.
    original_utcnow = reportupdater.utcnow
    reportupdater.utcnow = MagicMock(return_value=datetime(2015, 1, 2, 13, 14, 15))
    try:
        reportupdater.run(
            config_path=os.path.join(self.config_folder, 'reportupdater_test2.yaml'),
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=self.history_path
        )
    finally:
        reportupdater.utcnow = original_utcnow
    # The report should not be computed because it has already been computed
    # within this day. So the output file should not exist.
    output_path = os.path.join(self.output_folder, 'reportupdater_test2.tsv')
    self.assertFalse(os.path.exists(output_path))
def test_daily_report_with_previous_results(self, *_):
    """New monthly rows are appended after the existing report rows.

    The existing report already covers Jan and Feb 2015; the mocked
    query feeds Mar onward, one row per call.
    """
    def fetchall_callback():
        # This method will return a subsequent row with each call.
        # The first call raises AttributeError (self.last_date unset),
        # which bootstraps the series.
        try:
            sql_date = self.last_date + relativedelta(months=+1)
            value = self.last_value + 1
        except AttributeError:
            # Starts at Mar, Jan and Feb are in previous results
            sql_date = datetime(2015, 3, 1)
            value = 3
        self.last_date = sql_date
        self.last_value = value
        return [[sql_date, str(value)]]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    with mock.patch('pymysql.connect', return_value=connection_mock):
        config_path = os.path.join(self.config_folder, 'reportupdater_test2.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test2.tsv')
        # Seed the report with two months of previous results.
        with io.open(output_path, 'w') as output_file:
            output_file.write(
                str('date\tvalue\n2015-01-01\t1\n2015-02-01\t2\n'))
        self.paths_to_clean.extend([output_path])
        reportupdater.run(config_path=config_path,
                          query_folder=self.query_folder,
                          output_folder=self.output_folder)
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent values.
    expected_date = datetime(2015, 1, 1)
    expected_value = 1
    for line in output_lines:
        expected_line = expected_date.strftime(
            DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected_line)
        expected_date += relativedelta(months=+1)
        expected_value += 1
def test_hourly_funnel_timeboxed_report_without_previous_results(self):
    """Funnel report: each queried day yields three rows with values 1-3."""
    def fetchall_callback():
        # This method will return a subsequent row with each call.
        try:
            sql_date = self.last_date + relativedelta(days=+1)
        except AttributeError:
            sql_date = date(2015, 1, 1)
        self.last_date = sql_date
        return [
            [sql_date, '1'],
            [sql_date, '2'],
            [sql_date, '3']
        ]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    # Save and restore the real connect: the previous bare module
    # assignment leaked the MagicMock into subsequent tests.
    original_connect = MySQLdb.connect
    MySQLdb.connect = MagicMock(return_value=connection_mock)
    try:
        config_path = os.path.join(self.config_folder, 'reportupdater_test3.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test3.tsv')
        history_path = 'test/fixtures/reportupdater_test3.history'
        self.paths_to_clean.extend([output_path, history_path])
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=history_path
        )
    finally:
        MySQLdb.connect = original_connect
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent values: the value cycles
    # 1..3 within a day before the date advances.
    expected_date = datetime(2015, 1, 1)
    expected_value = 1
    for line in output_lines:
        expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected_line)
        if expected_value < 3:
            expected_value += 1
        else:
            expected_date += relativedelta(days=+1)
            expected_value = 1
def test_daily_timeboxed_report_with_previous_results(self):
    """Timeboxed report appends new monthly rows after existing results."""
    def fetchall_callback():
        # This method will return a subsequent row with each call.
        try:
            sql_date = self.last_date + relativedelta(months=+1)
            value = self.last_value + 1
        except AttributeError:
            # Starts at Mar, Jan and Feb are in previous results
            sql_date = datetime(2015, 3, 1)
            value = 3
        self.last_date = sql_date
        self.last_value = value
        return [[sql_date, str(value)]]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    # Save and restore the real connect: the previous bare module
    # assignment leaked the MagicMock into subsequent tests.
    original_connect = MySQLdb.connect
    MySQLdb.connect = MagicMock(return_value=connection_mock)
    try:
        config_path = os.path.join(self.config_folder, 'reportupdater_test2.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test2.tsv')
        history_path = 'test/fixtures/reportupdater_test2.history'
        with io.open(output_path, 'w') as output_file:
            # `unicode` does not exist in Python 3; a plain str works.
            output_file.write(str('date\tvalue\n2015-01-01\t1\n2015-02-01\t2\n'))
        self.paths_to_clean.extend([output_path, history_path])
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=history_path
        )
    finally:
        MySQLdb.connect = original_connect
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent values.
    expected_date = datetime(2015, 1, 1)
    expected_value = 1
    for line in output_lines:
        expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected_line)
        expected_date += relativedelta(months=+1)
        expected_value += 1
def main():
    """Command-line entry point: parse arguments and run reportupdater."""
    parser = argparse.ArgumentParser(
        description=('Periodically execute SQL queries or scripts ' +
                     'and write/update the results into TSV files.'))
    parser.add_argument('query_folder',
                        help='Folder with *.sql files and scripts.')
    parser.add_argument('output_folder',
                        help='Folder to write the TSV files to.')
    parser.add_argument('--config-path',
                        help='Yaml configuration file. Default: <query_folder>/config.yaml.')
    parser.add_argument('-l', '--log-level',
                        help='(debug|info|warning|error|critical)')
    args = vars(parser.parse_args())
    # Map the textual log level to its logging constant; drop the key
    # entirely when no valid level was given, so run() uses its default.
    if args.get('log_level') in LOGGING_LEVELS:
        args['log_level'] = LOGGING_LEVELS[args['log_level']]
    else:
        args.pop('log_level', None)
    reportupdater.run(**args)
def test_hourly_funnel_report_without_previous_results(self, *_):
    """Funnel reports emit three rows (values 1-3) for every day."""
    def fetchall_callback():
        # Advance one day per call; the first call (no last_date
        # attribute yet) starts the series at 2015-01-01.
        try:
            self.last_date += relativedelta(days=+1)
        except AttributeError:
            self.last_date = date(2015, 1, 1)
        return [[self.last_date, v] for v in ('1', '2', '3')]
    connection = ConnectionMock(None, fetchall_callback, ['date', 'value'])
    with mock.patch('pymysql.connect', return_value=connection):
        config_path = os.path.join(self.config_folder, 'reportupdater_test3.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test3.tsv')
        self.paths_to_clean.append(output_path)
        reportupdater.run(config_path=config_path,
                          query_folder=self.query_folder,
                          output_folder=self.output_folder)
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        lines = output_file.readlines()
    self.assertTrue(len(lines) > 1)
    self.assertEqual(lines.pop(0).strip(), 'date\tvalue')
    # Rows cycle the value 1..3 within each day, then the date advances.
    expected_date = datetime(2015, 1, 1)
    expected_value = 1
    for line in lines:
        expected = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected)
        if expected_value == 3:
            expected_date += relativedelta(days=+1)
            expected_value = 1
        else:
            expected_value += 1
def test_daily_script_report_without_previous_results(self, *_):
    """Script-backed daily report produces consecutive dates from 2015-01-01."""
    reportupdater.run(
        config_path=os.path.join(self.config_folder, 'reportupdater_test5.yaml'),
        query_folder=self.query_folder,
        output_folder=self.output_folder)
    output_path = os.path.join(self.output_folder, 'reportupdater_test5.tsv')
    self.paths_to_clean.append(output_path)
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        lines = output_file.readlines()
    self.assertTrue(len(lines) > 1)
    self.assertEqual(lines.pop(0).strip(), 'date\tvalue')
    # Each data row must carry the next calendar day, starting 2015-01-01.
    expected_date = datetime(2015, 1, 1)
    for line in lines:
        date_str, value = line.strip().split('\t')
        self.assertEqual(date_str, expected_date.strftime(DATE_FORMAT))
        self.assertEqual(type(value), str)
        expected_date += relativedelta(days=+1)
def main():
    """Parse command-line arguments and invoke reportupdater.run()."""
    parser = argparse.ArgumentParser(
        description=('Periodically execute SQL queries or scripts ' +
                     'and write/update the results into TSV files.'))
    parser.add_argument('query_folder',
                        help='Folder with *.sql files and scripts.')
    parser.add_argument('output_folder',
                        help='Folder to write the TSV files to.')
    parser.add_argument(
        '--config-path',
        help='Yaml configuration file. Default: <query_folder>/config.yaml.')
    parser.add_argument('--no-graphite', action='store_true',
                        help='Deactivate posting metrics to graphite.')
    parser.add_argument('-l', '--log-level',
                        help='(debug|info|warning|error|critical)')
    args = vars(parser.parse_args())
    # Translate the textual level into a logging constant, or drop the
    # key so reportupdater.run() falls back to its default level.
    if args.get('log_level') in LOGGING_LEVELS:
        args['log_level'] = LOGGING_LEVELS[args['log_level']]
    else:
        args.pop('log_level', None)
    reportupdater.run(**args)
def execute(self):
    """Generate CSV reports by executing Python code and SQL queries.

    Reports configured under 'reportupdater-reports' are delegated to
    reportupdater; every other configured graph that is due (based on
    its frequency and last run time) is generated here.
    """
    # Call reportupdater.
    # It will be only called if 'reportupdater-reports' section
    # can be found in the config file root level. Reportupdater
    # will not interfere in generate.py execution and viceversa.
    if 'reportupdater-reports' in self.config:
        output_folder = self.config.get('reportupdater-output', None)
        project_path = os.path.dirname(os.path.realpath(__file__))
        reportupdater.run(
            config=self.config,
            query_folder=os.path.abspath(self.folder),
            output_folder=os.path.abspath(
                output_folder or self.config['output']['path']),
            wikis_path=os.path.join(project_path, 'reportupdater/wikis.txt')
        )
    # End of reportupdater call.
    history = self.get_history()
    if self.graph:
        name = self.graph
        graphs = {name: self.config['graphs'][name]}
    else:
        graphs = self.config['graphs'] or {}
    reportupdater_reports = self.config.get('reportupdater-reports', {})
    # items() replaces Python-2-only iteritems(), and print() calls
    # replace print statements, so this method runs under Python 3
    # (the rest of the method already used print() functions).
    for key, value in graphs.items():
        # Ensure that reports specified to be executed
        # by reportupdater are not also run by generate.py.
        if key in reportupdater_reports:
            print('%s should have been executed by reportupdater, skipping.' % key)
            continue
        freq = value['frequency']
        try:
            last_run_time = history[key]
        except KeyError:
            # Never run before: treat the last run as the epoch.
            last_run_time = 0
        now = time.time()
        if freq == 'daily':
            increment = 60 * 60 * 24
        elif freq == 'hourly':
            increment = 60 * 60
        else:
            # Unknown frequency: always considered due.
            increment = 0
        due_at = last_run_time + increment
        if due_at < now or self.force:
            print('Generating {0}'.format(value['title']))
            if 'timeboxed' in value and 'starts' in value:
                from_date = value['starts']
                if 'ends' in value:
                    to_date = value['ends']
                else:
                    to_date = None
                ok = self.generate_graph_timeboxed(key, value, from_date, to_date)
            else:
                ok = self.generate_graph_full(key, value)
            if ok:
                # A plain dict assignment cannot raise here, so the old
                # try/except/finally construct reduced to exactly this:
                # record the run time and persist the history.
                history[key] = now
                self.save_history(history)
        else:
            print('Skipping generation of {0}: not enough time has passed'.format(value['title']))
def test_daily_report_with_previous_results_and_reruns(self):
    """Dates listed in .reruns files are recomputed, overwriting the
    irregular values previously stored in the report.
    """
    def fetchall_callback():
        # This method will return a subsequent row with each call,
        # starting at 2016-01-01 with value 1.
        try:
            sql_date = self.last_date + relativedelta(days=+1)
            value = self.last_value + 1
        except AttributeError:
            sql_date = datetime(2016, 1, 1)
            value = 1
        self.last_date = sql_date
        self.last_value = value
        return [[sql_date, str(value)]]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    # Save and restore the real connect: the previous bare module
    # assignment leaked the MagicMock into subsequent tests.
    original_connect = pymysql.connect
    pymysql.connect = MagicMock(return_value=connection_mock)
    try:
        config_path = os.path.join(self.config_folder, 'reportupdater_test6.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test6.tsv')
        # Seed the report with previous results, two of them irregular.
        with io.open(output_path, 'w') as output_file:
            # `unicode` does not exist in Python 3; a plain str works.
            output_file.write(str(
                'date\tvalue\n'
                '2016-01-01\t1\n'
                '2016-01-02\ta\n'  # Note irregular result.
                '2016-01-03\t3\n'
                '2016-01-04\tb\n'  # Note irregular result.
                '2016-01-05\t5\n'
            ))
        self.paths_to_clean.extend([output_path])
        # Build rerun files.
        rerun_folder = os.path.join(self.query_folder, '.reruns')
        os.makedirs(rerun_folder)
        rerun_path1 = os.path.join(rerun_folder, 'reportupdater_test6.1')
        with io.open(rerun_path1, 'w') as rerun_file1:
            rerun_file1.write(str(
                '2016-01-02\n'
                '2016-01-03\n'
                'reportupdater_test6\n'
            ))
        rerun_path2 = os.path.join(rerun_folder, 'reportupdater_test6.2')
        with io.open(rerun_path2, 'w') as rerun_file2:
            rerun_file2.write(str(
                '2016-01-04\n'
                '2016-01-05\n'
                'reportupdater_test6\n'
            ))
        self.paths_to_clean.extend([rerun_folder])
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder
        )
    finally:
        pymysql.connect = original_connect
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent values.
    expected_date = datetime(2016, 1, 1)
    expected_value = 1
    for line in output_lines:
        expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected_line)
        expected_date += relativedelta(days=+1)
        expected_value += 1
def test_daily_report_with_previous_results_and_reruns(self, mock_utcnow):
    """Dates listed in .reruns files are recomputed and their previously
    stored (irregular) values overwritten in the report TSV.
    """
    # NOTE(review): presumably this pins the report's notion of "now" so
    # the run is deterministic — confirm the decorator patches utcnow.
    mock_utcnow.return_value = datetime(2016, 1, 8)

    def fetchall_callback():
        # This method will return a subsequent row with each call,
        # starting at 2016-01-01 with value 1.
        try:
            sql_date = self.last_date + relativedelta(days=+1)
            value = self.last_value + 1
        except AttributeError:
            sql_date = datetime(2016, 1, 1)
            value = 1
        self.last_date = sql_date
        self.last_value = value
        return [[sql_date, str(value)]]
    header = ['date', 'value']
    connection_mock = ConnectionMock(None, fetchall_callback, header)
    with mock.patch('pymysql.connect', return_value=connection_mock):
        config_path = os.path.join(self.config_folder, 'reportupdater_test6.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test6.tsv')
        # Seed the report with previous results, two of them irregular.
        with io.open(output_path, 'w') as output_file:
            output_file.write(
                str('date\tvalue\n'
                    '2016-01-01\t1\n'
                    '2016-01-02\ta\n'  # Note irregular result, this will be overwritten.
                    '2016-01-03\t3\n'
                    '2016-01-04\tb\n'  # Note irregular result, this will be overwritten.
                    '2016-01-05\t5\n'))
        self.paths_to_clean.extend([output_path])
        # Build rerun files.
        rerun_folder = os.path.join(self.query_folder, '.reruns')
        os.makedirs(rerun_folder)
        rerun_path1 = os.path.join(rerun_folder, 'reportupdater_test6.1')
        with io.open(rerun_path1, 'w') as rerun_file1:
            rerun_file1.write(
                str('2016-01-02\n'
                    '2016-01-03\n'
                    'reportupdater_test6\n'))
        rerun_path2 = os.path.join(rerun_folder, 'reportupdater_test6.2')
        with io.open(rerun_path2, 'w') as rerun_file2:
            rerun_file2.write(
                str('2016-01-04\n'
                    '2016-01-05\n'
                    'reportupdater_test6\n'))
        self.paths_to_clean.extend([rerun_folder])
        reportupdater.run(config_path=config_path,
                          query_folder=self.query_folder,
                          output_folder=self.output_folder)
    self.assertTrue(os.path.exists(output_path))
    with io.open(output_path, 'r', encoding='utf-8') as output_file:
        output_lines = output_file.readlines()
    self.assertTrue(len(output_lines) > 1)
    header = output_lines.pop(0).strip()
    self.assertEqual(header, 'date\tvalue')
    # Assert that all lines hold subsequent values.
    expected_date = datetime(2016, 1, 1)
    expected_value = 1
    for line in output_lines:
        expected_line = expected_date.strftime(
            DATE_FORMAT) + '\t' + str(expected_value)
        self.assertEqual(line.strip(), expected_line)
        expected_date += relativedelta(days=+1)
        expected_value += 1