    def test_daily_report_without_previous_results_with_explode_by(self):
        def fetchall_callback():
            return [[datetime(2015, 1, 1), str(1)]]
        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        pymysql.connect = MagicMock(return_value=connection_mock)

        config_path = os.path.join(self.config_folder, 'reportupdater_test4.yaml')
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder
        )

        output_folder = os.path.join(self.output_folder, 'reportupdater_test4')
        self.paths_to_clean.extend([output_folder])

        output_filenames = [
            'visualeditor/wiki1.tsv',
            'visualeditor/wiki2.tsv',
            'visualeditor/wiki3.tsv',
            'wikitext/wiki1.tsv',
            'wikitext/wiki2.tsv',
            'wikitext/wiki3.tsv',
        ]
        for output_filename in output_filenames:
            output_path = os.path.join(output_folder, output_filename)
            self.assertTrue(os.path.exists(output_path))
            with io.open(output_path, 'r', encoding='utf-8') as output_file:
                output_lines = output_file.readlines()
            self.assertEqual(len(output_lines), 2)
            self.assertEqual(output_lines[0], 'date\tvalue\n')
            self.assertEqual(output_lines[1], '2015-01-01\t1\n')
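These tests hand reportupdater a ConnectionMock(None, fetchall_callback, header) stand-in so that no real database is needed. The helper itself is not shown on this page; the sketch below is a hypothetical minimal version inferred only from the constructor calls above, and the cursor behaviour is an assumption.

# Hypothetical stand-in for the ConnectionMock test helper used above.
# Only the constructor signature is taken from the tests; everything else
# is an assumption about what reportupdater needs from a connection.
class ConnectionMock(object):

    def __init__(self, cursor_mock, fetchall_callback, header):
        self.cursor_mock = cursor_mock  # unused in these tests
        self.fetchall_callback = fetchall_callback
        self.header = header

    def cursor(self):
        # Return self so the same object can play both roles.
        return self

    def execute(self, sql):
        pass  # the query text is irrelevant for these tests

    def fetchall(self):
        return self.fetchall_callback()

    @property
    def description(self):
        # DB-API cursors expose column names via 7-tuples in `description`.
        return [(column,) + (None,) * 6 for column in self.header]

    def close(self):
        pass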
    def test_daily_timeboxed_script_report_without_previous_results(self):
        config_path = os.path.join(self.config_folder, 'reportupdater_test5.yaml')
        history_path = 'test/fixtures/reportupdater_test5.history'
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=history_path
        )
        output_path = os.path.join(self.output_folder, 'reportupdater_test5.tsv')
        self.paths_to_clean.extend([output_path, history_path])

        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        self.assertTrue(len(output_lines) > 1)
        header = output_lines.pop(0).strip()
        self.assertEqual(header, 'date\tvalue')
        # Assert that all lines hold subsequent dates.
        expected_date = datetime(2015, 1, 1)
        for line in output_lines:
            date_str, value = line.strip().split('\t')
            expected_date_str = expected_date.strftime(DATE_FORMAT)
            self.assertEqual(date_str, expected_date_str)
            self.assertEqual(type(value), str)
            expected_date += relativedelta(days=+1)
Example #3
    def test_daily_report_without_previous_results_with_explode_by(self, *_):
        def fetchall_callback():
            return [[datetime(2015, 1, 1), str(1)]]

        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        with mock.patch('pymysql.connect', return_value=connection_mock):

            config_path = os.path.join(self.config_folder,
                                       'reportupdater_test4.yaml')
            reportupdater.run(config_path=config_path,
                              query_folder=self.query_folder,
                              output_folder=self.output_folder)

            output_folder = os.path.join(self.output_folder,
                                         'reportupdater_test4')
            self.paths_to_clean.extend([output_folder])

            output_filenames = [
                'visualeditor/wiki1.tsv',
                'visualeditor/wiki2.tsv',
                'visualeditor/wiki3.tsv',
                'wikitext/wiki1.tsv',
                'wikitext/wiki2.tsv',
                'wikitext/wiki3.tsv',
            ]
            for output_filename in output_filenames:
                output_path = os.path.join(output_folder, output_filename)
                self.assertTrue(os.path.exists(output_path))
                with io.open(output_path, 'r',
                             encoding='utf-8') as output_file:
                    output_lines = output_file.readlines()
                self.assertEqual(len(output_lines), 2)
                self.assertEqual(output_lines[0], 'date\tvalue\n')
                self.assertEqual(output_lines[1], '2015-01-01\t1\n')

    def test_when_current_exec_time_and_last_exec_time_are_within_the_same_day(self):
        last_exec_time = datetime(2015, 1, 2, 3, 4, 5)
        self.write_time_to_history(last_exec_time)
        reportupdater.utcnow = MagicMock(return_value=datetime(2015, 1, 2, 13, 14, 15))
        reportupdater.run(
            config_path=os.path.join(self.config_folder, 'reportupdater_test2.yaml'),
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=self.history_path
        )
        # The report should not be computed, because it has already been computed
        # earlier the same day, so the output file should not exist.
        output_path = os.path.join(self.output_folder, 'reportupdater_test2.tsv')
        self.assertFalse(os.path.exists(output_path))
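This test relies on a history file and a mocked reportupdater.utcnow to verify that a report is skipped when it already ran earlier the same calendar day. The snippet below only illustrates that day-level gate; the real history format and helper names inside reportupdater are not shown here and are assumed.

from datetime import datetime

# Illustration only: an assumed form of the day-level gate the test above
# exercises, not reportupdater's actual implementation.
def already_ran_today(last_exec_time, now):
    """Return True when both timestamps fall on the same calendar day."""
    return last_exec_time.date() == now.date()

# Mirrors the values used in the test:
assert already_ran_today(datetime(2015, 1, 2, 3, 4, 5),
                         datetime(2015, 1, 2, 13, 14, 15))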
Example #5
    def test_daily_report_with_previous_results(self, *_):
        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(months=+1)
                value = self.last_value + 1
            except AttributeError:
                # Starts at March; January and February are in the previous results.
                sql_date = datetime(2015, 3, 1)
                value = 3
            self.last_date = sql_date
            self.last_value = value
            return [[sql_date, str(value)]]

        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        with mock.patch('pymysql.connect', return_value=connection_mock):

            config_path = os.path.join(self.config_folder,
                                       'reportupdater_test2.yaml')
            output_path = os.path.join(self.output_folder,
                                       'reportupdater_test2.tsv')
            with io.open(output_path, 'w') as output_file:
                output_file.write(
                    str('date\tvalue\n2015-01-01\t1\n2015-02-01\t2\n'))
            self.paths_to_clean.extend([output_path])
            reportupdater.run(config_path=config_path,
                              query_folder=self.query_folder,
                              output_folder=self.output_folder)
            self.assertTrue(os.path.exists(output_path))
            with io.open(output_path, 'r', encoding='utf-8') as output_file:
                output_lines = output_file.readlines()
            self.assertTrue(len(output_lines) > 1)
            header = output_lines.pop(0).strip()
            self.assertEqual(header, 'date\tvalue')
            # Assert that all lines hold subsequent values.
            expected_date = datetime(2015, 1, 1)
            expected_value = 1
            for line in output_lines:
                expected_line = expected_date.strftime(
                    DATE_FORMAT) + '\t' + str(expected_value)
                self.assertEqual(line.strip(), expected_line)
                expected_date += relativedelta(months=+1)
                expected_value += 1

    def test_hourly_funnel_timeboxed_report_without_previous_results(self):
        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(days=+1)
            except AttributeError:
                sql_date = date(2015, 1, 1)
            self.last_date = sql_date
            return [
                [sql_date, '1'],
                [sql_date, '2'],
                [sql_date, '3']
            ]
        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        MySQLdb.connect = MagicMock(return_value=connection_mock)

        config_path = os.path.join(self.config_folder, 'reportupdater_test3.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test3.tsv')
        history_path = 'test/fixtures/reportupdater_test3.history'
        self.paths_to_clean.extend([output_path, history_path])
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=history_path
        )
        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        self.assertTrue(len(output_lines) > 1)
        header = output_lines.pop(0).strip()
        self.assertEqual(header, 'date\tvalue')
        # Assert that all lines hold subsequent values.
        expected_date = datetime(2015, 1, 1)
        expected_value = 1
        for line in output_lines:
            expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
            self.assertEqual(line.strip(), expected_line)
            if expected_value < 3:
                expected_value += 1
            else:
                expected_date += relativedelta(days=+1)
                expected_value = 1
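The funnel assertions above expect three rows per date, with the value cycling 1, 2, 3 before the date advances. The sketch below shows how those expected TSV lines could be generated, assuming DATE_FORMAT is '%Y-%m-%d' (the constant is imported elsewhere, so its exact value is an assumption).

from datetime import datetime
from dateutil.relativedelta import relativedelta

DATE_FORMAT = '%Y-%m-%d'  # assumed value; the tests import it elsewhere

def expected_funnel_lines(start, days, values_per_day=3):
    """Build the TSV lines the funnel assertions above expect."""
    lines = ['date\tvalue']
    current = start
    for _ in range(days):
        for value in range(1, values_per_day + 1):
            lines.append(current.strftime(DATE_FORMAT) + '\t' + str(value))
        current += relativedelta(days=+1)
    return lines

# e.g. expected_funnel_lines(datetime(2015, 1, 1), 2) yields
# ['date\tvalue', '2015-01-01\t1', '2015-01-01\t2', '2015-01-01\t3',
#  '2015-01-02\t1', '2015-01-02\t2', '2015-01-02\t3']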
    def test_daily_timeboxed_report_with_previous_results(self):
        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(months=+1)
                value = self.last_value + 1
            except AttributeError:
                # Starts at March; January and February are in the previous results.
                sql_date = datetime(2015, 3, 1)
                value = 3
            self.last_date = sql_date
            self.last_value = value
            return [[sql_date, str(value)]]
        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        MySQLdb.connect = MagicMock(return_value=connection_mock)

        config_path = os.path.join(self.config_folder, 'reportupdater_test2.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test2.tsv')
        history_path = 'test/fixtures/reportupdater_test2.history'
        with io.open(output_path, 'w') as output_file:
            output_file.write(str('date\tvalue\n2015-01-01\t1\n2015-02-01\t2\n'))
        self.paths_to_clean.extend([output_path, history_path])
        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder,
            history_path=history_path
        )
        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        self.assertTrue(len(output_lines) > 1)
        header = output_lines.pop(0).strip()
        self.assertEqual(header, 'date\tvalue')
        # Assert that all lines hold subsequent values.
        expected_date = datetime(2015, 1, 1)
        expected_value = 1
        for line in output_lines:
            expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
            self.assertEqual(line.strip(), expected_line)
            expected_date += relativedelta(months=+1)
            expected_value += 1
def main():
    parser = argparse.ArgumentParser(
        description=('Periodically execute SQL queries or scripts ' +
                     'and write/update the results into TSV files.'))
    parser.add_argument('query_folder',
                        help='Folder with *.sql files and scripts.')
    parser.add_argument('output_folder',
                        help='Folder to write the TSV files to.')
    parser.add_argument('--config-path',
                        help='YAML configuration file. Default: <query_folder>/config.yaml.')
    parser.add_argument('-l', '--log-level',
                        help='(debug|info|warning|error|critical)')
    args = vars(parser.parse_args())
    if 'log_level' in args:
        if args['log_level'] in LOGGING_LEVELS:
            args['log_level'] = LOGGING_LEVELS[args['log_level']]
        else:
            del args['log_level']
    reportupdater.run(**args)
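main() looks the --log-level argument up in a LOGGING_LEVELS mapping defined elsewhere in the module. A plausible definition, shown only as an assumption about what that mapping contains, would tie the documented names to the standard logging constants.

import logging

# Assumed definition; the real LOGGING_LEVELS lives elsewhere in the module.
LOGGING_LEVELS = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR,
    'critical': logging.CRITICAL,
}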
Example #9
    def test_hourly_funnel_report_without_previous_results(self, *_):
        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(days=+1)
            except AttributeError:
                sql_date = date(2015, 1, 1)
            self.last_date = sql_date
            return [[sql_date, '1'], [sql_date, '2'], [sql_date, '3']]

        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        with mock.patch('pymysql.connect', return_value=connection_mock):

            config_path = os.path.join(self.config_folder,
                                       'reportupdater_test3.yaml')
            output_path = os.path.join(self.output_folder,
                                       'reportupdater_test3.tsv')
            self.paths_to_clean.extend([output_path])
            reportupdater.run(config_path=config_path,
                              query_folder=self.query_folder,
                              output_folder=self.output_folder)
            self.assertTrue(os.path.exists(output_path))
            with io.open(output_path, 'r', encoding='utf-8') as output_file:
                output_lines = output_file.readlines()
            self.assertTrue(len(output_lines) > 1)
            header = output_lines.pop(0).strip()
            self.assertEqual(header, 'date\tvalue')
            # Assert that all lines hold subsequent values.
            expected_date = datetime(2015, 1, 1)
            expected_value = 1
            for line in output_lines:
                expected_line = expected_date.strftime(
                    DATE_FORMAT) + '\t' + str(expected_value)
                self.assertEqual(line.strip(), expected_line)
                if expected_value < 3:
                    expected_value += 1
                else:
                    expected_date += relativedelta(days=+1)
                    expected_value = 1
Example #10
    def test_daily_script_report_without_previous_results(self, *_):
        config_path = os.path.join(self.config_folder,
                                   'reportupdater_test5.yaml')
        reportupdater.run(config_path=config_path,
                          query_folder=self.query_folder,
                          output_folder=self.output_folder)
        output_path = os.path.join(self.output_folder,
                                   'reportupdater_test5.tsv')
        self.paths_to_clean.extend([output_path])

        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        self.assertTrue(len(output_lines) > 1)
        header = output_lines.pop(0).strip()
        self.assertEqual(header, 'date\tvalue')
        # Assert that all lines hold subsequent dates.
        expected_date = datetime(2015, 1, 1)
        for line in output_lines:
            date_str, value = line.strip().split('\t')
            expected_date_str = expected_date.strftime(DATE_FORMAT)
            self.assertEqual(date_str, expected_date_str)
            self.assertEqual(type(value), str)
            expected_date += relativedelta(days=+1)
Example #11
def main():
    parser = argparse.ArgumentParser(
        description=('Periodically execute SQL queries or scripts ' +
                     'and write/update the results into TSV files.'))
    parser.add_argument('query_folder',
                        help='Folder with *.sql files and scripts.')
    parser.add_argument('output_folder',
                        help='Folder to write the TSV files to.')
    parser.add_argument(
        '--config-path',
        help='YAML configuration file. Default: <query_folder>/config.yaml.')
    parser.add_argument('--no-graphite',
                        action='store_true',
                        help='Deactivate posting metrics to graphite.')
    parser.add_argument('-l',
                        '--log-level',
                        help='(debug|info|warning|error|critical)')
    args = vars(parser.parse_args())
    if 'log_level' in args:
        if args['log_level'] in LOGGING_LEVELS:
            args['log_level'] = LOGGING_LEVELS[args['log_level']]
        else:
            del args['log_level']
    reportupdater.run(**args)
    def execute(self):
        """Generates a CSV report by executing Python code and SQL queries."""
        # Call reportupdater.
        # It will only be called if a 'reportupdater-reports' section
        # can be found at the config file's root level. Reportupdater
        # will not interfere with generate.py's execution and vice versa.
        if 'reportupdater-reports' in self.config:
            output_folder = self.config.get('reportupdater-output', None)
            project_path = os.path.dirname(os.path.realpath(__file__))
            reportupdater.run(
                config=self.config,
                query_folder=os.path.abspath(self.folder),
                output_folder=os.path.abspath(output_folder or self.config['output']['path']),
                wikis_path=os.path.join(project_path, 'reportupdater/wikis.txt')
            )
        # End of reportupdater call.
        history = self.get_history()
        """Generates a CSV report by executing Python code and SQL queries."""
        if self.graph:
            name = self.graph
            graphs = {name: self.config['graphs'][name]}
        else:
            graphs = self.config['graphs'] or {}

        reportupdater_reports = self.config.get('reportupdater-reports', {})
        for key, value in graphs.items():
            # Ensure that reports specified to be executed
            # by reportupdater are not also run by generate.py.
            if key in reportupdater_reports:
                print('%s should have been executed by reportupdater, skipping.' % key)
                continue
            # title = value['title']
            freq = value['frequency']
            try:
                last_run_time = history[key]
            except KeyError:
                last_run_time = 0

            now = time.time()
            if freq == 'daily':
                increment = 60 * 60 * 24
            elif freq == 'hourly':
                increment = 60 * 60
            else:
                increment = 0
            due_at = last_run_time + increment

            if due_at < now or self.force:
                print('Generating {0}'.format(value['title']))
                if "timeboxed" in value and "starts" in value:
                    from_date = value["starts"]

                    if "ends" in value:
                        to_date = value["ends"]
                    else:
                        to_date = None
                    ok = self.generate_graph_timeboxed(key, value, from_date, to_date)
                else:
                    ok = self.generate_graph_full(key, value)

                if ok:
                    try:
                        history[key] = now
                    except:
                        continue
                    finally:
                        if history[key] == now:
                            self.save_history(history)
            else:
                print('Skipping generation of {0}: not enough time has passed'.format(value['title']))
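execute() depends on get_history() and save_history() to persist the per-graph last-run timestamps it compares against. Those helpers are not shown here; the sketch below is one hypothetical way they could be backed by a JSON file, and the file name and format are assumptions rather than the project's actual implementation.

import json
import os

HISTORY_PATH = 'history.json'  # hypothetical location

def get_history(path=HISTORY_PATH):
    """Load the {graph_key: last_run_timestamp} mapping, if any."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as history_file:
        return json.load(history_file)

def save_history(history, path=HISTORY_PATH):
    """Persist the mapping that execute() writes back."""
    with open(path, 'w') as history_file:
        json.dump(history, history_file)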
    def test_daily_report_with_previous_results_and_reruns(self):
        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(days=+1)
                value = self.last_value + 1
            except AttributeError:
                # Starts at Jan 2016; the reruns will overwrite the irregular previous results.
                sql_date = datetime(2016, 1, 1)
                value = 1
            self.last_date = sql_date
            self.last_value = value
            return [[sql_date, str(value)]]
        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        pymysql.connect = MagicMock(return_value=connection_mock)

        config_path = os.path.join(self.config_folder, 'reportupdater_test6.yaml')
        output_path = os.path.join(self.output_folder, 'reportupdater_test6.tsv')
        with io.open(output_path, 'w') as output_file:
            output_file.write(str(
                'date\tvalue\n'
                '2016-01-01\t1\n'
                '2016-01-02\ta\n'  # Note irregular result.
                '2016-01-03\t3\n'
                '2016-01-04\tb\n'  # Note irregular result.
                '2016-01-05\t5\n'
            ))
        self.paths_to_clean.extend([output_path])

        # Build rerun files.
        rerun_folder = os.path.join(self.query_folder, '.reruns')
        os.makedirs(rerun_folder)
        rerun_path1 = os.path.join(rerun_folder, 'reportupdater_test6.1')
        with io.open(rerun_path1, 'w') as rerun_file1:
            rerun_file1.write(str(
                '2016-01-02\n'
                '2016-01-03\n'
                'reportupdater_test6\n'
            ))
        rerun_path2 = os.path.join(rerun_folder, 'reportupdater_test6.2')
        with io.open(rerun_path2, 'w') as rerun_file2:
            rerun_file2.write(str(
                '2016-01-04\n'
                '2016-01-05\n'
                'reportupdater_test6\n'
            ))
        self.paths_to_clean.extend([rerun_folder])

        reportupdater.run(
            config_path=config_path,
            query_folder=self.query_folder,
            output_folder=self.output_folder
        )
        self.assertTrue(os.path.exists(output_path))
        with io.open(output_path, 'r', encoding='utf-8') as output_file:
            output_lines = output_file.readlines()
        self.assertTrue(len(output_lines) > 1)
        header = output_lines.pop(0).strip()
        self.assertEqual(header, 'date\tvalue')
        # Assert that all lines hold subsequent values.
        expected_date = datetime(2016, 1, 1)
        expected_value = 1
        for line in output_lines:
            expected_line = expected_date.strftime(DATE_FORMAT) + '\t' + str(expected_value)
            self.assertEqual(line.strip(), expected_line)
            expected_date += relativedelta(days=+1)
            expected_value += 1
Example #14
    def test_daily_report_with_previous_results_and_reruns(self, mock_utcnow):
        mock_utcnow.return_value = datetime(2016, 1, 8)

        def fetchall_callback():
            # This method will return a subsequent row with each call.
            try:
                sql_date = self.last_date + relativedelta(days=+1)
                value = self.last_value + 1
            except AttributeError:
                # Starts at Jan 2016; the reruns will overwrite the irregular previous results.
                sql_date = datetime(2016, 1, 1)
                value = 1
            self.last_date = sql_date
            self.last_value = value
            return [[sql_date, str(value)]]

        header = ['date', 'value']
        connection_mock = ConnectionMock(None, fetchall_callback, header)
        with mock.patch('pymysql.connect', return_value=connection_mock):

            config_path = os.path.join(self.config_folder,
                                       'reportupdater_test6.yaml')
            output_path = os.path.join(self.output_folder,
                                       'reportupdater_test6.tsv')
            with io.open(output_path, 'w') as output_file:
                output_file.write(
                    str('date\tvalue\n'
                        '2016-01-01\t1\n'
                        '2016-01-02\ta\n'  # Note irregular result, this will be overwritten.
                        '2016-01-03\t3\n'
                        '2016-01-04\tb\n'  # Note irregular result, this will be overwritten.
                        '2016-01-05\t5\n'))
            self.paths_to_clean.extend([output_path])

            # Build rerun files.
            rerun_folder = os.path.join(self.query_folder, '.reruns')
            os.makedirs(rerun_folder)
            rerun_path1 = os.path.join(rerun_folder, 'reportupdater_test6.1')
            with io.open(rerun_path1, 'w') as rerun_file1:
                rerun_file1.write(
                    str('2016-01-02\n'
                        '2016-01-03\n'
                        'reportupdater_test6\n'))
            rerun_path2 = os.path.join(rerun_folder, 'reportupdater_test6.2')
            with io.open(rerun_path2, 'w') as rerun_file2:
                rerun_file2.write(
                    str('2016-01-04\n'
                        '2016-01-05\n'
                        'reportupdater_test6\n'))
            self.paths_to_clean.extend([rerun_folder])

            reportupdater.run(config_path=config_path,
                              query_folder=self.query_folder,
                              output_folder=self.output_folder)
            self.assertTrue(os.path.exists(output_path))
            with io.open(output_path, 'r', encoding='utf-8') as output_file:
                output_lines = output_file.readlines()
            self.assertTrue(len(output_lines) > 1)
            header = output_lines.pop(0).strip()
            self.assertEqual(header, 'date\tvalue')
            # Assert that all lines hold subsequent values.
            expected_date = datetime(2016, 1, 1)
            expected_value = 1
            for line in output_lines:
                expected_line = expected_date.strftime(
                    DATE_FORMAT) + '\t' + str(expected_value)
                self.assertEqual(line.strip(), expected_line)
                expected_date += relativedelta(days=+1)
                expected_value += 1
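The rerun fixtures written above appear to consist of a start date, an end date, and one or more report identifiers, one per line. The parser below only illustrates that inferred format; it is not reportupdater's actual rerun handling.

from datetime import datetime

# Inferred from the fixtures built above: first two lines look like a date
# range and the remaining lines look like report identifiers.
def parse_rerun_file(path, date_format='%Y-%m-%d'):
    with open(path, 'r') as rerun_file:
        lines = [line.strip() for line in rerun_file if line.strip()]
    start_date = datetime.strptime(lines[0], date_format)
    end_date = datetime.strptime(lines[1], date_format)
    report_ids = lines[2:]
    return start_date, end_date, report_ids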