Python OperatorReport Examples, woudc_data_registry.report.OperatorReport Python Examples

Example #1

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_mixed_run_report(self):
        """
        Test that passing and failing files are written to the run report
        when a mixture of the two is processed
        """

        infile_root = resolve_test_data_path('data/general/pass_and_fail')

        agency = 'MSC'

        expected_passes = set()
        expected_fails = set()

        with report.OperatorReport() as error_bank:
            run_report = report.RunReport(SANDBOX_DIR)

            for infile in os.listdir(infile_root):
                fullpath = os.path.join(infile_root, infile)

                try:
                    contents = util.read_file(fullpath)
                    ecsv = parser.ExtendedCSV(contents, error_bank)
                except (parser.MetadataValidationError,
                        parser.NonStandardDataError):
                    expected_fails.add(fullpath)
                    run_report.write_failing_file(fullpath, agency)
                    continue

                try:
                    ecsv.validate_metadata_tables()
                    ecsv.validate_dataset_tables()
                    data_record = models.DataRecord(ecsv)
                    data_record.filename = infile

                    expected_passes.add(fullpath)
                    run_report.write_passing_file(fullpath, agency)
                except (parser.MetadataValidationError,
                        parser.NonStandardDataError):
                    expected_fails.add(fullpath)
                    run_report.write_failing_file(fullpath, agency)

        self.assertEquals(len(expected_passes), 6)
        self.assertEquals(len(expected_fails), 4)

        output_path = os.path.join(SANDBOX_DIR, 'run_report')
        self.assertTrue(os.path.exists(output_path))

        with open(output_path) as output:
            lines = output.read().splitlines()
            self.assertEquals(lines[0], agency)
            self.assertEquals(len(lines),
                              len(expected_passes) + len(expected_fails) + 1)

            for line in lines[1:]:
                if line.startswith('Pass'):
                    target = line[6:].strip()
                    self.assertIn(target, expected_passes)
                elif line.startswith('Fail'):
                    target = line[6:].strip()
                    self.assertIn(target, expected_fails)

Example #2

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_failing_run_report(self):
        """Test that a failing file is written to the run report"""

        filename = 'ecsv-missing-instrument-name.csv'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        ecsv = None
        # Agency typically filled in with FTP username for failing files.
        agency = 'rmda'

        with report.OperatorReport() as error_bank:
            run_report = report.RunReport(SANDBOX_DIR)

            try:
                ecsv = parser.ExtendedCSV(contents, error_bank)
                ecsv.validate_metadata_tables()
                agency = ecsv.extcsv['DATA_GENERATION']['Agency']

                ecsv.validate_dataset_tables()
                raise AssertionError(
                    'Parsing of {} did not fail'.format(infile))
            except (parser.MetadataValidationError,
                    parser.NonStandardDataError):
                output_path = os.path.join(SANDBOX_DIR, 'run_report')

                run_report.write_failing_file(infile, agency)

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            lines = output.read().splitlines()
            self.assertEquals(len(lines), 2)

            self.assertEquals(lines[0], agency)
            self.assertEquals(lines[1], 'Fail: {}'.format(infile))

Example #3

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_non_extcsv_run_report(self):
        """Test that an unparseable file is written to the run report"""

        filename = 'not-an-ecsv.dat'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        agency = 'UNKNOWN'

        with report.OperatorReport() as error_bank:
            run_report = report.RunReport(SANDBOX_DIR)

            try:
                _ = parser.ExtendedCSV(contents, error_bank)
                raise AssertionError(
                    'Parsing of {} did not fail'.format(infile))
            except (parser.MetadataValidationError,
                    parser.NonStandardDataError):
                output_path = os.path.join(SANDBOX_DIR, 'run_report')

                run_report.write_failing_file(infile, agency)

                self.assertTrue(os.path.exists(output_path))
                with open(output_path) as output:
                    lines = output.read().splitlines()
                    self.assertEquals(len(lines), 2)

                    self.assertEquals(lines[0], agency)
                    self.assertEquals(lines[1], 'Fail: {}'.format(infile))

Example #4

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_passing_run_report(self):
        """Test that a passing file is written to the run report"""

        filename = '20080101.Kipp_Zonen.UV-S-E-T.000560.PMOD-WRC.csv'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        run_report = report.RunReport(SANDBOX_DIR)
        with report.OperatorReport() as error_bank:
            ecsv = parser.ExtendedCSV(contents, error_bank)

            ecsv.validate_metadata_tables()
            ecsv.validate_dataset_tables()
            data_record = models.DataRecord(ecsv)
            data_record.filename = filename

            agency = ecsv.extcsv['DATA_GENERATION']['Agency']
            output_path = os.path.join(SANDBOX_DIR, 'run_report')

            run_report.write_passing_file(infile, agency)

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            lines = output.read().splitlines()
            self.assertEquals(len(lines), 2)

            self.assertEquals(lines[0], agency)
            self.assertEquals(lines[1], 'Pass: {}'.format(infile))

Example #5

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

def dummy_extCSV(source):
    """
    Returns a parser.ExtendedCSV instace built from the filepath <source>
    with dummy output settings (no logs or reports).
    """

    with report.OperatorReport() as error_bank:
        return parser.ExtendedCSV(source, error_bank)

Example #6

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_warning_operator_report(self):
        """Test that file warnings are written in the operator report"""

        filename = 'ecsv-trailing-commas.csv'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            ecsv = parser.ExtendedCSV(contents, op_report)

            # Some warnings are encountered during parsing.
            ecsv.validate_metadata_tables()
            ecsv.validate_dataset_tables()
            data_record = models.DataRecord(ecsv)
            data_record.filename = filename

            agency = ecsv.extcsv['DATA_GENERATION']['Agency']

            today = datetime.now().strftime('%Y-%m-%d')
            output_path = os.path.join(SANDBOX_DIR,
                                       'operator-report-{}.csv'.format(today))

            op_report.add_message(200)  # File passes validation
            op_report.write_passing_file(infile, ecsv, data_record)

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            reader = csv.reader(output)
            next(reader)

            expected_warnings = len(ecsv.warnings)
            for _ in range(expected_warnings):
                report_line = next(reader)

                self.assertEquals(report_line[0], 'P')
                self.assertEquals(report_line[1], 'Warning')
                self.assertIn(agency, report_line)
                self.assertIn(os.path.basename(infile), report_line)

            report_line = next(reader)
            self.assertEquals(report_line[0], 'P')
            self.assertEquals(report_line[1], 'Warning')
            self.assertEquals(report_line[2], '200')
            self.assertIn(agency, report_line)
            self.assertIn(os.path.basename(infile), report_line)

            with self.assertRaises(StopIteration):
                next(reader)

Example #7

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_uses_error_definition(self):
        """Test that error/warning feedback responds to input files"""

        # The two error files below have different error types for error 1.
        all_warnings = resolve_test_data_path('config/all_warnings.csv')
        all_errors = resolve_test_data_path('config/all_errors.csv')

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            op_report.read_error_definitions(all_warnings)

            self.assertIn(1, op_report._error_definitions)
            _, success = op_report.add_message(1)
            self.assertFalse(success)

            op_report.read_error_definitions(all_errors)

            self.assertIn(1, op_report._error_definitions)
            _, success = op_report.add_message(1)
            self.assertTrue(success)

Example #8

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_passing_operator_report(self):
        """Test that a passing file is written in the operator report"""

        filename = '20080101.Kipp_Zonen.UV-S-E-T.000560.PMOD-WRC.csv'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            ecsv = parser.ExtendedCSV(contents, op_report)

            ecsv.validate_metadata_tables()
            ecsv.validate_dataset_tables()
            data_record = models.DataRecord(ecsv)
            data_record.filename = filename

            agency = ecsv.extcsv['DATA_GENERATION']['Agency']

            today = datetime.now().strftime('%Y-%m-%d')
            output_path = os.path.join(SANDBOX_DIR,
                                       'operator-report-{}.csv'.format(today))

            op_report.add_message(200)  # File passes validation
            op_report.write_passing_file(infile, ecsv, data_record)

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            reader = csv.reader(output)
            next(reader)

            report_line = next(reader)
            self.assertEquals(report_line[0], 'P')
            self.assertEquals(report_line[2], '200')
            self.assertIn(agency, report_line)
            self.assertIn(os.path.basename(infile), report_line)

            with self.assertRaises(StopIteration):
                next(reader)

Example #9

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_operator_report_output_location(self):
        """Test that operator reports write a file in the working directory"""

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            operator_path = pathlib.Path(op_report.filepath())
            self.assertEquals(str(operator_path.parent), SANDBOX_DIR)

Example #10

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_run_report_multiple_agencies(self):
        """Test that files in the run report are grouped by agency"""

        infile_root = resolve_test_data_path('data/general/agencies')

        expected_passes = {}
        expected_fails = {}
        agency_aliases = {
            'msc': 'MSC',
            'casiap': 'CAS-IAP',
            'mlcd-lu': 'MLCD-LU',
            'dwd-mohp': 'DWD-MOHp'
        }

        with report.OperatorReport() as error_bank:
            run_report = report.RunReport(SANDBOX_DIR)

            for dirpath, dirnames, filenames in os.walk(infile_root):
                for infile in filenames:
                    fullpath = os.path.join(dirpath, infile)
                    # Agency inferred from directory name.
                    agency = dirpath.split('/')[-1]

                    try:
                        contents = util.read_file(fullpath)
                        ecsv = parser.ExtendedCSV(contents, error_bank)
                    except (parser.MetadataValidationError,
                            parser.NonStandardDataError):
                        if agency not in expected_passes:
                            expected_passes[agency] = set()
                        if agency not in expected_fails:
                            expected_fails[agency] = set()
                        expected_fails[agency].add(fullpath)
                        run_report.write_failing_file(fullpath, agency)
                        continue

                    try:
                        ecsv.validate_metadata_tables()
                        agency = ecsv.extcsv['DATA_GENERATION']['Agency']

                        if agency not in expected_passes:
                            expected_passes[agency] = set()
                        if agency not in expected_fails:
                            expected_fails[agency] = set()

                        ecsv.validate_dataset_tables()
                        data_record = models.DataRecord(ecsv)
                        data_record.filename = infile

                        expected_passes[agency].add(fullpath)
                        run_report.write_passing_file(fullpath, agency)
                    except (parser.MetadataValidationError,
                            parser.NonStandardDataError):
                        agency = agency_aliases[agency]
                        if agency not in expected_passes:
                            expected_passes[agency] = set()
                        if agency not in expected_fails:
                            expected_fails[agency] = set()

                        expected_fails[agency].add(fullpath)
                        run_report.write_failing_file(fullpath, agency)

        self.assertEquals(len(expected_passes['CAS-IAP']), 1)
        self.assertEquals(len(expected_passes['DWD-MOHp']), 2)
        self.assertEquals(len(expected_passes['MLCD-LU']), 3)
        self.assertEquals(len(expected_passes['MSC']), 4)

        self.assertEquals(len(expected_fails['CAS-IAP']), 0)
        self.assertEquals(len(expected_fails['DWD-MOHp']), 1)
        self.assertEquals(len(expected_fails['MLCD-LU']), 0)
        self.assertEquals(len(expected_fails['MSC']), 1)

        output_path = os.path.join(SANDBOX_DIR, 'run_report')
        self.assertTrue(os.path.exists(output_path))

        with open(output_path) as output:
            lines = output.read().splitlines()
            curr_agency = None

            for line in lines:
                if line.startswith('Pass'):
                    target = line[6:]
                    self.assertIn(target, expected_passes[curr_agency])
                elif line.startswith('Fail'):
                    target = line[6:]
                    self.assertIn(target, expected_fails[curr_agency])
                elif line.strip() != '':
                    curr_agency = line.strip()
                    self.assertIn(line, agency_aliases.values())

Example #11

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_mixed_operator_report(self):
        """
        Test that passing and failing files are written to the operator report
        when a mixture of the two is processed
        """

        infile_root = resolve_test_data_path('data/general/pass_and_fail')

        warnings = {}
        errors = {}

        expected_warnings = {}
        expected_errors = {}

        agency = 'UNKNOWN'

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            for infile in os.listdir(infile_root):
                fullpath = os.path.join(infile_root, infile)

                warnings[fullpath] = 0
                errors[fullpath] = 0

                try:
                    contents = util.read_file(fullpath)
                    ecsv = parser.ExtendedCSV(contents, op_report)
                except (parser.MetadataValidationError,
                        parser.NonStandardDataError) as err:
                    expected_errors[fullpath] = len(err.errors)

                    op_report.add_message(209)
                    op_report.write_failing_file(fullpath, agency)
                    continue

                try:
                    ecsv.validate_metadata_tables()
                    agency = ecsv.extcsv['DATA_GENERATION']['Agency']

                    ecsv.validate_dataset_tables()
                    data_record = models.DataRecord(ecsv)
                    data_record.filename = infile

                    expected_warnings[fullpath] = len(ecsv.warnings)
                    expected_errors[fullpath] = 0
                    op_report.write_passing_file(fullpath, ecsv, data_record)
                except (parser.MetadataValidationError,
                        parser.NonStandardDataError):
                    expected_warnings[fullpath] = len(ecsv.warnings)
                    expected_errors[fullpath] = len(ecsv.errors)

                    op_report.add_message(209)
                    op_report.write_failing_file(fullpath, agency, ecsv)

        today = datetime.now().strftime('%Y-%m-%d')
        output_path = os.path.join(SANDBOX_DIR,
                                   'operator-report-{}.csv'.format(today))

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            reader = csv.reader(output)
            next(reader)

            for line in reader:
                if expected_errors[line[12]] == 0:
                    self.assertEquals(line[0], 'P')
                    self.assertEquals(line[1], 'Warning')
                else:
                    self.assertEquals(line[0], 'F')

                if line[2] == '200':
                    self.assertEquals(expected_errors[line[12]], 0)
                elif line[2] == '209':
                    self.assertGreater(expected_errors[line[12]], 0)
                elif line[1] == 'Warning':
                    warnings[line[12]] += 1
                elif line[1] == 'Error':
                    errors[line[12]] += 1

        self.assertEquals(warnings, expected_warnings)
        self.assertEquals(errors, expected_errors)

Example #12

0

Show file

File: test_report_generation.py Project: kngai/woudc-data-registry

    def test_failing_operator_report(self):
        """Test that a failing file is written in the operator report"""

        filename = 'ecsv-missing-instrument-name.csv'
        infile = resolve_test_data_path('data/general/{}'.format(filename))
        contents = util.read_file(infile)

        ecsv = None
        agency = 'UNKNOWN'

        with report.OperatorReport(SANDBOX_DIR) as op_report:
            try:
                ecsv = parser.ExtendedCSV(contents, op_report)
                ecsv.validate_metadata_tables()
                agency = ecsv.extcsv['DATA_GENERATION']['Agency']

                ecsv.validate_dataset_tables()
                raise AssertionError(
                    'Parsing of {} did not fail'.format(infile))
            except (parser.MetadataValidationError,
                    parser.NonStandardDataError):
                output_path = os.path.join(SANDBOX_DIR, 'run1')

                op_report.add_message(209)
                op_report.write_failing_file(infile, agency, ecsv)

        today = datetime.now().strftime('%Y-%m-%d')
        output_path = os.path.join(SANDBOX_DIR,
                                   'operator-report-{}.csv'.format(today))

        self.assertTrue(os.path.exists(output_path))
        with open(output_path) as output:
            reader = csv.reader(output)
            next(reader)

            warnings = 0
            errors = 0

            expected_warnings = len(ecsv.warnings)
            expected_errors = len(ecsv.errors)
            for _ in range(expected_warnings + expected_errors):
                report_line = next(reader)
                self.assertEquals(report_line[0], 'F')

                if report_line[1] == 'Warning':
                    warnings += 1
                elif report_line[1] == 'Error':
                    errors += 1

            self.assertEquals(warnings, expected_warnings)
            self.assertEquals(errors, expected_errors)

            report_line = next(reader)
            self.assertEquals(report_line[0], 'F')
            self.assertEquals(report_line[1], 'Error')
            self.assertEquals(report_line[2], '209')
            self.assertIn(agency, report_line)
            self.assertIn(os.path.basename(infile), report_line)

            with self.assertRaises(StopIteration):
                next(reader)