예제 #1
0
def test_multiple_measures_and_units_loaded_in_uri_template():
    """
    Check that every measure and unit declared in the info.json ends up in the cube.

    The fixture `multi-measure-unit-data.csv` has multiple measures and multiple units.
    The test asserts that the cube column at index 1 is a multi-measure dimension and
    the column at index 2 is a multi-units column, that each holds exactly the expected
    URIs, and that the resulting cube produces no validation errors.

    The JSON schema describing every property an info.json may have is published at
    <https://github.com/GSS-Cogs/family-schemas/blob/main/dataset-schema.json>.
    """
    fixture_dir = (
        get_test_cases_dir() / "configloaders" / "multi-measure-multi-unit-test-files"
    )
    csv_data = pd.read_csv(fixture_dir / "multi-measure-unit-data.csv")
    # The JSON schema validation errors returned alongside the cube are not inspected here.
    cube, _ = get_cube_from_info_json(
        fixture_dir / "multi-measure-unit-info.json", csv_data
    )

    # --- Measure URIs ---
    measure_column = cube.columns[1]
    assert type(measure_column) is QbColumn
    assert type(measure_column.structural_definition) is QbMultiMeasureDimension

    expected_measure_uris = [
        "http://gss-data.org.uk/def/x/one-litre-and-less",
        "http://gss-data.org.uk/def/x/more-than-one-litre",
        "http://gss-data.org.uk/def/x/number-of-bottles",
    ]
    actual_measure_uris = [
        measure.measure_uri
        for measure in measure_column.structural_definition.measures
    ]
    # Same length plus equal sets: order is irrelevant, but nothing may be missing or extra.
    assert len(actual_measure_uris) == len(expected_measure_uris)
    assert set(actual_measure_uris) == set(expected_measure_uris)

    # --- Unit URIs ---
    unit_column = cube.columns[2]
    assert type(unit_column) is QbColumn
    assert type(unit_column.structural_definition) is QbMultiUnits

    expected_unit_uris = [
        "http://gss-data.org.uk/def/concept/measurement-units/count",
        "http://gss-data.org.uk/def/concept/measurement-units/percentage",
    ]
    actual_unit_uris = [
        unit.unit_uri for unit in unit_column.structural_definition.units
    ]
    assert len(actual_unit_uris) == len(expected_unit_uris)
    assert set(actual_unit_uris) == set(expected_unit_uris)

    # --- Whole-cube validation ---
    errors = cube.validate()
    errors.extend(validate_qb_component_constraints(cube))
    assert_num_validation_errors(errors, 0)
예제 #2
0
파일: build.py 프로젝트: GSS-Cogs/gss-utils
def build(
    info_json: Path,
    catalog_metadata_json_file: Optional[Path],
    output_directory: Path,
    csv_path: Path,
    fail_when_validation_error_occurs: bool,
    validation_errors_file_out: Optional[Path],
) -> None:
    """
    Build a cube from a CSV and its info.json, report validation errors, and write the output.

    :param info_json: Path to the info.json configuration describing the cube.
    :param catalog_metadata_json_file: Optional JSON file whose contents are applied to the
        cube via `_override_catalog_metadata_state`; skipped when `None`.
    :param output_directory: Directory the outputs are written to. It is created (together
        with any missing parent directories) when it does not exist yet.
    :param csv_path: Path to the CSV data file, loaded with `pandas.read_csv`.
    :param fail_when_validation_error_occurs: When true and at least one cube validation
        error was found, the process exits with status 1 before anything is written.
        JSON schema validation errors on their own never trigger the exit.
    :param validation_errors_file_out: Optional path; when given and any error (cube or
        JSON schema) occurred, all errors are dumped there as a JSON list.
    :raises SystemExit: with code 1 under the condition described for
        `fail_when_validation_error_occurs`.
    """
    # Function-local import: `sys.exit` is always available, whereas the bare `exit`
    # builtin is only injected by the `site` module (absent under `python -S`/frozen apps).
    import sys

    print(f"{Style.DIM}CSV: {csv_path.absolute()}")
    print(f"{Style.DIM}info.json: {info_json.absolute()}")
    data = pd.read_csv(csv_path)
    # Narrows the type for static checkers; read_csv can return other types with some options.
    assert isinstance(data, pd.DataFrame)
    cube, json_schema_validation_errors = get_cube_from_info_json(info_json, data)

    if catalog_metadata_json_file is not None:
        _override_catalog_metadata_state(catalog_metadata_json_file, cube)

    validation_errors = cube.validate()
    validation_errors += validate_qb_component_constraints(cube)

    if not output_directory.exists():
        print(f"Creating output directory {output_directory.absolute()}")
        # parents=True: nested output paths work; exist_ok=True: tolerate a concurrent creator.
        output_directory.mkdir(parents=True, exist_ok=True)

    if validation_errors or json_schema_validation_errors:
        for error in validation_errors:
            print(f"{Fore.RED + Style.BRIGHT}Validation Error: {Style.NORMAL + error.message}")
            if isinstance(error, SpecificValidationError):
                print(f"More information: {error.get_error_url()}")

        for err in json_schema_validation_errors:
            print(
                f"{Fore.LIGHTRED_EX + Style.BRIGHT}Schema Validation Error: {Style.NORMAL + err.message}"
            )

        if validation_errors_file_out is not None:
            # Cube errors are serialised as dicts; schema errors only as their message text.
            validation_errors_dict = [
                e.as_json_dict()
                if isinstance(e, DataClassBase)
                else dataclasses.asdict(e)
                for e in validation_errors
            ]
            all_errors = validation_errors_dict + [
                e.message for e in json_schema_validation_errors
            ]

            with open(validation_errors_file_out, "w", encoding="utf-8") as f:
                json.dump(all_errors, f, indent=4)

        # Only cube validation errors abort the build; schema errors are reported but tolerated.
        if fail_when_validation_error_occurs and validation_errors:
            sys.exit(1)

    qb_writer = QbWriter(cube)
    qb_writer.write(output_directory)

    print(f"{Fore.GREEN + Style.BRIGHT}Build Complete")
예제 #3
0
def test_csv_cols_assumed_dimensions():
    """
    A CSV column with no definition is treated as a new local dimension.

    When a column is absent from the info.json `transform.columns` section, the loader
    is expected to assume it is a newly defined local dimension.

    The test asserts that this dimension carries a new code list whose single concept
    is labelled with the value found in the CSV, and that the cube validates cleanly.
    """
    configloaders_dir = get_test_cases_dir() / "configloaders"
    csv_data = pd.read_csv(configloaders_dir / "data.csv")
    # The JSON schema validation errors returned alongside the cube are not inspected here.
    cube, _ = get_cube_from_info_json(configloaders_dir / "info.json", csv_data)

    undefined_columns = [
        column
        for column in cube.columns
        if column.csv_column_title == "Undefined Column"
    ]
    assert len(undefined_columns) == 1
    assumed_column = undefined_columns[0]

    # Explicit isinstance guards (rather than asserts) also narrow the type for checkers.
    if not isinstance(assumed_column, QbColumn):
        raise Exception("Incorrect type")

    dimension = assumed_column.structural_definition
    assert type(dimension) is NewQbDimension

    code_list = dimension.code_list
    assert code_list is not None
    if not isinstance(code_list, NewQbCodeList):
        raise Exception("Incorrect type")

    concepts = list(code_list.concepts)
    assert len(concepts) == 1
    assert "Undefined Column Value" == concepts[0].label

    errors = cube.validate()
    errors.extend(validate_qb_component_constraints(cube))
    assert_num_validation_errors(errors, 0)
예제 #4
0
def step_impl(context):
    """Assert that the cube stored on the step context has no validation errors.

    Runs both the cube's own `validate()` and `validate_qb_component_constraints`;
    on failure the assertion message lists every error's `message`.
    """
    cube: QbCube = context.cube
    errors = cube.validate()
    errors.extend(validate_qb_component_constraints(cube))
    assert not errors, [error.message for error in errors]
예제 #5
0
def test_cube_metadata_extracted_from_info_json():
    """
    Check that the catalog metadata in the info.json is mapped onto `cube.metadata`.

    Covers creator, description, issued date, keywords, landing page, license, public
    contact point, publisher, summary, themes, title and the URI-safe identifier, then
    confirms that the cube produces no validation errors.
    """
    fixture_dir = (
        get_test_cases_dir() / "configloaders" / "multi-measure-multi-unit-test-files"
    )
    csv_data = pd.read_csv(fixture_dir / "multi-measure-unit-data.csv")
    # The JSON schema validation errors returned alongside the cube are not inspected here.
    cube, _ = get_cube_from_info_json(
        fixture_dir / "multi-measure-unit-info.json", csv_data
    )
    metadata = cube.metadata

    # Creator
    assert (
        metadata.creator_uri
        == "https://www.gov.uk/government/organisations/hm-revenue-customs"
    )

    # Description
    assert metadata.description == (
        "All bulletins provide details on percentage of one litre or less & more than "
        "one litre bottles. This information is provided on a yearly basis."
    )

    # Issued date
    assert metadata.dataset_issued == parser.parse("2019-02-28")

    # Keywords: there's currently no `keywords` property to map from the info.json,
    # so the cube is expected to carry none.
    keywords = metadata.keywords
    assert len(keywords) == 0
    assert set(keywords) == set()

    # Landing page
    assert metadata.landing_page_uris == [
        "https://www.gov.uk/government/statistics/bottles-bulletin"
    ]

    # License: the info.json schema doesn't allow a licence property just yet.
    assert metadata.license_uri is None

    # Public contact point: the info.json schema doesn't allow this property just yet.
    assert metadata.public_contact_point_uri is None

    # Publisher
    assert (
        metadata.publisher_uri
        == "https://www.gov.uk/government/organisations/hm-revenue-customs"
    )

    # Summary: the info.json schema doesn't allow a summary property just yet.
    assert metadata.summary is None

    # Themes: sourced from the info.json `families` property.
    expected_themes = ["http://gss-data.org.uk/def/gdp#trade"]
    themes = [str(theme) for theme in metadata.theme_uris]
    assert len(themes) == len(expected_themes)
    assert set(themes) == set(expected_themes)

    # Title
    assert metadata.title == "bottles"

    # URI-safe identifier
    assert metadata.uri_safe_identifier == "bottles-bulletin"

    # Whole-cube validation
    errors = cube.validate()
    errors.extend(validate_qb_component_constraints(cube))
    assert_num_validation_errors(errors, 0)