Beispiel #1
0
def test_exit_with_ConfigError_wtf(tmp_path):
    """Expect ConfigError from a plain get_config() call.

    Author's note (2022-05-13): it appeared that this test, to succeed,
    would need to change away from the directory holding the bad config
    file created by a previous test. As written, the test never changes
    directory and does not use tmp_path.
    """
    expected_error = ConfigError
    with pytest.raises(expected_error):
        get_config()
Beispiel #2
0
def test_exit_with_ConfigError_if_default_configfile_found_with_bad_yaml(
        tmp_path):
    """Raise ConfigError when the default config file holds malformed YAML."""
    os.chdir(tmp_path)
    # Write deliberately broken YAML under the default config file name,
    # relative to the temporary working directory.
    default_configfile = Path(DEFAULT_CONFIGFILE_NAME)
    default_configfile.write_text(
        "DELIBE\nRATELY BAD: -: ^^YAML CONTENT^^\n")
    with pytest.raises(ConfigError):
        get_config()
Beispiel #3
0
def test_exit_with_ConfigError_if_specified_configfile_found_with_bad_yaml(
        tmp_path):
    """Raise ConfigError when an explicitly named config file has bad YAML."""
    os.chdir(tmp_path)
    nondefault_configfile_name = "dctap_settings.yml"
    # Deliberately broken YAML, written under a non-default file name.
    broken_yaml = "DELIBE\nRATELY BAD: -: ^^YAML CONTENT^^\n"
    configfile = Path(nondefault_configfile_name)
    configfile.write_text(broken_yaml)
    with pytest.raises(ConfigError):
        get_config(configfile_name=nondefault_configfile_name)
Beispiel #4
0
def test_get_rows_with_unknown_column(tmp_path):
    """Non-DCTAP elements kept by _get_rows (but dropped by _get_shapes).

    The header "value Gestalt" is not a DCTAP element; the expected rows
    show it coming back under the key "valuegestalt".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        ("shapeID,propertyID,valueConstraint,value Gestalt\n"
         ":book,dc:creator,,:author\n"
         ",dc:type,so:Book,\n"
         ":author,foaf:name,,\n"))
    expected_rows_list = [{
        'shapeID': ':book',
        'propertyID': 'dc:creator',
        'valueConstraint': '',
        'valuegestalt': ':author'
    }, {
        'shapeID': '',
        'propertyID': 'dc:type',
        'valueConstraint': 'so:Book',
        'valuegestalt': ''
    }, {
        'shapeID': ':author',
        'propertyID': 'foaf:name',
        'valueConstraint': '',
        'valuegestalt': ''
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #5
0
def test_get_rows_with_simple_csvfile(tmp_path):
    """Another simple CSV with three columns, returned as one dict per row."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,valueNodeType\n"
                             ":a,dct:creator,URI\n"
                             ":a,dct:subject,URI\n"
                             ":a,dct:date,String\n"))
    expected_rows_list = [{
        'shapeID': ':a',
        'propertyID': 'dct:creator',
        'valueNodeType': 'URI'
    }, {
        'shapeID': ':a',
        'propertyID': 'dct:subject',
        'valueNodeType': 'URI'
    }, {
        'shapeID': ':a',
        'propertyID': 'dct:date',
        'valueNodeType': 'String'
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_list_item_separator_defaults_to_single_blank():
    """With list_item_separator set to None, list values split on a blank."""
    statement_template = TAPStatementTemplate()
    settings = get_config()
    # valueNodeType is configured as a list element; the separator is unset.
    settings["list_elements"] = ["valueNodeType"]
    settings["list_item_separator"] = None
    statement_template.valueNodeType = "iri bnode"
    statement_template._parse_elements_configured_as_list_elements(settings)
    expected_items = ["iri", "bnode"]
    assert statement_template.valueNodeType == expected_items
Beispiel #7
0
def test_normalize_element_name():
    """Check header normalization against the configured element aliases.

    "SHAPE ID" and "SHAPE___ID" are both expected to normalize to
    "shapeID", while "rid" is expected to come back unchanged.
    """
    element_aliases_dict = get_config().get("element_aliases")
    name_cases = (
        ("SHAPE ID", "shapeID"),
        ("SHAPE___ID", "shapeID"),
        ("rid", "rid"),
    )
    for raw_name, normalized_name in name_cases:
        actual_name = _normalize_element_name(raw_name, element_aliases_dict)
        assert actual_name == normalized_name
def test_get_TAPShape_elements_plus_extras_when_config_dict_specified():
    """List TAPShape elements plus extra shape elements.

    Item 0 of the get_shems() result is compared with the main shape
    elements, item 1 with the configured extra shape elements.
    """
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    main_shems = get_shems(TAPShape, settings)[0]
    assert main_shems == ["shapeID", "shapeLabel"]
    xtra_shems = get_shems(TAPShape, settings)[1]
    assert xtra_shems == ["closed", "start"]
Beispiel #9
0
def test_get_rows_raises_exception_if_first_line_has_no_propertyid(tmp_path):
    """Raises exception if first line of CSV has no propertyID.

    The header row uses "propertyIdentifier" instead of "propertyID",
    and _get_rows is expected to exit with SystemExit.
    """
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyIdentifier,valueNodeType\n"
                             ":a,dct:creator,URI\n"))
    config_dict = get_config()
    # The context manager closes the file even though _get_rows exits early.
    with open(csvfile_path) as csvfile_obj:
        with pytest.raises(SystemExit):
            _get_rows(csvfile_obj, config_dict)
Beispiel #10
0
def test_get_rows_minimal(tmp_path):
    """Get list of rows, as dicts, from one-row, one-column CSV.

    The header "PropertyID" is expected back as the key "propertyID".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("PropertyID\n"
                             "http://purl.org/dc/terms/creator\n"))
    expected_rows_list = [{'propertyID': 'http://purl.org/dc/terms/creator'}]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #11
0
def test_get_rows_given_customized_element_alias(tmp_path):
    """Using customized element alias, normalized for case, dashes, underscores.

    The alias "propid" is added to the configuration, and the CSV header
    "Prop_ID" is expected back as the key "propertyID".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["element_aliases"].update({"propid": "propertyID"})
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("Prop_ID\n" "http://purl.org/dc/terms/creator\n"))
    expected_rows_list = [{'propertyID': 'http://purl.org/dc/terms/creator'}]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #12
0
def test_get_config_from_builtins():
    """Check the config dict returned by get_config() without arguments.

    "prefixes" must be present as a key, and the settings marked in the
    original test as computed from dataclasses must be truthy.
    """
    config_dict = get_config()
    config_keys = config_dict.keys()
    assert "prefixes" in config_keys
    computed_settings = (
        "csv_elements",
        "shape_elements",
        "statement_template_elements",
        "element_aliases",
    )
    for setting_name in computed_settings:
        assert config_dict.get(setting_name), setting_name
    # element_aliases is both computed and configurable.
    assert "element_aliases" in config_keys
Beispiel #13
0
def test_extra_shape_elements(tmp_path):
    """Extend the shape elements with configured extra shape elements.

    Author's note (2022-05-13): os.chdir(tmp_path) is needed here because
    a previous pytest wrote a bad config file to its working directory.
    """
    os.chdir(tmp_path)
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    # Before extending, only the two main shape elements are present.
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    settings["shape_elements"].extend(settings["extra_shape_elements"])
    expected_after_extend = ["shapeID", "shapeLabel", "closed", "start"]
    assert settings["shape_elements"] == expected_after_extend
Beispiel #14
0
def test_get_config_from_default_config_file_if_present(tmp_path):
    """Read settings from DEFAULT_CONFIGFILE_NAME when that file exists.

    NONDEFAULT_CONFIG_YAMLDOC is written to the default config file in
    the temporary working directory before get_config() is called.
    """
    os.chdir(tmp_path)
    default_configfile = Path(DEFAULT_CONFIGFILE_NAME)
    default_configfile.write_text(NONDEFAULT_CONFIG_YAMLDOC)
    config_dict = get_config()
    assert "prefixes" in config_dict.keys()
    truthy_settings = (
        "default_shape_identifier",
        "csv_elements",  # computed
        "shape_elements",  # computed
        "statement_template_elements",  # computed
        "element_aliases",  # asserted/computed
    )
    for setting_name in truthy_settings:
        assert config_dict.get(setting_name), setting_name
    assert config_dict.get("value_node_types") is None
def test_valueConstraintType_languagetag_item_separator_pipe(tmp_path):
    """Language tag values are split on pipe character if so configured.

    With list_item_separator set to "|", a valueConstraint of "fr|it|de"
    with valueConstraintType "languagetag" is expected as a three-item list.
    """
    config_dict = get_config()
    config_dict["list_item_separator"] = "|"
    config_dict["default_shape_identifier"] = "default"
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(('PropertyID,valueConstraintType,valueConstraint\n'
                             'ex:foo,languagetag,"fr|it|de"\n'))
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        csvreader_output = csvreader(csvfile_obj, config_dict)
    first_statement_template = (
        csvreader_output[0]["shapes"][0]["statement_templates"][0])
    value_constraint = first_statement_template["valueConstraint"]
    assert value_constraint == ["fr", "it", "de"]
def test_valueConstraintType_list_item_separator_pipe(tmp_path):
    """Picklist values are split on pipe character if so configured.

    With list_item_separator set to "|", a valueConstraint of
    "one|two|three" with valueConstraintType "picklist" is expected as a
    three-item list.
    """
    config_dict = get_config()
    config_dict["list_item_separator"] = "|"
    config_dict["default_shape_identifier"] = "default"
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(('PropertyID,valueConstraintType,valueConstraint\n'
                             'ex:foo,picklist,"one|two|three"\n'))
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        csvreader_output = csvreader(csvfile_obj, config_dict)
    first_statement_template = (
        csvreader_output[0]["shapes"][0]["statement_templates"][0])
    value_constraint = first_statement_template["valueConstraint"]
    assert value_constraint == ["one", "two", "three"]
Beispiel #17
0
def test_get_rows_fills_in_short_headers_first_with_empty_header(tmp_path):
    """Where headers shorter than rows, adds one empty header.

    The header line ends with a trailing comma, so the third value of the
    row is expected under the empty-string key.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,\n" ":a,dct:creator,URI\n"))
    expected_rows_list = [{
        'shapeID': ':a',
        'propertyID': 'dct:creator',
        '': 'URI'
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #18
0
def test_get_rows_including_header_not_in_DCTAP(tmp_path):
    """Get rows where one header is not part of the DCTAP model.

    The header "Ricearoni" is expected back as the key "ricearoni".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("PropertyID,Ricearoni\n"
                             "dc:creator,SFO treat\n"))
    expected_rows_list = [{
        'propertyID': 'dc:creator',
        'ricearoni': 'SFO treat',
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #19
0
def test_get_rows_with_complete_csvfile(tmp_path):
    """Simple CSV with all columns.

    Values are expected back as strings, with empty cells as empty strings.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        ("shapeID,shapeLabel,propertyID,"
         "propertyLabel,mandatory,repeatable,valueNodeType,"
         "valueDataType,valueConstraint,valueConstraintType,valueShape,note\n"
         ":a,Book,dct:creator,Creator,1,0,URI,,,,:b,Typically the author.\n"
         ":b,Person,ex:name,Name,1,0,Literal,xsd:string,,,,\n"))
    expected_rows_list = [
        {
            "shapeID": ":a",
            "shapeLabel": "Book",
            "propertyID": "dct:creator",
            "propertyLabel": "Creator",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "URI",
            "valueDataType": "",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": ":b",
            "note": "Typically the author.",
        },
        {
            "shapeID": ":b",
            "shapeLabel": "Person",
            "propertyID": "ex:name",
            "propertyLabel": "Name",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "Literal",
            "valueDataType": "xsd:string",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": "",
            "note": "",
        },
    ]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert isinstance(actual_rows_list, list)
    assert isinstance(expected_rows_list, list)
    assert actual_rows_list == expected_rows_list
    assert actual_rows_list[0]["mandatory"]
    assert len(actual_rows_list) == 2
    assert len(expected_rows_list) == 2
Beispiel #20
0
def test_get_rows_fills_in_short_rows_with_None_values(tmp_path):
    """Fills in short rows with None values.

    The row has two values for three headers, so the third key is
    expected to map to None.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,valueNodeType\n"
                             ":a,dct:creator\n"))
    expected_rows_list = [{
        'shapeID': ':a',
        'propertyID': 'dct:creator',
        'valueNodeType': None
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #21
0
def test_get_rows_fills_in_short_headers_subsequently_with_None(tmp_path):
    """Where headers shorter than rows, extra values collected under header None.

    The third value lands under the empty-string key (from the trailing
    comma in the header line); the remaining values are expected as a list
    under the key None.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text("shapeID,propertyID,\n"
                            ":a,dct:creator,URI,comment,comment two\n")
    expected_rows_list = [{
        'shapeID': ':a',
        'propertyID': 'dct:creator',
        '': 'URI',
        None: ['comment', 'comment two']
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_mkshapes_returns_tapshape_object_even_in_absence_of_propertyID(
        tmp_path):
    """Build a TAPShape from a row that carries no propertyID."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    shape_only_row = {"shapeID": ":a", "shapeLabel": "Book"}
    actual_shape = _mkshape(row_dict=shape_only_row, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=':a',
        shapeLabel='Book',
        state_list=[],
        shape_warns={},
        shape_extras={},
    )
    assert actual_shape == expected_shape
Beispiel #23
0
def test_get_rows_correct_a_real_mess(tmp_path):
    """Messiness in headers (extra spaces, punctuation, wrong case) is corrected.

    The unrecognized header "wildCard" is expected back as "wildcard".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "S hape ID,pr-opertyID___,valueShape     ,wildCard    \n"
        ":book,dcterms:creator,:author,Yeah yeah yeah\n")
    expected_rows_list = [{
        'shapeID': ':book',
        'propertyID': 'dcterms:creator',
        'valueShape': ':author',
        'wildcard': 'Yeah yeah yeah',
    }]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Beispiel #24
0
def test_warns_if_header_not_recognized(tmp_path):
    """Unrecognized header is kept in the rows and produces one warning.

    The header "ricearoni" is not configured anywhere in this test; the
    row is expected to keep it, and exactly one warning is expected.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("propertyID,ricearoni\n" "dc:date,SFO treat\n"))
    expected_rows_list = [
        {
            'propertyID': 'dc:date',
            'ricearoni': 'SFO treat',
        },
    ]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 1
def test_mkshape_recognizes_only_shape_elements_so_configured(tmp_path):
    """Keep only the extra shape elements that the configuration lists.

    Only "closed" is configured as an extra shape element, so "start" is
    expected to be absent from shape_extras even though the row has it.
    """
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    settings["extra_shape_elements"] = ["closed"]
    row_with_extras = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
    }
    actual_shape = _mkshape(row_with_extras, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=':a',
        shapeLabel='Book',
        state_list=[],
        shape_warns={},
        shape_extras={"closed": False},
    )
    assert actual_shape == expected_shape
Beispiel #26
0
def test_does_not_warn_if_non_dctap_header_configured_as_extra(tmp_path):
    """No warning for a non-DCTAP header configured as an extra element.

    "ricearoni" is listed under extra_statement_template_elements, so the
    row is expected to keep it and no warnings are expected.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    config_dict["extra_statement_template_elements"] = ["ricearoni"]
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("propertyID,ricearoni\n" "dc:date,SFO treat\n"))
    expected_rows_list = [
        {
            'propertyID': 'dc:date',
            'ricearoni': 'SFO treat',
        },
    ]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 0
def test_mkshape_extra_shape_elements_that_are_empty_are_passed_through(
        tmp_path):
    """Pass empty extra shape elements through; omit those not in the row.

    "closed" is present in the row with an empty value and is expected in
    shape_extras; "start" is configured but absent from the row and is
    expected to be left out.
    """
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    settings["extra_shape_elements"] = ["closed", "start"]
    row_with_empty_values = {"shapeID": ":a", "shapeLabel": "", "closed": ""}
    actual_shape = _mkshape(row_dict=row_with_empty_values,
                            config_dict=settings)
    expected_shape = TAPShape(
        shapeID=':a',
        shapeLabel='',
        state_list=[],
        shape_warns={},
        shape_extras={"closed": ""},
    )
    assert actual_shape == expected_shape
Beispiel #28
0
def test_liststatements_with_csv_column_outside_dctap_model_are_ignored(
        tmp_path):
    """Column outside the DC TAP model is still returned by _get_rows.

    The column "confidential" is not a DCTAP element, but the expected
    rows keep it with its value as the string "True".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,confidential\n"
                             ":a,dct:subject,True\n"))
    expected_rows_list = [
        {
            "shapeID": ":a",
            "propertyID": "dct:subject",
            "confidential": "True"
        },
    ]
    # Read inside a context manager so the file handle is always closed.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_mkshape_sets_shape_elements_only(tmp_path):
    """Fill in shape fields only, leaving statement template fields alone.

    The row also carries "propertyID" and "valueNodeType"; the resulting
    shape is expected to have an empty state_list.
    """
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    mixed_row = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
        "propertyID": "ex:name",
        "valueNodeType": "literal",
    }
    shape = _mkshape(mixed_row, settings)
    assert shape.shapeID == ":a"
    assert shape.shapeLabel == "Book"
    assert shape.shape_warns == {}
    expected_extras = {"closed": False, "start": True}
    assert shape.shape_extras == expected_extras
    # _mkshape() sets shape fields only, not statement template fields.
    assert shape.state_list == []
def test_mkshape_reads_all_extra_shape_elements_so_configured(tmp_path):
    """Collect every configured extra shape element into shape_extras."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    settings["extra_shape_elements"] = ["closed", "start"]
    row_with_extras = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
    }
    actual_shape = _mkshape(row_dict=row_with_extras, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=':a',
        shapeLabel='Book',
        state_list=[],
        shape_warns={},
        shape_extras={"closed": False, "start": True},
    )
    assert actual_shape == expected_shape