Exemple #1
0
def test_get_rows_raises_exception_if_first_line_has_no_propertyid(tmp_path):
    """Exit with SystemExit if the CSV header line has no propertyID column."""
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyIdentifier,valueNodeType\n"
                             ":a,dct:creator,URI\n"))
    config_dict = get_config()
    # Context manager guarantees the handle is closed even though
    # _get_rows is expected to bail out with SystemExit.
    with open(csvfile_path) as csvfile_obj:
        with pytest.raises(SystemExit):
            _get_rows(csvfile_obj, config_dict)
Exemple #2
0
def test_get_rows_with_simple_csvfile(tmp_path):
    """Return one dict per data row for a simple three-column CSV."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,valueNodeType\n"
                             ":a,dct:creator,URI\n"
                             ":a,dct:subject,URI\n"
                             ":a,dct:date,String\n"))
    expected_rows_list = [
        {
            'shapeID': ':a',
            'propertyID': 'dct:creator',
            'valueNodeType': 'URI',
        },
        {
            'shapeID': ':a',
            'propertyID': 'dct:subject',
            'valueNodeType': 'URI',
        },
        {
            'shapeID': ':a',
            'propertyID': 'dct:date',
            'valueNodeType': 'String',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #3
0
def test_get_rows_with_unknown_column(tmp_path):
    """Non-DCTAP elements kept by _get_rows (but dropped by _get_shapes).

    The unrecognized header "value Gestalt" is expected to come back
    as the key "valuegestalt".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        ("shapeID,propertyID,valueConstraint,value Gestalt\n"
         ":book,dc:creator,,:author\n"
         ",dc:type,so:Book,\n"
         ":author,foaf:name,,\n"))
    expected_rows_list = [
        {
            'shapeID': ':book',
            'propertyID': 'dc:creator',
            'valueConstraint': '',
            'valuegestalt': ':author',
        },
        {
            'shapeID': '',
            'propertyID': 'dc:type',
            'valueConstraint': 'so:Book',
            'valuegestalt': '',
        },
        {
            'shapeID': ':author',
            'propertyID': 'foaf:name',
            'valueConstraint': '',
            'valuegestalt': '',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #4
0
def test_get_rows_given_customized_element_alias(tmp_path):
    """Using customized element alias, normalized for case, dashes, underscores.

    The header "Prop_ID" is expected to match the configured alias
    "propid" and be reported as "propertyID".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["element_aliases"].update({"propid": "propertyID"})
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("Prop_ID\n" "http://purl.org/dc/terms/creator\n"))
    expected_rows_list = [{'propertyID': 'http://purl.org/dc/terms/creator'}]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #5
0
def test_get_rows_minimal(tmp_path):
    """Get list of rows, as dicts, from one-row, one-column CSV.

    The header "PropertyID" is expected to come back as "propertyID".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("PropertyID\n"
                             "http://purl.org/dc/terms/creator\n"))
    expected_rows_list = [{'propertyID': 'http://purl.org/dc/terms/creator'}]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #6
0
def test_get_rows_including_header_not_in_DCTAP(tmp_path):
    """Get rows where one header is not part of the DCTAP model.

    The unrecognized header "Ricearoni" is expected to be kept,
    lowercased, alongside the recognized "propertyID".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("PropertyID,Ricearoni\n"
                             "dc:creator,SFO treat\n"))
    expected_rows_list = [
        {
            'propertyID': 'dc:creator',
            'ricearoni': 'SFO treat',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #7
0
def test_get_rows_fills_in_short_headers_first_with_empty_header(tmp_path):
    """Where headers shorter than rows, adds one empty header.

    The header line ends with a trailing comma, so the third value of
    the data row is expected under the empty-string key.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,\n" ":a,dct:creator,URI\n"))
    expected_rows_list = [
        {
            'shapeID': ':a',
            'propertyID': 'dct:creator',
            '': 'URI',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #8
0
def test_get_rows_with_complete_csvfile(tmp_path):
    """Simple CSV with all columns.

    Every cell is expected back as a string, with empty cells as "".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        ("shapeID,shapeLabel,propertyID,"
         "propertyLabel,mandatory,repeatable,valueNodeType,"
         "valueDataType,valueConstraint,valueConstraintType,valueShape,note\n"
         ":a,Book,dct:creator,Creator,1,0,URI,,,,:b,Typically the author.\n"
         ":b,Person,ex:name,Name,1,0,Literal,xsd:string,,,,\n"))
    expected_rows_list = [
        {
            "shapeID": ":a",
            "shapeLabel": "Book",
            "propertyID": "dct:creator",
            "propertyLabel": "Creator",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "URI",
            "valueDataType": "",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": ":b",
            "note": "Typically the author.",
        },
        {
            "shapeID": ":b",
            "shapeLabel": "Person",
            "propertyID": "ex:name",
            "propertyLabel": "Name",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "Literal",
            "valueDataType": "xsd:string",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": "",
            "note": "",
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    # Only the value under test is checked; assertions about the
    # hand-written expected literal would be tautological.
    assert isinstance(actual_rows_list, list)
    assert len(actual_rows_list) == 2
    assert actual_rows_list == expected_rows_list
    assert actual_rows_list[0]["mandatory"]
Exemple #9
0
def test_get_rows_fills_in_short_rows_with_None_values(tmp_path):
    """Fills in short rows with None values.

    The data row has two values for three headers, so the third
    header is expected to map to None.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,valueNodeType\n"
                             ":a,dct:creator\n"))
    expected_rows_list = [
        {
            'shapeID': ':a',
            'propertyID': 'dct:creator',
            'valueNodeType': None,
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #10
0
def test_get_rows_fills_in_short_headers_subsequently_with_None(tmp_path):
    """Where headers shorter than rows, extra values collected under header None.

    The third value lands under the empty-string header created by the
    trailing comma; the remaining values are expected as a list under
    the key None.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text("shapeID,propertyID,\n"
                            ":a,dct:creator,URI,comment,comment two\n")
    expected_rows_list = [
        {
            'shapeID': ':a',
            'propertyID': 'dct:creator',
            '': 'URI',
            None: ['comment', 'comment two'],
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #11
0
def test_warns_if_header_not_recognized(tmp_path):
    """Return exactly one warning when a CSV header is not recognized.

    The unrecognized column "ricearoni" is still expected in the rows.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("propertyID,ricearoni\n" "dc:date,SFO treat\n"))
    expected_rows_list = [
        {
            'propertyID': 'dc:date',
            'ricearoni': 'SFO treat',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 1
Exemple #12
0
def test_get_rows_correct_a_real_mess(tmp_path):
    """Messiness in headers (extra spaces, punctuation, wrong case) is corrected.

    Recognized headers are expected back in canonical form; the
    unrecognized "wildCard" is expected lowercased with spaces removed.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "S hape ID,pr-opertyID___,valueShape     ,wildCard    \n"
        ":book,dcterms:creator,:author,Yeah yeah yeah\n")
    expected_rows_list = [
        {
            'shapeID': ':book',
            'propertyID': 'dcterms:creator',
            'valueShape': ':author',
            'wildcard': 'Yeah yeah yeah',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #13
0
def test_does_not_warn_if_non_dctap_header_configured_as_extra(tmp_path):
    """Return no warnings when a non-DCTAP header is configured as extra.

    "ricearoni" is listed under "extra_statement_template_elements" in
    the config, so it is expected in the rows without any warning.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    config_dict["extra_statement_template_elements"] = ["ricearoni"]
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("propertyID,ricearoni\n" "dc:date,SFO treat\n"))
    expected_rows_list = [
        {
            'propertyID': 'dc:date',
            'ricearoni': 'SFO treat',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 0
Exemple #14
0
def test_liststatements_with_csv_column_outside_dctap_model_are_ignored(
        tmp_path):
    """CSV columns not part of the DC TAP model are passed through by _get_rows.

    Despite the test name, the "confidential" column is expected to be
    present in the rows returned here; any dropping of such columns
    would have to happen in a later processing step.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(("shapeID,propertyID,confidential\n"
                             ":a,dct:subject,True\n"))
    expected_rows_list = [
        {
            "shapeID": ":a",
            "propertyID": "dct:subject",
            "confidential": "True",
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
Exemple #15
0
def test_get_rows_with_unknown_column2(tmp_path):
    """Passes thru unknown header, lowercased.

    The unrecognized header "wildCard" is expected back as "wildcard"
    in every row, with empty cells as "".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text("shapeID,propertyID,valueShape,wildCard\n"
                            ":book,dcterms:creator,:author,Yeah yeah yeah\n"
                            ":author,foaf:name,,\n")
    expected_rows_list = [
        {
            'shapeID': ':book',
            'propertyID': 'dcterms:creator',
            'valueShape': ':author',
            'wildcard': 'Yeah yeah yeah',
        },
        {
            'shapeID': ':author',
            'propertyID': 'foaf:name',
            'valueShape': '',
            'wildcard': '',
        },
    ]
    # Close the CSV file deterministically instead of leaking the handle.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _ = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list