def test_exit_with_ConfigError_wtf(tmp_path):
    """Expect get_config() to raise ConfigError in the current directory.

    Note (2022-05-13): this test does not change directory itself.  It
    appears that, to succeed, it would need to change away from the
    directory with the bad config file created in the previous test.
    """
    with pytest.raises(ConfigError):
        get_config()
def test_exit_with_ConfigError_if_default_configfile_found_with_bad_yaml(
        tmp_path):
    """Bad YAML in the default config file makes get_config raise ConfigError."""
    os.chdir(tmp_path)
    default_configfile = Path(DEFAULT_CONFIGFILE_NAME)
    default_configfile.write_text(
        "DELIBE\nRATELY BAD: -: ^^YAML CONTENT^^\n")
    with pytest.raises(ConfigError):
        get_config()
def test_exit_with_ConfigError_if_specified_configfile_found_with_bad_yaml(
        tmp_path):
    """Bad YAML in an explicitly named config file raises ConfigError."""
    os.chdir(tmp_path)
    configfile_name = "dctap_settings.yml"
    Path(configfile_name).write_text(
        "DELIBE\nRATELY BAD: -: ^^YAML CONTENT^^\n")
    with pytest.raises(ConfigError):
        get_config(configfile_name=configfile_name)
def test_get_rows_with_unknown_column(tmp_path):
    """Non-DCTAP elements kept by _get_rows (but dropped by _get_shapes).

    The unknown header "value Gestalt" is expected back as key "valuegestalt".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,valueConstraint,value Gestalt\n"
        ":book,dc:creator,,:author\n"
        ",dc:type,so:Book,\n"
        ":author,foaf:name,,\n"
    )
    expected_rows_list = [
        {
            "shapeID": ":book",
            "propertyID": "dc:creator",
            "valueConstraint": "",
            "valuegestalt": ":author",
        },
        {
            "shapeID": "",
            "propertyID": "dc:type",
            "valueConstraint": "so:Book",
            "valuegestalt": "",
        },
        {
            "shapeID": ":author",
            "propertyID": "foaf:name",
            "valueConstraint": "",
            "valuegestalt": "",
        },
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_rows_with_simple_csvfile(tmp_path):
    """Another simple CSV with three columns."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,valueNodeType\n"
        ":a,dct:creator,URI\n"
        ":a,dct:subject,URI\n"
        ":a,dct:date,String\n"
    )
    expected_rows_list = [
        {"shapeID": ":a", "propertyID": "dct:creator", "valueNodeType": "URI"},
        {"shapeID": ":a", "propertyID": "dct:subject", "valueNodeType": "URI"},
        {"shapeID": ":a", "propertyID": "dct:date", "valueNodeType": "String"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_list_item_separator_defaults_to_single_blank():
    """A list_item_separator of None results in splitting on a single blank."""
    statement = TAPStatementTemplate()
    settings = get_config()
    settings["list_elements"] = ["valueNodeType"]
    settings["list_item_separator"] = None
    statement.valueNodeType = "iri bnode"
    statement._parse_elements_configured_as_list_elements(settings)
    expected_items = ["iri", "bnode"]
    assert statement.valueNodeType == expected_items
def test_normalize_element_name():
    """Aliases are normalized; names not recognized as aliases are unchanged."""
    aliases = get_config().get("element_aliases")
    # Pairs of (name as given, name expected after normalization).
    cases = (
        ("SHAPE ID", "shapeID"),
        ("SHAPE___ID", "shapeID"),
        ("rid", "rid"),
    )
    for given_name, normalized_name in cases:
        assert _normalize_element_name(given_name, aliases) == normalized_name
def test_get_TAPShape_elements_plus_extras_when_config_dict_specified():
    """get_shems lists TAPShape elements first, then configured extras."""
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    main_shems = get_shems(TAPShape, settings)[0]
    xtra_shems = get_shems(TAPShape, settings)[1]
    assert main_shems == ["shapeID", "shapeLabel"]
    assert xtra_shems == ["closed", "start"]
def test_get_rows_raises_exception_if_first_line_has_no_propertyid(tmp_path):
    """Raises SystemExit if first line of CSV has no propertyID."""
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyIdentifier,valueNodeType\n"
        ":a,dct:creator,URI\n"
    )
    config_dict = get_config()
    # Context manager closes the file even though _get_rows exits early.
    with open(csvfile_path) as csvfile_obj:
        with pytest.raises(SystemExit):
            _get_rows(csvfile_obj, config_dict)
def test_get_rows_minimal(tmp_path):
    """Get list of rows, as dicts, from one-row, one-column CSV."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "PropertyID\n"
        "http://purl.org/dc/terms/creator\n"
    )
    expected_rows_list = [{"propertyID": "http://purl.org/dc/terms/creator"}]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_rows_given_customized_element_alias(tmp_path):
    """Using customized element alias, normalized for case, dashes, underscores.

    The header "Prop_ID" is expected to resolve to "propertyID" once the alias
    "propid" has been added to the configured element aliases.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "Prop_ID\n"
        "http://purl.org/dc/terms/creator\n"
    )
    config_dict["element_aliases"].update({"propid": "propertyID"})
    expected_rows_list = [{"propertyID": "http://purl.org/dc/terms/creator"}]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_config_from_builtins():
    """Built-in settings yield a config dict with the expected keys."""
    settings = get_config()
    setting_names = list(settings.keys())
    assert "prefixes" in setting_names
    # Each of these is computed from dataclasses and must be truthy.
    computed_keys = (
        "csv_elements",
        "shape_elements",
        "statement_template_elements",
        "element_aliases",
    )
    for computed_key in computed_keys:
        assert settings.get(computed_key)
    assert "element_aliases" in setting_names  # computed and configurable
def test_extra_shape_elements(tmp_path):
    """Extra shape elements can be appended to the default shape elements.

    Note (2022-05-13): os.chdir(tmp_path) is needed here because a previous
    pytest (above) wrote a bad config file to tmp_path.
    """
    os.chdir(tmp_path)
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    default_shems = ["shapeID", "shapeLabel"]
    assert settings["shape_elements"] == default_shems
    settings["shape_elements"].extend(settings["extra_shape_elements"])
    combined_shems = ["shapeID", "shapeLabel", "closed", "start"]
    assert settings["shape_elements"] == combined_shems
def test_get_config_from_default_config_file_if_present(tmp_path):
    """Settings are read from DEFAULT_CONFIGFILE_NAME when that file exists."""
    os.chdir(tmp_path)
    default_configfile = Path(DEFAULT_CONFIGFILE_NAME)
    default_configfile.write_text(NONDEFAULT_CONFIG_YAMLDOC)
    settings = get_config()
    assert "prefixes" in list(settings.keys())
    # Asserted in the config file or computed; each must be truthy.
    truthy_keys = (
        "default_shape_identifier",
        "csv_elements",
        "shape_elements",
        "statement_template_elements",
        "element_aliases",
    )
    for truthy_key in truthy_keys:
        assert settings.get(truthy_key)
    assert settings.get("value_node_types") is None
def test_valueConstraintType_languagetag_item_separator_pipe(tmp_path):
    """Language tag values are split on pipe character if so configured."""
    config_dict = get_config()
    config_dict["list_item_separator"] = "|"
    config_dict["default_shape_identifier"] = "default"
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        'PropertyID,valueConstraintType,valueConstraint\n'
        'ex:foo,languagetag,"fr|it|de"\n'
    )
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        first_result = csvreader(csvfile_obj, config_dict)[0]
    first_statement = first_result["shapes"][0]["statement_templates"][0]
    value_constraint = first_statement["valueConstraint"]
    assert value_constraint == ["fr", "it", "de"]
def test_valueConstraintType_list_item_separator_pipe(tmp_path):
    """Picklist values are split on pipe character if so configured."""
    config_dict = get_config()
    config_dict["list_item_separator"] = "|"
    config_dict["default_shape_identifier"] = "default"
    os.chdir(tmp_path)
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        'PropertyID,valueConstraintType,valueConstraint\n'
        'ex:foo,picklist,"one|two|three"\n'
    )
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        first_result = csvreader(csvfile_obj, config_dict)[0]
    first_statement = first_result["shapes"][0]["statement_templates"][0]
    value_constraint = first_statement["valueConstraint"]
    assert value_constraint == ["one", "two", "three"]
def test_get_rows_fills_in_short_headers_first_with_empty_header(tmp_path):
    """Where headers shorter than rows, adds one empty header."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,\n"
        ":a,dct:creator,URI\n"
    )
    # The trailing comma in the header line yields an empty-string key.
    expected_rows_list = [
        {"shapeID": ":a", "propertyID": "dct:creator", "": "URI"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_rows_including_header_not_in_DCTAP(tmp_path):
    """Get rows where one header is not part of the DCTAP model.

    The header "Ricearoni" is expected back as the lowercased key "ricearoni".
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "PropertyID,Ricearoni\n"
        "dc:creator,SFO treat\n"
    )
    expected_rows_list = [
        {"propertyID": "dc:creator", "ricearoni": "SFO treat"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_rows_with_complete_csvfile(tmp_path):
    """Simple CSV with all columns."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,shapeLabel,propertyID,"
        "propertyLabel,mandatory,repeatable,valueNodeType,"
        "valueDataType,valueConstraint,valueConstraintType,valueShape,note\n"
        ":a,Book,dct:creator,Creator,1,0,URI,,,,:b,Typically the author.\n"
        ":b,Person,ex:name,Name,1,0,Literal,xsd:string,,,,\n"
    )
    expected_rows_list = [
        {
            "shapeID": ":a",
            "shapeLabel": "Book",
            "propertyID": "dct:creator",
            "propertyLabel": "Creator",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "URI",
            "valueDataType": "",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": ":b",
            "note": "Typically the author.",
        },
        {
            "shapeID": ":b",
            "shapeLabel": "Person",
            "propertyID": "ex:name",
            "propertyLabel": "Name",
            "mandatory": "1",
            "repeatable": "0",
            "valueNodeType": "Literal",
            "valueDataType": "xsd:string",
            "valueConstraint": "",
            "valueConstraintType": "",
            "valueShape": "",
            "note": "",
        },
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    # Only the actual result is checked; checks on the expected literal
    # itself would be tautological.
    assert isinstance(actual_rows_list, list)
    assert actual_rows_list == expected_rows_list
    assert actual_rows_list[0]["mandatory"]
    assert len(actual_rows_list) == 2
def test_get_rows_fills_in_short_rows_with_None_values(tmp_path):
    """Fills in short rows with None values."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,valueNodeType\n"
        ":a,dct:creator\n"
    )
    # The data row has no third value, so valueNodeType is expected as None.
    expected_rows_list = [
        {"shapeID": ":a", "propertyID": "dct:creator", "valueNodeType": None},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_get_rows_fills_in_short_headers_subsequently_with_None(tmp_path):
    """Where headers shorter than rows, extra values collected under header None."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,\n"
        ":a,dct:creator,URI,comment,comment two\n"
    )
    # Third value lands under the empty header; the surplus values are
    # expected as a list under the key None.
    expected_rows_list = [
        {
            "shapeID": ":a",
            "propertyID": "dct:creator",
            "": "URI",
            None: ["comment", "comment two"],
        },
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_mkshapes_returns_tapshape_object_even_in_absence_of_propertyID(
        tmp_path):
    """A row without propertyID still yields a populated TAPShape object."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    shape_only_row = {"shapeID": ":a", "shapeLabel": "Book"}
    actual_shape = _mkshape(row_dict=shape_only_row, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=":a",
        shapeLabel="Book",
        state_list=[],
        shape_warns={},
        shape_extras={},
    )
    assert actual_shape == expected_shape
def test_get_rows_correct_a_real_mess(tmp_path):
    """Messiness in headers (extra spaces, punctuation, wrong case) is corrected."""
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "S hape ID,pr-opertyID___,valueShape ,wildCard \n"
        ":book,dcterms:creator,:author,Yeah yeah yeah\n"
    )
    expected_rows_list = [
        {
            "shapeID": ":book",
            "propertyID": "dcterms:creator",
            "valueShape": ":author",
            "wildcard": "Yeah yeah yeah",
        },
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_warns_if_header_not_recognized(tmp_path):
    """An unrecognized header is kept in the row but yields one warning."""
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "propertyID,ricearoni\n"
        "dc:date,SFO treat\n"
    )
    expected_rows_list = [
        {"propertyID": "dc:date", "ricearoni": "SFO treat"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 1
def test_mkshape_recognizes_only_shape_elements_so_configured(tmp_path):
    """Only elements configured as extra shape elements reach shape_extras.

    "start" is present in the row but not configured, so it is not expected
    in the resulting TAPShape.
    """
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    settings["extra_shape_elements"] = ["closed"]
    row = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
    }
    actual_shape = _mkshape(row, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=":a",
        shapeLabel="Book",
        state_list=[],
        shape_warns={},
        shape_extras={"closed": False},
    )
    assert actual_shape == expected_shape
def test_does_not_warn_if_non_dctap_header_configured_as_extra(tmp_path):
    """No warning if a non-DCTAP header is configured as an extra element.

    Here "ricearoni" is listed under extra_statement_template_elements.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    config_dict["default_shape_identifier"] = "default"
    config_dict["extra_statement_template_elements"] = ["ricearoni"]
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "propertyID,ricearoni\n"
        "dc:date,SFO treat\n"
    )
    expected_rows_list = [
        {"propertyID": "dc:date", "ricearoni": "SFO treat"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, actual_warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
    assert len(actual_warnings) == 0
def test_mkshape_extra_shape_elements_that_are_empty_are_passed_through(
        tmp_path):
    """Empty shape elements pass through; elements absent from the row do not."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    settings["extra_shape_elements"] = ["closed", "start"]
    # "start" is configured but not asserted in this row.
    row = {"shapeID": ":a", "shapeLabel": "", "closed": ""}
    actual_shape = _mkshape(row_dict=row, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=":a",
        shapeLabel="",
        state_list=[],
        shape_warns={},
        shape_extras={"closed": ""},
    )
    assert actual_shape == expected_shape
def test_liststatements_with_csv_column_outside_dctap_model_are_ignored(
        tmp_path):
    """CSV columns not part of the DC TAP model are still returned by _get_rows.

    NOTE(review): the test name says such columns are "ignored"; presumably
    that happens in later processing, as this test only checks _get_rows.
    """
    os.chdir(tmp_path)
    config_dict = get_config()
    csvfile_path = Path(tmp_path).joinpath("some.csv")
    csvfile_path.write_text(
        "shapeID,propertyID,confidential\n"
        ":a,dct:subject,True\n"
    )
    expected_rows_list = [
        {"shapeID": ":a", "propertyID": "dct:subject", "confidential": "True"},
    ]
    # Context manager ensures the CSV file handle is closed after reading.
    with open(csvfile_path) as csvfile_obj:
        actual_rows_list, _warnings = _get_rows(csvfile_obj, config_dict)
    assert actual_rows_list == expected_rows_list
def test_mkshape_sets_shape_elements_only(tmp_path):
    """_mkshape fills shape fields and leaves the statement list empty."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    settings["extra_shape_elements"] = ["closed", "start"]
    row = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
        "propertyID": "ex:name",
        "valueNodeType": "literal",
    }
    built_shape = _mkshape(row, settings)
    assert built_shape.shapeID == ":a"
    assert built_shape.shapeLabel == "Book"
    assert built_shape.shape_warns == {}
    assert built_shape.shape_extras == {"closed": False, "start": True}
    # _mkshape() sets shape fields only, not ST fields
    assert built_shape.state_list == []
def test_mkshape_reads_all_extra_shape_elements_so_configured(tmp_path):
    """Every element configured as an extra shape element is read from the row."""
    os.chdir(tmp_path)  # precaution to avoid interference among pytests
    settings = get_config()
    assert settings["shape_elements"] == ["shapeID", "shapeLabel"]
    settings["extra_shape_elements"] = ["closed", "start"]
    row = {
        "shapeID": ":a",
        "shapeLabel": "Book",
        "closed": False,
        "start": True,
    }
    actual_shape = _mkshape(row_dict=row, config_dict=settings)
    expected_shape = TAPShape(
        shapeID=":a",
        shapeLabel="Book",
        state_list=[],
        shape_warns={},
        shape_extras={"closed": False, "start": True},
    )
    assert actual_shape == expected_shape