def test_fromexcel_correct(excel_wordlist):
    """Check that importing from Excel reproduces the original dataset's IDs.

    The ``excel_wordlist`` fixture is unpacked into the lexicon workbook, the
    cognate-set workbook, and a pair of (empty dataset, metadata path of the
    original dataset).  After loading both workbooks into the empty dataset,
    the sets of form IDs and cognate IDs must equal those of the original.
    """
    lexicon, cogsets, (empty_dataset, original_md) = excel_wordlist
    original = pycldf.Wordlist.from_metadata(original_md)
    # TODO: parameterize original, like the other parameters, over possible
    # test datasets.
    f.load_dataset(Path(empty_dataset.tablegroup._fname), str(lexicon), str(cogsets))

    def table_file(dataset, table_name):
        # Location of the table's file, resolved against the directory of
        # the metadata file of *that* dataset (used only for the message).
        table = dataset[table_name]
        return table._parent._fname.parent / str(table.url)

    for table_name in ("FormTable", "CognateTable"):
        ids_from_excel = {row["ID"] for row in empty_dataset[table_name]}
        ids_original = {row["ID"] for row in original[table_name]}
        assert ids_original == ids_from_excel, "{:} and {:} don't match.".format(
            table_file(empty_dataset, table_name),
            # Use the original dataset's own table URL here; the message
            # previously reused the imported dataset's URL for both paths.
            table_file(original, table_name),
        )
def test_matrix_import_skips_question_marks():
    """Work-in-progress test: import a tiny matrix workbook containing a "?".

    Builds a three-row workbook in a fresh temporary directory, runs
    ``load_dataset`` on it and prints the resulting forms.  It currently makes
    no real assertion (it ends in ``assert True``).
    """
    rows = [  # noqa
        ["Concept", "L1", "L2"],
        ["C1", "?", "Form1"],
        ["C2", "Form2", "Form3"],
    ]

    # Write the rows into an Excel workbook inside a temporary directory.
    workbook = op.Workbook()
    sheet = workbook.active
    for cells in rows:
        sheet.append(cells)
    tmp_dir = Path(tempfile.mkdtemp(prefix="lexedata-test"))
    xlsx_path = tmp_dir / "test.xlsx"
    workbook.save(xlsx_path)

    # TODO: struggling to make a matrix importer run on such a simple
    # structure without too much coding ... this might even be a bad sign?
    # Create simple metadata next to the workbook.
    scratch_dataset = pycldf.Wordlist.in_dir(tmp_dir)
    empty_form = {"Concept": "", "L1": "", "L2": "", "value": ""}
    scratch_dataset.write(FormTable=[empty_form])  # noqa

    # NOTE(review): the import below targets the minimal metadata shipped with
    # the tests, not the metadata of the dataset written just above
    # (that would be ``scratch_dataset.tablegroup._fname``).
    metadata = Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"

    # Load the workbook into the dataset described by ``metadata``.
    load_dataset(
        metadata=metadata,
        lexicon=xlsx_path,
    )

    reloaded = pycldf.Dataset.from_metadata(metadata)
    forms = set(reloaded["FormTable"])
    print(forms)
    assert True
def test_no_first_row_in_excel(empty_excel):
    """Expect an AssertionError when the first data row has no name.

    A copy of the minimal metadata is loaded together with the ``empty_excel``
    fixture (presumably a workbook without usable data rows -- confirm against
    the fixture); ``load_dataset`` must fail with the message checked below.
    """
    minimal_metadata = Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"
    metadata_copy = copy_metadata(original=minimal_metadata)
    expected_message = (
        "Your first data row didn't have a name. Please check your format specification or ensure the "
        "first row has a name."
    )
    with pytest.raises(AssertionError, match=expected_message):
        f.load_dataset(metadata=metadata_copy, lexicon=empty_excel)
def test_no_dialect_excel_cognate_parser(tmp_path, caplog, empty_excel):
    """Cognate import with an empty JSON metadata file raises and logs a fallback.

    The metadata file contains only ``{}``, i.e. no user-defined format
    specification.  Loading a cognate workbook against it must raise
    ``ValueError``, and the captured log must mention both the missing
    specification and the default parser.
    """
    # ExcelCognateParser
    path = tmp_path / "invented_path"
    # write_text closes the file itself; the former ``open("w").write(...)``
    # left the handle open and relied on garbage collection to flush it.
    path.write_text("{}")
    with pytest.raises(ValueError):
        # mock empty json file
        f.load_dataset(metadata=path, lexicon=None, cognate_lexicon=empty_excel)
    assert re.search("User-defined format specification .* missing", caplog.text)
    assert re.search("default parser", caplog.text)
def test_dialect_missing_key_excel_cognate_parser(tmp_path, caplog, empty_excel):
    """Cognate import with an empty ``special:fromexcel`` section raises and logs.

    The metadata file defines ``special:fromexcel`` as an empty object, so the
    format specification lacks its keys.  Loading a cognate workbook must
    raise ``ValueError`` and log that a key was missing and that the default
    parser is used instead.
    """
    path = tmp_path / "invented_path"
    # write_text closes the file itself; the former ``open("w").write(...)``
    # left the handle open and relied on garbage collection to flush it.
    path.write_text("""{"special:fromexcel": {}}""")
    # CognateExcelParser
    with pytest.raises(ValueError):
        f.load_dataset(path, lexicon=None, cognate_lexicon=empty_excel)
    assert re.search(
        r"User-defined format specification in the json-file was missing the key .*falling back to default parser.*",
        caplog.text,
    )
def test_dialect_missing_key_excel_parser(tmp_path, caplog, empty_excel):
    """Lexicon import with an empty ``special:fromexcel`` section raises and logs.

    The metadata file defines ``special:fromexcel`` as an empty object.
    Loading a lexicon workbook must raise ``ValueError`` and log that the key
    ``lang_cell_regexes`` was missing and the default parser is used instead.
    """
    # ExcelParser
    path = tmp_path / "invented_path"
    # write_text closes the file itself; the former ``open("w").write(...)``
    # left the handle open and relied on garbage collection to flush it.
    path.write_text("""{"special:fromexcel": {}}""")
    with pytest.raises(ValueError):
        f.load_dataset(path, lexicon=empty_excel)
    assert re.search(
        "User-defined format specification in the json-file was missing the key lang_cell_regexes, "
        "falling back to default parser",
        caplog.text,
    )
def test_no_wordlist_and_no_cogsets(tmp_path):
    """Calling ``load_dataset`` without any Excel file raises ArgumentError.

    Both ``lexicon`` and ``cognate_lexicon`` are ``None``; the call must fail
    with ``argparse.ArgumentError`` carrying the message matched below.
    """
    # mock empty json file
    path = tmp_path / "invented_path"
    # write_text closes the file itself; the former ``open("w").write(...)``
    # left the handle open and relied on garbage collection to flush it.
    path.write_text("{}")
    with pytest.raises(
        argparse.ArgumentError,
        match="At least one of WORDLIST and COGNATESETS excel files must be specified.*",
    ):
        f.load_dataset(
            metadata=path,
            lexicon=None,
            cognate_lexicon=None,
        )
def test_fromexcel_runs(excel_wordlist):
    """Smoke test: loading the lexicon and cognate workbooks must not raise."""
    lexicon, cogsets, (empty_dataset, _original) = excel_wordlist
    metadata_path = Path(empty_dataset.tablegroup._fname)
    f.load_dataset(metadata_path, str(lexicon), str(cogsets))