コード例 #1
0
def test_fromexcel_correct(excel_wordlist):
    """Check that importing from Excel reproduces the original dataset's IDs.

    The ``excel_wordlist`` fixture provides a lexicon Excel file, a cognate
    set Excel file, and a pair of (empty dataset, metadata path of the
    original dataset).  After loading both Excel files into the empty
    dataset, the set of form IDs and the set of cognate IDs must equal
    those of the original dataset.
    """
    lexicon, cogsets, (empty_dataset, original_md) = excel_wordlist
    original = pycldf.Wordlist.from_metadata(original_md)
    # TODO: parameterize original, like the other parameters, over possible
    # test datasets.
    f.load_dataset(Path(empty_dataset.tablegroup._fname), str(lexicon),
                   str(cogsets))

    def table_path(dataset, table):
        # Location of the table's file, used only in failure messages.
        # Each dataset is asked for its *own* table URL, so the message
        # names the two files that were actually compared.
        return dataset[table]._parent._fname.parent / str(dataset[table].url)

    form_ids_from_excel = {form["ID"] for form in empty_dataset["FormTable"]}
    form_ids_original = {form["ID"] for form in original["FormTable"]}
    cognate_ids_from_excel = {
        cognate["ID"] for cognate in empty_dataset["CognateTable"]
    }
    cognate_ids_original = {
        cognate["ID"] for cognate in original["CognateTable"]
    }

    assert form_ids_original == form_ids_from_excel, "{:} and {:} don't match.".format(
        table_path(empty_dataset, "FormTable"),
        table_path(original, "FormTable"),
    )
    assert (cognate_ids_original == cognate_ids_from_excel
            ), "{:} and {:} don't match.".format(
                table_path(empty_dataset, "CognateTable"),
                table_path(original, "CognateTable"),
            )
コード例 #2
0
ファイル: test_na_forms.py プロジェクト: Anaphory/lexedata
def test_matrix_import_skips_question_marks():
    """Run the importer on a small concept-by-language matrix with a "?" cell.

    NOTE(review): despite its name, this test asserts nothing about the "?"
    cell yet; it only checks that importing and re-reading the dataset runs
    without raising.
    """
    rows = [  # noqa
        ["Concept", "L1", "L2"],
        ["C1", "?", "Form1"],
        ["C2", "Form2", "Form3"],
    ]
    # Build an Excel workbook from the matrix and save it in a fresh
    # temporary directory.
    workbook = op.Workbook()
    sheet = workbook.active
    for row in rows:
        sheet.append(row)
    workdir = Path(tempfile.mkdtemp(prefix="lexedata-test"))
    xlsx_path = workdir / "test.xlsx"
    workbook.save(xlsx_path)

    # TODO: struggling to make a matrix importer run on such a simple
    # structure without too much coding ... this might even be a bad sign?
    # Create simple metadata next to the Excel file.
    scratch = pycldf.Wordlist.in_dir(workdir)
    scratch.write(
        FormTable=[{"Concept": "", "L1": "", "L2": "", "value": ""}]
    )  # noqa
    # NOTE: the metadata written above (scratch.tablegroup._fname) is not
    # used; the import targets the minimal test dataset shipped with the
    # tests instead.
    metadata = Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"
    # Load the Excel file into the dataset described by the metadata.
    load_dataset(metadata=metadata, lexicon=xlsx_path)
    imported = pycldf.Dataset.from_metadata(metadata)
    forms = set(imported["FormTable"])
    print(forms)
    assert True
コード例 #3
0
def test_no_first_row_in_excel(empty_excel):
    """Loading the ``empty_excel`` fixture must raise an AssertionError.

    The error message is expected to complain that the first data row has
    no name.
    """
    minimal_metadata = Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"
    metadata_copy = copy_metadata(original=minimal_metadata)
    expected_message = (
        "Your first data row didn't have a name. Please check your format specification or ensure the "
        "first row has a name."
    )
    with pytest.raises(AssertionError, match=expected_message):
        f.load_dataset(metadata=metadata_copy, lexicon=empty_excel)
コード例 #4
0
def test_no_dialect_excel_cognate_parser(tmp_path, caplog, empty_excel):
    """Cognate import with metadata lacking a format specification.

    With an empty JSON object as metadata, ``load_dataset`` is expected to
    raise ``ValueError`` and to log that the user-defined format
    specification is missing and that the default parser is used.
    """
    # ExcelCognateParser
    path = tmp_path / "invented_path"
    # Mock an empty json file.  write_text closes the file handle, unlike
    # the bare open(...).write(...) idiom.
    path.write_text("{}")
    with pytest.raises(ValueError):
        f.load_dataset(metadata=path, lexicon=None, cognate_lexicon=empty_excel)
    assert re.search("User-defined format specification .* missing", caplog.text)
    assert re.search("default parser", caplog.text)
コード例 #5
0
def test_dialect_missing_key_excel_cognate_parser(tmp_path, caplog, empty_excel):
    """Cognate import with an empty ``special:fromexcel`` specification.

    ``load_dataset`` is expected to raise ``ValueError`` and to log that a
    key is missing from the user-defined format specification, falling back
    to the default parser.
    """
    path = tmp_path / "invented_path"
    # write_text closes the file handle, unlike open(...).write(...).
    path.write_text("""{"special:fromexcel": {}}""")
    # CognateExcelParser
    with pytest.raises(ValueError):
        f.load_dataset(path, lexicon=None, cognate_lexicon=empty_excel)
    assert re.search(
        r"User-defined format specification in the json-file was missing the key .*falling back to default parser.*",
        caplog.text,
    )
コード例 #6
0
def test_dialect_missing_key_excel_parser(tmp_path, caplog, empty_excel):
    """Lexicon import with an empty ``special:fromexcel`` specification.

    ``load_dataset`` is expected to raise ``ValueError`` and to log that the
    key ``lang_cell_regexes`` is missing from the user-defined format
    specification, falling back to the default parser.
    """
    # ExcelParser
    path = tmp_path / "invented_path"
    # write_text closes the file handle, unlike open(...).write(...).
    path.write_text("""{"special:fromexcel": {}}""")
    with pytest.raises(ValueError):
        f.load_dataset(path, lexicon=empty_excel)
    assert re.search(
        "User-defined format specification in the json-file was missing the key lang_cell_regexes, "
        "falling back to default parser",
        caplog.text,
    )
コード例 #7
0
def test_no_wordlist_and_no_cogsets(tmp_path):
    """Calling ``load_dataset`` without any Excel file must be rejected.

    With neither a lexicon nor a cognate lexicon given, an
    ``argparse.ArgumentError`` with a matching message is expected.
    """
    # Mock an empty json file.  write_text closes the file handle, unlike
    # the bare open(...).write(...) idiom.
    path = tmp_path / "invented_path"
    path.write_text("{}")
    with pytest.raises(
        argparse.ArgumentError,
        match="At least one of WORDLIST and COGNATESETS excel files must be specified.*",
    ):
        f.load_dataset(
            metadata=path,
            lexicon=None,
            cognate_lexicon=None,
        )
コード例 #8
0
def test_fromexcel_runs(excel_wordlist):
    """Smoke test: importing the fixture's Excel files must not raise."""
    lexicon, cogsets, datasets = excel_wordlist
    # Only the empty target dataset is needed here; the second element of
    # the pair is deliberately ignored.
    empty_dataset, _ = datasets
    metadata_path = Path(empty_dataset.tablegroup._fname)
    f.load_dataset(metadata_path, str(lexicon), str(cogsets))