Example #1
0
def test_parser_variant_lands_in_comment(caplog):
    """Check where a variant form ends up when FormTable has no 'variants' column.

    The 'variants' column is removed from the FormTable of the bundled
    smallmawetiguarani dataset before the parser is built. Parsing a cell
    that contains a second form prefixed with ``~`` is then expected to

    * leave a log record matching "No 'variants' column found ... will be
      added to #comment" in ``caplog.text``, and
    * return a form whose ``Comment`` holds the variant together with the
      other comment material.

    :param caplog: pytest's log-capturing fixture.
    """
    # Capture INFO records as well, so the message checked below is visible.
    caplog.set_level(logging.INFO)
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    # Without this column the parser has no dedicated place for variants.
    dataset.remove_columns("FormTable", "variants")
    parser = c.CellParser(
        dataset=dataset,
        element_semantics=[
            ("/", "/", "phonemic", True),
            ("[", "]", "phonetic", True),
            ("<", ">", "orthographic", True),
            ("{", "}", "source", False),
            ("(", ")", "comment", False),
        ],
    )
    form = parser.parse_form(" {2} [dʒi'tɨka] ~[ʒi'tɨka] {2}", "language")

    # The two expectations are asserted separately so that a failure shows
    # whether the log message or the parsed form is at fault, and so pytest
    # can print a dict diff for the form comparison.
    assert re.search(
        r"No 'variants' column found .* will be added to #comment.*",
        caplog.text)
    assert form == {
        "Language_ID": "language",
        "Value": " {2} [dʒi'tɨka] ~[ʒi'tɨka] {2}",
        "phonetic": "dʒi'tɨka",
        "Comment": "~[ʒi'tɨka]\t2",
        "Source": {"language_s2"},
        "Form": "dʒi'tɨka",
    }
Example #2
0
def test_fields_of_formtable_no_transcription(no_dialect):
    """Expect CellParser to reject element semantics without a transcription.

    The FormTable of the ``no_dialect`` dataset gets the value, form,
    languageReference, comment and source columns, but the element
    semantics handed to ``CellParser`` name none of 'orthographic',
    'phonemic' or 'phonetic'. Construction must raise an ``AssertionError``
    whose message matches the pattern below.

    :param no_dialect: dataset fixture supplied by the test suite.
    """
    for column in ("value", "form", "languageReference", "comment", "source"):
        no_dialect.add_columns("FormTable", column)

    expected_message = (
        r"Your metadata json file and your cell parser don’t match.*transcriptions \(at least one of "
        r"'orthographic', 'phonemic', and 'phonetic'\) to derive a #form.*"
    )
    # Deliberately no transcription element: the "[ ]" phonetic and "/ /"
    # phonemic entries used by other tests are left out on purpose.
    semantics_without_transcription = [
        ("<", ">", "form", False),
        ("(", ")", "comment", False),
        ("{", "}", "source", False),
    ]

    with pytest.raises(AssertionError, match=expected_message):
        c.CellParser(
            dataset=no_dialect,
            element_semantics=semantics_without_transcription,
        )
Example #3
0
def test_fields_of_formtable_no_source(no_dialect):
    """Expect CellParser to reject a FormTable that has no #source column.

    The value, form, languageReference and comment columns are added to the
    FormTable of the ``no_dialect`` dataset, but no source column. Building
    a ``CellParser`` with its default element semantics must then raise a
    ``ValueError`` whose message mentions ``#source``.

    :param no_dialect: dataset fixture supplied by the test suite.
    """
    # Every column except "source" is added.
    for column in ("value", "form", "languageReference", "comment"):
        no_dialect.add_columns("FormTable", column)

    expected_message = (
        "Your metadata json file and your cell parser don’t match.*#source.*"
    )
    with pytest.raises(ValueError, match=expected_message):
        c.CellParser(dataset=no_dialect)
Example #4
0
def parser():
    """Return a CellParser built on the bundled smallmawetiguarani dataset.

    The dataset is loaded from the ``cldf-metadata.json`` file that lives
    next to this module. The parser is configured with five bracket pairs:
    phonemic, phonetic, orthographic, source and comment.
    """
    metadata_file = (
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    mawetiguarani = pycldf.Dataset.from_metadata(metadata_file)
    # Each entry is (opening bracket, closing bracket, element name, flag).
    # NOTE(review): the meaning of the trailing boolean is defined by
    # CellParser and is not visible here. Confirm against its documentation.
    bracket_semantics = [
        ("/", "/", "phonemic", True),
        ("[", "]", "phonetic", True),
        ("<", ">", "orthographic", True),
        ("{", "}", "source", False),
        ("(", ")", "comment", False),
    ]
    return c.CellParser(mawetiguarani, element_semantics=bracket_semantics)