Example #1
0
def test_cell_comments():
    """Importing an opened worksheet picks up cell notes as Comment values."""
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json")
    excel_filename = (
        Path(__file__).parent / "data/excel/judgement_cell_with_note.xlsx")

    sheet = openpyxl.load_workbook(excel_filename).active
    import_cognates_from_excel(sheet, dataset)

    # Only the columns relevant to the judgement are compared here.
    relevant = {"Form_ID", "Cognateset", "Comment"}
    cognates = {}
    for cog in dataset["CognateTable"]:
        cognates[cog["ID"]] = {
            key: value for key, value in cog.items() if key in relevant
        }
    assert cognates == {
        "autaa_Woman-cogset": {
            "Cognateset": "cogset",
            "Comment": "Comment on judgement",
            "Form_ID": "autaa_Woman",
        }
    }

    cognatesets = {
        cog["ID"]: dict(cog.items()) for cog in dataset["CognatesetTable"]
    }
    assert cognatesets == {
        "cogset": {
            "Name": "cogset",
            "Comment": "Cognateset-comment",
            "ID": "cogset"
        }
    }
Example #2
0
def test_roundtrip(cldf_wordlist, working_and_nonworking_bibfile):
    """Cognate judgements survive an in-memory export/import roundtrip."""
    dataset, target = working_and_nonworking_bibfile(cldf_wordlist)
    c_form = dataset["CognateTable", "formReference"].name
    c_cogset = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {
        (row[c_form], row[c_cogset])
        for row in dataset["CognateTable"].iterdicts()
    }

    writer = ExcelWriter(dataset,
                         database_url="https://example.org/lexicon/{:}")
    forms = util.cache_table(dataset)
    languages = util.cache_table(dataset, "LanguageTable").values()
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)

    # Clear out the existing cognatesets and judgements so they cannot
    # interfere with the data coming back from the Excel sheet.
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])

    import_cognates_from_excel(writer.ws, dataset)

    new_judgements = {
        (row[c_form], row[c_cogset])
        for row in dataset["CognateTable"].iterdicts()
    }
    assert new_judgements == old_judgements
Example #3
0
def test_cell_comments():
    """Importing directly from a file path also picks up cell comments."""
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json")
    excel_path = (
        Path(__file__).parent / "data/excel/judgement_cell_with_note.xlsx")

    # Hand the path (not a worksheet) to the importer.
    import_cognates_from_excel(excel_path, dataset)

    cognates = {
        cog["ID"]: dict(cog.items()) for cog in dataset["CognateTable"]
    }
    assert cognates == {
        "autaa_Woman-cogset": {
            "Cognateset": "cogset",
            "Comment": "Comment on judgement",
            "Form_ID": "autaa_Woman",
            "ID": "autaa_Woman-cogset",
            "Segment_Slice": None,
            "Alignment": None,
        }
    }

    cognatesets = {
        cog["ID"]: dict(cog.items()) for cog in dataset["CognatesetTable"]
    }
    assert cognatesets == {
        "cogset": {
            "Name": "cogset",
            "Comment": "Cognateset-comment",
            "ID": "cogset"
        }
    }
Example #4
0
def test_roundtrip_separator_column(cldf_wordlist):
    """Test whether a CognatesetTable column with separator survives a roundtrip."""
    dataset, target = copy_to_temp(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    # Attach a cycling sample of tag lists to every cognateset, including
    # an empty list and a tag that contains a different separator character.
    rows = list(dataset["CognatesetTable"])
    sample_tags = itertools.cycle([["two", "tags"], ["single-tag"], [],
                                   ["tag;containing;other;separator"]])
    expected = []
    for row, tag in zip(rows, sample_tags):
        expected.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=rows)

    writer = ExcelWriter(dataset)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)

    import_cognates_from_excel(out_filename, dataset)

    reread = [(c[c_id], c["CommaSeparatedTags"])
              for c in dataset["CognatesetTable"]]
    assert (sorted(reread, key=lambda pair: pair[0])
            == sorted(expected, key=lambda pair: pair[0]))
Example #5
0
def test_cell_comments_and_comment_column(caplog):
    """With both a Comment column and cell notes, the notes are imported and a message is logged."""
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json")
    excel_filename = (
        Path(__file__).parent / "data/excel/judgement_cell_with_note.xlsx")

    # Insert an explicit "Comment" column that competes with the cell notes.
    sheet = openpyxl.load_workbook(excel_filename).active
    sheet.insert_cols(2)
    sheet.cell(row=1, column=2, value="Comment")
    sheet.cell(row=2, column=2, value="Comment")

    with caplog.at_level(logging.INFO):
        import_cognates_from_excel(sheet, dataset)

    # The importer must announce where the comments were taken from.
    assert "from the cell comments" in caplog.text

    relevant = {"Form_ID", "Cognateset", "Comment"}
    cognates = {}
    for cog in dataset["CognateTable"]:
        cognates[cog["ID"]] = {
            key: value for key, value in cog.items() if key in relevant
        }
    # The cell note, not the column value, ends up in Comment.
    assert cognates == {
        "autaa_Woman-cogset": {
            "Cognateset": "cogset",
            "Comment": "Comment on judgement",
            "Form_ID": "autaa_Woman",
        }
    }

    cognatesets = {
        cog["ID"]: dict(cog.items()) for cog in dataset["CognatesetTable"]
    }
    assert cognatesets == {
        "cogset": {
            "Name": "cogset",
            "Comment": "Cognateset-comment",
            "ID": "cogset"
        }
    }
Example #6
0
def test_roundtrip(cldf_wordlist):
    """Cognate judgements survive an export-to-file/import roundtrip."""
    dataset, target = copy_to_temp(cldf_wordlist)
    c_form = dataset["CognateTable", "formReference"].name
    c_cogset = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {
        (row[c_form], row[c_cogset])
        for row in dataset["CognateTable"].iterdicts()
    }

    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    ExcelWriter(dataset).create_excel(out_filename)

    # Clear out the existing cognatesets and judgements so they cannot
    # interfere with the data coming back from the Excel file.
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])

    import_cognates_from_excel(out_filename, dataset)

    new_judgements = {
        (row[c_form], row[c_cogset])
        for row in dataset["CognateTable"].iterdicts()
    }
    assert new_judgements == old_judgements
Example #7
0
def test_roundtrip_separator_column(cldf_wordlist,
                                    working_and_nonworking_bibfile):
    """Test whether a CognatesetTable column with separator survives a roundtrip."""
    dataset, target = working_and_nonworking_bibfile(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    # Attach a cycling sample of tag lists to every cognateset, including
    # an empty list and a tag that contains a different separator character.
    rows = list(dataset["CognatesetTable"])
    sample_tags = itertools.cycle([["two", "tags"], ["single-tag"], [],
                                   ["tag;containing;other;separator"]])
    expected = []
    for row, tag in zip(rows, sample_tags):
        expected.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=rows)

    writer = ExcelWriter(dataset,
                         database_url="https://example.org/lexicon/{:}")
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    forms = util.cache_table(dataset)
    languages = util.cache_table(dataset, "LanguageTable").values()
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)

    import_cognates_from_excel(writer.ws, dataset)

    reread = [(c[c_id], c["CommaSeparatedTags"])
              for c in dataset["CognatesetTable"]]
    assert (sorted(reread, key=lambda pair: pair[0])
            == sorted(expected, key=lambda pair: pair[0]))
Example #8
0
def test_excel_messy_row(caplog):
    """A row with forms but no cognateset ID still gets imported, with a warning."""

    def form(form_id, language_id):
        # All three forms share the same concept and form string; only ID
        # and language differ.
        return {
            "ID": form_id,
            "Language_ID": language_id,
            "Form": "f1",
            "Parameter_ID": "C",
        }

    # Build a dataset with forms F1, F2, F3 in languages L1, L2 and
    # CognateTable columns ID and Status
    dataset = util.fs.new_wordlist(
        FormTable=[form("F1", "L1"), form("F2", "L2"), form("F3", "L1")],
        LanguageTable=[{"ID": "L1", "Name": "L1"},
                       {"ID": "L2", "Name": "L2"}],
        ParameterTable=[{"ID": "C"}],
        CognateTable=[],
        CognatesetTable=[],
    )
    # TODO: Ensure FormTable does not need a value
    dataset.add_columns("FormTable", "value")
    dataset["FormTable", "value"].required = False
    dataset.add_columns("CognatesetTable", "Status")
    dataset.add_columns("CognatesetTable", "comment")

    # A sheet whose last row has a form but an empty cognateset cell.
    messy_sheet = MockSingleExcelSheet([
        ["CogSet", "Status", "L1", "L2"],
        ["S1", "valid", "F1", "F2"],
        ["", "invalid", "F3"],
    ])
    # Give the form cells hyperlinks pointing at their own value.
    for coordinates in [(2, 3), (3, 3), (2, 4)]:
        cell = messy_sheet.cell(*coordinates)
        cell.hyperlink = "/{:}".format(cell.value)

    # Cognate-import this dataset
    with caplog.at_level(logging.INFO):
        import_cognates_from_excel(
            messy_sheet,
            dataset,
        )

    # F3 must have been attached to cognateset S1 despite the messy row.
    judgements = [(j["Form_ID"], j["Cognateset_ID"])
                  for j in dataset["CognateTable"]]
    assert ("F3", "S1") in judgements

    # The problem must also have been reported.
    assert re.search("[Rr]ow 3 .* no cognate ?set .*'Status': 'invalid'",
                     caplog.text)