Esempio n. 1
0
def create_concepticon_for_concepts(
    dataset: pycldf.Dataset,
    language: t.Iterable,
    concepticon_glosses: bool,
    overwrite: bool,
    status_update: t.Optional[str],
):
    # add Status_Column if status update
    if status_update:
        add_status_column_to_table(dataset=dataset, table_name="ParameterTable")
    # add Concepticon_ID column to ParameterTable
    if dataset.column_names.parameters.concepticonReference is None:
        # Create a concepticonReference column
        dataset.add_columns("ParameterTable", "Concepticon_ID")
        c = dataset["ParameterTable"].tableSchema.columns[-1]
        c.valueUrl = "http://concepticon.clld.org/parameters/{Concepticon_ID}"
        c.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference"
        )
        dataset.write_metadata()
    if not language:
        language = [(dataset.column_names.parameters.id, "en")]

    gloss_languages: t.Dict[str, str] = dict(language)
    add_concepticon_references(
        dataset,
        gloss_languages=gloss_languages,
        status_update=status_update,
        overwrite=overwrite,
    )

    if concepticon_glosses:
        add_concepticon_names(dataset)
Esempio n. 2
0
def add_segments_to_dataset(dataset: pycldf.Dataset, transcription: str,
                            overwrite_existing: bool):
    if dataset.column_names.forms.segments is None:
        # Create a Segments column in FormTable
        dataset.add_columns("FormTable", "Segments")
        c = dataset["FormTable"].tableSchema.columns[-1]
        c.separator = " "
        c.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#segments")
        dataset.write_metadata()

    write_back = []
    c_f_segments = dataset["FormTable", "Segments"].name
    for row in dataset["FormTable"]:
        if row[c_f_segments] and not overwrite_existing:
            continue
        else:
            if row[transcription]:
                form = row[transcription].strip()
                row[dataset.column_names.forms.segments] = segment_form(form)
            write_back.append(row)
    dataset.write(FormTable=write_back)
Esempio n. 3
0
def add_concepticon_definitions(
    dataset: pycldf.Dataset,
    column_name: str = "Concepticon_Definition",
    logger: cli.logging.Logger = cli.logger,
) -> None:
    concepticon_ids = dataset.column_names.parameters.concepticonReference
    if concepticon_ids is None:
        logger.error(
            "Your concepts table has no #concepticonReference column, so I cannot add any definitions from Concepticon to it. Try running lexedata.edit.add_concepticon to have me guess those references."
        )
        return

    # Create a concepticon_definition column
    try:
        dataset["ParameterTable", column_name]
        logger.info("Overwriting existing {:} column in concepts table".format(
            column_name))
    except KeyError:
        dataset.add_columns("ParameterTable", column_name)
        dataset.write_metadata()
        # Now if this throws an exception, it's an unexpected exception.

    # write concepticon definitions
    write_back = []
    for row in cli.tq(
            dataset["ParameterTable"],
            task="Write concepts with concepticon definitions to dataset",
    ):
        try:
            row[column_name] = concepticon.api.conceptsets[
                row[concepticon_ids]].definition
        except KeyError:
            pass
        write_back.append(row)

    dataset.write(ParameterTable=write_back)