def create_concepticon_for_concepts(
    dataset: pycldf.Dataset,
    language: t.Iterable,
    concepticon_glosses: bool,
    overwrite: bool,
    status_update: t.Optional[str],
):
    """Prepare the ParameterTable and add Concepticon references to it.

    Steps, in order:

    1. If ``status_update`` is truthy, make sure the ParameterTable has a
       status column.
    2. If the dataset has no ``#concepticonReference`` column yet, add a
       ``Concepticon_ID`` column carrying that property URL and write the
       metadata.
    3. Delegate the actual linking to ``add_concepticon_references``.
    4. If ``concepticon_glosses`` is true, call ``add_concepticon_names``.

    :param dataset: The dataset whose ParameterTable is modified.
    :param language: Iterable of pairs that is turned into a dict of gloss
        languages (presumably column name -> language code; verify against
        ``add_concepticon_references``). When empty, the parameter ID column
        paired with ``"en"`` is used.
    :param concepticon_glosses: Whether to also call
        ``add_concepticon_names`` afterwards.
    :param overwrite: Passed through to ``add_concepticon_references``.
    :param status_update: Passed through to ``add_concepticon_references``;
        a truthy value also triggers creation of the status column.
    """
    # A status message can only be stored if the table has a status column.
    if status_update:
        add_status_column_to_table(dataset=dataset, table_name="ParameterTable")

    # Make sure there is a column flagged as the Concepticon reference.
    if dataset.column_names.parameters.concepticonReference is None:
        dataset.add_columns("ParameterTable", "Concepticon_ID")
        # The column just added is the last one in the table schema.
        reference_column = dataset["ParameterTable"].tableSchema.columns[-1]
        reference_column.valueUrl = (
            "http://concepticon.clld.org/parameters/{Concepticon_ID}"
        )
        reference_column.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference"
        )
        dataset.write_metadata()

    # Without an explicit choice, fall back to the ID column paired with "en".
    gloss_languages: t.Dict[str, str] = dict(
        language or [(dataset.column_names.parameters.id, "en")]
    )

    add_concepticon_references(
        dataset,
        gloss_languages=gloss_languages,
        status_update=status_update,
        overwrite=overwrite,
    )

    if concepticon_glosses:
        add_concepticon_names(dataset)
def add_table_with_columns(
    table: str, column_names: t.Set[str], data: pycldf.Dataset
) -> None:
    """Add a table with the given columns to the dataset.

    If such a table already exists, only add the columns that do not exist
    yet; columns already present in an existing table are never removed.
    If the table has to be created, the component's default columns that
    were not requested are deleted again.

    :param table: Name of the table/component, e.g. ``"FormTable"``.
    :param column_names: Requested columns, written as ``cldf_<term>``
        (e.g. ``"cldf_id"``). The set is not modified by this function.
    :param data: The dataset to modify in place.
    """
    # Work on a private copy so the caller's set is left untouched.
    missing = set(column_names)

    try:
        data[table]
    except KeyError:
        data.add_component(table)
        freshly_added = True
    else:
        freshly_added = False

    columns = data[table].tableSchema.columns
    # Walk backwards so that deleting an entry does not shift the indices of
    # the columns still to be visited.
    for index in range(len(columns) - 1, -1, -1):
        property_url = columns[index].propertyUrl
        if property_url is None:
            # A column without a property URL cannot match a requested name.
            expected_name = None
        else:
            expected_name = "cldf_{}".format(
                property_url.uri.split("#")[-1].lower()
            )

        if expected_name in missing:
            # Already present: no need to add it below.
            missing.remove(expected_name)
        elif freshly_added:
            # Default column of the new component that nobody asked for.
            del columns[index]
        # Otherwise: unrelated column of a pre-existing table, keep it.

    # Sorted, so the resulting column order does not depend on set ordering.
    for column_name in sorted(missing):
        data.add_columns(
            table,
            column_name.replace("cldf_", "http://cldf.clld.org/v1.0/terms.rdf#"),
        )
def add_status_column_to_table(dataset: pycldf.Dataset, table_name: str) -> None:
    """Make sure ``table_name`` in ``dataset`` has a ``Status_Column``.

    If the table schema already lists a column of that name, nothing is
    changed and an info message is logged; otherwise the column is added.

    :param dataset: The dataset whose table schema is checked and extended.
    :param table_name: Name of the table to receive the column.
    """
    existing_columns = dataset[table_name].tableSchema.columndict
    if "Status_Column" in existing_columns:
        cli.logger.info(f"Table {table_name} already contains a Status_Column.")
        return
    dataset.add_columns(table_name, "Status_Column")
def add_segments_to_dataset(
    dataset: pycldf.Dataset, transcription: str, overwrite_existing: bool
):
    """Fill the segments column of the FormTable from a transcription column.

    If the dataset has no ``#segments`` column, a ``Segments`` column
    (separator ``" "``) is created first and the metadata is written. Then,
    for every form, the stripped value of the ``transcription`` column is
    passed to ``segment_form`` and the result is stored in the segments
    column.

    Every row of the FormTable is written back, including rows that are left
    unchanged because they already have segments (and ``overwrite_existing``
    is false) or because they have no transcription. Earlier, such rows were
    left out of the written table.

    :param dataset: The dataset to modify; its FormTable is rewritten.
    :param transcription: Name of the column holding the form to segment.
    :param overwrite_existing: If true, existing segments are replaced;
        otherwise rows with non-empty segments are kept as they are.
    """
    if dataset.column_names.forms.segments is None:
        # Create a Segments column in FormTable
        dataset.add_columns("FormTable", "Segments")
        # The column just added is the last one in the table schema.
        segments_column = dataset["FormTable"].tableSchema.columns[-1]
        segments_column.separator = " "
        segments_column.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#segments"
        )
        dataset.write_metadata()

    c_f_segments = dataset["FormTable", "Segments"].name
    # Loop-invariant: name of the column flagged as #segments.
    target_column = dataset.column_names.forms.segments

    write_back = []
    for row in dataset["FormTable"]:
        keep_existing = row[c_f_segments] and not overwrite_existing
        if not keep_existing and row[transcription]:
            row[target_column] = segment_form(row[transcription].strip())
        # Always keep the row: the table is rewritten from write_back, so a
        # row that is not appended here would be lost.
        write_back.append(row)
    dataset.write(FormTable=write_back)
def add_concepticon_definitions(
    dataset: pycldf.Dataset,
    column_name: str = "Concepticon_Definition",
    logger: cli.logging.Logger = cli.logger,
) -> None:
    """Copy Concepticon definitions into a column of the ParameterTable.

    For every row of the ParameterTable, the value of the
    ``#concepticonReference`` column is looked up in
    ``concepticon.api.conceptsets`` and the ``definition`` of the match is
    stored in ``column_name``. Rows whose lookup raises ``KeyError`` are
    left unchanged. All rows are then written back to the dataset.

    If the dataset has no ``#concepticonReference`` column, an error is
    logged and nothing is changed.

    :param dataset: The dataset to modify.
    :param column_name: Column that receives the definitions; it is created
        (and the metadata written) if it does not exist yet.
    :param logger: Logger used for the error and info messages.
    """
    concepticon_ids = dataset.column_names.parameters.concepticonReference
    if concepticon_ids is None:
        logger.error(
            "Your concepts table has no #concepticonReference column, so I cannot add any definitions from Concepticon to it. Try running lexedata.edit.add_concepticon to have me guess those references."
        )
        return

    # Make sure the target column exists; an existing one is reused.
    try:
        dataset["ParameterTable", column_name]
    except KeyError:
        dataset.add_columns("ParameterTable", column_name)
        dataset.write_metadata()
    else:
        logger.info(
            "Overwriting existing {:} column in concepts table".format(column_name)
        )

    # Fill in the definitions, keeping every row for the final write.
    updated_rows = []
    for concept in cli.tq(
        dataset["ParameterTable"],
        task="Write concepts with concepticon definitions to dataset",
    ):
        try:
            conceptset = concepticon.api.conceptsets[concept[concepticon_ids]]
        except KeyError:
            # No matching conceptset: leave this row as it is.
            pass
        else:
            concept[column_name] = conceptset.definition
        updated_rows.append(concept)
    dataset.write(ParameterTable=updated_rows)
def add_status_column_to_table(dataset: pycldf.Dataset, table_name: str) -> None:
    """Add a ``Status_Column`` to ``table_name`` unless it already has one.

    Nothing happens (and nothing is logged) when the table schema already
    lists a column named ``Status_Column``.

    :param dataset: The dataset whose table schema is checked and extended.
    :param table_name: Name of the table to receive the column.
    """
    # NOTE(review): a function with this same name is defined earlier in this
    # file; if both stay, this later definition is the one in effect after
    # import. Confirm which variant is intended.
    known_columns = dataset[table_name].tableSchema.columndict
    if "Status_Column" in known_columns:
        return
    dataset.add_columns(table_name, "Status_Column")