Example 1
def create_concepticon_for_concepts(
    dataset: pycldf.Dataset,
    language: t.Iterable,
    concepticon_glosses: bool,
    overwrite: bool,
    status_update: t.Optional[str],
):
    # add Status_Column if status update
    if status_update:
        add_status_column_to_table(dataset=dataset, table_name="ParameterTable")
    # add Concepticon_ID column to ParameterTable
    if dataset.column_names.parameters.concepticonReference is None:
        # Create a concepticonReference column
        dataset.add_columns("ParameterTable", "Concepticon_ID")
        c = dataset["ParameterTable"].tableSchema.columns[-1]
        c.valueUrl = "http://concepticon.clld.org/parameters/{Concepticon_ID}"
        c.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference"
        )
        dataset.write_metadata()
    if not language:
        language = [(dataset.column_names.parameters.id, "en")]

    gloss_languages: t.Dict[str, str] = dict(language)
    add_concepticon_references(
        dataset,
        gloss_languages=gloss_languages,
        status_update=status_update,
        overwrite=overwrite,
    )

    if concepticon_glosses:
        add_concepticon_names(dataset)
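
The snippet above, like several of the examples further down, follows one recurring pattern: add a plain column, grab it as the last entry of the table schema, mark it as a CLDF property via propertyUrl, and persist the schema with write_metadata(). A minimal, self-contained sketch of just that pattern, assuming URITemplate comes from csvw.metadata and that a cldf-metadata.json file sits in the working directory (the path is illustrative):

import pycldf
from csvw.metadata import URITemplate

# Illustrative metadata path; point this at an actual CLDF dataset.
dataset = pycldf.Wordlist.from_metadata("cldf-metadata.json")

if dataset.column_names.parameters.concepticonReference is None:
    # Add a plain column first ...
    dataset.add_columns("ParameterTable", "Concepticon_ID")
    # ... then promote it to the CLDF concepticonReference property.
    column = dataset["ParameterTable"].tableSchema.columns[-1]
    column.valueUrl = URITemplate(
        "http://concepticon.clld.org/parameters/{Concepticon_ID}")
    column.propertyUrl = URITemplate(
        "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference")
    dataset.write_metadata()  # write the changed schema back to disk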
Example 2
def test_Dataset_validate(tmpdir, mocker):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.write(ValueTable=[])
    assert ds.validate()
    ds['ValueTable'].tableSchema.columns = []
    with pytest.raises(ValueError):
        ds.validate()
    assert not ds.validate(log=mocker.Mock())
    ds.tablegroup.tables = []
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.add_component('LanguageTable')
    ds.write(ValueTable=[])
    ds['LanguageTable'].common_props[
        'dc:conformsTo'] = 'http://cldf.clld.org/404'
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds['ValueTable'].get_column('Source').propertyUrl = URITemplate(
        'http://cldf.clld.org/404')
    ds.write(ValueTable=[])
    with pytest.raises(ValueError):
        ds.validate()
Example 3
def apply_column_property(self, column, property, value):
    if self.is_uri_property(property):
        value = URITemplate(value)
    if property == 'propertyUrl':
        column.propertyUrl = value
    elif property == 'valueUrl':
        column.valueUrl = value
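
Example 3 (and its table-schema counterpart in Example 14 below) wraps the raw string in URITemplate only when the property being set is URI-valued. A standalone sketch of that dispatch, under stated assumptions: is_uri_property is replaced by an explicit set of property names and setattr stands in for the if/elif chain; nothing here beyond URITemplate itself is csvw API.

from csvw.metadata import URITemplate

# Illustrative: schema properties whose values are URI templates, not plain strings.
URI_PROPERTIES = {"aboutUrl", "propertyUrl", "valueUrl"}

def apply_schema_property(target, prop, value):
    """Assign a column or table-schema property, wrapping URI-valued ones."""
    if prop in URI_PROPERTIES:
        value = URITemplate(value)
    setattr(target, prop, value)

# e.g. apply_schema_property(dataset["FormTable", "Segments"], "propertyUrl",
#                            "http://cldf.clld.org/v1.0/terms.rdf#segments")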
Example 4
def add_image_schema(writer):
    writer.cldf.add_table(
        'images.csv',
        {
            'name': 'ID',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
        },
        'Taxon_ID',
        'objid',
        'bitstreamid',
        {
            "name": "tags",
            "separator": ";"
        },
        'mime_type',
        'creator',
        'date',
        'place',
        'permission',
        'source',
        'Comment',
    )
    writer.cldf['images.csv', 'ID'].valueUrl = URITemplate(
        'https://cdstar.shh.mpg.de/bitstreams/{objid}/{bitstreamid}')
    writer.cldf.add_foreign_key('images.csv', 'Taxon_ID', 'ParameterTable',
                                'ID')
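
Rows for such a custom table would then be appended through the writer's objects mapping, the same mechanism Example 13 below uses for its ExampleTable. The row here is purely hypothetical and only illustrates which fields the schema above expects; once written, the ID column's valueUrl expands objid and bitstreamid into a CDSTAR bitstream URL.

# Hypothetical row; every value is a placeholder.
writer.objects['images.csv'].append({
    'ID': 'img-1',
    'Taxon_ID': 'taxon-1',
    'objid': 'EAEA0-0000-0000-0000-0000',
    'bitstreamid': 'leaf.jpg',
    'tags': ['habitat', 'leaf'],  # joined with ";" via the column's separator
    'mime_type': 'image/jpeg',
})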
Example 5
def test_Dataset_validate(tmpdir, mocker):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.write(ValueTable=[])
    values = tmpdir / 'new' / 'values.csv'
    assert values.check()
    Path(str(values)).unlink()
    log = mocker.Mock()
    assert not ds.validate(log=log)
    assert log.warn.called

    ds.write(ValueTable=[])
    assert ds.validate()

    ds['ValueTable'].tableSchema.columns = []
    with pytest.raises(ValueError):
        ds.validate()
    assert not ds.validate(log=mocker.Mock())
    ds.tablegroup.tables = []
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.add_component('LanguageTable')
    ds.write(ValueTable=[], LanguageTable=[])
    assert ds.validate()

    # test violation of referential integrity:
    ds.write(ValueTable=[{
        'ID': '1',
        'Value': '1',
        'Language_ID': 'lid',
        'Parameter_ID': 'pid'
    }],
             LanguageTable=[])
    assert not ds.validate(log=mocker.Mock())

    # test an invalid CLDF URL:
    ds['LanguageTable'].common_props[
        'dc:conformsTo'] = 'http://cldf.clld.org/404'
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds['ValueTable'].get_column('Source').propertyUrl = URITemplate(
        'http://cldf.clld.org/404')
    ds.write(ValueTable=[])
    with pytest.raises(ValueError):
        ds.validate()
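
Both versions of test_Dataset_validate above drive Dataset.validate two ways: expecting a ValueError, or passing a log object and checking that problems were reported via log.warn. Outside of a test, the same call can be pointed at a standard-library logger; a short sketch, with an illustrative metadata path:

import logging
import pycldf

# Illustrative path. Problems found during validation are reported through
# the logger's warn() method, and validate() returns False.
dataset = pycldf.Dataset.from_metadata("cldf-metadata.json")
ok = dataset.validate(log=logging.getLogger("cldf"))
print("valid" if ok else "problems were logged")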
Example 6
def test_no_concepticon_definition_column_added(caplog):
    original = Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    dirname = Path(tempfile.mkdtemp(prefix="lexedata-test"))
    target = dirname / original.name
    shutil.copyfile(original, target)
    dataset = pycldf.Dataset.from_metadata(target)
    dataset.add_columns("ParameterTable", "Concepticon_ID")
    c = dataset["ParameterTable"].tableSchema.columns[-1]
    c.valueUrl = "http://concepticon.clld.org/parameters/{Concepticon_ID}"
    c.propertyUrl = URITemplate(
        "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference"
    )
    dataset.add_columns("ParameterTable", "Concepticon_Definition")
    dataset.write_metadata()
    dataset.write(ParameterTable=[])
    with caplog.at_level(logging.INFO):
        add_concepticon_definitions(dataset=dataset)
    assert re.search("[oO]verwrit.*existing Concepticon_Definition", caplog.text)
Example 7
def reshape_dataset(dataset: pycldf.Wordlist,
                    add_column: bool = True) -> pycldf.Dataset:
    # check for existing cognateset table
    if dataset.column_names.cognatesets is None:
        # Create a Cognateset Table
        dataset.add_component("CognatesetTable")

    # add a concept column to the cognateset table
    if add_column:
        if dataset.column_names.cognatesets.parameterReference is None:
            dataset.add_columns("CognatesetTable", "Core_Concept_ID")
            c = dataset["CognatesetTable"].tableSchema.columns[-1]
            c.datatype = dataset["ParameterTable", "ID"].datatype
            c.propertyUrl = URITemplate(
                "http://cldf.clld.org/v1.0/terms.rdf#parameterReference")
            fname = dataset.write_metadata()
            # Reload dataset with new column definitions
            dataset = pycldf.Wordlist.from_metadata(fname)
    return dataset
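
A hedged usage sketch for reshape_dataset above; the metadata path is illustrative, and because the function reloads the dataset after write_metadata(), it is the returned object that carries the new column definitions:

import pycldf

# Illustrative driver; assumes reshape_dataset() from the example above is in scope.
dataset = pycldf.Wordlist.from_metadata("cldf-metadata.json")
dataset = reshape_dataset(dataset, add_column=True)

# The cognateset table now exposes its parameterReference column,
# e.g. "Core_Concept_ID" if it had to be added.
print(dataset.column_names.cognatesets.parameterReference)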
Example 8
def add_segments_to_dataset(dataset: pycldf.Dataset, transcription: str,
                            overwrite_existing: bool):
    if dataset.column_names.forms.segments is None:
        # Create a Segments column in FormTable
        dataset.add_columns("FormTable", "Segments")
        c = dataset["FormTable"].tableSchema.columns[-1]
        c.separator = " "
        c.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#segments")
        dataset.write_metadata()

    write_back = []
    c_f_segments = dataset["FormTable", "Segments"].name
    for row in dataset["FormTable"]:
        if row[c_f_segments] and not overwrite_existing:
            continue
        else:
            if row[transcription]:
                form = row[transcription].strip()
                row[dataset.column_names.forms.segments] = segment_form(form)
            write_back.append(row)
    dataset.write(FormTable=write_back)
Example 9
    parser = argparse.ArgumentParser()
    parser.add_argument("wordlist",
                        default="cldf-metadata.json",
                        type=Path,
                        help="The wordlist to add Concepticon links to")
    args = parser.parse_args()

    dataset = pycldf.Wordlist.from_metadata(args.wordlist)

    # Is there a cognateset table?
    if dataset.column_names.cognatesets is None:
        # Create a Cognateset Table
        dataset.add_component("CognatesetTable")
        dataset.add_columns("CognatesetTable", "Core_Concept_ID")
        c = dataset["CognatesetTable"].tableSchema.columns[-1]
        c.propertyUrl = URITemplate(
            "https://cldf.clld.org/v1.0/terms.rdf#parameterReference")
        dataset.column_names.parameters.concepticonReference = "Core_Concept_ID"
        dataset.write_metadata()

        # Reload dataset
        dataset = pycldf.Wordlist.from_metadata(args.wordlist)

    c_cognateset = dataset.column_names.forms.cognatesetReference
    c_form = dataset.column_names.forms.id
    table = dataset["FormTable"]
    if c_cognateset is None:
        c_cognateset = dataset.column_names.cognates.cognatesetReference
        c_form = dataset.column_names.cognates.formReference
        table = dataset["CognateTable"]
    if c_cognateset is None:
        raise ValueError(
Example 10
    parser.add_argument("wordlist", default="cldf-metadata.json",
                        type=Path, help="The wordlist to add Concepticon links to")
    parser.add_argument("transcription", nargs="?",
                        default=None,
                        help="Column containing the IPA transcriptions."
                        "(Default: The CLDF #form column)")
    args = parser.parse_args()

    dataset = pycldf.Wordlist.from_metadata(args.wordlist)

    if args.transcription is None:
        args.transcription = dataset.column_names.forms.form

    if dataset.column_names.forms.segments is None:
        # Create a Segments column in FormTable
        dataset.add_columns("FormTable", "Segments")
        c = dataset["FormTable"].tableSchema.columns[-1]
        c.separator = " "
        c.propertyUrl = URITemplate("http://cldf.clld.org/v1.0/terms.rdf#segments")
        dataset.write_metadata()

    print(dataset.column_names.forms.segments)

    write_back = []
    for row in dataset["FormTable"]:
        if row[args.transcription]:
            form = row[args.transcription].strip()
            row[dataset.column_names.forms.segments] = segment_form(form)
        write_back.append(row)
    dataset.write(FormTable=write_back)
Example 11
    def create_schema(self, cldf):
        cldf.add_table(
            'glossabbreviations.csv', {
                'name': 'ID',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
            }, {
                'name': 'Name',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#name',
            })
        cldf.add_table(
            'media.csv',
            {
                'name': 'ID',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
                'valueUrl': 'https://cdstar.shh.mpg.de/bitstreams/{Name}',
            },
            {
                'name': 'Name',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#name',
            },
            {
                'name': 'Description',
                'propertyUrl':
                'http://cldf.clld.org/v1.0/terms.rdf#description',
            },
            'mimetype',
            {
                'name': 'size',
                'datatype': 'integer'
            },
        )

        cldf.add_component(
            'ParameterTable',
            {
                'name':
                'Contributor_ID',
                'separator':
                ' ',
                'dc:description':
                'Authors of the Atlas chapter describing the feature',
            },
            'Chapter',  # valueUrl: https://apics-online.info/parameters/1.chapter.html
            {
                'name':
                'Type',
                'dc:description':
                "Primary or structural feature, segment or sociolinguistic feature",
            },
            {
                'name': 'PHOIBLE_Segment_ID',
                'valueUrl':
                'https://phoible.org/parameters/{PHOIBLE_Segment_ID}',
            },
            'PHOIBLE_Segment_Name',
            {
                'name': 'Multivalued',
                'datatype': 'boolean'
            },
            {
                'name': 'WALS_ID',
                'dc:description': 'ID of the corresponding WALS feature',
            },
            {
                'name': 'WALS_Representation',
                'datatype': 'integer'
            },
            'Area',
            'Map_Gall_Peters',
            {
                'name': 'metadata',
                'dc:format': 'application/json'
            },
        )
        cldf['ParameterTable', 'id'].valueUrl = URITemplate(
            'https://apics-online.info/parameters/{id}')
        cldf.add_component(
            'CodeTable',
            {
                'name': 'Number',
                'datatype': 'integer'
            },
            'icon',
            'color',
            'abbr',
        )
        cldf.add_component(
            'LanguageTable',
            {
                'name': 'Description',
                'propertyUrl':
                'http://cldf.clld.org/v1.0/terms.rdf#description',
                'dc:format': 'text/html',
            },
            {
                'name':
                'Data_Contributor_ID',
                'separator':
                ' ',
                'dc:description':
                'Authors contributing the language structure dataset',
            },
            {
                'name': 'Survey_Contributor_ID',
                'separator': ' ',
                'dc:description': 'Authors of the language survey',
            },
            'Survey_Title',
            {
                'name': 'Source',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
                'separator': ';',
            },
            'Ethnologue_Name',
            'Glossed_Text_PDF',
            'Glossed_Text_Audio',
            {
                'name': 'Metadata',
                'dc:format': 'text/json',
            },
            'Region',
            {
                'name': 'Default_Lect_ID',
                'dc:description': NON_DEFAULT_LECT,
            },
            {
                'name': 'Lexifier',
                'dc:description': LEXIFIER_DESC,
            },
        )
        cldf['LanguageTable', 'id'].valueUrl = URITemplate(
            'https://apics-online.info/contributions/{id}')
        cldf.add_component(
            'ExampleTable',
            {
                'name': 'Source',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
                'separator': ';',
            },
            'Audio',
            {
                'name': 'Type',
                'propertyUrl': 'dc:type'
            },
            {
                'name': 'markup_text',
                'dc:format': 'text/html',
            },
            {
                'name': 'markup_analyzed',
                'dc:format': 'text/html',
            },
            {
                'name': 'markup_gloss',
                'dc:format': 'text/html',
            },
            {
                'name': 'markup_comment',
                'dc:format': 'text/html',
            },
            'source_comment',
            'original_script',
            'sort',
            'alt_translation',
        )
        t = cldf.add_table(
            'contributors.csv', {
                'name': 'ID',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
            }, {
                'name': 'Name',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#name',
            }, 'Address', 'URL', {
                'name': 'editor_ord',
                'datatype': 'integer',
            })
        t.common_props['dc:conformsTo'] = None
        cldf.add_columns(
            'ValueTable',
            {
                'name':
                'Example_ID',
                'separator':
                ' ',
                'propertyUrl':
                'http://cldf.clld.org/v1.0/terms.rdf#exampleReference',
            },
            {
                'name': 'Frequency',
                "datatype": 'number',
            },
            'Confidence',
            {
                'name': 'Metadata',
                'dc:format': 'text/json',
            },
            'source_comment',
        )
        cldf.add_foreign_key('ParameterTable', 'Contributor_ID',
                             'contributors.csv', 'ID')
        cldf.add_foreign_key('ParameterTable', 'Map_Gall_Peters', 'media.csv',
                             'ID')
        cldf.add_foreign_key('LanguageTable', 'Glossed_Text_PDF', 'media.csv',
                             'ID')
        cldf.add_foreign_key('LanguageTable', 'Glossed_Text_Audio',
                             'media.csv', 'ID')
        cldf.add_foreign_key('ExampleTable', 'Audio', 'media.csv', 'ID')
Example 12
def url_template(req, route, id_name):
    return URITemplate(
        get_url_template(req,
                         route,
                         relative=False,
                         variable_map={'id': id_name}))
Example 13
    def cmd_makecldf(self, args):
        args.writer.add_sources()

        # We can link forms to scans of the page in the source where they appear:
        args.writer.cldf["FormTable", "Scan"].valueUrl = URITemplate(
            'https://cdstar.shh.mpg.de/bitstreams/{Objid}/gauchat_et_al_1925_tppsr_{Scan}.png'
        )
        for c in ['Population', 'SpeakerAge']:
            args.writer.cldf['LanguageTable', c].datatype.base = 'integer'
            args.writer.cldf['LanguageTable', c].datatype.minimum = 0

        values = self.raw_dir.read_csv('tppsr-db-v20.txt', delimiter='\t')
        forms = self.raw_dir.read_csv('tppsr-db-v20-ipa-narrow.txt',
                                      delimiter='\t')

        concepts = {}
        for concept in self.conceptlists[0].concepts.values():
            idx = '{0}_{1}'.format(concept.id,
                                   slug(concept.attributes['french']))
            args.writer.add_concept(
                ID=idx,
                Number=concept.number,
                Name=concept.english,
                French_Gloss=concept.attributes['french'],
                Latin_Gloss=concept.attributes['latin'],
                Concepticon_ID=concept.concepticon_id,
                Concepticon_Gloss=concept.concepticon_gloss)
            concepts[concept.number] = (idx, concept.attributes['page'],
                                        concept.attributes['french'])

        languages = args.writer.add_languages(lookup_factory='Number')

        def scan_number(bitstreams):
            p = re.compile(r'tppsr_(?P<number>[0-9]{4})\.png')
            for bs in bitstreams:
                m = p.search(bs['bitstreamid'])
                if m:
                    return m.group('number')

        scans = {
            scan_number(o['bitstreams']): objid
            for objid, o in self.raw_dir.read_json('tppsr_scans.json').items()
        }

        phrase_data = collections.defaultdict(dict)
        for row1, row2 in progressbar(zip(values, forms), desc='cldfify'):
            entry = row1[2]
            for s, t in [('\u0320', '')]:
                entry = entry.replace(s, t)
            tokens = self.tokenizer({},
                                    entry.strip().replace(' ', '_'),
                                    column='IPA')
            # Compute scan number from concept number and language number.
            page = int(concepts[row1[0]][1]) + int(int(row1[1]) > 31)
            scan = str(page + 18).rjust(4, '0')

            if row1[2].replace('_', '').replace('-', '').strip():
                phrase_data[row1[1]][row1[0]] = (row2[2], row1[2])
                args.writer.add_form_with_segments(
                    Value=row1[2],
                    Form=''.join(tokens),
                    Segments=tokens,
                    Profile=' '.join(
                        self.tokenizer({}, entry.strip(), column='Grapheme')),
                    Source=['Gauchat1925[{0}]'.format(page)],
                    Language_ID=languages[row1[1]],
                    Parameter_ID=concepts[row1[0]][0],
                    Scan=scan,
                    Objid=scans[scan],
                    ProsodicStructure=prosodic_string(tokens, _output='CcV'),
                    SegmentedValue=' '.join(
                        self.tokenizer({}, entry, column='Graphemes')))

        args.writer.cldf.add_component(
            'ExampleTable',
            'Alt_Transcription',
            {
                "name":
                "Concept_ID",
                "separator":
                " ",
                "propertyUrl":
                "http://cldf.clld.org/v1.0/terms.rdf#parameterReference",
            },
            {
                "name": "Form_ID",
                "separator": " ",
                "propertyUrl":
                "http://cldf.clld.org/v1.0/terms.rdf#formReference",
            },
        )
        args.writer.cldf.add_foreign_key('ExampleTable', 'Concept_ID',
                                         'ParameterTable', 'ID')
        args.writer.cldf.add_foreign_key('ExampleTable', 'Form_ID',
                                         'FormTable', 'ID')

        for phrase in self.etc_dir.read_csv('phrases.csv', dicts=True):
            for lid, data in sorted(phrase_data.items(), key=lambda i: i[0]):
                lid = languages[lid]
                cids = phrase['Concepts'].split()
                try:
                    args.writer.objects['ExampleTable'].append(
                        dict(
                            ID='{}-{}'.format(phrase['ID'], lid),
                            Language_ID=lid,
                            Primary_Text=' '.join(
                                [data[cid][0] for cid in cids]),
                            Translated_Text=' '.join(
                                [concepts[cid][2] for cid in cids]),
                            Alt_Transcription=' '.join(
                                [data[cid][1] for cid in cids]),
                            Concept_ID=[concepts[cid][0] for cid in cids],
                            Form_ID=[
                                '{}-{}-1'.format(lid, concepts[cid][0])
                                for cid in cids
                            ],
                        ))
                except KeyError:
                    pass
Example 14
def apply_table_schema_property(self, table_schema, property, value):
    if self.is_uri_property(property):
        value = URITemplate(value)
    if property == 'aboutUrl':
        table_schema.aboutUrl = value
Example 15
    def _schema(self, args):
        args.writer.cldf['FormTable'].common_props['dc:description'] = \
            "Word forms are listed as 'counterparts', i.e. as words with a specific meaning. " \
            "Thus, words with multiple meanings may appear more than once in this table."
        args.writer.cldf['FormTable', 'Comment'].common_props['dc:description'] = \
            "For more specific comments see 'comment_on_borrowed' and 'comment_on_word_form'"
        args.writer.cldf['FormTable', 'Word_ID'].valueUrl = URITemplate('https://wold.clld.org/word/{Word_ID}')
        args.writer.cldf.remove_columns('FormTable', 'Cognacy')

        t = args.writer.cldf.add_component(
            "ContributionTable",
            {
                "name": "Number_of_words",
                "datatype": "integer",
                "dc:description": "There would be 1814 words in each vocabulary, "
                                  "corresponding to the 1814 Loanword Typology meanings, if each meaning "
                                  "had exactly one counterpart, and if all the counterparts were "
                                  'different words. But many ("polysemous") words are counterparts of '
                                  "several meanings, many meanings have several word counterparts "
                                  '("synonyms", or "subcounterparts"), and many meanings have no '
                                  "counterparts at all, so the number of words in each database varies "
                                  "considerably.",
            },
            {
                "name": "Language_ID",
                "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#languageReference",
                "dc:description": "References the language for which this contribution provides "
                                  "a vocabulary.",
            },
        )
        t.common_props['dc:description'] = \
            "WOLD contributions are vocabularies (mini-dictionaries of about 1000-2000 entries) " \
            "with comprehensive information about the loanword status of each word. " \
            "Descriptions of how these vocabularies coded the data can be found in the " \
            "[descriptions](descriptions/) directory."
        args.writer.cldf['ContributionTable', 'description'].valueUrl = URITemplate(
            './descriptions/vocabulary_{ID}.md')
        args.writer.cldf['ContributionTable', 'description'].common_props['dc:format'] = 'text/markdown'
        args.writer.cldf['ContributionTable', 'id'].common_props["dc:description"] = \
            "The vocabulary ID number corresponds to the ordering to the chapters on the book " \
            "Loanwords in the World's Languages. Languages are listed in rough geographical order " \
            "from west to east, from Africa via Europe to Asia and the Americas, so that " \
            "geographically adjacent languages are next to each other."
        args.writer.cldf['ContributionTable', 'citation'].common_props["dc:description"] = \
            "Each vocabulary of WOLD is a separate electronic publication with a separate author " \
            "or team of authors and should be cited as specified here."
        args.writer.cldf['ContributionTable', 'contributor'].common_props["dc:description"] = \
            "The authors are experts of the language and its history. They also contributed a " \
            "prose chapter on the borrowing situation in their language that was published in the " \
            "book Loanwords in the World's Languages."
        t.add_foreign_key("Language_ID", "languages.csv", "ID")

        t = args.writer.cldf.add_component(
            'BorrowingTable',
            {
                'name': 'Source_relation',
                'datatype': {'base': 'string', 'format': "immediate|earlier"},
                'dc:description':
                    "Whether a word was contributed directly (immediate) or indirectly (earlier), "
                    "i.e. via another, intermediate donor languoid, to the recipient language.",
            },
            'Source_word',
            'Source_meaning',
            {
                'name': 'Source_certain',
                'datatype': {'base': 'boolean', 'format': "yes|no"},
                'dc:description': "Certainty of the source identification",
            },
            {
                'name': 'Source_languoid',
                'dc:description': 'Donor languoid, specified as name of a language or language subgroup or family',
            },
            {
                'name': 'Source_languoid_glottocode',
                'dc:description': 'Glottocode of the source languoid',
                'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#glottocode',
            }
        )
        t.common_props['dc:description'] = \
            'While a lot of information about the borrowing status is attached to the borrowed ' \
            'forms, the BorrowingTable lists information about (potential) source words. Note ' \
            'that we list loan events per meaning; i.e. one loanword may result in multiple ' \
            'borrowings if the word has multiple meanings.'
Example 16
    parser.add_argument("wordlist",
                        default="cldf-metadata.json",
                        type=Path,
                        help="The wordlist to add Concepticon links to")
    parser.add_argument(
        "--language",
        "-l",
        action="append",
        default=[],
        type=equal_separated,
        help="Maps from column names to language codes, eg. GLOSS=en")
    args = parser.parse_args()

    dataset = pycldf.Wordlist.from_metadata(args.wordlist)

    if dataset.column_names.parameters.concepticonReference is None:
        # Create a concepticonReference column
        dataset.add_columns("ParameterTable", "Concepticon_ID")
        c = dataset["ParameterTable"].tableSchema.columns[-1]
        c.valueUrl = "http://concepticon.clld.org/parameters/{Concepticon_ID}"
        c.propertyUrl = URITemplate(
            "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference")
        dataset.write_metadata()

    if not args.language:
        args.language = [(dataset.column_names.parameters.id, "en")]

    gloss_languages: t.Dict[str, str] = dict(args.language)

    add_concepticon_references(dataset, gloss_languages)