Example #1
0
from lexibank_deepadungpalaung import Dataset
from lingpy import *
from lingpy.compare.partial import Partial
from lingpy.evaluate.acd import bcubes

# Which CLDF columns to load, and how to rename them for lingpy.
columns = ('concept_name', 'language_id', 'value', 'form', 'segments',
           'language_glottocode', 'cogid_cognateset_id')
namespace = (('concept_name', 'concept'),
             ('language_id', 'doculect'),
             ('segments', 'tokens'),
             ('language_glottocode', 'glottolog'),
             ('concept_concepticon_id', 'concepticon'),
             ('language_latitude', 'latitude'),
             ('language_longitude', 'longitude'),
             ('cognacy', 'cognacy'),
             ('cogid_cognateset_id', 'cog'))

# Load the dataset as a partial-cognate wordlist.
part = Partial.from_cldf(Dataset().cldf_dir.joinpath('cldf-metadata.json'),
                         columns=columns, namespace=namespace)

# Turn the expert cognate-set labels in 'cog' into numeric ids.
part.renumber('cog')


method = input('method: ')

# NOTE(review): the original comment said to type 'cogid' or 'cog' to see a
# tree based on Deepadung et al.'s cognate judgements, but no branch below
# handles those values — TODO confirm the intended option names.

if method == 'lexstatcogids':
    # Partial (morpheme-level) LexStat cognate detection.
    part.get_partial_scorer(runs=10000)
    part.partial_cluster(method='lexstat', ref="lexstatcogids", threshold=0.55)
elif method == 'lexstatcogid':
    # Whole-word LexStat cognate detection.
    part.get_scorer(runs=10000)
    part.cluster(method='lexstat', ref="lexstatcogid", threshold=0.55)
Example #2
0
# Load the dataset as a plain wordlist and report every entry whose
# segmented form contains an empty morpheme (a data-quality check).
wl = Wordlist.from_cldf(Dataset().cldf_dir.joinpath('cldf-metadata.json'))
for idx, tokens in wl.iter_rows('tokens'):
    # tokens.n splits the segment list at morpheme boundaries — presumably a
    # lingpy basictype; an empty morpheme signals a malformed segmentation.
    # (Removed an unused counter `i = 0` and a commented-out debug print.)
    for segment in tokens.n:
        if not segment:
            print(idx, tokens)

from lingpy.compare.partial import Partial

# Which CLDF columns to load, and how to rename them for lingpy.
columns = ('concept_name', 'language_id', 'value', 'form', 'segments',
           'language_glottocode', 'cogid_cognateset_id')
namespace = (('concept_name', 'concept'), ('language_id', 'doculect'),
             ('segments', 'tokens'), ('language_glottocode', 'glottolog'),
             ('concept_concepticon_id', 'concepticon'),
             ('language_latitude', 'latitude'),
             ('language_longitude', 'longitude'),
             ('cognacy', 'cognacy'),
             ('cogid_cognateset_id', 'cogid'))

var = Dataset().cldf_dir.joinpath('cldf-metadata.json')
# BUG FIX: `columns` and `namespace` were defined above but never passed to
# from_cldf, so the custom column mapping was silently ignored.
part = Partial.from_cldf(var, columns=columns, namespace=namespace)
# Low run count for quick experiments; make tests with 100 and 1000 when
# debugging, raise for production runs.
part.get_partial_scorer(runs=100)
part.partial_cluster(method='lexstat',
                     threshold=0.5,
                     ref='cogids',
                     cluster_method='infomap')
# Align the partial cognates and write the full wordlist to plain TSV.
alms = Alignments(part, ref='cogids')
alms.align()
alms.output('tsv', filename='deepadung-wordlist', ignore='all', prettify=False)
Example #3
0
from lingpy.convert.strings import write_nexus
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree

# Load the wordlist with partial-cognate support.
part = Partial.from_cldf('cldf/cldf-metadata.json')

# SCA-based partial cognate detection; collapse the morpheme-level sets
# into strict whole-word cognate ids, then build a UPGMA tree from them.
part.partial_cluster(method='sca', threshold=0.45, ref="cogids",
                     cluster_method="upgma")
part.add_cognate_ids('cogids', 'cogid', idtype='strict')
part.calculate('tree', ref='cogid', tree_calc='upgma')

# Export a SplitsTree nexus file plus a distance matrix, and show the tree.
nexus = write_nexus(part, mode='splitstree', filename='distance_matrix.nex')
part.output('dst', filename='distance_matrix')
plot_tree(str(part.tree))
print(part.tree.asciiArt())

# Compute cognate sets according to LexStat and calculate the distance matrix
# part.get_partial_scorer(runs=1000)
# part.partial_cluster(method='lexstat', threshold=0.55, cluster_method='upgma', ref="lexstatids")
# part.add_cognate_ids('lexstatids', 'lexstatid', idtype='strict')
# part.calculate('tree', ref='lexstatid', tree_calc='upgma', force=True)
# part.output('dst', filename='distance_matrix')
# plot_tree(str(part.tree))
# print(part.tree.asciiArt())
Example #4
0
from lingpy.compare.partial import Partial
from lingpy.align.sca import Alignments
from lexibank_chingelong import Dataset

# Load the data as a partial-cognate wordlist from the bundled CLDF dataset.
metadata = Dataset().cldf_dir.joinpath('cldf-metadata.json')
part = Partial.from_cldf(metadata)

# SCA-based partial cognate sets go into the 'cogids' column.
part.partial_cluster(threshold=0.45, ref="cogids", cluster_method="upgma")

# LexStat-based partial cognate sets go into 'lexstatids'.
part.get_partial_scorer(runs=1000)
part.partial_cluster(method='lexstat', threshold=0.55,
                     cluster_method='upgma', ref="lexstatids")

# Align the SCA partial cognates.
alignments = Alignments(part, ref='cogids')
alignments.align()

# Dump the aligned wordlist to plain TSV.
alignments.output('tsv', filename='alignments', ignore='all', prettify=False)