"Togo_Kan" : "East-6", "Tommo_So" : "East-7", "Toro_Tegu" : "East-1", "Yanda_Dom" : "West-1", "Yorno_So" : "East-?", } vals = sorted(set(groups.values())) colors = dict(zip(vals, colorRange(len(vals)))) #['red', 'green', 'blue', 'black', 'gray', 'yellow', # '0.7', 'cyan', '0.2', '0.1', '0.5', '#ff00ff'])) colord = {} labels = {} for g in groups: colord[g] = {"bg" : colors[groups[g]]} labels[g] = g+'-'+groups[g] wl = Wordlist('../data/Dogon-227-20-cognates.tsv') wl.calculate('tree', tree_calc='upgma', ref='lexstatid') plot_tree(wl.tree, fileformat='pdf', filename='../plots/dogon-upgma', node_dict=colord, labels=labels, figsize=(15,15)) wl.output('dst', filename='../plots/dogon-upgma') wl = Wordlist('../data/Dogon-227-20-cognates.tsv') wl.calculate('tree', tree_calc='neighbor', ref='lexstatid') plot_tree(wl.tree, fileformat='pdf', filename='../plots/dogon-neighbor', node_dict=colord, labels=labels, figsize=(15,15)) wl.output('dst', filename='../plots/dogon-neighbor')
from lingpy.convert.strings import write_nexus
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree

# Load the necessary data
part = Partial.from_cldf('cldf/cldf-metadata.json')

# Compute cognate sets according to SCA and calculate the distance matrix
part.partial_cluster(
    method='sca',
    threshold=0.45,
    ref="cogids",
    cluster_method="upgma",
)
# Derive a single 'cogid' per word from the partial 'cogids'; the tree
# calculation below refers to that column.
part.add_cognate_ids('cogids', 'cogid', idtype='strict')
part.calculate('tree', ref='cogid', tree_calc='upgma')

# Export: NEXUS in 'splitstree' mode plus the 'dst' distance matrix.
out = write_nexus(
    part,
    mode='splitstree',
    filename='distance_matrix.nex',
)
part.output('dst', filename='distance_matrix')

# Show the tree both as a plot and as ASCII art on stdout.
plot_tree(str(part.tree))
print(part.tree.asciiArt())

# Compute cognate sets according to LexStat and calculate the distance matrix
# part.get_partial_scorer(runs=1000)
# part.partial_cluster(method='lexstat', threshold=0.55, cluster_method='upgma', ref="lexstatids")
# part.add_cognate_ids('lexstatids', 'lexstatid', idtype='strict')
# part.calculate('tree', ref='lexstatid', tree_calc='upgma', force=True)
# part.output('dst', filename='distance_matrix')
# plot_tree(str(part.tree))
# print(part.tree.asciiArt())
__author__ = "Johann-Mattis List"
__date__ = "2015-07-13"

from lingpy import *

# Step 1: cognate detection
## 1.1 Read the data
lex = LexStat('data/chinese.tsv')

## 1.2 Cognate detection
lex.cluster(method='sca', threshold=0.4)

## 1.3 Write the data
lex.output('tsv', filename='data/chinese_lexstat', ignore='all')

# Step 2: tree
## 2.1 Compute the tree
lex.calculate('tree', ref="scaid")  # scaid holds the automatic cognates
lex.output('dst', filename="data/chinese_distances")

## 2.2 Plot the tree
from lingpy.convert.plot import plot_tree

plot_tree(
    lex.tree,
    degree=160,
    filename="data/chinese_tree",
    fileformat="svg",
)

# Step 3: alignments
## 3.1 Read the data (the file written in step 1.3)
alm = Alignments('data/chinese_lexstat.tsv', ref="scaid")

## 3.2 Alignment
alm.align()

## 3.3 Write the data as HTML
alm.output('html', filename='data/chinese_alignments')
from collections import defaultdict
from sys import argv

from clldutils.text import strip_brackets, split_text
from lingpy import basictypes
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree

# Command-line switches read from argv:
#   'all'  -> use the A_ file prefix instead of the D_ prefix
#   'plot' -> additionally draw the tree at the end
fname = '../output/A_Deepadung_' if 'all' in argv else '../output/D_Deepadung_'

part = Partial(fname + 'crossids.tsv')
part.add_cognate_ids('crossids', 'crossid', idtype='strict')

# Build a 'cog' value per row by joining the stringified first source column
# with the second one (presumably crossid + concept -- verify against
# add_entries' callback convention).
part.add_entries(
    'cog',
    'crossid,concept',
    lambda row, idx: str(row[idx[0]]) + row[idx[1]],
)
# presumably creates the 'cogid' column referenced below -- TODO confirm
part.renumber('cog')

# NOTE(review): the key is the singular 'distance'; confirm that calculate()
# accepts it (other scripts use 'distances'/'dst').
part.calculate('distance', ref='cogid')
part.calculate('tree', tree_calc='neighbor')

# Write the distance matrix and the tree under the chosen prefix.
part.output('dst', filename=fname + 'distance')
part.output('tre', filename=fname + 'tree')

if 'plot' in argv:
    plot_tree(str(part.tree), degree=350, filename=fname + 'tree')
from lingpy import *
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree
from lexibank_gaotb import Dataset

# --- Expert cognate judgements -------------------------------------------
# CLDF columns to load, and the renaming applied to three of them
# (presumably CLDF name -> wordlist name; verify against from_cldf).
columns = [
    'concept_id',
    'concept_name',
    'language_id',
    'language_name',
    'value',
    'form',
    'cogid_cognateset_id',
]
namespace = (
    ('concept_name', 'concept'),
    ('language_id', 'doculect'),
    ('cogid_cognateset_id', 'cog'),
)
wl = Wordlist.from_cldf(
    Dataset().cldf_dir.joinpath('cldf-metadata.json'),
    columns=columns,
    namespace=namespace,
)
# presumably yields the 'cogid' column used as ref below -- TODO confirm
wl.renumber("cog")
wl.calculate('distances')
wl.output('dst', filename='expertCognates')

# One PNG per clustering method; force=True lets the second call replace
# the tree computed by the first.
wl.calculate('tree', ref='cogid', tree_calc='upgma')
plot_tree(str(wl.tree), filename="treeEXP-UPGMA", fileformat="png")
wl.calculate('tree', ref='cogid', tree_calc='neighbor', force=True)
plot_tree(str(wl.tree), filename="treeEXP-NEI", fileformat="png")

# --- Automatic (SCA) partial cognates ------------------------------------
# Get our CLDF data
partSCA = Partial.from_cldf(Dataset().cldf_dir.joinpath('cldf-metadata.json'))

# Generate the automatic partial cognate clusters
partSCA.partial_cluster(threshold=0.45, ref="cogids", cluster_method="upgma")

# Align the clusters and save them
almsSCA = Alignments(partSCA, ref='cogids')
almsSCA.align()
# NOTE(review): this writes the distance matrix of `wl` (the expert-cognate
# word list), not of partSCA, under the 'autoCognatesSCA' name -- confirm
# that this is intended.
wl.output('dst', filename='autoCognatesSCA')
almsSCA.output(
    'tsv',
    filename='autoCognatesSCA',
    ignore='all',
    prettify=False,
)

# Add the missing ids for the tree
partSCA.add_cognate_ids('cogids', 'cogid', idtype='strict')

# Tree for plotting (no plot call follows in this script)
partSCA.calculate('tree', ref='cogid', tree_calc='upgma')
__author__ = "Johann-Mattis List"
__date__ = "2015-07-13"

from lingpy import *

# Step 1: cognate detection
## 1.1 Read the data
lex = LexStat('data/chinese.tsv')

## 1.2 Cognate detection
lex.cluster(method='sca', threshold=0.4)

## 1.3 Write the data
lex.output('tsv', filename='data/chinese_lexstat', ignore='all')

# Step 2: tree
## 2.1 Compute the tree
lex.calculate('tree', ref="scaid")  # scaid holds the automatic cognates
lex.output('dst', filename="data/chinese_distances")

## 2.2 Plot the tree
from lingpy.convert.plot import plot_tree

plot_tree(
    lex.tree,
    degree=160,
    filename="data/chinese_tree",
    fileformat="svg",
)

# Step 3: alignments
## 3.1 Read the data (the file written in step 1.3)
alm = Alignments('data/chinese_lexstat.tsv', ref="scaid")

## 3.2 Alignment
alm.align()

## 3.3 Write the data as HTML
alm.output('html', filename='data/chinese_alignments')