def parse_newick_to_tree(filename):
    """Read the first Newick tree from *filename* into a TreeNode.

    The file is opened as ASCII text and parsed with the ``newick``
    package; the first tree found is translated into the project's
    TreeNode structure via ``create_children``.
    """
    with io.open(filename, encoding='ascii') as handle:
        parsed = newick.load(handle)
    first = parsed[0]
    root = TreeNode(first.name, [])
    for subtree in create_children(first.descendants):
        root.addChild(subtree)
    return root
def species_list_from_tree(fname):
    """Return the names of all named nodes in the first tree of *fname*.

    Arguments:
        fname - path to a file in NEWICK format.

    Returns:
        List of node names (in tree-walk order); anonymous nodes are
        skipped.
    """
    from newick import load
    # BUG FIX: the original called the Python-2-only builtin `file()`,
    # which raises NameError on Python 3, and never closed the handle.
    # A context manager fixes both.
    with open(fname) as fp:
        trees = load(fp)
    return [node.name for node in trees[0].walk() if node.name]
def parse(file_name):
    """Parse every Newick tree in *file_name* into project Tree objects.

    Each tree loaded by the ``newick`` package is wrapped in a ``Tree``
    built from its leaves and then filled in by ``load_tree``.
    """
    with io.open(file_name, encoding='utf8') as source:
        raw_trees = load(source)
    converted_trees = []
    for raw in raw_trees:
        converted = Tree(leaves=raw.get_leaves())
        load_tree(raw, converted)
        converted_trees.append(converted)
    return converted_trees
def read_tree(filename):
    """Read tree into memory.

    NOTE - The tree file must be in NEWICK format

    Arguments:
        filename - the location of the tree file.

    Requires: newick, io.
    """
    with io.open(filename, encoding='utf8') as fp:
        tree = newick.load(fp)
    # newick.load returns a list of trees; only the first one is used.
    return tree[0]
# Top-level driver: build (or load) a network of related concepts, then
# simulate vocabulary evolution along every input tree.
# NOTE(review): `parser`, `Phylogeny` and `Language` are defined outside
# this chunk; the loop body may continue beyond the visible source.
args = parser.parse_args()
if args.semantic_network:
    # An explicit semantic network was supplied as a GML file.
    related_concepts = networkx.parse_gml(args.semantic_network)
else:
    # Otherwise partition the concepts randomly into semantic fields;
    # concepts sharing a field count as related to each other.
    concept2field = defaultdict(set)
    for c in range(args.concepts):
        concept2field[random.randint(0, args.fields - 1)].add(c)
    related_concepts = {}
    for field in concept2field.values():
        for concept in field:
            related_concepts[concept] = field - {concept}
i = 0
for _, tree_file in enumerate(args.trees):
    for tree in newick.load(tree_file):
        # Running tree counter across all input files.
        i += 1
        phy = Phylogeny(
            related_concepts,
            basic=[],
            tree=tree,
            scale=args.scale)
        phy.simulate(
            p_loss=args.p_loss,
            p_gain=args.p_gain,
            p_new=args.p_new)
        # "basic" is the number of words we afterwards use to infer
        # phylogeny with neighbor-joining
        dataframe, columns = phy.collect_word_list(
            Language.vocabulary,
            collect_tips_only=not args.sample_internal_nodes)
# Load an optional log-odds dictionary, read the word-list data, then
# either take a user-supplied guide tree or start computing a pairwise
# language distance matrix from shared vocabulary.
# NOTE(review): this chunk is truncated mid-loop -- the rest of the
# distance computation lies outside the visible source.
if args.lodict is None:
    lodict = {}
else:
    # SECURITY NOTE(review): pickle.load on a user-supplied file can
    # execute arbitrary code -- only pass trusted files here.
    lodict = pickle.load(args.lodict)
data = pandas.io.parsers.read_csv(
    args.input,
    sep="\t",
    na_values=[""],
    keep_default_na=False,
    encoding='utf-8',
    index_col=["English", "Language_ID", "Tokens"])
if args.guide_tree:
    # newick.load returns a list; take the first tree as the guide.
    tree = newick.load(args.guide_tree)[0]
else:
    # Calculate an UPGMA tree or something
    by_language = data.groupby("Language_ID")
    languages = []
    distance_matrix = numpy.zeros((len(by_language), len(by_language)))
    # Iterate over all unordered pairs of languages, keeping each
    # language's enumeration index for the matrix coordinates.
    for (l1, (language1, data1)), (l2, (language2, data2)) in (
            itertools.combinations(
                enumerate(by_language), 2)):
        if language1 not in languages:
            languages.append(language1)
        print(language1, language2)
        c = 0
        shared_vocab = 0
        # Compare cognate sets concept by concept over the union of
        # concepts attested in either language.
        for concept in set(data1["Feature_ID"]) | set(data2["Feature_ID"]):
            c1 = set(data1["Cognate Set"][data1["Feature_ID"] == concept])
# NOTE(review): the first three lines are the tail of a function whose
# `def` line lies before this chunk; indentation is reconstructed and
# must be checked against the full file.
            node.length += height - ht
        return root.length + height, root
    return root.length, root


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("trees", type=argparse.FileType('r'))
    parser.add_argument(
        "--output", "-o",
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="Output filename")
    # The three options below all store into the same destination
    # (`acc`); the last one given on the command line wins, and the
    # arithmetic mean is the default.
    parser.add_argument(
        "--mean",
        action="store_const",
        dest="acc",
        const=lambda x: sum(x) / len(x),
        default=lambda x: sum(x) / len(x),
        help="Use arithmetic mean to calculate new branch height")
    parser.add_argument(
        "--rms",
        action="store_const",
        dest="acc",
        const=lambda x: (sum(map(lambda xi: xi * xi, x)) / len(x)) ** 0.5,
        help="Use root mean square to calculate new branch height")
    parser.add_argument(
        "--max",
        action="store_const",
        dest="acc",
        const=max,
        help="Use maximum to calculate new branch height")
    args = parser.parse_args()
    # Normalize every input tree in place, then write them all back out.
    trees = newick.load(args.trees)
    for tree in trees:
        normalize_tree(tree, args.acc)
    newick.dump(trees, args.output)
# NOTE(review): the lines up to `yield tree` are the tail of a generator
# loop whose header lies before this chunk; indentation is reconstructed
# and must be checked against the full file.
        s_data = random_observed_data(tree, models)
        s_names, s_classes = data_pattern(s_data)
        if classes != s_classes:
            # Simulated data does not reproduce the observed pattern;
            # report (if verbose) and move on to the next attempt.
            if verbose:
                print("Not generating the right data: {:}".format(tree.newick))
            yield None
            continue
        # Relabel the simulated tree's nodes with the observed names.
        for old, new in zip(s_names, names):
            tree.get_node(old).name = new
        yield tree


if __name__ == "__main__":
    # Load the reference tree from disk; if it is missing or unparsable,
    # generate a random one and persist it for later runs.
    try:
        with open("true.tree") as treefile:
            true_tree = newick.load(treefile)[0]
    except (ValueError, FileNotFoundError):
        true_tree = random_tree()
        with open("true.tree", "w") as treefile:
            print(true_tree.newick, file=treefile, end=";\n", flush=True)
    # Same pattern for the observed data: load it, or simulate and
    # persist it.
    try:
        with open("data.json") as datafile:
            true_data = json.load(datafile)
    except (json.JSONDecodeError, FileNotFoundError):
        true_data = random_observed_data(true_tree)
        with open("data.json", "w") as datafile:
            json.dump(true_data, datafile, indent=2)
    print(true_tree.newick)
    print(true_data)