Example #1
def parse_newick_to_tree(filename):
    """Read the first tree from a Newick file and convert it to the project's TreeNode structure."""
    # TreeNode and create_children are helpers defined elsewhere in this project.
    with io.open(filename, encoding='ascii') as fp:
        trees = newick.load(fp)

    tree_root = TreeNode(trees[0].name, [])
    for child in create_children(trees[0].descendants):
        tree_root.addChild(child)
    return tree_root
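
For orientation, a minimal sketch of what newick.load / newick.loads return: a list of root Node objects whose name and descendants attributes the example above feeds into the project-specific TreeNode / create_children helpers. The tree string below is hypothetical.

import newick

# loads() parses a Newick string; load() does the same for an open file handle.
trees = newick.loads("(A,B,(C,D)E)F;")
root = trees[0]
print(root.name)                            # 'F'
print([n.name for n in root.descendants])   # ['A', 'B', 'E']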
Example #2
def species_list_from_tree(fname):
    """Return the names of all named nodes in the first tree of a Newick file."""
    from newick import load
    species = []
    # The Python 2 built-in file() no longer exists; open the file explicitly
    # and close it with a context manager.
    with open(fname) as fp:
        tree = load(fp)
    for node in tree[0].walk():
        if node.name:
            species.append(node.name)

    return species
Example #3
def parse(file_name):
    # Tree and load_tree are helpers defined elsewhere in this project;
    # load is newick.load.
    with io.open(file_name, encoding='utf8') as fp:
        loaded_trees = load(fp)
        trees = []
        for t in loaded_trees:
            tree = Tree(leaves=t.get_leaves())
            load_tree(t, tree)
            trees.append(tree)
    return trees
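
Example #3 hands t.get_leaves() to a project-specific Tree constructor. As a rough illustration of what that call yields on newick's Node objects (the input string here is hypothetical):

import newick

# get_leaves() returns the leaf Node objects of the subtree rooted at this node.
root = newick.loads("((A,B)ab,(C,D)cd)root;")[0]
print([leaf.name for leaf in root.get_leaves()])   # ['A', 'B', 'C', 'D']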
Example #4
def read_tree(filename):
    """Read tree into memory.
    
    NOTE - The tree file must be in NEWICK format

    Arguments:
    filename - the location of the tree file.
    
    Requires:
    newick, io.
    """
    with io.open(filename, encoding='utf8') as fp:
        tree = newick.load(fp)
    return tree[0]
Example #5
args = parser.parse_args()

if args.semantic_network:
    related_concepts = networkx.parse_gml(args.semantic_network)
else:
    concept2field = defaultdict(set)
    for c in range(args.concepts):
        concept2field[random.randint(0, args.fields - 1)].add(c)
    related_concepts = {}
    for field in concept2field.values():
        for concept in field:
            related_concepts[concept] = field - {concept}

i = 0
for tree_file in args.trees:
    for tree in newick.load(tree_file):
        i += 1

        phy = Phylogeny(related_concepts,
                        basic=[],
                        tree=tree,
                        scale=args.scale)

        phy.simulate(p_loss=args.p_loss, p_gain=args.p_gain, p_new=args.p_new)

        # "basic" is the number of words we afterwards use to to infer
        # phylogeny with neighbor-joining

        dataframe, columns = phy.collect_word_list(
            Language.vocabulary,
            collect_tips_only=not args.sample_internal_nodes)
Example #6
    if args.lodict is None:
        lodict = {}
    else:
        lodict = pickle.load(args.lodict)

    data = pandas.io.parsers.read_csv(
        args.input,
        sep="\t",
        na_values=[""],
        keep_default_na=False,
        encoding='utf-8',
        index_col=["English", "Language_ID", "Tokens"])

    if args.guide_tree:
        tree = newick.load(args.guide_tree)[0]
    else:
        # Calculate an UPGMA tree or something
        by_language = data.groupby("Language_ID")
        languages = []
        distance_matrix = numpy.zeros((len(by_language), len(by_language)))
        for (l1, (language1, data1)), (l2, (language2,
                                            data2)) in (itertools.combinations(
                                                enumerate(by_language), 2)):
            if language1 not in languages:
                languages.append(language1)
            print(language1, language2)
            c = 0
            shared_vocab = 0
            for concept in set(data1["Feature_ID"]) | set(data2["Feature_ID"]):
                c1 = set(data1["Cognate Set"][data1["Feature_ID"] == concept])
Example #7
            node.length += height-ht
        return root.length + height, root
    return root.length, root


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("trees", type=argparse.FileType('r'))
    parser.add_argument("--output", "-o",
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Output filename")
    parser.add_argument(
        "--mean", action="store_const", dest="acc",
        const=lambda x: sum(x)/len(x), default=lambda x: sum(x)/len(x),
        help="Use arithmetic mean to calculate new branch height")
    parser.add_argument(
        "--rms", action="store_const", dest="acc",
        const=lambda x: (sum(map(lambda xi: xi*xi, x))/len(x))**0.5,
        help="Use root mean square to calculate new branch height")
    parser.add_argument(
        "--max", action="store_const", dest="acc",
        const=max,
        help="Use maximum to calculate new branch height")

    args = parser.parse_args()
    trees = newick.load(args.trees)
    for tree in trees:
        normalize_tree(tree, args.acc)
    newick.dump(trees, args.output)
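
Example #7 ends with the usual read-modify-write round trip. A minimal sketch of that pattern with plain file handles (the file names are hypothetical, and the in-place edit is only a placeholder):

import newick

with open("input.trees") as fp:
    trees = newick.load(fp)            # list of root Nodes

for tree in trees:
    for node in tree.walk():
        # Placeholder edit: label any unnamed leaf.
        if node.name is None and not node.descendants:
            node.name = "unnamed"

with open("output.trees", "w") as fp:
    newick.dump(trees, fp)             # serialise the trees back to Newick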
Example #8
        s_data = random_observed_data(tree, models)
        s_names, s_classes = data_pattern(s_data)
        if classes != s_classes:
            if verbose:
                print("Not generating the right data: {:}".format(tree.newick))
            yield None
            continue
        for old, new in zip(s_names, names):
            tree.get_node(old).name = new
        yield tree


if __name__ == "__main__":
    try:
        with open("true.tree") as treefile:
            true_tree = newick.load(treefile)[0]
    except (ValueError, FileNotFoundError):
        true_tree = random_tree()
        with open("true.tree", "w") as treefile:
            print(true_tree.newick, file=treefile, end=";\n", flush=True)

    try:
        with open("data.json") as datafile:
            true_data = json.load(datafile)
    except (json.JSONDecodeError, FileNotFoundError):
        true_data = random_observed_data(true_tree)
    with open("data.json", "w") as datafile:
        json.dump(true_data, datafile, indent=2)

    print(true_tree.newick)
    print(true_data)