예제 #1
0
 def fromSimulator(self,
                   filename,
                   N=10000,
                   ambienti=2,
                   replicates=2,
                   bonus=0.1,
                   Xs=None,
                   prob=None):
     """Fill this object from a community simulated on the tree in *filename*.

     The tree is loaded with ``Tree(filename)`` and handed to
     ``FakeCommunity`` (defined elsewhere); its results are stored on
     ``self`` (``countTable``, ``SeqName``, ``samplesNames``, ``nogroup``,
     ``groups``) before ``expandTable()`` and ``readTree(filename)`` are
     called.

     Args:
         filename: tree source passed to ``Tree`` and ``readTree``.
         N: forwarded as the second positional argument of
             ``FakeCommunity``.  The original code ignored this parameter
             and always passed 10000, which equals the default.
         ambienti: number of groups; ``ambienti - 1`` nodes are selected
             when ``Xs`` is empty, and ``self.groups`` gets one entry per
             group.
         replicates: number of consecutive sample indices per group.
         bonus: forwarded to ``FakeCommunity``.
         Xs: nodes forwarded to ``FakeCommunity``; when empty or omitted
             they are drawn with ``SelectNode``.  A list supplied by the
             caller is extended in place, as before.
         prob: forwarded to ``FakeCommunity``.

     Returns:
         The tuple ``(names, DB, Ns, table, t)`` returned by
         ``FakeCommunity``.
     """
     # Fresh lists per call: the former ``[]`` defaults were shared between
     # calls, so nodes appended to ``Xs`` leaked into every later call.
     if Xs is None:
         Xs = []
     if prob is None:
         prob = []

     t = Tree(filename)
     if not Xs:
         for _ in range(ambienti - 1):
             Xs.append(SelectNode(list(t.traverse())))

     names, DB, Ns, table, t = FakeCommunity(t,
                                             N,
                                             ambienti=ambienti,
                                             replicates=replicates,
                                             bonus=bonus,
                                             Xs=Xs,
                                             prob=prob)
     self.countTable = DB
     self.SeqName = names
     self.samplesNames = range(DB.shape[1])
     self.nogroup = {0: self.samplesNames}

     # Group g owns the sample indices [g * replicates, (g + 1) * replicates).
     # assumes self.groups already exists as a mapping -- it is not created here
     for g in range(ambienti):
         first = g * replicates
         self.groups[g] = range(first, first + replicates)

     self.expandTable()
     self.readTree(filename)
     return names, DB, Ns, table, t
예제 #2
0
def parseTreeWithDB(t, Ordername, method='PitTrap'):
    """Build a per-leaf, two-row abundance table from the TAXONOMYdb database.

    Reads ``(ReadAccno, Region, Rich)`` rows for the given run and order
    name, then walks the leaves of tree *t*.  A leaf found in the query
    result contributes ``[Rich, 0]`` when its region equals ``regions[0]``
    and ``[0, Rich]`` otherwise; a leaf that is not found contributes
    ``[0, 0]``.

    Args:
        t: tree source passed to ``Tree``.
        Ordername: value matched against ``BestHit_Order.order_name``.
        method: value matched against ``454Reads.Run``.

    Returns:
        ``(names, table)`` where ``names`` is the list of leaf names and
        ``table`` is ``numpy.array(rows).T``.
    """
    A = Tree(t)
    # NOTE(review): host and credentials are hard-coded here; consider
    # moving them to configuration.
    con = MySQLdb.connect(host='cerbero.ba.itb.cnr.it',
                          user='******',
                          passwd='PyroNoise',
                          db='TAXONOMYdb')
    # Values are bound by the driver instead of being pasted into the SQL
    # text, so quotes in the arguments can no longer alter the statement.
    sql = """SELECT 454Reads.ReadAccno, 454Reads.Region, 454Reads.Rich
    FROM 454Reads
    INNER JOIN BestHit_Order ON BestHit_Order.QueryName = 454Reads.ReadAccno
    AND 454Reads.Run = %s
    AND BestHit_Order.order_name = %s"""
    try:
        cur = con.cursor()
        cur.execute(sql, (method, Ordername))
        results = cur.fetchall()
    finally:
        # The connection used to stay open for the life of the process.
        con.close()

    # The original comprehension referenced undefined names (y, z) and
    # raised NameError; each row is unpacked explicitly here.
    DB = {accno: [region, rich] for accno, region, rich in results}

    res = []
    names = []
    for n in A.get_leaf_names():
        names.append(n)
        try:
            region, abb = DB[n]
        except KeyError:
            res.append([0, 0])
        else:
            # `regions` is not defined in this function; presumably a
            # module-level sequence of region labels -- TODO confirm.
            if region == regions[0]:
                res.append([abb, 0])
            else:
                res.append([0, abb])

    return names, numpy.array(res).T
예제 #3
0
파일: bPTP.py 프로젝트: seoljongkim/PTP
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 seed=1234,
                 thinning=100,
                 sampling=10000,
                 burnin=0.1,
                 firstktrees=0,
                 taxa_order=[]):
        """Record the run settings and load the trees found in *filename*.

        Args:
            filename: path handed to ``NexusReader`` when *ftype* is
                ``"nexus"``, otherwise to ``self.raxmlTreeParser``.
            ftype: input format selector; only ``"nexus"`` is special-cased.
            reroot, method, seed, thinning, sampling, burnin: stored
                unchanged on the instance.
            firstktrees: when between 1 and the number of loaded trees,
                only that many leading trees are kept.
            taxa_order: taxon names; when empty, the leaf names of the
                first tree are used instead.
        """
        # Settings are stored before any file is parsed.
        self.method, self.seed = method, seed
        self.thinning, self.sampling = thinning, sampling
        self.burnin, self.firstktrees = burnin, firstktrees

        if ftype != "nexus":
            self.trees = self.raxmlTreeParser(filename)
        else:
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees

        # Optional truncation to the first k trees.
        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        self.taxa_order = taxa_order
        if len(self.taxa_order) == 0:
            first_tree = Tree(self.trees[0])
            self.taxa_order = first_tree.get_leaf_names()

        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot
예제 #4
0
def smart_reroot(treefile, outgroupfile, outfile, format=0):
    """
    Reroot a Newick format tree using ete2 and write it to `outfile`.

    Each non-blank line of `outgroupfile` is treated as a leaf-name prefix.
    The first line that matches at least one leaf defines the outgroup (all
    leaves starting with that prefix).  The tree is rerooted on the common
    ancestor of those leaves, falling back to the first matching leaf when
    ete raises ValueError.

    Returns `outfile` on success.  When no prefix matches, a message is
    written to stderr and `treefile` is returned unchanged.

    Tree reading format options see here:
    http://packages.python.org/ete2/tutorial/tutorial_trees.html#reading-newick-trees
    """
    tree = Tree(treefile, format=format)
    # Leaves are scanned in reverse order, as before.
    leaves = [t.name for t in tree.get_leaves()][::-1]
    outgroup = []
    for o in must_open(outgroupfile):
        o = o.strip()
        if not o:
            # A blank line is an empty prefix: it used to match every leaf
            # and select the whole tree as the outgroup.
            continue
        outgroup = [leaf for leaf in leaves if leaf.startswith(o)]
        if outgroup:
            break

    if not outgroup:
        sys.stderr.write(
            "Outgroup not found. Tree {0} cannot be rerooted.".format(treefile)
            + "\n")
        return treefile

    try:
        tree.set_outgroup(tree.get_common_ancestor(*outgroup))
    except ValueError:
        # Fall back to rerooting on the first matching leaf alone.
        tree.set_outgroup(outgroup[0])
    tree.write(outfile=outfile, format=format)

    logging.debug("Rerooted tree printed to {0}".format(outfile))
    return outfile
예제 #5
0
def timing(tree_size, num_trees, num_samples):
	"""Time three UniFrac implementations on randomly populated trees.

	For each of `num_trees` random trees with `tree_size` leaves,
	`num_samples` simulated environments are generated and each of
	EMDUnifrac with flow, EMDUnifrac without flow and FastUnifrac
	(weighted) is timed on them with ``timeit.default_timer``.

	Returns a tuple of mean wall-clock times in the order
	(EMDUnifrac, EMDUnifrac with flow, FastUnifrac).
	"""
	secs_fast = []
	secs_emd = []
	secs_emd_flow = []

	for _tree_idx in range(num_trees):
		random_tree = Tree()
		random_tree.populate(tree_size, random_branches=True)
		newick = random_tree.write(format=1)
		fast_tree = DndParser(newick, UniFracTreeNode)
		(T, l, nodes_in_order) = EMDU.parse_tree(newick)

		for _sample_idx in range(num_samples):
			# FastUnifrac can only take weight on leaf nodes
			envs = EMDU.simulate_data(random_tree.get_leaf_names())
			(envs_prob_dict, samples) = EMDU.parse_envs(envs, nodes_in_order)
			P = envs_prob_dict[samples[0]]
			Q = envs_prob_dict[samples[1]]

			# EMDUnifrac with flow
			started = timeit.default_timer()
			EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q)
			finished = timeit.default_timer()
			secs_emd_flow.append(finished - started)

			# EMDUnifrac no flow
			started = timeit.default_timer()
			EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q)
			finished = timeit.default_timer()
			secs_emd.append(finished - started)

			# FastUnifrac weighted
			started = timeit.default_timer()
			fast_unifrac(fast_tree, envs, weighted=True, modes=set(['distance_matrix']))
			finished = timeit.default_timer()
			secs_fast.append(finished - started)

	mean_emd = np.array(secs_emd).mean()
	mean_emd_flow = np.array(secs_emd_flow).mean()
	mean_fast = np.array(secs_fast).mean()
	return (mean_emd, mean_emd_flow, mean_fast)
예제 #6
0
def sanitizeByType(container, sanitizeby='tsv', onlycolumns=False):
    '''Run sanitizeString over an iterable of strings and print the result.

    sanitizeby selects how the input is interpreted:
        line   - every line is sanitized as a whole,
        tsv    - every tab-separated field is sanitized, or only the
                 1-based column numbers listed in onlycolumns,
        newick - the joined input is parsed as a tree and each leaf name
                 is sanitized,
        fasta  - each record id is replaced by its sanitized description
                 and the description is cleared.'''
    assert sanitizeby in {'line', 'tsv', 'newick', 'fasta'}

    if sanitizeby == 'line':
        for raw in container:
            print(sanitizeString(raw.strip("\r\n"), False))

    elif sanitizeby == 'tsv':
        for raw in container:
            if not onlycolumns:
                fields = [sanitizeString(cell.strip("\r\n"), False)
                          for cell in raw.split("\t")]
            else:
                fields = raw.strip("\r\n").split("\t")
                # onlycolumns holds 1-based column numbers
                for column in onlycolumns:
                    fields[column - 1] = sanitizeString(fields[column - 1], False)
            print("\t".join(fields))

    elif sanitizeby == 'newick':
        from ete2 import Tree
        tree = Tree("".join(container))
        for leaf in tree:
            leaf.name = sanitizeString(leaf.name, False)
        print(tree.write())

    elif sanitizeby == 'fasta':
        from Bio import SeqIO
        from StringIO import StringIO
        from sys import stdout
        handle = StringIO("".join(container))
        for record in SeqIO.parse(handle, "fasta"):
            record.id = sanitizeString(record.description, False)
            record.description = ''
            SeqIO.write(record, stdout, "fasta")
예제 #7
0
def partition_main(args):
    """Build a tree from a VCF by partitioning, then refine it and write it out.

    Reads ``args.vcf`` (with ``args.min_ev``), partitions the samples into
    a fresh ``Tree``, populates likelihoods on it, writes the initial tree
    to ``<args.output>.pt0.nwk``, then runs ``recursive_NNI`` followed by
    ``recursive_reroot`` and writes the result to ``<args.output>.pt.nwk``.
    The arguments, both trees and the per-site score are printed.
    """
    print(args, file=sys.stderr)

    # base genotype prior
    base_prior = make_base_prior(args.het, GTYPE3)
    # make_mut_matrix returns three matrices; mm0 and mm1 are the ones
    # passed on to the likelihood and search routines below
    mm, mm0, mm1 = make_mut_matrix(args.mu, GTYPE3)

    _vcffile, _variants, _DPRs, PLs = read_vcf(args.vcf, args.min_ev)
    n_site, n_smpl = PLs.shape[:2]

    tree = Tree()
    sem_layer1 = sem(PLs[..., 1], axis=1).mean()
    sem_layer2 = sem(PLs[..., 2], axis=1).mean()
    # Only the first two layers of the last axis are used when layer 1
    # has the larger mean standard error.
    if sem_layer1 > sem_layer2:
        partition_input = PLs[..., 0:2]
    else:
        partition_input = PLs
    partition(partition_input, tree, np.arange(n_smpl), args.min_ev)

    init_tree(tree)
    PLs = PLs.astype(np.longdouble)
    for matrix, feature in ((mm, 'PL'), (mm0, 'PL0')):
        populate_tree_PL(tree, PLs, matrix, feature)
    calc_mut_likelihoods(tree, mm0, mm1)

    print(tree)
    initial_path = args.output+'.pt0.nwk'
    tree.write(outfile=initial_path, format=5)

    best_tree, best_PL = recursive_NNI(tree, mm0, mm1, base_prior)
    best_tree, best_PL = recursive_reroot(best_tree, mm0, mm1, base_prior)
    print(best_tree)
    print('PL_per_site = %.4f' % (best_PL/n_site))
    final_path = args.output+'.pt.nwk'
    best_tree.write(outfile=final_path, format=5)
예제 #8
0
def resolve_polytomies(infileName, outfileName):
    """Resolve all polytomies of the tree in *infileName* and save it.

    Only the first line of the input file is read; a ``'[&R] '`` marker is
    stripped from it before parsing.  The resolved tree is written to
    *outfileName* as Newick (ete ``format=1``).
    """
    # Text mode plus a context manager: the input handle used to be left
    # open, and binary mode made the str-based replace()/write() calls fail
    # on Python 3.
    with open(infileName, 'r') as infile:
        newickString = infile.readline().rstrip().replace('[&R] ', '')
    tree = Tree(newickString)
    tree.resolve_polytomy(recursive=True)
    with open(outfileName, 'w') as outfile:
        outfile.write(tree.write(format=1))
예제 #9
0
def buildFreqTree(data_seq, depth):
    """Build a prefix tree counting windows of up to *depth* items.

    A window is taken at every start position of *data_seq*, shortened at
    the end of the sequence.  Each window is inserted as a path from the
    root: a missing child is created with ``dist=1``, an existing child
    has its ``dist`` increased by one.  Processing stops at the first
    window with at most one item.

    Returns the populated ``Tree``.
    """
    tree = Tree()  # starts out empty

    for start in range(0, len(data_seq)):
        # Slicing clamps at the end of the sequence by itself.
        window = data_seq[start:start + depth]
        if len(window) <= 1:
            break

        node = tree.get_tree_root()
        for item in window:
            kids = node.get_children()
            kid_names = [kid.name for kid in kids]

            if item in kid_names:
                # Seen before under this parent: follow it and count it.
                node = kids[kid_names.index(item)]
                node.dist = node.dist + 1
            else:
                node = node.add_child(name=item, dist=1)

    return tree
예제 #10
0
def date_tree(tree):
    '''Date every internal, non-root node of a newick tree given in format 1.

       Nodes are visited in "postorder" and classified with inner_type():
         type 0 - both children are leaves; the node is dated with the two
                  leaf names.
         type 1 - exactly one child is a leaf; the node is dated with that
                  leaf and the first leaf name below the other child.
         type 2 - both children are internal; the node is dated with the
                  first leaf name below child A and the leaf name at index
                  1 below child B.
       The value returned by date_node() is stored in node.dist.  Progress
       messages are printed.  Returns the dated tree.'''
    tree = Tree(tree, format=1)
    print("Tree loaded!")
    for node in tree.traverse("postorder"):
        print("Dating %s" % node.name)
        if node.is_root() or node.is_leaf():
            continue

        children = node.get_children()
        left, right = children[0], children[1]
        kind = inner_type(node)

        if kind == 0:
            node.dist = date_node(left.name, right.name)
        elif kind == 1:
            if left.is_leaf():
                node.dist = date_node(left.name, right.get_leaf_names()[0])
            elif right.is_leaf():
                node.dist = date_node(left.get_leaf_names()[0], right.name)
        elif kind == 2:
            # NOTE(review): index 1 (second leaf) is used for child B,
            # whereas child A uses index 0 -- confirm this is intended.
            node.dist = date_node(left.get_leaf_names()[0],
                                  right.get_leaf_names()[1])
    return tree
예제 #11
0
def compare_trees(tree_1,tree_2):
    """Return the Robinson-Foulds (symmetric) metric between two trees.

    Both arguments are passed to ``Tree``; the first element of the
    ``robinson_foulds`` result is returned.
    """
    first, second = Tree(tree_1), Tree(tree_2)
    comparison = first.robinson_foulds(second)
    return comparison[0]
예제 #12
0
def small_parsimony(tree, strings):
    """Label the internal nodes of *tree* site by site and count changes.

    A postorder pass gives every node, per site, the intersection of its
    first two children's candidate sets, or their union (adding one to the
    score) when the intersection is empty.  A preorder pass then builds a
    label per node: the parent's character when it is among the node's
    candidates, otherwise an element popped from the candidate set.

    Args:
        tree: newick input parsed with ``Tree(tree, format=1)``.
        strings: mapping of leaf name to its character string; the length
            of the first value is used as the number of sites.

    Returns:
        ``(score, labels)`` where ``labels`` maps every node name that is
        not a key of *strings* to its reconstructed string.
    """
    tree = Tree(tree, format=1)
    n_sites = len(strings.values()[0])

    candidates = defaultdict(dict)  # node name -> {site: set of characters}
    labels = defaultdict(str)       # node name -> label built so far
    score = 0

    # Bottom-up: candidate sets per node and site.
    for site in xrange(n_sites):
        for node in tree.traverse('postorder'):
            if node.is_leaf():
                candidates[node.name][site] = {strings[node.name][site]}
                continue
            kids = node.get_children()
            first = candidates[kids[0].name][site]
            second = candidates[kids[1].name][site]
            shared = first & second
            if shared:
                candidates[node.name][site] = shared
            else:
                candidates[node.name][site] = first | second
                score += 1

    # Top-down: pick one character per node and site.
    for site in xrange(n_sites):
        for node in tree.traverse('preorder'):
            if node.up:
                inherited = labels[node.up.name][site]
                if inherited in candidates[node.name][site]:
                    labels[node.name] += inherited
                    continue
            labels[node.name] += candidates[node.name][site].pop()

    internal_labels = {}
    for name, label in labels.iteritems():
        if name not in strings:
            internal_labels[name] = label
    return score, internal_labels
예제 #13
0
def visualizeTree(sTreePath, pathToSfamilies, bootValue, width, height):
    """Load a tree, attach per-node pie data and build its TreeStyle.

    Every node whose name is a key of the statistics returned by
    ``getFamiliesStatisticsForEachNode(pathToSfamilies, bootValue)`` gets a
    ``pie_data`` feature holding a copy of that node's values.

    Args:
        sTreePath: path given to ``readTreeFromFile``.
        pathToSfamilies, bootValue: forwarded to
            ``getFamiliesStatisticsForEachNode``.
        width, height: accepted but not used by this function.

    Returns:
        ``(stree, ts)``: the annotated tree and the configured TreeStyle.
    """
    stree = readTreeFromFile(sTreePath)
    snodesStatDic = getFamiliesStatisticsForEachNode(pathToSfamilies, bootValue)

    # The original also computed an unused total with reduce(), which
    # raised on an empty value list and needs the Python 2 builtin; the
    # values are attached un-normalised, exactly as before.
    for n in stree.traverse():
        if n.name in snodesStatDic:
            n.add_features(pie_data=list(snodesStatDic[n.name]))

    ts = TreeStyle()
    # Custom layout function (defined elsewhere in the file)
    ts.layout_fn = layout
    ts.mode = "r"
    ts.complete_branch_lines_when_necessary = True
    # Leaf names are not drawn automatically; presumably the layout
    # function adds them -- TODO confirm
    ts.show_leaf_name = False

    return stree, ts
def findCombination(word, lstFunc, alphabet, offset, reprs):
    """Search for a chain of functions whose space contains a representation.

    A tree of "spaces" is grown from a root carrying ``space=offset``.  In
    every round ("mutation") each current leaf is expanded with every
    function in *lstFunc*: ``generateSpaceEx(f, leaf.space, alphabet)``
    yields a space whose first elements (de-duplicated) become the child's
    ``space`` feature.  As soon as one of *reprs* is contained in such a
    space, the new node and its ancestors are passed to ``getSolution`` and
    the process exits.

    The function never returns normally: it ends through ``exit()``,
    either immediately when *word* is already contained in *alphabet* or
    once a representation is found.  NOTE(review): if no representation is
    ever found the loop does not terminate.
    """
    debug("findCombination(%s,%s,%s)" % (word, lstFunc, alphabet))

    mutation = 1
    spaceTree = Tree()
    spaceTree.add_features(space=offset)

    if contains(word, alphabet):
        info("Alphabet contains Word")
        info("PUSH %s" % word)
        exit()

    # Unused locals (his, spaces, tmpAlph, found) and an ancestor list that
    # was built and immediately overwritten have been removed.
    while True:
        info("Mutation: %d !" % mutation)

        # get_leaves() is evaluated once per round, so children added below
        # are only expanded in the next round.
        for n in spaceTree.get_leaves():
            for f in lstFunc:
                # generate space from the leaf's current space
                space = generateSpaceEx(f, n.space, alphabet)
                tmpSpace = list(set([c[0] for c in space]))
                debugListHex(tmpSpace, "SPACE")

                # check whether any word representation exists in the space
                for r in reprs:
                    if contains(r, tmpSpace):
                        info("FOUND : %s" % r)
                        nodeF = n.add_child(name=f)
                        nodeF.add_features(space=tmpSpace, history=space)
                        lstAncestors = [nodeF]
                        lstAncestors.extend(nodeF.get_ancestors())
                        getSolution(r, offset, lstAncestors)
                        exit()

                nodeF = n.add_child(name=f)
                nodeF.add_features(space=tmpSpace, history=space)
        mutation = mutation + 1
예제 #15
0
파일: util.py 프로젝트: ajing/ChemTreeMap
def WriteDotFile(newick):
    """
    Write newick string to a DOT file

    Nodes with an empty name are renamed ``F0``, ``F1``, ...; single quotes
    are removed from all other names.  Every non-root node becomes an edge
    to its parent with the branch length as ``len``; the root is listed on
    its own.  The file is named from the current time using FILE_FORMAT
    plus ``.gv`` and is created in the current working directory.

    :param newick: a string with newick tree structure
    :return: DOT file name
    """
    tree = Tree(newick)

    dot_file_name = datetime.datetime.now().strftime(FILE_FORMAT) + ".gv"

    # rename internal tree name
    i = 0
    for n in tree.traverse():
        if not n.name:
            n.name = "F" + str(i)
            i = i + 1
        else:
            n.name = n.name.replace("\'", "")

    filecontent = []
    for n in tree.traverse():
        if n.up:
            # trailing zeros of the fixed-point length are dropped
            filecontent.append(n.name + "--" + n.up.name + "[len=" + "{:f}".format(n.dist).rstrip("0") + "]")
        else:
            filecontent.append(n.name)

    # The handle used to be left open, so the file was not guaranteed to be
    # flushed when the name was handed back to the caller.
    with open(dot_file_name, "w") as fileobj:
        aline = "graph G{\nnode [shape=circle, style=filled];"
        fileobj.write(aline + "\n")
        fileobj.write("\n".join(filecontent) + "}")
    return dot_file_name
예제 #16
0
def delete(file, target_file, taxa):
    """Remove the given taxa from every tree in *file* and write the rest.

    Each non-blank input line has the form ``<label>=<newick>``.  All
    leaves whose name is in *taxa* are deleted from the tree.  Trees left
    with fewer than three leaves are dropped; the others are written to
    *target_file* as ``<label>=<newick>`` after descending past any chain
    of single-child roots.  A running count of processed trees is printed.

    Args:
        file: path of the input file.
        target_file: path of the output file (overwritten).
        taxa: iterable of leaf names to remove.
    """
    count = 0

    # Context managers close both files even when a tree fails to parse.
    with open(file) as f, open(target_file, "w") as t_file:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. at the end of the file) carry no tree
                # and used to raise IndexError.
                continue
            # Split on the first '=' only, so '=' inside the Newick text
            # (e.g. in bracketed annotations) no longer truncates the tree.
            parts = line.split("=", 1)
            t = Tree(parts[1])
            for taxon in taxa:
                for leaf in t.get_leaves_by_name(name=taxon):
                    leaf.delete()
            if len(t) >= 3:
                # prevent falsy trees for RAxML
                while len(t.children) == 1:
                    t = t.children[0]

                # write it into the file
                t_file.write(parts[0] + "=" + t.write() + "\n")
            count += 1
            print(count)
예제 #17
0
def ETETree(seqs, ref, metric):
    """Tree showing bola alleles covered by tepitope

    Aligns *seqs* with ``Genome.clustalAlignment``, loads ``temp.dnd`` and
    shows it in a half-circle layout.  Nodes whose name is in *metric*
    with a value of at most 0.25 are highlighted in yellow; the leaves
    named in ``refalleles`` are highlighted in light green.  *ref* is not
    used.
    """
    from ete2 import Tree,PhyloTree,TreeStyle,NodeStyle
    # presumably the alignment step is what produces temp.dnd -- TODO confirm
    aln = Genome.clustalAlignment(seqs=seqs)
    t = Tree('temp.dnd')
    #t.set_outgroup(t&ref)

    ts = TreeStyle()
    ts.mode = "c"
    ts.show_leaf_name = True
    ts.arc_start = -180
    ts.arc_span = 180

    cutoff=0.25

    def within_cutoff(node):
        name = node.name
        return name != 'NoName' and name in metric and metric[name] <= cutoff

    matches = [node for node in t.traverse() if within_cutoff(node)]
    print("%s %s" % (len(matches), "nodes have distance <=%s" %cutoff))

    match_style = NodeStyle()
    match_style["bgcolor"] = "Yellow"
    for node in matches:
        node.set_style(match_style)

    reference_style = NodeStyle()
    reference_style["bgcolor"] = "LightGreen"
    # NOTE(review): `refalleles` is not defined in this function; it must
    # come from module scope.
    for allele in refalleles:
        t.get_leaves_by_name(name=allele)[0]
    for allele in []:
        pass
    hlanodes = [t.get_leaves_by_name(name=r)[0] for r in refalleles]
    for node in hlanodes:
        node.set_style(reference_style)

    t.show(tree_style=ts)
    return
예제 #18
0
def parse_weird_tree(tree_string):
    """Convert a tree string with ``[...{a,b,...}]`` annotations to Newick.

    Strategy: strip the bracketed annotations first while remembering every
    annotation that lists more than one taxon, build the ete tree from the
    cleaned string, then add the additional taxa as child nodes of the node
    named after the first taxon of the annotation (a leaf gets a sister
    leaf; an internal node gets an extra leaf child).

    Returns the tree written with ``format=1`` including the root node.
    """
    plain_parts = []
    extra_taxa = {}  # first taxon of an annotation -> remaining taxa
    for chunk in tree_string.split("]"):
        if "[" not in chunk:
            plain_parts.append(chunk)
            continue
        taxa = chunk.split("{")[1].split(",")
        if len(taxa) > 1:
            extra_taxa[taxa[0]] = taxa[1:]
        # keep only the text in front of the annotation
        plain_parts.append(chunk.split("[")[0])

    tree = Tree("".join(plain_parts), format=1)
    #tree.unroot()
    for node in tree.traverse():
        if node.name in extra_taxa:
            for raw_name in extra_taxa[node.name]:
                # the last taxon still carries the closing brace
                node.add_child(name=raw_name.rstrip("}"))

    return tree.write(format=1, format_root_node=True)
예제 #19
0
def compute_GUniFrac(abundance,treefile, alpha=0.5, unweighted=False):
    """Compute a pairwise distance matrix between samples over a tree.

    For every pair of columns of *abundance* and every non-root node, the
    abundances of the leaves below the node are summed per sample (p_a,
    p_b).  Nodes where both sums are zero are skipped.

    * unweighted: the distance is the total length of branches present in
      exactly one sample divided by the total length of branches present
      in at least one.
    * otherwise: each branch is weighted by ``(p_a + p_b) ** alpha`` and
      contributes ``|p_a - p_b| / (p_a + p_b)`` to the numerator.

    Args:
        abundance: table indexed by OTU name with one column per sample
            (accessed through ``.columns``, ``.index`` and ``.loc``).
        treefile: tree source parsed with ``Tree(treefile, format=1)``.
        alpha: exponent of the branch weight in the weighted variant.
        unweighted: select the presence/absence variant.

    Returns:
        ``squareform`` of the condensed distance array.

    The process exits through ``quit()`` when the set of tree leaves and
    the set of table rows differ.  There is no guard against a zero
    denominator for a sample pair.
    """
    n_samples = len(abundance.columns)
    # Floor division keeps the size an integer on Python 3 as well;
    # np.zeros() rejects a float size.
    n_distance = n_samples * (n_samples - 1) // 2
    d_array = np.zeros((n_distance))
    t = Tree(treefile,format=1)
    if set(t.get_leaf_names()) != set(abundance.index):
        print('Error: OTU table contains unknown OTUs. All of OTU names in OTU table should be contained in tree file.')
        quit()

    # Branch length and leaf names do not depend on the sample pair, so
    # they are collected once instead of once per pair.
    branches = [(node.dist, node.get_leaf_names())
                for node in t.traverse() if not node.is_root()]

    for i,(sample1, sample2) in enumerate(itertools.combinations(abundance.columns, 2)):
        # Same text as the former multi-argument print statement.
        print('calculating  %s  vs.  %s ...' % (sample1, sample2))
        denom = 0.0
        numer = 0.0
        for dist, leaves in branches:
            p_a = 0.0
            p_b = 0.0
            for leaf in leaves:
                if leaf in abundance.index:
                    p_a += abundance.loc[leaf,sample1]
                    p_b += abundance.loc[leaf,sample2]
            if p_a == 0.0 and p_b == 0.0:
                continue
            if unweighted:
                if p_a == 0.0 or p_b == 0.0:
                    numer += dist
                denom += dist
            else:
                denom += dist * (p_a + p_b) ** alpha
                numer += dist * (p_a + p_b) ** alpha * abs(p_a - p_b) / (p_a + p_b)
        d_array[i] = numer / denom
    return squareform(d_array)
예제 #20
0
def main():
    """Annotate a tree with rug data, then render and show it.

    Command-line arguments (from the module-level ``parser``) supply the
    comma-separated beta metrics and OTU widths, the input directory, the
    output path and the tree path.  Every label returned by
    ``load_rug_dict`` is added to the legend, in the column matching its
    position within its row.
    """
    args = parser.parse_args()

    beta_metrics = args.beta_metrics.split(',')
    otu_widths = args.otu_widths.split(',')
    input_dir = args.input_dir
    output_fp = args.output_fp
    tree_fp = args.tree_fp

    results_dict, labels_list = load_rug_dict(input_dir, beta_metrics, otu_widths)

    try:
        tree = Tree(tree_fp, format=3)
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.  When the file cannot be
        # read with format 3, add_tip_branches() builds the tree instead.
        tree = add_tip_branches(tree_fp)

    annotate_tree_with_rugs(tree, results_dict, labels_list)

    ts = TreeStyle()

    for row_labels in labels_list:
        for col, label in enumerate(row_labels):
            ts.legend.add_face(TextFace(label, fsize=20), column=col)

    tree.render(output_fp, tree_style = ts)
    tree.show(tree_style = ts)
예제 #21
0
파일: tusv.py 프로젝트: xtmgah/tusv
def write_xml(fname, E, C, l):
    """Write the tree described by matrix *E* to *fname* as PhyloXML.

    The root is node ``n - 1`` where ``n`` is the number of rows of *E*.
    Node ``ci`` is a child of node ``i`` when ``E[i, ci] == 1``.  The
    branch length of a child is the L1 norm of the difference between
    rows ``i`` and ``ci`` of *C*, using columns ``l`` onwards.

    Args:
        fname: output path.
        E: 2-D array; only its first dimension and the entries equal to 1
            are used.
        C: 2-D array indexed by node number.
        l: first column of *C* that takes part in the branch length.
    """
    n, _ = E.shape

    root = Tree()
    root.name = str(n - 1)
    stack = [root]
    while stack:
        cur = stack.pop()
        i = int(cur.name)
        child_idxs = np.where(E[i, :] == 1)[0]
        for ci in child_idxs:
            child = cur.add_child(name=str(ci))
            child.dist = np.linalg.norm(np.subtract(C[i, l:], C[ci, l:]),
                                        ord=1)
            stack.append(child)

    # format=1 gives branch lengths and names for all nodes (leaves and
    # internal); format_root_node=True puts the root node name in the string
    newick_str = root.write(
        features=['name'], format=1, format_root_node=True
    )
    newick_tree = Phylo.read(StringIO(newick_str), 'newick')

    for clade in newick_tree.find_clades():
        # Phylo.read() interprets names of internal nodes as confidences
        # for newick strings, so they are moved back into the name
        if clade.confidence is not None:
            clade.name = clade.confidence
            clade.confidence = None
    xmltree = newick_tree.as_phyloxml()  # convert to PhyloXML.Phylogeny type
    # The handle used to be opened inline and never closed.
    with open(fname, 'w') as handle:
        Phylo.write(xmltree, handle, 'phyloxml')
예제 #22
0
def writeSeqsAndTree():
    """Write the aligned sequences of the tree's leaves and the tree itself.

    Each leaf name is stripped of single quotes and normalised with
    ``prepareName``.  Leaves whose normalised name is known to
    PROCESSED_TO_ALIGNED_NAMES are written to OUTPUT_ALIGNED_FILENAME in
    FASTA form.  With ENUMERATE set, the leaf is renamed to
    ``<position>_<name>`` (1-based position among all leaves) and its
    sequence is re-keyed under that new name in
    ALIGNED_PROTEIN_NAME_TO_SEQ.  Finally the tree is written to
    OUTPUT_TREE_NEWICK_FILENAME.
    """
    prepareNameDict()
    tree = Tree(TREE_FILE)
    terminals = tree.get_leaves()
    # Change protein names in data structures and write protein sequences
    # with changed names to file
    with open(OUTPUT_ALIGNED_FILENAME, "w") as outputFile:
        for position, leaf in enumerate(terminals, 1):
            proteinName = leaf.name.strip("'")
            processedName = prepareName(proteinName)
            if processedName not in PROCESSED_TO_ALIGNED_NAMES:
                continue

            alignedName = PROCESSED_TO_ALIGNED_NAMES[processedName]
            if ENUMERATE:
                leaf.name = str(position) + "_" + proteinName
                seqKey = leaf.name
                # move the sequence from its old key to the new leaf name
                ALIGNED_PROTEIN_NAME_TO_SEQ[seqKey] = \
                    ALIGNED_PROTEIN_NAME_TO_SEQ[alignedName]
                del ALIGNED_PROTEIN_NAME_TO_SEQ[alignedName]
            else:
                leaf.name = proteinName
                seqKey = alignedName

            outputFile.write(">" + leaf.name + "\n")
            outputFile.write(str(ALIGNED_PROTEIN_NAME_TO_SEQ[seqKey]) + "\n")
    tree.write(outfile=OUTPUT_TREE_NEWICK_FILENAME)
예제 #23
0
def ete_tree(aln):
    """Tree showing alleles

    Loads ``temp.dnd`` and shows it in a half-circle layout.  *aln* is not
    used.  The metric-based node selection is currently disabled, so no
    node is highlighted and the printed count is always 0.
    """

    from ete2 import Tree, PhyloTree, TreeStyle, NodeStyle

    t = Tree('temp.dnd')
    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180
    cutoff = 0.25

    def func(node):
        # With the metric lookup disabled this predicate never returns a
        # true value.
        if node.name == 'NoName':
            return False

    # A list is built explicitly: on Python 3 filter() returns an iterator,
    # so the len() call below raised TypeError.
    matches = [node for node in t.traverse() if func(node)]
    print(len(matches), "nodes have distance <=%s" % cutoff)
    nst1 = NodeStyle()
    nst1["bgcolor"] = "Yellow"
    for n in matches:
        n.set_style(nst1)
    t.show(tree_style=ts)
    return
예제 #24
0
	def show(self, i=0):
		"""Render this object as a tree image and open it in the viewer.

		``str(self)`` followed by ``;`` is parsed as the tree.  The image
		is written to ``mytree-<i>.png`` (183 mm wide) with leaf names
		shown and a rotation of 90, then the same tree is displayed.
		"""
		newick_text = str(self)+";"
		tree = Tree(newick_text)

		style = TreeStyle()
		style.rotation = 90
		style.show_leaf_name = True

		image_name = "mytree-{0}.png".format(i)
		tree.render(image_name, w=183, units="mm", tree_style=style)
		tree.show(tree_style=style)
예제 #25
0
def map_cafe_to_tree(clusters, cafe, tree):
    '''Copy the counts stored in the cafe tree of every cluster onto the
       nodes of that cluster's own tree.

       For each cluster the first entry of cafe.clusters[cluster.name] is
       parsed as a newick tree (format 1) and numbered with
       add_num_to_nodes().  A leaf named "<name>_<count>" yields
       name -> count; an internal node yields node.num -> its name without
       the first character.  Every node of cluster.tree then receives the
       matching value as node.cafe, looked up by name for leaves and by
       node.num for internal nodes.  The tree argument is not used.
       Returns the clusters.'''
    for cluster in clusters:
        newick = cafe.clusters[cluster.name][0]+";"
        cafe_tree = add_num_to_nodes(Tree(newick,format=1))

        counts = {}
        for node in cafe_tree.traverse("postorder"):
            if node.is_leaf():
                pieces = node.name.split("_")
                counts[pieces[0]] = pieces[1]
            else:
                counts[node.num] = node.name[1:]

        for node in cluster.tree.traverse("postorder"):
            key = node.name if node.is_leaf() else node.num
            node.cafe = counts[key]
    return clusters
예제 #26
0
class K_Graph(object):
	"""A tree of topics and points with a notion of the current topic.

	``theme`` is the tree holding all topics and points; ``topic`` is the
	name passed to the most recent ``add_topic`` call ('' initially).
	"""

	def __init__(self):
		self.theme = Tree()
		self.topic = ''

	def add_point(self,topic,point):
		"""Add a child named *point* under every node whose name is in *topic*.

		NOTE(review): ``t.name in topic`` is a substring test when *topic*
		is a string -- confirm that equality is not what was intended.
		"""
		# The traversal is materialised first: nodes added by this call
		# must not be visited themselves, otherwise a point that also
		# matches *topic* keeps receiving new children without end.
		for t in list(self.theme.traverse()):
			if t.name in topic:
				t.add_child(name=point)

	def add_topic(self,topic):
		"""Add *topic* directly under the root and make it the current topic."""
		self.theme.add_child(name=topic)
		self.topic = topic

	def getCurrentGraph(self):
		"""Return the first traversed node whose name is in the current topic.

		Returns None when no node matches.
		"""
		for t in self.theme.traverse():
			if t.name in self.topic:
				return t

	def get_topic(self):
		"""Return the current topic name."""
		return self.topic

	def save(self):
		"""Pickle this object to ``data.pickle`` in the working directory."""
		with open('data.pickle', 'wb') as f:
			# Pickle the whole object using the highest protocol available.
			pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

	def load(self):
		"""Return the object stored in ``data.pickle``; ``self`` is unchanged."""
		with open('data.pickle', 'rb') as f:
			# The protocol version used is detected automatically.
			# NOTE(review): unpickling can execute arbitrary code; only
			# load files this program wrote itself.
			return pickle.load(f)
예제 #27
0
def neighbor_joining(D, tree, internals):
    """Join the nodes listed in `internals` pairwise until two remain.

    In every round the pair with the smallest Q value is detached from
    `tree`, placed under a newly created node and that node is attached to
    `tree`.  Progress dots are written to stderr.

    Args:
        D (np.array): pairwise differences between samples based on PLs.
            It is modified in place: the row/column of every newly created
            node is filled in.
            # assumes D already has rows/columns for the nodes created
            # here -- TODO confirm against the caller
        tree (Tree): tree whose nodes are named after the entries of
            `internals` (they are looked up with ``tree & str(...)``).
        internals (np.array): array of node numbers still to be joined;
            the caller's array is not modified (it is rebound locally).

    Returns:
        (D, tree): the updated distance matrix and the tree, in that order.
    """
    # Original author's notes: fsum will have better precision when adding
    # distances across sites; distances are based on PLs, not mutation.
    print('neighbor_joining() begin', end=' ', file=sys.stderr)
    m = len(internals)
    # if m is 2 then only two nodes are left connected to the root
    while m > 2:
        # Sub-matrix of D restricted to the nodes that are still active.
        d = D[
            internals[:, None],
            internals]
        # Row sums scaled by (m - 2).
        u = d.sum(axis=1) / (m - 2)

        Q = np.zeros(shape=(m, m), dtype=np.longdouble)
        # Q matrix: distance minus both scaled row sums, filled symmetrically
        for i, j in itertools.combinations(xrange(m), 2):
            Q[i, j] = d[i, j] - u[i] - u[j]
            Q[j, i] = Q[i, j]
        # The diagonal is excluded from the minimum search.
        np.fill_diagonal(Q, np.inf)
        # Position (within the active set) of the smallest Q value.
        i, j = np.unravel_index(
            Q.argmin(), (m, m)
        )
        # Row/column of D used for the node created in this round.
        l = len(D) + 2 - m

        # Distance from the new node to every active node.  The entries for
        # i and j written here are overwritten right below.
        # NOTE(review): the usual neighbor-joining update halves this sum;
        # the code does not -- confirm this is intended.
        for k in xrange(m):
            D[l, internals[k]] = D[internals[k],
                                   l] = d[i, k] + d[j, k] - d[i, j]
        # Distances from the new node to the two joined nodes (vi, vj).
        D[l, internals[i]] = D[internals[i],
                               l] = vi = (d[i, j] + u[i] - u[j]) / 2
        D[l, internals[j]] = D[internals[j],
                               l] = vj = (d[i, j] + u[j] - u[i]) / 2

        # Move both joined nodes under a new node named after l.
        ci = tree & str(internals[i])
        cj = tree & str(internals[j])
        ci.detach()
        cj.detach()
        node = Tree(name=str(l))
        # Branch lengths are truncated to integers.
        node.add_child(ci, dist=int(vi))
        node.add_child(cj, dist=int(vj))
        tree.add_child(node)

        # Replace the joined pair by the new node in the active set.
        internals = np.delete(internals, [i, j])
        internals = np.append(internals, l)
        m = len(internals)
        print('.', end='', file=sys.stderr)

    print(' done', file=sys.stderr)
    return D, tree
예제 #28
0
def date_tree(tree):
    '''Date every internal, non-root node of a newick tree given in format 1.

       Nodes are visited in "postorder" and classified with inner_type():
         type 0 - both children are leaves; the node is dated with the two
                  leaf names.
         type 1 - exactly one child is a leaf; the node is dated with that
                  leaf and the first leaf name below the other child.
         type 2 - both children are internal; the node is dated with the
                  first leaf name below child A and the leaf name at index
                  1 below child B.
       The value returned by date_node() is stored in node.dist.  Progress
       messages are printed.  Returns the dated tree.'''
    tree = Tree(tree, format=1)
    print("Tree loaded!")
    for node in tree.traverse("postorder"):
        print("Dating %s" % node.name)
        if node.is_root() or node.is_leaf():
            continue

        children = node.get_children()
        left, right = children[0], children[1]
        kind = inner_type(node)

        if kind == 0:
            node.dist = date_node(left.name, right.name)
        elif kind == 1:
            if left.is_leaf():
                node.dist = date_node(left.name, right.get_leaf_names()[0])
            elif right.is_leaf():
                node.dist = date_node(left.get_leaf_names()[0], right.name)
        elif kind == 2:
            # NOTE(review): index 1 (second leaf) is used for child B,
            # whereas child A uses index 0 -- confirm this is intended.
            node.dist = date_node(left.get_leaf_names()[0],
                                  right.get_leaf_names()[1])
    return tree
예제 #29
0
	def __init__(self, tree, start_config = None, reroot = False, startmethod = "H0", min_br = 0.0001, seed = 1234, thinning = 100, sampling = 10000, burning = 0.1, taxa_order = []):
		"""Set up the sampler state from a tree and a starting setting.

		Args:
			tree: tree input; passed to ``exponential_mixture`` when no
				start configuration is given, otherwise parsed with
				``Tree(tree, format = 1)``.
			start_config: starting setting; when None it is obtained from
				an ``exponential_mixture`` search.
			reroot, startmethod: forwarded to that search.
			min_br, thinning, sampling, burning: stored on the instance.
			seed: seed of the instance's own ``random.Random``.
			taxa_order: taxon names; when it equals ``[]`` the leaf names
				of the tree are used.
		"""
		# Identity test: ``== None`` would invoke a custom __eq__ on the
		# setting object.
		if start_config is None:
			me = exponential_mixture(tree= tree)
			me.search(strategy = startmethod, reroot = reroot)
			me.count_species(print_log = False, pv = 0.0)
			self.tree = me.tree
			self.current_setting = me.max_setting
		else:
			self.current_setting = start_config
			self.tree = Tree(tree, format = 1)
		self.burning = burning
		self.last_setting = self.current_setting
		self.current_logl = self.current_setting.get_log_l()
		self.last_logl = self.last_setting.get_log_l()
		self.min_br = min_br
		self.rand_nr = random.Random()
		self.rand_nr.seed(seed)
		self.thinning = thinning
		self.sampling = sampling
		if taxa_order == []:
			self.taxaorder = self.tree.get_leaf_names()
		else:
			self.taxaorder = taxa_order
		self.numtaxa = len(self.taxaorder)
		self.partitions = []
		self.llhs = []
		self.nsplit = 0
		self.nmerge = 0
		# remember the ML partition
		self.maxllh = self.current_logl
		to, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
		self.maxpar = spe
		self.max_setting = self.current_setting
		# record all delimitation settings for plotting, this could consume a lot of MEM
		self.settings = []
def calDistanceMatrix(wordlist, treeList):
  """Print and return the matrix of smallest tree distances between word pairs.

  Each word is looked up in ``db.wordSynsetMap`` to obtain its synset name.
  For every pair (i, k) with i < k, all nodes carrying the two synset names are
  searched in every tree of ``treeList`` and the smallest distance found is
  stored symmetrically. Pairs for which nothing is found keep the initial
  value 100.

  The matrix is sized from ``len(wordlist)`` (it used to be reshaped to a fixed
  10x10). A word without a database entry gets a ``None`` placeholder so the
  indices of ``wordlist`` and the synset list stay aligned; such a word simply
  contributes no distances.

  Returns the ``len(wordlist)`` x ``len(wordlist)`` numpy array that is printed.
  """
  size = len(wordlist)
  distanceMatrix = np.zeros((size, size)) + 100

  synsetList = []
  for word in wordlist:
    if db.wordSynsetMap.find({'word': word}).count():
      synsetList.append(db.wordSynsetMap.find({'word': word})[0]['synset'])
    else:
      synsetList.append(None)

  # NOTE(review): the first word is matched through this fixed list of senses
  # instead of its stored synset -- confirm this special case is still wanted.
  first_word_synsets = ['travel.n.01','travel.v.03','travel.v.04','travel.v.05','travel.v.06']

  for i in range(size):
    if i == 0:
      sources = first_word_synsets
    elif synsetList[i] is None:
      continue
    else:
      sources = [synsetList[i]]
    for tree in treeList:
      for source in sources:
        for pos1 in tree.search_nodes(name = source):
          for k in range(i + 1, size):
            if synsetList[k] is None:
              continue
            for pos2 in tree.search_nodes(name = synsetList[k]):
              distance = pos1.get_distance(pos2)
              print("%s %s %s %s" % (synsetList[i], synsetList[k], wordlist[i], wordlist[k]))
              if distance < distanceMatrix[i][k]:
                distanceMatrix[i][k] = distance
                distanceMatrix[k][i] = distance
  print(distanceMatrix)
  return distanceMatrix
예제 #31
0
def make_tree(treefile, image_file, clone_info):
    """Render the tree in ``treefile`` to ``image_file`` with a week legend.

    The legend pairs each week label with the colour at the same position in
    the palette. Every node name has quotes removed and '.' replaced by ',';
    the part before the first space is printed and looked up in ``clone_info``.
    For a hit, ``clone_info[name][0]`` is used as a 1-based palette index and
    ``clone_info[name][1]`` is scaled into a size, both passed to
    ``style_node``. Named internal nodes get their name drawn above the branch.
    """
    colour_list = ['MidnightBlue', 'RoyalBlue', 'LightSkyBlue', 'Aquamarine',
                   'SpringGreen', 'GreenYellow', 'Gold', 'DarkOrange']
    weeks = ['6', '14', '53', '92', '144']
    t = Tree(treefile, format=1)
    ts = TreeStyle()
    # zip stops at the shorter list, so exactly one legend row per week label.
    for colour, week in zip(colour_list, weeks):
        ts.legend.add_face(CircleFace(20, colour), column=0)
        ts.legend.add_face(TextFace('week' + week), column=1)
    ts.legend_position = 2
    ts.show_leaf_name = True
    ts.branch_vertical_margin = 15
    ts.rotation = 90
    for node in t.traverse():
        node.name = node.name.replace("'", "").replace(".", ",")
        name = node.name.split(' ')[0]
        print(name)
        if name in clone_info:
            style_node(node, colour_list[int(clone_info[name][0])-1], int(int(clone_info[name][1])/10)+5)
        if not node.is_leaf() and node.name != 'NoName':
            f = TextFace(node.name)
            f.margin_top = 2.5
            f.margin_bottom = 2.5
            f.margin_right = 2.5
            f.margin_left = 2.5
            node.add_face(f, column=0, position="branch-top")
    t.render(image_file, tree_style=ts)
예제 #32
0
def tree_from_character_table(species, table):
    """Build a tree by repeatedly joining the two subtrees marked in a row.

    Every row of ``table`` is passed through ``invert`` and the rows are sorted
    by their number of '1' characters. Each species starts as a child of the
    root. While rows remain, the first row must contain exactly two '1's:
    their positions select two current subtrees, which are moved under a new
    child of the root. That row is then dropped and the second position is
    removed from every remaining row and from the subtree list.

    Returns the tree, or None as soon as the first remaining row does not
    contain exactly two '1's.
    """
    subtrees = []
    tree = Tree()
    root = tree.get_tree_root()

    rows = sorted((invert(entry) for entry in table),
                  key=lambda entry: entry.count('1'))

    for specie in species:
        subtrees.append(root.add_child(name=specie))

    while rows:
        head = rows[0]
        if head.count('1') != 2:
            return None

        first, second = [hit.start() for hit in re.finditer('1', head)]
        left, right = subtrees[first], subtrees[second]

        joined = root.add_child()
        subtrees[first] = joined
        joined.add_child(left.detach())
        joined.add_child(right.detach())

        # Drop the consumed row, then the merged-away column everywhere.
        del subtrees[second]
        rows = [other[:second] + other[second + 1:] for other in rows[1:]]

    return tree
예제 #33
0
파일: bubble_map.py 프로젝트: a1an77/ete
def get_example_tree():
    """Return a populated random tree and a circular TreeStyle for drawing it.

    The tree is populated with 20 leaves and random branches, and every node
    receives a ``weight`` feature drawn from ``random.randint(0, 50)``. The
    style uses the module's ``layout`` function, hides the automatic leaf
    names, and shows branch lengths and supports.
    """
    tree = Tree()
    tree.populate(20, random_branches=True)
    for node in tree.traverse():
        node.add_features(weight=random.randint(0, 50))

    style = TreeStyle()
    style.mode = "c"                  # circular drawing
    style.layout_fn = layout          # custom per-node layout
    style.show_leaf_name = False      # names are added by the layout instead
    style.show_branch_support = True
    style.show_branch_length = True
    return tree, style
예제 #34
0
def main(args):
    """Convert a GTR/CDT clustering pair into a newick tree file.

    ``args`` is ``(gtr_file, cdt_file, nwk_file)``. The CDT file (tab
    separated, two leading non-data rows) maps gene ids to names, which are
    upper-cased. Each GTR row becomes a node whose two children are either
    named leaves or previously built nodes; branch lengths are derived from
    the difference between the child's and the parent's ``dist`` values. The
    node built from the last GTR row is written to ``nwk_file`` in newick
    format 5.

    Raises ValueError when the GTR file contains no rows.
    """
    gtr_file, cdt_file, nwk_file = args

    gid_to_name = {}
    with open(cdt_file) as cdt_handle:
        reader = csv.reader(cdt_handle, delimiter="\t")
        next(reader)  # header
        next(reader)  # EWEIGHT
        for row in reader:
            gid, name = row[:2]
            gid_to_name[gid] = name.upper()

    nodes = {}
    node = None
    with open(gtr_file) as gtr_handle:
        for gtr in map(GTRLine._make, csv.reader(gtr_handle, delimiter="\t")):
            node = Tree()
            parent_name, parent_dist = gtr.parent, float(gtr.dist)
            for child in (gtr.left_child, gtr.right_child):
                if child in gid_to_name:
                    node.add_child(name=gid_to_name[child], dist=1-parent_dist)
                else:
                    assert child in nodes, child
                    child_node, child_dist = nodes[child]
                    node.add_child(child_node, dist=child_dist-parent_dist)

            nodes[parent_name] = (node, parent_dist)

    if node is None:
        raise ValueError("no rows found in %s" % gtr_file)

    # The last row read is treated as the root of the clustering.
    sys.stderr.write("writing newick tree to %s\n" % nwk_file)
    node.write(format=5, outfile=nwk_file)
class TrackedItem(object):
    """A named record that carries a data store and a matching ``Tree`` node."""

    def __init__(self):
        self.name = ''
        self.parent = None
        self.data = DataStore(float)
        self.leaf = False
        self.node = Tree()

    @property
    def root(self):
        """Topmost item reached by following ``parent`` links (self if none)."""
        if self.parent:
            return self.parent.root
        return self

    def update_stats(self, name, parent, data, sf):
        """Merge ``data`` into this item and refresh its tree node.

        The item and its node take ``name``, and the node gets a back-reference
        to this item. If ``parent`` is given and this node is not yet one of
        its node's children, the item is attached below it. The node's
        ``weight`` feature is set from ``self.data[sf]`` and every key of the
        data store is copied onto the node as a feature.
        """
        self.data.merge(data)
        self.name = name
        self.node.name = name
        self.node.item = self

        already_attached = bool(parent) and self.node in parent.node.children
        if parent and not already_attached:
            self.parent = parent
            parent.node.add_child(self.node)

        self.node.add_feature("weight", self.data[sf])
        for key in self.data:
            self.node.add_feature(key, self.data[key])

    def __str__(self):
        parts = ["%d %s" % (self.data[key], key) for key in self.data]
        return "%s: %s" % (self.name, ','.join(parts))
예제 #36
0
    def setUp(self):
        """Create the fixtures: an example tree, a circular style and an alignment.

        The tree has a root named "root" with two clades ("Left", "Right"),
        each holding two named leaves; every node's ``dist`` is 0. The
        alignment contains one three-base DNA record per leaf name.
        """
        tree = Tree()
        root = tree.get_tree_root()
        root.dist = 0
        root.name = "root"
        for clade_name, leaf_names in (("Left", ("Alpha", "Beta")),
                                       ("Right", ("Gamma", "Delta"))):
            clade = root.add_child(name=clade_name)
            for leaf_name in leaf_names:
                clade.add_child(name=leaf_name)
        for descendant in tree.iter_descendants():
            descendant.dist = 0

        circular = TreeStyle()
        circular.mode = "c"
        circular.arc_start = 0
        circular.arc_span = 360
        circular.show_leaf_name = True
        circular.show_branch_length = False

        sequences = (("Alpha", "AAG"), ("Beta", "AGA"),
                     ("Gamma", "AAA"), ("Delta", "GGA"))

        self.circular_style = circular
        self.exampleTree = tree
        self.alignment = MultipleSeqAlignment([
            SeqRecord(Seq(bases, generic_dna), id=leaf_id)
            for leaf_id, bases in sequences
        ])
def write_json_files(OTU_table, target_directory):
    """Write one JSON file per non-leaf descendant of the template tree.

    ``OTU_table`` is first converted to a labelled copy named
    ``OTU_table + '.taxonomies'``. For every non-leaf descendant of the root
    of 'fill_mod.newick', the node name is parsed with ``taxonomy_parser``,
    the abundances of its children are collapsed from the labelled table, and
    the result is dumped to ``<target_directory>/<node name>.json``. The file
    is always created; it is left empty when the collapsed result is empty.
    """
    tree_file = '/home/ubuntu/templates/fill_mod.newick'
    OTU_table_labeled = OTU_table + '.taxonomies'
    Taxonomy.convert_GGIDs_to_latin_names(OTU_table, OTU_table_labeled, '/home/ubuntu/databases/gg_13_5_otus/taxonomy/97_otu_taxonomy.txt')

    tree = Tree(tree_file, format=1)
    internal_nodes = [candidate
                      for candidate in tree.get_descendants("preorder")
                      if not candidate.is_leaf()]

    for internal in internal_nodes:
        parsed = taxonomy_parser(internal.name)
        node_name = parsed[parsed['Level']]
        json_dict = Taxonomy.collapse_taxonomic_contents_for_json(
            OTU_table_labeled, parsed['Children'], node_name)
        out_path = os.path.join(target_directory, node_name + '.json')
        with open(out_path, 'w') as outfile:
            if len(json_dict):
                json.dump(json_dict, outfile, sort_keys=True)
예제 #38
0
def read_tree(tree):
    """Parse ``tree`` and return it in newick format 9 with cleaned quoted names.

    Only node names that start with a single quote are changed: all single
    quotes are removed and spaces become underscores.
    """
    parsed = Tree(tree)
    quote = "'"
    for node in parsed.traverse():
        if not node.name.startswith(quote):
            continue
        node.name = node.name.replace(quote, "").replace(" ", "_")
    return parsed.write(format=9)
예제 #39
0
 def calculate_mislabels_distance(self, human_ranks, bestplace_bid):
     """Average distances from the best-placement node to each labelled rank.

     For every position in ``human_ranks``, all entries of
     ``self.bid_taxonomy_map`` whose rank at that position equals the given
     rank are collected as tree nodes (searched by their ``B`` attribute).
     The mean branch-length distance and the mean topology-only distance from
     the node with ``B == bestplace_bid`` are recorded; 0.0 is recorded for
     both when no entry matches.

     Prints both lists and returns ``(distance, node_distance)``.
     """
     t = Tree(self.tree, format = 1)
     bestnode = t.search_nodes(B=bestplace_bid)[0]
     distance = []
     node_distance = []
     for level, rank in enumerate(human_ranks):
         matching = [t.search_nodes(B=str(bid))[0]
                     for bid in self.bid_taxonomy_map.keys()
                     if self.bid_taxonomy_map[bid][level] == rank]
         if not matching:
             distance.append(0.0)
             node_distance.append(0.0)
             continue
         count = float(len(matching))
         branch_total = 0.0
         hop_total = 0.0
         for node in matching:
             branch_total += bestnode.get_distance(node)
             hop_total += bestnode.get_distance(node,  topology_only=True)
         distance.append(branch_total/count)
         node_distance.append(hop_total/count)
     print("Average distance from best EPA-placement to original labeled ranks: \n        " + str(distance))
     print("Average node distance from best EPA-placement to original labeled ranks: \n        " +str(node_distance))
     return distance, node_distance
def parse_bootrep_file(fname, root, bootrep_num):
    """Split a bootstrap-replicate file into one temporary tree file per replicate.

    Each line of ``fname`` is ``<replicate number>\\t<newick string>``. Trees
    are read until a replicate number greater than ``bootrep_num`` appears;
    each one is rooted on ``root`` and grouped by replicate. Every replicate
    must contain the same number of trees. One temp file per replicate is
    written (newick format 5, one tree per line).

    Progress is reported on stdout. Returns ``(trees_per_replicate,
    list_of_temp_file_paths)``.
    """
    bootrep_temp_files = []
    bootreps = defaultdict(list)
    sys.stdout.write("Parsing bootrep file")
    sys.stdout.flush()
    printrep = 1
    # The context manager closes the input even when parsing a line fails.
    with open(fname, 'rU') as bootrep_handle:
        for line in bootrep_handle:
            repnum, tree_string = line.strip().split('\t')
            repnum = int(repnum)
            if repnum > printrep:
                sys.stdout.write('.')
                sys.stdout.flush()
                printrep = repnum
            if repnum > bootrep_num:
                break
            tree = Tree(tree_string.strip('"'))
            tree.set_outgroup(root)
            bootreps[repnum].append(tree)
    genes = list(set(len(trees) for trees in bootreps.values()))
    assert len(genes) == 1
    for repnum, trees in bootreps.items():
        temp_fd, temp_out = tempfile.mkstemp(prefix='{}-'.format(repnum),
                                             suffix='.mpest-bootrep')
        # fdopen takes ownership of the descriptor and closes it on any exit.
        with os.fdopen(temp_fd, 'w') as temp_handle:
            for tree in trees:
                temp_handle.write(tree.write(format=5) + "\n")
        bootrep_temp_files.append(temp_out)
    return genes[0], bootrep_temp_files
예제 #41
0
파일: cafe.py 프로젝트: tolotos/Tfsuite
 def map_cafe_tree(self, cafe_file):
     """Replace ``self.tree`` with the id-annotated tree from a CAFE output file.

     The tree is parsed from the text after the first ':' on the last line
     starting with "# IDs". Every node gets the features ``ident``,
     ``branch_p``, ``position`` and ``count``. For leaves, ``ident`` is the
     first run of digits in the name and the name is cut at the first '<'.
     For internal nodes other than the root, ``ident`` is taken the same way
     and the name is copied from the common ancestor of the two children in
     the current ``self.tree``.

     Raises ValueError when the file has no "# IDs" line.
     """
     tree = None
     with open(cafe_file, "r") as handle:
         for line in handle:
             if line[0:5] == "# IDs":
                 tree = Tree(line.split(":")[1] + ";", format=1)
     if tree is None:
         raise ValueError("no '# IDs' line found in %s" % cafe_file)
     for node in tree.traverse("postorder"):
         node.add_features(ident=None,
                           branch_p="na",
                           position=None,
                           count=0)
         if node.is_leaf():
             pos = node.name.find("<")
             node.ident = re.search(r"\d+", node.name).group(0)
             node.name = node.name[:pos]
         elif node.up:
             # Children were visited first (postorder), so their names are
             # already the cleaned ones used in self.tree.
             child_1 = node.children[0].name
             child_2 = node.children[1].name
             ancestor = self.tree.get_common_ancestor(child_1, child_2)
             node.ident = re.search(r"\d+", node.name).group(0)
             node.name = ancestor.name
     self.tree = tree
예제 #42
0
def map_cafe_to_tree(clusters, cafe, tree):
    '''Copy the counts encoded in each cluster's CAFE tree onto the cluster tree.

       For every cluster the newick string stored first under
       cafe.clusters[cluster.name] is parsed and numbered with
       add_num_to_nodes. Leaf names are split on "_" into key and count;
       internal nodes are keyed by their num and use their name without its
       first character as the count. Each node of cluster.tree then receives
       the matching value as its cafe attribute (leaves by name, internal
       nodes by num). The tree argument is not used. Returns clusters.'''
    for cluster in clusters:
        newick = cafe.clusters[cluster.name][0] + ";"
        numbered = add_num_to_nodes(Tree(newick, format=1))

        counts = {}
        for cafe_node in numbered.traverse("postorder"):
            if cafe_node.is_leaf():
                fields = cafe_node.name.split("_")
                counts[fields[0]] = fields[1]
            else:
                counts[cafe_node.num] = cafe_node.name[1:]

        for target in cluster.tree.traverse("postorder"):
            key = target.name if target.is_leaf() else target.num
            target.cafe = counts[key]
    return clusters
예제 #43
0
def parse_bootrep_file(fname, root, bootrep_num):
    """Split a bootstrap-replicate file into one temporary tree file per replicate.

    Each line of ``fname`` is ``<replicate number>\\t<newick string>``. Trees
    are read until a replicate number greater than ``bootrep_num`` appears;
    each one is rooted on ``root`` and grouped by replicate. Every replicate
    must contain the same number of trees. One temp file per replicate is
    written (newick format 5, one tree per line).

    Progress is reported on stdout. Returns ``(trees_per_replicate,
    list_of_temp_file_paths)``.
    """
    bootrep_temp_files = []
    bootreps = defaultdict(list)
    sys.stdout.write("Parsing bootrep file")
    sys.stdout.flush()
    printrep = 1
    # The context manager closes the input even when parsing a line fails.
    with open(fname, 'rU') as bootrep_handle:
        for line in bootrep_handle:
            repnum, tree_string = line.strip().split('\t')
            repnum = int(repnum)
            if repnum > printrep:
                sys.stdout.write('.')
                sys.stdout.flush()
                printrep = repnum
            if repnum > bootrep_num:
                break
            tree = Tree(tree_string.strip('"'))
            tree.set_outgroup(root)
            bootreps[repnum].append(tree)
    genes = list(set(len(trees) for trees in bootreps.values()))
    assert len(genes) == 1
    for repnum, trees in bootreps.items():
        temp_fd, temp_out = tempfile.mkstemp(prefix='{}-'.format(repnum),
                                             suffix='.mpest-bootrep')
        # fdopen takes ownership of the descriptor and closes it on any exit.
        with os.fdopen(temp_fd, 'w') as temp_handle:
            for tree in trees:
                temp_handle.write(tree.write(format=5) + "\n")
        bootrep_temp_files.append(temp_out)
    return genes[0], bootrep_temp_files
예제 #44
0
def get_tree_object_in_newick(tree,
                              id_to_sample_dict,
                              normalize_branches=False):
    """Convert a binary cluster tree into a newick string (format 1).

    ``tree`` is the root of the clustering, i.e.
    ``tree = hcluster.to_tree(c_res)``. Each cluster node becomes a ``Tree``
    node whose branch length is half of its parent's ``dist``. Leaves are
    named through ``id_to_sample_dict[node.id]``; internal nodes are named
    'Int' followed by their id. With ``normalize_branches`` the tree is passed
    through ``get_normalized_newick`` before being written.
    """
    root = Tree()
    root.dist = 0
    root.name = "root"
    item2node = {tree: root}

    pending = [tree]
    while pending:
        cluster = pending.pop()
        half_dist = cluster.dist / 2.0
        for sub_cluster in (cluster.left, cluster.right):
            if not sub_cluster:
                continue
            child = Tree()
            child.dist = half_dist
            if sub_cluster.is_leaf():
                child.name = id_to_sample_dict[sub_cluster.id]
            else:
                child.name = 'Int' + str(sub_cluster.id)

            item2node[cluster].add_child(child)
            item2node[sub_cluster] = child
            pending.append(sub_cluster)

    if normalize_branches:
        root = get_normalized_newick(root)

    return root.write(format=1)
예제 #45
0
class bayesianptp:
	"""Run MCMC on multiple trees"""
	def __init__(self, filename, ftype = "nexus", reroot = False, method = "H1", seed = 1234, thinning = 100, sampling = 10000, burnin = 0.1, firstktrees = 0, taxa_order = []):
		"""Load the trees from ``filename`` and store the run settings.

		With ``ftype == "nexus"`` the file is read through ``NexusReader`` and its
		tree block is detranslated; any other value sends the file to
		``self.raxmlTreeParser``. When ``firstktrees`` is between 1 and the number
		of trees, only that many leading trees are kept. ``taxa_order`` is copied;
		when it is empty the leaf names of the first tree are used.
		"""
		self.method = method
		self.seed = seed
		self.thinning = thinning
		self.sampling = sampling
		self.burnin = burnin
		self.firstktrees = firstktrees
		if ftype == "nexus":
			self.nexus = NexusReader(filename)
			self.nexus.blocks['trees'].detranslate()
			self.trees = self.nexus.trees.trees
		else:
			# NOTE(review): raxmlTreeParser is not defined in the visible part of this class.
			self.trees = self.raxmlTreeParser(filename)

		if 0 < self.firstktrees <= len(self.trees):
			self.trees = self.trees[:self.firstktrees]
		# Copy: remove_outgroups() edits this list in place and must not
		# modify the list object the caller passed in.
		self.taxa_order = list(taxa_order)
		if len(self.taxa_order) == 0:
			self.taxa_order = Tree(self.trees[0]).get_leaf_names()
		self.numtaxa = len(self.taxa_order)
		self.numtrees = len(self.trees)
		self.reroot = reroot


	def remove_outgroups(self, ognames, remove = False, output = ""):
		"""Reroot every tree on the outgroups and optionally remove them.

		A single outgroup name is used directly as the outgroup; for several
		names their common ancestor is used unless it is the tree itself. With
		``remove`` the names are deleted from ``self.taxa_order`` and each tree
		is pruned to the remaining taxa. The rerooted trees replace
		``self.trees`` as newick strings and, when ``remove`` is set and
		``output`` is non-empty, are also written to ``output`` one per line.

		A ValueError raised while doing this is reported and ends the program
		through ``sys.exit()``.
		"""
		self.reroot = False
		try:
			if remove:
				for og in ognames:
					self.taxa_order.remove(og)
				self.numtaxa = len(self.taxa_order)
			for i in range(len(self.trees)):
				t = Tree(self.trees[i])
				if len(ognames) < 2:
					t.set_outgroup(ognames[0])
				else:
					ancestor = t.get_common_ancestor(ognames)
					if not t == ancestor:
						t.set_outgroup(ancestor)
				if remove:
					t.prune(self.taxa_order, preserve_branch_length=True)
				self.trees[i] = t.write()
			if remove and output!="":
				with open(output, "w") as fout:
					for t in self.trees:
						fout.write(t + "\n")
		except ValueError as e:
			print(e)
			print("")
			print("")
			print("Somthing is wrong with the input outgroup names")
			print("")
			print("Quiting .....")
			sys.exit()
예제 #46
0
def run(args):
    """Create ``args.number`` populated trees and hand each one to ``dump``.

    Every tree is populated with ``args.size`` as the size argument and
    ``args.random_branches`` as the random-branches flag.
    """
    import random
    from ete2 import Tree

    for _ in xrange(args.number):
        generated = Tree()
        generated.populate(args.size,
                           random_branches=args.random_branches)
        dump(generated)
예제 #47
0
def convert_tree(infile, id_dict):
    """Rename tree nodes through ``id_dict`` and write the result next to ``infile``.

    The tree is read in newick format 1. Every node whose name is a key of
    ``id_dict`` is renamed to the mapped value; other nodes are left alone.
    The output goes to ``<infile without extension>.formal_id.tree``.
    """
    tree_file = '%s.formal_id.tree' % (os.path.splitext(infile)[0])
    tree_t = Tree(infile, format=1)
    for node in tree_t.traverse("postorder"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if node.name in id_dict:
            node.name = id_dict[node.name]
    tree_t.write(format=1, outfile=tree_file)
예제 #48
0
def convert_tree(infile, id_dict):
    """Rename tree nodes through ``id_dict`` and write the result next to ``infile``.

    The tree is read in newick format 1. Every node whose name is a key of
    ``id_dict`` is renamed to the mapped value; other nodes are left alone.
    The output goes to ``<infile without extension>.formal_id.tree``.
    """
    tree_file = '%s.formal_id.tree' % (os.path.splitext(infile)[0])
    tree_t = Tree(infile, format=1)
    for node in tree_t.traverse("postorder"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if node.name in id_dict:
            node.name = id_dict[node.name]
    tree_t.write(format=1, outfile=tree_file)
예제 #49
0
def get_example_tree():
    """Return a tree populated with 10 leaves and a rectangular TreeStyle.

    The style uses the module's ``layout`` function and hides the automatic
    leaf names.
    """
    style = TreeStyle()
    style.mode = "r"
    style.show_leaf_name = False
    style.layout_fn = layout

    tree = Tree()
    tree.populate(10)
    return tree, style
def printNodeNames(treeFilePath):
    """Write the name of every node of the tree to "NodeName.txt", one per line.

    The tree is loaded from ``treeFilePath`` and traversed in the default
    order. The output file is created in the current working directory and is
    closed even if writing fails.
    """
    tree = Tree(treeFilePath)
    with open("NodeName.txt", "w") as names_out:
        for n in tree.traverse():
            names_out.write(n.name)
            names_out.write("\n")
def getEte2Tree(hypoTree):
  """Recursively convert a nested list structure into a ``Tree``.

  Each list entry of ``hypoTree`` becomes a child subtree; any other entry
  supplies the name of the current node through its ``name`` attribute (the
  last such entry wins). List subclasses are treated as lists as well.
  """
  t = Tree()
  for entry in hypoTree:
    if isinstance(entry, list):
      t.add_child(getEte2Tree(entry))
    else:
      t.name = entry.name
  return t
예제 #52
0
파일: mut.py 프로젝트: cjlee112/logtree
def build_ete_tree(node):
    """Create an ete ``Tree`` root for ``node`` and grow it along its edges.

    The root gets a branch length of 0. For each of the three entries of
    ``node.closest``, ``build_ete_edge`` is called once per element with that
    element's ``edge``, the constant 1 and the new root.
    """
    from ete2 import Tree
    root = Tree()
    root.dist = 0.
    for group in range(3):
        group_size = len(node.closest[group])
        for position in range(group_size):
            edge = node.closest[group][position].edge
            build_ete_edge(node, edge, 1, root)
    return root
def readTree(tree):
    """Return the tree in newick format 9 and save the same text to ``tree + ".nl"``.

    ``tree`` is passed to ``Tree`` for parsing and is also used as the prefix
    of the output file name.
    """
    parsed = Tree(tree)
    with open(tree+".nl","w") as tree_nolabel:
        tree_nolabel.write(parsed.write(format=9))
    topology = parsed.write(format=9)
    return topology
예제 #54
0
def parents(data):
    """List (genotype weights, distance to root) for every named node.

    The tree is parsed from ``data`` in newick format 1 and visited in level
    order. Nodes named 'NoName' are skipped. For each remaining node a dict
    with the keys 'AA', 'Aa' and 'aa' set to 0.0 is created and the entry for
    the node's own name is set to 1.0; it is paired with the node's distance
    from the tree root. The pairs are returned in reverse visiting order.
    """
    tree = Tree(data, format=1)
    collected = []
    for node in tree.traverse('levelorder'):
        if node.name == 'NoName':
            continue
        weights = dict.fromkeys(('AA', 'Aa', 'aa'), 0.0)
        weights[node.name] = 1.0
        collected.append((weights, tree.get_distance(node)))
    collected.reverse()
    return collected
예제 #55
0
def constructing_final_tree(distance_matrix, protein_labels):
    """Build a tree with ``neighbor_joining`` and open it in a circular view.

    The string form of the neighbour-joining result, terminated with ';', is
    parsed as newick. The root branch length is set to 0 and the tree is shown
    with leaf names and the module's ``my_layout`` function.
    """
    joined = neighbor_joining(distance_matrix, protein_labels)
    newick = str(joined) + ";"
    tree = Tree(newick)
    tree.dist = 0

    style = TreeStyle()
    style.layout_fn = my_layout
    style.show_leaf_name = True
    style.mode = "c"
    tree.show(tree_style=style)
예제 #56
0
def treeorder(treefile):
    """Return the leaf names of the tree in ``treefile`` in preorder."""
    from ete2 import Tree, faces, TreeStyle, NodeStyle, AttrFace
    root = Tree(treefile).get_tree_root()
    return [descendant.name
            for descendant in root.iter_descendants("preorder")
            if descendant.is_leaf()]
예제 #57
0
def get_distances(input_dir, group, genomes):
    """Load ``<input_dir>/<group>.nwk`` and locate the common ancestor of ``genomes``.

    Any exception raised while loading the tree or finding the ancestor is
    reported (file name on stderr, error text on stdout) and the program is
    ended with ``sys.exit()``.

    NOTE(review): ``results`` and the ancestor are not used by the code shown
    here and nothing is returned -- the function looks unfinished.
    """
    results = {}
    in_file = os.path.join(input_dir, group + ".nwk")
    try:
        t = Tree(in_file)
        a = t.get_common_ancestor(*genomes)
    except Exception as e:
        sys.stderr.write("Problem with newick " + in_file + "\n")
        print("Unexpected error: %s" % str(e))
        sys.exit()
예제 #58
0
def tree_generation(entities):
    """Build and show a chain of word-suffix nodes for every entity.

    Each entity is split on runs of whitespace or '-'. Starting from a new
    root, one node is added per suffix of the word list, from the last word
    alone up to the full phrase, each as the only child of the previous one.
    The result of calling ``show()`` on the deepest node is printed.

    NOTE(review): ``show()`` is called on the last node added, not on the root
    -- confirm that is the node meant to be displayed.
    """
    for entity in entities:
        words = split(r'[\s-]+', entity)
        t = Tree()
        # Suffixes from shortest (last word only) to longest (all words).
        for start in range(len(words) - 1, -1, -1):
            t = t.add_child(name=' '.join(words[start:]))
        print(t.show())
예제 #59
0
def get_taxa_for_one_alignment(fname, raxml=False):
    """Return the leaf names of the first tree in ``fname`` as a tuple.

    With ``raxml`` the first line is taken as the newick string itself;
    otherwise it is expected to be ``<replicate number>\\t<newick string>``.
    Surrounding double quotes are stripped before parsing.
    """
    # Only the first line is needed; the handle is closed right after reading it.
    with open(fname, 'rU') as handle:
        line = handle.readline()
    if raxml:
        tree_string = line.strip()
    else:
        repnum, tree_string = line.strip().split('\t')
    tree_string = tree_string.strip('"')
    tree = Tree(tree_string)
    taxa = tuple(tree.get_leaf_names())
    return taxa