Example #1
0
def writeNewick(species, distance, output):
    '''
    Build a neighbour-joining tree and emit it as a topology-only Newick string.

    species  -- list of species names (one per row of the matrix)
    distance -- triangular distance matrix, in the row layout accepted by
                Bio.Phylo.TreeConstruction._DistanceMatrix
    output   -- path of the file to write when an output file was requested

    The Newick text is post-processed before it is written:
      * labels of inner nodes ("Inner<N>") and their branch lengths are removed,
      * all remaining branch lengths are removed,
      * underscores are replaced by spaces.

    The result goes to the file named by `output` when `args.output` is set,
    otherwise to standard out.
    '''
    import re
    import Bio.Phylo.TreeConstruction as TreeConstruction

    distanceMatrix = TreeConstruction._DistanceMatrix(species, distance)
    treeConstructor = TreeConstruction.DistanceTreeConstructor(method='nj')
    njTree = treeConstructor.nj(distanceMatrix)

    # Serialise in memory instead of round-tripping through a randomly named
    # temp file in the current directory (racy, leaked on error, and the old
    # bytes.encode('hex') naming trick only worked on Python 2).
    tree = njTree.format("newick")

    tree = re.sub(r"Inner[0-9]+:[-0-9\.]+", "", tree)
    # Neighbour joining can yield negative branch lengths, so allow a sign
    # here as well; otherwise ":-0.01"-style fragments survive in the output.
    tree = re.sub(r":-?[0-9\.]+", "", tree)
    tree = re.sub("_", " ", tree)

    # NOTE(review): the destination is selected via the module-level `args`
    # rather than the `output` parameter; presumably both carry the same
    # value -- confirm against the caller.
    if args.output:
        with open(output, 'w') as outputFile:
            outputFile.write(tree)
    else:
        sys.stdout.write(tree)
Example #2
0
def labeler(files, etalon_tree, tree_path=".", rebuild=False):
    """
    Constructs labels for given files. (Best phylogeny reconstruction method)

    For every alignment the NJ, UPGMA and RAxML trees stored in ``tree_path``
    are compared with the etalon tree by symmetric difference, and the index
    of the closest one is used as the label.

    :param files: sequence of file paths to alignments (fasta); it is iterated
        more than once and its length is taken, so it must not be a one-shot
        iterator
    :param etalon_tree: the path to etalon tree, relative to ``tree_path``
    :param tree_path: a directory, where built trees will be stored
    :param rebuild: set it True, if you need to rebuild trees or build them from scratch
    :return: array with one label per file: 0 = NJ, 1 = UPGMA, 2 = RAxML
    """

    def stem(path):
        # File name without its directory and without anything after the
        # first dot; used to derive the tree file names.
        return path.split("/")[-1].split(".")[0]

    tree_path = osp.abspath(tree_path)  # raxml needs absolute paths
    if rebuild:
        calculator = TreeConstruction.DistanceCalculator('blosum62')
        dist_constructor = TreeConstruction.DistanceTreeConstructor()

        # construct all trees with UPGMA, NJ and raxml
        for aln_path in files:
            aln = AlignIO.read(aln_path, 'fasta')
            name = stem(aln_path)
            # The pairwise distances are the expensive part; compute them once
            # and reuse them for both methods (Biopython's upgma()/nj() work
            # on their own copy of the matrix).
            dist_matrix = calculator.get_distance(aln)
            tree = dist_constructor.upgma(dist_matrix)
            Phylo.write(tree, osp.join(tree_path, 'upgma_{}.tre'.format(name)),
                        'newick')
            tree = dist_constructor.nj(dist_matrix)
            Phylo.write(tree, osp.join(tree_path, 'nj_{}.tre'.format(name)),
                        'newick')
            raxml = RaxmlCommandline(sequences=osp.abspath(aln_path),
                                     model='PROTCATWAG',
                                     name='{}.tre'.format(name),
                                     threads=3,
                                     working_dir=tree_path)
            _, stderr = raxml()
            print(stderr)
            print('{} finished'.format(name))

    # get best tree
    # All trees share one taxon namespace so that they can be compared.
    tns = dendropy.TaxonNamespace()
    act_tree = dendropy.Tree.get_from_path(osp.join(tree_path, etalon_tree),
                                           "newick",
                                           taxon_namespace=tns)
    act_tree.encode_bipartitions()

    # Column order defines the label values: 0 = NJ, 1 = UPGMA, 2 = RAxML.
    tree_file_patterns = ("nj_{}.tre", "upgma_{}.tre", "RAxML_bestTree.{}.tre")
    distances = np.zeros(shape=(len(files), len(tree_file_patterns)))
    for i, aln_path in enumerate(files):
        name = stem(aln_path)
        candidates = [
            dendropy.Tree.get_from_path(
                osp.join(tree_path, pattern.format(name)),
                "newick",
                taxon_namespace=tns)
            for pattern in tree_file_patterns
        ]
        for col, candidate in enumerate(candidates):
            distances[i, col] = dendropy.calculate.treecompare.symmetric_difference(
                candidate, act_tree)
    return distances.argmin(1)
    def _make_nj_tree(self, treesams, dm):
        """
        **PRIVATE**

        Build a neighbour-joining tree for the given samples and return it
        as a newick string.

        Parameters
        ----------
        treesams: dict
            {sam name: samid, sam name: samid, ...}
        dm: str or None
            path of a file to write the lower-triangular distance matrix to,
            one comma-separated row per sample (sample name first, diagonal
            omitted). An existing file is replaced. None skips this step.

        Returns
        -------
        nwkstring: str
            tree as newick string
        """

        # Materialise the names once: they fix the row/column order below and
        # Biopython's matrix classes only accept a real list of names, not the
        # view object that dict.keys() returns on Python 3.
        aSampleNames = list(treesams.keys())
        iNofSams = len(aSampleNames)
        # n*(n-1) is always even, so floor division is exact.
        logging.info("Calculating %i distances. Patience!", ((iNofSams**2) - iNofSams) // 2)

        dist_mat = get_distance_matrix(self.cur, treesams.values())

        # Lower-triangular matrix including the zero diagonal.
        aSimpleMatrix = []
        for i, sample_1 in enumerate(aSampleNames):
            sid1 = treesams[sample_1]
            mat_line = [dist_mat[sid1][treesams[sample_2]]
                        for sample_2 in aSampleNames[:i]]
            mat_line.append(0)
            aSimpleMatrix.append(mat_line)

        if dm is not None:
            # Mode 'w' replaces any previous file, and the file is opened once
            # instead of being re-opened in append mode for every row.
            with open(dm, 'w') as f:
                for sample, mat_line in zip(aSampleNames, aSimpleMatrix):
                    f.write("%s\n" % ','.join([sample] + [str(x) for x in mat_line[:-1]]))
            logging.info("Distance matrix written to file: %s", dm)

        logging.info("Bulding tree.")
        oDistMat = TreeConstruction._DistanceMatrix(aSampleNames, aSimpleMatrix)
        constructor = TreeConstruction.DistanceTreeConstructor()
        oTree = constructor.nj(oDistMat)

        # Tree.format() serialises in memory, so no temporary directory is
        # needed (and none can be left behind if writing fails).
        nwkstring = oTree.format('newick')

        return nwkstring
Example #4
0
def neighbor_joining_tree(aln, prot_model='blosum62'):
    """Build a neighbor-joining tree from an alignment.

    Distances are computed with a ``DistanceCalculator`` configured with
    ``prot_model`` and handed to a ``DistanceTreeConstructor`` running in
    'nj' mode. The constructed tree is returned.
    """
    distance_calculator = TreeConstruction.DistanceCalculator(prot_model)
    nj_builder = TreeConstruction.DistanceTreeConstructor(distance_calculator,
                                                          'nj')
    return nj_builder.build_tree(aln)
Example #5
0
 def NJ(self, f=min):
     """Draw a neighbor-joining tree of the languages as ASCII art.

     The matrix returned by ``self.distanceMatrix()`` is made symmetric in
     place: each pair of mirrored entries is replaced by ``f`` applied to
     both of them. Its lower triangle (diagonal included) is then used to
     build the tree.
     """
     matrix = self.distanceMatrix()
     n_langs = len(self.languages)

     # Symmetrise: combine (row, col) and (col, row) with f, store in both.
     for row in range(n_langs):
         for col in range(row, n_langs):
             matrix[row][col] = f(matrix[row][col], matrix[col][row])
             matrix[col][row] = matrix[row][col]

     # Lower triangle, one growing row per language.
     lower_rows = []
     for row in range(n_langs):
         lower_rows.append([matrix[row][col] for col in range(row + 1)])

     names = [language.name for language in self.languages]
     dist_matrix = TreeConstruction._DistanceMatrix(names, lower_rows)
     builder = TreeConstruction.DistanceTreeConstructor(method='nj')
     Phylo.draw_ascii(builder.nj(dist_matrix))
Example #6
0
def make_nj_tree(dist_mat, dArgs, aSampleNames):
    '''
    Build a neighbour joining tree with Biopython.Phylo and save it as newick.

    Parameters
    ----------
    dist_mat: dict
        distance matrix as a dict of dicts,
        i.e. dist_mat[a][b] is the distance from a to b
    dArgs: dict
        input argument dictionary; dArgs['tree'] is the output file
    aSampleNames: list
        list of sample names

    Returns
    -------
    always 0; the tree is written to dArgs['tree'] in newick format
    '''

    # Lower-triangular rows: distances to all earlier samples, then the
    # zero for the diagonal.
    lower_triangle = []
    for row_idx, row_name in enumerate(aSampleNames):
        row = []
        for col_idx, col_name in enumerate(aSampleNames):
            if col_idx == row_idx:
                row.append(0)
                break
            row.append(dist_mat[row_name][col_name])
        lower_triangle.append(row)

    nj_input = TreeConstruction._DistanceMatrix(aSampleNames, lower_triangle)
    nj_tree = TreeConstruction.DistanceTreeConstructor().nj(nj_input)
    Phylo.write(nj_tree, dArgs['tree'], 'newick')
    logging.info("Tree file written.")

    return 0
Example #7
0
    def UPGMA(self, f=min):
        """Draw a UPGMA tree of the languages and return the symmetrised matrix.

        Asymmetric 'distances' are reconciled with the passed in function:
        every pair of mirrored entries of ``self.distanceMatrix()`` is
        replaced, in place, by ``f`` of the two values.
        """
        matrix = self.distanceMatrix()
        n_langs = len(self.languages)

        # Symmetrise: combine (row, col) and (col, row) with f, store in both.
        for row in range(n_langs):
            for col in range(row, n_langs):
                matrix[row][col] = f(matrix[row][col], matrix[col][row])
                matrix[col][row] = matrix[row][col]

        # Lower triangle (diagonal included) as input for the tree builder.
        lower_rows = []
        for row in range(n_langs):
            lower_rows.append([matrix[row][col] for col in range(row + 1)])

        names = [language.name for language in self.languages]
        dist_matrix = TreeConstruction._DistanceMatrix(names, lower_rows)
        builder = TreeConstruction.DistanceTreeConstructor(method='upgma')
        Phylo.draw_ascii(builder.upgma(dist_matrix))

        return matrix
# Script fragment: turn pairwise scores into distances, cluster them with
# UPGMA and show the tree. `scores`, `pdbs`, `leng`, `scoresM` and `PhyloM`
# are defined earlier in the script.
print_matrix("Scores", scores, pdbs)
# Label the score matrix with the file-name part of each entry in pdbs
# (everything after the last '/').
scoresM = TreeConstruction._Matrix([x[x.rfind('/') + 1:] for x in pdbs],
                                   scoresM)

# Distance of a pair = mean of the two self-scores minus the pair's score.
# Only j <= i is filled in, i.e. the lower triangle including the diagonal.
distances = {}
for i in range(leng):
    distances[pdbs[i]] = {}
    for j in range(i + 1):
        distances[pdbs[i]][pdbs[j]] = (scores[pdbs[i]][pdbs[i]] + scores[
            pdbs[j]][pdbs[j]]) / 2.0 - scores[pdbs[i]][pdbs[j]]
        # NOTE(review): assumes PhyloM was initialised as a list of `leng`
        # lists before this point -- confirm.
        PhyloM[i].append(distances[pdbs[i]][pdbs[j]])
# Replace the plain rows by a labelled distance matrix (same labels as above).
PhyloM = TreeConstruction._DistanceMatrix([x[x.rfind('/') + 1:] for x in pdbs],
                                          PhyloM)
print_matrix("Distances", distances, pdbs)

# Cluster with UPGMA, print an ASCII rendering, then ladderize the tree
# (the ASCII rendering above shows the tree before ladderizing).
tree = TreeConstruction.DistanceTreeConstructor().upgma(PhyloM)
Phylo.draw_ascii(tree)
tree.ladderize()


#Phylo.draw_graphviz(tree, node_size=0)
def hide_inner(node):
    """Return the label to show for ``node``, or None to leave it unlabelled.

    Nodes whose name starts with "Inner" get no label, and neither do nodes
    without a name (``node.name`` is None), which would otherwise raise an
    AttributeError. Any other node is labelled with its name.
    """
    name = node.name
    if name is None or name.startswith("Inner"):
        return None
    return name


try:
    Phylo.draw(tree, label_func=hide_inner, do_show=False)
    #show()