Ejemplo n.º 1
0
def RunDlcpar(treesPat, ogSet, nOGs, speciesTreeFN, workingDir):
    """
    Run one dlcpar_search command per orthogroup gene tree.

    Implementation:
    - (skip: label species tree)
    - create <workingDir>dlcpar/ if it does not exist yet
    - hand the trees to RootGeneTreesArbitrarily (presumably this writes the
      rooted, polytomy-resolved copies into the results directory - confirm
      against that helper)
    - write the gene -> species map with WriteGeneSpeciesMap
    - run dlcpar_search on each tree

    Arguments:
    treesPat - %-format string; `treesPat % i` is the tree file of orthogroup i
    ogSet - forwarded unchanged to WriteGeneSpeciesMap
    nOGs - number of orthogroups; indices 0 .. nOGs-1 are processed
    speciesTreeFN - species tree file given to dlcpar_search via -s
    workingDir - directory prefix; joined by plain string concatenation, so it
                 must already end with a path separator

    Returns the path of the dlcpar results directory.

    Note: the degree of parallelism comes from `nThreads`, which is not a
    parameter of this function and has to be available at module level.
    """
    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(treesPat, nOGs, resultsDir)
    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet)

    # Each tree is addressed by its original base name inside the results dir.
    commandLists = []
    for iog in xrange(nOGs):
        treeFN = resultsDir + os.path.basename(treesPat % iog)
        command = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1' % (
            speciesTreeFN, mapFN, treeFN)
        # One single-command list per tree, so the trees can run in parallel.
        commandLists.append([command])

    util.RunParallelOrderedCommandLists(nThreads, commandLists,
                                        qHideStdout=True)
    return resultsDir
Ejemplo n.º 2
0
def RunDlcpar(treesIDsPatFn, ogSet, speciesTreeFN, workingDir, nParallel):
    """
    Run one dlcpar_search command per orthogroup gene tree.

    Implementation:
    - (skip: label species tree)
    - create <workingDir>dlcpar/ if it does not exist yet
    - hand the trees to RootGeneTreesArbitrarily (presumably this writes the
      rooted, polytomy-resolved copies into the results directory - confirm
      against that helper)
    - write the gene -> species map from ogSet.SpeciesDict()
    - run dlcpar_search on each tree, nParallel at a time

    Arguments:
    treesIDsPatFn - callable; `treesIDsPatFn(i)` is the tree file of orthogroup i
    ogSet - provides OGs() (only its length is used) and SpeciesDict()
    speciesTreeFN - species tree file given to dlcpar_search via -s
    workingDir - directory prefix; joined by plain string concatenation, so it
                 must already end with a path separator
    nParallel - forwarded to util.RunParallelOrderedCommandLists

    Returns the path of the dlcpar results directory.
    """
    nOGs = len(ogSet.OGs())
    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(treesIDsPatFn, nOGs, resultsDir)
    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet.SpeciesDict())

    # Each tree is addressed by its original base name inside the results dir.
    commandLists = []
    for iog in xrange(nOGs):
        treeFN = resultsDir + os.path.basename(treesIDsPatFn(iog))
        command = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1' % (
            speciesTreeFN, mapFN, treeFN)
        # One single-command list per tree, so the trees can run in parallel.
        commandLists.append([command])

    util.RunParallelOrderedCommandLists(nParallel, commandLists,
                                        qHideStdout=True)
    return resultsDir
Ejemplo n.º 3
0
 def RunAnalysis(self, qSpeciesTree=True):
     """
     Compute gene distances, infer the gene trees and obtain a species tree.

     The species tree comes from one of three routes:
     - fewer than four species: WriteSpeciesTreeIDs_TwoThree writes it directly
     - EnoughOGsForSTAG(...) is true: stag.Run_ForOrthoFinder is run once the
       gene trees have been inferred
     - otherwise: a distance-based fallback command is queued ahead of the
       gene tree commands

     Afterwards every gene tree is passed through util.RenameTreeTaxa with the
     sequence dictionary, and (if qSpeciesTree) the species tree is passed
     through it with the species dictionary.

     Returns (spTreeFN_ids, qSTAG) when qSpeciesTree is true,
     otherwise (None, qSTAG).
     """
     fileHandler = files.FileHandler
     species = self.ogSet.seqsInfo.speciesToUse

     util.PrintUnderline("Calculating gene distances")
     ogs, partialMatrices = self.GetOGMatrices_FullParallel()
     ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)
     util.PrintTime("Done")

     treeCommands = self.PrepareGeneTreeCommand()
     if len(species) >= 4:
         qSTAG = self.EnoughOGsForSTAG(ogs, species)
         if not qSTAG:
             print("Using fallback species tree inference method")
             D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
             spTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
             # The species tree command goes to the front of the queue.
             treeCommands = [[spTreeCommand]] + treeCommands
     else:
         qSTAG = False
         spTreeFN_ids = fileHandler.GetSpeciesTreeUnrootedFN()
         WriteSpeciesTreeIDs_TwoThree(species, spTreeFN_ids)

     util.PrintUnderline("Inferring gene and species trees")
     util.RunParallelOrderedCommandLists(self.nProcesses, treeCommands)

     if qSTAG:
         # STAG reads the gene trees, so it can only run once they are complete.
         print("")
         spTreeFN_ids = fileHandler.GetSpeciesTreeUnrootedFN()
         stag.Run_ForOrthoFinder(fileHandler.GetOGsTreeDir(),
                                 fileHandler.GetWorkingDirectory_Write(),
                                 species,
                                 spTreeFN_ids)

     seqDict = self.ogSet.Spec_SeqDict()
     nOGs = len(self.ogSet.OGs())
     for iog in xrange(nOGs):
         util.RenameTreeTaxa(fileHandler.GetOGsTreeFN(iog),
                             fileHandler.GetOGsTreeFN(iog, True),
                             seqDict,
                             qSupport=False,
                             qFixNegatives=True)

     if not qSpeciesTree:
         return None, qSTAG
     util.RenameTreeTaxa(spTreeFN_ids,
                         fileHandler.GetSpeciesTreeUnrootedFN(True),
                         self.ogSet.SpeciesDict(),
                         qSupport=False,
                         qFixNegatives=True)
     return spTreeFN_ids, qSTAG
Ejemplo n.º 4
0
    def RunAnalysis(self, qSpeciesTree=True):
        """
        Infer the gene trees and a distance-based species tree in one batch.

        Steps:
        - build and write the orthogroup distance matrices
        - derive the species distance matrix D and prepare the species tree
          command
        - run the species tree command followed by the gene tree commands
        - pass every gene tree through util.RenameTreeTaxa with the sequence
          dictionary; if qSpeciesTree, do the same for the species tree with
          the species dictionary, writing
          <workingDir>SpeciesTree_unrooted.txt

        Returns (number of orthogroups, D, spTreeFN_ids, spTreeUnrootedFN);
        the last two entries are None when qSpeciesTree is false.
        """
        ogs, partialMatrices = self.GetOGMatrices()
        ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)

        D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
        spTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
        geneTreeCommands = self.PrepareGeneTreeCommand()

        util.PrintUnderline("Inferring gene and species trees")
        # The species tree command is queued first, then all gene trees.
        allCommands = [[spTreeCommand]] + geneTreeCommands
        util.RunParallelOrderedCommandLists(self.nProcesses, allCommands,
                                            qHideStdout=True)

        seqDict = self.ogSet.Spec_SeqDict()
        nTrees = len(self.ogSet.OGs())
        for iog in xrange(nTrees):
            util.RenameTreeTaxa(self.TreeFilename_IDs(iog), self.treesPat % iog,
                                seqDict, qFixNegatives=True)

        if not qSpeciesTree:
            return len(ogs), D, None, None

        spTreeUnrootedFN = self.workingDir + "SpeciesTree_unrooted.txt"
        util.RenameTreeTaxa(spTreeFN_ids, spTreeUnrootedFN,
                            self.ogSet.SpeciesDict(), qFixNegatives=True)
        return len(ogs), D, spTreeFN_ids, spTreeUnrootedFN
def RunDlcpar(ogSet, speciesTreeFN, workingDir, nParallel, qDeepSearch):
    """
    Run one dlcpar_search command per orthogroup gene tree.

    Implementation:
    - (skip: label species tree)
    - create <workingDir>dlcpar/ if it does not exist yet
    - hand the trees to RootGeneTreesArbitrarily (presumably this writes the
      rooted, polytomy-resolved copies into the results directory - confirm
      against that helper)
    - pass every gene tree through util.RenameTreeTaxa with the sequence
      dictionary
    - write the gene -> species map from ogSet.SpeciesDict()
    - run dlcpar_search on each tree, nParallel at a time

    Arguments:
    ogSet - provides OGs(), Spec_SeqDict() and SpeciesDict()
    speciesTreeFN - species tree file given to dlcpar_search via -s
    workingDir - directory prefix; joined by plain string concatenation, so it
                 must already end with a path separator
    nParallel - forwarded to util.RunParallelOrderedCommandLists
    qDeepSearch - if true, replace the fixed `-x 1` invocation by one whose
                  -i / --nconverge values grow with the orthogroup size

    Returns (results directory, filename pattern of the dlcpar locus trees).
    """
    fileHandler = files.FileHandler
    ogs = ogSet.OGs()
    nOGs = len(ogs)

    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(nOGs, resultsDir)

    seqDict = ogSet.Spec_SeqDict()
    for iog in xrange(nOGs):
        util.RenameTreeTaxa(fileHandler.GetOGsTreeFN(iog),
                            fileHandler.GetOGsTreeFN(iog, True),
                            seqDict,
                            qSupport=False,
                            qFixNegatives=True,
                            qViaCopy=False)

    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet.SpeciesDict())

    commandLists = []
    for iog in xrange(nOGs):
        # Each tree is addressed by its original base name in the results dir.
        treeFN = resultsDir + os.path.basename(fileHandler.GetOGsTreeFN(iog))
        if qDeepSearch:
            # Search effort is tiered by the number of genes in the orthogroup.
            nGenes = len(ogs[iog])
            if nGenes < 25:
                nIterations, nConverge = 1000, 100
            elif nGenes < 200:
                nIterations, nConverge = 25000, 1000
            else:
                nIterations, nConverge = 50000, 2000
            command = (
                'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -i %d --nprescreen 100 --nconverge %d'
                % (speciesTreeFN, mapFN, treeFN, nIterations, nConverge))
        else:
            command = ('dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1'
                       % (speciesTreeFN, mapFN, treeFN))
        # One single-command list per tree, so the trees can run in parallel.
        commandLists.append([command])

    util.RunParallelOrderedCommandLists(nParallel, commandLists)
    return resultsDir, "OG%07d_tree_id.dlcpar.locus.tree"
Ejemplo n.º 6
0
    def RunAnalysis(self):
        """
        Infer the gene trees and a distance-based species tree in one batch.

        Steps:
        - build and write the orthogroup distance matrices
        - derive the species distance matrix D and prepare the species tree
          command
        - run the species tree command followed by the gene tree commands
        - pass every gene tree through util.RenameTreeTaxa with the sequence
          dictionary (treesPatIDs % i -> treesPat % i)

        The species tree itself is not renamed here; only its ID-labelled
        filename is returned.

        Returns (number of orthogroups, D, spPairs, spTreeFN_ids).
        """
        ogs, partialMatrices = self.GetOGMatrices()
        ogMatrices = self.WriteOGMatrices(ogs, partialMatrices)

        D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
        spTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
        geneTreeCommands = self.PrepareGeneTreeCommand()

        print("\n3. Inferring gene and species trees")
        print("-----------------------------------")
        # The species tree command is queued first, then all gene trees.
        allCommands = [[spTreeCommand]] + geneTreeCommands
        util.RunParallelOrderedCommandLists(self.nProcesses, allCommands,
                                            qHideStdout=True)

        seqDict = self.ogSet.Spec_SeqDict()
        nTrees = len(self.ogSet.OGs())
        for iog in xrange(nTrees):
            util.RenameTreeTaxa(self.treesPatIDs % iog, self.treesPat % iog,
                                seqDict, qFixNegatives=True)
        return len(ogs), D, spPairs, spTreeFN_ids
Ejemplo n.º 7
0
    def DoTrees(self,
                ogs,
                idDict,
                nProcesses,
                qStopAfterSeqs,
                qStopAfterAlignments,
                nSwitchToMafft=500):
        """
        Write orthogroup FASTA files, align them and infer gene trees.

        Stages:
        0. create the output directories that the requested stages need
        1. write the FASTA files (return here if qStopAfterSeqs)
        2. run the alignment commands (return here if qStopAfterAlignments)
        3. run each tree command directly after its alignment command
        4. rewrite alignment headers through idDict and pass existing tree
           files through util.RenameTreeTaxa

        Arguments:
        ogs - orthogroups, forwarded to the FASTA/alignment/tree helpers
        idDict - maps the text after '>' in an alignment header (trailing
                 whitespace stripped) to its replacement
        nProcesses - number of parallel workers
        qStopAfterSeqs - stop after the FASTA files have been written
        qStopAfterAlignments - stop after the alignments have been inferred
        nSwitchToMafft - forwarded to GetAlignmentCommands

        Returns the list of results directories: only those created before the
        early exit when qStopAfterSeqs is set, otherwise at most the first two.
        """
        # 0. Output directories. Each filename getter is queried with its second
        # argument False and then True; only the directories obtained with True
        # are reported back to the caller.
        resultsDirs = []
        stageFilenameGetters = (self.GetFastaFilename,
                                self.GetAlignmentFilename,
                                self.GetTreeFilename)
        for getFilename in stageFilenameGetters:
            for qResults in (False, True):
                directory = os.path.dirname(getFilename(0, qResults))
                if not os.path.exists(directory):
                    os.mkdir(directory)
                if qResults:
                    resultsDirs.append(directory)
            if qStopAfterSeqs:
                break
            if qStopAfterAlignments and getFilename == self.GetAlignmentFilename:
                break

        # 1. Sequences
        self.WriteFastaFiles(FastaWriter(self.ogsWorkingDir), ogs, idDict)
        if qStopAfterSeqs:
            return resultsDirs

        # 2. Progress heading
        util.PrintUnderline(
            "Inferring multiple sequence alignments" if qStopAfterAlignments
            else "Inferring multiple sequence alignments and gene trees")

        # 3. Alignments, optionally followed by trees
        alignCmds = self.GetAlignmentCommands(ogs, nSwitchToMafft)
        if qStopAfterAlignments:
            util.RunParallelCommands(nProcesses, alignCmds, qShell=True)
            return resultsDirs[:2]

        nAlignments = len(alignCmds)
        alignmentFNs = [self.GetAlignmentFilename(i) for i in xrange(nAlignments)]
        treeCmds = self.GetTreeCommands(alignmentFNs, ogs)
        nTrees = len(treeCmds)
        # Pair each tree command with its alignment command so the two run in
        # order; alignments without a tree command run on their own.
        commandLists = [[alignCmds[i], treeCmds[i]] for i in xrange(nTrees)]
        commandLists += [[alignCmds[i]] for i in xrange(nTrees, nAlignments)]
        util.RunParallelOrderedCommandLists(nProcesses, commandLists)

        # 4. Convert ids to accessions
        for iog, idsAlignFN in enumerate(alignmentFNs):
            with open(idsAlignFN, 'rb') as src:
                with open(self.GetAlignmentFilename(iog, True), 'wb') as dst:
                    for line in src:
                        if not line.startswith(">"):
                            dst.write(line)
                            continue
                        dst.write(">" + idDict[line[1:].rstrip()] + "\n")
            idsTreeFN = self.GetTreeFilename(iog)
            if os.path.exists(idsTreeFN):
                util.RenameTreeTaxa(idsTreeFN,
                                    self.GetTreeFilename(iog, True),
                                    idDict,
                                    qFixNegatives=True)

        return resultsDirs[:2]