def RunDlcpar(treesPat, ogSet, nOGs, speciesTreeFN, workingDir):
    """
    Run dlcpar_search once per orthogroup gene tree.

    Implementation:
    - make sure the 'dlcpar/' results directory exists under workingDir
    - call RootGeneTreesArbitrarily with the tree pattern, nOGs and that directory
    - call WriteGeneSpeciesMap to obtain the gene-to-species map filename
    - build one dlcpar_search command per orthogroup and run them in parallel

    Args:
        treesPat - %-format pattern; `treesPat % i` is the tree filename for orthogroup i
        ogSet - passed unchanged to WriteGeneSpeciesMap
        nOGs - number of orthogroups to process (indices 0 .. nOGs-1)
        speciesTreeFN - species tree filename, given to dlcpar_search via -s
        workingDir - path prefix; 'dlcpar/' is appended by plain string
                     concatenation, so it needs to end with a path separator
    Returns:
        The dlcpar results directory (workingDir + 'dlcpar/').
    """
    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(treesPat, nOGs, resultsDir)
    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet)

    cmdTemplate = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1'
    commandLists = []
    for iog in xrange(nOGs):
        # Same base name as the input tree, but located in the results directory
        treeFN = resultsDir + os.path.basename(treesPat % iog)
        # One single-command list per tree
        commandLists.append([cmdTemplate % (speciesTreeFN, mapFN, treeFN)])

    # NOTE: nThreads is not a parameter of this function; it is looked up in the
    # enclosing (module) scope when this line runs.
    util.RunParallelOrderedCommandLists(nThreads, commandLists, qHideStdout=True)
    return resultsDir
def RunDlcpar(treesIDsPatFn, ogSet, speciesTreeFN, workingDir, nParallel):
    """
    Run dlcpar_search once per orthogroup gene tree.

    Implementation:
    - count the orthogroups reported by ogSet.OGs()
    - make sure the 'dlcpar/' results directory exists under workingDir
    - call RootGeneTreesArbitrarily with the filename function, the count and that directory
    - call WriteGeneSpeciesMap with ogSet.SpeciesDict() to obtain the map filename
    - build one dlcpar_search command per orthogroup and run them in parallel

    Args:
        treesIDsPatFn - callable; `treesIDsPatFn(i)` is the tree filename for orthogroup i
        ogSet - object providing OGs() and SpeciesDict()
        speciesTreeFN - species tree filename, given to dlcpar_search via -s
        workingDir - path prefix; 'dlcpar/' is appended by plain string
                     concatenation, so it needs to end with a path separator
        nParallel - first argument forwarded to util.RunParallelOrderedCommandLists
    Returns:
        The dlcpar results directory (workingDir + 'dlcpar/').
    """
    nOGs = len(ogSet.OGs())
    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(treesIDsPatFn, nOGs, resultsDir)
    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet.SpeciesDict())

    cmdTemplate = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1'
    commandLists = []
    for iog in xrange(nOGs):
        # Same base name as the input tree, but located in the results directory
        treeFN = resultsDir + os.path.basename(treesIDsPatFn(iog))
        # One single-command list per tree
        commandLists.append([cmdTemplate % (speciesTreeFN, mapFN, treeFN)])

    util.RunParallelOrderedCommandLists(nParallel, commandLists, qHideStdout=True)
    return resultsDir
def RunAnalysis(self, qSpeciesTree=True):
    """
    Compute orthogroup distance matrices, infer gene trees and obtain a species tree.

    Species tree route:
    - fewer than four species: WriteSpeciesTreeIDs_TwoThree is called with the
      species list and the unrooted species tree filename
    - otherwise, if EnoughOGsForSTAG(...) is true: stag.Run_ForOrthoFinder is
      run after the gene tree commands have finished
    - otherwise: a species tree command built from SpeciesTreeDistances is run
      ahead of the gene tree commands

    Afterwards every orthogroup tree is passed through util.RenameTreeTaxa, and,
    when qSpeciesTree is true, so is the species tree.

    Args:
        qSpeciesTree - when true, rename the species tree taxa and return its
                       ID-labelled filename
    Returns:
        (spTreeFN_ids, qSTAG) when qSpeciesTree is true, otherwise (None, qSTAG).
        qSTAG is False for fewer than four species, else the value returned by
        EnoughOGsForSTAG.
    """
    util.PrintUnderline("Calculating gene distances")
    ogs, partialMatrices = self.GetOGMatrices_FullParallel()
    ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)
    util.PrintTime("Done")
    treeCommands = self.PrepareGeneTreeCommand()

    if len(self.ogSet.seqsInfo.speciesToUse) < 4:
        qSTAG = False
        spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        WriteSpeciesTreeIDs_TwoThree(self.ogSet.seqsInfo.speciesToUse, spTreeFN_ids)
    else:
        qSTAG = self.EnoughOGsForSTAG(ogs, self.ogSet.seqsInfo.speciesToUse)
        if not qSTAG:
            print("Using fallback species tree inference method")
            distances, speciesPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
            speciesTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(distances, speciesPairs)
            # The species tree command goes first in the ordered command lists
            treeCommands = [[speciesTreeCommand]] + treeCommands

    util.PrintUnderline("Inferring gene and species trees")
    util.RunParallelOrderedCommandLists(self.nProcesses, treeCommands)

    if qSTAG:
        # Runs only once the gene tree commands above have returned
        print("")
        spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        stag.Run_ForOrthoFinder(
            files.FileHandler.GetOGsTreeDir(),
            files.FileHandler.GetWorkingDirectory_Write(),
            self.ogSet.seqsInfo.speciesToUse,
            spTreeFN_ids,
        )

    sequenceNames = self.ogSet.Spec_SeqDict()
    for iog in xrange(len(self.ogSet.OGs())):
        util.RenameTreeTaxa(
            files.FileHandler.GetOGsTreeFN(iog),
            files.FileHandler.GetOGsTreeFN(iog, True),
            sequenceNames,
            qSupport=False,
            qFixNegatives=True,
        )

    if not qSpeciesTree:
        return None, qSTAG
    util.RenameTreeTaxa(
        spTreeFN_ids,
        files.FileHandler.GetSpeciesTreeUnrootedFN(True),
        self.ogSet.SpeciesDict(),
        qSupport=False,
        qFixNegatives=True,
    )
    return spTreeFN_ids, qSTAG
def RunAnalysis(self, qSpeciesTree=True):
    """
    Build distance matrices, then run the species tree and gene tree commands.

    The species tree command is placed first in the ordered command lists,
    followed by the gene tree commands. Once they have run, every orthogroup
    tree is passed through util.RenameTreeTaxa using the Spec_SeqDict mapping.

    Args:
        qSpeciesTree - when true, also rename the species tree taxa, writing to
                       workingDir + "SpeciesTree_unrooted.txt"
    Returns:
        (number of orthogroups, D, spTreeFN_ids, unrooted species tree filename)
        when qSpeciesTree is true; (number of orthogroups, D, None, None) otherwise.
    """
    ogs, partialMatrices = self.GetOGMatrices()
    ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)
    D, speciesPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
    speciesTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, speciesPairs)
    geneTreeCommands = self.PrepareGeneTreeCommand()

    util.PrintUnderline("Inferring gene and species trees")
    allCommands = [[speciesTreeCommand]] + geneTreeCommands
    util.RunParallelOrderedCommandLists(self.nProcesses, allCommands, qHideStdout=True)

    sequenceNames = self.ogSet.Spec_SeqDict()
    for iog in xrange(len(self.ogSet.OGs())):
        util.RenameTreeTaxa(self.TreeFilename_IDs(iog), self.treesPat % iog, sequenceNames, qFixNegatives=True)

    nOGs = len(ogs)
    if not qSpeciesTree:
        return nOGs, D, None, None
    unrootedFN = self.workingDir + "SpeciesTree_unrooted.txt"
    util.RenameTreeTaxa(spTreeFN_ids, unrootedFN, self.ogSet.SpeciesDict(), qFixNegatives=True)
    return nOGs, D, spTreeFN_ids, unrootedFN
def RunDlcpar(ogSet, speciesTreeFN, workingDir, nParallel, qDeepSearch):
    """
    Run dlcpar_search once per orthogroup gene tree.

    Implementation:
    - make sure the 'dlcpar/' results directory exists under workingDir
    - call RootGeneTreesArbitrarily with the orthogroup count and that directory
    - pass every orthogroup tree through util.RenameTreeTaxa
    - call WriteGeneSpeciesMap with ogSet.SpeciesDict() to obtain the map filename
    - build one dlcpar_search command per orthogroup and run them in parallel

    Args:
        ogSet - object providing OGs(), Spec_SeqDict() and SpeciesDict()
        speciesTreeFN - species tree filename, given to dlcpar_search via -s
        workingDir - path prefix; 'dlcpar/' is appended by plain string
                     concatenation, so it needs to end with a path separator
        nParallel - first argument forwarded to util.RunParallelOrderedCommandLists
        qDeepSearch - when true, use '-i'/'--nprescreen'/'--nconverge' with limits
                      chosen from the orthogroup size; otherwise use '-x 1'
    Returns:
        (results directory, "OG%07d_tree_id.dlcpar.locus.tree")
    """
    def _SearchLimits(nGenes):
        # Values for the '-i' and '--nconverge' options, by orthogroup size
        if nGenes < 25:
            return 1000, 100
        if nGenes < 200:
            return 25000, 1000
        return 50000, 2000

    orthogroups = ogSet.OGs()
    nOGs = len(orthogroups)
    resultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(resultsDir):
        os.mkdir(resultsDir)
    RootGeneTreesArbitrarily(nOGs, resultsDir)

    sequenceNames = ogSet.Spec_SeqDict()
    for iog in xrange(nOGs):
        util.RenameTreeTaxa(
            files.FileHandler.GetOGsTreeFN(iog),
            files.FileHandler.GetOGsTreeFN(iog, True),
            sequenceNames,
            qSupport=False,
            qFixNegatives=True,
            qViaCopy=False,
        )

    mapFN = WriteGeneSpeciesMap(resultsDir, ogSet.SpeciesDict())
    # Same base names as the orthogroup trees, but located in the results directory
    treeFNs = []
    for iog in xrange(nOGs):
        treeFNs.append(resultsDir + os.path.basename(files.FileHandler.GetOGsTreeFN(iog)))

    commands = []
    if qDeepSearch:
        deepTemplate = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -i %d --nprescreen 100 --nconverge %d'
        for treeFN, og in zip(treeFNs, orthogroups):
            nIterations, nConverge = _SearchLimits(len(og))
            commands.append(deepTemplate % (speciesTreeFN, mapFN, treeFN, nIterations, nConverge))
    else:
        quickTemplate = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1'
        for treeFN in treeFNs:
            commands.append(quickTemplate % (speciesTreeFN, mapFN, treeFN))

    # One single-command list per tree
    util.RunParallelOrderedCommandLists(nParallel, [[command] for command in commands])
    return resultsDir, "OG%07d_tree_id.dlcpar.locus.tree"
def RunAnalysis(self):
    """
    Build distance matrices, then run the species tree and gene tree commands.

    The species tree command is placed first in the ordered command lists,
    followed by the gene tree commands. Once they have run, every orthogroup
    tree is passed through util.RenameTreeTaxa using the Spec_SeqDict mapping.
    The species tree itself is not renamed here.

    Returns:
        (number of orthogroups, D, spPairs, spTreeFN_ids)
    """
    ogs, partialMatrices = self.GetOGMatrices()
    ogMatrices = self.WriteOGMatrices(ogs, partialMatrices)
    D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
    speciesTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
    geneTreeCommands = self.PrepareGeneTreeCommand()

    print("\n3. Inferring gene and species trees")
    print("-----------------------------------")
    allCommands = [[speciesTreeCommand]] + geneTreeCommands
    util.RunParallelOrderedCommandLists(self.nProcesses, allCommands, qHideStdout=True)

    sequenceNames = self.ogSet.Spec_SeqDict()
    nTrees = len(self.ogSet.OGs())
    for iog in xrange(nTrees):
        util.RenameTreeTaxa(self.treesPatIDs % iog, self.treesPat % iog, sequenceNames, qFixNegatives=True)
    return len(ogs), D, spPairs, spTreeFN_ids
def DoTrees(self, ogs, idDict, nProcesses, qStopAfterSeqs, qStopAfterAlignments, nSwitchToMafft=500):
    """
    Write orthogroup FASTA files, then optionally infer alignments and gene trees.

    Steps:
    0. create the output directories for sequences, alignments and trees
       (only as far as the requested stopping point)
    1. write the FASTA files; stop here if qStopAfterSeqs
    2. print the stage heading
    3. run the alignment commands (stop here if qStopAfterAlignments), otherwise
       run each alignment followed by its tree command, then rewrite alignment
       headers and tree taxa using idDict

    Args:
        ogs - orthogroups, forwarded to the FASTA/alignment/tree command helpers
        idDict - mapping used to replace FASTA header IDs and tree taxa
        nProcesses - first argument forwarded to the util parallel runners
        qStopAfterSeqs - return after the FASTA files have been written
        qStopAfterAlignments - return after the alignments have been run
        nSwitchToMafft - forwarded to GetAlignmentCommands
    Returns:
        The directories obtained from calling each filename getter with True as
        its second argument: all collected ones when stopping after sequences,
        otherwise the first two.
    """
    # 0. Output directories, for both variants of each filename getter
    resultsDirsFullPath = []
    for getFilename in (self.GetFastaFilename, self.GetAlignmentFilename, self.GetTreeFilename):
        for qSecondArg in (False, True):
            directory = os.path.dirname(getFilename(0, qSecondArg))
            if not os.path.exists(directory):
                os.mkdir(directory)
            if qSecondArg:
                resultsDirsFullPath.append(directory)
        if qStopAfterSeqs:
            break
        if qStopAfterAlignments and getFilename == self.GetAlignmentFilename:
            break

    # 1. Sequences
    fastaWriter = FastaWriter(self.ogsWorkingDir)
    self.WriteFastaFiles(fastaWriter, ogs, idDict)
    if qStopAfterSeqs:
        return resultsDirsFullPath

    # 2. Stage heading
    heading = ("Inferring multiple sequence alignments" if qStopAfterAlignments
               else "Inferring multiple sequence alignments and gene trees")
    util.PrintUnderline(heading)

    # 3. Alignments, and trees unless stopping early
    alignCommands = self.GetAlignmentCommands(ogs, nSwitchToMafft)
    if qStopAfterAlignments:
        util.RunParallelCommands(nProcesses, alignCommands, qShell=True)
        return resultsDirsFullPath[:2]

    nAlignments = len(alignCommands)
    alignmentFilesToUse = [self.GetAlignmentFilename(i) for i in xrange(nAlignments)]
    treeCommands = self.GetTreeCommands(alignmentFilesToUse, ogs)
    nTrees = len(treeCommands)
    # Alignments that have a tree command are paired with it; the rest run alone
    commandsSet = [[alignCommands[i], treeCommands[i]] for i in xrange(nTrees)]
    commandsSet += [[alignCommands[i]] for i in xrange(nTrees, nAlignments)]
    util.RunParallelOrderedCommandLists(nProcesses, commandsSet)

    # Convert ids to accessions
    for i, alignFN in enumerate(alignmentFilesToUse):
        with open(alignFN, 'rb') as infile, open(self.GetAlignmentFilename(i, True), 'wb') as outfile:
            for line in infile:
                if not line.startswith(">"):
                    outfile.write(line)
                    continue
                # Header line: look up the text after '>' in idDict
                outfile.write(">" + idDict[line[1:].rstrip()] + "\n")
        # A tree file may be absent for this index, so check before renaming
        if os.path.exists(self.GetTreeFilename(i)):
            util.RenameTreeTaxa(self.GetTreeFilename(i), self.GetTreeFilename(i, True), idDict, qFixNegatives=True)
    return resultsDirsFullPath[:2]