Example #1
0
 def RunAnalysis(self, qSpeciesTree=True):
     """
     Calculate the gene distance matrices, then infer the gene trees and an unrooted species tree.

     The species tree comes from one of three routes:
       - fewer than four species: written directly by WriteSpeciesTreeIDs_TwoThree
       - EnoughOGsForSTAG(...) is true: STAG, run once the gene trees are complete
       - otherwise: the fallback distance-based command, run alongside the gene tree commands

     qSpeciesTree - if true, also write the species tree with species names and return its
                    IDs filename

     Returns (spTreeFN_ids, qSTAG) if qSpeciesTree, otherwise (None, qSTAG).
     """
     util.PrintUnderline("Calculating gene distances")
     ogs, partialMatrices = self.GetOGMatrices_FullParallel()
     ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)
     util.PrintTime("Done")
     treeCommands = self.PrepareGeneTreeCommand()

     if len(self.ogSet.seqsInfo.speciesToUse) < 4:
         # Fewer than four species: the species tree is written directly and STAG is not used
         qSTAG = False
         spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
         WriteSpeciesTreeIDs_TwoThree(self.ogSet.seqsInfo.speciesToUse, spTreeFN_ids)
     else:
         qSTAG = self.EnoughOGsForSTAG(ogs, self.ogSet.seqsInfo.speciesToUse)
         if not qSTAG:
             print("Using fallback species tree inference method")
             distances, speciesPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
             speciesTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(distances, speciesPairs)
             # The species tree command goes first in the list of command lists
             treeCommands = [[speciesTreeCommand]] + treeCommands

     util.PrintUnderline("Inferring gene and species trees")
     util.RunParallelOrderedCommandLists(self.nProcesses, treeCommands)

     if qSTAG:
         # Trees must have been completed
         print("")
         spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
         stag.Run_ForOrthoFinder(files.FileHandler.GetOGsTreeDir(),
                                 files.FileHandler.GetWorkingDirectory_Write(),
                                 self.ogSet.seqsInfo.speciesToUse,
                                 spTreeFN_ids)

     # Gene trees: IDs -> accessions
     seqDict = self.ogSet.Spec_SeqDict()
     nOGs = len(self.ogSet.OGs())
     for iog in xrange(nOGs):
         idsTreeFN = files.FileHandler.GetOGsTreeFN(iog)
         accessionsTreeFN = files.FileHandler.GetOGsTreeFN(iog, True)
         util.RenameTreeTaxa(idsTreeFN, accessionsTreeFN, seqDict, qSupport=False, qFixNegatives=True)

     if not qSpeciesTree:
         return None, qSTAG
     # Species tree: IDs -> species names
     util.RenameTreeTaxa(spTreeFN_ids,
                         files.FileHandler.GetSpeciesTreeUnrootedFN(True),
                         self.ogSet.SpeciesDict(),
                         qSupport=False,
                         qFixNegatives=True)
     return spTreeFN_ids, qSTAG
Example #2
0
def OrthologuesFromTrees(groupsDir, workingDir, nHighParallel, speciesTree_fn = None, pickleDir=None):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    groupsDir - directory with orthogroups file in
    workingDir - orthologues 'WorkingDirectory'; sub-paths are appended by plain string
                 concatenation, so it must end with a path separator
    nHighParallel - parallelism setting, passed on to ReconciliationAndOrthologues
    speciesTree_fn - None if not supplied, otherwise rooted tree using user species names
                     (not orthofinder IDs). When None, exactly one of the known rooted
                     species tree files must already exist in workingDir + "Trees_ids/"
    pickleDir - passed on to ReconciliationAndOrthologues

    Returns a message giving the location of the new orthologues results directory.
    Calls util.Fail() if the species tree cannot be determined.
    """
    # Check species tree
    qUserSpTree = speciesTree_fn is not None
    if qUserSpTree:
        if not os.path.exists(speciesTree_fn):
            print("\nERROR: %s does not exist\n" % speciesTree_fn)
            util.Fail()
    else:
        possibilities = ["SpeciesTree_ids_0_rooted.txt", "SpeciesTree_ids_1_rooted.txt", "SpeciesTree_user_ids.txt"] # etc (only need to determine if unique)
        candidates = [workingDir + "Trees_ids/" + p for p in possibilities]
        treesFound = [fn for fn in candidates if os.path.exists(fn)]
        if treesFound:
            # As before, the last existing candidate is the one recorded
            speciesTree_fn = treesFound[-1]
        if len(treesFound) == 0:
            print("\nERROR: There is a problem with the specified directory. The rooted species tree %s or %s is not present." % (possibilities[0], possibilities[2]))
            print("Please rectify the problem or alternatively use the -s option to specify the species tree to use.\n")
            util.Fail()
        if len(treesFound) > 1:
            print("\nERROR: There is more than one rooted species tree in the specified directory structure. Please use the -s option to specify which species tree should be used\n")
            util.Fail()
    
    def TreePatIDs(iog):
        return workingDir + ("Trees_ids/OG%07d_tree_id.txt" % iog)
    reconTreesRenamedDir = workingDir + "Recon_Gene_Trees/"
    # A new directory is always requested for the Orthologues_Species/ directories
    resultsDir_new = util.CreateNewWorkingDirectory(workingDir + "../Orthologues" + "_")
    orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile(groupsDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir + "SpeciesIDs.txt")    
    ogSet = OrthoGroupsSet(orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor)
    if qUserSpTree:
        # Validate the user's tree against the species names, then convert it to use IDs
        speciesToUseNames = ogSet.SpeciesDict().values()
        CheckUserSpeciesTree(speciesTree_fn, speciesToUseNames)
        speciesTree_fn = ConvertUserSpeciesTree(workingDir + "Trees_ids/", speciesTree_fn, ogSet.SpeciesDict())
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees") 
    ReconciliationAndOrthologues(TreePatIDs, ogSet, speciesTree_fn, workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, pickleDir=pickleDir)
    util.PrintUnderline("Writing results files")
    CleanWorkingDir(workingDir)
    return "Species-by-species orthologues directory:\n   %s\n" % resultsDir_new
Example #3
0
 def DoTrees(self, ogs, idDict, nProcesses, qStopAfterSeqs, qStopAfterAlignments):
     """
     Write the orthogroup sequence files, then run the alignment commands and the gene tree
     commands, and finally write copies of the alignments and trees with IDs replaced via idDict.

     qStopAfterSeqs - return once the sequence files have been written
     qStopAfterAlignments - run the alignment commands only, then return

     Returns the list of results directories created: all that were created if qStopAfterSeqs,
     otherwise the first two.
     """
     # 0. Create the directories: for each file type, first the fn(0, False) directory, then
     #    the fn(0, True) directory; only the latter is reported in the results
     resultsDirsFullPath = []
     for getFilename in (self.GetFastaFilename, self.GetAlignmentFilename, self.GetTreeFilename):
         for qSecondArg in (False, True):
             directory = os.path.split(getFilename(0, qSecondArg))[0]
             if not os.path.exists(directory):
                 os.mkdir(directory)
             if qSecondArg:
                 resultsDirsFullPath.append(directory)
         if qStopAfterSeqs or (qStopAfterAlignments and getFilename == self.GetAlignmentFilename):
             break

     # 1. Sequence files
     self.WriteFastaFiles(FastaWriter(self.ogsWorkingDir), ogs, idDict)
     if qStopAfterSeqs:
         return resultsDirsFullPath

     # 2. Heading
     if qStopAfterAlignments:
         heading = "Inferring multiple sequence alignments"
     else:
         heading = "Inferring multiple sequence alignments and gene trees"
     util.PrintUnderline(heading)

     # 3. Alignments only
     alignJobs = self.GetAlignmentCommandsAndNewFilenames(ogs)
     if qStopAfterAlignments:
         pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignJobs, False)
         return resultsDirsFullPath[:2]

     # Otherwise, alignments and trees
     alignmentFilesToUse = [self.GetAlignmentFilename(i) for i in xrange(len(alignJobs))]
     treeJobs = self.GetTreeCommands(alignmentFilesToUse, ogs)
     nTreeJobs = len(treeJobs)
     # An alignment is paired with its tree command where there is one, otherwise run alone
     jobs = [[alignJobs[i], treeJobs[i]] for i in xrange(nTreeJobs)]
     jobs.extend([alignJobs[i]] for i in xrange(nTreeJobs, len(alignJobs)))
     pc.RunParallelCommandsAndMoveResultsFile(nProcesses, jobs, True)

     # Convert ids to accessions
     for i, idsAlignFN in enumerate(alignmentFilesToUse):
         with open(idsAlignFN, 'rb') as infile:
             with open(self.GetAlignmentFilename(i, True), 'wb') as outfile:
                 for line in infile:
                     if line.startswith(">"):
                         # Header line: look the ID up and write the replacement name
                         line = ">" + idDict[line[1:].rstrip()] + "\n"
                     outfile.write(line)
         idsTreeFN = self.GetTreeFilename(i)
         if os.path.exists(idsTreeFN):
             util.RenameTreeTaxa(idsTreeFN, self.GetTreeFilename(i, True), idDict, qFixNegatives=True)

     return resultsDirsFullPath[:2]
Example #4
0
def ReconciliationAndOrthologues(treesIDsPatFn, ogSet, speciesTree_fn, workingDir, resultsDir, reconTreesRenamedDir, nParallel, iSpeciesTree=None, pickleDir = None):
    """
    Run DLCpar, write renamed copies of the reconciled trees and create the orthologue lists.

    treesIDsPatFn - function returning name of filename
    ogSet - info about the orthogroups, species etc
    speciesTree_fn - the species tree
    workingDir - Orthologues working dir
    resultsDir - where the Orthologues top level results directory will go (should exist already)
    reconTreesRenamedDir - where to put the reconciled trees that use the gene accessions
                           (created if it does not exist)
    nParallel - parallelism setting, passed on to RunDlcpar
    iSpeciesTree - which of the potential roots of the species tree is this (only used in the
                   printed heading)
    pickleDir - directory passed to rt.create_orthologue_lists; if None, the directory
                workingDir + "matrices_orthologues/" is used and removed afterwards if empty
    """
    dlcparResultsDir = RunDlcpar(treesIDsPatFn, ogSet, speciesTree_fn, workingDir, nParallel)
    if not os.path.exists(reconTreesRenamedDir): os.mkdir(reconTreesRenamedDir)
    # Look the dictionary up once rather than once per orthogroup
    spec_seq_dict = ogSet.Spec_SeqDict()
    for iog in xrange(len(ogSet.OGs())):
        util.RenameTreeTaxa(dlcparResultsDir + "OG%07d_tree_id.dlcpar.locus.tree" % iog, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qFixNegatives=False, inFormat=8)

    # Orthologue lists
    util.PrintUnderline("Inferring orthologues from gene trees" + (" (root %d)"%iSpeciesTree if iSpeciesTree is not None else ""))
    qDelDir = False
    if pickleDir is None: 
        pickleDir = workingDir + "matrices_orthologues/"
        if not os.path.exists(pickleDir): os.mkdir(pickleDir)
        qDelDir = True    
    rt.create_orthologue_lists(ogSet, resultsDir, dlcparResultsDir, pickleDir)  
    # If a temporary matrices directory was created, delete it now
    if qDelDir:
        if os.path.exists(pickleDir): 
            try:
                # Best effort: os.rmdir only removes an empty directory, so a directory
                # that still has files in it is deliberately left in place
                os.rmdir(pickleDir)
            except OSError:
                pass
Example #5
0
    def RunAnalysis(self, qSpeciesTree=True):
        """
        Build the orthogroup distance matrices, infer the species tree and gene trees from
        them and write copies of the trees with IDs replaced by names.

        qSpeciesTree - if true, also write the renamed unrooted species tree

        Returns (number of orthogroups, D, spTreeFN_ids, spTreeUnrootedFN) if qSpeciesTree,
        otherwise (number of orthogroups, D, None, None), where D is the first value
        returned by SpeciesTreeDistances.
        """
        ogs, partialMatrices = self.GetOGMatrices()
        ogMatrices = self.CompleteAndWriteOGMatrices(ogs, partialMatrices)

        D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
        speciesTreeCommand, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
        geneTreeCommands = self.PrepareGeneTreeCommand()
        util.PrintUnderline("Inferring gene and species trees")
        # The species tree command goes first in the list of command lists
        allCommands = [[speciesTreeCommand]] + geneTreeCommands
        util.RunParallelOrderedCommandLists(self.nProcesses, allCommands, qHideStdout=True)

        # Gene trees: IDs -> accessions
        seqDict = self.ogSet.Spec_SeqDict()
        nOGs = len(self.ogSet.OGs())
        for iog in xrange(nOGs):
            util.RenameTreeTaxa(self.TreeFilename_IDs(iog), self.treesPat % iog, seqDict, qFixNegatives=True)

        if not qSpeciesTree:
            return len(ogs), D, None, None

        # Species tree: IDs -> species names
        spTreeUnrootedFN = self.workingDir + "SpeciesTree_unrooted.txt"
        util.RenameTreeTaxa(spTreeFN_ids, spTreeUnrootedFN, self.ogSet.SpeciesDict(), qFixNegatives=True)
        return len(ogs), D, spTreeFN_ids, spTreeUnrootedFN
Example #6
0
def OrthologuesFromTrees(recon_method, nHighParallel, userSpeciesTree_fn, qAddSpeciesToIDs):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    recon_method - reconciliation method, passed on to ReconciliationAndOrthologues
    nHighParallel - parallelism setting, passed on to ReconciliationAndOrthologues
    userSpeciesTree_fn - None if not supplied otherwise rooted tree using user species names (not orthofinder IDs)
    qAddSpeciesToIDs - passed on to the OrthoGroupsSet constructor

    Returns a message giving the location of the orthologues results directory.
    """
    speciesToUse, nSpAll, _ = util.GetSpeciesToUse(files.FileHandler.GetSpeciesIDsFN())    
    ogSet = OrthoGroupsSet(files.FileHandler.GetWorkingDirectory1_Read(), speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor = util.FirstWordExtractor)
    if userSpeciesTree_fn is not None:
        # Validate the user's tree against the species names, then write it out using IDs
        # to the rooted species tree file that the reconciliation step reads
        speciesDict = files.FileHandler.GetSpeciesDict()
        speciesToUseNames = [speciesDict[str(iSp)] for iSp in ogSet.speciesToUse]
        CheckUserSpeciesTree(userSpeciesTree_fn, speciesToUseNames)
        speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeIDsRootedFN()
        ConvertUserSpeciesTree(userSpeciesTree_fn, speciesDict, speciesTreeFN_ids)
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees") 
    ReconciliationAndOrthologues(recon_method, ogSet, nHighParallel)
    util.PrintUnderline("Writing results files")
    util.PrintTime("Writing results files")
    files.FileHandler.CleanWorkingDir2()
    return "Species-by-species orthologues directory:\n   %s\n" % files.FileHandler.GetOrthologuesDirectory()
Example #7
0
def ReconciliationAndOrthologues(recon_method, ogSet, nParallel, iSpeciesTree=None, all_stride_dup_genes=None):
    """
    Write the node-labelled species tree, infer the orthologues with the requested method,
    add the orthologues from TwoAndThreeGeneOrthogroups and write the orthologue statistics.

    recon_method - selects the method:
                     - any value containing "dlcpar": DLCpar ("dlcpar_convergedsearch" turns
                       on the qDeepSearch option)
                     - "phyldog": orthologues from Phyldog
                     - anything else: the OrthoFinder method ("only_overlap" sets qNoRecon)
    ogSet - info about the orthogroups, species etc
    nParallel - parallelism setting, only used by the DLCpar method
    iSpeciesTree - which of the potential roots of the species tree is this (only used in the
                   heading printed by the DLCpar method)
    all_stride_dup_genes - passed on to trees2ologs_of.DoOrthologuesForOrthoFinder

    All input and output locations are obtained from files.FileHandler.
    """
    speciesTree_ids_fn = files.FileHandler.GetSpeciesTreeIDsRootedFN()
    labeled_tree_fn = files.FileHandler.GetSpeciesTreeResultsNodeLabelsFN()
    util.RenameTreeTaxa(speciesTree_ids_fn, labeled_tree_fn, ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True, label='N')
    workingDir = files.FileHandler.GetWorkingDirectory_Write()    # workingDir - Orthologues working dir
    resultsDir_ologs = files.FileHandler.GetOrthologuesDirectory()
    reconTreesRenamedDir = files.FileHandler.GetOGsReconTreeDir(True)
    if "dlcpar" in recon_method:
        qDeepSearch = (recon_method == "dlcpar_convergedsearch")
        util.PrintTime("Starting DLCpar")
        dlcparResultsDir, dlcparLocusTreePat = trees2ologs_dlcpar.RunDlcpar(ogSet, speciesTree_ids_fn, workingDir, nParallel, qDeepSearch)
        util.PrintTime("Done DLCpar")
        # Write copies of the reconciled trees that use the gene accessions
        spec_seq_dict = ogSet.Spec_SeqDict()
        for iog in xrange(len(ogSet.OGs())):
            util.RenameTreeTaxa(dlcparResultsDir + dlcparLocusTreePat % iog, files.FileHandler.GetOGsReconTreeFN(iog), spec_seq_dict, qSupport=False, qFixNegatives=False, inFormat=8, label='n')
    
        # Orthologue lists
        util.PrintUnderline("Inferring orthologues from gene trees" + (" (root %d)"%iSpeciesTree if iSpeciesTree is not None else ""))
        pickleDir = files.FileHandler.GetPickleDir()
        nOrthologues_SpPair = trees2ologs_dlcpar.create_orthologue_lists(ogSet, resultsDir_ologs, dlcparResultsDir, pickleDir)  

    elif "phyldog" == recon_method:
        util.PrintTime("Starting Orthologues from Phyldog")
        nOrthologues_SpPair = trees2ologs_of.DoOrthologuesForOrthoFinder_Phyldog(ogSet, workingDir, trees2ologs_of.GeneToSpecies_dash, resultsDir_ologs, reconTreesRenamedDir)
        util.PrintTime("Done Orthologues from Phyldog")
    else:
        util.PrintTime("Starting OF Orthologues")
        qNoRecon = ("only_overlap" == recon_method)
        nOrthologues_SpPair = trees2ologs_of.DoOrthologuesForOrthoFinder(ogSet, speciesTree_ids_fn, trees2ologs_of.GeneToSpecies_dash, all_stride_dup_genes, qNoRecon)
        util.PrintTime("Done OF Orthologues")
    # Whichever method was used, add the result of TwoAndThreeGeneOrthogroups before the
    # statistics are written
    nOrthologues_SpPair += TwoAndThreeGeneOrthogroups(ogSet, resultsDir_ologs)
    WriteOrthologuesStats(ogSet, nOrthologues_SpPair)
Example #8
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, speciesToUse, nProcesses, qStopAfterSeqs, qStopAfterAlignments, qDoSpeciesTree):
        """
        Write the orthogroup sequence files, then run the alignment commands and the gene tree
        commands and, if qDoSpeciesTree, infer a species tree from a concatenated alignment.
        Copies of the alignments and trees are written with IDs replaced via idDict.

        ogs - the orthogroups; passed to the fasta writer and the command builders
        ogMatrix - passed to DetermineOrthogroupsForSpeciesTree (only used if qDoSpeciesTree)
        idDict - ID -> name dictionary. NOTE: updated in place with speciesIdDict
        speciesIdDict - species ID dictionary, merged into idDict
        speciesToUse - passed to FastaWriter
        nProcesses - passed to pc.RunParallelCommandsAndMoveResultsFile
        qStopAfterSeqs - return once the sequence files have been written
        qStopAfterAlignments - run the alignment commands only (no trees), then return
        qDoSpeciesTree - also create the concatenated alignment and the species tree

        Returns all three results directories (sequences, alignments, trees) if
        qStopAfterSeqs, otherwise only the first two.
        """
        idDict.update(speciesIdDict) # same code will then also convert concatenated alignment for species tree
        # 0. Results directories, obtained from the FileHandler
        resultsDirsFullPath = [files.FileHandler.GetResultsSeqsDir(), files.FileHandler.GetResultsAlignDir(), files.FileHandler.GetResultsTreesDir()]
        
        # 1. Write the sequence files
        fastaWriter = FastaWriter(files.FileHandler.GetSpeciesSeqsDir(), speciesToUse)
        self.WriteFastaFiles(fastaWriter, ogs, idDict, True)
        if qStopAfterSeqs: return resultsDirsFullPath

        # 3
        # Get OGs to use for species tree
        # (concatenated_algn_fn and fSingleCopy are only bound when qDoSpeciesTree is true;
        # every later use of them is guarded by qDoSpeciesTree)
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(ogMatrix)            
            concatenated_algn_fn = files.FileHandler.GetSpeciesTreeConcatAlignFN()
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(ogs)
        if qStopAfterAlignments:
            # Alignments only: run them all, build the concatenated alignment if required,
            # convert the IDs and return
            util.PrintUnderline("Inferring multiple sequence alignments")
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignCommands_and_filenames, False)
            if qDoSpeciesTree:
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
                # write OGs used to file
                # NOTE(review): a str is written to a file opened in 'wb' mode - this assumes
                # Python 2 (as does the use of xrange) - confirm before porting
                dSpeciesTree = os.path.split(files.FileHandler.GetSpeciesTreeResultsFN(0, True))[0] + "/"
                with open(dSpeciesTree + "Orthogroups_for_concatenated_alignment.txt", 'wb') as outfile:
                    for iog in iOgsForSpeciesTree: outfile.write("OG%07d\n" % iog)
            # ids -> accessions
            alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]        
            accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
            if qDoSpeciesTree: 
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]
        
        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]
        treeCommands_and_filenames = self.GetTreeCommands(alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print("Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup" % (len(iOgsForSpeciesTree), 100.*fSingleCopy))
            util.PrintUnderline("Inferring multiple sequence alignments for species tree") 
            # Do required alignments and trees
            speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
            CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # write OGs used to file
            dSpeciesTree = os.path.split(files.FileHandler.GetSpeciesTreeResultsFN(0, True))[0] + "/"
            with open(dSpeciesTree + "Orthogroups_for_concatenated_alignment.txt", 'wb') as outfile:
                for iog in iOgsForSpeciesTree: outfile.write("OG%07d\n" % iog)
            # Add species tree to list of commands to run
            # (the list is restarted here: the commands run above are not run a second time)
            commands_and_filenames = [self.program_caller.GetTreeCommands(self.tree_program, [concatenated_algn_fn], [speciesTreeFN_ids], ["SpeciesTree"])]
            util.PrintUnderline("Inferring remaining multiple sequence alignments and gene trees") 
        else:
            util.PrintUnderline("Inferring multiple sequence alignments and gene trees") 

        # Now continue as before
        # Orthogroups already processed for the species tree are skipped. An alignment is
        # paired with its tree command where there is one, otherwise it is run alone
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)                         
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
        for i in xrange(len(treeCommands_and_filenames), len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
        
        # Convert ids to accessions
        accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            alignmentFilesToUse.append(concatenated_algn_fn)
            accessionAlignmentFNs.append(files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            # NOTE(review): HaveSupportValues is called before the file is known to exist -
            # confirm it tolerates a missing file, otherwise the error branch below may
            # never be reached
            qHaveSupport = util.HaveSupportValues(speciesTreeFN_ids)
            if os.path.exists(speciesTreeFN_ids):
                util.RenameTreeTaxa(speciesTreeFN_ids, files.FileHandler.GetSpeciesTreeUnrootedFN(True), idDict, qSupport=qHaveSupport, qFixNegatives=True)
            else:
                text = "ERROR: Species tree inference failed"
                files.FileHandler.LogFailAndExit(text)
        self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
        # Gene trees: whether there are support values is determined from the first tree
        # file found and that answer is then used for all of the trees
        qHaveSupport = None
        for i in xrange(len(treeCommands_and_filenames)):
            infn = self.GetTreeFilename(i)
            if os.path.exists(infn):
                if qHaveSupport == None: qHaveSupport = util.HaveSupportValues(infn)
                util.RenameTreeTaxa(infn, self.GetTreeFilename(i, True), idDict, qSupport=qHaveSupport, qFixNegatives=True)       
        return resultsDirsFullPath[:2]
Example #9
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, nProcesses, qStopAfterSeqs, qStopAfterAlignments, qDoSpeciesTree):
        """
        Write the orthogroup sequence files, then run the alignment commands and the gene tree
        commands and, if qDoSpeciesTree, infer a species tree from a concatenated alignment.
        Copies of the alignments and trees are written with IDs replaced via idDict.

        ogs - the orthogroups; passed to the fasta writer and the command builders
        ogMatrix - passed to DetermineOrthogroupsForSpeciesTree (only used if qDoSpeciesTree)
        idDict - ID -> name dictionary. NOTE: updated in place with speciesIdDict
        speciesIdDict - species ID dictionary, merged into idDict
        nProcesses - passed to pc.RunParallelCommandsAndMoveResultsFile
        qStopAfterSeqs - return once the sequence files have been written
        qStopAfterAlignments - run the alignment commands only (no trees), then return
        qDoSpeciesTree - also create the concatenated alignment and the species tree

        Returns the list of results directories created: all that were created if
        qStopAfterSeqs, otherwise the first two. Calls util.Fail() if the species tree was
        requested but its file was not produced.
        """
        idDict.update(speciesIdDict) # same code will then also convert concatenated alignment for species tree
        # 0. Create the directories: for each file type, first the fn(0, False) directory, then
        #    the fn(0, True) directory; only the latter is reported in the results
        resultsDirsFullPath = []
        for fn in [self.GetFastaFilename, self.GetAlignmentFilename, self.GetTreeFilename]:
            for qIDs in [True, False]:
                d = os.path.split(fn(0, not qIDs))[0]
                if not os.path.exists(d): os.mkdir(d)
                if not qIDs: resultsDirsFullPath.append(d)
            if qStopAfterSeqs: break
            if qStopAfterAlignments and fn == self.GetAlignmentFilename: break
        
        # 1. Write the sequence files
        fastaWriter = FastaWriter(self.ogsWorkingDir)
        self.WriteFastaFiles(fastaWriter, ogs, idDict)
        if qStopAfterSeqs: return resultsDirsFullPath

        # 3
        # Get OGs to use for species tree
        # (concatenated_algn_fn and fSingleCopy are only bound when qDoSpeciesTree is true,
        # so every later use of them must be guarded by qDoSpeciesTree)
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(ogMatrix)            
            concatenated_algn_fn = os.path.split(self.GetAlignmentFilename(0))[0] + "/SpeciesTreeAlignment.fa"
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(ogs)
        if qStopAfterAlignments:
            util.PrintUnderline("Inferring multiple sequence alignments")
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignCommands_and_filenames, False)
            if qDoSpeciesTree:
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # ids -> accessions
            alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]        
            accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
            if qDoSpeciesTree:
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(os.path.split(self.GetAlignmentFilename(0, True))[0] + "/SpeciesTreeAlignment.fa")
            self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]
        
        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]
        treeCommands_and_filenames = self.GetTreeCommands(alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print("Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup" % (len(iOgsForSpeciesTree), 100.*fSingleCopy))
            util.PrintUnderline("Inferring multiple sequence alignments for species tree") 
            # Do required alignments and trees
            # Only the directory of the tree files is needed, so index 0 is used (as for the
            # concatenated alignment) rather than a loop variable that is not yet bound here
            speciesTreeFN_ids = os.path.split(self.GetTreeFilename(0))[0] + "/SpeciesTree_unrooted.txt"
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
            CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # Add species tree to list of commands to run
            # (the list is restarted here: the commands run above are not run a second time)
            commands_and_filenames = [self.program_caller.GetTreeCommands(self.tree_program, [concatenated_algn_fn], [speciesTreeFN_ids], ["SpeciesTree"])]
            util.PrintUnderline("Inferring remaining multiple sequence alignments and gene trees") 
        else:
            util.PrintUnderline("Inferring multiple sequence alignments and gene trees") 

        # Now continue as before
        # Orthogroups already processed for the species tree are skipped. An alignment is
        # paired with its tree command where there is one, otherwise it is run alone
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)                         
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
        for i in xrange(len(treeCommands_and_filenames), len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
        
        # Convert ids to accessions
        accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            alignmentFilesToUse.append(concatenated_algn_fn)
            accessionAlignmentFNs.append(os.path.split(self.GetAlignmentFilename(0, True))[0] + "/SpeciesTreeAlignment.fa")
        # The alignments are converted whether or not a species tree was requested
        self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
        if qDoSpeciesTree:
            if os.path.exists(speciesTreeFN_ids):
                util.RenameTreeTaxa(speciesTreeFN_ids, self.workingDir + "SpeciesTree_unrooted.txt", idDict, qFixNegatives=True)
            else:
                print("ERROR: Species tree inference failed")
                util.Fail()
        for i in xrange(len(treeCommands_and_filenames)):
            if os.path.exists(self.GetTreeFilename(i)):
                util.RenameTreeTaxa(self.GetTreeFilename(i), self.GetTreeFilename(i, True), idDict, qFixNegatives=True)       
        return resultsDirsFullPath[:2]
Example #10
0
def OrthologuesWorkflow(workingDir_ogs, 
                       orthofinderResultsDir, 
                       speciesToUse, nSpAll, 
                       clustersFilename_pairs, 
                       tree_options,
                       msa_method,
                       tree_method,
                       nHighParallel,
                       nLowParrallel,
                       userSpeciesTree = None, 
                       qStopAfterSeqs = False,
                       qStopAfterAlign = False,
                       qStopAfterTrees = False, 
                       qMSA = False,
                       qPhyldog = False,
                       pickleDir=None):
    """
    Drive the gene-tree, species-tree and orthologue inference for a set of orthogroups.

    Stages:
    1. Setup: build the OrthoGroupsSet (ogSet) and a fresh "Orthologues_" results directory.
    2. Gene trees: from alignments (qMSA / qPhyldog) or from DendroBLAST distance
       matrices; unless a user species tree is given, an unrooted species tree as well.
    3. Species tree: convert the user-supplied tree, or root the inferred tree.
    4. Reconciliation / orthologues, once per candidate root.
    5. Clean up the working directory.

    The qStopAfterSeqs / qStopAfterAlign / qStopAfterTrees flags return early after
    the corresponding stage. qPhyldog returns as soon as the Phyldog analysis has
    been called.

    Returns the text describing where the results are (built here for the early
    exits, otherwise whatever GetResultsFilesString returns).
    """
    # Message templates shared by the early-exit paths
    seqsMsg = "\nSequences for orthogroups:\n   %s\n"
    alignMsg = "\nMultiple sequence alignments:\n   %s\n"

    # ogSet - all the relevant information about the orthogroups, species etc.
    ogSet = OrthoGroupsSet(workingDir_ogs, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor, pickleDir=pickleDir)

    # Class that is going to run the analysis needs to check the dependencies
    resultsDir = util.CreateNewWorkingDirectory(orthofinderResultsDir + "Orthologues_")

    # === 1 === (ust = UserSpeciesTree)
    # MSA:               Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Phyldog:           Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Dendroblast:                                  DistanceMatrices    GeneTrees    db    SpeciesTree
    # MSA (ust):         Sequences    Alignments                        GeneTrees    db
    # Phyldog (ust):     Sequences    Alignments                        GeneTrees    db
    # Dendroblast (ust):                            DistanceMatrices    GeneTrees    db
    qFromAlignments = qMSA or qPhyldog
    if not qFromAlignments:
        util.PrintUnderline("Calculating gene distances")
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        db.ReadAndPickle()
        # Only the ids-format species tree filename is needed further down
        _nOGs, _D, spTreeFN_ids, _spTreeUnrootedFN = db.RunAnalysis()
    else:
        treeGen = msa.TreesForOrthogroups(tree_options, msa_method, tree_method, resultsDir, workingDir_ogs)
        seqs_alignments_dirs = treeGen.DoTrees(ogSet.OGs(qInclAll=True), ogSet.Spec_SeqDict(), nHighParallel, qStopAfterSeqs, qStopAfterAlign or qPhyldog)
        if qStopAfterSeqs:
            print("")
            return seqsMsg % seqs_alignments_dirs[0]
        if qStopAfterAlign:
            print("")
            return (seqsMsg % seqs_alignments_dirs[0]) + (alignMsg % seqs_alignments_dirs[1])
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        if not userSpeciesTree:
            util.PrintUnderline("Inferring species tree (calculating gene distances)")
            print("Loading BLAST scores")
            db.ReadAndPickle()
            spTreeFN_ids, _spTreeUnrootedFN = db.SpeciesTreeOnly()
        if qPhyldog:
            trees_from_phyldog.RunPhyldogAnalysis(resultsDir + "WorkingDirectory/phyldog/", ogSet.OGs(), speciesToUse)
            return "Running Phyldog" + "\n".join(seqs_alignments_dirs)

    # === 2 === Check can continue with analysis
    if len(ogSet.speciesToUse) < 4:
        print("ERROR: Not enough species to infer species tree")
        util.Fail()

    # === 3 ===
    # MSA / Phyldog / Dendroblast:                RootSpeciesTree
    # MSA (ust) / Phyldog (ust) / Dendroblast (ust): ConvertSpeciesTreeIDs
    if userSpeciesTree:
        util.PrintUnderline("Using user-supplied species tree")
        userSpeciesTree = ConvertUserSpeciesTree(db.workingDir + "Trees_ids/", userSpeciesTree, ogSet.SpeciesDict())
        rootedSpeciesTreeFN = [userSpeciesTree]
        roots = [None]
        qMultiple = False
    else:
        util.PrintUnderline("Best outgroup(s) for species tree")
        spDict = ogSet.SpeciesDict()
        geneTreesDir = os.path.split(db.TreeFilename_IDs(0))[0] + "/"
        roots, clusters, rootedSpeciesTreeFN, nSupport = rfd.GetRoot(spTreeFN_ids, geneTreesDir, rfd.GeneToSpecies_dash, nHighParallel, treeFmt = 1)
        qMultiple = len(roots) > 1
        nDuplications = len(clusters)
        dupSummary = (nDuplications, nSupport, nDuplications - nSupport)
        if qMultiple:
            print("Observed %d duplications. %d support the best roots and %d contradict them." % dupSummary)
            print("Best outgroups for species tree:")
        else:
            print("Observed %d duplications. %d support the best root and %d contradict it." % dupSummary)
            print("Best outgroup for species tree:")
        for outgroup in roots:
            print("  " + ", ".join([spDict[s] for s in outgroup]))

    if qStopAfterTrees:
        if userSpeciesTree:
            # Nothing to root: just report where the trees (and MSA outputs) are
            pieces = []
            if qMSA:
                pieces.append(seqsMsg % seqs_alignments_dirs[0])
                pieces.append(alignMsg % seqs_alignments_dirs[1])
            pieces.append("\nGene trees:\n   %s\n" % (resultsDir + "Gene_Trees/"))
            return "".join(pieces)
        # otherwise, write out the rooted species tree(s)
        resultsSpeciesTrees = []
        qSingleRoot = len(roots) == 1
        for iRoot, (outgroup, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            if qSingleRoot:
                rootedTreeFN = resultsDir + "SpeciesTree_rooted.txt"
            else:
                rootedTreeFN = resultsDir + "SpeciesTree_rooted_at_outgroup_%d.txt" % iRoot
            resultsSpeciesTrees.append(rootedTreeFN)
            util.RenameTreeTaxa(speciesTree_fn, rootedTreeFN, db.ogSet.SpeciesDict(), qFixNegatives=True)
        db.DeleteBlastMatrices()
        CleanWorkingDir(db.workingDir)
        return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None, False)

    # === 4 === Reconciliation / orthologues, one pass per candidate root
    if qMultiple:
        util.PrintUnderline("\nAnalysing each of the potential species tree roots", True)
    resultsSpeciesTrees = []
    for iRoot, (outgroup, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
        rootLabel = " (root %d)" % iRoot if qMultiple else ""
        util.PrintUnderline("Reconciling gene trees and species tree" + rootLabel)
        if qMultiple:
            # Each candidate root gets its own results and reconciled-trees directories
            resultsDir_new = resultsDir + "Orthologues_using_outgroup_%d/" % iRoot
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees_using_outgroup_%d/" % iRoot
            rootedTreeFN = resultsDir_new + "SpeciesTree_rooted_at_outgroup_%d.txt" % iRoot
        else:
            resultsDir_new = resultsDir + "Orthologues/"
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees/"
            rootedTreeFN = resultsDir + "SpeciesTree_rooted.txt"
        resultsSpeciesTrees.append(rootedTreeFN)
        # The outgroup is only reported when the root was inferred here
        if qMultiple or not userSpeciesTree:
            print("Outgroup: " + ", ".join([spDict[s] for s in outgroup]))
        os.mkdir(resultsDir_new)
        util.RenameTreeTaxa(speciesTree_fn, rootedTreeFN, db.ogSet.SpeciesDict(), qFixNegatives=True)
        ReconciliationAndOrthologues(db.TreeFilename_IDs, db.ogSet, speciesTree_fn, db.workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, iRoot if qMultiple else None, pickleDir=pickleDir)

    # === 5 === Clean up
    db.DeleteBlastMatrices()
    CleanWorkingDir(db.workingDir)
    util.PrintUnderline("Writing results files", True)

    return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None)
        arg = args.pop(0)
        if arg == "-t" or arg == "--threads":
            if len(args) == 0:
                print("Missing option for command line argument -t")
                util.Fail()
            arg = args.pop(0)
            try:
                nProcesses = int(arg)
            except:
                print("Incorrect argument for number of threads: %s" % arg)
                util.Fail()
        else:
            userDir = arg

    # Check arguments
    util.PrintUnderline("0. Getting Orthologues")
    if nProcesses == None:
        print(
            """\nNumber of parallel processes has not been specified, will use the default value.  
Number of parallel processes can be specified using the -t option.""")
        nProcesses = util.nThreadsDefault
    print("Using %d threads for alignments and trees" % nProcesses)

    orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile(
        userDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir +
                                                "SpeciesIDs.txt")
    resultsString = OrthologuesWorkflow(orthofinderWorkingDir,
                                        orthofinderResultsDir, speciesToUse,
                                        nSpAll, clustersFilename_pairs,
                                        nProcesses)
Example #12
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, speciesToUse,
                qOutputCommands, nProcesses, qStopAfterSeqs,
                qStopAfterAlignments, qDoSpeciesTree):
        """
        Write orthogroup sequences, then infer alignments, gene trees and
        (optionally) a species tree from a concatenated alignment.

        Arguments:
        ogs - orthogroups, passed on to the fasta, alignment and tree helpers
        ogMatrix - passed to DetermineOrthogroupsForSpeciesTree (only read
                   when qDoSpeciesTree is set)
        idDict - id -> accession dictionary used for all the renaming steps.
                 NOTE: it is updated in place with speciesIdDict.
        speciesIdDict - species id dictionary merged into idDict
        speciesToUse - passed to the FastaWriter
        qOutputCommands - if set, write job files (CreateMsaJob etc.) rather
                          than running the commands here
        nProcesses - number of parallel processes for running the commands
        qStopAfterSeqs - return once the fasta files have been written
        qStopAfterAlignments - return once the alignments have been handled
        qDoSpeciesTree - also build the concatenated alignment and species tree

        Returns:
        [seqs dir, alignments dir, trees dir] if qStopAfterSeqs, otherwise
        only the first two of these.
        """
        # Same code will then also convert the concatenated alignment for the
        # species tree
        idDict.update(speciesIdDict)
        # 0
        resultsDirsFullPath = [
            files.FileHandler.GetResultsSeqsDir(),
            files.FileHandler.GetResultsAlignDir(),
            files.FileHandler.GetResultsTreesDir()
        ]

        # 1.
        fastaWriter = FastaWriter(files.FileHandler.GetSpeciesSeqsDir(),
                                  speciesToUse)
        self.WriteFastaFiles(fastaWriter, ogs, idDict, True)
        if qStopAfterSeqs: return resultsDirsFullPath

        # Job files written so far (only used when qOutputCommands is set);
        # the running count is passed to each Create*Job call
        job_files = []

        # 3
        # Get OGs to use for species tree
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(
                ogMatrix)
            concatenated_algn_fn = files.FileHandler.GetSpeciesTreeConcatAlignFN(
            )
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(
            ogs)
        if qStopAfterAlignments:
            util.PrintUnderline("Inferring multiple sequence alignments")
            if qOutputCommands:
                job_files.append(
                    CreateMsaJob(alignCommands_and_filenames, len(job_files)))
            else:
                pc.RunParallelCommandsAndMoveResultsFile(
                    nProcesses, alignCommands_and_filenames, False)
            if qDoSpeciesTree:
                if qOutputCommands:
                    job_files.append(
                        CreateConcatenatedAlignmentJob(iOgsForSpeciesTree,
                                                       concatenated_algn_fn,
                                                       fSingleCopy,
                                                       len(job_files)))
                else:
                    CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs,
                                                self.GetAlignmentFilename,
                                                concatenated_algn_fn,
                                                fSingleCopy)

            # ids -> accessions
            alignmentFilesToUse = [
                self.GetAlignmentFilename(i)
                for i, _ in enumerate(alignCommands_and_filenames)
            ]
            accessionAlignmentFNs = [
                self.GetAlignmentFilename(i, True)
                for i in xrange(len(alignmentFilesToUse))
            ]
            if qDoSpeciesTree:
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(
                    files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            if qOutputCommands:
                # TODO: make rename alignment taxa command
                util.PrintUnderline("Execute the commands in " +
                                    ','.join(job_files))
            else:
                self.RenameAlignmentTaxa(alignmentFilesToUse,
                                         accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]

        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [
            self.GetAlignmentFilename(i)
            for i, _ in enumerate(alignCommands_and_filenames)
        ]
        treeCommands_and_filenames = self.GetTreeCommands(
            alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print(
                "Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup"
                % (len(iOgsForSpeciesTree), 100. * fSingleCopy))
            util.PrintUnderline(
                "Inferring multiple sequence alignments for species tree")
            # Do required alignments and trees
            speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([
                    alignCommands_and_filenames[i],
                    treeCommands_and_filenames[i]
                ])
            if qOutputCommands:
                job_files.append(
                    CreateMsaJob(commands_and_filenames, len(job_files)))
                job_files.append(
                    CreateConcatenatedAlignmentJob(iOgsForSpeciesTree,
                                                   concatenated_algn_fn,
                                                   fSingleCopy,
                                                   len(job_files)))
            else:
                pc.RunParallelCommandsAndMoveResultsFile(
                    nProcesses, commands_and_filenames, True)
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs,
                                            self.GetAlignmentFilename,
                                            concatenated_algn_fn, fSingleCopy)
            # Start the second command list with the species tree command
            commands_and_filenames = [
                self.program_caller.GetTreeCommands(self.tree_program,
                                                    [concatenated_algn_fn],
                                                    [speciesTreeFN_ids],
                                                    ["SpeciesTree"])
            ]
            if qOutputCommands:
                # The species tree gets its own job file, so it must not be
                # repeated in the remaining-orthogroups job below
                job_files.append(
                    CreateSpeciesTreeJob(commands_and_filenames,
                                         len(job_files)))
                commands_and_filenames = []
            util.PrintUnderline(
                "Inferring remaining multiple sequence alignments and gene trees"
            )
        else:
            util.PrintUnderline(
                "Inferring multiple sequence alignments and gene trees")

        # Now continue as before
        # Orthogroups already processed for the species tree are skipped
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([
                alignCommands_and_filenames[i], treeCommands_and_filenames[i]
            ])
        # Orthogroups that have an alignment command but no tree command
        for i in xrange(len(treeCommands_and_filenames),
                        len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        if qOutputCommands:
            job_files.append(
                CreateOGTreesJob(commands_and_filenames, len(job_files)))
        else:
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses,
                                                     commands_and_filenames,
                                                     True)

        # Convert ids to accessions
        accessionAlignmentFNs = [
            self.GetAlignmentFilename(i, True)
            for i in xrange(len(alignmentFilesToUse))
        ]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            if qOutputCommands:
                job_files.append(
                    CreateRenameTaxaJob([
                        (concatenated_algn_fn,
                         files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
                    ], [(speciesTreeFN_ids,
                         files.FileHandler.GetSpeciesTreeUnrootedFN(True))],
                                        len(job_files)))
            else:
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(
                    files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
                if os.path.exists(speciesTreeFN_ids):
                    # Only inspect the tree once it is known to exist, so a
                    # failed inference always reaches the error report below
                    qHaveSupport = util.HaveSupportValues(speciesTreeFN_ids)
                    util.RenameTreeTaxa(
                        speciesTreeFN_ids,
                        files.FileHandler.GetSpeciesTreeUnrootedFN(True),
                        idDict,
                        qSupport=qHaveSupport,
                        qFixNegatives=True)
                else:
                    text = "ERROR: Species tree inference failed"
                    files.FileHandler.LogFailAndExit(text)

        if qOutputCommands:
            job_files.append(
                CreateRenameTaxaJob(
                    zip(alignmentFilesToUse, accessionAlignmentFNs),
                    [(self.GetTreeFilename(i), self.GetTreeFilename(i, True))
                     for i in xrange(len(treeCommands_and_filenames))],
                    len(job_files)))
            print(
                "Run the commands contained in these files (each depends on the previous):\n"
                + "\n".join(job_files))
            files.FileHandler.LogWorkingDirectoryTrees()
        else:
            self.RenameAlignmentTaxa(alignmentFilesToUse,
                                     accessionAlignmentFNs, idDict)
            # Support values are checked on the first gene tree found and the
            # answer is reused for all the others
            qHaveSupport = None
            for i in xrange(len(treeCommands_and_filenames)):
                infn = self.GetTreeFilename(i)
                if os.path.exists(infn):
                    if qHaveSupport is None:
                        qHaveSupport = util.HaveSupportValues(infn)
                    util.RenameTreeTaxa(infn,
                                        self.GetTreeFilename(i, True),
                                        idDict,
                                        qSupport=qHaveSupport,
                                        qFixNegatives=True)

        return resultsDirsFullPath[:2]
Example #13
0
def OrthologuesWorkflow(speciesToUse, nSpAll, 
                       tree_options,
                       msa_method,
                       tree_method,
                       recon_method,
                       nHighParallel,
                       nLowParrallel,
                       qDoubleBlast,
                       qAddSpeciesToIDs,
                       userSpeciesTree = None, 
                       qStopAfterSeqs = False,
                       qStopAfterAlign = False,
                       qStopAfterTrees = False, 
                       qMSA = False,
                       qPhyldog = False,
                       results_name = ""):
    """
    Drive the gene-tree, species-tree and orthologue inference for a set of orthogroups.

    1. Setup:
        - ogSet, directories
        - DendroBLASTTrees - object
    2. Gene trees:
        - MSA / Phyldog: sequences, alignments, gene trees
        - DendroBLAST: RunAnalysis (distance matrices, trees)
    3. Root species tree (or use the user-supplied / Phyldog / two-species tree)
    4. Reconciliation/Orthologues
    5. Clean up

    Variables:
    - ogSet - all the relevant information about the orthogroups, species etc.

    The qStopAfterSeqs / qStopAfterAlign / qStopAfterTrees flags return early after
    the corresponding stage.

    Returns the text describing where the results are (built here for the early
    exits, otherwise whatever GetResultsFilesString returns).
    """
    ogSet = OrthoGroupsSet(files.FileHandler.GetWorkingDirectory1_Read(), speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor = util.FirstWordExtractor)
    
    tree_generation_method = "msa" if qMSA or qPhyldog else "dendroblast"
    stop_after = "seqs" if qStopAfterSeqs else "align" if qStopAfterAlign else ""
    files.FileHandler.MakeResultsDirectory2(tree_generation_method, stop_after, results_name)    
    # === 1 === (ust = UserSpeciesTree)
    # MSA:               Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Phyldog:           Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Dendroblast:                                  DistanceMatrices    GeneTrees    db    SpeciesTree
    # MSA (ust):         Sequences    Alignments                        GeneTrees    db
    # Phyldog (ust):     Sequences    Alignments                        GeneTrees    db
    # Dendroblast (ust):                            DistanceMatrices    GeneTrees    db
    # NOTE(review): qDB_SpeciesTree is never set to True in this function, so the
    # DendroBLAST species-tree step in the MSA/Phyldog branch below never runs.
    qDB_SpeciesTree = False
    if userSpeciesTree:
        util.PrintUnderline("Using user-supplied species tree") 
        spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        ConvertUserSpeciesTree(userSpeciesTree, ogSet.SpeciesDict(), spTreeFN_ids)
    
    if qMSA or qPhyldog:
        qLessThanFourSpecies = len(ogSet.seqsInfo.speciesToUse) < 4
        treeGen = trees_msa.TreesForOrthogroups(tree_options, msa_method, tree_method)       
        if (not userSpeciesTree) and qLessThanFourSpecies:
            # With two or three species the species tree is written directly rather than inferred
            spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            WriteSpeciesTreeIDs_TwoThree(ogSet.seqsInfo.speciesToUse, spTreeFN_ids)
            util.RenameTreeTaxa(spTreeFN_ids, files.FileHandler.GetSpeciesTreeUnrootedFN(True), ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True)
        qDoMSASpeciesTree = (not qLessThanFourSpecies) and (not userSpeciesTree)
        util.PrintTime("Starting MSA/Trees")
        seqs_alignments_dirs = treeGen.DoTrees(ogSet.OGs(qInclAll=True), ogSet.OrthogroupMatrix(), ogSet.Spec_SeqDict(), ogSet.SpeciesDict(), ogSet.speciesToUse, nHighParallel, qStopAfterSeqs, qStopAfterAlign or qPhyldog, qDoSpeciesTree=qDoMSASpeciesTree) 
        util.PrintTime("Done MSA/Trees")
        if qDoMSASpeciesTree:
            spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        if qStopAfterSeqs:
            print("")
            return ("\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0])
        elif qStopAfterAlign:
            print("")
            st = "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
            st += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            return st
        db = DendroBLASTTrees(ogSet, nLowParrallel, qDoubleBlast)
        if qDB_SpeciesTree and not userSpeciesTree and not qLessThanFourSpecies:
            util.PrintUnderline("Inferring species tree (calculating gene distances)")
            print("Loading BLAST scores")
            spTreeFN_ids = db.SpeciesTreeOnly()
        if qPhyldog:
            if userSpeciesTree: 
                # NOTE(review): the user tree was already converted above with the same
                # arguments; here userSpeciesTree is rebound to the call's return value
                # - confirm what ConvertUserSpeciesTree returns.
                userSpeciesTree = ConvertUserSpeciesTree(userSpeciesTree, ogSet.SpeciesDict(), files.FileHandler.GetSpeciesTreeUnrootedFN())
            util.PrintTime("Starting phyldog")
            species_tree_ids_labelled_phyldog = wrapper_phyldog.RunPhyldogAnalysis(files.FileHandler.GetPhyldogWorkingDirectory(), ogSet.OGs(), speciesToUse, nHighParallel)
    else:
        db = DendroBLASTTrees(ogSet, nLowParrallel, qDoubleBlast)
        spTreeFN_ids, qSTAG = db.RunAnalysis()
    files.FileHandler.LogWorkingDirectoryTrees()
    # qSTAG only exists on the DendroBLAST path; the conditional expression
    # does not evaluate it on the other paths
    qSpeciesTreeSupports = False if (userSpeciesTree or qMSA or qPhyldog) else qSTAG
    # SpeciesTree:
    # spTreeFN_ids, or equivalently FileHandler.GetSpeciesTreeUnrootedFN() in all cases (user, inferred etc)
    # Thus, we always have the species tree ids format
    #
    # With phyldog, we also have species_tree_ids_labelled_phyldog - with the node labels given by phyldog

    # === 2 === Check can continue with analysis
    # (the former "fewer than four species" abort is disabled)

    # === 3 ===
    # MSA / Phyldog / Dendroblast:                   RootSpeciesTree
    # MSA (ust) / Phyldog (ust) / Dendroblast (ust): ConvertSpeciesTreeIDs
    if qPhyldog:
        rootedSpeciesTreeFN = [species_tree_ids_labelled_phyldog]
        roots = [None]
        qMultiple = False
        all_stride_dup_genes = None
    elif userSpeciesTree:
        rootedSpeciesTreeFN = [spTreeFN_ids]
        roots = [None]
        qMultiple = False
        all_stride_dup_genes = None
    elif len(ogSet.seqsInfo.speciesToUse) == 2:
        hardcodeSpeciesTree = GetSpeciesTreeRoot_TwoTaxa(ogSet.seqsInfo.speciesToUse)
        rootedSpeciesTreeFN = [hardcodeSpeciesTree]
        roots = [None]
        qMultiple = False
        all_stride_dup_genes = None
    else:
        util.PrintUnderline("Best outgroup(s) for species tree") 
        util.PrintTime("Starting STRIDE")
        roots, clusters_counter, rootedSpeciesTreeFN, nSupport, _, _, all_stride_dup_genes = stride.GetRoot(spTreeFN_ids, files.FileHandler.GetOGsTreeDir(), stride.GeneToSpecies_dash, nHighParallel, qWriteRootedTree=True)
        util.PrintTime("Done STRIDE")
        nAll = sum(clusters_counter.values())
        nFP_mp = nAll - nSupport
        # Only keys with more than one element are counted as non-terminal
        n_non_trivial = sum(v for k, v in clusters_counter.items() if len(k) > 1)
        if len(roots) > 1:
            print("Observed %d well-supported, non-terminal duplications. %d support the best roots and %d contradict them." % (n_non_trivial, n_non_trivial-nFP_mp, nFP_mp))
            print("Best outgroups for species tree:")  
        else:
            print("Observed %d well-supported, non-terminal duplications. %d support the best root and %d contradict it." % (n_non_trivial, n_non_trivial-nFP_mp, nFP_mp))
            print("Best outgroup for species tree:")  
        spDict = ogSet.SpeciesDict()
        for r in roots: print("  " + (", ".join([spDict[s] for s in r]))  )
        qMultiple = len(roots) > 1
    shutil.copy(rootedSpeciesTreeFN[0], files.FileHandler.GetSpeciesTreeIDsRootedFN())
        
    # SpeciesTree:
    # We now have a list of rooted species trees: rootedSpeciesTreeFN (this should be recorded by the file handler)
        
    if qStopAfterTrees:
        if userSpeciesTree:
            st = ""
            if qMSA:
                st += "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
                st += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            st += "\nGene trees:\n   %s\n" % (files.FileHandler.GetResultsTreesDir())
            return st
        # otherwise, root species tree
        resultsSpeciesTrees = []
        for i, (r, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            resultsSpeciesTrees.append(files.FileHandler.GetSpeciesTreeResultsFN(i, not qMultiple))
            util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True)
            # NOTE(review): the node-labelled tree filename does not depend on i, so
            # with several roots each iteration writes to the same file.
            labeled_tree_fn = files.FileHandler.GetSpeciesTreeResultsNodeLabelsFN()
            util.RenameTreeTaxa(speciesTree_fn, labeled_tree_fn, db.ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True, label='N')
        files.FileHandler.CleanWorkingDir2()
        return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None, False)
    
    # === 4 === Reconciliation / orthologues, using the first rooted species tree only
    if qMultiple: util.PrintUnderline("\nMultiple potential species tree roots were identified, only one will be analysed.", True)
    resultsSpeciesTrees = []
    i = 0
    r = roots[0]
    speciesTree_fn = rootedSpeciesTreeFN[0]
    util.PrintUnderline("Reconciling gene trees and species tree")         
    resultsSpeciesTrees.append(files.FileHandler.GetSpeciesTreeResultsFN(0, True))
    # spDict is only defined when the root was inferred above, which is exactly this condition
    if (not userSpeciesTree) and (not qPhyldog) and len(ogSet.seqsInfo.speciesToUse) != 2:
        print("Outgroup: " + (", ".join([spDict[s] for s in r])))
    util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True)
    util.PrintTime("Starting Recon and orthologues")
    ReconciliationAndOrthologues(recon_method, db.ogSet, nHighParallel, i if qMultiple else None, all_stride_dup_genes=all_stride_dup_genes) 
    util.PrintTime("Done Recon")
    
    if qMultiple:
        # Write every candidate rooted tree to the results, including those not analysed
        for i, (r, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            unanalysedSpeciesTree = files.FileHandler.GetSpeciesTreeResultsFN(i, False)
            util.RenameTreeTaxa(speciesTree_fn, unanalysedSpeciesTree, db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True, label='N')
    
    # SpeciesTree: If it's been inferred, there is now at least one rooted results species tree: GetSpeciesTreeResultsFN()
    
    # === 5 === Clean up
    files.FileHandler.CleanWorkingDir2()
    util.PrintUnderline("Writing results files", True)
    
    return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None)