Ejemplo n.º 1
0
 def StartFromTrees(self,
                    wd1_list,
                    wd2,
                    base,
                    clustersFilename_pairs,
                    speciesTreeFN,
                    qIsUSerSpeciesTree,
                    user_name=None):
     """
     Initialise the directory state for a run that starts from existing trees.

     A results directory is requested from util.CreateNewWorkingDirectory, a
     "WorkingDirectory/" is made inside it, the log is started and the species
     tree in use is written to the log.

     Species tree handling:
     - For an OF species tree, copy it to the location given by
       self.GetSpeciesTreeIDsRootedFN().
     - For a user species tree nothing is copied here; any conversion must be
       done immediately by the OF code that calls this method.

     Args:
         wd1_list: stored unchanged as self.wd_base.
         wd2: stored unchanged as self.wd_trees.
         base: path prefix; "Results_" (plus user_name, if given) is appended
             before it is passed to util.CreateNewWorkingDirectory.
         clustersFilename_pairs: path expected to end in "_id_pairs.txt". The
             last len("_id_pairs.txt") characters are dropped, without being
             checked, to give self.clustersFilename.
         speciesTreeFN: path of the species tree file.
         qIsUSerSpeciesTree: truthy if speciesTreeFN was supplied by the user,
             in which case it is not copied.
         user_name: optional suffix for the results directory name. When given
             the directory is requested with qDate=False.

     Raises:
         OSError: propagated from os.mkdir (e.g. the working directory already
             exists) or from shutil.copy.
     """
     self.wd_base = wd1_list
     self.wd_trees = wd2

     results_prefix = base + "Results_"
     if user_name is None:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix)
     else:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix + user_name,
                                                   qDate=False)

     # self.rd1 is used as a plain string prefix, so it is presumably returned
     # with a trailing path separator - verify against util if this changes.
     self.wd_current = self.rd1 + "WorkingDirectory/"
     os.mkdir(self.wd_current)

     self.clustersFilename = clustersFilename_pairs[:-len("_id_pairs.txt")]
     self.StartLog()
     if not qIsUSerSpeciesTree:
         shutil.copy(speciesTreeFN, self.GetSpeciesTreeIDsRootedFN())
     self.WriteToLog("Species Tree: %s\n" % speciesTreeFN)
     self.LogWorkingDirectoryTrees()
Ejemplo n.º 2
0
 def StartFromOrthogroupsOrSequenceSearch(self,
                                          wd_base_list,
                                          base,
                                          clustersFilename_pairs=None,
                                          user_name=None,
                                          userSpeciesTree=None):
     """
     Initialise the directory state for a run that starts from orthogroups or
     from a sequence search.

     Sets wd_base, wd_trees and wd_current (and rd1, the results directory),
     creates an empty Log.txt in the results directory and starts the log.

     Args:
         wd_base_list: stored unchanged as self.wd_base.
         base: path prefix; "Results_" (plus user_name, if given) is appended
             before it is passed to util.CreateNewWorkingDirectory.
         clustersFilename_pairs: optional path expected to end in
             "_id_pairs.txt"; when given, the last len("_id_pairs.txt")
             characters are dropped to give self.clustersFilename.
         user_name: optional suffix for the results directory name. When given
             the directory is requested with qDate=False.
         userSpeciesTree: accepted for interface compatibility; not used by
             this method.

     Raises:
         Exception: if self.wd_base is already non-empty, i.e. the base working
             directory would be changed after it has been set.
         OSError: propagated from os.mkdir or open.
     """
     if len(self.wd_base) != 0:
         raise Exception("Changing WorkingDirectory1")
     self.wd_base = wd_base_list

     if clustersFilename_pairs is not None:
         self.clustersFilename = clustersFilename_pairs[:-len("_id_pairs.txt")]

     results_prefix = base + "Results_"
     if user_name is None:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix)
     else:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix + user_name,
                                                   qDate=False)

     self.wd_current = self.rd1 + "WorkingDirectory/"
     os.mkdir(self.wd_current)

     # Opening for writing and closing straight away leaves an empty Log.txt
     # in the results directory; nothing is written to it here.
     with open(self.rd1 + "Log.txt", 'wb'):
         pass

     # Trees are written to the same working directory in this mode.
     self.wd_trees = self.wd_current
     self.StartLog()
Ejemplo n.º 3
0
def OrthologuesFromTrees(groupsDir, workingDir, nHighParallel, speciesTree_fn = None, pickleDir=None):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    groupsDir - directory with orthogroups file in
    workingDir - orthologues 'WorkingDirectory'; used as a string prefix for "Trees_ids/",
                 "Recon_Gene_Trees/" and "../Orthologues_", so it must end with a path separator
    nHighParallel - passed on unchanged to ReconciliationAndOrthologues
    speciesTree_fn - None if not supplied, otherwise rooted tree using user species names (not orthofinder IDs).
                     When None, a single rooted species tree is looked for in workingDir + "Trees_ids/"
    pickleDir - passed on unchanged to ReconciliationAndOrthologues

    Returns a message string naming the species-by-species orthologues directory that was used.
    Problems with the species tree are printed and reported through util.Fail().
    """
    # Check species tree
    qUserSpTree = speciesTree_fn is not None
    if qUserSpTree:
        if not os.path.exists(speciesTree_fn):
            print("\nERROR: %s does not exist\n" % speciesTree_fn)
            util.Fail()
    else:
        possibilities = ["SpeciesTree_ids_0_rooted.txt", "SpeciesTree_ids_1_rooted.txt", "SpeciesTree_user_ids.txt"] # etc (only need to determine if unique)
        treesDir = workingDir + "Trees_ids/"
        found = [treesDir + p for p in possibilities if os.path.exists(treesDir + p)]
        if found:
            # With several candidates the last one is kept, but that case is reported as an error below
            speciesTree_fn = found[-1]
        nTrees = len(found)
        if nTrees == 0:
            print("\nERROR: There is a problem with the specified directory. The rooted species tree %s or %s is not present." % (possibilities[0], possibilities[2]))
            print("Please rectify the problem or alternatively use the -s option to specify the species tree to use.\n")
            util.Fail()
        if nTrees > 1:
            print("\nERROR: There is more than one rooted species tree in the specified directory structure. Please use the -s option to specify which species tree should be used\n")
            util.Fail()

    def TreePatIDs(iog):
        return workingDir + ("Trees_ids/OG%07d_tree_id.txt" % iog)
    reconTreesRenamedDir = workingDir + "Recon_Gene_Trees/"
    # for the Orthologues_Species/ directories
    resultsDir_new = util.CreateNewWorkingDirectory(workingDir + "../Orthologues" + "_")
    orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile(groupsDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir + "SpeciesIDs.txt")
    ogSet = OrthoGroupsSet(orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor)
    if qUserSpTree:
        # A user tree is labelled with species names: check it, then convert it for use with the ID-labelled gene trees
        speciesToUseNames = ogSet.SpeciesDict().values()
        CheckUserSpeciesTree(speciesTree_fn, speciesToUseNames)
        speciesTree_fn = ConvertUserSpeciesTree(workingDir + "Trees_ids/", speciesTree_fn, ogSet.SpeciesDict())
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees")
    ReconciliationAndOrthologues(TreePatIDs, ogSet, speciesTree_fn, workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, pickleDir=pickleDir)
    util.PrintUnderline("Writing results files")
    CleanWorkingDir(workingDir)
    return "Species-by-species orthologues directory:\n   %s\n" % resultsDir_new
Ejemplo n.º 4
0
 def CreateOutputDirFromStart_new(self,
                                  fasta_dir,
                                  base,
                                  user_name=None,
                                  old_wd_base_list=None):
     """
     Create the results and working directories for an analysis from the start.

     The initial difference will be that results will go in
     OrthoFinder/Results_DATE or USER_SPECIFIED/RESULTS_DATE whereas before
     they went in Results_DATE or USER_SPECIFIED.

     If this is a composite analysis (-f + -b) then old_wd_base_list is not
     None. In that case SpeciesIDs.txt and SequenceIDs.txt are copied from the
     first previous working directory, that directory is recorded in
     "previous_wd.txt", and the previous directories are appended to
     self.wd_base after the new working directory.

     Args:
         fasta_dir: accepted for interface compatibility; not used by this
             method.
         base: path prefix; "Results_" (plus user_name, if given) is appended
             before it is passed to util.CreateNewWorkingDirectory.
         user_name: optional suffix for the results directory name. When given
             the directory is requested with qDate=False.
         old_wd_base_list: first item is the WD from a previous analysis to be
             extended. If this extended other ones itself then there will be
             other items in the list.

     Raises:
         OSError / IOError: propagated from os.mkdir, shutil.copy or open.
     """
     results_prefix = base + "Results_"
     if user_name is None:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix)
     else:
         self.rd1 = util.CreateNewWorkingDirectory(results_prefix + user_name,
                                                   qDate=False)
     self.wd_current = self.rd1 + "WorkingDirectory/"
     os.mkdir(self.wd_current)
     self.wd_base = [self.wd_current]
     if old_wd_base_list is not None:
         previous_wd = old_wd_base_list[0]
         for id_filename in ("SpeciesIDs.txt", "SequenceIDs.txt"):
             shutil.copy(previous_wd + id_filename,
                         self.wd_current + id_filename)
         # Log the first wd in list, this can then be followed back to previous ones
         # Log file - point to WD at start of chain which contains the new species
         # wd_base_list - should contain current directory and then previous linked directories
         # Text mode: the path is a str, which cannot be written to a file
         # opened in binary mode on Python 3.
         with open(self.wd_current + "previous_wd.txt", 'w') as outfile:
             outfile.write(previous_wd + "\n")
         self.wd_base.extend(old_wd_base_list)
     self.wd_trees = self.wd_current
     self.StartLog()
Ejemplo n.º 5
0
def GetOrthologues(orthofinderWorkingDir, orthofinderResultsDir, speciesToUse,
                   nSpAll, clustersFilename_pairs, nProcesses):
    """
    Run the orthologue inference pipeline and return a results summary string.

    Steps, as announced on stdout:
      1. Check that the required programs can be run (CanRunDependencies).
      2. Calculate gene distances and trees (DendroBLASTTrees.RunAnalysis).
      4. Find the best outgroup(s) for the species tree (rfd.GetRoot).
      5. Reconcile gene trees with each rooted species tree (RunDlcpar).
      6. Infer orthologues from the reconciled trees (pt.get_orthologue_lists).
      7. Clean up and report the results files.
    Step 3 is not announced by this function.

    Args:
        orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs:
            passed to OrthoGroupsSet to describe the orthogroups.
        orthofinderResultsDir: path prefix under which a new "Orthologues_"
            results directory is requested.
        nProcesses: passed to DendroBLASTTrees and rfd.GetRoot.

    Returns:
        The string produced by GetResultsFilesString for the rooted species
        tree file(s) that were written.

    Calls util.Fail() if fewer than 4 species are in use and sys.exit() if the
    dependencies cannot be run.
    """
    ogSet = OrthoGroupsSet(orthofinderWorkingDir,
                           speciesToUse,
                           nSpAll,
                           clustersFilename_pairs,
                           idExtractor=util.FirstWordExtractor)
    if len(ogSet.speciesToUse) < 4:
        print("ERROR: Not enough species to infer species tree")
        util.Fail()

    print("\n1. Checking required programs are installed")
    print("-------------------------------------------")
    if not CanRunDependencies(orthofinderWorkingDir):
        print(
            "Orthogroups have been inferred but the dependencies for inferring gene trees and\northologues have not been met. Please review previous messages for more information."
        )
        sys.exit()

    print("\n2. Calculating gene distances")
    print("-----------------------------")
    resultsDir = util.CreateNewWorkingDirectory(orthofinderResultsDir +
                                                "Orthologues_")

    db = DendroBLASTTrees(ogSet, resultsDir, nProcesses)
    db.ReadAndPickle()
    nOGs, D, spPairs, spTreeFN_ids = db.RunAnalysis()

    print("\n4. Best outgroup(s) for species tree")
    print("------------------------------------")
    spDict = ogSet.SpeciesDict()
    roots, clusters, rootedSpeciesTreeFN, nSupport = rfd.GetRoot(
        spTreeFN_ids,
        os.path.split(db.treesPatIDs)[0] + "/",
        rfd.GeneToSpecies_dash,
        nProcesses,
        treeFmt=1)
    qMultiple = len(roots) > 1
    duplicationCounts = (len(clusters), nSupport, len(clusters) - nSupport)
    if qMultiple:
        print(
            "Observed %d duplications. %d support the best roots and %d contradict them."
            % duplicationCounts)
        print("Best outgroups for species tree:")
    else:
        print(
            "Observed %d duplications. %d support the best root and %d contradict it."
            % duplicationCounts)
        print("Best outgroup for species tree:")
    for r in roots:
        print("  " + (", ".join([spDict[s] for s in r])))

    if qMultiple:
        print("\nAnalysing each of the potential species tree roots.")
    resultsSpeciesTrees = []
    for i, (r, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
        # With several candidate roots every output is kept apart by root index
        if qMultiple:
            resultsDir_new = resultsDir + "Orthologues_using_outgroup_%d/" % i
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees_using_outgroup_%d/" % i
            resultsSpeciesTrees.append(
                resultsDir_new + "SpeciesTree_rooted_at_outgroup_%d.txt" % i)
            stepSuffix = "-%d" % i
            underlineSuffix = "--"
        else:
            resultsDir_new = resultsDir + "Orthologues/"
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees/"
            resultsSpeciesTrees.append(resultsDir + "SpeciesTree_rooted.txt")
            stepSuffix = ""
            underlineSuffix = ""
        os.mkdir(resultsDir_new)
        util.RenameTreeTaxa(speciesTree_fn,
                            resultsSpeciesTrees[-1],
                            db.ogSet.SpeciesDict(),
                            qFixNegatives=True)

        print("\n5%s. Reconciling gene and species trees" % stepSuffix)
        print("-------------------------------------" + underlineSuffix)
        print("Outgroup: " + (", ".join([spDict[s] for s in r])))
        dlcparResultsDir = RunDlcpar(db.treesPatIDs, ogSet, nOGs,
                                     speciesTree_fn, db.workingDir)
        os.mkdir(reconTreesRenamedDir)
        # range rather than xrange so that this also runs on Python 3
        for iog in range(len(db.ogSet.OGs())):
            util.RenameTreeTaxa(dlcparResultsDir +
                                "OG%07d_tree_id.locus.tree" % iog,
                                reconTreesRenamedDir + "OG%07d_tree.txt" % iog,
                                db.ogSet.Spec_SeqDict(),
                                qFixNegatives=False,
                                inFormat=8)

        # Orthologue lists
        print("\n6%s. Inferring orthologues from gene trees" % stepSuffix)
        print("----------------------------------------" + underlineSuffix)
        pt.get_orthologue_lists(ogSet, resultsDir_new, dlcparResultsDir,
                                db.workingDir)

    CleanWorkingDir(db)
    print("\n7. Writing results files")
    print("------------------------")

    return GetResultsFilesString(resultsSpeciesTrees)
Ejemplo n.º 6
0
def OrthologuesWorkflow(workingDir_ogs,
                        orthofinderResultsDir,
                        speciesToUse, nSpAll,
                        clustersFilename_pairs,
                        tree_options,
                        msa_method,
                        tree_method,
                        nHighParallel,
                        nLowParrallel,
                        userSpeciesTree = None,
                        qStopAfterSeqs = False,
                        qStopAfterAlign = False,
                        qStopAfterTrees = False,
                        qMSA = False,
                        qPhyldog = False,
                        pickleDir=None):
    """
    Run the gene tree / species tree / orthologue workflow and return a text summary of the results.

    1. Setup:
        - ogSet, directories
        - DendroBLASTTrees - object
    2. DendroBLAST:
        - read scores
        - RunAnalysis: Get distance matrices, do trees
    3. Root species tree
    4. Reconciliation/Orthologues
    5. Clean up

    Variables:
    - ogSet - all the relevant information about the orthogroups, species etc.

    The qStopAfter* flags end the workflow early, after the sequences, the alignments or the
    trees have been produced. qStopAfterSeqs and qStopAfterAlign are only acted on when qMSA
    or qPhyldog is set. With qPhyldog the function returns once the Phyldog analysis has been
    started.
    """
    seqsFmt = "\nSequences for orthogroups:\n   %s\n"
    alignFmt = "\nMultiple sequence alignments:\n   %s\n"

    ogSet = OrthoGroupsSet(workingDir_ogs, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor, pickleDir=pickleDir)

    # Class that is going to run the analysis needs to check the dependencies
    # (no dependency check is made here).

    resultsDir = util.CreateNewWorkingDirectory(orthofinderResultsDir + "Orthologues_")

    # === 1 === ust = UserSpeciesTree
    # MSA:               Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Phyldog:           Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Dendroblast:                                  DistanceMatrices    GeneTrees    db    SpeciesTree
    # MSA (ust):         Sequences    Alignments                        GeneTrees    db
    # Phyldog (ust):     Sequences    Alignments                        GeneTrees    db
    # Dendroblast (ust):                            DistanceMatrices    GeneTrees    db
    if not (qMSA or qPhyldog):
        util.PrintUnderline("Calculating gene distances")
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        db.ReadAndPickle()
        _, _, spTreeFN_ids, _ = db.RunAnalysis()
    else:
        treeGen = msa.TreesForOrthogroups(tree_options, msa_method, tree_method, resultsDir, workingDir_ogs)
        seqs_alignments_dirs = treeGen.DoTrees(ogSet.OGs(qInclAll=True),
                                               ogSet.Spec_SeqDict(),
                                               nHighParallel,
                                               qStopAfterSeqs,
                                               qStopAfterAlign or qPhyldog)
        if qStopAfterSeqs:
            print("")
            return seqsFmt % seqs_alignments_dirs[0]
        if qStopAfterAlign:
            print("")
            return (seqsFmt % seqs_alignments_dirs[0]) + (alignFmt % seqs_alignments_dirs[1])
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        if not userSpeciesTree:
            # No user tree: the species tree still has to be inferred
            util.PrintUnderline("Inferring species tree (calculating gene distances)")
            print("Loading BLAST scores")
            db.ReadAndPickle()
            spTreeFN_ids, _ = db.SpeciesTreeOnly()
        if qPhyldog:
            trees_from_phyldog.RunPhyldogAnalysis(resultsDir + "WorkingDirectory/phyldog/", ogSet.OGs(), speciesToUse)
            return "Running Phyldog" + "\n".join(seqs_alignments_dirs)

    # === 2 ===
    # Check can continue with analysis
    if len(ogSet.speciesToUse) < 4:
        print("ERROR: Not enough species to infer species tree")
        util.Fail()

    # === 3 ===
    # MSA:               RootSpeciesTree
    # Phyldog:           RootSpeciesTree
    # Dendroblast:       RootSpeciesTree
    # MSA (ust):         ConvertSpeciesTreeIDs
    # Phyldog (ust):     ConvertSpeciesTreeIDs
    # Dendroblast (ust): ConvertSpeciesTreeIDs
    if userSpeciesTree:
        util.PrintUnderline("Using user-supplied species tree")
        userSpeciesTree = ConvertUserSpeciesTree(db.workingDir + "Trees_ids/", userSpeciesTree, ogSet.SpeciesDict())
        rootedSpeciesTreeFN = [userSpeciesTree]
        roots = [None]
        qMultiple = False
    else:
        util.PrintUnderline("Best outgroup(s) for species tree")
        spDict = ogSet.SpeciesDict()
        geneTreesDir = os.path.dirname(db.TreeFilename_IDs(0)) + "/"
        roots, clusters, rootedSpeciesTreeFN, nSupport = rfd.GetRoot(spTreeFN_ids, geneTreesDir, rfd.GeneToSpecies_dash, nHighParallel, treeFmt = 1)
        qMultiple = len(roots) > 1
        nDuplications = len(clusters)
        counts = (nDuplications, nSupport, nDuplications - nSupport)
        if qMultiple:
            print("Observed %d duplications. %d support the best roots and %d contradict them." % counts)
            print("Best outgroups for species tree:")
        else:
            print("Observed %d duplications. %d support the best root and %d contradict it." % counts)
            print("Best outgroup for species tree:")
        for outgroup in roots:
            print("  " + ", ".join([spDict[s] for s in outgroup]))

    if qStopAfterTrees:
        if userSpeciesTree:
            st = ((seqsFmt % seqs_alignments_dirs[0]) + (alignFmt % seqs_alignments_dirs[1])) if qMSA else ""
            return st + "\nGene trees:\n   %s\n" % (resultsDir + "Gene_Trees/")
        # otherwise, root species tree
        qSingleRoot = len(roots) == 1
        resultsSpeciesTrees = []
        for i, (_root, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            if qSingleRoot:
                rootedTreeOut = resultsDir + "SpeciesTree_rooted.txt"
            else:
                rootedTreeOut = resultsDir + "SpeciesTree_rooted_at_outgroup_%d.txt" % i
            resultsSpeciesTrees.append(rootedTreeOut)
            util.RenameTreeTaxa(speciesTree_fn, rootedTreeOut, db.ogSet.SpeciesDict(), qFixNegatives=True)
        db.DeleteBlastMatrices()
        CleanWorkingDir(db.workingDir)
        return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None, False)

    if qMultiple:
        util.PrintUnderline("\nAnalysing each of the potential species tree roots", True)
    resultsSpeciesTrees = []
    for i, (r, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
        rootLabel = (" (root %d)" % i) if qMultiple else ""
        util.PrintUnderline("Reconciling gene trees and species tree" + rootLabel)
        if qMultiple:
            # One set of output directories per candidate root
            resultsDir_new = resultsDir + "Orthologues_using_outgroup_%d/" % i
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees_using_outgroup_%d/" % i
            resultsSpeciesTrees.append(resultsDir_new + "SpeciesTree_rooted_at_outgroup_%d.txt" % i)
        else:
            resultsDir_new = resultsDir + "Orthologues/"
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees/"
            resultsSpeciesTrees.append(resultsDir + "SpeciesTree_rooted.txt")
        # There is no outgroup to report for a user-supplied species tree
        if qMultiple or not userSpeciesTree:
            print("Outgroup: " + ", ".join([spDict[s] for s in r]))
        os.mkdir(resultsDir_new)
        util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qFixNegatives=True)
        ReconciliationAndOrthologues(db.TreeFilename_IDs, db.ogSet, speciesTree_fn, db.workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, i if qMultiple else None, pickleDir=pickleDir)

    db.DeleteBlastMatrices()
    CleanWorkingDir(db.workingDir)
    util.PrintUnderline("Writing results files", True)

    return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None)