def GetSpeciesGenesInfo():
    """
    Return the species labels together with a per-species count list.

    The species to use are read from the species IDs file reported by
    files.FileHandler, and the sequence info from the species sequences
    directory. Each count is the gap between consecutive entries of
    seqsInfo.seqStartingIndices; the final count is nSeqs minus the last
    starting index.

    Returns (speciesLabels, counts).
    """
    species_ids_fn = files.FileHandler.GetSpeciesIDsFN()
    labels, n_species_all, _ = util.GetSpeciesToUse(species_ids_fn)
    seqs_dir = files.FileHandler.GetSpeciesSeqsDir()
    info = util.GetSeqsInfo(seqs_dir, labels, n_species_all)
    # Differences between successive start indices cover every species but
    # the last, which runs from its start index to the end of the sequences.
    counts = list(np.diff(info.seqStartingIndices))
    last_count = info.nSeqs - info.seqStartingIndices[-1]
    counts.append(last_count)
    return labels, counts
def OrthologuesFromTrees(groupsDir, workingDir, nHighParallel, speciesTree_fn = None, pickleDir=None):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    groupsDir - directory with orthogroups file in
    workingDir - orthologues 'WorkingDirectory'; paths are built by plain
                 string concatenation, so it is expected to end with a path
                 separator
    nHighParallel - passed through unchanged to ReconciliationAndOrthologues
    speciesTree_fn - None if not supplied, otherwise rooted tree using user
                     species names (not orthofinder IDs). When None, exactly
                     one rooted species tree must already exist in
                     workingDir + "Trees_ids/".
    pickleDir - passed through unchanged to ReconciliationAndOrthologues

    Returns a message string naming the species-by-species orthologues
    directory. Problems with the species tree are reported via print
    followed by util.Fail().
    """
    # Check species tree
    qUserSpTree = speciesTree_fn is not None
    if qUserSpTree:
        if not os.path.exists(speciesTree_fn):
            print("\nERROR: %s does not exist\n" % speciesTree_fn)
            util.Fail()
    else:
        possibilities = ["SpeciesTree_ids_0_rooted.txt", "SpeciesTree_ids_1_rooted.txt", "SpeciesTree_user_ids.txt"] # etc (only need to determine if unique)
        candidates = [workingDir + "Trees_ids/" + p for p in possibilities]
        found = [fn for fn in candidates if os.path.exists(fn)]
        if found:
            # Only meaningful when the tree is unique; the checks below
            # reject the zero and multiple cases.
            speciesTree_fn = found[-1]
        if len(found) == 0:
            print("\nERROR: There is a problem with the specified directory. The rooted species tree %s or %s is not present." % (possibilities[0], possibilities[2]))
            print("Please rectify the problem or alternatively use the -s option to specify the species tree to use.\n")
            util.Fail()
        if len(found) > 1:
            print("\nERROR: There is more than one rooted species tree in the specified directory structure. Please use the -s option to specify which species tree should be used\n")
            util.Fail()

    def TreePatIDs(iog):
        # Path of the gene tree (ID-labelled) for orthogroup number iog
        return workingDir + ("Trees_ids/OG%07d_tree_id.txt" % iog)

    reconTreesRenamedDir = workingDir + "Recon_Gene_Trees/"
    resultsDir_new = workingDir + "../Orthologues"      # for the Orthologues_Species/ directories
    # NOTE(review): presumably creates a new, uniquely named directory based
    # on the given prefix - confirm against util.CreateNewWorkingDirectory
    resultsDir_new = util.CreateNewWorkingDirectory(resultsDir_new + "_")

    orthofinderWorkingDir, _, clustersFilename_pairs = util.GetOGsFile(groupsDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir + "SpeciesIDs.txt")
    ogSet = OrthoGroupsSet(orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor)
    if qUserSpTree:
        # Validate the user's tree against the species names, then switch to
        # the converted tree returned by ConvertUserSpeciesTree
        speciesToUseNames = ogSet.SpeciesDict().values()
        CheckUserSpeciesTree(speciesTree_fn, speciesToUseNames)
        speciesTree_fn = ConvertUserSpeciesTree(workingDir + "Trees_ids/", speciesTree_fn, ogSet.SpeciesDict())
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees")
    ReconciliationAndOrthologues(TreePatIDs, ogSet, speciesTree_fn, workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, pickleDir=pickleDir)
    util.PrintUnderline("Writing results files")
    CleanWorkingDir(workingDir)
    return "Species-by-species orthologues directory:\n %s\n" % resultsDir_new
def OrthologuesFromTrees(recon_method, nHighParallel, userSpeciesTree_fn, qAddSpeciesToIDs):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    recon_method - passed through unchanged to ReconciliationAndOrthologues
    nHighParallel - passed through unchanged to ReconciliationAndOrthologues
    userSpeciesTree_fn - None if not supplied, otherwise rooted tree using
                         user species names (not orthofinder IDs)
    qAddSpeciesToIDs - passed through unchanged to OrthoGroupsSet

    Returns a message string naming the species-by-species orthologues
    directory reported by files.FileHandler.
    """
    speciesToUse, nSpAll, _ = util.GetSpeciesToUse(files.FileHandler.GetSpeciesIDsFN())
    ogSet = OrthoGroupsSet(files.FileHandler.GetWorkingDirectory1_Read(), speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor = util.FirstWordExtractor)
    if userSpeciesTree_fn is not None:
        # Validate the user's tree against the names of the species in use,
        # then convert it to the rooted, ID-labelled species tree file
        speciesDict = files.FileHandler.GetSpeciesDict()
        speciesToUseNames = [speciesDict[str(iSp)] for iSp in ogSet.speciesToUse]
        CheckUserSpeciesTree(userSpeciesTree_fn, speciesToUseNames)
        speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeIDsRootedFN()
        ConvertUserSpeciesTree(userSpeciesTree_fn, speciesDict, speciesTreeFN_ids)
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees")
    ReconciliationAndOrthologues(recon_method, ogSet, nHighParallel)
    util.PrintUnderline("Writing results files")
    util.PrintTime("Writing results files")
    files.FileHandler.CleanWorkingDir2()
    return "Species-by-species orthologues directory:\n %s\n" % files.FileHandler.GetOrthologuesDirectory()
def GetSpeciesGenesInfo(ogSet):
    """
    Return the species labels together with a per-species count list.

    ogSet - object providing speciesIDsFN (species IDs file) and
            workingDirOF (directory handed to util.GetSeqsInfo)

    Each count is the gap between consecutive entries of
    seqsInfo.seqStartingIndices; the final count is nSeqs minus the last
    starting index.

    Returns (speciesLabels, counts).
    """
    labels, n_species_all = util.GetSpeciesToUse(ogSet.speciesIDsFN)
    info = util.GetSeqsInfo(ogSet.workingDirOF, labels, n_species_all)
    # Differences between successive start indices cover every species but
    # the last, which runs from its start index to the end of the sequences.
    counts = list(np.diff(info.seqStartingIndices))
    last_count = info.nSeqs - info.seqStartingIndices[-1]
    counts.append(last_count)
    return labels, counts
print("Missing option for command line argument -t") util.Fail() arg = args.pop(0) try: nProcesses = int(arg) except: print("Incorrect argument for number of threads: %s" % arg) util.Fail() else: userDir = arg # Check arguments print("0. Getting Orthologues") print("----------------------") if nProcesses == None: print( """\nNumber of parallel processes has not been specified, will use the default value. Number of parallel processes can be specified using the -t option.""") nProcesses = util.nThreadsDefault print("Using %d threads for alignments and trees" % nProcesses) orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile( userDir) speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir + "SpeciesIDs.txt") resultsString = GetOrthologues(orthofinderWorkingDir, orthofinderResultsDir, speciesToUse, nSpAll, clustersFilename_pairs, nProcesses) print(resultsString) util.PrintCitation()