def run(self, fileStore):
    blastResultsFile = fileStore.getLocalTempFile()
    seqFile = fileStore.readGlobalFile(self.seqFileID)
    runSelfLastz(seqFile, blastResultsFile,
                 lastzArguments=self.blastOptions.lastzArguments,
                 gpuLastz=self.blastOptions.gpuLastz)
    if self.blastOptions.realign:
        realignResultsFile = fileStore.getLocalTempFile()
        runCactusSelfRealign(seqFile,
                             inputAlignmentsFile=blastResultsFile,
                             outputAlignmentsFile=realignResultsFile,
                             realignArguments=self.blastOptions.realignArguments)
        blastResultsFile = realignResultsFile
    resultsFile = fileStore.getLocalTempFile()
    cactus_call(parameters=["cactus_blast_convertCoordinates",
                            blastResultsFile, resultsFile,
                            str(self.blastOptions.roundsOfCoordinateConversion)])
    if self.blastOptions.compressFiles:
        #TODO: This throws away the compressed file
        seqFile = compressFastaFile(seqFile)
    logger.info("Ran the self blast okay")
    return fileStore.writeGlobalFile(resultsFile)
def readXML(self, path):
    xmlRoot = ET.parse(path).getroot()
    treeElem = xmlRoot.find("tree")
    self.mcTree = MultiCactusTree(NXNewick().parseString(treeElem.text, addImpliedRoots=False))
    self.expMap = dict()
    self.expIDMap = dict()
    cactusPathElemList = xmlRoot.findall("cactus")
    for cactusPathElem in cactusPathElemList:
        nameElem = cactusPathElem.attrib["name"]
        pathElem = cactusPathElem.attrib["experiment_path"]
        self.expMap[nameElem] = pathElem
        if "experiment_id" in cactusPathElem.attrib:
            self.expIDMap[nameElem] = cactusPathElem.attrib["experiment_id"]
    self.inputSequenceMap = dict(zip(xmlRoot.attrib["inputSequenceNames"].split(),
                                     xmlRoot.attrib["inputSequences"].split()))
    if "inputSequenceIDs" in xmlRoot.attrib:
        self.inputSequenceIDMap = dict(zip(xmlRoot.attrib["inputSequenceIDNames"].split(),
                                           xmlRoot.attrib["inputSequenceIDs"].split()))
    if "outputSequenceIDs" in xmlRoot.attrib:
        self.outputSequenceIDMap = dict(zip(xmlRoot.attrib["outputSequenceNames"].split(),
                                            xmlRoot.attrib["outputSequenceIDs"].split()))
    logger.info("xmlRoot = %s" % ET.tostring(xmlRoot))
    if "configID" in xmlRoot.attrib:
        self.configID = xmlRoot.attrib["configID"]
    self.mcTree.assignSubtreeRootNames(self.expMap)
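# A minimal, hypothetical project XML illustrating what readXML() above parses:
# a <tree> element, one <cactus> element per event, and the whitespace-separated
# name/path attributes on the root. The root tag, names, and paths are
# placeholders (readXML never checks the root tag itself).
import xml.etree.ElementTree as ET

exampleProjectXml = """
<multi_cactus inputSequenceNames="cow pig" inputSequences="cow.fa pig.fa">
  <tree>(cow:0.1,pig:0.1)Anc0;</tree>
  <cactus name="Anc0" experiment_path="/work/Anc0/Anc0_experiment.xml"/>
</multi_cactus>
"""
exampleRoot = ET.fromstring(exampleProjectXml)
print(exampleRoot.find("tree").text.strip())
print({c.attrib["name"]: c.attrib["experiment_path"] for c in exampleRoot.findall("cactus")})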
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()

    # Log the stats for the un-preprocessed assemblies
    for name, sequence in self.project.getInputSequenceIDMap().items():
        self.addChildJobFn(logAssemblyStats, "Before preprocessing", name, sequence)

    # Create jobs to create the output sequences
    logger.info("Reading config file from: %s" % self.project.getConfigID())
    configFile = fileStore.readGlobalFile(self.project.getConfigID())
    configNode = ET.parse(configFile).getroot()
    ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals() #This is necessary..
    #Add the preprocessor child job. The output is a job promise value that will be
    #converted into a list of the IDs of the preprocessed sequences in the follow on job.
    preprocessorJob = self.addChild(CactusPreprocessor(self.project.getInputSequenceIDs(), configNode))
    self.project.setOutputSequenceIDs([preprocessorJob.rv(i) for i in range(len(self.project.getInputSequenceIDs()))])

    #Now build the progressive-down job
    schedule = Schedule()
    schedule.loadProject(self.project, fileStore=fileStore)
    schedule.compute()
    self.options.event = self.project.mcTree.getRootName()
    leafNames = [self.project.mcTree.getName(i) for i in self.project.mcTree.getLeaves()]
    fileStore.logToMaster("Leaf names = %s" % leafNames)
    self.options.globalLeafEventSet = set(leafNames)
    return self.addFollowOn(RunCactusPreprocessorThenProgressiveDown2(options=self.options, project=self.project,
                                                                      event=self.options.event, schedule=schedule,
                                                                      memory=self.configWrapper.getDefaultMemory())).rv()
def run(self, fileStore):
    seqFile1 = fileStore.readGlobalFile(self.seqFileID1)
    seqFile2 = fileStore.readGlobalFile(self.seqFileID2)
    if self.blastOptions.compressFiles:
        seqFile1 = decompressFastaFile(seqFile1, fileStore.getLocalTempFile())
        seqFile2 = decompressFastaFile(seqFile2, fileStore.getLocalTempFile())
    blastResultsFile = fileStore.getLocalTempFile()
    runLastz(seqFile1, seqFile2, blastResultsFile,
             lastzArguments=self.blastOptions.lastzArguments,
             gpuLastz=self.blastOptions.gpuLastz)
    if self.blastOptions.realign:
        realignResultsFile = fileStore.getLocalTempFile()
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=blastResultsFile,
                         outputAlignmentsFile=realignResultsFile,
                         realignArguments=self.blastOptions.realignArguments)
        blastResultsFile = realignResultsFile
    resultsFile = fileStore.getLocalTempFile()
    cactus_call(parameters=["cactus_blast_convertCoordinates",
                            blastResultsFile, resultsFile,
                            str(self.blastOptions.roundsOfCoordinateConversion)])
    logger.info("Ran the blast okay")
    return fileStore.writeGlobalFile(resultsFile)
def importSingularityImage():
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            # when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call(["singularity", "pull", "--size", "2000", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call(["singularity", "pull", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
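# A minimal usage sketch for importSingularityImage(), assuming the environment
# variables it reads (above) are set beforehand; the image path is a
# hypothetical placeholder.
import os

os.environ["CACTUS_BINARIES_MODE"] = "singularity"
os.environ["CACTUS_SINGULARITY_IMG"] = "/tmp/cactus.sif"    # placeholder path
os.environ["CACTUS_USE_LOCAL_SINGULARITY_IMG"] = "0"        # "0" means pull from Docker
importSingularityImage()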
def split_minimap_fallback(job, options, config, seqIDMap, output_id_map):
    """Take the output table from gather_fas, pull out the ambiguous sequences,
    remap them to the reference, and add them to the events where possible."""
    # can't do anything without a reference
    if not options.reference:
        logger.info("Skipping minimap2 fallback as --reference was not specified")
        return None, None
    # todo: also skip if no ambiguous sequences

    ref_path, ref_id = seqIDMap[options.reference]
    mm_mem = ref_id.size * 5
    if seqIDMap[options.reference][0].endswith('.gz'):
        mm_mem *= 4
    mm_index_job = job.addChildJobFn(minimap_index, ref_path, ref_id, disk=ref_id.size * 5, memory=mm_mem)
    mm_map_root_job = Job()
    mm_index_job.addFollowOn(mm_map_root_job)

    amb_name = getOptionalAttrib(findRequiredNode(config.xmlRoot, "graphmap_split"), "ambiguousName", default="_AMBIGUOUS_")
    if amb_name not in output_id_map:
        logger.info("Skipping minimap2 fallback as no ambiguous sequences were found")
        return None, None

    # map every ambiguous sequence against the reference in parallel
    paf_ids = []
    ambiguous_seq_id_map = {}
    for event, fa_id in output_id_map[amb_name]['fa'].items():
        paf_job = mm_map_root_job.addChildJobFn(minimap_map, mm_index_job.rv(), event, fa_id, seqIDMap[event][0],
                                                disk=ref_id.size * 3, memory=mm_mem)
        paf_ids.append(paf_job.rv())
        ambiguous_seq_id_map[event] = (seqIDMap[event][0], fa_id)

    return paf_ids, ambiguous_seq_id_map
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()
    fileStore.logToMaster("Using the following configuration:\n%s" % ET.tostring(self.configNode))

    # Log the stats for the un-preprocessed assemblies
    for name, sequence in self.project.getInputSequenceIDMap().items():
        self.addChildJobFn(logAssemblyStats, "Before preprocessing", name, sequence)

    # Create jobs to create the output sequences
    logger.info("Reading config file from: %s" % self.project.getConfigID())
    configFile = fileStore.readGlobalFile(self.project.getConfigID())
    configNode = ET.parse(configFile).getroot()
    ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals() #This is necessary..
    #Add the preprocessor child job. The output is a job promise value that will be
    #converted into a list of the IDs of the preprocessed sequences in the follow on job.
    preprocessorJob = self.addChild(CactusPreprocessor(self.project.getInputSequenceIDs(), configNode))
    self.project.setOutputSequenceIDs([preprocessorJob.rv(i) for i in range(len(self.project.getInputSequenceIDs()))])

    #Now build the progressive-down job
    schedule = Schedule()
    schedule.loadProject(self.project, fileStore=fileStore)
    schedule.compute()
    self.options.event = self.project.mcTree.getRootName()
    leafNames = [self.project.mcTree.getName(i) for i in self.project.mcTree.getLeaves()]
    fileStore.logToMaster("Leaf names = %s" % leafNames)
    self.options.globalLeafEventSet = set(leafNames)
    return self.addFollowOn(RunCactusPreprocessorThenProgressiveDown2(options=self.options, project=self.project,
                                                                      event=self.options.event, schedule=schedule,
                                                                      memory=self.configWrapper.getDefaultMemory())).rv()
def run(self, fileStore): logger.info("Preparing sequence for preprocessing") inSequence = fileStore.readGlobalFile(self.inSequenceID) if self.prepOptions.chunkSize <= 0: # In this first case we don't need to break up the sequence chunked = False inChunkList = [inSequence] else: # chunk it up chunked = True inChunkDirectory = getTempDirectory( rootDir=fileStore.getLocalTempDir()) inChunkList = runGetChunks(sequenceFiles=[inSequence], chunksDir=inChunkDirectory, chunkSize=self.prepOptions.chunkSize, overlapSize=0) inChunkList = [os.path.abspath(path) for path in inChunkList] logger.info("Chunks = %s" % inChunkList) inChunkIDList = [ fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in inChunkList ] outChunkIDList = [] #For each input chunk we create an output chunk, it is the output chunks that get concatenated together. if not self.chunksToCompute: self.chunksToCompute = list(range(len(inChunkList))) for i in self.chunksToCompute: #Calculate the number of chunks to use inChunkNumber = int( max( 1, math.ceil( len(inChunkList) * self.prepOptions.proportionToSample))) assert inChunkNumber <= len(inChunkList) and inChunkNumber > 0 #Now get the list of chunks flanking and including the current chunk j = max(0, i - inChunkNumber // 2) inChunkIDs = inChunkIDList[j:j + inChunkNumber] if len( inChunkIDs ) < inChunkNumber: #This logic is like making the list circular inChunkIDs += inChunkIDList[:inChunkNumber - len(inChunkIDs)] assert len(inChunkIDs) == inChunkNumber outChunkIDList.append( self.addChild( self.getChunkedJobForCurrentStage( inChunkIDs, float(inChunkNumber) / len(inChunkIDList), inChunkIDList[i])).rv()) if chunked: # Merge results of the chunking process back into a genome-wide file return self.addFollowOn( MergeChunks(self.prepOptions, outChunkIDList)).rv() else: # Didn't chunk--we have a genome-wide fasta file return outChunkIDList[0]
def run(self, fileStore):
    sequenceFiles1 = [fileStore.readGlobalFile(fileID) for fileID in self.sequenceFileIDs1]
    chunks = runGetChunks(sequenceFiles=sequenceFiles1,
                          chunksDir=getTempDirectory(rootDir=fileStore.getLocalTempDir()),
                          chunkSize=self.blastOptions.chunkSize,
                          overlapSize=self.blastOptions.overlapSize)
    assert len(chunks) > 0
    logger.info("Broken up the sequence files into individual 'chunk' files")
    chunkIDs = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in chunks]

    diagonalResultsID = self.addChild(MakeSelfBlasts(self.blastOptions, chunkIDs)).rv()
    offDiagonalResultsID = self.addChild(MakeOffDiagonalBlasts(self.blastOptions, chunkIDs)).rv()
    logger.debug("Collating the blasts after blasting all-against-all")
    return self.addFollowOn(CollateBlasts(self.blastOptions, [diagonalResultsID, offDiagonalResultsID])).rv()
def run(self, fileStore):
    sequenceFiles1 = [fileStore.readGlobalFile(fileID) for fileID in self.sequenceFileIDs1]
    if self.blastOptions.gpuLastz:
        # wga-gpu has a 3G limit.
        self.blastOptions.chunkSize = 3000000000
    chunks = runGetChunks(sequenceFiles=sequenceFiles1,
                          chunksDir=getTempDirectory(rootDir=fileStore.getLocalTempDir()),
                          chunkSize=self.blastOptions.chunkSize,
                          overlapSize=self.blastOptions.overlapSize)
    if len(chunks) == 0:
        raise Exception("no chunks produced for files: {} ".format(sequenceFiles1))
    logger.info("Broken up the sequence files into individual 'chunk' files")
    chunkIDs = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in chunks]

    diagonalResultsID = self.addChild(MakeSelfBlasts(self.blastOptions, chunkIDs)).rv()
    offDiagonalResultsID = self.addChild(MakeOffDiagonalBlasts(self.blastOptions, chunkIDs)).rv()
    logger.debug("Collating the blasts after blasting all-against-all")
    return self.addFollowOn(CollateBlasts(self.blastOptions, [diagonalResultsID, offDiagonalResultsID])).rv()
def run(self, fileStore):
    prepXmlElems = self.configNode.findall("preprocessor")
    if len(prepXmlElems) == 0: #Just cp the file to the output file
        return self.inputSequenceID
    else:
        logger.info("Adding child batch_preprocessor target")
        return self.addChild(BatchPreprocessor(prepXmlElems, self.inputSequenceID, 0)).rv()
def run(self, fileStore): logger.info("Results IDs: %s" % self.resultsFileIDs) resultsFiles = [readGlobalFileWithoutCache(fileStore, fileID) for fileID in self.resultsFileIDs] collatedResultsFile = fileStore.getLocalTempFile() catFiles(resultsFiles, collatedResultsFile) logger.info("Collated the alignments to the file: %s", collatedResultsFile) collatedResultsID = fileStore.writeGlobalFile(collatedResultsFile) for resultsFileID in self.resultsFileIDs: fileStore.deleteGlobalFile(resultsFileID) return collatedResultsID
def calculateCoverage(sequenceFile, cigarFile, outputFile, fromGenome=None, depthById=False, work_dir=None):
    logger.info("Calculating coverage of cigar file %s on %s, writing to %s" % (
        cigarFile, sequenceFile, outputFile))
    args = [sequenceFile, cigarFile]
    if fromGenome is not None:
        args += ["--from", fromGenome]
    if depthById:
        args += ["--depthById"]
    cactus_call(outfile=outputFile, work_dir=work_dir,
                parameters=["cactus_coverage"] + args)
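# A minimal usage sketch for calculateCoverage(); the file names are
# hypothetical placeholders, and "hg38" stands in for whatever genome name
# appears in the cigar file.
calculateCoverage("genome.fa", "alignments.cigar", "coverage.bed",
                  fromGenome="hg38", depthById=False)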
def run(self, fileStore):
    sequenceFiles1 = [fileStore.readGlobalFile(fileID) for fileID in self.sequenceFileIDs1]
    chunks = runGetChunks(sequenceFiles=sequenceFiles1,
                          chunksDir=getTempDirectory(rootDir=fileStore.getLocalTempDir()),
                          chunkSize=self.blastOptions.chunkSize,
                          overlapSize=self.blastOptions.overlapSize)
    assert len(chunks) > 0
    logger.info("Broken up the sequence files into individual 'chunk' files")
    chunkIDs = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in chunks]

    diagonalResultsID = self.addChild(MakeSelfBlasts(self.blastOptions, chunkIDs)).rv()
    offDiagonalResultsID = self.addChild(MakeOffDiagonalBlasts(self.blastOptions, chunkIDs)).rv()
    logger.debug("Collating the blasts after blasting all-against-all")
    return self.addFollowOn(CollateBlasts(self.blastOptions, [diagonalResultsID, offDiagonalResultsID])).rv()
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file (will be modified if necessary to include graph Fasta sequence)")
    parser.add_argument("minigraphGFA", help="Minigraph-compatible reference graph in GFA format (can be gzipped)")
    parser.add_argument("outputPAF", type=str, help="Output pairwise alignment file in PAF format")
    parser.add_argument("--outputFasta", type=str, help="Output graph sequence file in FASTA format (required if not present in seqFile)")
    parser.add_argument("--maskFilter", type=int, help="Ignore softmasked sequence intervals > Nbp (overrides config option of same name)")
    parser.add_argument("--outputGAFDir", type=str, help="Output GAF alignments (raw minigraph output before PAF conversion) to this directory")
    parser.add_argument("--refFromGFA", type=str, help="Do not align given genome from seqfile, and instead extract its alignment from the rGFA tags (must have been used as reference for minigraph GFA construction)")

    #WDL hacks
    parser.add_argument("--pathOverrides", nargs="*", help="paths (multiple allowed) to override from seqFile")
    parser.add_argument("--pathOverrideNames", nargs="*", help="names (must be same number as --pathOverrides) of path overrides")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the specified pre-built container image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outputGAFDir:
        if not os.path.isdir(options.outputGAFDir):
            os.makedirs(options.outputGAFDir)

    if options.pathOverrides or options.pathOverrideNames:
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError('same number of values must be passed to --pathOverrides and --pathOverrideNames')

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMap(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap has finished after {} seconds".format(run_time))
def stageWorkflow(outputSequenceDir, configFile, inputSequences, toil, restart=False, outputSequences=[], maskAlpha=False, clipAlpha=None):
    #Replace any constants
    configNode = ET.parse(configFile).getroot()
    if not outputSequences:
        outputSequences = CactusPreprocessor.getOutputSequenceFiles(inputSequences, outputSequenceDir)
    else:
        assert len(outputSequences) == len(inputSequences)

    # Make sure we have the dna-brnn model in the filestore if we need it
    loadDnaBrnnModel(toil, ET.parse(configFile).getroot(), maskAlpha=maskAlpha)

    if configNode.find("constants") is not None:
        ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals()
    if maskAlpha or clipAlpha:
        ConfigWrapper(configNode).setPreprocessorActive("lastzRepeatMask", False)
        ConfigWrapper(configNode).setPreprocessorActive("dna-brnn", True)
    if clipAlpha:
        for node in configNode.findall("preprocessor"):
            if getOptionalAttrib(node, "preprocessJob") == 'dna-brnn':
                node.attrib["action"] = "clip"
                node.attrib["minLength"] = clipAlpha
                node.attrib["mergeLength"] = clipAlpha

    if not restart:
        inputSequenceIDs = []
        for seq in inputSequences:
            logger.info("Importing {}".format(seq))
            inputSequenceIDs.append(toil.importFile(makeURL(seq)))
        unzip_job = Job.wrapJobFn(unzip_then_pp, configNode, inputSequences, inputSequenceIDs)
        outputSequenceIDs = toil.start(unzip_job)
    else:
        outputSequenceIDs = toil.restart()

    for seqID, path in zip(outputSequenceIDs, outputSequences):
        try:
            iter(seqID)
            # dna-brnn will output a couple of bed files. we scrape those out here
            toil.exportFile(seqID[0], makeURL(path))
            toil.exportFile(seqID[1], makeURL(path) + '.bed')
            toil.exportFile(seqID[2], makeURL(path) + '.mask.bed')
        except:
            toil.exportFile(seqID, makeURL(path))
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("--vg", required=True, nargs='+', help="Input vg files (PackedGraph or HashGraph format)")
    parser.add_argument("--outDir", required=True, type=str, help="Output directory")
    parser.add_argument("--outName", required=True, type=str, help="Basename of all output files")
    parser.add_argument("--reference", required=True, type=str, help="Reference event name")
    parser.add_argument("--vcfReference", type=str, help="Reference event for VCF (if different from --reference)")
    parser.add_argument("--rename", nargs='+', default=[], help="Path renaming, each of form src>dest (see clip-vg -r)")
    parser.add_argument("--clipLength", type=int, default=None, help="clip out unaligned sequences longer than this")
    parser.add_argument("--wlineSep", type=str, help="wline separator for vg convert")
    parser.add_argument("--indexCores", type=int, default=1, help="cores for indexing processes")
    parser.add_argument("--decoyGraph", help="decoy sequences vg graph to add (PackedGraph or HashGraph format)")
    parser.add_argument("--hal", nargs='+', default=[], help="Input hal files (for merging)")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the specified pre-built container image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outDir and not options.outDir.startswith('s3://'):
        if not os.path.isdir(options.outDir):
            os.makedirs(options.outDir)

    if options.hal and len(options.hal) != len(options.vg):
        raise RuntimeError("--hal and --vg must specify the same number of files")

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMapJoin(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap-join has finished after {} seconds".format(run_time))
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()

    if not self.schedule.isVirtual(self.event):
        tmpExp = fileStore.getLocalTempFile()
        self.eventExpWrapper.writeXML(tmpExp)
        self.project.expIDMap[self.event] = fileStore.writeGlobalFile(tmpExp)
    followOnEvent = self.schedule.followOn(self.event)
    if followOnEvent is not None:
        logger.info("Adding follow-on event %s" % followOnEvent)
        return self.addFollowOn(ProgressiveDown(self.options, self.project, followOnEvent,
                                                self.schedule,
                                                memory=self.configWrapper.getDefaultMemory())).rv()

    return self.project
def run(self, fileStore): logger.info("Results IDs: %s" % self.resultsFileIDs) resultsFiles = [ readGlobalFileWithoutCache(fileStore, fileID) for fileID in self.resultsFileIDs ] collatedResultsFile = fileStore.getLocalTempFile() catFiles(resultsFiles, collatedResultsFile) logger.info("Collated the alignments to the file: %s", collatedResultsFile) collatedResultsID = fileStore.writeGlobalFile(collatedResultsFile) for i in range(0, len(self.resultsFileIDs), self.delete_batch_size): self.addChild( DeleteFileIDs(self.resultsFileIDs[i:i + self.delete_batch_size])) return collatedResultsID
def blockUntilServerIsRunning(self, createTimeout=1800):
    """Check status until it's successful, an error is found, or we timeout.

    Returns True if the redis-server is now running, False if something went wrong."""
    success = False
    for i in range(createTimeout):
        if self.isServerFailed():
            logger.critical('Error starting Redis server.')
            success = False
            break
        if self.isServerRunning():
            logger.info('Redis server running.')
            success = True
            break
        sleep(1)
    return success
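# A minimal usage sketch for blockUntilServerIsRunning(); "serverProcess" is a
# hypothetical object exposing the method above, and the timeout value is just
# an example.
if not serverProcess.blockUntilServerIsRunning(createTimeout=600):
    raise RuntimeError("Redis server did not come up before the timeout")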
def blockUntilKtserverIsRunning(logPath, createTimeout=1800):
    """Check status until it's successful, an error is found, or we timeout.

    Returns True if the ktserver is now running, False if something went wrong."""
    success = False
    for i in range(createTimeout):
        if isKtServerFailed(logPath):
            logger.critical('Error starting ktserver.')
            success = False
            break
        if isKtServerRunning(logPath):
            logger.info('Ktserver running.')
            success = True
            break
        sleep(1)
    return success
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file (gzipped fastas supported)")
    parser.add_argument("minigraphGFA", help="Minigraph-compatible reference graph in GFA format (can be gzipped)")
    parser.add_argument("graphmapPAF", type=str, help="Output pairwise alignment file in PAF format (can be gzipped)")
    parser.add_argument("--outDir", required=True, type=str, help="Output directory")
    parser.add_argument("--refContigs", nargs="*", help="Subset to these reference contigs (multiple allowed)", default=[])
    parser.add_argument("--refContigsFile", type=str, help="Subset to (newline-separated) reference contigs in this file")
    parser.add_argument("--otherContig", type=str, help="Lump all reference contigs unselected by above options into single one with this name")
    parser.add_argument("--reference", type=str, help="Name of reference (in seqFile). Ambiguity filters will not be applied to it")
    parser.add_argument("--maskFilter", type=int, help="Ignore softmasked sequence intervals > Nbp")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the specified pre-built container image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outDir and not options.outDir.startswith('s3://'):
        if not os.path.isdir(options.outDir):
            os.makedirs(options.outDir)

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMapSplit(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap-split has finished after {} seconds".format(run_time))
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()
    logger.info("Progressive Down: " + self.event)

    depProjects = dict()
    deps = self.schedule.deps(self.event)
    fileStore.logToMaster("There are %i dependent projects" % len(deps))
    for child in deps:
        fileStore.logToMaster("Adding dependent project %s" % child)
        depProjects[child] = self.addChild(ProgressiveDown(self.options, self.project,
                                                           child, self.schedule)).rv()

    return self.addFollowOn(ProgressiveNext(self.options, self.project, self.event,
                                            self.schedule, depProjects,
                                            memory=self.configWrapper.getDefaultMemory())).rv()
def run(self, fileStore):
    sequenceFiles1 = [fileStore.readGlobalFile(fileID) for fileID in self.sequenceFileIDs1]
    sequenceFiles2 = [fileStore.readGlobalFile(fileID) for fileID in self.sequenceFileIDs2]
    chunks1 = runGetChunks(sequenceFiles=sequenceFiles1,
                           chunksDir=getTempDirectory(rootDir=fileStore.getLocalTempDir()),
                           chunkSize=self.blastOptions.chunkSize,
                           overlapSize=self.blastOptions.overlapSize)
    chunks2 = runGetChunks(sequenceFiles=sequenceFiles2,
                           chunksDir=getTempDirectory(rootDir=fileStore.getLocalTempDir()),
                           chunkSize=self.blastOptions.chunkSize,
                           overlapSize=self.blastOptions.overlapSize)
    chunkIDs1 = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in chunks1]
    chunkIDs2 = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in chunks2]
    resultsIDs = []
    #Make the list of blast jobs.
    for chunkID1 in chunkIDs1:
        for chunkID2 in chunkIDs2:
            #TODO: Make the compression work
            self.blastOptions.compressFiles = False
            resultsIDs.append(self.addChild(RunBlast(self.blastOptions, chunkID1, chunkID2)).rv())
    logger.info("Made the list of blasts")
    #Set up the job to collate all the results
    return self.addFollowOn(CollateBlasts(self.blastOptions, resultsIDs)).rv()
def run(self, fileStore):
    # Parse the "preprocessor" config xml element
    assert self.iteration < len(self.prepXmlElems)

    lastIteration = self.iteration == len(self.prepXmlElems) - 1

    prepNode = self.prepXmlElems[self.iteration]
    if getOptionalAttrib(prepNode, "active", typeFn=bool, default=True):
        prepOptions = PreprocessorOptions(chunkSize=int(prepNode.get("chunkSize", default="-1")),
                                          preprocessJob=prepNode.attrib["preprocessJob"],
                                          memory=int(prepNode.get("memory", default=0)),
                                          cpu=int(prepNode.get("cpu", default=1)),
                                          check=bool(int(prepNode.get("check", default="0"))),
                                          proportionToSample=getOptionalAttrib(prepNode, "proportionToSample", typeFn=float, default=1.0),
                                          unmask=getOptionalAttrib(prepNode, "unmask", typeFn=bool, default=False),
                                          lastzOptions=getOptionalAttrib(prepNode, "lastzOpts", default=""),
                                          minPeriod=getOptionalAttrib(prepNode, "minPeriod", typeFn=int, default=0),
                                          checkAssemblyHub=getOptionalAttrib(prepNode, "checkAssemblyHub", typeFn=bool, default=False),
                                          gpuLastz=getOptionalAttrib(prepNode, "gpuLastz", typeFn=bool, default=False),
                                          dnabrnnOpts=getOptionalAttrib(prepNode, "dna-brnnOpts", default=""),
                                          dnabrnnLength=getOptionalAttrib(prepNode, "minLength", typeFn=int, default=1),
                                          dnabrnnMerge=getOptionalAttrib(prepNode, "mergeLength", typeFn=int, default=0),
                                          dnabrnnAction=getOptionalAttrib(prepNode, "action", typeFn=str, default="softmask"),
                                          dnabrnnInputBedID=getOptionalAttrib(prepNode, "inputBedID", typeFn=str, default=None),
                                          dnabrnnEventName=getOptionalAttrib(prepNode, "eventName", typeFn=str, default=None),
                                          cutBefore=getOptionalAttrib(prepNode, "cutBefore", typeFn=str, default=None),
                                          cutAfter=getOptionalAttrib(prepNode, "cutAfter", typeFn=str, default=None))

        if prepOptions.unmask:
            inSequence = fileStore.readGlobalFile(self.inSequenceID)
            unmaskedInputFile = fileStore.getLocalTempFile()
            unmaskFasta(inSequence, unmaskedInputFile)
            self.inSequenceID = fileStore.writeGlobalFile(unmaskedInputFile)

        outSeqID = self.addChild(PreprocessSequence(prepOptions, self.inSequenceID)).rv()
    else:
        logger.info("Skipping inactive preprocessor {}".format(prepNode.attrib["preprocessJob"]))
        outSeqID = self.inSequenceID

    if not lastIteration:
        return self.addFollowOn(BatchPreprocessor(self.prepXmlElems, outSeqID, self.iteration + 1)).rv()
    else:
        return outSeqID
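# A hypothetical <preprocessor> config element illustrating the attributes the
# parsing code above reads; the job name and values below are placeholders, not
# Cactus defaults.
import xml.etree.ElementTree as ET

examplePrepNode = ET.fromstring(
    '<preprocessor preprocessJob="lastzRepeatMask" active="1" chunkSize="10000000" '
    'memory="0" cpu="1" check="0" proportionToSample="0.2" unmask="0" '
    'lastzOpts="--step=3" minPeriod="50"/>'
)
print(examplePrepNode.attrib["preprocessJob"])            # lastzRepeatMask
print(int(examplePrepNode.get("chunkSize", default="-1")))  # 10000000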
def run(self, fileStore): logger.info("Preparing sequence for preprocessing") inSequence = fileStore.readGlobalFile(self.inSequenceID) if self.prepOptions.chunkSize <= 0: # In this first case we don't need to break up the sequence chunked = False inChunkList = [inSequence] else: # chunk it up chunked = True inChunkDirectory = getTempDirectory(rootDir=fileStore.getLocalTempDir()) inChunkList = runGetChunks(sequenceFiles=[inSequence], chunksDir=inChunkDirectory, chunkSize=self.prepOptions.chunkSize, overlapSize=0) inChunkList = [os.path.abspath(path) for path in inChunkList] logger.info("Chunks = %s" % inChunkList) inChunkIDList = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in inChunkList] outChunkIDList = [] #For each input chunk we create an output chunk, it is the output chunks that get concatenated together. if not self.chunksToCompute: self.chunksToCompute = range(len(inChunkList)) for i in self.chunksToCompute: #Calculate the number of chunks to use inChunkNumber = int(max(1, math.ceil(len(inChunkList) * self.prepOptions.proportionToSample))) assert inChunkNumber <= len(inChunkList) and inChunkNumber > 0 #Now get the list of chunks flanking and including the current chunk j = max(0, i - inChunkNumber/2) inChunkIDs = inChunkIDList[j:j+inChunkNumber] if len(inChunkIDs) < inChunkNumber: #This logic is like making the list circular inChunkIDs += inChunkIDList[:inChunkNumber-len(inChunkIDs)] assert len(inChunkIDs) == inChunkNumber outChunkIDList.append(self.addChild(self.getChunkedJobForCurrentStage(inChunkIDs, float(inChunkNumber)/len(inChunkIDList), inChunkIDList[i])).rv()) if chunked: # Merge results of the chunking process back into a genome-wide file return self.addFollowOn(MergeChunks(self.prepOptions, outChunkIDList)).rv() else: # Didn't chunk--we have a genome-wide fasta file return outChunkIDList[0]
def run(self, fileStore):
    blastResultsFile = fileStore.getLocalTempFile()
    seqFile = fileStore.readGlobalFile(self.seqFileID)
    runSelfLastz(seqFile, blastResultsFile, lastzArguments=self.blastOptions.lastzArguments)
    if self.blastOptions.realign:
        realignResultsFile = fileStore.getLocalTempFile()
        runCactusSelfRealign(seqFile,
                             inputAlignmentsFile=blastResultsFile,
                             outputAlignmentsFile=realignResultsFile,
                             realignArguments=self.blastOptions.realignArguments)
        blastResultsFile = realignResultsFile
    resultsFile = fileStore.getLocalTempFile()
    cactus_call(parameters=["cactus_blast_convertCoordinates",
                            blastResultsFile, resultsFile,
                            str(self.blastOptions.roundsOfCoordinateConversion)])
    if self.blastOptions.compressFiles:
        #TODO: This throws away the compressed file
        seqFile = compressFastaFile(seqFile)
    logger.info("Ran the self blast okay")
    return fileStore.writeGlobalFile(resultsFile)
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()

    fileStore.logToMaster("Project has %i dependencies" % len(self.depProjects))
    for projName in self.depProjects:
        depProject = self.depProjects[projName]
        for expName in depProject.expIDMap:
            expID = depProject.expIDMap[expName]
            experiment = ExperimentWrapper(ET.parse(fileStore.readGlobalFile(expID)).getroot())
            fileStore.logToMaster("Reference ID for experiment %s: %s" % (expName, experiment.getReferenceID()))
            if experiment.getReferenceID():
                self.project.expIDMap[expName] = expID
                self.project.outputSequenceIDMap[expName] = experiment.getReferenceID()

    eventExpWrapper = None
    logger.info("Progressive Next: " + self.event)
    if not self.schedule.isVirtual(self.event):
        eventExpWrapper = self.addChild(ProgressiveUp(self.options, self.project, self.event,
                                                      memory=self.configWrapper.getDefaultMemory())).rv()
    return self.addFollowOn(ProgressiveOut(self.options, self.project, self.event, eventExpWrapper,
                                           self.schedule,
                                           memory=self.configWrapper.getDefaultMemory())).rv()
def run(self, fileStore):
    self.configNode = ET.parse(fileStore.readGlobalFile(self.project.getConfigID())).getroot()
    self.configWrapper = ConfigWrapper(self.configNode)
    self.configWrapper.substituteAllPredefinedConstantsWithLiterals()
    logger.info("Progressive Up: " + self.event)

    # open up the experiment
    # note that we copy the path into the options here
    experimentFile = fileStore.readGlobalFile(self.project.expIDMap[self.event])
    expXml = ET.parse(experimentFile).getroot()
    experiment = ExperimentWrapper(expXml)
    configPath = fileStore.readGlobalFile(experiment.getConfigID())
    configXml = ET.parse(configPath).getroot()

    seqIDMap = dict()
    tree = experiment.getTree()
    seqNames = []
    for node in tree.postOrderTraversal():
        if tree.isLeaf(node):
            name = tree.getName(node)
            seqIDMap[name] = self.project.outputSequenceIDMap[name]
            seqNames.append(name)
    logger.info("Sequences in progressive, %s: %s" % (self.event, seqNames))

    experimentFile = fileStore.getLocalTempFile()
    experiment.writeXML(experimentFile)
    self.options.experimentFileID = fileStore.writeGlobalFile(experimentFile)

    # take union of command line options and config options for hal and reference
    if self.options.buildReference == False:
        refNode = findRequiredNode(configXml, "reference")
        self.options.buildReference = getOptionalAttrib(refNode, "buildReference", bool, False)
    halNode = findRequiredNode(configXml, "hal")
    if self.options.buildHal == False:
        self.options.buildHal = getOptionalAttrib(halNode, "buildHal", bool, False)
    if self.options.buildFasta == False:
        self.options.buildFasta = getOptionalAttrib(halNode, "buildFasta", bool, False)

    # get parameters that cactus_workflow stuff wants
    configFile = fileStore.readGlobalFile(experiment.getConfigID())
    configNode = ET.parse(configFile).getroot()
    workFlowArgs = CactusWorkflowArguments(self.options, experimentFile=experimentFile,
                                           configNode=configNode, seqIDMap=seqIDMap)

    # copy over the options so we don't trail them around
    workFlowArgs.buildReference = self.options.buildReference
    workFlowArgs.buildHal = self.options.buildHal
    workFlowArgs.buildFasta = self.options.buildFasta
    workFlowArgs.globalLeafEventSet = self.options.globalLeafEventSet
    if self.options.intermediateResultsUrl is not None:
        # Give the URL prefix a special name for this particular
        # subproblem (by suffixing it with the name of the
        # internal node in the guide tree)
        workFlowArgs.intermediateResultsUrl = self.options.intermediateResultsUrl + '-' + self.event

    # Use the trimming strategy to blast ingroups vs outgroups.
    finalExpWrapper = self.addChild(CactusTrimmingBlastPhase(cactusWorkflowArguments=workFlowArgs, phaseName="trimBlast")).rv()
    logger.info("Going to create alignments and define the cactus tree")

    return finalExpWrapper
def importSingularityImage(options):
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity" and Toil.parseLocator(options.jobStore)[0] == "file":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            # when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call(["singularity", "pull", "--size", "2000", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call(["singularity", "pull", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
def run(self, fileStore): logger.info("Blasting ingroup sequences to outgroup %s", self.outgroupNames[self.outgroupNumber - 1]) alignmentsID = self.addChild(BlastSequencesAgainstEachOther( self.sequenceIDs, [self.outgroupSequenceIDs[0]], self.blastOptions)).rv() trimRecurseJob = self.addFollowOn(TrimAndRecurseOnOutgroups( ingroupNames=self.ingroupNames, untrimmedSequenceIDs=self.untrimmedSequenceIDs, sequenceIDs=self.sequenceIDs, outgroupNames=self.outgroupNames, outgroupSequenceIDs=self.outgroupSequenceIDs, outgroupFragmentIDs=self.outgroupFragmentIDs, mostRecentResultsID=alignmentsID, outgroupResultsID=self.outgroupResultsID, blastOptions=self.blastOptions, outgroupNumber=self.outgroupNumber, ingroupCoverageIDs=self.ingroupCoverageIDs)) outgroupAlignmentsID = trimRecurseJob.rv(0) outgroupFragmentIDs = trimRecurseJob.rv(1) ingroupCoverageIDs = trimRecurseJob.rv(2) return (outgroupAlignmentsID, outgroupFragmentIDs, ingroupCoverageIDs)
def run(self, fileStore):
    seqFile1 = fileStore.readGlobalFile(self.seqFileID1)
    seqFile2 = fileStore.readGlobalFile(self.seqFileID2)
    if self.blastOptions.compressFiles:
        seqFile1 = decompressFastaFile(seqFile1, fileStore.getLocalTempFile())
        seqFile2 = decompressFastaFile(seqFile2, fileStore.getLocalTempFile())
    blastResultsFile = fileStore.getLocalTempFile()
    runLastz(seqFile1, seqFile2, blastResultsFile, lastzArguments=self.blastOptions.lastzArguments)
    if self.blastOptions.realign:
        realignResultsFile = fileStore.getLocalTempFile()
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=blastResultsFile,
                         outputAlignmentsFile=realignResultsFile,
                         realignArguments=self.blastOptions.realignArguments)
        blastResultsFile = realignResultsFile
    resultsFile = fileStore.getLocalTempFile()
    cactus_call(parameters=["cactus_blast_convertCoordinates",
                            blastResultsFile, resultsFile,
                            str(self.blastOptions.roundsOfCoordinateConversion)])
    logger.info("Ran the blast okay")
    return fileStore.writeGlobalFile(resultsFile)
def run(self, fileStore): logger.info("Chunk IDs: %s" % self.chunkIDs) #Avoid compression if just one chunk self.blastOptions.compressFiles = self.blastOptions.compressFiles and len(self.chunkIDs) > 2 resultsIDs = [] for i in range(len(self.chunkIDs)): resultsIDs.append(self.addChild(RunSelfBlast(self.blastOptions, self.chunkIDs[i])).rv()) logger.info("Made the list of self blasts") #Setup job to make all-against-all blasts logger.debug("Collating self blasts.") logger.info("Blast file IDs: %s" % resultsIDs) return self.addFollowOn(CollateBlasts(self.blastOptions, resultsIDs)).rv()
def run(self, fileStore): logger.info("Chunk IDs: %s" % self.chunkIDs) #Avoid compression if just one chunk self.blastOptions.compressFiles = self.blastOptions.compressFiles and len(self.chunkIDs) > 2 resultsIDs = [] for i in xrange(len(self.chunkIDs)): resultsIDs.append(self.addChild(RunSelfBlast(self.blastOptions, self.chunkIDs[i])).rv()) logger.info("Made the list of self blasts") #Setup job to make all-against-all blasts logger.debug("Collating self blasts.") logger.info("Blast file IDs: %s" % resultsIDs) return self.addFollowOn(CollateBlasts(self.blastOptions, resultsIDs)).rv()
def run(self, fileStore): logger.info("Preparing sequence for preprocessing") # chunk it up inSequence = fileStore.readGlobalFile(self.inSequenceID) inChunkDirectory = getTempDirectory( rootDir=fileStore.getLocalTempDir()) inChunkList = runGetChunks(sequenceFiles=[inSequence], chunksDir=inChunkDirectory, chunkSize=self.prepOptions.chunkSize, overlapSize=0) inChunkList = [os.path.abspath(path) for path in inChunkList] logger.info("Chunks = %s" % inChunkList) logger.info("Chunks dir = %s" % os.listdir(inChunkDirectory)) inChunkIDList = [ fileStore.writeGlobalFile(chunk) for chunk in inChunkList ] outChunkIDList = [] #For each input chunk we create an output chunk, it is the output chunks that get concatenated together. if not self.chunksToCompute: self.chunksToCompute = range(len(inChunkList)) for i in self.chunksToCompute: #Calculate the number of chunks to use inChunkNumber = int( max( 1, math.ceil( len(inChunkList) * self.prepOptions.proportionToSample))) assert inChunkNumber <= len(inChunkList) and inChunkNumber > 0 #Now get the list of chunks flanking and including the current chunk j = max(0, i - inChunkNumber / 2) inChunkIDs = inChunkIDList[j:j + inChunkNumber] if len( inChunkIDs ) < inChunkNumber: #This logic is like making the list circular inChunkIDs += inChunkIDList[:inChunkNumber - len(inChunkIDs)] assert len(inChunkIDs) == inChunkNumber outChunkIDList.append( self.addChild( PreprocessChunk(self.prepOptions, inChunkIDs, float(inChunkNumber) / len(inChunkIDList), inChunkIDList[i])).rv()) # follow on to merge chunks return self.addFollowOn(MergeChunks(self.prepOptions, outChunkIDList)).rv()
def runCactusGraphMapJoin(options):
    with Toil(options) as toil:
        importSingularityImage(options)
        #Run the workflow
        if options.restart:
            wf_output = toil.restart()
        else:
            options.cactusDir = getTempDirectory()

            #load cactus config
            configNode = ET.parse(options.configFile).getroot()
            config = ConfigWrapper(configNode)
            config.substituteAllPredefinedConstantsWithLiterals()

            # load up the vgs
            vg_ids = []
            for vg_path in options.vg:
                logger.info("Importing {}".format(vg_path))
                vg_ids.append(toil.importFile(makeURL(vg_path)))

            # tack on the decoys
            if options.decoyGraph:
                logger.info("Importing decoys {}".format(options.decoyGraph))
                vg_ids.append(toil.importFile(makeURL(options.decoyGraph)))
                # we'll treat it like any other graph downstream, except clipping
                # where we'll check first using the path name
                options.vg.append(options.decoyGraph)

            # load up the hals
            hal_ids = []
            for hal_path in options.hal:
                logger.info("Importing {}".format(hal_path))
                hal_ids.append(toil.importFile(makeURL(hal_path)))

            # run the workflow
            wf_output = toil.start(Job.wrapJobFn(graphmap_join_workflow, options, config, vg_ids, hal_ids))

        #export the split data
        export_join_data(toil, options, wf_output[0], wf_output[1], wf_output[2])
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("cigarsFile", nargs="+",
                        help="Pairwise alignments (from cactus-blast, cactus-refmap or cactus-graphmap)")
    parser.add_argument("outputHal", type=str, help="Output HAL file")
    parser.add_argument("--pathOverrides", nargs="*",
                        help="paths (multiple allowed) to override from seqFile")
    parser.add_argument("--pathOverrideNames", nargs="*",
                        help="names (must be same number as --pathOverrides) of path overrides")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--root", dest="root",
                        help="Name of ancestral node (which must appear in NEWICK tree in <seqfile>) to use as a "
                        "root for the alignment. Any genomes not below this node in the tree may be used as "
                        "outgroups but will never appear in the output. If no root is specified then the root "
                        "of the tree is used.",
                        default=None, required=True)
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the specified pre-built container image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)
    parser.add_argument("--nonCactusInput", action="store_true",
                        help="Input lastz cigars do not come from cactus-blast or cactus-refmap: Prepend ids in cigars")
    parser.add_argument("--pangenome", action="store_true",
                        help="Override some CAF settings whose defaults are not suited to star trees")
    parser.add_argument("--pafInput", action="store_true",
                        help="'cigarsFile' argument is in PAF format, rather than lastz cigars.")
    parser.add_argument("--database", choices=["kyoto_tycoon", "redis"],
                        help="The type of database", default="kyoto_tycoon")

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.pathOverrides or options.pathOverrideNames:
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError('same number of values must be passed to --pathOverrides and --pathOverrideNames')

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil? That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError('Only 1 CPU detected. Cactus requires at least 2')

    if options.pafInput:
        # cactus-graphmap does not do any prepending to simplify interface with minigraph node names
        # so it must be done here
        options.nonCactusInput = True

    options.buildHal = True
    options.buildFasta = True

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusAfterBlastOnly(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-align has finished after {} seconds".format(run_time))