def open(self, job):
     """
     Run a job's body inside a private per-job local temp directory while
     tracking its disk usage.

     Generator body intended for context-manager use: it creates a unique
     temp dir, records a job-state file, chdirs into the temp dir, yields
     to let the job run, then (in the finally block) logs actual disk use,
     warns if the job exceeded its request, restores the working directory
     and removes the job-state file.

     :param job: the job about to run; only its disk requirement is read here
     """
     # Requested disk, compared against actual usage in the finally block.
     jobReqs = job.disk
     # Original working directory, restored after the job finishes.
     startingDir = os.getcwd()
     # Fresh, uniquely named temp dir for this job nested under localTempDir.
     self.localTempDir = makePublicDir(os.path.join(self.localTempDir, str(uuid.uuid4())))
     # Clean up state left behind by jobs that died on this worker.
     self._removeDeadJobs(self.workDir)
     self.jobStateFile = self._createJobStateFile()
     freeSpace, diskSize = getFileSystemSize(self.localTempDir)
     # Warn early if the filesystem hosting the temp dir is nearly full.
     if freeSpace <= 0.1 * diskSize:
         logger.warning('Starting job %s with less than 10%% of disk space remaining.',
                        self.jobName)
     try:
         os.chdir(self.localTempDir)
         # Delegate the remaining open/close bookkeeping to the superclass.
         with super().open(job):
             yield
     finally:
         # Measure what the job actually wrote under its temp dir.
         diskUsed = getDirSizeRecursively(self.localTempDir)
         logString = ("Job {jobName} used {percent:.2f}% ({humanDisk}B [{disk}B] used, "
                      "{humanRequestedDisk}B [{requestedDisk}B] requested) at the end of "
                      "its run.".format(jobName=self.jobName,
                                        percent=(float(diskUsed) / jobReqs * 100 if
                                                 jobReqs > 0 else 0.0),
                                        humanDisk=bytes2human(diskUsed),
                                        disk=diskUsed,
                                        humanRequestedDisk=bytes2human(jobReqs),
                                        requestedDisk=jobReqs))
         self.logToMaster(logString, level=logging.DEBUG)
         # Escalate to a warning when the job overran its disk request.
         if diskUsed > jobReqs:
             self.logToMaster("Job used more disk than requested. Consider modifying the user "
                              "script to avoid the chance of failure due to incorrectly "
                              "requested resources. " + logString, level=logging.WARNING)
         os.chdir(startingDir)
         # Finally delete the job from the worker
         os.remove(self.jobStateFile)
# Beispiel #2 (score: 0)
 def issueJob(self, jobNode):
     """
     Add a job to the queue of jobs
     """
     # Build the worker invocation the batch system will eventually run.
     jobNode.command = ' '.join((resolveEntryPoint('_toil_worker'),
                                 jobNode.jobName,
                                 self.jobStoreLocator,
                                 jobNode.jobStoreID))
     # jobBatchSystemID is an int that is an incremented counter for each job
     batch_system_id = self.batchSystem.issueBatchJob(jobNode)
     self.jobBatchSystemIDToIssuedJob[batch_system_id] = jobNode
     if jobNode.preemptable:
         # Increment only after the dict insert so that
         # len(jobBatchSystemIDToIssuedJob) >= preemptableJobsIssued holds.
         self.preemptableJobsIssued += 1
     # CWL internal jobs are noisy, so demote their log line to debug.
     if jobNode.jobName.startswith(CWL_INTERNAL_JOBS):
         cur_logger = logger.debug
     else:
         cur_logger = logger.info
     cur_logger("Issued job %s with job batch system ID: "
                "%s and cores: %s, disk: %s, and memory: %s",
                jobNode, str(batch_system_id), int(jobNode.cores),
                bytes2human(jobNode.disk), bytes2human(jobNode.memory))
     if self.toilMetrics:
         self.toilMetrics.logIssuedJob(jobNode)
         self.toilMetrics.logQueueSize(self.getNumberOfJobsIssued())
# Beispiel #3 (score: 0)
def toil_call_preprocess(job, options, in_seq_file, out_seq_file, name):
    """Run cactus-preprocess on one genome and store the result in the file store.

    :param job: the Toil job; cores/disk/memory are passed through as limits
    :param options: parsed options; cactusOptions is appended to the command
    :param in_seq_file: seq file whose pathMap gives the input fasta path
    :param out_seq_file: seq file whose pathMap names the output fasta
    :param name: genome name to look up in both path maps
    :return: file store id of the preprocessed fasta
    """
    work_dir = job.fileStore.getLocalTempDir()

    in_path = in_seq_file.pathMap[name]
    out_name = os.path.basename(out_seq_file.pathMap[name])

    cmd = ['cactus-preprocess', os.path.join(work_dir, 'js'),
           '--inPaths', in_path,
           '--outPaths', out_name,
           '--workDir', work_dir,
           '--maxCores', str(int(job.cores)),
           '--maxDisk', bytes2human(job.disk),
           '--maxMemory', bytes2human(job.memory)]
    cmd.extend(options.cactusOptions.strip().split(' '))

    cactus_call(parameters=cmd)

    # NOTE(review): out_name is a bare basename — this assumes cactus-preprocess
    # writes it relative to the job's current working directory; confirm.
    return job.fileStore.writeGlobalFile(out_name)
# Beispiel #4 (score: 0)
def get_toil_resource_opts(options, task):
    """Build a string of Toil resource-limit flags for the given task.

    :param options: parsed options carrying per-task core/memory settings
    :param task: one of 'preprocess', 'blast', 'align', 'halAppend';
                 anything else yields no flags
    :return: e.g. '--maxCores 4 --maxMemory 2.0G', possibly empty
    """
    # Lazy lookup so only the attributes relevant to *task* are touched.
    getters = {
        'preprocess': lambda: (options.preprocessCores, options.preprocessMemory),
        'blast': lambda: (options.blastCores, options.blastMemory),
        'align': lambda: (options.alignCores, options.alignMemory),
        'halAppend': lambda: (1, options.alignMemory),
    }
    cores, mem = getters.get(task, lambda: (None, None))()

    parts = []
    if cores:
        parts.append('--maxCores {}'.format(cores))
    # WDL output manages memory itself, so skip the flag in that mode.
    if mem and not options.wdl:
        parts.append('--maxMemory {}'.format(bytes2human(mem)))
    return ' '.join(parts)
# Beispiel #5 (score: 0)
def bytes2humanN(s):
    """Format *s* as a human-readable byte count with one decimal place,
    passing falsy values (None, 0, '') through unchanged."""
    if not s:
        return s
    return bytes2human(s, fmt='%(value).1f%(symbol)s')
# Beispiel #6 (score: 0)
def toil_call_align(job, options, seq_file, project, event, cigar_name, hal_path, fa_path, blast_output, dep_names, *dep_fa_ids):
    """Run cactus-align on previously computed blast output, then export a fasta.

    :param job: the Toil job; cores/disk/memory are passed through as limits
    :param options: parsed options; cactusOptions and halOptions are read
    :param seq_file: seq file object, serialized to disk for cactus-align
    :param project: unused here (kept for interface compatibility)
    :param event: internal tree node name being aligned; passed as --root
    :param cigar_name: unused here beyond naming of blast_output entries
    :param hal_path: output HAL file name (basename placed in the work dir)
    :param fa_path: unused here (kept for interface compatibility)
    :param blast_output: iterable of (file name, file store id) pairs
    :param dep_names: names of dependency genomes
    :param dep_fa_ids: file store ids of the dependency fastas, same order
    :return: (fasta file store id, HAL file store id)
    """
    work_dir = job.fileStore.getLocalTempDir()

    # serialize the seqfile so cactus-align can use it
    seq_file_path = os.path.join(work_dir, 'seqfile.txt')
    with open(seq_file_path, 'w') as sf:
        sf.write(str(seq_file))

    # download the blast output from the file store
    blast_files = []
    for blast_file_name, blast_file_id in blast_output:
        blast_files.append(os.path.join(work_dir, blast_file_name))
        job.fileStore.readGlobalFile(blast_file_id, blast_files[-1])

    # read the fasta files
    assert len(dep_names) == len(dep_fa_ids)
    fa_paths = [os.path.join(work_dir, "{}.pp.fa".format(name)) for name in dep_names]
    # Fix: the loop variable was previously named fa_path, silently shadowing
    # (and clobbering) the fa_path parameter; renamed to avoid the shadowing.
    for dep_fa_path, fa_id in zip(fa_paths, dep_fa_ids):
        job.fileStore.readGlobalFile(fa_id, dep_fa_path)

    # call cactus-align
    out_hal_path = os.path.join(work_dir, os.path.basename(hal_path))
    cactus_call(parameters=['cactus-align', os.path.join(work_dir, 'js'), seq_file_path] + blast_files +
                [out_hal_path, '--root', event,
                 '--pathOverrides'] + fa_paths + ['--pathOverrideNames'] + dep_names +
                ['--workDir', work_dir, '--maxCores', str(int(job.cores)),
                 '--maxDisk', bytes2human(job.disk),
                 '--maxMemory', bytes2human(job.memory)] + options.cactusOptions.strip().split(' '))

    out_hal_id = job.fileStore.writeGlobalFile(out_hal_path)

    # export the fasta while we're at it
    out_fa_path = os.path.join(work_dir, '{}.fa'.format(event))
    cactus_call(parameters=['hal2fasta', out_hal_path, event] + options.halOptions.strip().split(' '),
                outfile=out_fa_path)
    out_fa_id = job.fileStore.writeGlobalFile(out_fa_path)

    return out_fa_id, out_hal_id
# Beispiel #7 (score: 0)
def toil_call_blast(job, options, seq_file, project, event, cigar_name, dep_names, *dep_fa_ids):
    """Run cactus-blast for one internal node and return its output files.

    :param job: the Toil job; cores/disk/memory are passed through as limits
    :param options: parsed options; cactusOptions is appended to the command
    :param seq_file: seq file object, serialized to disk for cactus-blast
    :param project: unused here (kept for interface compatibility)
    :param event: internal tree node name; passed as --root
    :param cigar_name: base name for the blast output files
    :param dep_names: names of dependency genomes
    :param dep_fa_ids: file store ids of the dependency fastas, same order
    :return: list of (output file basename, file store id) pairs
    """
    work_dir = job.fileStore.getLocalTempDir()

    # serialize the seqfile so cactus-blast can use it
    seq_file_path = os.path.join(work_dir, 'seqfile.txt')
    with open(seq_file_path, 'w') as sf:
        sf.write(str(seq_file))

    # download each dependency fasta next to the seqfile
    assert len(dep_names) == len(dep_fa_ids)
    fa_paths = []
    for dep_name, fa_id in zip(dep_names, dep_fa_ids):
        dep_path = os.path.join(work_dir, "{}.pp.fa".format(dep_name))
        job.fileStore.readGlobalFile(fa_id, dep_path)
        fa_paths.append(dep_path)

    cigar_base = os.path.basename(cigar_name)
    cmd = (['cactus-blast', os.path.join(work_dir, 'js'), seq_file_path,
            os.path.join(work_dir, cigar_base),
            '--root', event, '--pathOverrides'] + fa_paths +
           ['--pathOverrideNames'] + dep_names +
           ['--workDir', work_dir, '--maxCores', str(int(job.cores)),
            '--maxDisk', bytes2human(job.disk),
            '--maxMemory', bytes2human(job.memory)])
    cmd += options.cactusOptions.strip().split(' ')
    cactus_call(parameters=cmd)

    # scrape the output files out of the workdir
    out_nameids = []
    for entry in os.listdir(work_dir):
        full_path = os.path.join(work_dir, entry)
        if os.path.isfile(full_path) and entry.startswith(cigar_base):
            out_nameids.append((entry, job.fileStore.writeGlobalFile(full_path)))

    return out_nameids