def main(args):
    """
    Submit one LSF job per input file and wait for all of them to finish.

    For every path in args.files a shell pipeline is assembled that
    runs zcat on the file, pipes it through the extract-pileup-chromosome
    and pileup-to-acgtn programs located in args.exe_dir, and finally
    splits the stream into fixed-size pieces written under a
    per-file subdirectory of args.out_dir.
    The attributes read from args are exe_dir, chromosome, ref_sequence,
    files, out_dir, namescheme (called with each data path) and queue.
    """
    extract_stage = os.path.join(
            args.exe_dir, 'extract-pileup-chromosome ' + args.chromosome)
    convert_stage = os.path.join(
            args.exe_dir, 'pileup-to-acgtn ' + args.ref_sequence)
    # Every position carries 4-byte A, C, G, T, and N counts (20 bytes),
    # and each split piece holds a fixed number of positions.
    bytes_per_position = 20
    positions_per_piece = 1000000
    piece_size_option = '--bytes=%d' % (
            bytes_per_position * positions_per_piece)
    # time and memory limits requested for every job
    minute_limit = 5
    kilobyte_limit = 1000
    # build one job per input file
    jobs = []
    for datapath in args.files:
        # make sure the per-file output subdirectory exists
        target_dir = os.path.join(args.out_dir, args.namescheme(datapath))
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        job = lsf.Bsub()
        job.flags = {
                'W': minute_limit, 'M': kilobyte_limit,
                'o': 'out', 'e': 'err'}
        if args.queue:
            job.flags['q'] = args.queue
        # split reads the pipeline output from stdin ('-') and writes
        # numerically suffixed pieces whose names start with 'x'
        split_stage = ' '.join([
            'split',
            piece_size_option,
            '--suffix-length=4',
            '--numeric-suffixes',
            '-',
            os.path.join(target_dir, 'x')])
        stages = (datapath, extract_stage, convert_stage, split_stage)
        job.commands = ['zcat %s | %s | %s | %s' % stages]
        jobs.append(job)
    # submit everything, echoing what the submission reports
    submitted = set()
    for job in jobs:
        out, err = job.submit()
        sys.stdout.write(out)
        sys.stderr.write(err)
        submitted.add(job.job_number)
    # poll until none of the submitted jobs is reported as unfinished
    pending = set(submitted)
    while True:
        still_running = submitted & set(lsf.gen_unfinished_job_numbers())
        for finished in pending - still_running:
            # single-argument print emits the same line as the
            # comma-separated print statement
            print(' '.join(('job', str(finished), 'has finished')))
        if not still_running:
            break
        pending = still_running
        time.sleep(2.0)
Exemple #2
0
"""
Run a command line using bsub with some hardcoded parameters.
"""

import sys
import time

import lsf

if __name__=='__main__':
    # Submit the command line given in sys.argv[1:] as a single LSF job
    # with hardcoded flags, then wait for that job to finish.
    b = lsf.Bsub()
    b.flags = {
            'n' : 1,
            'W' : 720,
            'q' : 'dean'}
    # Wrap every argument in single quotes so it reaches the command as
    # one word.  An embedded single quote would terminate the quoting
    # early, so it is rewritten as '\'' (close quote, escaped quote,
    # reopen quote); arguments without quotes are unchanged.
    cmd = ' '.join(
            "'%s'" % v.replace("'", "'\\''") for v in sys.argv[1:])
    b.commands = [cmd]
    out, err = b.submit()
    sys.stdout.write(out)
    sys.stderr.write(err)
    # Keep polling while the job is still listed as unfinished.
    # The previous 'not in' test was inverted: it returned immediately
    # for a queued job and never returned for one that had already ended.
    while b.job_number in lsf.gen_unfinished_job_numbers():
        time.sleep(0.5)