Ejemplo n.º 1
0
def HTSeqCount(cmdset, runmode='test'):
    """Queue one HTSeq read-counting job per sample.

    Every job sources the seqtools environment, pipes ``samtools view -h``
    of the sample's alignment into ``python -m HTSeq.scripts.count -q`` and
    redirects the result to a ``.count`` file, then moves the job script
    into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'bam',
            'outputpath', 'inputpath', 'GTF' and 'overwrite' are consumed
            here; whatever remains is handed to ``cmdGenerator.formatCmd``
            as counting options.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directories.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    make_dirs = (runmode != 'test')

    _cmd, memory, walltime, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    out_dir = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                     create=make_dirs)
    in_dir = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    annotation = configRobot.popParas(cmdset, ['GTF'])

    manager = jobFactory.jobManager(mem=memory,
                                    time=walltime,
                                    overwrite=cmdset.pop('overwrite'))
    env_setup = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    # A lone sample arrives as a scalar; wrap it so the loop sees a sequence.
    if type(samples) not in (list, tuple):
        samples = [samples]

    for sample in samples:
        options = copy.deepcopy(cmdset)
        job_id = prefix + '_' + sample

        if bam == '=sample':
            # The sample entry itself names the alignment file.
            relative_in = sample
        else:
            # The alignment lives in a per-sample sub-directory.
            relative_in = sample + '/' + bam
            cmdGenerator.checkPath(out_dir + sample, create=make_dirs)
        relative_out = relative_in + '.count'

        view_part = 'samtools view -h %s | ' % (in_dir + relative_in)
        count_part = 'python -m HTSeq.scripts.count -q '
        count_step = cmdGenerator.formatCmd(
            view_part, count_part, options, '-', annotation,
            ' > %s' % (out_dir + relative_out))
        move_step = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (job_id, manager.ext, out_dir))

        manager.createJob(job_id,
                          [env_setup, count_step, move_step],
                          outpath=out_dir,
                          outfn=job_id)
    return manager
Ejemplo n.º 2
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def gmfit(cmdset, runmode='test'):
    """Queue one MATLAB job per (data file, K) combination.

    For each data file and each K the job runs ``call('<input file>', K,
    '<output .mat>');`` through the module-level ``matlabcmd`` template
    (defined outside this block) and then moves the job script into the
    output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'call', 'mem', 'time', 'prefix', 'Ks', 'datafn',
            'inputpath', 'outputpath' and 'overwrite' are consumed here.
            'Ks' and 'datafn' may each be a single value or a list/tuple.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmd, call, mem, time, prefix, Ks, datafn = configRobot.popParas(
        cmdset, ['cmd', 'call', 'mem', 'time', 'prefix', 'Ks', 'datafn'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(outputpath, create=createpath)

    # A single-valued entry may arrive as a bare scalar. Wrap it so the loops
    # below iterate over values instead of over the characters of a string.
    if not isinstance(Ks, (list, tuple)):
        Ks = [Ks]
    if not isinstance(datafn, (list, tuple)):
        datafn = [datafn]

    for fn in datafn:
        stem = fn.replace('.mat', '')
        for k in Ks:
            jobname = prefix + stem + 'k%03d' % (int(k))
            functionCall = call + "('%s', %s, '%s');" % (
                inputpath + fn, k, outputpath + jobname + '.mat')
            CMD = [
                cmdGenerator.formatCmd(matlabcmd % (functionCall)),
                cmdGenerator.formatCmd(
                    'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)),
            ]
            jobmanager.createJob(jobname,
                                 CMD,
                                 outpath=outputpath,
                                 outfn=jobname,
                                 trackcmd=False)
    return jobmanager
Ejemplo n.º 3
0
def cuffcompare(cmdset, runmode='test'):
    """Queue a single cuffcompare job covering every sample.

    The job sources the seqtools environment, runs the configured command
    on the ``<inputpath><sample>/<gtf>`` file of each sample, and moves the
    job script into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'gtf',
            'inputpath', 'outputpath' and 'overwrite' are consumed here. The
            remainder must contain '-o', which is prefixed with the output
            path before being filtered through ``availParas['cuffcompare']``.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the job was registered on.
    """
    make_dirs = (runmode != 'test')

    program, memory, walltime, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    in_dir = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    out_dir = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                     create=make_dirs)

    # A lone sample arrives as a scalar; wrap it so the join sees a sequence.
    if type(samples) not in (list, tuple):
        samples = [samples]

    # Space-terminated list of the per-sample annotation files.
    gtf_args = ''.join(
        ['%s%s/%s ' % (in_dir, name, gtf) for name in samples])

    manager = jobFactory.jobManager(mem=memory,
                                    time=walltime,
                                    overwrite=cmdset.pop('overwrite'))

    job_id = prefix
    steps = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

    options = copy.deepcopy(cmdset)
    options['-o'] = out_dir + options['-o']
    options = configRobot.validParas(options, availParas['cuffcompare'])
    steps.append(cmdGenerator.formatCmd(program, options, gtf_args))
    steps.append(cmdGenerator.formatCmd(
        'mv ./%s%s %s' % (job_id, manager.ext, out_dir)))

    manager.createJob(job_id,
                      steps,
                      outpath=out_dir,
                      outfn=job_id,
                      trackcmd=False)
    return manager
Ejemplo n.º 4
0
def markDup(cmdset, runmode='test'):
    """Queue one Picard MarkDuplicates job per sample.

    Each job sources the seqtools environment, runs ``MarkDuplicates.jar``
    on ``<inputpath><sample>/<bam>``, indexes the resulting ``.mdup.bam``
    with ``samtools index`` and moves the job script into the sample's
    directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'inputpath',
            'programpath', 'bam' and 'overwrite' are consumed here; the
            rest is filtered through ``availParas['MarkDuplicates.jar']``.
            'mem' must look like '<n>G'; the Java heap is set to n-1 GB.
            'sample' may be a single name or a list/tuple of names.
        runmode: accepted for parity with the other job builders; this
            function creates no directory, so the value is not consulted.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))
    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    javacmd = 'java -Xmx%dg -jar' % (int(mem.replace('G', '')) - 1)
    mdupjar = 'MarkDuplicates.jar'
    idxcmd = 'samtools index'

    for sample in samples:
        jobname = prefix + '_' + sample
        sampledir = inputpath + sample

        paraset = copy.deepcopy(cmdset)
        # Values carry a leading '=' so they render as KEY=value.
        paraset['INPUT'] = '=%s/%s' % (sampledir, bam)
        paraset['OUTPUT'] = paraset['INPUT'].replace('.bam', '.mdup.bam')
        paraset['METRICS_FILE'] = '=%s/%s' % (sampledir,
                                              prefix + '_mdupmetrics.txt')
        paraset = configRobot.validParas(paraset, availParas[mdupjar])

        CMDs = [
            cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'),
            cmdGenerator.formatCmd(javacmd, programpath + mdupjar, paraset),
            # samtools needs the plain path, without the '=' marker.
            cmdGenerator.formatCmd(idxcmd, paraset['OUTPUT'].strip('=')),
            cmdGenerator.formatCmd(
                'mv ./%s%s %s' % (jobname, jobmanager.ext, sampledir)),
        ]
        jobmanager.createJob(jobname,
                             CMDs,
                             outpath=sampledir,
                             outfn=jobname)
    return jobmanager
Ejemplo n.º 5
0
def RSeQC(cmdset, runmode='test'):
    """Queue one RSeQC job per sample, running five RSeQC scripts in turn.

    Each job sources the seqtools environment, runs ``inner_distance.py``,
    ``junction_annotation.py``, ``junction_saturation.py``, ``read_GC.py``
    and ``read_duplication.py`` on ``<inputpath><sample>/<bam>``, and moves
    the job script into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'bam', 'prefix',
            'inputpath', 'outputpath', 'programpath' and 'overwrite' are
            consumed here; the rest is filtered per script through
            ``availParas[<script>]``. 'sample' may be a single name or a
            list/tuple of names.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(cmdset,
                                                ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    programs = [
        'inner_distance.py', 'junction_annotation.py',
        'junction_saturation.py', 'read_GC.py', 'read_duplication.py'
    ]

    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
        for prog in programs:
            # Every script writes to <outputpath><sample>.<script name>.
            target = outputpath + sample + '.%s' % (prog.replace('.py', ''))

            paraset = copy.deepcopy(cmdset)
            paraset['-i'] = inputpath + sample + '/' + bam
            paraset['-o'] = target
            paraset = configRobot.validParas(paraset, availParas[prog])
            # When the filter removed '-o', send the same target through
            # the '>' key instead (presumably a stdout redirect -- confirm
            # against cmdGenerator.formatCmd).
            if '-o' not in paraset.keys():
                paraset['>'] = target

            CMDs.append(
                cmdGenerator.formatCmd('python', programpath + prog, paraset))
        CMDs.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)
    return jobmanager
Ejemplo n.º 6
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def gseaTestAnt2Pheno(cmdset, runmode='test'):
    """Queue the MATLAB GSEA jobs for the 'CCLE' dataset entries.

    For every dataset entry equal to 'CCLE', jobs numbered 1..njob are
    created. Each one runs ``call(jobidx, njob, '<outputpath><prefix>',
    'dataset', ..., 'pheno', ..., 'feature', ...);`` through the
    module-level ``matlabcmd`` template (defined outside this block) and
    then moves the job script into the output directory. Entries for any
    other dataset are skipped.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'call', 'mem', 'time', 'prefix', 'dataset', 'pheno',
            'feature', 'njob', 'outputpath' and 'overwrite' are consumed
            here. 'dataset', 'pheno', 'feature' and 'njob' are indexed in
            parallel.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    make_dirs = (runmode != 'test')

    (_cmd, call, memory, walltime, prefix,
     datasets, phenos, features, njobs) = configRobot.popParas(
         cmdset, ['cmd', 'call', 'mem', 'time', 'prefix', 'dataset', 'pheno',
                  'feature', 'njob'])

    out_dir = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    manager = jobFactory.jobManager(mem=memory,
                                    time=walltime,
                                    overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(out_dir, create=make_dirs)

    for idx, dataset in enumerate(datasets):
        # The job count is parsed for every entry, before the dataset filter.
        total = int(njobs[idx])
        if dataset != 'CCLE':
            continue
        for part in range(1, total + 1):
            job_id = prefix + dataset + '%02d' % part
            matlab_call = call + (
                "(%d, %d, '%s', 'dataset', '%s', 'pheno', '%s', 'feature', '%s');"
                % (part, total, out_dir + prefix, dataset, phenos[idx],
                   features[idx]))
            run_step = cmdGenerator.formatCmd(matlabcmd % (matlab_call))
            move_step = cmdGenerator.formatCmd(
                'mv ./%s%s %s' % (job_id, manager.ext, out_dir))
            manager.createJob(job_id,
                              [run_step, move_step],
                              outpath=out_dir,
                              outfn=job_id,
                              trackcmd=False)
    return manager
Ejemplo n.º 7
0
def cuffmerge(cmdset, runmode='test'):
    """Queue a single cuffmerge job covering every sample.

    The job sources the seqtools environment, echoes the list of
    ``<inputpath><sample>/<gtf>`` files into ``<-o>.samples``, runs the
    configured command on that list, moves the job script into the ``-o``
    directory and removes the list file.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'gtf',
            'inputpath', 'outputpath' and 'overwrite' are consumed here. The
            remainder must contain '-o', which is prefixed with the output
            path before being filtered through ``availParas['cuffmerge']``.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directories.

    Returns:
        The ``jobFactory.jobManager`` the job was registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)

    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Double-quoted argument for `echo`. Each entry ends in a literal
    # backslash-n, which is left for the shell's echo to interpret.
    sampletext = '"' + ''.join(
        ['%s%s/%s\\n' % (inputpath, sample, gtf) for sample in samples]) + '"'

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))
    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']
    samplelist = paraset['-o'] + '.samples'

    CMD.append(cmdGenerator.formatCmd('echo', sampletext, '>', samplelist))

    paraset = configRobot.validParas(paraset, availParas['cuffmerge'])
    cmdGenerator.checkPath(paraset['-o'], create=createpath)
    CMD.append(cmdGenerator.formatCmd(cmd, paraset,
                                      paraset['-o'] + '.samples'))
    CMD.append(
        cmdGenerator.formatCmd('mv ./%s%s %s' %
                               (jobname, jobmanager.ext, paraset['-o'])))
    CMD.append(cmdGenerator.formatCmd('rm -f', paraset['-o'] + '.samples'))

    # Ask SGE for a matching SMP slot count when more than one thread is
    # configured. The first thread option present wins, and its own value is
    # used (the long-option branch used to read '-p' and raise KeyError).
    sgeopt = []
    for threadopt in ('-p', '--num-threads'):
        if threadopt in paraset.keys():
            if int(paraset[threadopt]) > 1:
                sgeopt = ['-pe smp ' + paraset[threadopt]]
            break

    jobmanager.createJob(jobname,
                         CMD,
                         outpath=paraset['-o'],
                         outfn=jobname,
                         sgeopt=sgeopt,
                         trackcmd=False)
    return jobmanager
Ejemplo n.º 8
0
def tophat(cmdset, runmode='test'):
    """Queue one tophat alignment job per sample.

    Each job sources the seqtools environment (and echoes
    ``$BOWTIE2_INDEXES``), runs the configured command on the sample's read
    file(s) with ``-o <outputpath><prefix>/<sample>``, and moves the job
    script into that output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'paired',
            'readext', 'outputpath', 'genome', 'inputpath' and 'overwrite'
            are consumed here; the rest is passed to the command. 'paired'
            must be 'paired'/'yes' (reads ``<sample>_1<readext>`` and
            ``<sample>_2<readext>``) or 'single'/'no' (reads
            ``<sample><readext>``).
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directories.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.

    Raises:
        ValueError: if 'paired' is not one of the four accepted values.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    paired = configRobot.popParas(cmdset, ['paired'])
    readext, outpath, genome = configRobot.popParas(
        cmdset, ['readext', 'outputpath', 'genome'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    setuppathcmd = cmdGenerator.formatCmd(
        'source ~/libraries/setup_seqtools\necho $BOWTIE2_INDEXES')

    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Reject an unknown layout up front instead of failing later on an
    # unbound `reads` variable.
    if paired in ('paired', 'yes'):
        is_paired = True
    elif paired in ('single', 'no'):
        is_paired = False
    else:
        raise ValueError(
            "tophat: 'paired' must be 'paired', 'yes', 'single' or 'no', "
            "got %r" % (paired,))

    cmdGenerator.checkPath(outpath + '%s/' % prefix, create=createpath)

    for sample in samples:
        if is_paired:
            # Mate files are <sample>_1<readext> and <sample>_2<readext>.
            reads = [inputpath + sample + '_%d' % mate + readext
                     for mate in (1, 2)]
        else:
            reads = inputpath + sample + readext
        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outpath + '%s/' % prefix + sample
        jobfnprefix = prefix + '_' + sample

        tophatcmd = cmdGenerator.formatCmd(cmd, paraset, genome, reads)
        mvscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobfnprefix, jobmanager.ext, paraset['-o']))

        # Multiple threads per job need a matching SMP slot request; a
        # missing '-p' is treated as a single-threaded job.
        if '-p' in paraset and int(paraset['-p']) > 1:
            sgeopt = ['-pe smp ' + paraset['-p']]
        else:
            sgeopt = []

        # The output directory has to exist first, otherwise SGE complains
        # that it cannot put the stdout in its path.
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        jobmanager.createJob(jobfnprefix,
                             [setuppathcmd, tophatcmd, mvscriptcmd],
                             outpath=paraset['-o'],
                             outfn=jobfnprefix,
                             sgeopt=sgeopt)
    return jobmanager
Ejemplo n.º 9
0
def DESeqPair(cmdset, runmode='test'):
    """Queue one DESeq comparison job per (group1[i], group2[i]) pair.

    For each pair an R script ``./<prefix>_<gr1>_<gr2>.R`` is written to the
    current directory by %-formatting the template file with the remaining
    options. The job sources the seqtools environment, runs the script with
    ``Rscript`` and moves both the R script and the job script into the
    output directory.

    Args:
        cmdset: option mapping for this step; 'meta', 'group1' and 'group2'
            are first converted by ``configRobot.makeParasList``. The keys
            'cmd', 'mem', 'time', 'group1', 'group2', 'template' and
            'overwrite' are consumed; 'inputpath', 'outputpath', 'prefix'
            and 'meta' are re-inserted in processed form so the template can
            reference them, together with 'gr1' and 'gr2'.
            'countfnprefix' and 'countfnsuffix' must be present; a value of
            two single quotes is turned into an empty string.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory. The R
            scripts are written in either mode.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmdset = configRobot.makeParasList(cmdset, ['meta', 'group1', 'group2'])
    cmd, mem, time, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'prefix'])
    group1, group2 = configRobot.popParas(cmdset, ['group1', 'group2'])

    # Read the template through a context manager so the handle is closed.
    with open(cmdset.pop('template')) as templatefile:
        template = templatefile.read()

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    cmdset['inputpath'] = inputpath
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    cmdset['outputpath'] = outputpath
    cmdset['prefix'] = prefix

    # Render the metadata columns as an R character vector. An empty list or
    # the lone placeholder '' means "no metadata" and becomes c().
    meta = configRobot.popParas(cmdset, ['meta'])
    if not meta or (len(meta) == 1 and meta[0] == "''"):
        cmdset['meta'] = 'c()'
    else:
        cmdset['meta'] = 'c(\'' + '\', \''.join(meta) + '\')'

    if cmdset['countfnprefix'] == "''":
        cmdset['countfnprefix'] = ''
    if cmdset['countfnsuffix'] == "''":
        cmdset['countfnsuffix'] = ''

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    setuppathcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')
    for i, gr1 in enumerate(group1):
        gr2 = group2[i]
        paraset = copy.deepcopy(cmdset)
        paraset['gr1'] = gr1
        paraset['gr2'] = gr2
        jobfnprefix = prefix + '_' + gr1 + '_' + gr2

        # Fill the template before opening the file, so a formatting error
        # does not leave an empty script behind.
        rscript = template % paraset
        with open('./%s.R' % (jobfnprefix), 'w') as scriptfile:
            scriptfile.write(rscript)

        deseqcmd = cmdGenerator.formatCmd('Rscript', './%s.R' % (jobfnprefix))
        mvRscriptcmd = cmdGenerator.formatCmd('mv ./%s.R %s' %
                                              (jobfnprefix, outputpath))
        mvscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobfnprefix, jobmanager.ext, outputpath))

        jobmanager.createJob(
            jobfnprefix, [setuppathcmd, deseqcmd, mvRscriptcmd, mvscriptcmd],
            outpath=outputpath,
            outfn=jobfnprefix)
    return jobmanager
Ejemplo n.º 10
0
def cuffdiff_v1(cmdset, runmode='test'):
    """Queue a single cuffdiff (cufflinks 1) job covering every sample.

    The job sources the seqtools environment, runs the hard-coded
    ``cufflinks_1/cuffdiff`` binary on the GTF and on the
    ``<inputpath><sample>/<bam>`` file of each sample, and moves the job
    script into the ``--output-dir`` directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'gtf', 'bam',
            'inputpath' and 'overwrite' are consumed here; the rest is
            filtered through ``availParas['cuffdiff']`` and must keep
            '--output-dir'. The configured 'cmd' is not used.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the job was registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, samples, prefix, gtf, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Space-terminated list of the per-sample alignment files.
    sampletext = ''.join(
        ['%s%s/%s ' % (inputpath, sample, bam) for sample in samples])

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

    paraset = copy.deepcopy(cmdset)
    paraset = configRobot.validParas(paraset, availParas['cuffdiff'])
    cmdGenerator.checkPath(paraset['--output-dir'], create=createpath)
    CMD.append(
        cmdGenerator.formatCmd(
            '/ifs/home/c2b2/dp_lab/bc2252/SeqTool/cufflinks_1/cuffdiff',
            paraset, gtf, sampletext))
    CMD.append(
        cmdGenerator.formatCmd(
            'mv ./%s%s %s' %
            (jobname, jobmanager.ext, paraset['--output-dir'])))

    # Ask SGE for a matching SMP slot count when more than one thread is
    # configured. The first thread option present wins, and its own value is
    # used (the long-option branch used to read '-p' and raise KeyError).
    sgeopt = []
    for threadopt in ('-p', '--num-threads'):
        if threadopt in paraset.keys():
            if int(paraset[threadopt]) > 1:
                sgeopt = ['-pe smp ' + paraset[threadopt]]
            break

    jobmanager.createJob(jobname,
                         CMD,
                         outpath=paraset['--output-dir'],
                         outfn=jobname,
                         sgeopt=sgeopt,
                         trackcmd=False)
    return jobmanager
Ejemplo n.º 11
0
def picardReorderSam(cmdset, runmode='test'):
    """Queue one Picard ReorderSam job per sample.

    Each job sources the seqtools environment, runs ``ReorderSam.jar`` with
    ``VALIDATION_STRINGENCY=LENIENT`` to write
    ``<outputpath><input minus .bam>.reorder.bam``, and moves the job script
    into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'inputpath',
            'outputpath', 'programpath', 'bam', 'overwrite' and, when
            present, '-Djava.io.tmpdir' are consumed here; the rest is
            filtered through ``availParas['ReorderSam.jar']``. 'mem' must
            look like '<n>G'; the Java heap is set to n-2 GB. 'sample' may
            be a single name or a list/tuple of names. When 'bam' equals
            '=sample' the sample entry itself names the input file,
            otherwise the input is ``<sample>/<bam>``.
        runmode: accepted for parity with the other job builders; this
            function creates no directory, so the value is not consulted.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # NOTE(review): the option name and its value are joined with no
    # separator, so the configured value presumably starts with '='.
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    else:
        javacmd = 'java'
    javacmd = javacmd + ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)
    reorder = programpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'

    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        # Reorder by chromosome.
        paraset = copy.deepcopy(cmdset)
        if bam == '=sample':
            inputfile = sample
        else:
            inputfile = sample + '/' + bam
        # Values carry a leading '=' so they render as KEY=value.
        paraset['INPUT'] = '=%s' % (inputpath + inputfile)
        paraset['OUTPUT'] = '=%s.reorder.bam' % (
            outputpath + inputfile.replace('.bam', ''))
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, reorder, paraset))

        CMDs.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)

    return jobmanager
Ejemplo n.º 12
0
def RNASeQC(cmdset, runmode='test'):
    """Queue a single RNA-SeQC job covering every sample.

    The job sources the seqtools environment, echoes a tab-separated sample
    table ("Sample ID", "Bam File", "Notes") into ``<prefix>.samples``, runs
    the configured jar on it with ``-o <inputpath>RNA-SeQC/``, removes the
    table and moves the job script into that output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'bam', 'prefix',
            'inputpath', 'programpath', 'overwrite' and, when present,
            '-Djava.io.tmpdir' are consumed here; '-s' and '-o' are set
            here; the result is passed to the jar as its options. 'mem' must
            look like '<n>G'; the Java heap is set to n-2 GB. 'sample' may
            be a single name or a list/tuple of names.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the job was registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(cmdset,
                                                ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    # NOTE(review): the option name and its value are joined with no
    # separator, so the configured value presumably starts with '='.
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    else:
        javacmd = 'java'
    javacmd = javacmd + ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    # Wrap a lone sample, as the sibling builders do; otherwise the table
    # below would get one row per character of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # The -s table is produced inside the job by `echo`. The backslash-t and
    # backslash-n sequences stay literal here and are left for echo.
    rows = ['%s\\t%s\\t%s\\n' % (sample, inputpath + sample + '/' + bam,
                                 sample) for sample in samples]
    samplestr = '"Sample ID\\tBam File\\tNotes\\n' + ''.join(rows) + '"'
    samplefile = '%s.samples' % prefix

    cmdset['-s'] = samplefile
    cmdset['-o'] = inputpath + 'RNA-SeQC/'

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    setupcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')
    createsamplefile = cmdGenerator.formatCmd('echo', samplestr, '>',
                                              samplefile)
    removesamplefile = cmdGenerator.formatCmd('rm -f', samplefile)
    mvscriptcmd = cmdGenerator.formatCmd(
        'mv %s%s %s' % (prefix, jobmanager.ext, cmdset['-o']))
    qccmd = cmdGenerator.formatCmd(javacmd, programpath + cmd, cmdset)

    cmdGenerator.checkPath(cmdset['-o'], create=createpath)
    jobmanager.createJob(
        prefix,
        [setupcmd, createsamplefile, qccmd, removesamplefile, mvscriptcmd],
        outfn=prefix,
        outpath=cmdset['-o'],
        trackcmd=False)
    return jobmanager
Ejemplo n.º 13
0
def cufflinks(cmdset, runmode='test'):
    """Queue one cufflinks job per sample.

    Each job sources the seqtools environment, runs the configured command
    on ``<inputpath><sample>/<bam>`` with
    ``-o <outputpath><prefix>/<sample>``, and moves the job script into
    that output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'bam',
            'inputpath', 'outputpath' and 'overwrite' are consumed here; the
            rest is filtered through ``availParas['cufflinks']``.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directories.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    # All samples of this run share an <outputpath><prefix>/ directory.
    outputpath = cmdGenerator.checkPath(outputpath + '%s/' % prefix,
                                        create=createpath)

    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    for sample in samples:
        jobname = prefix + '_' + sample
        CMD = []
        CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outputpath + sample
        paraset = configRobot.validParas(paraset, availParas['cufflinks'])
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        CMD.append(
            cmdGenerator.formatCmd(cmd, paraset,
                                   inputpath + '%s/' % sample + bam))

        CMD.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, paraset['-o'])))

        # Ask SGE for a matching SMP slot count when more than one thread is
        # configured. The first thread option present wins, and its own
        # value is used (the long-option branch used to read '-p' and raise
        # KeyError).
        sgeopt = []
        for threadopt in ('-p', '--num-threads'):
            if threadopt in paraset.keys():
                if int(paraset[threadopt]) > 1:
                    sgeopt = ['-pe smp ' + paraset[threadopt]]
                break

        jobmanager.createJob(jobname,
                             CMD,
                             outpath=paraset['-o'],
                             outfn=jobname,
                             sgeopt=sgeopt)
    return jobmanager
Ejemplo n.º 14
0
def picardQC(cmdset, runmode='test'):
    """Queue one Picard QC job per sample, running four metric jars.

    Each job sources the seqtools environment, runs
    ``CollectRnaSeqMetrics.jar``, ``CollectMultipleMetrics.jar``,
    ``EstimateLibraryComplexity.jar`` and ``CollectGcBiasMetrics.jar`` in
    that order, moves the job script into the output directory and removes
    the per-job temporary directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'bam', 'prefix', 'inputpath',
            'outputpath', 'programpath', 'sample' and 'overwrite' are
            consumed here. The remainder must contain 'TMP_DIR' and is
            filtered per jar through ``availParas[<jar>]``. 'mem' must look
            like '<n>G'; the Java heap is set to n-2 GB. 'sample' may be a
            single name or a list/tuple of names. When 'bam' equals
            '=sample' the sample entry itself names the input file,
            otherwise the input is ``<sample>/<bam>``.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output and temporary
            directories.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, bam, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    samples = cmdset.pop('sample')
    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    javacmd = 'java -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # (jar, output label) in execution order. CollectMultipleMetrics gets an
    # extension-less output name built from its empty label.
    metrics = (
        ('CollectRnaSeqMetrics.jar', 'RnaSeq'),
        ('CollectMultipleMetrics.jar', ''),
        ('EstimateLibraryComplexity.jar', 'Lib'),
        ('CollectGcBiasMetrics.jar', 'GC'),
    )

    for sample in samples:
        jobname = prefix + '_' + sample
        allcmds = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        paraset = copy.deepcopy(cmdset)
        # Values carry a leading '=' so they render as KEY=value.
        if bam == '=sample':
            paraset['INPUT'] = '=%s' % (inputpath + sample)
        else:
            paraset['INPUT'] = '=%s/%s' % (inputpath + sample, bam)
        # Each job gets its own temporary directory; it is removed at the
        # end of the job.
        paraset['TMP_DIR'] = paraset['TMP_DIR'] + prefix + '_' + sample + '/'
        tmpdir = paraset['TMP_DIR'].strip('=')
        cmdGenerator.checkPath(tmpdir, create=createpath)

        for metric, label in metrics:
            if 'MultipleMetrics' in metric:
                paraset['OUTPUT'] = '=%s' % (outputpath + sample + label)
            else:
                paraset['OUTPUT'] = '=%s.txt' % (
                    outputpath + sample + '.' + label)
            paraset['CHART_OUTPUT'] = paraset['OUTPUT'].replace(
                '.txt', '.pdf')
            paraset['SUMMARY_OUTPUT'] = paraset['OUTPUT'].replace(
                '.txt', '.summary.txt')

            # Filter out parameters that this jar does not support.
            metricparaset = configRobot.validParas(paraset,
                                                   availParas[metric])
            allcmds.append(
                cmdGenerator.formatCmd(javacmd, programpath + metric,
                                       metricparaset))

        allcmds.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, outputpath)))
        allcmds.append(cmdGenerator.formatCmd('rm -Rf', tmpdir))
        jobmanager.createJob(jobname,
                             allcmds,
                             outpath=outputpath,
                             outfn=jobname)
    return jobmanager
Ejemplo n.º 15
0
def GATK_genotyper(cmdset, runmode='test'):
    """Queue a single GATK UnifiedGenotyper job over every sample.

    The job sources the seqtools environment, runs
    ``GenomeAnalysisTK.jar -T UnifiedGenotyper`` with one ``-I`` argument
    per ``<inputpath><sample>/<bam>`` file, and moves the job script into
    the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'inputpath',
            'outputpath', 'gatkpath', 'bam', 'overwrite' and, when present,
            '-Djava.io.tmpdir' are consumed here; the rest is passed to GATK
            as its options. 'mem' must look like '<n>G'; the Java heap is
            set to n-2 GB. 'sample' may be a single name or a non-empty
            list/tuple of names.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the job was registered on.

    Raises:
        IndexError: if 'sample' is an empty sequence.
    """
    createpath = (runmode != 'test')

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # Wrap a lone sample, as the sibling builders do; otherwise samples[0]
    # below would be the first character of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # NOTE(review): the option name and its value are joined with no
    # separator, so the configured value presumably starts with '='.
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    else:
        javacmd = 'java'
    javacmd = javacmd + ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    GATK = gatkpath + 'GenomeAnalysisTK.jar '
    genotyper = '-T UnifiedGenotyper '

    CMDs = []
    CMDs.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

    jobname = prefix

    paraset = copy.deepcopy(cmdset)
    # The first file is the value of the '-I' key itself; every further file
    # is appended to that value behind its own ' -I ' flag.
    inputs = inputpath + samples[0] + '/' + bam
    for extra in samples[1:]:
        inputs = inputs + ' -I ' + inputpath + extra + '/' + bam
    paraset['-I'] = inputs

    CMDs.append(cmdGenerator.formatCmd(javacmd, GATK + genotyper, paraset))

    CMDs.append(
        cmdGenerator.formatCmd('mv ./%s%s %s' %
                               (jobname, jobmanager.ext, outputpath)))
    jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)

    return jobmanager
Ejemplo n.º 16
0
def RSeQC(cmdset, runmode='test'):
    """Queue one RSeQC job per sample, running five RSeQC scripts in turn.

    Each job sources the seqtools environment, runs ``inner_distance.py``,
    ``junction_annotation.py``, ``junction_saturation.py``, ``read_GC.py``
    and ``read_duplication.py`` on ``<inputpath><sample>/<bam>``, and moves
    the job script into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'bam', 'prefix',
            'inputpath', 'outputpath', 'programpath' and 'overwrite' are
            consumed here; the rest is filtered per script through
            ``availParas[<script>]``. 'sample' may be a single name or a
            list/tuple of names.
        runmode: 'test' leaves directory creation off; any other value asks
            ``cmdGenerator.checkPath`` to create the output directory.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    createpath = (runmode != 'test')

    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(cmdset,
                                                ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    programs = [
        'inner_distance.py', 'junction_annotation.py',
        'junction_saturation.py', 'read_GC.py', 'read_duplication.py'
    ]

    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
        for prog in programs:
            # Every script writes to <outputpath><sample>.<script name>.
            target = outputpath + sample + '.%s' % (prog.replace('.py', ''))

            paraset = copy.deepcopy(cmdset)
            paraset['-i'] = inputpath + sample + '/' + bam
            paraset['-o'] = target
            paraset = configRobot.validParas(paraset, availParas[prog])
            # When the filter removed '-o', send the same target through
            # the '>' key instead (presumably a stdout redirect -- confirm
            # against cmdGenerator.formatCmd).
            if '-o' not in paraset.keys():
                paraset['>'] = target

            CMDs.append(
                cmdGenerator.formatCmd('python', programpath + prog, paraset))
        CMDs.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)
    return jobmanager
Ejemplo n.º 17
0
def picardReorderSam(cmdset, runmode='test'):
    """Queue one Picard ReorderSam job per sample.

    Each job sources the seqtools environment, runs ``ReorderSam.jar`` with
    ``VALIDATION_STRINGENCY=LENIENT`` to write
    ``<outputpath><input minus .bam>.reorder.bam``, and moves the job script
    into the output directory.

    Args:
        cmdset: option mapping for this step; it is modified in place. The
            keys 'cmd', 'mem', 'time', 'sample', 'prefix', 'inputpath',
            'outputpath', 'programpath', 'bam', 'overwrite' and, when
            present, '-Djava.io.tmpdir' are consumed here; the rest is
            filtered through ``availParas['ReorderSam.jar']``. 'mem' must
            look like '<n>G'; the Java heap is set to n-2 GB. 'sample' may
            be a single name or a list/tuple of names. When 'bam' equals
            '=sample' the sample entry itself names the input file,
            otherwise the input is ``<sample>/<bam>``.
        runmode: accepted for parity with the other job builders; this
            function creates no directory, so the value is not consulted.

    Returns:
        The ``jobFactory.jobManager`` the jobs were registered on.
    """
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # Wrap a lone sample, as the sibling builders do; otherwise the loop
    # below would walk the characters of the sample name.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # NOTE(review): the option name and its value are joined with no
    # separator, so the configured value presumably starts with '='.
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    else:
        javacmd = 'java'
    javacmd = javacmd + ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)
    reorder = programpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'

    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        # Reorder by chromosome.
        paraset = copy.deepcopy(cmdset)
        if bam == '=sample':
            inputfile = sample
        else:
            inputfile = sample + '/' + bam
        # Values carry a leading '=' so they render as KEY=value.
        paraset['INPUT'] = '=%s' % (inputpath + inputfile)
        paraset['OUTPUT'] = '=%s.reorder.bam' % (
            outputpath + inputfile.replace('.bam', ''))
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, reorder, paraset))

        CMDs.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)

    return jobmanager
Ejemplo n.º 18
0
def markDup(cmdset, runmode='test'):
    """Queue one Picard MarkDuplicates job per sample, followed by indexing.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix,
               inputpath, programpath, bam and overwrite are popped from it;
               the rest, plus the generated INPUT, OUTPUT and METRICS_FILE
               entries, is passed through configRobot.validParas against
               availParas['MarkDuplicates.jar'].
    runmode -- kept for a uniform signature; nothing here depends on it.

    Each job marks duplicates, runs `samtools index` on the resulting
    .mdup.bam and moves the job script into the sample directory, which is
    also where the job output is directed.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))
    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jarname = 'MarkDuplicates.jar'
    # The JVM heap is the job memory request (e.g. '8G') minus 1 GB.
    heapgb = int(mem.replace('G', '')) - 1
    javacmd = 'java -Xmx%dg -jar' % heapgb

    for sample in samples:
        jobname = prefix + '_' + sample
        paraset = copy.deepcopy(cmdset)

        sampledir = inputpath + sample
        bampath = '=%s/%s' % (sampledir, bam)
        paraset['INPUT'] = bampath
        paraset['OUTPUT'] = bampath.replace('.bam', '.mdup.bam')
        paraset['METRICS_FILE'] = '=%s/%s' % (sampledir,
                                              prefix + '_mdupmetrics.txt')
        paraset = configRobot.validParas(paraset, availParas[jarname])

        CMDs = [
            cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'),
            cmdGenerator.formatCmd(javacmd, programpath + jarname, paraset),
            # OUTPUT carries a leading '=' for Picard; samtools wants the bare path.
            cmdGenerator.formatCmd('samtools index',
                                   paraset['OUTPUT'].strip('=')),
            cmdGenerator.formatCmd(
                'mv ./%s%s %s' % (jobname, jobmanager.ext, inputpath + sample)),
        ]
        jobmanager.createJob(jobname, CMDs,
                             outpath=inputpath + sample, outfn=jobname)
    return jobmanager
Ejemplo n.º 19
0
def HTSeqCount(cmdset, runmode='test'):
    """Queue one HTSeq read-counting job per sample.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, bam,
               outputpath, inputpath, GTF and overwrite are popped from it;
               whatever remains is passed to HTSeq.scripts.count as options.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directories.

    Each job pipes `samtools view -h` of the sample's BAM into
    `python -m HTSeq.scripts.count` and redirects the result to a
    '.count' file under outputpath.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    gtf = configRobot.popParas(cmdset, ['GTF'])

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    setuppathcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if type(samples) not in (list, tuple):
        samples = [samples]

    # bam == '=sample' means each sample name is itself the BAM file;
    # otherwise every sample has its own sub-directory holding `bam`.
    persampledir = bam != '=sample'
    htseq = 'python -m HTSeq.scripts.count -q '

    for sample in samples:
        paraset = copy.deepcopy(cmdset)
        jobfnprefix = prefix + '_' + sample
        if persampledir:
            inputfile = sample + '/' + bam
            cmdGenerator.checkPath(outputpath + sample, create=createpath)
        else:
            inputfile = sample
        outputfile = inputfile + '.count'

        samcmd = 'samtools view -h %s | ' % (inputpath + inputfile)
        redirect = ' > %s' % (outputpath + outputfile)
        countcmd = cmdGenerator.formatCmd(
            samcmd, htseq, paraset, '-', gtf, redirect)
        mvscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobfnprefix, jobmanager.ext, outputpath))

        jobmanager.createJob(jobfnprefix,
                             [setuppathcmd, countcmd, mvscriptcmd],
                             outpath=outputpath, outfn=jobfnprefix)
    return jobmanager
Ejemplo n.º 20
0
def cuffmerge(cmdset, runmode='test'):
    """Queue a single cuffmerge job that merges the per-sample GTF files.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, gtf,
               inputpath, outputpath and overwrite are popped from it; the
               rest (which must contain '-o') is passed through
               configRobot.validParas against availParas['cuffmerge'].
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directories.

    The job writes the list of '<inputpath><sample>/<gtf>' paths with
    `echo` into '<-o>.samples', runs `cmd` on that list, moves the job
    script into the '-o' directory and finally removes the list file.
    When '-p' (or, failing that, '--num-threads') is greater than 1 the job
    requests that many slots through the SGE option '-pe smp'.

    Returns the jobFactory.jobManager the job was created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Double-quoted text with literal '\n' separators, meant for `echo`.
    sampletext = '"' + ''.join(
        '%s%s/%s\\n' % (inputpath, sample, gtf) for sample in samples) + '"'

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']

    samplelist = paraset['-o'] + '.samples'
    CMD.append(cmdGenerator.formatCmd('echo', sampletext, '>', samplelist))

    paraset = configRobot.validParas(paraset, availParas['cuffmerge'])
    cmdGenerator.checkPath(paraset['-o'], create=createpath)
    CMD.append(cmdGenerator.formatCmd(cmd, paraset, paraset['-o'] + '.samples'))
    CMD.append(cmdGenerator.formatCmd(
        'mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['-o'])))
    CMD.append(cmdGenerator.formatCmd('rm -f', paraset['-o'] + '.samples'))

    # The first thread option that is present decides.  The long form used
    # to read paraset['-p'], which does not exist in that branch.
    sgeopt = []
    for threadkey in ('-p', '--num-threads'):
        if threadkey in paraset:
            if int(paraset[threadkey]) > 1:  # multi threads
                sgeopt = ['-pe smp ' + paraset[threadkey]]
            break

    jobmanager.createJob(jobname, CMD, outpath=paraset['-o'], outfn=jobname,
                         sgeopt=sgeopt, trackcmd=False)
    return jobmanager
Ejemplo n.º 21
0
def DESeqPair(cmdset, runmode='test'):
    """Queue one DESeq job per (group1[i], group2[i]) pair.

    cmdset  -- dict of parameters.  meta, group1 and group2 are first turned
               into lists by configRobot.makeParasList.  cmd, mem, time,
               prefix, group1, group2, template, meta and overwrite are
               popped; inputpath, outputpath, prefix and meta are written
               back in processed form.  The dict must also contain
               countfnprefix and countfnsuffix.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directory.

    For every pair the template file's text is %-formatted with the
    parameter dict (extended with 'gr1' and 'gr2') and written to
    './<prefix>_<gr1>_<gr2>.R' in the current directory.  This happens
    immediately, in every runmode.  The job runs that script with Rscript
    and then moves the R script and the job script to outputpath.

    Returns the jobFactory.jobManager the jobs were created with.
    Raises IndexError if group2 is shorter than group1.
    """
    createpath = runmode != 'test'

    cmdset = configRobot.makeParasList(cmdset, ['meta', 'group1', 'group2'])
    cmd, mem, time, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'prefix'])
    group1, group2 = configRobot.popParas(cmdset, ['group1', 'group2'])
    # Context manager so the template handle is closed deterministically.
    with open(cmdset.pop('template')) as templatefile:
        template = templatefile.read()
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    cmdset['inputpath'] = inputpath
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    cmdset['outputpath'] = outputpath
    cmdset['prefix'] = prefix

    # Render meta as an R character vector.  A lone "''" placeholder, or no
    # entries at all (which used to raise IndexError), gives the empty vector.
    meta = configRobot.popParas(cmdset, ['meta'])
    if not meta or (len(meta) == 1 and meta[0] == "''"):
        cmdset['meta'] = 'c()'
    else:
        cmdset['meta'] = "c('" + "', '".join(meta) + "')"

    # "''" is the placeholder for an empty string.
    for key in ('countfnprefix', 'countfnsuffix'):
        if cmdset[key] == "''":
            cmdset[key] = ''

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    setuppathcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    for i, gr1 in enumerate(group1):
        gr2 = group2[i]
        paraset = copy.deepcopy(cmdset)
        paraset['gr1'] = gr1
        paraset['gr2'] = gr2
        jobfnprefix = prefix + '_' + gr1 + '_' + gr2

        rscript = './%s.R' % (jobfnprefix)
        with open(rscript, 'w') as scriptfile:
            scriptfile.write(template % paraset)

        deseqcmd = cmdGenerator.formatCmd('Rscript', rscript)
        mvRscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s.R %s' % (jobfnprefix, outputpath))
        mvscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobfnprefix, jobmanager.ext, outputpath))

        jobmanager.createJob(
            jobfnprefix,
            [setuppathcmd, deseqcmd, mvRscriptcmd, mvscriptcmd],
            outpath=outputpath, outfn=jobfnprefix)
    return jobmanager
Ejemplo n.º 22
0
def cuffcompare(cmdset, runmode='test'):
    """Queue a single cuffcompare job over the per-sample GTF files.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, gtf,
               inputpath, outputpath and overwrite are popped from it; the
               rest (which must contain '-o') is passed through
               configRobot.validParas against availParas['cuffcompare'].
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directory.

    The '<inputpath><sample>/<gtf>' paths are passed to `cmd` as a
    space-separated argument string, and '-o' is prefixed with outputpath.

    Returns the jobFactory.jobManager the job was created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if type(samples) not in (list, tuple):
        samples = [samples]

    # Every path is followed by one space, including the last one.
    sampletext = ''.join(
        ['%s%s/%s ' % (inputpath, sample, gtf) for sample in samples])

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    setupcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']
    paraset = configRobot.validParas(paraset, availParas['cuffcompare'])

    comparecmd = cmdGenerator.formatCmd(cmd, paraset, sampletext)
    mvscriptcmd = cmdGenerator.formatCmd(
        'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath))

    jobmanager.createJob(jobname, [setupcmd, comparecmd, mvscriptcmd],
                         outpath=outputpath, outfn=jobname, trackcmd=False)
    return jobmanager
Ejemplo n.º 23
0
def tophat(cmdset, runmode='test'):
    """Queue one tophat alignment job per sample.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, paired,
               readext, outputpath, genome, inputpath and overwrite are
               popped from it; whatever remains is passed to `cmd` as
               options, with '-o' set to '<outputpath><prefix>/<sample>'.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directories.

    paired must be 'paired'/'yes' (reads are '<sample>_1<readext>' and
    '<sample>_2<readext>') or 'single'/'no' (reads are '<sample><readext>').
    When '-p' (or, failing that, '--num-threads') is greater than 1 the job
    requests that many slots through the SGE option '-pe smp'.

    Returns the jobFactory.jobManager the jobs were created with.
    Raises ValueError for any other value of paired.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    paired = configRobot.popParas(cmdset, ['paired'])
    readext, outpath, genome = configRobot.popParas(
        cmdset, ['readext', 'outputpath', 'genome'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    setuppathcmd = cmdGenerator.formatCmd(
        'source ~/libraries/setup_seqtools\necho $BOWTIE2_INDEXES')

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    cmdGenerator.checkPath(outpath + '%s/' % prefix, create=createpath)

    for sample in samples:
        if paired in ('paired', 'yes'):
            # A list comprehension replaces `map(lambda(i): ...)`, whose
            # tuple-parameter syntax is rejected by Python 3.
            reads = [inputpath + sample + '_%d' % mate + readext
                     for mate in (1, 2)]
        elif paired in ('single', 'no'):
            reads = inputpath + sample + readext
        else:
            # `reads` used to be left unbound here, giving a NameError below.
            raise ValueError(
                "paired must be 'paired', 'yes', 'single' or 'no', got %r"
                % (paired,))

        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outpath + '%s/' % prefix + sample
        jobfnprefix = prefix + '_' + sample

        tophatcmd = cmdGenerator.formatCmd(cmd, paraset, genome, reads)
        mvscriptcmd = cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobfnprefix, jobmanager.ext, paraset['-o']))

        # The first thread option that is present decides; with neither one
        # set the job is single-threaded instead of failing with KeyError.
        sgeopt = []
        for threadkey in ('-p', '--num-threads'):
            if threadkey in paraset:
                if int(paraset[threadkey]) > 1:  # multiple threads per job
                    sgeopt = ['-pe smp ' + paraset[threadkey]]
                break

        # need to create the output directory first, otherwise SGE complains
        # that it cannot put the stdout in its path
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        jobmanager.createJob(jobfnprefix,
                             [setuppathcmd, tophatcmd, mvscriptcmd],
                             outpath=paraset['-o'], outfn=jobfnprefix,
                             sgeopt=sgeopt)
    return jobmanager
Ejemplo n.º 24
0
def GATK_genotyper(cmdset, runmode='test'):
    """Queue a single GATK UnifiedGenotyper job over all samples' BAM files.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix,
               inputpath, outputpath, gatkpath, bam, overwrite and (when
               present) -Djava.io.tmpdir are popped from it; whatever
               remains is passed to GATK as options.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directory.

    Every '<inputpath><sample>/<bam>' is supplied through its own '-I'.

    Returns the jobFactory.jobManager the job was created with.
    Raises IndexError when the sample list is empty.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # Optional JVM temp dir: the checked path is appended directly to the flag.
    tmpdirflag = '-Djava.io.tmpdir'
    javacmd = 'java'
    if tmpdirflag in cmdset:
        javacmd = 'java ' + tmpdirflag + cmdGenerator.checkPath(
            cmdset.pop(tmpdirflag))
    # The JVM heap is the job memory request (e.g. '8G') minus 2 GB.
    heapgb = int(mem.replace('G', '')) - 2
    javacmd += ' -Xmx%dg -jar' % heapgb

    gatkcall = gatkpath + 'GenomeAnalysisTK.jar ' + '-T UnifiedGenotyper '

    jobname = prefix
    setupcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    paraset = copy.deepcopy(cmdset)
    # The first BAM is the value of '-I'; every further one is appended
    # with its own ' -I ' so all inputs ride on that single entry.
    paraset['-I'] = inputpath + samples[0] + '/' + bam
    for sample in samples[1:]:
        paraset['-I'] += ' -I ' + inputpath + sample + '/' + bam

    genotypecmd = cmdGenerator.formatCmd(javacmd, gatkcall, paraset)
    mvscriptcmd = cmdGenerator.formatCmd(
        'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath))
    jobmanager.createJob(jobname, [setupcmd, genotypecmd, mvscriptcmd],
                         outpath=outputpath, outfn=jobname)

    return jobmanager
Ejemplo n.º 25
0
def RNASeQC(cmdset, runmode='test'):
    """Queue a single RNA-SeQC job covering all samples.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, bam, prefix,
               inputpath, programpath, overwrite and (when present)
               -Djava.io.tmpdir are popped from it.  '-s' (the sample
               file) and '-o' ('<inputpath>RNA-SeQC/') are added, and the
               resulting dict is passed to the jar as options.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the '-o' directory.

    The job writes a tab-separated sample table ("Sample ID", "Bam File",
    "Notes") with `echo` into '<prefix>.samples', runs
    `java -jar <programpath><cmd>`, deletes the table and moves the job
    script into the '-o' directory.

    Returns the jobFactory.jobManager the job was created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(
        cmdset, ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    # Optional JVM temp dir: the checked path is appended directly to the flag.
    tmpdirflag = '-Djava.io.tmpdir'
    javacmd = 'java'
    if tmpdirflag in cmdset:
        javacmd = 'java ' + tmpdirflag + cmdGenerator.checkPath(
            cmdset.pop(tmpdirflag))
    # The JVM heap is the job memory request (e.g. '8G') minus 2 GB.
    heapgb = int(mem.replace('G', '')) - 2
    javacmd += ' -Xmx%dg -jar' % heapgb

    # Sample table for -s: a header plus one row per sample, with literal
    # '\t' / '\n' escapes inside double quotes for `echo`.
    rows = ['%s\\t%s\\t%s\\n' % (sample, inputpath + sample + '/' + bam, sample)
            for sample in samples]
    samplestr = '"Sample ID\\tBam File\\tNotes\\n' + ''.join(rows) + '"'
    samplefile = '%s.samples' % prefix

    cmdset['-s'] = samplefile
    cmdset['-o'] = inputpath + 'RNA-SeQC/'
    qcdir = cmdset['-o']

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    setupcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')
    createsamplefile = cmdGenerator.formatCmd(
        'echo', samplestr, '>', samplefile)
    removesamplefile = cmdGenerator.formatCmd('rm -f', samplefile)
    mvscriptcmd = cmdGenerator.formatCmd(
        'mv %s%s %s' % (prefix, jobmanager.ext, qcdir))
    qccmd = cmdGenerator.formatCmd(javacmd, programpath + cmd, cmdset)

    cmdGenerator.checkPath(cmdset['-o'], create=createpath)
    jobsteps = [setupcmd, createsamplefile, qccmd, removesamplefile,
                mvscriptcmd]
    jobmanager.createJob(prefix, jobsteps, outfn=prefix,
                         outpath=cmdset['-o'], trackcmd=False)
    return jobmanager
Ejemplo n.º 26
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def runBootstrap(cmdset, runmode='test'):
    """Queue MATLAB bootstrap jobs for every dataset / usecn / combineSplit mix.

    cmdset  -- dict of parameters.  cmd, call, mem, time, prefix, dataset,
               pheno, njob, iter, usecn, combineSplit, useDisease,
               outputpath and overwrite are popped from it.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directory.

    For each combination, `njob` jobs are created; each job runs `iter`
    MATLAB invocations of `call` (wrapped in the module-level `matlabcmd`
    template) and then moves its job script to outputpath.  Job names are
    '<prefix><dataset>C<0|1>S<0|1>J<jobidx>'.

    dataset, njob, usecn and combineSplit may each be a list or a single
    value.  njob holds one entry per dataset, or a single entry shared by
    all datasets.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    createpath = runmode != 'test'

    cmd, call, mem, time, prefix, datasets, phenos, njobs = configRobot.popParas(
        cmdset,
        ['cmd', 'call', 'mem', 'time', 'prefix', 'dataset', 'pheno', 'njob'])
    # `niter` rather than `iter`, so the builtin is not shadowed.
    niter, usecn, combineSplit, useDisease = configRobot.popParas(
        cmdset, ['iter', 'usecn', 'combineSplit', 'useDisease'])

    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(outputpath, create=createpath)

    if not isinstance(datasets, list):
        datasets = [datasets]
    if not isinstance(njobs, list):
        njobs = [njobs]
    if not isinstance(usecn, list):
        usecn = [usecn]
    # This check used to compare type(combineSplit) with itself, so a scalar
    # such as 'true' was never wrapped and got iterated character by character.
    if not isinstance(combineSplit, list):
        combineSplit = [combineSplit]

    for di, dataset in enumerate(datasets):
        # A single njob value applies to every dataset.
        if len(njobs) == 1:
            numjobs = int(njobs[0])
        else:
            numjobs = int(njobs[di])
        for cnSet in usecn:
            cnStr = 'C1' if cnSet == 'true' else 'C0'
            for splitSet in combineSplit:
                splitStr = 'S1' if splitSet == 'true' else 'S0'
                fnhead = prefix + dataset + cnStr + splitStr
                for jobidx in range(1, numjobs + 1):
                    jobname = fnhead + 'J%02d' % (jobidx)
                    CMD = []
                    for iteridx in range(1, int(niter) + 1):
                        functionCall = "addpath('./Gray/data'); tic; " + call + "(%d, %d, %d, '%s', '%s', 'usecn', %s, 'combineSplit', %s, 'useDisease', %s); toc;" % (
                            jobidx, numjobs, iteridx, outputpath, fnhead,
                            cnSet, splitSet, useDisease)
                        CMD.append(cmdGenerator.formatCmd(
                            matlabcmd % (functionCall)))
                    CMD.append(cmdGenerator.formatCmd(
                        'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
                    jobmanager.createJob(jobname, CMD, outpath=outputpath,
                                         outfn=jobname, trackcmd=False)
    return jobmanager
Ejemplo n.º 27
0
def countmismatches(cmdset, runmode='test'):
    """Queue two countmismatches.py jobs (forward and reverse) per sample.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, bam,
               inputpath, outputpath and overwrite are popped from it.
    runmode -- kept for a uniform signature; nothing here depends on it.

    Each job runs `python countmismatches.py` on
    '<inputpath><sample>/<bam>', writing to
    '<outputpath><sample>.<direction>.countmis', and then moves its job
    script to outputpath.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    for sample in samples:
        for direction in ('forward', 'reverse'):
            jobname = prefix + '_' + sample + '_' + direction
            bampath = inputpath + sample + '/' + bam
            countfile = outputpath + sample + '.' + direction + '.countmis'
            steps = [
                cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'),
                cmdGenerator.formatCmd('python', 'countmismatches.py',
                                       bampath, countfile, direction),
                # Move the job script to outputpath.
                cmdGenerator.formatCmd(
                    'mv', '%s%s' % (jobname, jobmanager.ext), outputpath),
            ]
            jobmanager.createJob(jobname, steps, outpath=outputpath,
                                 outfn=jobname, trackcmd=False)
    return jobmanager
Ejemplo n.º 28
0
def countmismatches(cmdset, runmode='test'):
    """Create forward- and reverse-strand mismatch-count jobs for each sample.

    cmdset  -- parameter dict; cmd, mem, time, sample, prefix, bam,
               inputpath, outputpath and overwrite are popped from it.
    runmode -- accepted for signature parity with the other job builders;
               it has no effect on what this function does.

    One job is created per (sample, direction).  It runs
    `python countmismatches.py <bam> <out> <direction>` and afterwards
    moves the job script to outputpath.

    Returns the jobFactory.jobManager holding the created jobs.
    """
    paranames = ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam']
    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, paranames)

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    overwrite = cmdset.pop('overwrite')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=overwrite)

    strands = ['forward', 'reverse']
    for sample in samples:
        for strand in strands:
            jobname = '_'.join([prefix, sample, strand])
            setup = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')
            count = cmdGenerator.formatCmd(
                'python', 'countmismatches.py',
                inputpath + sample + '/' + bam,
                outputpath + sample + '.' + strand + '.countmis',
                strand)
            # Move the job script to outputpath.
            movescript = cmdGenerator.formatCmd(
                'mv', '%s%s' % (jobname, jobmanager.ext), outputpath)
            jobmanager.createJob(jobname, [setup, count, movescript],
                                 outpath=outputpath, outfn=jobname,
                                 trackcmd=False)
    return jobmanager
Ejemplo n.º 29
0
def cuffdiff_v1(cmdset, runmode='test'):
    """Queue a single cuffdiff job using the cufflinks_1 binary.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, gtf,
               bam, inputpath and overwrite are popped from it; the rest
               (which must contain '--output-dir') is passed through
               configRobot.validParas against availParas['cuffdiff'].
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the '--output-dir' directory.

    The popped `cmd` is not used: the job always calls the hard-coded
    cufflinks_1 cuffdiff executable, with the GTF followed by the
    space-separated '<inputpath><sample>/<bam>' paths.  When '-p' (or,
    failing that, '--num-threads') is greater than 1 the job requests that
    many slots through the SGE option '-pe smp'.

    Returns the jobFactory.jobManager the job was created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Every path is followed by one space, including the last one.
    sampletext = ''.join(
        '%s%s/%s ' % (inputpath, sample, bam) for sample in samples)

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

    paraset = copy.deepcopy(cmdset)
    paraset = configRobot.validParas(paraset, availParas['cuffdiff'])
    cmdGenerator.checkPath(paraset['--output-dir'], create=createpath)
    CMD.append(cmdGenerator.formatCmd(
        '/ifs/home/c2b2/dp_lab/bc2252/SeqTool/cufflinks_1/cuffdiff',
        paraset, gtf, sampletext))
    CMD.append(cmdGenerator.formatCmd(
        'mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['--output-dir'])))

    # The first thread option that is present decides.  The long form used
    # to read paraset['-p'], which does not exist in that branch.
    sgeopt = []
    for threadkey in ('-p', '--num-threads'):
        if threadkey in paraset:
            if int(paraset[threadkey]) > 1:  # multi threads
                sgeopt = ['-pe smp ' + paraset[threadkey]]
            break

    jobmanager.createJob(jobname, CMD, outpath=paraset['--output-dir'],
                         outfn=jobname, sgeopt=sgeopt, trackcmd=False)
    return jobmanager
Ejemplo n.º 30
0
def cufflinks(cmdset, runmode='test'):
    """Queue one cufflinks job per sample.

    cmdset  -- dict of parameters.  cmd, mem, time, sample, prefix, bam,
               inputpath, outputpath and overwrite are popped from it; the
               rest, with '-o' set to '<outputpath><prefix>/<sample>', is
               passed through configRobot.validParas against
               availParas['cufflinks'].
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directories.

    Each job runs `cmd` on '<inputpath><sample>/<bam>' and then moves its
    job script into the sample's '-o' directory.  When '-p' (or, failing
    that, '--num-threads') is greater than 1 the job requests that many
    slots through the SGE option '-pe smp'.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    outputpath = cmdGenerator.checkPath(outputpath + '%s/' % prefix,
                                        create=createpath)

    # A single sample may arrive as a bare value; wrap it so the loop works.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    for sample in samples:
        jobname = prefix + '_' + sample
        CMD = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outputpath + sample
        paraset = configRobot.validParas(paraset, availParas['cufflinks'])
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        CMD.append(cmdGenerator.formatCmd(
            cmd, paraset, inputpath + '%s/' % sample + bam))
        CMD.append(cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['-o'])))

        # The first thread option that is present decides.  The long form
        # used to read paraset['-p'], which does not exist in that branch.
        sgeopt = []
        for threadkey in ('-p', '--num-threads'):
            if threadkey in paraset:
                if int(paraset[threadkey]) > 1:  # multi threads
                    sgeopt = ['-pe smp ' + paraset[threadkey]]
                break

        jobmanager.createJob(jobname, CMD, outpath=paraset['-o'],
                             outfn=jobname, sgeopt=sgeopt)
    return jobmanager
Ejemplo n.º 31
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def runBootstrapCCLE(cmdset, runmode='test'):
    """Queue MATLAB bootstrap jobs over the full grid of CCLE/Gray settings.

    cmdset  -- dict of parameters.  njob, dataset, pheno, usecn, useDisease,
               combineSplit, priormethod, ylogtransform, useallexp,
               cumulatecount and predList are first turned into lists by
               configRobot.makeParasList.  Optional keys and the values
               used when they are absent: trainmode ('false'),
               cumulatecount (['false']), runcv (no cross-validation
               index), samplinghead ('Samplings'), lassoparahead
               ('lassCvPara'), predList (['cancerGenes']).
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output directory.

    One job is created per grid point and job index.  It runs `iter` MATLAB
    invocations of `call`, wrapped in the module-level `matlabcmd2012`
    template for algo == 'lasso' and `matlabcmd` otherwise.  Iterations that
    already have a '<name>.finished' file in outputpath are left out, and a
    job with nothing left to run is not created.  Grid points with
    ylogtransform == 'true' are skipped for the phenotypes 'ACT' and 'GI50'.

    Returns the jobFactory.jobManager the jobs were created with.
    Raises ValueError when a command has to be built for a dataset that is
    neither 'joe' nor contains 'ccle' (case-insensitive).
    """
    createpath = runmode != 'test'

    cmdset = configRobot.makeParasList(cmdset, ['njob', 'dataset', 'pheno', 'usecn', 'useDisease', 'combineSplit', 'priormethod', 'ylogtransform', 'useallexp', 'cumulatecount', 'predList'])

    cmd, call, mem, time, prefix, njobs = configRobot.popParas(
        cmdset, ['cmd', 'call', 'mem', 'time', 'prefix', 'njob'])
    # `niter` rather than `iter`, so the builtin is not shadowed.
    niter, priormethod, algo, ylogtransform = configRobot.popParas(
        cmdset, ['iter', 'priormethod', 'algo', 'ylogtransform'])
    datasets, phenos, usecn, combineSplit, useDisease = configRobot.popParas(
        cmdset, ['dataset', 'pheno', 'usecn', 'combineSplit', 'useDisease'])
    useallexp = configRobot.popParas(cmdset, ['useallexp'])

    if 'trainmode' in cmdset:
        trainmode = configRobot.popParas(cmdset, ['trainmode'])
    else:
        trainmode = 'false'
    if 'cumulatecount' in cmdset:
        cumulatecount = configRobot.popParas(cmdset, ['cumulatecount'])
    else:
        # There was no fallback here, so a missing key left the name unbound
        # and the product below failed with NameError.
        cumulatecount = ['false']
    if 'runcv' in cmdset:
        runcv = range(1, int(configRobot.popParas(cmdset, ['runcv'])) + 1)
    else:
        runcv = [0]
    if 'samplinghead' in cmdset:
        sampling = configRobot.popParas(cmdset, ['samplinghead'])
    else:
        sampling = 'Samplings'
    if 'lassoparahead' in cmdset:
        lassopara = configRobot.popParas(cmdset, ['lassoparahead'])
    else:
        lassopara = 'lassCvPara'
    if 'predList' in cmdset:
        predlist = configRobot.popParas(cmdset, ['predList'])
    else:
        predlist = ['cancerGenes']

    iterlist = list(itertools.product(
        datasets, phenos, usecn, combineSplit, useDisease, priormethod,
        ylogtransform, useallexp, cumulatecount, runcv, predlist))

    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(outputpath, create=createpath)

    # Names of the iterations that already finished, without the directory
    # and the '.finished' suffix.
    f = popen('ls %s*.finished' % (outputpath))
    try:
        finfns = f.read().split()
    finally:
        f.close()
    # A comprehension replaces `map(lambda(l): ...)`, whose tuple-parameter
    # syntax is rejected by Python 3.
    finished = set(fn.replace(outputpath, '').replace('.finished', '')
                   for fn in finfns)

    for dataset, pheno, cnSet, splitSet, diseaseSet, prior, ylog, useExp, cumcount, cvidx, plist in iterlist:
        if pheno in ['ACT', 'GI50'] and ylog == 'true':
            continue
        # A single njob value applies to every dataset.
        if len(njobs) == 1:
            numjobs = int(njobs[0])
        else:
            numjobs = int(njobs[datasets.index(dataset)])

        # File/job name fragments that encode the settings of this grid point.
        cnStr = 'C1' if cnSet == 'true' else 'C0'
        splitStr = 'S1' if splitSet == 'true' else 'S0'
        diseaseStr = 'D1' if diseaseSet == 'true' else 'D0'
        priorStr = 'NRM' if prior == 'bootstrapnorm' else ''
        logStr = 'log' if ylog == 'true' else ''
        expStr = 'AE' if useExp == 'true' else ''
        cumStr = 'Cum' if cumcount == 'true' else ''
        cvStr = '' if cvidx == 0 else '%02d' % (cvidx)
        plStr = '' if plist == 'cancerGenes' else plist

        if pheno == '[]':
            pheno = ''

        # MATLAB data directory for this dataset; None means unknown.
        dsname = dataset.lower()
        if dsname == 'joe':
            datadir = './Gray/data'
        elif 'ccle' in dsname:
            datadir = './CCLE/data'
        else:
            datadir = None

        settings = cnStr + splitStr + diseaseStr + expStr + cumStr
        fnhead = prefix + cvStr + plStr + priorStr + logStr + settings

        for jobidx in range(1, numjobs + 1):
            jobname = (prefix + cvStr + plStr + priorStr + logStr + dataset
                       + pheno + settings + 'J%02d' % (jobidx))
            CMD = []
            for iteridx in range(1, int(niter) + 1):
                if fnhead + dataset + '_J%02dI%02d' % (jobidx, iteridx) in finished:
                    continue
                if datadir is None:
                    # The command prefix used to be left unbound (or stale
                    # from the previous iteration) for such a dataset.
                    raise ValueError(
                        "no MATLAB data path known for dataset %r" % (dataset,))
                functionCall = "addpath('%s'); tic; " % datadir

                functionCall = functionCall + call + "(%d, %d, %d, '%s', '%s', '%s', '%s', 'usecn', %s, 'combineSplit', %s, 'useDisease', %s, 'algo', '%s', 'priormethod', '%s', 'ylogtransform', %s, 'trainmode', %s, 'useallexp', %s, 'cumulatecount', %s, 'sampling', '%s', 'lassopara', '%s', 'predList', '%s'); toc;" % (
                    jobidx, numjobs, iteridx, outputpath, fnhead, dataset,
                    pheno, cnSet, splitSet, diseaseSet, algo, prior, ylog,
                    trainmode, useExp, cumcount, sampling + cvStr,
                    lassopara + cvStr, plist)

                if algo == 'lasso':
                    CMD.append(cmdGenerator.formatCmd(
                        matlabcmd2012 % (functionCall)))
                else:
                    CMD.append(cmdGenerator.formatCmd(
                        matlabcmd % (functionCall)))
            if len(CMD) > 0:
                CMD.append(cmdGenerator.formatCmd(
                    'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
                jobmanager.createJob(jobname, CMD, outpath=outputpath,
                                     outfn=jobname, trackcmd=False)
    return jobmanager
Ejemplo n.º 32
0
def picardQC(cmdset, runmode='test'):
    """Queue one job per sample that runs four Picard metrics collectors.

    cmdset  -- dict of parameters.  cmd, mem, time, bam, prefix, inputpath,
               outputpath, programpath, sample and overwrite are popped
               from it.  The rest (which must contain TMP_DIR) is extended
               with INPUT, OUTPUT, CHART_OUTPUT and SUMMARY_OUTPUT and,
               for each jar, passed through configRobot.validParas against
               that jar's availParas entry.
    runmode -- any value other than 'test' lets cmdGenerator.checkPath
               create the output and per-job temporary directories.

    The jars run in a fixed order: CollectRnaSeqMetrics,
    CollectMultipleMetrics, EstimateLibraryComplexity, CollectGcBiasMetrics.
    Afterwards the job moves its script to outputpath and removes the
    per-job TMP_DIR.

    Returns the jobFactory.jobManager the jobs were created with.
    """
    createpath = runmode != 'test'

    cmd, mem, time, bam, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    samples = cmdset.pop('sample')

    # The JVM heap is the job memory request (e.g. '8G') minus 2 GB.
    heapgb = int(mem.replace('G', '')) - 2
    javacmd = 'java -Xmx%dg -jar' % heapgb

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # (jar, output-name suffix) pairs in the order they are run.
    metricjars = [
        ('CollectRnaSeqMetrics.jar', 'RnaSeq'),
        ('CollectMultipleMetrics.jar', ''),
        ('EstimateLibraryComplexity.jar', 'Lib'),
        ('CollectGcBiasMetrics.jar', 'GC'),
    ]

    for sample in samples:
        jobname = prefix + '_' + sample
        allcmds = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        paraset = copy.deepcopy(cmdset)
        # bam == '=sample' means the sample name itself is the BAM file.
        if bam == '=sample':
            paraset['INPUT'] = '=%s' % (inputpath + sample)
        else:
            paraset['INPUT'] = '=%s/%s' % (inputpath + sample, bam)
        # Give every job its own temporary directory under TMP_DIR.
        paraset['TMP_DIR'] = paraset['TMP_DIR'] + prefix + '_' + sample + '/'
        cmdGenerator.checkPath(paraset['TMP_DIR'].strip('='),
                               create=createpath)

        for jar, suffix in metricjars:
            # CollectMultipleMetrics gets a bare base name; the other
            # collectors get '<sample>.<suffix>.txt'.
            if 'MultipleMetrics' in jar:
                output = '=%s' % (outputpath + sample + suffix)
            else:
                output = '=%s.txt' % (outputpath + sample + '.' + suffix)
            paraset['OUTPUT'] = output
            paraset['CHART_OUTPUT'] = output.replace('.txt', '.pdf')
            paraset['SUMMARY_OUTPUT'] = output.replace('.txt', '.summary.txt')

            # filter out parameters that are not supported
            metricparaset = configRobot.validParas(paraset, availParas[jar])
            allcmds.append(
                cmdGenerator.formatCmd(javacmd, programpath + jar,
                                       metricparaset))

        allcmds.append(cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
        allcmds.append(cmdGenerator.formatCmd(
            'rm -Rf', paraset['TMP_DIR'].strip('=')))
        jobmanager.createJob(jobname, allcmds,
                             outpath=outputpath, outfn=jobname)
    return jobmanager
Ejemplo n.º 33
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def runCV(cmdset, runmode='test'):
    """Create MATLAB cross-validation jobs for every parameter combination.

    The settings listed in ``njob``, ``dataset``, ``pheno``, ``usecn``,
    ``useDisease``, ``combineSplit`` and ``ylogtransform`` are turned into
    lists by ``configRobot.makeParasList``; the Cartesian product of
    dataset / pheno / usecn / combineSplit / useDisease / ylogtransform /
    CV index is then walked, and ``numjobs`` jobs are created for each
    combination.

    cmdset  -- parameter mapping; the keys used here are popped from it.
    runmode -- 'test' keeps ``checkPath`` from creating the output
               directory; any other value allows creation.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    """
    # Only a non-test run may create the output directory.
    createpath = not (runmode == 'test')

    listparas = ['njob', 'dataset', 'pheno', 'usecn', 'useDisease', 'combineSplit', 'ylogtransform']
    cmdset = configRobot.makeParasList(cmdset, listparas)

    cmd, call, mem, time, prefix, njobs = configRobot.popParas(
        cmdset, ['cmd', 'call', 'mem', 'time', 'prefix', 'njob'])
    datasets, phenos, usecn, useDisease, combineSplit = configRobot.popParas(
        cmdset, ['dataset', 'pheno', 'usecn', 'useDisease', 'combineSplit'])
    ylogtransform, trainmode = configRobot.popParas(
        cmdset, ['ylogtransform', 'trainmode'])

    # Optional settings: start from the default, override when configured.
    # NOTE(review): useallexp is popped but not used further in this function.
    useallexp = 'false'
    if 'useallexp' in cmdset.keys():
        useallexp = configRobot.popParas(cmdset, ['useallexp'])
    samplinghead = 'Samplings'
    if 'samplinghead' in cmdset.keys():
        samplinghead = configRobot.popParas(cmdset, ['samplinghead'])
    # Without 'runcv' a single pass with CV index 0 (no CV tag) is made.
    runcv = [0]
    if 'runcv' in cmdset.keys():
        runcv = range(1, int(configRobot.popParas(cmdset, ['runcv'])) + 1)

    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(outputpath, create=createpath)

    # Argument list appended after the MATLAB function name given by `call`.
    callargs = "(%d, %d, '%s', '%s', '%s', '%s', 'usecn', %s, 'combineSplit', %s, 'useDisease', %s, 'ylogtransform', %s, 'trainmode', %s, 'sampling', '%s'); toc;"

    combos = itertools.product(datasets, phenos, usecn, combineSplit, useDisease, ylogtransform, runcv)

    for dataset, pheno, cnSet, splitSet, diseaseSet, ylog, cvidx in combos:
        # Log-transformed targets are skipped for the ACT and GI50 phenotypes.
        if ylog == 'true' and pheno in ['ACT', 'GI50']:
            continue

        # A single njob entry applies to every dataset; otherwise the entry
        # at the dataset's own position is used.
        if len(njobs) == 1:
            jobcount = njobs[0]
        else:
            jobcount = njobs[datasets.index(dataset)]
        numjobs = int(jobcount)

        cnStr = 'C1' if cnSet == 'true' else 'C0'
        splitStr = 'S1' if splitSet == 'true' else 'S0'
        diseaseStr = 'D1' if diseaseSet == 'true' else 'D0'
        logStr = 'log' if ylog == 'true' else ''
        cvStr = '' if cvidx == 0 else '%02d' % (cvidx)

        # The literal '[]' stands for "no phenotype".
        if pheno == '[]':
            pheno = ''

        flagtag = cnStr + splitStr + diseaseStr

        for jobidx in range(1, numjobs + 1):
            fnhead = prefix + cvStr + logStr + flagtag
            jobname = prefix + cvStr + logStr + dataset + pheno + flagtag + 'J%02d' % (jobidx)

            # The 'joe' dataset reads from the Gray data directory, all
            # other datasets from the CCLE one.
            if dataset.lower() == 'joe':
                setup = "addpath('./Gray/data'); tic; "
            else:
                setup = "addpath('./CCLE/data'); tic; "

            functionCall = setup + call + callargs % (
                jobidx, numjobs, outputpath, fnhead, dataset, pheno,
                cnSet, splitSet, diseaseSet, ylog, trainmode,
                samplinghead + cvStr)

            CMD = [
                cmdGenerator.formatCmd(matlabcmd2012 % (functionCall)),
                cmdGenerator.formatCmd('mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)),
            ]
            jobmanager.createJob(jobname, CMD, outpath=outputpath, outfn=jobname, trackcmd=False)
    return jobmanager
Ejemplo n.º 34
0
def preGATK(cmdset, runmode='test'):
    """Create one BAM pre-processing job per sample ahead of GATK analysis.

    Each job chains: samtools view (filter) -> Picard ReorderSam ->
    Picard AddOrReplaceReadGroups -> Picard MarkDuplicates (+ samtools
    index) -> GATK RealignerTargetCreator -> GATK IndelRealigner, deleting
    intermediate files along the way and finally moving the job script
    into the sample directory.

    cmdset  -- parameter mapping; the keys used here are popped from it.
               It must contain '-R' and '-filterMBQ' (both are deleted
               from the samtools parameter copy).
    runmode -- accepted for interface parity with the other job builders;
               it does not influence the commands built here.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    """
    formatcmd = cmdGenerator.formatCmd

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    picardpath = cmdGenerator.checkPath(cmdset.pop('picardpath'))
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # Java launcher: optional temp-dir setting, heap 2 GB below the job memory.
    javacmd = 'java'
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java -Djava.io.tmpdir' + cmdGenerator.checkPath(cmdset.pop('-Djava.io.tmpdir'))
    javacmd += ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    # Command prefixes for the individual tools.
    samview = 'samtools view -b -h -F 264'
    idxcmd = 'samtools index'
    clearup = 'rm -f '
    reorder = picardpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = picardpath + 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdupjar = picardpath + 'MarkDuplicates.jar'
    GATK = gatkpath + 'GenomeAnalysisTK.jar '
    createTg = '-T RealignerTargetCreator '
    realign = '-T IndelRealigner '

    for sample in samples:
        CMDs = [formatcmd('source ~/libraries/setup_seqtools')]
        jobname = prefix + '_' + sample
        sampledir = inputpath + sample

        # Step 1: filter reads with samtools; -R / -filterMBQ are removed
        # from this step's parameter copy.
        viewparas = copy.deepcopy(cmdset)
        lastoutput = '%s/%s.filter.bam' % (sampledir, bam.replace('.bam', ''))
        viewparas['-o'] = lastoutput
        del viewparas['-R']
        del viewparas['-filterMBQ']
        CMDs.append(formatcmd(samview, viewparas, sampledir + '/' + bam))

        # Step 2: reorder against the reference, then drop the filtered BAM.
        reorderparas = copy.deepcopy(cmdset)
        reorderparas['INPUT'] = '=%s' % lastoutput
        reorderparas['OUTPUT'] = '=%s.reorder.bam' % (lastoutput.replace('.bam', ''))
        reorderparas['REFERENCE'] = '=%s' % reorderparas['-R']
        reorderparas = configRobot.validParas(reorderparas, availParas['ReorderSam.jar'])
        CMDs.extend([
            formatcmd(javacmd, reorder, reorderparas),
            formatcmd(clearup, lastoutput),
        ])
        lastoutput = reorderparas['OUTPUT'].strip('=')

        # Step 3: add read groups (sample name as RGSM), drop the reordered BAM.
        rgparas = copy.deepcopy(cmdset)
        rgparas['INPUT'] = '=%s' % lastoutput
        rgparas['OUTPUT'] = '=%s.addRG.bam' % (lastoutput.replace('.bam', ''))
        rgparas['RGSM'] = '=%s' % sample
        rgparas = configRobot.validParas(rgparas, availParas['AddOrReplaceReadGroups.jar'])
        CMDs.extend([
            formatcmd(javacmd, RG, rgparas),
            formatcmd(clearup, lastoutput),
        ])
        lastoutput = rgparas['OUTPUT'].strip('=')

        # Step 4: mark duplicates and index the result; the read-group BAM
        # is not deleted by this step.
        mdupparas = copy.deepcopy(cmdset)
        mdupparas['INPUT'] = '=%s' % lastoutput
        mdupparas['OUTPUT'] = '=%s.mdup.bam' % (lastoutput.replace('.bam', ''))
        mdupparas['METRICS_FILE'] = '=%s/%s' % (sampledir, prefix + '_mdupmetrics.txt')
        mdupparas = configRobot.validParas(mdupparas, availParas['MarkDuplicates.jar'])
        CMDs.append(formatcmd(javacmd, mdupjar, mdupparas))
        lastoutput = mdupparas['OUTPUT'].strip('=')
        CMDs.append(formatcmd(idxcmd, lastoutput))

        # Step 5: create realignment target intervals.
        gatkparas = copy.deepcopy(cmdset)
        gatkparas['-I'] = lastoutput
        gatkparas['-o'] = lastoutput.replace('.bam', '.intervals')
        CMDs.append(formatcmd(javacmd, GATK + createTg, gatkparas))

        # Step 6: realign, reusing the same parameters with the interval
        # file as target and a new output name.
        gatkparas['-targetIntervals'] = gatkparas['-o']
        gatkparas['-o'] = lastoutput.replace('.bam', '.realign.bam')
        CMDs.append(formatcmd(javacmd, GATK + realign, gatkparas))

        # Step 7: remove the duplicate-marked BAM and the interval file,
        # then move the job script next to the sample data.
        CMDs.extend([
            formatcmd(clearup, lastoutput),
            formatcmd(clearup, lastoutput.replace('.bam', '.intervals')),
            formatcmd('mv ./%s%s %s' % (jobname, jobmanager.ext, sampledir)),
        ])
        jobmanager.createJob(jobname, CMDs, outpath=sampledir, outfn=jobname)

    return jobmanager
Ejemplo n.º 35
0
def preGATK(cmdset, runmode='test'):
    """Create one BAM pre-processing job per sample ahead of GATK analysis.

    Each job chains: samtools view (filter) -> Picard ReorderSam ->
    Picard AddOrReplaceReadGroups -> Picard MarkDuplicates (+ samtools
    index) -> GATK RealignerTargetCreator -> GATK IndelRealigner, deleting
    intermediate files along the way and finally moving the job script
    into the sample directory.

    cmdset  -- parameter mapping; the keys used here are popped from it.
               It must contain '-R' and '-filterMBQ' (both are deleted
               from the samtools parameter copy).
    runmode -- accepted for interface parity with the other job builders;
               it does not influence the commands built here.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    """
    formatcmd = cmdGenerator.formatCmd

    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    picardpath = cmdGenerator.checkPath(cmdset.pop('picardpath'))
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    # Java launcher: optional temp-dir setting, heap 2 GB below the job memory.
    javacmd = 'java'
    if '-Djava.io.tmpdir' in cmdset.keys():
        javacmd = 'java -Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    javacmd += ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    # Command prefixes for the individual tools.
    samview = 'samtools view -b -h -F 264'
    idxcmd = 'samtools index'
    clearup = 'rm -f '
    reorder = picardpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = picardpath + 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdupjar = picardpath + 'MarkDuplicates.jar'
    GATK = gatkpath + 'GenomeAnalysisTK.jar '
    createTg = '-T RealignerTargetCreator '
    realign = '-T IndelRealigner '

    for sample in samples:
        CMDs = [formatcmd('source ~/libraries/setup_seqtools')]
        jobname = prefix + '_' + sample
        sampledir = inputpath + sample

        # Step 1: filter reads with samtools; -R / -filterMBQ are removed
        # from this step's parameter copy.
        viewparas = copy.deepcopy(cmdset)
        lastoutput = '%s/%s.filter.bam' % (sampledir,
                                           bam.replace('.bam', ''))
        viewparas['-o'] = lastoutput
        del viewparas['-R']
        del viewparas['-filterMBQ']
        CMDs.append(formatcmd(samview, viewparas, sampledir + '/' + bam))

        # Step 2: reorder against the reference, then drop the filtered BAM.
        reorderparas = copy.deepcopy(cmdset)
        reorderparas['INPUT'] = '=%s' % lastoutput
        reorderparas['OUTPUT'] = '=%s.reorder.bam' % (lastoutput.replace(
            '.bam', ''))
        reorderparas['REFERENCE'] = '=%s' % reorderparas['-R']
        reorderparas = configRobot.validParas(reorderparas,
                                              availParas['ReorderSam.jar'])
        CMDs.extend([
            formatcmd(javacmd, reorder, reorderparas),
            formatcmd(clearup, lastoutput),
        ])
        lastoutput = reorderparas['OUTPUT'].strip('=')

        # Step 3: add read groups (sample name as RGSM), drop the reordered BAM.
        rgparas = copy.deepcopy(cmdset)
        rgparas['INPUT'] = '=%s' % lastoutput
        rgparas['OUTPUT'] = '=%s.addRG.bam' % (lastoutput.replace('.bam', ''))
        rgparas['RGSM'] = '=%s' % sample
        rgparas = configRobot.validParas(
            rgparas, availParas['AddOrReplaceReadGroups.jar'])
        CMDs.extend([
            formatcmd(javacmd, RG, rgparas),
            formatcmd(clearup, lastoutput),
        ])
        lastoutput = rgparas['OUTPUT'].strip('=')

        # Step 4: mark duplicates and index the result; the read-group BAM
        # is not deleted by this step.
        mdupparas = copy.deepcopy(cmdset)
        mdupparas['INPUT'] = '=%s' % lastoutput
        mdupparas['OUTPUT'] = '=%s.mdup.bam' % (lastoutput.replace('.bam', ''))
        mdupparas['METRICS_FILE'] = '=%s/%s' % (sampledir,
                                                prefix + '_mdupmetrics.txt')
        mdupparas = configRobot.validParas(mdupparas,
                                           availParas['MarkDuplicates.jar'])
        CMDs.append(formatcmd(javacmd, mdupjar, mdupparas))
        lastoutput = mdupparas['OUTPUT'].strip('=')
        CMDs.append(formatcmd(idxcmd, lastoutput))

        # Step 5: create realignment target intervals.
        gatkparas = copy.deepcopy(cmdset)
        gatkparas['-I'] = lastoutput
        gatkparas['-o'] = lastoutput.replace('.bam', '.intervals')
        CMDs.append(formatcmd(javacmd, GATK + createTg, gatkparas))

        # Step 6: realign, reusing the same parameters with the interval
        # file as target and a new output name.
        gatkparas['-targetIntervals'] = gatkparas['-o']
        gatkparas['-o'] = lastoutput.replace('.bam', '.realign.bam')
        CMDs.append(formatcmd(javacmd, GATK + realign, gatkparas))

        # Step 7: remove the duplicate-marked BAM and the interval file,
        # then move the job script next to the sample data.
        CMDs.extend([
            formatcmd(clearup, lastoutput),
            formatcmd(clearup, lastoutput.replace('.bam', '.intervals')),
            formatcmd('mv ./%s%s %s' %
                      (jobname, jobmanager.ext, sampledir)),
        ])
        jobmanager.createJob(jobname,
                             CMDs,
                             outpath=sampledir,
                             outfn=jobname)

    return jobmanager
Ejemplo n.º 36
0
def filetersingleton(cmdset, runmode='test'):
    """Create one job per sample that reorders, tags, filters and
    de-duplicates a BAM file.

    Each job runs, in order:
      1. Picard ReorderSam            X.bam -> X.reorder.bam
      2. Picard AddOrReplaceReadGroups      -> X.reorder.addRG.bam
      3. samtools view -F 8                 -> X.reorder.addRG.filter.bam
      4. Picard MarkDuplicates              -> X.reorder.addRG.filter.mdup.bam
      5. samtools index on the final BAM
    and then removes X.reorder.bam and moves the job script into the
    sample directory. All files live in ``inputpath + sample``.

    Every step now reads exactly the file the previous step wrote. The
    earlier version built the MarkDuplicates OUTPUT with a two-placeholder
    format string and a single argument (TypeError on the first sample),
    pointed samtools view at a non-existent ``X.addRG.bam``, and indexed a
    bare file name without its directory.

    cmdset  -- parameter mapping; the keys used here are popped from it.
    runmode -- accepted for interface parity with the other job builders;
               it does not influence the commands built here.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    """
    cmd, mem, time, samples, prefix, bam = configRobot.popParas(cmdset,['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    # TMP_DIR is taken out of cmdset and re-added only to the parameter set
    # shared by the ReorderSam / AddOrReplaceReadGroups steps below.
    # NOTE(review): MarkDuplicates does not receive TMP_DIR -- confirm
    # whether that is intended.
    if 'TMP_DIR' in cmdset.keys():
        TMP_DIR = cmdset.pop('TMP_DIR')
    else:
        TMP_DIR = ''

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    setuppathcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    # Java heap is set 1 GB below the memory requested for the job.
    javacmd = 'java -Xmx%dg -jar'%(int(mem.replace('G',''))-1)
    samview = 'samtools view -b -h -F 8'
    reorder = 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdup = 'MarkDuplicates.jar'

    for sample in samples:
        jobfnprefix = prefix + '_' + sample
        sampledir = inputpath + sample

        # File names of the whole chain, derived once so that every step
        # agrees on them.
        stem = '%s/%s'%(sampledir, bam.replace('.bam',''))
        inputbam = '%s/%s'%(sampledir, bam)
        reorderbam = stem + '.reorder.bam'
        rgbam = stem + '.reorder.addRG.bam'
        filterbam = stem + '.reorder.addRG.filter.bam'
        mdupbam = stem + '.reorder.addRG.filter.mdup.bam'

        # Only the reordered BAM is scheduled for removal.
        tmpbam = [reorderbam]

        CMDs = []
        CMDs.append(setuppathcmd)

        # 1. reorder
        paraset = copy.deepcopy(cmdset)
        if TMP_DIR != '': paraset['TMP_DIR'] = '%s'%TMP_DIR
        paraset['INPUT'] = '=%s'%inputbam
        paraset['OUTPUT'] = '=%s'%reorderbam
        CMDs.append( cmdGenerator.formatCmd(javacmd, programpath+reorder, paraset) )

        # 2. add read groups (same parameter set, sample name as RGSM)
        paraset['INPUT'] = '=%s'%reorderbam
        paraset['OUTPUT'] = '=%s'%rgbam
        paraset['RGSM'] = '=%s'%sample
        CMDs.append( cmdGenerator.formatCmd(javacmd, programpath+RG, paraset) )

        # 3. filter with samtools, reading the read-group BAM just written
        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = filterbam
        CMDs.append( cmdGenerator.formatCmd(samview, paraset, rgbam) )

        # 4. mark duplicates on the filtered BAM
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s'%filterbam
        paraset['OUTPUT'] = '=%s'%mdupbam
        CMDs.append( cmdGenerator.formatCmd(javacmd, programpath+mdup, paraset) )

        # 5. index the final BAM where it was written
        CMDs.append( cmdGenerator.formatCmd('samtools index', mdupbam) )

        CMDs.append( cmdGenerator.formatCmd('rm -f', tmpbam) )
        CMDs.append( cmdGenerator.formatCmd('mv ./%s%s %s'%(jobfnprefix, jobmanager.ext, sampledir)) )

        jobmanager.createJob(jobfnprefix, CMDs, outpath = sampledir, outfn = jobfnprefix)
    return jobmanager
Ejemplo n.º 37
0
def filetersingleton(cmdset, runmode='test'):
    """Create one job per sample that reorders, tags, filters and
    de-duplicates a BAM file.

    Each job runs, in order:
      1. Picard ReorderSam            X.bam -> X.reorder.bam
      2. Picard AddOrReplaceReadGroups      -> X.reorder.addRG.bam
      3. samtools view -F 8                 -> X.reorder.addRG.filter.bam
      4. Picard MarkDuplicates              -> X.reorder.addRG.filter.mdup.bam
      5. samtools index on the final BAM
    and then removes X.reorder.bam and moves the job script into the
    sample directory. All files live in ``inputpath + sample``.

    Every step now reads exactly the file the previous step wrote. The
    earlier version built the MarkDuplicates OUTPUT with a two-placeholder
    format string and a single argument (TypeError on the first sample),
    pointed samtools view at a non-existent ``X.addRG.bam``, and indexed a
    bare file name without its directory.

    cmdset  -- parameter mapping; the keys used here are popped from it.
    runmode -- accepted for interface parity with the other job builders;
               it does not influence the commands built here.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    """
    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    # TMP_DIR is taken out of cmdset and re-added only to the parameter set
    # shared by the ReorderSam / AddOrReplaceReadGroups steps below.
    # NOTE(review): MarkDuplicates does not receive TMP_DIR -- confirm
    # whether that is intended.
    if 'TMP_DIR' in cmdset.keys():
        TMP_DIR = cmdset.pop('TMP_DIR')
    else:
        TMP_DIR = ''

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    setuppathcmd = cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')

    # Java heap is set 1 GB below the memory requested for the job.
    javacmd = 'java -Xmx%dg -jar' % (int(mem.replace('G', '')) - 1)
    samview = 'samtools view -b -h -F 8'
    reorder = 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdup = 'MarkDuplicates.jar'

    for sample in samples:
        jobfnprefix = prefix + '_' + sample
        sampledir = inputpath + sample

        # File names of the whole chain, derived once so that every step
        # agrees on them.
        stem = '%s/%s' % (sampledir, bam.replace('.bam', ''))
        inputbam = '%s/%s' % (sampledir, bam)
        reorderbam = stem + '.reorder.bam'
        rgbam = stem + '.reorder.addRG.bam'
        filterbam = stem + '.reorder.addRG.filter.bam'
        mdupbam = stem + '.reorder.addRG.filter.mdup.bam'

        # Only the reordered BAM is scheduled for removal.
        tmpbam = [reorderbam]

        CMDs = []
        CMDs.append(setuppathcmd)

        # 1. reorder
        paraset = copy.deepcopy(cmdset)
        if TMP_DIR != '': paraset['TMP_DIR'] = '%s' % TMP_DIR
        paraset['INPUT'] = '=%s' % inputbam
        paraset['OUTPUT'] = '=%s' % reorderbam
        CMDs.append(
            cmdGenerator.formatCmd(javacmd, programpath + reorder, paraset))

        # 2. add read groups (same parameter set, sample name as RGSM)
        paraset['INPUT'] = '=%s' % reorderbam
        paraset['OUTPUT'] = '=%s' % rgbam
        paraset['RGSM'] = '=%s' % sample
        CMDs.append(cmdGenerator.formatCmd(javacmd, programpath + RG, paraset))

        # 3. filter with samtools, reading the read-group BAM just written
        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = filterbam
        CMDs.append(cmdGenerator.formatCmd(samview, paraset, rgbam))

        # 4. mark duplicates on the filtered BAM
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % filterbam
        paraset['OUTPUT'] = '=%s' % mdupbam
        CMDs.append(
            cmdGenerator.formatCmd(javacmd, programpath + mdup, paraset))

        # 5. index the final BAM where it was written
        CMDs.append(cmdGenerator.formatCmd('samtools index', mdupbam))

        CMDs.append(cmdGenerator.formatCmd('rm -f', tmpbam))
        CMDs.append(
            cmdGenerator.formatCmd(
                'mv ./%s%s %s' %
                (jobfnprefix, jobmanager.ext, sampledir)))

        jobmanager.createJob(jobfnprefix,
                             CMDs,
                             outpath=sampledir,
                             outfn=jobfnprefix)
    return jobmanager
Ejemplo n.º 38
0
Archivo: lego.py Proyecto: bjcbjc/mylib
def testSim(cmdset, runmode='test'):
    """Create MATLAB simulation-test jobs for every parameter combination.

    The Cartesian product of dataset / cumulatecount / noiseidx / costCoef /
    priormethod / transresidual / btthres / adaptiveSigmoid /
    splitpriornorm is walked; combinations that are not meaningful are
    skipped (see the comments in the loop) and ``numjobs`` jobs are created
    for each remaining one.

    cmdset  -- parameter mapping; the keys used here are popped from it.
    runmode -- 'test' keeps ``checkPath`` from creating the output
               directory; any other value allows creation.

    Returns the ``jobFactory.jobManager`` the jobs were registered with.
    Raises SystemExit(1), after printing a message, when a prior method
    other than bootstrap / bootstraplinear / bootstrapexpected / bayes is
    encountered.
    """
    createpath = runmode != 'test'

    cmdset = configRobot.makeParasList(cmdset, ['njob', 'dataset', 'cumulatecount', 'costCoef', 'priormethod', 'transresidual', 'btthres', 'adaptiveSigmoid', 'splitpriornorm'])

    cmd, call, mem, time, prefix, njobs = configRobot.popParas(cmdset, ['cmd', 'call', 'mem', 'time', 'prefix', 'njob'])
    # NOTE(review): 'noiseidx' is iterated below but is not in the
    # makeParasList key list above -- confirm it always arrives as a list.
    datasets, algo, noiseidx, cumulatecount, costCoef, priormethod, transresidual, btthres, adaptiveSigmoid, splitpriornorm = configRobot.popParas(cmdset, ['dataset', 'algo', 'noiseidx', 'cumulatecount', 'costCoef', 'priormethod','transresidual', 'btthres', 'adaptiveSigmoid', 'splitpriornorm'])

    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    cmdGenerator.checkPath(outputpath, create=createpath)

    # Short tag used in file/job names for each supported prior method.
    priortags = {
        'bootstrap': 'BT',
        'bootstraplinear': 'BL',
        'bootstrapexpected': 'BE',
        'bayes': 'BY',
    }

    # The 'lasso' algorithm uses the MATLAB 2012 launcher template.
    if algo == 'lasso':
        matlabtemplate = matlabcmd2012
    else:
        matlabtemplate = matlabcmd

    # Argument list appended after the MATLAB function name given by `call`.
    callargs = "(%d, %d, '%s', '%s', %s, '%s', '%s', 'cumulatecount', %s, 'costCoef', %s, 'priormethod', '%s', 'transresidual', %s, 'adaptiveSigmoid', %s, 'btthres', %s, 'splitpriornorm', %s); toc;"

    iterlist = list(itertools.product( datasets, cumulatecount, noiseidx, costCoef, priormethod, transresidual, btthres, adaptiveSigmoid, splitpriornorm ) )

    for dataset, cumcount, nsidx, betacost, prmethod, transres, btthreshold, adaptSig, splitNorm in iterlist:
        # A single njob entry applies to every dataset; otherwise the entry
        # at the dataset's own position is used.
        if len(njobs) == 1:
            numjobs = int(njobs[0])
        else:
            numjobs = int(njobs[ datasets.index(dataset) ] )

        if prmethod not in priortags:
            # Single-argument print emits the same text on Python 2 and 3.
            print('unknown prior method: %s' % (prmethod,))
            raise SystemExit(1)
        prStr = priortags[prmethod]

        if transres == 'true':
            # Residual transformation is only run with the plain bootstrap prior.
            if prmethod != 'bootstrap': continue
            resStr = 'Resd1' + '_' + btthreshold
        else:
            # Without it only the btthres value '0.3' is run.
            if btthreshold != '0.3': continue
            resStr = 'Resd0' + '_0.3'

        # Split prior normalisation is only run with bootstrapexpected.
        if splitNorm == 'true' and prmethod != 'bootstrapexpected': continue

        if adaptSig == 'true': sigStr = 'ADSig1'
        else: sigStr = 'ADSig0'
        if splitNorm == 'true': splitnormStr = 'SPN1'
        else: splitnormStr = 'SPN0'

        # Dots are removed from the residual tag (e.g. '_0.3' becomes '_03').
        resStr = resStr.replace('.','')
        Nstr = 'N%s'%(nsidx)
        # The cumulate-count and cost-coefficient tags are deliberately left
        # out of the names; both values are still passed to the MATLAB call.
        cumStr = ''
        costStr = ''

        fnhead = prefix + dataset + cumStr + costStr + Nstr + prStr + resStr + sigStr + splitnormStr

        for jobidx in range(1, numjobs+1):
            jobname = fnhead + 'J%02d'%(jobidx)

            functionCall = "tic; " + call + callargs%(jobidx, numjobs, dataset, algo, nsidx, outputpath, fnhead, cumcount, betacost, prmethod, transres, adaptSig, btthreshold, splitNorm)

            CMD = []
            CMD.append( cmdGenerator.formatCmd( matlabtemplate%(functionCall) ) )
            CMD.append( cmdGenerator.formatCmd('mv ./%s%s %s'%(jobname, jobmanager.ext, outputpath)) )
            jobmanager.createJob(jobname, CMD, outpath=outputpath, outfn=jobname, trackcmd=False)
    return jobmanager