Esempio n. 1
0
def markDup(cmdset, runmode='test'):
    """Create one MarkDuplicates job per sample and return the job manager.

    Each job sources the sequencing-tools setup script, runs
    ``MarkDuplicates.jar`` on ``<inputpath><sample>/<bam>``, indexes the
    ``.mdup.bam`` output with ``samtools index`` and moves
    ``./<jobname><jobmanager.ext>`` into the sample directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'programpath'
            and 'overwrite' are popped here, and 'cmd', 'mem', 'time',
            'sample', 'prefix' and 'bam' are fetched through
            ``configRobot.popParas`` (presumably popped as well). The
            remaining entries are filtered against
            ``availParas['MarkDuplicates.jar']`` and passed to the jar.
        runmode: Accepted for symmetry with the other job builders; nothing
            in this function depends on it.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    # 'cmd' is fetched together with the other settings but not used below.
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))
    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # The JVM heap is one gigabyte smaller than the job's memory request.
    heap_gb = int(mem.replace('G', '')) - 1
    javacmd = 'java -Xmx%dg -jar' % heap_gb
    mdupjar = 'MarkDuplicates.jar'
    idxcmd = 'samtools index'

    for sample in samples:
        sampledir = inputpath + sample
        jobname = prefix + '_' + sample

        # Values carry a leading '=' (KEY=value style arguments).
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s/%s' % (sampledir, bam)
        paraset['OUTPUT'] = paraset['INPUT'].replace('.bam', '.mdup.bam')
        paraset['METRICS_FILE'] = '=%s/%s' % (
            sampledir, prefix + '_mdupmetrics.txt')
        paraset = configRobot.validParas(paraset, availParas[mdupjar])

        CMDs = [
            cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'),
            cmdGenerator.formatCmd(javacmd, programpath + mdupjar, paraset),
            # samtools needs the bare path, so drop the '=' marker.
            cmdGenerator.formatCmd(idxcmd, paraset['OUTPUT'].strip('=')),
            cmdGenerator.formatCmd(
                'mv ./%s%s %s' % (jobname, jobmanager.ext, sampledir)),
        ]
        jobmanager.createJob(jobname, CMDs, outpath=sampledir, outfn=jobname)
    return jobmanager
Esempio n. 2
0
def cuffcompare(cmdset, runmode='test'):
    """Create a single cuffcompare job that covers every sample.

    The job sources the sequencing-tools setup script, runs ``cmd`` with the
    filtered parameters followed by the space-separated list of
    ``<inputpath><sample>/<gtf>`` files, and moves
    ``./<jobname><jobmanager.ext>`` into the output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'gtf' are fetched through ``configRobot.popParas``
            (presumably popped as well). It must contain '-o', which is
            prefixed with the output path.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if type(samples) not in (list, tuple):
        samples = [samples]

    # Every path keeps its trailing blank, exactly as the command expects.
    sampletext = ''.join(
        '%s%s/%s ' % (inputpath, sample, gtf) for sample in samples)

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']
    paraset = configRobot.validParas(paraset, availParas['cuffcompare'])
    CMD += [
        cmdGenerator.formatCmd(cmd, paraset, sampletext),
        cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)),
    ]

    jobmanager.createJob(
        jobname, CMD, outpath=outputpath, outfn=jobname, trackcmd=False)
    return jobmanager
Esempio n. 3
0
def RSeQC(cmdset, runmode='test'):
    """Create one job per sample that runs a fixed set of RSeQC scripts.

    For each sample the job sources the sequencing-tools setup script, runs
    every script in ``programs`` with ``python`` on
    ``<inputpath><sample>/<bam>`` and moves ``./<jobname><jobmanager.ext>``
    into the output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath' and 'overwrite' are popped here; 'cmd', 'mem',
            'time', 'sample', 'bam' and 'prefix' are fetched through
            ``configRobot.popParas`` (presumably popped as well). The rest is
            filtered per script against ``availParas[<script>]``.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    createpath = runmode != 'test'

    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(
        cmdset, ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    programs = [
        'inner_distance.py',
        'junction_annotation.py',
        'junction_saturation.py',
        'read_GC.py',
        'read_duplication.py',
    ]

    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
        for prog in programs:
            # Output stem: <outputpath><sample>.<script name without .py>
            outstem = outputpath + sample + '.%s' % (prog.replace('.py', ''))

            paraset = copy.deepcopy(cmdset)
            paraset['-i'] = inputpath + sample + '/' + bam
            paraset['-o'] = outstem
            paraset = configRobot.validParas(paraset, availParas[prog])
            # If the filter dropped '-o', redirect stdout to the stem instead.
            if '-o' not in paraset:
                paraset['>'] = outstem

            CMDs.append(
                cmdGenerator.formatCmd('python', programpath + prog, paraset))
        CMDs.append(cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)
    return jobmanager
Esempio n. 4
0
def cuffmerge(cmdset, runmode='test'):
    """Create a single cuffmerge job that merges the per-sample GTF files.

    The job sources the sequencing-tools setup script, writes the list of
    ``<inputpath><sample>/<gtf>`` paths to ``<output>.samples`` with ``echo``,
    runs ``cmd`` on that list, moves ``./<jobname><jobmanager.ext>`` into the
    output directory and finally removes the ``.samples`` file.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'gtf' are fetched through ``configRobot.popParas``
            (presumably popped as well). It must contain '-o', which is
            prefixed with the output path. The remaining entries are filtered
            against ``availParas['cuffmerge']``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.

    Raises:
        KeyError: If 'inputpath', 'outputpath', 'overwrite' or '-o' is
            missing from ``cmdset``.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # One path per entry, each followed by a literal backslash-n inside a
    # double-quoted string for ``echo``.
    # NOTE(review): whether echo expands "\n" depends on the job's shell -
    # confirm the .samples file really ends up with one path per line.
    sampletext = '"%s"' % ''.join(
        '%s%s/%s\\n' % (inputpath, sample, gtf) for sample in samples)

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))
    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']

    samplelist = paraset['-o'] + '.samples'
    CMD.append(cmdGenerator.formatCmd('echo', sampletext, '>', samplelist))

    paraset = configRobot.validParas(paraset, availParas['cuffmerge'])
    cmdGenerator.checkPath(paraset['-o'], create=createpath)
    samplelist = paraset['-o'] + '.samples'
    CMD.append(cmdGenerator.formatCmd(cmd, paraset, samplelist))
    CMD.append(
        cmdGenerator.formatCmd('mv ./%s%s %s' %
                               (jobname, jobmanager.ext, paraset['-o'])))
    CMD.append(cmdGenerator.formatCmd('rm -f', samplelist))

    # Request an 'smp' parallel environment when more than one thread is
    # configured. '-p' takes precedence over '--num-threads', and the slot
    # count is read from whichever flag is actually present (reading '-p'
    # unconditionally raised KeyError when only '--num-threads' was given).
    sgeopt = []
    for flag in ('-p', '--num-threads'):
        if flag in paraset:
            if int(paraset[flag]) > 1:
                sgeopt = ['-pe smp ' + str(paraset[flag])]
            break

    jobmanager.createJob(jobname,
                         CMD,
                         outpath=paraset['-o'],
                         outfn=jobname,
                         sgeopt=sgeopt,
                         trackcmd=False)
    return jobmanager
Esempio n. 5
0
def cuffdiff_v1(cmdset, runmode='test'):
    """Create a single cuffdiff (cufflinks v1) job over all samples.

    The job sources the sequencing-tools setup script, runs the hard-coded
    cufflinks_1 ``cuffdiff`` binary with the filtered parameters, the GTF and
    the space-separated list of ``<inputpath><sample>/<bam>`` files, and
    moves ``./<jobname><jobmanager.ext>`` into the ``--output-dir`` directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath' and 'overwrite'
            are popped here; 'cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'
            and 'bam' are fetched through ``configRobot.popParas``
            (presumably popped as well). The remaining entries are filtered
            against ``availParas['cuffdiff']`` and must still provide
            '--output-dir'. The fetched 'cmd' value is not used; the binary
            path is fixed.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.

    Raises:
        KeyError: If 'inputpath', 'overwrite' or '--output-dir' is missing.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Every path keeps its trailing blank, as in the generated command line.
    sampletext = ''.join(
        '%s%s/%s ' % (inputpath, sample, bam) for sample in samples)

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

    paraset = copy.deepcopy(cmdset)
    paraset = configRobot.validParas(paraset, availParas['cuffdiff'])
    outdir = paraset['--output-dir']
    cmdGenerator.checkPath(outdir, create=createpath)
    CMD.append(
        cmdGenerator.formatCmd(
            '/ifs/home/c2b2/dp_lab/bc2252/SeqTool/cufflinks_1/cuffdiff',
            paraset, gtf, sampletext))
    CMD.append(
        cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outdir)))

    # Request an 'smp' parallel environment when more than one thread is
    # configured. '-p' takes precedence over '--num-threads', and the slot
    # count is read from whichever flag is actually present (reading '-p'
    # unconditionally raised KeyError when only '--num-threads' was given).
    sgeopt = []
    for flag in ('-p', '--num-threads'):
        if flag in paraset:
            if int(paraset[flag]) > 1:
                sgeopt = ['-pe smp ' + str(paraset[flag])]
            break

    jobmanager.createJob(jobname,
                         CMD,
                         outpath=outdir,
                         outfn=jobname,
                         sgeopt=sgeopt,
                         trackcmd=False)
    return jobmanager
Esempio n. 6
0
def picardReorderSam(cmdset, runmode='test'):
    """Create one Picard ReorderSam job per sample and return the job manager.

    Each job sources the sequencing-tools setup script, runs
    ``ReorderSam.jar`` (lenient validation) to write ``<...>.reorder.bam``
    under the output path, and moves ``./<jobname><jobmanager.ext>`` into the
    output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath', 'overwrite' and, if present, '-Djava.io.tmpdir'
            are popped here; 'cmd', 'mem', 'time', 'sample', 'prefix' and
            'bam' are fetched through ``configRobot.popParas`` (presumably
            popped as well). The remaining entries are filtered against
            ``availParas['ReorderSam.jar']``.
        runmode: Accepted for symmetry with the other job builders; nothing
            in this function depends on it.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # Assemble the java invocation: optional temp dir, then heap size.
    # NOTE(review): the temp-dir value is appended directly after the flag
    # name, so it presumably already starts with '=' - confirm.
    tmpflag = '-Djava.io.tmpdir'
    javacmd = 'java'
    if tmpflag in cmdset:
        javacmd += ' ' + tmpflag + cmdGenerator.checkPath(cmdset.pop(tmpflag))
    # The JVM heap is two gigabytes smaller than the job's memory request.
    javacmd += ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)
    reorder = programpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'

    for sample in samples:
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
        jobname = prefix + '_' + sample

        # With bam == '=sample' the sample entry itself is the input file;
        # otherwise the BAM lives inside the sample's directory.
        inputfile = sample if bam == '=sample' else sample + '/' + bam

        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % (inputpath + inputfile)
        paraset['OUTPUT'] = '=%s.reorder.bam' % (
            outputpath + inputfile.replace('.bam', ''))
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])

        CMDs.append(cmdGenerator.formatCmd(javacmd, reorder, paraset))
        CMDs.append(cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)

    return jobmanager
Esempio n. 7
0
def cufflinks(cmdset, runmode='test'):
    """Create one cufflinks job per sample and return the job manager.

    Results go to ``<outputpath><prefix>/<sample>``. Each job sources the
    sequencing-tools setup script, runs ``cmd`` with the filtered parameters
    on ``<inputpath><sample>/<bam>`` and moves
    ``./<jobname><jobmanager.ext>`` into the sample's output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'bam' are fetched through ``configRobot.popParas``
            (presumably popped as well). The remaining entries are filtered
            against ``availParas['cufflinks']``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.

    Raises:
        KeyError: If 'inputpath', 'outputpath' or 'overwrite' is missing
            from ``cmdset``.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'),
                                        create=createpath)
    # All samples of this run share a <prefix>/ sub-directory.
    outputpath = cmdGenerator.checkPath(outputpath + '%s/' % prefix,
                                        create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))
    for sample in samples:
        jobname = prefix + '_' + sample
        CMD = []
        CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outputpath + sample
        paraset = configRobot.validParas(paraset, availParas['cufflinks'])
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        CMD.append(
            cmdGenerator.formatCmd(cmd, paraset,
                                   inputpath + '%s/' % sample + bam))

        CMD.append(
            cmdGenerator.formatCmd('mv ./%s%s %s' %
                                   (jobname, jobmanager.ext, paraset['-o'])))

        # Request an 'smp' parallel environment when more than one thread is
        # configured. '-p' takes precedence over '--num-threads', and the
        # slot count is read from whichever flag is actually present (reading
        # '-p' unconditionally raised KeyError when only '--num-threads' was
        # given).
        sgeopt = []
        for flag in ('-p', '--num-threads'):
            if flag in paraset:
                if int(paraset[flag]) > 1:
                    sgeopt = ['-pe smp ' + str(paraset[flag])]
                break

        jobmanager.createJob(jobname,
                             CMD,
                             outpath=paraset['-o'],
                             outfn=jobname,
                             sgeopt=sgeopt)
    return jobmanager
Esempio n. 8
0
def picardQC(cmdset, runmode='test'):
    """Create one job per sample that runs four Picard QC metric collectors.

    Each job sources the sequencing-tools setup script, runs
    CollectRnaSeqMetrics, CollectMultipleMetrics, EstimateLibraryComplexity
    and CollectGcBiasMetrics in that order, moves
    ``./<jobname><jobmanager.ext>`` into the output directory and removes the
    per-sample temporary directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath', 'sample' and 'overwrite' are popped here; 'cmd',
            'mem', 'time', 'bam' and 'prefix' are fetched through
            ``configRobot.popParas`` (presumably popped as well). It must
            contain 'TMP_DIR'. The remaining entries are filtered per jar
            against ``availParas[<jar>]``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    createpath = runmode != 'test'

    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time, bam, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    samples = cmdset.pop('sample')

    # The JVM heap is two gigabytes smaller than the job's memory request.
    javacmd = 'java -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    # (jar, output tag) pairs in the order the collectors are run.
    collectors = (
        ('CollectRnaSeqMetrics.jar', 'RnaSeq'),
        ('CollectMultipleMetrics.jar', ''),
        ('EstimateLibraryComplexity.jar', 'Lib'),
        ('CollectGcBiasMetrics.jar', 'GC'),
    )

    for sample in samples:
        jobname = prefix + '_' + sample
        allcmds = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        paraset = copy.deepcopy(cmdset)
        # With bam == '=sample' the sample entry itself is the input file.
        if bam == '=sample':
            paraset['INPUT'] = '=%s' % (inputpath + sample)
        else:
            paraset['INPUT'] = '=%s/%s' % (inputpath + sample, bam)
        # Every job gets its own temp directory below the configured one.
        paraset['TMP_DIR'] = paraset['TMP_DIR'] + prefix + '_' + sample + '/'
        cmdGenerator.checkPath(paraset['TMP_DIR'].strip('='), create=createpath)

        for metric, tag in collectors:
            # CollectMultipleMetrics gets a bare stem; the others a .txt file.
            if 'MultipleMetrics' in metric:
                output = '=%s' % (outputpath + sample + tag)
            else:
                output = '=%s.txt' % (outputpath + sample + '.' + tag)
            paraset['OUTPUT'] = output
            paraset['CHART_OUTPUT'] = '%s' % (output.replace('.txt', '.pdf'))
            paraset['SUMMARY_OUTPUT'] = '%s' % (
                output.replace('.txt', '.summary.txt'))

            # Keep only the parameters this particular jar supports.
            metricparaset = configRobot.validParas(paraset, availParas[metric])
            allcmds.append(cmdGenerator.formatCmd(
                javacmd, programpath + metric, metricparaset))

        allcmds.append(cmdGenerator.formatCmd(
            'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)))
        allcmds.append(cmdGenerator.formatCmd(
            'rm -Rf', paraset['TMP_DIR'].strip('=')))
        jobmanager.createJob(jobname, allcmds, outpath=outputpath, outfn=jobname)
    return jobmanager
Esempio n. 9
0
def RSeQC(cmdset, runmode='test'):
    """Create one job per sample that runs a fixed set of RSeQC scripts.

    For each sample the job sources the sequencing-tools setup script, runs
    every script in ``programs`` with ``python`` on
    ``<inputpath><sample>/<bam>`` and moves ``./<jobname><jobmanager.ext>``
    into the output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath' and 'overwrite' are popped here; 'cmd', 'mem',
            'time', 'sample', 'bam' and 'prefix' are fetched through
            ``configRobot.popParas`` (presumably popped as well). The rest is
            filtered per script against ``availParas[<script>]``.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    createpath = not (runmode == 'test')

    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time = configRobot.popParas(cmdset, ['cmd', 'mem', 'time'])
    samples, bam, prefix = configRobot.popParas(
        cmdset, ['sample', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    jobmanager = jobFactory.jobManager(
        mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    programs = ['inner_distance.py',
                'junction_annotation.py',
                'junction_saturation.py',
                'read_GC.py',
                'read_duplication.py']

    setup = 'source ~/libraries/setup_seqtools'
    for sample in samples:
        jobname = prefix + '_' + sample
        CMDs = [cmdGenerator.formatCmd(setup)]

        for prog in programs:
            # Script name without its extension labels the output.
            label = prog.replace('.py', '')

            paraset = copy.deepcopy(cmdset)
            paraset['-i'] = inputpath + sample + '/' + bam
            paraset['-o'] = outputpath + sample + '.%s' % (label)
            paraset = configRobot.validParas(paraset, availParas[prog])
            # If the filter dropped '-o', redirect stdout to the same stem.
            if '-o' not in paraset:
                paraset['>'] = outputpath + sample + '.%s' % (label)

            CMDs.append(
                cmdGenerator.formatCmd('python', programpath + prog, paraset))

        move_job = 'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)
        CMDs.append(cmdGenerator.formatCmd(move_job))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)
    return jobmanager
Esempio n. 10
0
def markDup(cmdset, runmode='test'):
    """Create one MarkDuplicates job per sample and return the job manager.

    Each job sources the sequencing-tools setup script, runs
    ``MarkDuplicates.jar`` on ``<inputpath><sample>/<bam>``, indexes the
    ``.mdup.bam`` output with ``samtools index`` and moves
    ``./<jobname><jobmanager.ext>`` into the sample directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'programpath'
            and 'overwrite' are popped here, and 'cmd', 'mem', 'time',
            'sample', 'prefix' and 'bam' are fetched through
            ``configRobot.popParas`` (presumably popped as well). The
            remaining entries are filtered against
            ``availParas['MarkDuplicates.jar']`` and passed to the jar.
        runmode: Accepted for symmetry with the other job builders; nothing
            in this function depends on it.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    # 'cmd' is fetched together with the other settings but not used below.
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))
    bam = configRobot.popParas(cmdset, 'bam')
    overwrite = cmdset.pop('overwrite')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=overwrite)

    # The JVM heap is one gigabyte smaller than the job's memory request.
    javacmd = 'java -Xmx%dg -jar' % (int(mem.replace('G', '')) - 1)
    mdupjar = 'MarkDuplicates.jar'
    idxcmd = 'samtools index'

    for sample in samples:
        jobname = prefix + '_' + sample
        workdir = inputpath + sample

        # Values carry a leading '=' (KEY=value style arguments).
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s/%s' % (workdir, bam)
        paraset['OUTPUT'] = paraset['INPUT'].replace('.bam', '.mdup.bam')
        metricsfile = prefix + '_mdupmetrics.txt'
        paraset['METRICS_FILE'] = '=%s/%s' % (workdir, metricsfile)
        paraset = configRobot.validParas(paraset, availParas[mdupjar])

        CMDs = []
        CMDs.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))
        CMDs.append(
            cmdGenerator.formatCmd(javacmd, programpath + mdupjar, paraset))
        # samtools needs the bare path, so drop the '=' marker.
        marked_bam = paraset['OUTPUT'].strip('=')
        CMDs.append(cmdGenerator.formatCmd(idxcmd, marked_bam))
        move_job = 'mv ./%s%s %s' % (jobname, jobmanager.ext, workdir)
        CMDs.append(cmdGenerator.formatCmd(move_job))

        jobmanager.createJob(jobname, CMDs, outpath=workdir, outfn=jobname)
    return jobmanager
Esempio n. 11
0
def picardReorderSam(cmdset, runmode='test'):
    """Create one Picard ReorderSam job per sample and return the job manager.

    Each job sources the sequencing-tools setup script, runs
    ``ReorderSam.jar`` (lenient validation) to write ``<...>.reorder.bam``
    under the output path, and moves ``./<jobname><jobmanager.ext>`` into the
    output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath', 'overwrite' and, if present, '-Djava.io.tmpdir'
            are popped here; 'cmd', 'mem', 'time', 'sample', 'prefix' and
            'bam' are fetched through ``configRobot.popParas`` (presumably
            popped as well). The remaining entries are filtered against
            ``availParas['ReorderSam.jar']``.
        runmode: Accepted for symmetry with the other job builders; nothing
            in this function depends on it.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'))
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    overwrite = cmdset.pop('overwrite')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=overwrite)

    # NOTE(review): the temp-dir value is glued straight onto the flag name,
    # so it presumably already starts with '=' - confirm.
    javacmd = 'java'
    if '-Djava.io.tmpdir' in cmdset:
        tmpdir = cmdGenerator.checkPath(cmdset.pop('-Djava.io.tmpdir'))
        javacmd = javacmd + ' ' + '-Djava.io.tmpdir' + tmpdir
    # The JVM heap is two gigabytes smaller than the job's memory request.
    heap_gb = int(mem.replace('G', '')) - 2
    javacmd = javacmd + ' -Xmx%dg -jar' % heap_gb
    reorder = programpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'

    for sample in samples:
        CMDs = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]
        jobname = prefix + '_' + sample

        # Reorder by chromosome. With bam == '=sample' the sample entry is
        # itself the input file; otherwise the BAM sits in the sample folder.
        if bam == '=sample':
            inputfile = sample
        else:
            inputfile = sample + '/' + bam
        stem = inputfile.replace('.bam', '')

        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % (inputpath + inputfile)
        paraset['OUTPUT'] = '=%s.reorder.bam' % (outputpath + stem)
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, reorder, paraset))

        move_job = 'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)
        CMDs.append(cmdGenerator.formatCmd(move_job))
        jobmanager.createJob(jobname, CMDs, outpath=outputpath, outfn=jobname)

    return jobmanager
Esempio n. 12
0
def cuffmerge(cmdset, runmode='test'):
    """Create a single cuffmerge job that merges the per-sample GTF files.

    The job sources the sequencing-tools setup script, writes the list of
    ``<inputpath><sample>/<gtf>`` paths to ``<output>.samples`` with ``echo``,
    runs ``cmd`` on that list, moves ``./<jobname><jobmanager.ext>`` into the
    output directory and finally removes the ``.samples`` file.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'gtf' are fetched through ``configRobot.popParas``
            (presumably popped as well). It must contain '-o', which is
            prefixed with the output path. The remaining entries are filtered
            against ``availParas['cuffmerge']``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.

    Raises:
        KeyError: If 'inputpath', 'outputpath', 'overwrite' or '-o' is
            missing from ``cmdset``.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'), create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # One path per entry, each followed by a literal backslash-n inside a
    # double-quoted string for ``echo``.
    # NOTE(review): whether echo expands "\n" depends on the job's shell -
    # confirm the .samples file really ends up with one path per line.
    sampletext = '"%s"' % ''.join(
        '%s%s/%s\\n' % (inputpath, sample, gtf) for sample in samples)

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))
    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']

    CMD.append(cmdGenerator.formatCmd('echo', sampletext, '>', paraset['-o'] + '.samples'))

    paraset = configRobot.validParas(paraset, availParas['cuffmerge'])
    cmdGenerator.checkPath(paraset['-o'], create=createpath)
    CMD.append(cmdGenerator.formatCmd(cmd, paraset, paraset['-o'] + '.samples'))
    CMD.append(cmdGenerator.formatCmd('mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['-o'])))
    CMD.append(cmdGenerator.formatCmd('rm -f', paraset['-o'] + '.samples'))

    # Request an 'smp' parallel environment when more than one thread is
    # configured. '-p' takes precedence over '--num-threads', and the slot
    # count is read from whichever flag is actually present (reading '-p'
    # unconditionally raised KeyError when only '--num-threads' was given).
    sgeopt = []
    for flag in ('-p', '--num-threads'):
        if flag in paraset:
            if int(paraset[flag]) > 1:
                sgeopt = ['-pe smp ' + str(paraset[flag])]
            break

    jobmanager.createJob(jobname, CMD, outpath=paraset['-o'], outfn=jobname, sgeopt=sgeopt, trackcmd=False)
    return jobmanager
Esempio n. 13
0
def cuffcompare(cmdset, runmode='test'):
    """Create a single cuffcompare job that covers every sample.

    The job sources the sequencing-tools setup script, runs ``cmd`` with the
    filtered parameters followed by the space-separated list of
    ``<inputpath><sample>/<gtf>`` files, and moves
    ``./<jobname><jobmanager.ext>`` into the output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'gtf' are fetched through ``configRobot.popParas``
            (presumably popped as well). It must contain '-o', which is
            prefixed with the output path.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.
    """
    createpath = not (runmode == 'test')

    wanted = ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf']
    cmd, mem, time, samples, prefix, gtf = configRobot.popParas(cmdset, wanted)
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if type(samples) not in (list, tuple):
        samples = [samples]

    # Every path keeps its trailing blank, exactly as the command expects.
    gtf_paths = ['%s%s/%s ' % (inputpath, sample, gtf) for sample in samples]
    sampletext = ''.join(gtf_paths)

    overwrite = cmdset.pop('overwrite')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=overwrite)

    jobname = prefix
    CMD = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

    paraset = copy.deepcopy(cmdset)
    paraset['-o'] = outputpath + paraset['-o']
    paraset = configRobot.validParas(paraset, availParas['cuffcompare'])
    CMD.append(cmdGenerator.formatCmd(cmd, paraset, sampletext))

    move_job = 'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)
    CMD.append(cmdGenerator.formatCmd(move_job))

    jobmanager.createJob(
        jobname, CMD, outpath=outputpath, outfn=jobname, trackcmd=False)
    return jobmanager
Esempio n. 14
0
def cufflinks(cmdset, runmode='test'):
    """Create one cufflinks job per sample and return the job manager.

    Results go to ``<outputpath><prefix>/<sample>``. Each job sources the
    sequencing-tools setup script, runs ``cmd`` with the filtered parameters
    on ``<inputpath><sample>/<bam>`` and moves
    ``./<jobname><jobmanager.ext>`` into the sample's output directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath'
            and 'overwrite' are popped here; 'cmd', 'mem', 'time', 'sample',
            'prefix' and 'bam' are fetched through ``configRobot.popParas``
            (presumably popped as well). The remaining entries are filtered
            against ``availParas['cufflinks']``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.

    Raises:
        KeyError: If 'inputpath', 'outputpath' or 'overwrite' is missing
            from ``cmdset``.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(cmdset.pop('outputpath'), create=createpath)
    # All samples of this run share a <prefix>/ sub-directory.
    outputpath = cmdGenerator.checkPath(outputpath + '%s/' % prefix, create=createpath)

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))
    for sample in samples:
        jobname = prefix + '_' + sample
        CMD = []
        CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = outputpath + sample
        paraset = configRobot.validParas(paraset, availParas['cufflinks'])
        cmdGenerator.checkPath(paraset['-o'], create=createpath)
        CMD.append(cmdGenerator.formatCmd(cmd, paraset, inputpath + '%s/' % sample + bam))

        CMD.append(cmdGenerator.formatCmd('mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['-o'])))

        # Request an 'smp' parallel environment when more than one thread is
        # configured. '-p' takes precedence over '--num-threads', and the
        # slot count is read from whichever flag is actually present (reading
        # '-p' unconditionally raised KeyError when only '--num-threads' was
        # given).
        sgeopt = []
        for flag in ('-p', '--num-threads'):
            if flag in paraset:
                if int(paraset[flag]) > 1:
                    sgeopt = ['-pe smp ' + str(paraset[flag])]
                break

        jobmanager.createJob(jobname, CMD, outpath=paraset['-o'], outfn=jobname, sgeopt=sgeopt)
    return jobmanager
Esempio n. 15
0
def cuffdiff_v1(cmdset, runmode='test'):
    """Create a single cuffdiff (cufflinks v1) job over all samples.

    The job sources the sequencing-tools setup script, runs the hard-coded
    cufflinks_1 ``cuffdiff`` binary with the filtered parameters, the GTF and
    the space-separated list of ``<inputpath><sample>/<bam>`` files, and
    moves ``./<jobname><jobmanager.ext>`` into the ``--output-dir`` directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath' and 'overwrite'
            are popped here; 'cmd', 'mem', 'time', 'sample', 'prefix', 'gtf'
            and 'bam' are fetched through ``configRobot.popParas``
            (presumably popped as well). The remaining entries are filtered
            against ``availParas['cuffdiff']`` and must still provide
            '--output-dir'. The fetched 'cmd' value is not used; the binary
            path is fixed.
        runmode: 'test' passes ``create=False`` to ``checkPath`` for the
            output directory; any other value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the job was created.

    Raises:
        KeyError: If 'inputpath', 'overwrite' or '--output-dir' is missing.
    """
    createpath = runmode != 'test'

    cmd, mem, time, samples, prefix, gtf, bam = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix', 'gtf', 'bam'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))

    # A lone sample is wrapped so it can be handled like a list.
    if not isinstance(samples, (list, tuple)):
        samples = [samples]

    # Every path keeps its trailing blank, as in the generated command line.
    sampletext = ''.join(
        '%s%s/%s ' % (inputpath, sample, bam) for sample in samples)

    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    jobname = prefix
    CMD = []
    CMD.append(cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

    paraset = copy.deepcopy(cmdset)
    paraset = configRobot.validParas(paraset, availParas['cuffdiff'])
    cmdGenerator.checkPath(paraset['--output-dir'], create=createpath)
    CMD.append(cmdGenerator.formatCmd('/ifs/home/c2b2/dp_lab/bc2252/SeqTool/cufflinks_1/cuffdiff', paraset, gtf, sampletext))
    CMD.append(cmdGenerator.formatCmd('mv ./%s%s %s' % (jobname, jobmanager.ext, paraset['--output-dir'])))

    # Request an 'smp' parallel environment when more than one thread is
    # configured. '-p' takes precedence over '--num-threads', and the slot
    # count is read from whichever flag is actually present (reading '-p'
    # unconditionally raised KeyError when only '--num-threads' was given).
    sgeopt = []
    for flag in ('-p', '--num-threads'):
        if flag in paraset:
            if int(paraset[flag]) > 1:
                sgeopt = ['-pe smp ' + str(paraset[flag])]
            break

    jobmanager.createJob(jobname, CMD, outpath=paraset['--output-dir'], outfn=jobname, sgeopt=sgeopt, trackcmd=False)
    return jobmanager
Esempio n. 16
0
def picardQC(cmdset, runmode='test'):
    """Create one job per sample that runs four Picard QC metric collectors.

    Each job sources the sequencing-tools setup script, runs
    CollectRnaSeqMetrics, CollectMultipleMetrics, EstimateLibraryComplexity
    and CollectGcBiasMetrics in that order, moves
    ``./<jobname><jobmanager.ext>`` into the output directory and removes the
    per-sample temporary directory.

    Args:
        cmdset: Parameter dict; it is modified. 'inputpath', 'outputpath',
            'programpath', 'sample' and 'overwrite' are popped here; 'cmd',
            'mem', 'time', 'bam' and 'prefix' are fetched through
            ``configRobot.popParas`` (presumably popped as well). It must
            contain 'TMP_DIR'. The remaining entries are filtered per jar
            against ``availParas[<jar>]``.
        runmode: 'test' passes ``create=False`` to ``checkPath``; any other
            value passes ``create=True``.

    Returns:
        The ``jobFactory.jobManager`` on which the jobs were created.
    """
    createpath = not (runmode == 'test')

    # 'cmd' is fetched with the other settings but not used below.
    cmd, mem, time, bam, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'bam', 'prefix'])
    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    outputpath = cmdGenerator.checkPath(
        cmdset.pop('outputpath'), create=createpath)
    programpath = cmdGenerator.checkPath(cmdset.pop('programpath'))

    samples = cmdset.pop('sample')

    # The JVM heap is two gigabytes smaller than the job's memory request.
    heap_gb = int(mem.replace('G', '')) - 2
    javacmd = 'java -Xmx%dg -jar' % heap_gb

    overwrite = cmdset.pop('overwrite')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=overwrite)

    # Output tag per jar; the separate list fixes the execution order.
    metrics = dict([
        ('CollectRnaSeqMetrics.jar', 'RnaSeq'),
        ('CollectMultipleMetrics.jar', ''),
        ('EstimateLibraryComplexity.jar', 'Lib'),
        ('CollectGcBiasMetrics.jar', 'GC'),
    ])
    metrickeys = [
        'CollectRnaSeqMetrics.jar',
        'CollectMultipleMetrics.jar',
        'EstimateLibraryComplexity.jar',
        'CollectGcBiasMetrics.jar',
    ]

    for sample in samples:
        jobname = prefix + '_' + sample
        allcmds = [cmdGenerator.formatCmd('source ~/libraries/setup_seqtools')]

        paraset = copy.deepcopy(cmdset)
        # With bam == '=sample' the sample entry itself is the input file.
        if bam == '=sample':
            source = '=%s' % (inputpath + sample)
        else:
            source = '=%s/%s' % (inputpath + sample, bam)
        paraset['INPUT'] = source
        # Every job gets its own temp directory below the configured one.
        paraset['TMP_DIR'] = paraset['TMP_DIR'] + prefix + '_' + sample + '/'
        cmdGenerator.checkPath(paraset['TMP_DIR'].strip('='),
                               create=createpath)

        for metric in metrickeys:
            tag = metrics[metric]
            # CollectMultipleMetrics gets a bare stem; the others a .txt file.
            if 'MultipleMetrics' in metric:
                paraset['OUTPUT'] = '=%s' % (outputpath + sample + tag)
            else:
                paraset['OUTPUT'] = '=%s.txt' % (outputpath + sample + '.' + tag)
            report = paraset['OUTPUT']
            paraset['CHART_OUTPUT'] = '%s' % (report.replace('.txt', '.pdf'))
            paraset['SUMMARY_OUTPUT'] = '%s' % (
                report.replace('.txt', '.summary.txt'))

            # Keep only the parameters this particular jar supports.
            metricparaset = configRobot.validParas(paraset, availParas[metric])
            allcmds.append(cmdGenerator.formatCmd(
                javacmd, programpath + metric, metricparaset))

        move_job = 'mv ./%s%s %s' % (jobname, jobmanager.ext, outputpath)
        allcmds.append(cmdGenerator.formatCmd(move_job))
        allcmds.append(
            cmdGenerator.formatCmd('rm -Rf', paraset['TMP_DIR'].strip('=')))
        jobmanager.createJob(
            jobname, allcmds, outpath=outputpath, outfn=jobname)
    return jobmanager
Esempio n. 17
0
def preGATK(cmdset, runmode='test'):
    """Build one BAM pre-processing job per sample ahead of GATK calling.

    Each job chains, in this order: a ``samtools view`` filter, Picard
    ReorderSam, Picard AddOrReplaceReadGroups, Picard MarkDuplicates,
    ``samtools index``, GATK RealignerTargetCreator and GATK IndelRealigner.
    The job removes the filter, reorder and mdup BAMs, the mdup index and
    the intervals file once they have been consumed, and finally moves its
    own job file into the sample directory.

    Args:
        cmdset: parameter mapping, modified in place. The keys 'inputpath',
            'picardpath', 'gatkpath', 'overwrite' and, when present,
            '-Djava.io.tmpdir' are popped here; 'cmd', 'mem', 'time',
            'sample', 'prefix' and 'bam' are fetched through
            configRobot.popParas. What is left is deep-copied into the
            parameter set of every tool. '-R' is required; '-filterMBQ'
            is optional.
        runmode: kept for interface compatibility; this function creates no
            directories, so the value is not consulted.

    Returns:
        The jobFactory.jobManager on which one job per sample was created.

    Raises:
        KeyError: when a required key such as '-R' is absent (assuming
            cmdset behaves like a plain dict).
    """
    # `cmd` is unpacked only because popParas returns it with the others.
    cmd, mem, time, samples, prefix = configRobot.popParas(
        cmdset, ['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    picardpath = cmdGenerator.checkPath(cmdset.pop('picardpath'))
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem,
                                       time=time,
                                       overwrite=cmdset.pop('overwrite'))

    javacmd = 'java'
    if '-Djava.io.tmpdir' in cmdset:
        # NOTE(review): nothing is inserted between the option name and the
        # checked path, so the configured value presumably already starts
        # with '=' -- confirm against the config files.
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(
            cmdset.pop('-Djava.io.tmpdir'))
    # The JVM heap is the requested job memory (e.g. '8G') minus 2 GB.
    javacmd = javacmd + ' -Xmx%dg -jar' % (int(mem.replace('G', '')) - 2)

    # -F 264 drops reads with SAM flag bits 0x8 (mate unmapped) or
    # 0x100 (secondary alignment) set.
    samview = 'samtools view -b -h -F 264'
    reorder = picardpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = picardpath + 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdupjar = picardpath + 'MarkDuplicates.jar'
    GATK = gatkpath + 'GenomeAnalysisTK.jar '
    createTg = '-T RealignerTargetCreator '
    realign = '-T IndelRealigner '

    idxcmd = 'samtools index'
    clearup = 'rm -f '

    for sample in samples:
        CMDs = []
        CMDs.append(
            cmdGenerator.formatCmd('source ~/libraries/setup_seqtools'))

        jobname = prefix + '_' + sample

        # filter: samtools must not receive the GATK-only options.
        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = '%s/%s.filter.bam' % (inputpath + sample,
                                              bam.replace('.bam', ''))
        lastoutput = paraset['-o']
        del paraset['-R']
        # '-filterMBQ' is an optional flag; its absence must not abort the
        # whole job set with a KeyError.
        paraset.pop('-filterMBQ', None)
        # The samtools parameters are not validated against availParas.
        CMDs.append(
            cmdGenerator.formatCmd(samview, paraset,
                                   inputpath + sample + '/' + bam))

        # reorder by chromosome; the filtered BAM is deleted afterwards
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % lastoutput
        paraset['OUTPUT'] = '=%s.reorder.bam' % (lastoutput.replace(
            '.bam', ''))
        paraset['REFERENCE'] = '=%s' % paraset['-R']
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, reorder, paraset))
        CMDs.append(cmdGenerator.formatCmd(clearup, lastoutput))
        lastoutput = paraset['OUTPUT'].strip('=')

        # add read group; the reordered BAM is deleted afterwards
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % lastoutput
        paraset['OUTPUT'] = '=%s.addRG.bam' % (lastoutput.replace('.bam', ''))
        paraset['RGSM'] = '=%s' % sample
        paraset = configRobot.validParas(
            paraset, availParas['AddOrReplaceReadGroups.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, RG, paraset))
        CMDs.append(cmdGenerator.formatCmd(clearup, lastoutput))
        lastoutput = paraset['OUTPUT'].strip('=')

        # mark duplicates, then index the result
        # NOTE(review): the addRG BAM is not removed after this step,
        # unlike the earlier intermediates -- confirm this is intended.
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s' % lastoutput
        paraset['OUTPUT'] = '=%s.mdup.bam' % (lastoutput.replace('.bam', ''))
        paraset['METRICS_FILE'] = '=%s/%s' % (inputpath + sample,
                                              prefix + '_mdupmetrics.txt')
        paraset = configRobot.validParas(paraset,
                                         availParas['MarkDuplicates.jar'])
        CMDs.append(cmdGenerator.formatCmd(javacmd, mdupjar, paraset))
        CMDs.append(
            cmdGenerator.formatCmd(idxcmd, paraset['OUTPUT'].strip('=')))
        lastoutput = paraset['OUTPUT'].strip('=')

        # create intervals (the full remaining cmdset goes to GATK)
        paraset = copy.deepcopy(cmdset)
        paraset['-I'] = lastoutput
        paraset['-o'] = lastoutput.replace('.bam', '.intervals')
        CMDs.append(cmdGenerator.formatCmd(javacmd, GATK + createTg, paraset))

        # realign: same parameter set, fed with the intervals just produced
        paraset['-targetIntervals'] = paraset['-o']
        paraset['-o'] = lastoutput.replace('.bam', '.realign.bam')
        CMDs.append(cmdGenerator.formatCmd(javacmd, GATK + realign, paraset))

        # clear up
        CMDs.append(cmdGenerator.formatCmd(clearup, lastoutput))
        # `samtools index` wrote <bam>.bai next to the mdup BAM; remove it as
        # well so no orphaned index is left behind.
        CMDs.append(cmdGenerator.formatCmd(clearup, lastoutput + '.bai'))
        CMDs.append(
            cmdGenerator.formatCmd(clearup,
                                   lastoutput.replace('.bam', '.intervals')))
        CMDs.append(
            cmdGenerator.formatCmd(
                'mv ./%s%s %s' %
                (jobname, jobmanager.ext, inputpath + sample)))
        jobmanager.createJob(jobname,
                             CMDs,
                             outpath=inputpath + sample,
                             outfn=jobname)

    return jobmanager
Esempio n. 18
0
def preGATK(cmdset, runmode='test'):
    """Create, for every sample, a job that prepares its BAM for GATK.

    Pipeline per sample: samtools view (filter) -> Picard ReorderSam ->
    Picard AddOrReplaceReadGroups -> Picard MarkDuplicates -> samtools index
    -> GATK RealignerTargetCreator -> GATK IndelRealigner. The job deletes
    the filter/reorder/mdup BAMs, the mdup index and the intervals file
    after use, then moves its job file into the sample directory.

    Args:
        cmdset: parameter mapping; it is modified in place. 'inputpath',
            'picardpath', 'gatkpath', 'overwrite' and the optional
            '-Djava.io.tmpdir' are popped directly, while 'cmd', 'mem',
            'time', 'sample', 'prefix' and 'bam' are obtained via
            configRobot.popParas. The remaining entries are deep-copied
            into each tool's parameters. '-R' must be present;
            '-filterMBQ' may be omitted.
        runmode: retained so existing callers keep working; no directory is
            created here, so it has no effect.

    Returns:
        The jobFactory.jobManager holding one created job per sample.

    Raises:
        KeyError: if a required key such as '-R' is missing (assuming
            cmdset behaves like a plain dict).
    """
    # `cmd` comes back from popParas together with the values used below.
    cmd, mem, time, samples, prefix = configRobot.popParas(cmdset,['cmd', 'mem', 'time', 'sample', 'prefix'])

    inputpath = cmdGenerator.checkPath(cmdset.pop('inputpath'))
    picardpath = cmdGenerator.checkPath(cmdset.pop('picardpath'))
    gatkpath = cmdGenerator.checkPath(cmdset.pop('gatkpath'))

    bam = configRobot.popParas(cmdset, 'bam')
    jobmanager = jobFactory.jobManager(mem=mem, time=time, overwrite=cmdset.pop('overwrite'))

    if '-Djava.io.tmpdir' in cmdset:
        # NOTE(review): option name and path are joined without a separator;
        # presumably the configured value carries the leading '=' -- verify.
        javacmd = 'java ' + '-Djava.io.tmpdir' + cmdGenerator.checkPath(cmdset.pop('-Djava.io.tmpdir'))
    else:
        javacmd = 'java'
    # Heap size = requested job memory (e.g. '8G') minus 2 GB.
    javacmd = javacmd + ' -Xmx%dg -jar'%(int(mem.replace('G',''))-2)

    # -F 264 = skip reads flagged 0x8 (mate unmapped) or 0x100 (secondary).
    samview = 'samtools view -b -h -F 264'
    reorder = picardpath + 'ReorderSam.jar VALIDATION_STRINGENCY=LENIENT'
    RG = picardpath + 'AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT RGLB=dUTP RGPL=illumina RGPU=1'
    mdupjar = picardpath + 'MarkDuplicates.jar'
    GATK = gatkpath + 'GenomeAnalysisTK.jar '
    createTg = '-T RealignerTargetCreator '
    realign = '-T IndelRealigner '

    idxcmd = 'samtools index'
    clearup = 'rm -f '

    for sample in samples:
        CMDs = []
        CMDs.append( cmdGenerator.formatCmd('source ~/libraries/setup_seqtools') )

        jobname = prefix + '_' + sample

        #filter -- GATK-only options are stripped before calling samtools
        paraset = copy.deepcopy(cmdset)
        paraset['-o'] = '%s/%s.filter.bam'%(inputpath+sample, bam.replace('.bam',''))
        lastoutput = paraset['-o']
        del paraset['-R']
        # '-filterMBQ' is optional: tolerate a config that does not set it
        # instead of failing with KeyError.
        paraset.pop('-filterMBQ', None)
        # samtools parameters are passed through without availParas validation
        CMDs.append( cmdGenerator.formatCmd(samview, paraset, inputpath+sample+'/'+bam ) )

        #reorder by chrm, then drop the filtered BAM
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s'%lastoutput
        paraset['OUTPUT'] = '=%s.reorder.bam'%(lastoutput.replace('.bam',''))
        paraset['REFERENCE'] = '=%s'%paraset['-R']
        paraset = configRobot.validParas(paraset, availParas['ReorderSam.jar'])
        CMDs.append( cmdGenerator.formatCmd(javacmd, reorder, paraset) )
        CMDs.append( cmdGenerator.formatCmd(clearup, lastoutput) )
        lastoutput = paraset['OUTPUT'].strip('=')

        #add RG, then drop the reordered BAM
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s'%lastoutput
        paraset['OUTPUT'] = '=%s.addRG.bam'%(lastoutput.replace('.bam',''))
        paraset['RGSM'] = '=%s'%sample
        paraset = configRobot.validParas(paraset, availParas['AddOrReplaceReadGroups.jar'])
        CMDs.append( cmdGenerator.formatCmd(javacmd, RG, paraset) )
        CMDs.append( cmdGenerator.formatCmd(clearup, lastoutput) )
        lastoutput = paraset['OUTPUT'].strip('=')

        #mark duplicates and index the result
        # NOTE(review): the addRG BAM is kept on disk here, unlike the
        # previous intermediates -- confirm whether that is deliberate.
        paraset = copy.deepcopy(cmdset)
        paraset['INPUT'] = '=%s'%lastoutput
        paraset['OUTPUT'] = '=%s.mdup.bam'%(lastoutput.replace('.bam', ''))
        paraset['METRICS_FILE'] = '=%s/%s'%(inputpath + sample, prefix + '_mdupmetrics.txt')
        paraset = configRobot.validParas(paraset, availParas['MarkDuplicates.jar'])
        CMDs.append( cmdGenerator.formatCmd(javacmd, mdupjar, paraset) )
        CMDs.append( cmdGenerator.formatCmd(idxcmd, paraset['OUTPUT'].strip('=')) )
        lastoutput = paraset['OUTPUT'].strip('=')

        #create intervals (all remaining cmdset entries are handed to GATK)
        paraset = copy.deepcopy(cmdset)
        paraset['-I'] = lastoutput
        paraset['-o'] = lastoutput.replace('.bam', '.intervals')
        CMDs.append( cmdGenerator.formatCmd(javacmd, GATK+createTg, paraset) )

        #realign, reusing the parameter set with the new intervals file
        paraset['-targetIntervals'] = paraset['-o']
        paraset['-o'] = lastoutput.replace('.bam', '.realign.bam')
        CMDs.append( cmdGenerator.formatCmd(javacmd, GATK+realign, paraset) )

        #clear up
        CMDs.append( cmdGenerator.formatCmd(clearup, lastoutput) )
        # also remove the <bam>.bai written by `samtools index`, which would
        # otherwise stay behind without its BAM
        CMDs.append( cmdGenerator.formatCmd(clearup, lastoutput + '.bai') )
        CMDs.append( cmdGenerator.formatCmd(clearup, lastoutput.replace('.bam', '.intervals')) )
        CMDs.append( cmdGenerator.formatCmd('mv ./%s%s %s'%(jobname, jobmanager.ext, inputpath+sample)) )
        jobmanager.createJob(jobname, CMDs, outpath = inputpath+sample, outfn = jobname)

    return jobmanager