Esempio n. 1
0
def merge_results(name, sample):
    """Queue hit-filtering jobs for every replicate and the pooled replicate.

    For each replicate, one job is queued per q-value threshold (0 meaning
    unfiltered; that output is remembered as ``r.unfiltered_results``),
    plus one job for each pseudoreplicate's hits.
    """
    for r in sample.replicates + [
            sample.combined_replicate,
    ]:
        for q_val in sample.conf.Q_VALUE_THRESHOLDS + [
                0,
        ]:
            if q_val:
                output = os.path.join(
                    r.results_dir(sample),
                    '%s_%f_hits_filtered.bed' % (r.rep_name(sample), q_val))
            else:
                output = os.path.join(
                    r.results_dir(sample),
                    '%s_hits_filtered.bed' % r.rep_name(sample))
                # Remember the unfiltered output; downstream stages read it.
                r.unfiltered_results = output
            cmd = filter_hits_cmd(r.results_dir(sample), r.sgr_dir(sample),
                                  sample.genome, output, q_val)
            sample.add_jobs(name, [
                sjm.Job('merge_' + r.rep_name(sample) + '%g' % (q_val), [
                    cmd,
                ],
                        queue=QUEUE,
                        project=PROJECT,
                        sched_options="-m e"),
            ])

        # Merge Pseudoreplicate Hits.
        # BUG FIX: the original passed the PR1 directories for the PR2 job
        # as well.  (Assumes r.pr2_sgr_dir exists analogous to r.pr1_sgr_dir
        # — pr2_results_dir is referenced elsewhere in this file; confirm.)
        for tag, res_dir, sgr_dir, attr in (
                ('_PR1', r.pr1_results_dir, r.pr1_sgr_dir,
                 'unfiltered_results_pr1'),
                ('_PR2', r.pr2_results_dir, r.pr2_sgr_dir,
                 'unfiltered_results_pr2'),
        ):
            output = os.path.join(
                r.results_dir(sample),
                '%s_hits.bed' % (r.rep_name(sample) + tag))
            setattr(r, attr, output)
            cmd = filter_hits_cmd(res_dir, sgr_dir, sample.genome, output)
            sample.add_jobs(name, [
                sjm.Job('merge_' + r.rep_name(sample) + tag, [
                    cmd,
                ],
                        queue=QUEUE,
                        project=PROJECT,
                        sched_options="-m e"),
            ])
Esempio n. 2
0
def idr_analysis(name, sample):
    """Queue all IDR comparisons for the sample.

    Comparisons queued: every pair of true replicates, each replicate's
    PR1 vs PR2 pseudoreplicates, and the pooled replicate's PR1 vs PR2.
    """
    modules = ["r/3.0.1"]
    jobs = []

    def queue_idr(job_label, peak_a, peak_b, out_name):
        # One IDR comparison, ranked by q-value, written under idr_dir.
        cmd = idr.idr_analysis_cmd(peak_a, peak_b,
                                   os.path.join(sample.idr_dir, out_name),
                                   'q.value', sample.genome)
        jobs.append(
            sjm.Job('idr_analysis_' + job_label, [
                cmd,
            ],
                    queue=QUEUE,
                    modules=modules,
                    project=PROJECT,
                    sched_options="-m e"))

    reps = sample.replicates
    for idx, rep_a in enumerate(reps):
        # True replicate pairs (each unordered pair once).
        for rep_b in reps[idx + 1:]:
            pair = '%s_VS_%s' % (rep_a.rep_name(sample),
                                 rep_b.rep_name(sample))
            queue_idr(pair, rep_a.narrowPeak, rep_b.narrowPeak, pair)

        # Self-consistency: PR1 vs PR2 of the same replicate.
        pr_pair = '%s_PR1_VS_%s_PR2' % (rep_a.rep_name(sample),
                                        rep_a.rep_name(sample))
        queue_idr(pr_pair, rep_a.narrowPeak_pr1, rep_a.narrowPeak_pr2,
                  pr_pair + '_PR')

    # Pooled pseudoreplicates.
    combined = sample.combined_replicate
    pooled = '%s_PR1_VS_%s_PR2' % (combined.rep_name(sample),
                                   combined.rep_name(sample))
    queue_idr(pooled, combined.narrowPeak_pr1, combined.narrowPeak_pr2,
              pooled)

    sample.add_jobs(name, jobs)
def main(gvcfs):
    """Build the merge + joint-genotyping job graph and emit its description."""
    merged = merge_gvcf(gvcfs)
    final_jobs = gatk_joint(merged)

    # Write the SJM description to the job file when configured, else stdout.
    if jobfile is None:
        descout = sys.stdout
    else:
        descout = open(jobfile.path, "w")
    descout.write(sjm.Job().depend(*final_jobs).desc())
    descout.flush()
Esempio n. 4
0
def replicate_scoring(name, sample):
    """Queue one job computing mapped-read and replicate-overlap statistics."""
    stats_dir = os.path.join(sample.results_dir, 'rep_stats')

    # Mapped read statistics for the whole sample.
    cmds = [
        '%s %s %s' % (os.path.join(BIN_DIR, 'read_stats.py'), stats_dir,
                      sample.conf.path),
    ]

    # Pairwise overlap statistics at every q-value threshold.
    for q in sample.conf.Q_VALUE_THRESHOLDS:
        for r1 in sample.replicates:
            for r2 in sample.replicates:
                if r1 == r2:
                    continue  # no self-comparisons
                label = '%s_VS_%s_%f' % (r1.rep_name(sample),
                                         r2.rep_name(sample), q)
                cmds.append('%s %s %s %s %f %s' % (
                    os.path.join(BIN_DIR, 'overlap_stats.py'),
                    r1.narrowPeak, r2.narrowPeak, stats_dir, q, label))

    sample.add_jobs(name, [
        sjm.Job('replicate_stats',
                cmds,
                queue=QUEUE,
                project=PROJECT,
                sched_options="-m e"),
    ])
Esempio n. 5
0
def mail_results(sample, control, run_name, emails):
    """Build the job that assembles the full report and mails it out.

    Runs on localhost after every sample and control job has finished.
    """
    report = os.path.join(sample.results_dir, 'full_report.txt')

    # Assemble the text report from the individual statistics files.
    build_cmd = '%s %s %s %s %s %s %s %s %s' % (
        os.path.join(BIN_DIR, 'build_report_text.py'),
        sample.run_name,
        sample.archive_file_download,
        control.archive_file_download,
        os.path.join(sample.results_dir, 'rep_stats'),
        os.path.join(sample.results_dir, 'spp_stats.txt'),
        os.path.join(sample.results_dir, 'idr_results.txt'),
        os.path.join(sample.results_dir, 'pbc_stats.txt'),
        report)

    # Mail the report to every recipient.
    mail_cmd = '%s "%s Scoring Results" %s' % (
        os.path.join(BIN_DIR, 'mail_wrapper.py'), sample.run_name, report)
    for addr in emails:
        mail_cmd += ' %s' % addr

    return sjm.Job('mail_results',
                   [build_cmd, mail_cmd],
                   queue=QUEUE,
                   project=PROJECT,
                   host='localhost',
                   sched_options="-m e",
                   dependencies=sample.all_jobs() + control.all_jobs())
Esempio n. 6
0
def form_control_files(name, control):
    print " ******* form control files ****** "
    cmds = []
    control.merged_file_location = os.path.join(
        control.temp_dir, '%s_merged_eland.txt' % control.run_name)
    print " merged conrol files ", control.merged_file_location
    # Merge eland files
    cmd = os.path.join(BIN_DIR, 'merge_and_filter_reads.py')

    cmd += ' %s' % control.merged_file_location

    print " merged conrol files ", cmd
    #sys.exit()
    for mr in control.mapped_read_files:
        cmd += ' %s' % mr
    cmds.append(cmd)

    # Divide merged file by chr
    cmd = os.path.join(BIN_DIR, 'divide_eland.py')
    cmd += " %s %s %s" % (control.merged_file_location, control.genome,
                          control.results_dir)
    cmds.append(cmd)

    # Create Signal Map
    cmd = os.path.join(BIN_DIR, 'create_signal_map.py')
    cmd += ' %s %s' % (control.sgr_dir, control.results_dir)
    cmds.append(cmd)
    control.add_jobs(name, [
        sjm.Job(control.run_name,
                cmds,
                modules=["samtools/1.2"],
                queue=QUEUE,
                project=PROJECT,
                sched_options="-m e"),
    ])
Esempio n. 7
0
def form_idr_inputs(name, sample):
    """Convert every replicate's hit files to narrowPeak format for IDR.

    For each replicate (and the pooled replicate) queues one conversion job
    per file — the true replicate plus both pseudoreplicates — and records
    the output paths as rep.narrowPeak / narrowPeak_pr1 / narrowPeak_pr2.
    The triplicated stanza of the original is folded into one loop.
    """
    os.makedirs(os.path.join(sample.results_dir, 'idr'))
    jobs = []
    # (name suffix, source attribute, destination attribute)
    conversions = (
        ('', 'unfiltered_results', 'narrowPeak'),
        ('_PR1', 'unfiltered_results_pr1', 'narrowPeak_pr1'),
        ('_PR2', 'unfiltered_results_pr2', 'narrowPeak_pr2'),
    )
    for rep in sample.replicates + [
            sample.combined_replicate,
    ]:
        for tag, src_attr, dst_attr in conversions:
            narrow_peak = os.path.join(
                rep.results_dir(sample),
                rep.rep_name(sample) + tag + '_unfiltered_narrowPeak.bed')
            setattr(rep, dst_attr, narrow_peak)
            cmd = os.path.join(SUBMISSION_BIN_DIR, 'normalhits2narrowPeak')
            cmd += ' %s > %s' % (getattr(rep, src_attr), narrow_peak)
            jobs.append(
                sjm.Job(rep.rep_name(sample) + tag + '_hits2narrowPeak', [
                    cmd,
                ],
                        queue=QUEUE,
                        project=PROJECT,
                        sched_options="-m e"))

    sample.add_jobs(name, jobs)
def gatk_vqsr(pjob):
    """Queue GATK VQSR on the VCF produced by *pjob*; returns the new job.

    FIX: removed the dead `jobs = []` local the original never used.
    """
    vcf = pjob.output
    job = sjm.Job('gatk_VQSR-%s' % (vcf.prefix))
    job.memory = "10G"
    job.append('gatk_vqsr.sh %s' % vcf)
    job.depend(pjob)
    return job
def gatk_mvcf(pjobs, vcfout):
    """Concatenate the parent jobs' VCFs into *vcfout*; returns the cat job.

    BUG FIX: the original named the job after an undefined `bamfile`
    (NameError at runtime); the job is now named from the output file.
    """
    vcfs = [pjob.output for pjob in pjobs]
    outfile = util.File(os.path.join(outdir, vcfout))
    job = sjm.Job('gatk_CatVCF-%s' % (outfile.prefix))
    job.memory = "10G"
    job.output = outfile
    job.append('gatk_catvcf.sh %s %s' % (job.output, ' '.join(vcfs)))
    job.depend(*pjobs)
    return job
Esempio n. 10
0
def form_idr_inputs(name, sample):
    """Point each replicate's narrowPeak attributes at its unfiltered results.

    This variant performs no conversion; a placeholder echo job keeps the
    pipeline's stage sequencing intact.
    FIX: removed the dead `jobs = []` local the original never used.
    """
    os.makedirs(os.path.join(sample.results_dir, 'idr'))
    for rep in sample.replicates + [
            sample.combined_replicate,
    ]:
        rep.narrowPeak = rep.unfiltered_results
        rep.narrowPeak_pr1 = rep.unfiltered_results_pr1
        rep.narrowPeak_pr2 = rep.unfiltered_results_pr2
    sample.add_jobs(name, [
        sjm.Job('form_idr_inputs', [
            'echo form_idr_inputs',
        ],
                queue=QUEUE,
                project=PROJECT,
                host='localhost',
                sched_options="-m e"),
    ])
Esempio n. 11
0
def merge_results(name, sample):
    """Unpack the gzipped SPP peak calls for every replicate and the pool.

    Records the unpacked paths on each replicate and queues one job of
    three zcat commands per replicate.
    """
    jobs = []
    for rep in sample.replicates + [sample.combined_replicate]:
        rep_label = rep.rep_name(sample)
        rep.unfiltered_results = os.path.join(
            rep.results_dir(sample), '%s_peaks.regionPeak' % rep_label)
        rep.unfiltered_results_pr1 = os.path.join(
            rep.pr1_results_dir, '%s_PR1_peaks.regionPeak' % rep_label)
        rep.unfiltered_results_pr2 = os.path.join(
            rep.pr2_results_dir, '%s_PR2_peaks.regionPeak' % rep_label)
        # One zcat per (gzipped source, unpacked destination) pair.
        unpack_cmds = [
            'zcat %s > %s' % (src, dst)
            for src, dst in (
                (rep.spp_results, rep.unfiltered_results),
                (rep.spp_results_pr1, rep.unfiltered_results_pr1),
                (rep.spp_results_pr2, rep.unfiltered_results_pr2),
            )
        ]
        jobs.append(
            sjm.Job('merge_results_%s' % rep_label,
                    unpack_cmds,
                    queue=QUEUE,
                    project=PROJECT,
                    sched_options="-m e"))
    sample.add_jobs(name, jobs)
Esempio n. 12
0
def form_idr_inputs(name, sample):
    """Cap each replicate's peak files at the 300k strongest peaks for IDR.

    For the true replicate and both pseudoreplicates: sort by column 8
    descending, keep the top 300000 rows, write '<input>.filtered', and
    record it on the replicate.  The triplicated stanza of the original is
    folded into one loop; job names and outputs are unchanged.
    """
    os.makedirs(os.path.join(sample.results_dir, 'idr'))
    jobs = []
    # (job-name suffix, source attribute, destination attribute)
    filters = (
        ('', 'unfiltered_results', 'narrowPeak'),
        ('_PR1', 'unfiltered_results_pr1', 'narrowPeak_pr1'),
        ('_PR2', 'unfiltered_results_pr2', 'narrowPeak_pr2'),
    )
    for rep in sample.replicates + [
            sample.combined_replicate,
    ]:
        for tag, src_attr, dst_attr in filters:
            source = getattr(rep, src_attr)
            filtered = source + '.filtered'
            setattr(rep, dst_attr, filtered)
            # Write to a temp file first so a failed sort never clobbers
            # the destination.
            cmd = ('sort -k8nr %s | head -n 300000 > %s.temp && '
                   'mv %s.temp %s' % (source, filtered, filtered, filtered))
            jobs.append(
                sjm.Job(rep.rep_name(sample) + tag + '_narrowPeak_filter',
                        [cmd],
                        queue=QUEUE,
                        project=PROJECT))

    sample.add_jobs(name, jobs)
def gatk_joint(pjobs):
    """Queue joint genotyping over all parent gVCFs; returns a one-job list."""
    gvcf_paths = [pjob.output.path for pjob in pjobs]
    outvcf = util.File(os.path.join(args.outdir, args.output))
    job = sjm.Job('GATK-joint-gt-%s' % outvcf.name)
    job.memory = "20G"
    job.output = outvcf
    job.append('gatk_gt_joint.sh %s %s' % (job.output, ' '.join(gvcf_paths)))
    # Joint genotyping waits on every per-sample gVCF job.
    job.depend(*pjobs)
    return [job]
Esempio n. 14
0
def idr_filter(name, sample):
    """Queue the IDR filtering step over the sample's pooled results.

    The trailing '7' selects the sort column (signal.value).
    """
    pooled_results = os.path.join(sample.results_dir, 'All',
                                  sample.combined_replicate.unfiltered_results)
    cmd = '%s %s %s %i %s %s %s 7' % (
        os.path.join(BIN_DIR, 'idr_filter.py'),
        sample.run_name,
        sample.genome,
        len(sample.replicates),
        sample.idr_dir,
        pooled_results,
        sample.results_dir)
    sample.add_jobs(name, [
        sjm.Job('idr_filter_' + sample.run_name, [cmd],
                queue=QUEUE,
                project=PROJECT,
                sched_options="-m e"),
    ])
		
Esempio n. 15
0
def sam_flagstat(pjobs):
    """Queue `samtools flagstat` for each parent job's BAM.

    Each job writes '<bam-prefix>.flagstat.txt' and depends on its parent.
    """
    flagstat_jobs = []
    for parent in pjobs:
        bam = util.File(parent.output)
        job = sjm.Job('samtools-flagstat-%s' % bam.prefix)
        job.memory = "10G"
        job.output = bam.chext("flagstat.txt")
        job.append('samtools flagstat %s > %s' % (bam, job.output))
        job.depend(parent)
        flagstat_jobs.append(job)
    return flagstat_jobs
Esempio n. 16
0
def form_sample_files(name, sample):
    jobs = []
    print " peakseq: form sample files ***"
    for rep in sample.replicates:
        jobs.append(
            sjm.Job(rep.rep_name(sample) + '_merge',
                    form_replicate_files(rep, sample),
                    queue=QUEUE,
                    modules=["samtools/1.2"],
                    project=PROJECT,
                    sched_options="-m e"))

    jobs.append(
        sjm.Job(sample.run_name + '_All_merge',
                form_replicate_files(sample.combined_replicate, sample),
                modules=["samtools/1.2"],
                queue=QUEUE,
                project=PROJECT,
                sched_options="-m e"))
    sample.add_jobs(name, jobs)
Esempio n. 17
0
def archive_results(name, results_dir, archive_file, force=False):
    """Queue archiving of *results_dir* into *archive_file*.

    Raises Exception when the archive already exists and *force* is false;
    otherwise *force* is forwarded to the archive script.
    """
    if os.path.exists(archive_file) and not force:
        raise Exception("Archive file %s already exists" % archive_file)
    script = os.path.join(BIN_DIR, 'archive_results.py')
    archive_cmd = '%s %s %s %s' % (script, results_dir, archive_file, force)
    return sjm.Job('Archive_%s' % name,
                   archive_cmd,
                   queue=QUEUE,
                   project=PROJECT,
                   sched_options="-m e")
Esempio n. 18
0
def dedup_bam(pjobs):
    """Queue Picard MarkDuplicates for each parent job's BAM."""
    dedup_jobs = []
    for parent in pjobs:
        bam = parent.output
        job = sjm.Job('picard_mdup-%s' % bam.prefix)
        job.memory = "20G"
        # Deduplicated BAM lands in the shared output directory.
        job.output = os.path.join(outdir, bam.chext("mdup.bam").name)
        job.append('picard_mdup.sh %s %s' % (job.output, bam))
        job.depend(parent)
        dedup_jobs.append(job)
    return dedup_jobs
Esempio n. 19
0
def split_species(pjobs):
    """Split each parent BAM into mouse (mm9) and fly (dm3) BAMs.

    Returns (jobs_mm9, jobs_dm3), one job per parent BAM in each list.
    BUG FIX: each split job now depends only on its own parent job; the
    original called depend(*pjobs) inside the loop, making every split
    wait on every parent — inconsistent with the sibling functions here
    (dedup_bam, gatk_recal, merge_aln all use depend(pjob)).
    """
    jobs_mm9 = []
    jobs_dm3 = []
    for pjob in pjobs:
        bamfile = pjob.output
        for tag, chrs, sink in (('mm9', mm9_chrs, jobs_mm9),
                                ('dm3', dm3_chrs, jobs_dm3)):
            job = sjm.Job('samtools_%s-%s' % (tag, bamfile.prefix))
            job.memory = "12G"
            job.output = bamfile.chext('%s.bam' % tag)
            job.append('samtools view -hb %s %s > %s' %
                       (bamfile, chrs, job.output))
            job.depend(pjob)
            sink.append(job)
    return jobs_mm9, jobs_dm3
Esempio n. 20
0
def dedup_merge(pjobs, outbam):
    """Merge and mark duplicates across all parent BAMs into *outbam*.

    Returns a single-element job list; the job depends on every parent.
    """
    bams = [pjob.output.path for pjob in pjobs]
    job = sjm.Job('picard_mdup-%s' % outbam)
    job.memory = "20G"
    job.output = util.File(os.path.join(outdir, outbam))
    job.append('picard_mdup.sh %s %s' % (job.output, ' '.join(bams)))
    job.depend(*pjobs)
    return [job]
Esempio n. 21
0
def gatk_recal(pjobs):
    """Queue GATK base-quality recalibration for each parent BAM."""
    recal_jobs = []
    for parent in pjobs:
        bam = util.File(parent.output)
        job = sjm.Job('gatk_recalibrate-%s' % bam.prefix)
        job.memory = "20G"
        job.output = os.path.join(tmpdir, '%s.%s' % (bam.prefix, 'recal.bam'))
        # Carry the parent's region restriction forward.
        job.regions = parent.regions
        job.append('gatk_recal.sh %s %s' % (job.output, bam.path))
        job.depend(parent)
        recal_jobs.append(job)
    return recal_jobs
Esempio n. 22
0
def gatk_hc(pjobs):
    """Queue GATK HaplotypeCaller (gVCF output) for each parent BAM."""
    hc_jobs = []
    for parent in pjobs:
        bam = util.File(parent.output)
        job = sjm.Job('gatk_haplotypecaller-%s' % bam.prefix)
        job.memory = "40G"
        job.output = os.path.join(tmpdir, '%s.%s' % (bam.prefix, 'g.vcf.gz'))
        # Restrict calling to the parent's regions.
        job.regions = parent.regions
        job.append('gatk_hc.sh %s %s %s' %
                   (job.output, bam.path, parent.regions))
        job.depend(parent)
        hc_jobs.append(job)
    return hc_jobs
Esempio n. 23
0
def gatk_gt(pjobs):
    """Queue GATK GenotypeGVCFs for each parent job's gVCF."""
    gt_jobs = []
    for parent in pjobs:
        gvcf = util.File(parent.output, iszipfile=True)
        job = sjm.Job('gatk_genotypeGVCFs-%s' % gvcf.prefix)
        job.memory = "15G"
        job.output = os.path.join(tmpdir,
                                  '%s.%s' % (gvcf.prefix, 'gt.vcf.gz'))
        job.regions = parent.regions
        job.append('gatk_gt.sh %s %s %s' %
                   (job.output, gvcf.path, parent.regions))
        job.depend(parent)
        gt_jobs.append(job)
    return gt_jobs
Esempio n. 24
0
def merge_aln(pjobs):
    """Merge each aligned BAM with its unaligned mate into a sorted BAM.

    BUG FIX: the original used alnbam.path.rstrip('.aln.bam'), but rstrip
    strips any trailing run of the characters {., a, l, n, b, m} — a stem
    ending in those letters (e.g. 'normal.aln.bam') would be mangled.
    The literal suffix is removed explicitly instead.
    """
    merged = []
    for pjob in pjobs:
        alnbam = pjob.output
        ubam = pjob.input
        job = sjm.Job('picard_mergeBam-%s' % alnbam.name)
        job.memory = "10G"
        base = alnbam.path
        if base.endswith('.aln.bam'):
            base = base[:-len('.aln.bam')]
        job.output = util.File(base + '.sort.bam')
        job.append('picard_mergeBam.sh %s %s %s' % (job.output, alnbam, ubam))
        job.depend(pjob)
        merged.append(job)
    return merged
Esempio n. 25
0
def gatk_hc_batch(bamfile, regions_file):
    """Queue one HaplotypeCaller job per region line in *regions_file*.

    Each non-comment line is '<region_name> <regions>'; lines starting
    with '#' are skipped.
    BUG FIX: the original opened the global args.regions_file, ignoring
    the regions_file parameter, and never closed the file.
    """
    jobs = []
    with open(regions_file) as handle:
        for region_line in handle:
            region_line = region_line.rstrip('\n')
            if region_line.startswith("#"):
                continue
            region_name, regions = region_line.split(' ', 1)
            job = sjm.Job('gatk_hc_bam-%s-%s' % (bamfile.prefix, region_name))
            job.memory = "40G"
            job.output = os.path.join(
                tmpdir,
                '%s.%s.%s' % (bamfile.prefix, region_name, 'g.vcf.gz'))
            job.regions = regions
            job.append('gatk_hc.sh %s %s %s' %
                       (job.output, bamfile.path, regions))
            jobs.append(job)
    return jobs
Esempio n. 26
0
def align_pe(pjobs):
    """Queue paired-end BWA alignment for each parent job's BAM."""
    aln_jobs = []
    for parent in pjobs:
        source = parent.output
        job = sjm.Job('bwa_aln_pe-%s' % source.prefix)
        job.memory = "20G"
        job.input = source
        job.output = source.chext('aln.bam')
        # NOTE(review): the source BAM is passed twice — presumably the
        # script reads both mates from one query-name-sorted BAM; confirm.
        job.append('bwa_aln_pe_qn.sh %s %s %s' % (job.output, source, source))
        job.depend(parent)
        aln_jobs.append(job)
    return aln_jobs
Esempio n. 27
0
def form_sample_files_nodups(name, sample):
    """Queue duplicate-removing merge jobs for each replicate and the pool."""
    # Scheduling options shared by every merge job.
    common = dict(modules=["samtools/1.2"],
                  queue=QUEUE,
                  project=PROJECT,
                  memory='16G',
                  sched_options="-m e")
    jobs = [
        sjm.Job(rep.rep_name(sample) + '_merge',
                form_replicate_files(rep, sample, rmdups=True),
                **common)
        for rep in sample.replicates
    ]
    # Pooled replicate merges under '<run>_All_merge'.
    jobs.append(
        sjm.Job(sample.run_name + '_All_merge',
                form_replicate_files(sample.combined_replicate,
                                     sample,
                                     rmdups=True),
                **common))
    sample.add_jobs(name, jobs)
Esempio n. 28
0
def sort_ubam(ubams):
    """Queue Picard sorting of each unaligned BAM into tmpdir.

    A source named '<prefix>.u.bam' is written as '<prefix>.bam'.
    BUG FIX: the original used path.rstrip('u.bam'), which strips any
    trailing run of the characters {u, ., b, a, m} — e.g. 'album.u.bam'
    would lose part of its stem.  The literal suffix is removed instead.
    """
    jobs = []
    for path in ubams:
        ubam = util.File(path)
        base = os.path.basename(ubam.path)
        if base.endswith('u.bam'):
            base = base[:-len('u.bam')].rstrip('.')
        obam = util.File(os.path.join(tmpdir, base + '.bam'))
        job = sjm.Job('picard_sortUbam-%s' % ubam.prefix)
        job.memory = "20G"
        job.input = ubam
        job.output = obam
        # NOTE(review): input is passed before output here, unlike the
        # other picard wrappers in this file — confirm the script's
        # expected argument order.
        job.append('picard_sortUbam.sh %s %s' % (job.input, job.output))
        jobs.append(job)
    return jobs
Esempio n. 29
0
def align_se(reads1, reads2):
    """Queue BWA alignment for each read-file pair, one sorted BAM per pair.

    reads1 and reads2 are expected to be parallel lists of equal length.
    NOTE(review): despite the _se name this pairs reads1[i] with reads2[i]
    and relies on a module-level `readgroup` — confirm both with callers.
    FIX: replaced the manual index loop with zip and dropped the unused
    util.File(read2) local.
    """
    jobs = []
    for read1, read2 in zip(reads1, reads2):
        readfile1 = util.File(read1)
        # BAM name: strip the R1/r1 marker from the first read's prefix.
        bamname = re.sub(r'[._][Rr]1', '', readfile1.prefix) + '.sorted.bam'
        job = sjm.Job('bwa_aln_se-%s' % readfile1.prefix)
        job.output = util.File(os.path.join(tmpdir, bamname))
        job.append('bwa_aln_se.sh %s %s %s %s' %
                   (job.output, read1, read2, readgroup))
        jobs.append(job)
    return jobs
Esempio n. 30
0
def merge_bam(pjobs, out_prefix, suffix=None):
    """Merge all parent BAMs into '<out_prefix>.<suffix>.bam' and index it.

    Caveat: if the output bam exists, samtools merge needs "-f" to
    overwrite or the task will abort.
    """
    bams = [pjob.output.path for pjob in pjobs]
    job = sjm.Job('samtools_merge-%s' % suffix)
    job.memory = "5G"
    merged_path = os.path.join(tmpdir, '%s.%s.bam' % (out_prefix, suffix))
    job.output = util.File(merged_path)
    job.append('samtools merge %s %s && samtools index %s' %
               (job.output, ' '.join(bams), job.output))
    job.depend(*pjobs)
    return job