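# --- Assumed imports for this section (sketch) -----------------------------
# The exact import paths below are assumptions and may differ from the real
# module layout. Helpers used below but not imported here (all_snps,
# exon_snps, create_tracks, heterozygosity, pileup, macs_bedfiles,
# run_wellington, motif_scan, plot_footprint_profile) are expected to be
# defined elsewhere in this package.
import sys
import os
import tarfile
from pysam import Samfile, Fastafile
from bein import unique_filename_in                    # assumed location
from bein.util import touch                            # assumed location
from bbcflib import mapseq                             # assumed location
from bbcflib.common import set_file_descr, merge_sql   # assumed location
from bbcflib.mapseq import merge_bam, index_bam        # assumed location
from bbcflib.track import track                        # assumed location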
def dnaseseq_workflow(ex, job, assembly, logfile=sys.stdout, via='lsf'):
    """
    This workflow performs the following steps:

      * BAM files from replicates within the same group are merged
      * MACS is called to identify enriched regions (only the peak summit
        +-300bp will be used); this can be bypassed by providing a bed file
        to any group
      * Wellington is called to identify footprints within these enriched
        regions
      * If a list of motifs is provided (by group), footprints are scanned
        and motif occurrences (log-likelihood ratio > 0) are recorded in a
        bed file
      * Average DNase profiles around motifs are plotted
    """
    tests = []
    controls = []
    names = {'tests': [], 'controls': []}
    supdir = os.path.split(ex.remote_working_directory)[0]
    for gid, mapped in job.files.iteritems():
        group_name = job.groups[gid]['name']
        if not isinstance(mapped, dict):
            raise TypeError("Files values must be dictionaries with keys *run_ids* or 'bam'.")
        if 'bam' in mapped:
            mapped = {'_': mapped}
        if len(mapped) > 1:
            # Merge all replicate BAMs of the group into one file
            bamfile = merge_bam(ex, [m['bam'] for m in mapped.values()])
            index = index_bam(ex, bamfile)
        else:
            bamfile = mapped.values()[0]['bam']
        if job.groups[gid]['control']:
            controls.append(bamfile)
            names['controls'].append((gid, group_name))
        else:
            # A user-provided bed file bypasses the MACS step for this group
            if os.path.exists(job.groups[gid].get('bedfile', 'null')):
                bedfile = job.groups[gid]['bedfile']
            elif os.path.exists(os.path.join(supdir, job.groups[gid].get('bedfile', 'null'))):
                bedfile = os.path.join(supdir, job.groups[gid]['bedfile'])
            else:
                bedfile = None
            tests.append((bedfile, bamfile))
            names['tests'].append((gid, group_name))
    if len(controls) < 1:
        controls = [None]
        names['controls'] = [(0, None)]
    tests = macs_bedfiles(ex, assembly.chrmeta, tests, controls, names,
                          job.options.get('macs_args', ["--keep-dup", "10"]),
                          via, logfile)
    bedlist = run_wellington(ex, tests, names, assembly, via, logfile)
    ######################### Motif scanning / plotting
    if any(gr.get('motif') != 'null' and gr.get('motif')
           for gr in job.groups.values()):
        motifbeds = motif_scan(ex, bedlist, assembly, job.groups, via, logfile)
        siglist = dict((gid[0], []) for gid in names['tests'])
        for gid, mapped in job.files.iteritems():
            wig = []
            suffixes = ["fwd", "rev"]
            merge_strands = int(job.options.get('merge_strands', -1))
            read_extension = int(job.options.get('read_extension') or -1)
            make_wigs = merge_strands >= 0 or read_extension != 1
            for m in mapped.values():
                if make_wigs or 'wig' not in m or len(m['wig']) < 2:
                    # Compute strand-specific density tracks from the BAM
                    output = mapseq.parallel_density_sql(
                        ex, m["bam"], assembly.chrmeta,
                        nreads=m["stats"]["total"],
                        merge=-1, read_extension=1,
                        convert=False, b2w_args=[], via=via)
                    wig.append(dict((s, output + s + '.sql') for s in suffixes))
                else:
                    wig.append(m['wig'])
            if len(wig) > 1:
                wig[0] = dict((s, merge_sql(ex, [x[s] for x in wig], via=via))
                              for s in suffixes)
            _trn = job.groups[gid]['name'] + "_%s"
            if job.groups[gid]['control']:
                # Control signals are added to every test group's list
                for s, w in wig[0].iteritems():
                    for _g in siglist.keys():
                        siglist[_g].append(track(w, info={'name': _trn % s}))
            else:
                siglist[gid].extend([track(w, info={'name': _trn % s})
                                     for s, w in wig[0].iteritems()])
        plot_files = plot_footprint_profile(ex, motifbeds, siglist,
                                            assembly.chrnames, job.groups,
                                            logfile)
        for gid, flist in plot_files.iteritems():
            gname = job.groups[gid]['name']
            plotall = unique_filename_in()
            touch(ex, plotall)
            ex.add(plotall,
                   description=set_file_descr(gname + '_footprints_plots',
                                              type='none', view='admin',
                                              step='motifs', groupId=gid))
            ex.add(flist['pdf'],
                   description=set_file_descr(gname + '_footprints_plots.pdf',
                                              type='pdf', step='motifs',
                                              groupId=gid),
                   associate_to_filename=plotall, template='%s.pdf')
            tarname = unique_filename_in()
            tarfh = tarfile.open(tarname, "w:gz")
            for mname, matf in flist['mat']:
                tarfh.add(matf, arcname="%s_%s.txt" % (gname, mname))
            tarfh.close()
            ex.add(tarname,
                   description=set_file_descr(gname + '_footprints_plots.tar.gz',
                                              type='tar', step='motifs',
                                              groupId=gid),
                   associate_to_filename=plotall, template='%s.tar.gz')
    logfile.write("\nDone.\n")
    logfile.flush()
    return 0
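# Minimal invocation sketch for dnaseseq_workflow (illustrative, not a tested
# entry point): assumes a bein-style execution context plus pre-built `job`
# (with .files, .groups, .options) and `assembly` objects supplied by the
# surrounding framework.
#
#   from bein import execution
#   with execution(None) as ex:
#       dnaseseq_workflow(ex, job, assembly, logfile=sys.stdout, via='lsf')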
def snp_workflow(ex, job, assembly, minsnp=40., mincov=5, path_to_ref=None,
                 via='local', logfile=sys.stdout, debugfile=sys.stderr):
    """Main function of the workflow."""
    ref_genome = assembly.fasta_by_chrom
    sample_names = [job.groups[gid]['name'] for gid in sorted(job.files.keys())]

    logfile.write("\n* Generate vcfs for each chrom/group\n")
    logfile.flush()
    vcfs = dict((chrom, {}) for chrom in ref_genome.keys())  # {chrom: {gid: vcf}}
    bams = {}
    # Launch the jobs
    # Rewrite the BAM header so chromosome names match assembly accessions
    bam = Samfile(job.files.values()[0].values()[0]['bam'])
    header = bam.header
    headerfile = unique_filename_in()
    for h in header["SQ"]:
        if h["SN"] in assembly.chrmeta:
            h["SN"] = assembly.chrmeta[h["SN"]]["ac"]
    head = Samfile(headerfile, "wh", header=header)
    head.close()
    for gid in job.files.keys():
        # Merge all bams belonging to the same group
        runs = [r['bam'] for r in job.files[gid].itervalues()]
        if len(runs) > 1:
            _b = merge_bam(ex, runs)
            index_bam(ex, _b)
            bams[gid] = _b
        else:
            index_bam(ex, runs[0])
            bams[gid] = runs[0]
        # Samtools mpileup + bcftools + vcfutils.pl
        for chrom, ref in ref_genome.iteritems():
            vcf = unique_filename_in()
            vcfs[chrom][gid] = (vcf,
                                pileup.nonblocking(ex, bams[gid], ref,
                                                   header=headerfile,
                                                   via=via, stdout=vcf))
        logfile.write(" ...Group %s running.\n" % job.groups[gid]['name'])
        logfile.flush()
    # Wait for vcfs to finish and store them in *vcfs[chrom][gid]*
    for gid in job.files.keys():
        for chrom, ref in ref_genome.iteritems():
            vcfs[chrom][gid][1].wait()
            vcfs[chrom][gid] = vcfs[chrom][gid][0]
        logfile.write(" ...Group %s done.\n" % job.groups[gid]['name'])
        logfile.flush()
    # Targz the pileup files (vcf)
    tarname = unique_filename_in()
    tarfh = tarfile.open(tarname, "w:gz")
    for chrom, v in vcfs.iteritems():
        for gid, vcf in v.iteritems():
            tarfh.add(vcf, arcname="%s_%s.vcf" % (job.groups[gid]['name'], chrom))
    tarfh.close()
    ex.add(tarname,
           description=set_file_descr("vcf_files.tar.gz", step="pileup",
                                      type="tar", view='admin'))

    logfile.write("\n* Merge info from vcf files\n")
    logfile.flush()
    outall = unique_filename_in()
    outexons = unique_filename_in()
    with open(outall, "w") as fout:
        fout.write('#' + '\t'.join(['chromosome', 'position', 'reference']
                                   + sample_names
                                   + ['gene', 'location_type', 'distance']) + '\n')
    with open(outexons, "w") as fout:
        fout.write('#' + '\t'.join(['chromosome', 'position', 'reference']
                                   + sample_names
                                   + ['exon', 'strand', 'ref_aa']
                                   + ['new_aa_' + s for s in sample_names]) + '\n')
    msa_table = dict((s, '') for s in [assembly.name] + sample_names)
    for chrom, v in vcfs.iteritems():
        logfile.write("  > Chromosome '%s'\n" % chrom)
        logfile.flush()
        # Put together info from all vcf files
        logfile.write("  - All SNPs\n")
        logfile.flush()
        allsnps = all_snps(ex, chrom, vcfs[chrom], bams, outall, assembly,
                           headerfile, sample_names, mincov, float(minsnp),
                           logfile, debugfile, via)
        # Annotate SNPs and check synonymy
        logfile.write("  - Exonic SNPs\n")
        logfile.flush()
        exon_snps(chrom, outexons, allsnps, assembly, sample_names, ref_genome,
                  logfile, debugfile)
        # Accumulate one aligned base per sample and per SNP row; deletions
        # fall back to the reference base, anything else becomes 'N'
        for snprow in allsnps:
            for n, k in enumerate([assembly.name] + sample_names):
                base = snprow[3 + n][0]
                if base == "-":
                    base = snprow[3][0]
                if base not in 'ACGTacgt':
                    base = "N"
                msa_table[k] += base
    description = set_file_descr("allSNP.txt", step="SNPs", type="txt")
    ex.add(outall, description=description)
    description = set_file_descr("exonsSNP.txt", step="SNPs", type="txt")
    ex.add(outexons, description=description)
    msafile = unique_filename_in()
    with open(msafile, "w") as msa:
        # PHYLIP-like header: <number of sequences> <alignment length>
        msa.write(" %i %i\n" % (len(msa_table), len(msa_table.values()[0])))
        for name, seq in msa_table.iteritems():
            msa.write("%s\t%s\n" % (name, seq))
    msa_table = {}
    description = set_file_descr("SNPalignment.txt", step="SNPs", type="txt")
    ex.add(msafile, description=description)

    # Create UCSC bed tracks
    logfile.write("\n* Create tracks\n")
    logfile.flush()
    create_tracks(ex, outall, sample_names, assembly)

    # Create quantitative tracks
    logfile.write("\n* Create heteroz. and quality tracks\n")
    logfile.flush()

    def _process_pileup(pileups, seq, startpos, endpos):
        """Collect per-position (coverage, heterozygosity, mean base quality)
        vectors over the pileup columns in [startpos, endpos)."""
        atoi = {'A': 0, 'C': 1, 'G': 2, 'T': 3}  # anything else maps to index 4
        vectors = ([], [], [])
        for pileupcolumn in pileups:
            position = pileupcolumn.pos
            if position < startpos:
                continue
            if position >= endpos:
                break
            coverage = pileupcolumn.n
            ref_symbol = seq[position - startpos]
            ref = atoi.get(ref_symbol, 4)
            symbols = [0, 0, 0, 0, 0]
            quality = 0
            for pileupread in pileupcolumn.pileups:
                if pileupread.qpos >= len(pileupread.alignment.seq):
                    coverage -= 1
                else:
                    symbols[atoi.get(pileupread.alignment.seq[pileupread.qpos], 4)] += 1
                    quality += ord(pileupread.alignment.qual[pileupread.qpos]) - 33
            if coverage > 0:  # guard: all reads may have been skipped above
                quality = float(quality) / coverage
            info = heterozygosity(ref, symbols[0:4])
            if coverage > 0:
                vectors[0].append((position, position + 1, coverage))
            if info > 0:
                vectors[1].append((position, position + 1, info))
            if quality > 0:
                vectors[2].append((position, position + 1, quality))
        return vectors

    if job.options.get('make_bigwigs', False):
        _descr = {'groupId': 0, 'step': "tracks", 'type': "bigWig", 'ucsc': '1'}
        for gid, bamfile in bams.iteritems():
            _descr['groupId'] = gid
            bamtr = track(bamfile, format="bam")
            covname = unique_filename_in() + ".bw"
            out_cov = track(covname, chrmeta=assembly.chrmeta)
            hetname = unique_filename_in() + ".bw"
            out_het = track(hetname, chrmeta=assembly.chrmeta)
            qualname = unique_filename_in() + ".bw"
            out_qual = track(qualname, chrmeta=assembly.chrmeta)
            for chrom, cinfo in assembly.chrmeta.iteritems():
                fasta = Fastafile(ref_genome[chrom])
                # Process fasta and bam by 10Mb chunks
                for chunk in range(0, cinfo["length"], 10**7):
                    fastaseq = fasta.fetch(cinfo['ac'], chunk, chunk + 10**7)
                    vecs = _process_pileup(
                        bamtr.pileup(chrom, chunk, chunk + 10**7),
                        fastaseq, chunk, chunk + 10**7)
                    out_cov.write(vecs[0], fields=['start', 'end', 'score'], chrom=chrom)
                    out_het.write(vecs[1], fields=['start', 'end', 'score'], chrom=chrom)
                    out_qual.write(vecs[2], fields=['start', 'end', 'score'], chrom=chrom)
            out_cov.close()
            out_het.close()
            out_qual.close()
            description = set_file_descr(job.groups[gid]['name'] + "_coverage.bw", **_descr)
            ex.add(covname, description=description)
            description = set_file_descr(job.groups[gid]['name'] + "_heterozygosity.bw", **_descr)
            ex.add(hetname, description=description)
            description = set_file_descr(job.groups[gid]['name'] + "_quality.bw", **_descr)
            ex.add(qualname, description=description)
    return 0
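# A hypothetical stand-in for the heterozygosity() score used by
# _process_pileup above. The real implementation lives elsewhere in this
# module and may differ; this sketch (kept commented out so it cannot shadow
# the real definition) computes the expected heterozygosity 1 - sum(p_i^2)
# over the observed A/C/G/T counts and ignores the reference index `ref`.
#
# def heterozygosity(ref, counts):
#     """Illustrative only: 1 minus the sum of squared allele frequencies."""
#     total = float(sum(counts))
#     if total == 0:
#         return 0.0
#     return 1.0 - sum((c / total) ** 2 for c in counts)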