def collect_bam_stats(self, bam, bed, orig_vcf):
    """
    Gather bam-level statistics for every region listed in a bed file.

    Each region produced by util.read_regions(bed) is paired with the
    variant that util.find_matching_var locates for it in orig_vcf (the
    original / input variants). The var_key of that variant is used as the
    dictionary key, and the value is whatever
    bam_simulation.gen_bam_stats(bam, region) returns.

    If two regions resolve to the same key, the later region's stats
    replace the earlier ones.

    :param bam: bam input handed through to bam_simulation.gen_bam_stats
    :param bed: bed input handed through to util.read_regions
    :param orig_vcf: original variants searched by util.find_matching_var
    :return: defaultdict(dict) mapping variant key -> stats for its region
    """
    stats_by_variant = defaultdict(dict)
    for bed_region in util.read_regions(bed):
        matching_var = util.find_matching_var(orig_vcf, bed_region)
        variant_key = var_key(matching_var)
        region_stats = bam_simulation.gen_bam_stats(bam, bed_region)
        stats_by_variant[variant_key] = region_stats
    return stats_by_variant
Example #2
0
import ConfigParser as cp
from collections import namedtuple

import pysam

from vcomp.sim import bam_simulation as bs

# Lightweight record types; the field names are given as space-separated
# strings, which namedtuple treats the same as a list of names.
Var = namedtuple('Var', 'chrom start ref alts')
VarSet = namedtuple('VarSet', 'policy vars')
ExSNPInfo = namedtuple('ExSNPInfo', 'policy dist')

# Script-wide configuration, loaded from comp.conf.
conf = cp.SafeConfigParser()
conf.read("comp.conf")

# NOTE(review): `vars` shadows the Python builtin of the same name. The name is
# kept because the commented-out steps below refer to it; consider renaming it
# (e.g. `variants`) if those steps are revived.
vars = pysam.VariantFile('test_single.vcf')
# v = [Var('9', 127818330, 'GCTG', ('G',)), Var('9', 127818335, 'C', ('A',))]
# vars = [VarSet(bs.TRANS, v)]

# snp_inf = ExSNPInfo(policy=bs.TRANS, dist=-1)
# sets = ij.create_variant_sets(vars, snp_inf, bs.ALL_HOMS, pysam.FastaFile(conf.get('main', 'ref_genome')))
# reads = bs.gen_alt_fq(conf.get('main', 'ref_genome'), sets, 200)
# bam = bs.gen_alt_bam(conf.get('main', 'ref_genome'), conf, reads)

# Compute stats for one hard-coded bam file over the region
# ("12", 32876896, 32878896) and dump them, one "key<TAB>:<TAB>value" line each.
stats = bs.gen_bam_stats("/Users/bofallon/clinvar_comp/het_trans_results/clinvar.nonsnps.pathogenic_part167-tmpfiles-0/input_r1.fq.bam", ("12", 32876896, 32878896))
# .items() and a parenthesised single-argument print give the same output on
# Python 2 and also work on Python 3 (iteritems() and the print statement are
# Python-2-only). Assumes `stats` is a dict-like mapping - TODO confirm.
for k, v in stats.items():
    print(str(k) + "\t:\t" + str(v))