def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype on a separate line.

    Runs a fixed query against the ``variants`` table through ``conn`` and
    prints one ``args.separator``-delimited line per (variant, sample)
    pair: the selected variant columns (everything except the packed
    ``gts`` blob), then the sample name and that sample's genotype.
    A header line is printed first when ``args.use_header`` is set.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    res = conn.execute(sql.text(query))

    # Column names of the variants table with the gt_* columns left out.
    # The helper also returns the matching indices, which are not used here.
    col_names, _ = util.get_col_names_and_indices(
        metadata.tables["variants"], ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    sep = args.separator
    if args.use_header:
        print(sep.join(col_names))

    for row in res:
        gts = Z.unpack_genotype_blob(row['gts'])
        # v.gts is the last selected column, so stop one short of the end.
        # These fields are the same for every sample of the row: build the
        # prefix once per variant rather than once per genotype.
        variant_fields = sep.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            print(sep.join([variant_fields, idx_to_sample[idx], gt]))
def get_genotypes(c, args):
    """For each variant, report each sample's genotype on a separate line.

    Executes a fixed query against the ``variants`` table on cursor ``c``
    and prints one line per (variant, sample) pair: the selected variant
    columns (everything except the packed ``gts`` blob), the sample name
    and that sample's genotype.  Every field, including the boundary
    between the variant columns and the sample name, is delimited by
    ``args.separator``.  A header line is printed first when
    ``args.use_header`` is set.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    c.execute(query)

    # Column names of the result set with the gt_* columns left out.
    # The helper also returns the matching indices, which are not used here.
    col_names, _ = util.get_col_names_and_indices(
        c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    sep = args.separator
    if args.use_header:
        print(sep.join(col_names))

    for row in c:
        # NOTE(review): the blob is unpickled as-is; this is only safe for
        # databases produced by a trusted loader.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # v.gts is the last selected column, so stop one short of the end.
        # Built once per variant; it is identical for every sample.
        variant_fields = sep.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # Emit a single joined line.  Two prints chained with a
            # trailing comma would put a space, not the separator,
            # between the variant columns and the sample name.
            print(sep.join([variant_fields, idx_to_sample[idx], gt]))
def sample_lof_variants(c, args, samples):
    """Print the LoF-interaction header, then delegate the reporting.

    Executes a query on cursor ``c`` selecting position, genotype and gene
    columns for every variant with ``is_lof='1'``, prints a tab-separated
    header (with extra per-variant columns when ``args.var_mode`` is
    truthy) and calls ``sample_lof_interactions`` with the cursor,
    ``args``, the index-to-sample map and ``samples``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT chrom, start, end, \
                    gt_types, gts, gene \
             FROM variants \
             WHERE is_lof='1'"
    c.execute(query)

    # Columns shared by both output modes.
    header = ['sample', 'lof_gene', 'order_of_interaction',
              'interacting_gene']
    if args.var_mode:
        # Variant-level detail columns.
        header.extend(['var_id', 'chrom', 'start', 'end', 'impact',
                       'biotype', 'in_dbsnp', 'clinvar_sig',
                       'clinvar_disease_name', 'aaf_1kg_all',
                       'aaf_esp_all'])
    print("\t".join(header))

    sample_lof_interactions(c, args, idx_to_sample, samples)
def __init__(self, db, include_gt_cols=False,
             out_format=DefaultRowFormat(None)):
    """Initialise query state for the database file at ``db``.

    Asserts that ``db`` exists on disk, connects to it, collects the
    sample-table column names and builds the sample/index lookup maps.

    db: path to the database file.
    include_gt_cols: stored unchanged on ``self.include_gt_cols``.
    out_format: row formatter stored on ``self.formatter``; its
        ``predicate`` attribute becomes the first entry of
        ``self.predicates``.

    Raises AssertionError when ``db`` does not exist (unless asserts are
    disabled with ``-O``).
    """
    # NOTE(review): the default ``DefaultRowFormat(None)`` is created once,
    # when this ``def`` is evaluated, and is shared by every instance that
    # does not pass its own ``out_format``.
    assert os.path.exists(db), "%s does not exist." % db

    self.db = db
    self.query_executed = False
    self.for_browser = False
    self.include_gt_cols = include_gt_cols

    # try to connect to the provided database
    self._connect_to_database()

    # extract the column names from the sample table.
    # needed for gt-filter wildcard support.
    self._collect_sample_table_columns()

    # list of samples ids for each clause in the --gt-filter
    self.sample_info = collections.defaultdict(list)

    # map sample names to indices. e.g. self.sample_to_idx[NA20814] -> 323
    self.sample_to_idx = util.map_samples_to_indices(self.c)
    # and vice versa. e.g., self.idx_to_sample[323] -> NA20814
    self.idx_to_sample = util.map_indices_to_samples(self.c)
    self.idx_to_sample_object = util.map_indices_to_sample_objects(self.c)

    self.formatter = out_format
    # The formatter's predicate is the initial (and so far only) predicate.
    self.predicates = [self.formatter.predicate]
def get_gtcounts_by_sample(c, args):
    """
    Report the count of each genotype class observed for each sample.

    Prints a tab-separated header, then one line per row of
    ``sample_genotype_counts``: the sample name looked up from the row's
    ``sample_id``, the four per-class counts and their sum (``total``).
    """
    idx_to_sample = util.map_indices_to_samples(c)

    # The count columns double as header names and as row keys.
    count_cols = ['num_hom_ref', 'num_het', 'num_hom_alt', 'num_unknown',
                  'total']

    # report.
    print('\t'.join(['sample'] + count_cols))

    query = "SELECT *, \
             (num_hom_ref + num_het + num_hom_alt + num_unknown) as total \
             FROM sample_genotype_counts"
    c.execute(query)

    # one output line per sample row.
    for row in c:
        fields = [idx_to_sample[row['sample_id']]]
        fields.extend(row[col] for col in count_cols)
        print("\t".join(map(str, fields)))
def sample_variants(conn, metadata, args):
    """Query all variants and dispatch on ``args.command``.

    'interactions': prints a tab-separated header (with extra per-variant
    columns when ``args.var_mode`` is truthy) and calls
    ``sample_gene_interactions`` with the query result.

    'lof_interactions': returns whatever ``get_variant_genes`` returns
    for the query result.

    Any other command (and the 'interactions' path) returns None.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT variant_id, gt_types, gts, gene, impact, biotype, \
                    in_dbsnp, clinvar_sig, clinvar_disease_name, aaf_1kg_all, aaf_esp_all, chrom, \
                    start, end  \
             FROM variants"
    res = conn.execute(query)

    command = args.command
    if command == 'lof_interactions':
        return get_variant_genes(res, args, idx_to_sample)

    if command == 'interactions':
        # Columns shared by both output modes.
        header = ['sample', 'gene', 'order_of_interaction',
                  'interacting_gene']
        if args.var_mode:
            # Variant-level detail columns.
            header.extend(['var_id', 'chrom', 'start', 'end', 'impact',
                           'biotype', 'in_dbsnp', 'clinvar_sig',
                           'clinvar_disease_name', 'aaf_1kg_all',
                           'aaf_esp_all'])
        print("\t".join(header))
        sample_gene_interactions(res, args, idx_to_sample)
def sample_variants(c, args):
    """Query all variants on cursor ``c`` and dispatch on ``args.command``.

    'interactions': prints a tab-separated header (with extra per-variant
    columns when ``args.var_mode`` is truthy) and calls
    ``sample_gene_interactions`` with the cursor.

    'lof_interactions': returns whatever ``get_variant_genes`` returns
    for the cursor.

    Any other command (and the 'interactions' path) returns None.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT variant_id, gt_types, gts, gene, impact, biotype, \
                    in_dbsnp, clinvar_sig, clinvar_disease_name, aaf_1kg_all, aaf_esp_all, chrom, \
                    start, end  \
             FROM variants"
    c.execute(query)

    command = args.command
    if command == 'lof_interactions':
        return get_variant_genes(c, args, idx_to_sample)

    if command == 'interactions':
        # Columns shared by both output modes.
        header = ['sample', 'gene', 'order_of_interaction',
                  'interacting_gene']
        if args.var_mode:
            # Variant-level detail columns.
            header.extend(['var_id', 'chrom', 'start', 'end', 'impact',
                           'biotype', 'in_dbsnp', 'clinvar_sig',
                           'clinvar_disease_name', 'aaf_1kg_all',
                           'aaf_esp_all'])
        print("\t".join(header))
        sample_gene_interactions(c, args, idx_to_sample)
def get_ind_lof(c, args):
    """Report every LoF SNP carried by each sample, one line per carrier.

    Joins ``variants`` with ``variant_impacts`` on cursor ``c``, keeping
    impacts with ``is_lof='1'`` on variants of type 'snp'.  For each result
    row it derives the variant's position within the transcript and that
    position as a fraction of the amino-acid length, then prints one
    tab-separated line for every sample whose genotype type is ``HET`` or
    ``HOM_ALT``.  Missing values are printed as the text 'None'.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             v.impact, v.aa_change, v.aa_length, \
                             v.gt_types, v.gts, i.gene, \
                             i.transcript,  i.biotype\
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1' \
             AND v.type = 'snp'"
    c.execute(query)

    # header
    print('\t'.join([
        'chrom', 'start', 'end', 'ref', 'alt', 'highest_impact',
        'aa_change', 'var_trans_pos', 'trans_aa_length', 'var_trans_pct',
        'sample', 'genotype', 'gene', 'transcript', 'trans_type'
    ]))

    for r in c:
        # NOTE(review): blobs are unpickled as-is; only safe for databases
        # produced by a trusted loader.
        gt_types = np.array(cPickle.loads(zlib.decompress(r['gt_types'])))
        gts = np.array(cPickle.loads(zlib.decompress(r['gts'])))
        gene = str(r['gene'])
        trans = str(r['transcript'])

        # str() turns a NULL column into the text 'None'.
        aa_change = str(r['aa_change'])
        aa_length = str(r['aa_length'])
        transcript_pos = None
        transcript_pct = None
        if aa_change != 'None':
            try:
                # transcript_pos for snpEff annotated VCF
                transcript_pos = re.findall(r'\S(\d+)\S', aa_change)[0]
            except IndexError:
                # transcript_pos for VEP annotated VCF ("pos/length")
                if aa_length != 'None' and \
                        aa_length.split("/")[0] != "-":
                    transcript_pos = aa_length.split("/")[0]

        # Only compute the fraction when a position was actually found:
        # a VEP length such as "-/123" leaves transcript_pos as None, and
        # float(None) would raise TypeError.
        if transcript_pos is not None and aa_length != 'None':
            if "/" in aa_length:
                # VEP: the length is the part after the slash
                total_length = aa_length.split("/")[1]
            else:
                # snpEff: aa_length is the length itself
                total_length = aa_length
            transcript_pct = float(transcript_pos) / float(total_length)

        for idx, gt_type in enumerate(gt_types):
            if gt_type == HET or gt_type == HOM_ALT:
                print("\t".join([
                    r['chrom'], str(r['start']), str(r['end']),
                    r['ref'], r['alt'], r['impact'],
                    r['aa_change'] or 'None',
                    transcript_pos or 'None',
                    r['aa_length'] or 'None',
                    str(transcript_pct),
                    idx_to_sample[idx], gts[idx],
                    gene, trans,
                    r['biotype'] or 'None'
                ]))
def get_genotypes(c, args):
    """For each variant, report each sample's genotype on a separate line.

    Executes a fixed query against the ``variants`` table on cursor ``c``
    and prints one line per (variant, sample) pair: the selected variant
    columns (everything except the packed ``gts`` blob), the sample name
    and that sample's genotype.  All fields are delimited by
    ``args.separator``, including the boundary between the variant columns
    and the sample name.  A header line is printed first when
    ``args.use_header`` is set.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    c.execute(query)

    # Result-set column names minus the gt_* columns; the index list the
    # helper returns with them is not needed.
    col_names, _ = util.get_col_names_and_indices(
        c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    delim = args.separator
    if args.use_header:
        print(delim.join(col_names))

    for row in c:
        # NOTE(review): the blob is unpickled as-is; only safe for
        # databases produced by a trusted loader.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # Skip the final column (v.gts).  The prefix does not depend on the
        # sample, so it is computed once per variant.
        prefix = delim.join(str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # One joined line keeps the delimiter uniform.  A print with a
            # trailing comma followed by a second print would separate the
            # two halves with a space.
            print(delim.join([prefix, idx_to_sample[idx], gt]))
def __init__(self, db, include_gt_cols=False,
             out_format=DefaultRowFormat(None)):
    """Prepare a query object bound to the database file ``db``.

    Steps, in order: assert the file exists, record the basic flags,
    connect, read the sample-table columns, then build the lookups between
    sample names and indices.

    db: path to the database file.
    include_gt_cols: stored unchanged on ``self.include_gt_cols``.
    out_format: row formatter stored on ``self.formatter``; its
        ``predicate`` attribute becomes the first entry of
        ``self.predicates``.

    Raises AssertionError when ``db`` does not exist (unless asserts are
    disabled with ``-O``).
    """
    # NOTE(review): ``DefaultRowFormat(None)`` in the signature is evaluated
    # once at definition time, so instances created without ``out_format``
    # all share that single formatter object.
    assert os.path.exists(db), "%s does not exist." % db

    self.db = db
    self.query_executed = False
    self.for_browser = False
    self.include_gt_cols = include_gt_cols

    # try to connect to the provided database
    self._connect_to_database()

    # extract the column names from the sample table.
    # needed for gt-filter wildcard support.
    self._collect_sample_table_columns()

    # list of samples ids for each clause in the --gt-filter
    self.sample_info = collections.defaultdict(list)

    # map sample names to indices. e.g. self.sample_to_idx[NA20814] -> 323
    self.sample_to_idx = util.map_samples_to_indices(self.c)
    # and vice versa. e.g., self.idx_to_sample[323] -> NA20814
    self.idx_to_sample = util.map_indices_to_samples(self.c)
    self.idx_to_sample_object = util.map_indices_to_sample_objects(self.c)

    self.formatter = out_format
    # Start the predicate list with the formatter's own predicate.
    self.predicates = [self.formatter.predicate]
def get_ind_lof(c, args):
    """Report every LoF SNP carried by each sample, one line per carrier.

    Joins ``variants`` with ``variant_impacts`` on cursor ``c``, keeping
    impacts with ``is_lof='1'`` on variants of type 'snp'.  For each result
    row it derives the variant's position within the transcript and that
    position as a fraction of the amino-acid length, then prints one
    tab-separated line for every sample whose genotype type is ``HET`` or
    ``HOM_ALT``.  Missing values are printed as the text 'None'.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             v.impact, v.aa_change, v.aa_length, \
                             v.gt_types, v.gts, i.gene, \
                             i.transcript,  i.biotype\
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1' \
             AND v.type = 'snp'"
    c.execute(query)

    # header
    print('\t'.join(['chrom', 'start', 'end', 'ref', 'alt',
                     'highest_impact', 'aa_change', 'var_trans_pos',
                     'trans_aa_length', 'var_trans_pct',
                     'sample', 'genotype', 'gene', 'transcript',
                     'trans_type']))

    for r in c:
        gt_types = Z.unpack_genotype_blob(r['gt_types'])
        gts = Z.unpack_genotype_blob(r['gts'])
        gene = str(r['gene'])
        trans = str(r['transcript'])

        # str() turns a NULL column into the text 'None'.
        aa_change = str(r['aa_change'])
        aa_length = str(r['aa_length'])
        transcript_pos = None
        transcript_pct = None
        if aa_change != 'None':
            try:
                # transcript_pos for snpEff annotated VCF
                transcript_pos = re.findall(r'\S(\d+)\S', aa_change)[0]
            except IndexError:
                # transcript_pos for VEP annotated VCF ("pos/length")
                if aa_length != 'None' and \
                        aa_length.split("/")[0] != "-":
                    transcript_pos = aa_length.split("/")[0]

        # Skip the fraction when no position could be derived: with a VEP
        # length like "-/123" transcript_pos stays None and float(None)
        # would raise TypeError.
        if transcript_pos is not None and aa_length != 'None':
            if "/" in aa_length:
                # VEP: the length is the part after the slash
                total_length = aa_length.split("/")[1]
            else:
                # snpEff: aa_length is the length itself
                total_length = aa_length
            transcript_pct = float(transcript_pos) / float(total_length)

        for idx, gt_type in enumerate(gt_types):
            if gt_type == HET or gt_type == HOM_ALT:
                print("\t".join([r['chrom'], str(r['start']),
                                 str(r['end']), r['ref'], r['alt'],
                                 r['impact'],
                                 r['aa_change'] or 'None',
                                 transcript_pos or 'None',
                                 r['aa_length'] or 'None',
                                 str(transcript_pct),
                                 idx_to_sample[idx],
                                 gts[idx], gene, trans,
                                 r['biotype'] or 'None']))
def get_ind_lof(c, args):
    """Report every LoF SNP carried by each sample, one line per carrier.

    Joins ``variants`` with ``variant_impacts`` on cursor ``c``, keeping
    impacts with ``is_lof='1'`` on variants of type 'snp'.  For each result
    row it extracts the position embedded in ``aa_change`` and divides it
    by ``aa_length``, then prints one tab-separated line for every sample
    whose genotype type is ``HET`` or ``HOM_ALT``.  Missing values are
    printed as the text 'None'.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             v.impact, v.aa_change, v.aa_length, \
                             v.gt_types, v.gts, i.gene, \
                             i.transcript,  i.biotype\
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1' \
             AND v.type = 'snp'"
    c.execute(query)

    # header
    print('\t'.join(['chrom', 'start', 'end', 'ref', 'alt',
                     'highest_impact', 'aa_change', 'var_trans_pos',
                     'trans_aa_length', 'var_trans_pct',
                     'sample', 'genotype', 'gene', 'transcript',
                     'trans_type']))

    for r in c:
        # NOTE(review): blobs are unpickled as-is; only safe for databases
        # produced by a trusted loader.
        gt_types = np.array(cPickle.loads(zlib.decompress(r['gt_types'])))
        gts = np.array(cPickle.loads(zlib.decompress(r['gts'])))
        gene = str(r['gene'])
        trans = str(r['transcript'])

        # str() turns a NULL column into the text 'None'.
        aa_change = str(r['aa_change'])
        aa_length = str(r['aa_length'])
        transcript_pos = None
        transcript_pct = None
        if aa_change != 'None':
            # First run of digits that has a non-space character on each
            # side.  An aa_change without such a run leaves the position
            # unset instead of raising IndexError.
            matches = re.findall(r'\S(\d+)\S', aa_change)
            if matches:
                transcript_pos = matches[0]
        # The fraction needs both a position and a length.
        if transcript_pos is not None and aa_length != 'None':
            transcript_pct = float(transcript_pos) / float(aa_length)

        for idx, gt_type in enumerate(gt_types):
            if gt_type == HET or gt_type == HOM_ALT:
                print("\t".join([r['chrom'], str(r['start']),
                                 str(r['end']), r['ref'], r['alt'],
                                 r['impact'],
                                 r['aa_change'] or 'None',
                                 transcript_pos or 'None',
                                 r['aa_length'] or 'None',
                                 str(transcript_pct),
                                 idx_to_sample[idx],
                                 gts[idx], gene, trans,
                                 # a NULL biotype would make join() raise
                                 r['biotype'] or 'None']))
def __init__(self, db, include_gt_cols=False,
             out_format=DefaultRowFormat(None)):
    """Prepare a query object bound to the database file ``db``.

    Asserts that ``db`` exists, records the basic flags, connects to the
    database and builds the lookups between sample names and indices.

    db: path to the database file.
    include_gt_cols: stored unchanged on ``self.include_gt_cols``.
    out_format: row formatter stored on ``self.formatter``; its
        ``predicate`` attribute becomes the first entry of
        ``self.predicates``.

    Raises AssertionError when ``db`` does not exist (unless asserts are
    disabled with ``-O``).
    """
    # NOTE(review): ``DefaultRowFormat(None)`` in the signature is evaluated
    # once at definition time, so instances created without ``out_format``
    # all share that single formatter object.
    assert os.path.exists(db), "%s does not exist." % db

    self.db = db
    self.query_executed = False
    self.for_browser = False
    self.include_gt_cols = include_gt_cols

    # The lookups below read from self.c, so the connection comes first
    # (presumably _connect_to_database sets self.c; confirm in that method).
    self._connect_to_database()

    # map sample names to indices. e.g. self.sample_to_idx[NA20814] -> 323
    self.sample_to_idx = util.map_samples_to_indices(self.c)
    # and vice versa. e.g., self.idx_to_sample[323] -> NA20814
    self.idx_to_sample = util.map_indices_to_samples(self.c)
    self.idx_to_sample_object = util.map_indices_to_sample_objects(self.c)

    self.formatter = out_format
    # Start the predicate list with the formatter's own predicate.
    self.predicates = [self.formatter.predicate]
def get_variants_by_sample(c, args):
    """
    Report the number of variants observed for each sample
    where the sample had a non-ref genotype.

    Prints a 'sample'/'total' header, then one tab-separated line per row
    of ``sample_genotype_counts`` with ``total = num_het + num_hom_alt``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    # report.
    header = ('sample', 'total')
    print('\t'.join(header))

    query = "SELECT sample_id, \
             (num_het + num_hom_alt) as total \
             FROM sample_genotype_counts"
    c.execute(query)

    for row in c:
        # Translate the stored sample id into its name before printing.
        values = (idx_to_sample[row['sample_id']], row['total'])
        print("\t".join(map(str, values)))
def get_ind_pathways(conn, metadata, args):
    """Print the pathway-report header and delegate the per-row output.

    Joins ``variants`` with ``variant_impacts`` on ``variant_id`` through
    ``conn`` and passes the result, ``args`` and the index-to-sample map
    to ``_report_variant_pathways``.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             i.impact, v.gt_types, v.gts, i.gene, \
                             i.transcript \
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id"
    res = conn.execute(sql.text(query))

    # header
    header = ('chrom', 'start', 'end', 'ref', 'alt',
              'impact', 'sample', 'genotype',
              'gene', 'transcript', 'pathway')
    print('\t'.join(header))

    _report_variant_pathways(res, args, idx_to_sample)
def get_variants_by_sample(c, args):
    """
    Report the number of variants observed for each sample
    where the sample had a non-ref genotype.

    Output is tab-separated: a 'sample'/'total' header followed by one
    line per row of ``sample_genotype_counts``, where ``total`` is
    ``num_het + num_hom_alt``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    # report.
    print('\t'.join(('sample', 'total')))

    query = "SELECT sample_id, \
             (num_het + num_hom_alt) as total \
             FROM sample_genotype_counts"
    c.execute(query)

    for row in c:
        # The row stores a sample id; the report shows the sample name.
        name = idx_to_sample[row['sample_id']]
        line = [name, row['total']]
        print("\t".join([str(item) for item in line]))
def get_ind_lof_pathways(c, args):
    """Print the pathway-report header for LoF variants and delegate.

    Joins ``variants`` with ``variant_impacts`` on cursor ``c``, keeping
    impacts with ``is_lof='1'``, then passes the cursor, ``args`` and the
    index-to-sample map to ``_report_variant_pathways``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             i.impact, v.gt_types, v.gts, i.gene, \
                             i.transcript \
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1'"
    c.execute(query)

    # header
    header = ('chrom', 'start', 'end', 'ref', 'alt',
              'impact', 'sample', 'genotype',
              'gene', 'transcript', 'pathway')
    print('\t'.join(header))

    _report_variant_pathways(c, args, idx_to_sample)
def get_ind_lof_pathways(c, args):
    """Report pathways for LoF variants.

    Runs the variants/variant_impacts join restricted to ``is_lof='1'`` on
    cursor ``c``, prints the tab-separated column header, and leaves the
    per-row output to ``_report_variant_pathways``.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             i.impact, v.gt_types, v.gts, i.gene, \
                             i.transcript \
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1'"
    c.execute(query)

    # header: variant columns first, then the per-sample and gene columns
    variant_cols = ['chrom', 'start', 'end', 'ref', 'alt', 'impact']
    detail_cols = ['sample', 'genotype', 'gene', 'transcript', 'pathway']
    print('\t'.join(variant_cols + detail_cols))

    _report_variant_pathways(c, args, idx_to_sample)
def sample_lof_variants(conn, metadata, args, samples):
    """Print the LoF-interaction header, then delegate the reporting.

    Runs a query through ``conn`` selecting position, genotype and gene
    columns for every variant with ``is_lof='1'``, prints a tab-separated
    header (with extra per-variant columns when ``args.var_mode`` is
    truthy) and calls ``sample_lof_interactions`` with the query result,
    ``args``, the index-to-sample map and ``samples``.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT chrom, start, end, \
                    gt_types, gts, gene \
             FROM variants \
             WHERE is_lof='1'"
    res = conn.execute(query)

    # Columns shared by both output modes.
    header = ['sample', 'lof_gene', 'order_of_interaction',
              'interacting_gene']
    if args.var_mode:
        # Variant-level detail columns.
        header.extend(['var_id', 'chrom', 'start', 'end', 'impact',
                       'biotype', 'in_dbsnp', 'clinvar_sig',
                       'clinvar_disease_name', 'aaf_1kg_all',
                       'aaf_esp_all'])
    print("\t".join(header))

    sample_lof_interactions(res, args, idx_to_sample, samples)
def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype on a separate line.

    Runs a fixed query against the ``variants`` table through ``conn`` and
    prints one ``args.separator``-delimited line per (variant, sample)
    pair: the selected variant columns (everything except the packed
    ``gts`` blob), then the sample name and that sample's genotype.
    A header line is printed first when ``args.use_header`` is set.

    Genotype blobs are first unpacked with ``Z.unpack_genotype_blob``.
    On the first ``zlib.error`` the function switches to
    ``Z.snappy_unpack_blob`` for that row and all later rows.
    """
    import zlib

    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT  v.chrom, v.start, v.end, \
                     v.ref, v.alt, \
                     v.type, v.sub_type, \
                     v.aaf, v.in_dbsnp, v.gene, \
                     v.gts \
             FROM    variants v \
             ORDER BY chrom, start"
    res = conn.execute(sql.text(query))

    # Column names of the variants table with the gt_* columns left out.
    # The helper also returns the matching indices, which are not used here.
    col_names, _ = util.get_col_names_and_indices(
        metadata.tables["variants"], ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    sep = args.separator
    if args.use_header:
        print(sep.join(col_names))

    unpack = Z.unpack_genotype_blob
    for row in res:
        try:
            gts = unpack(row['gts'])
        except zlib.error:
            # Not zlib data: fall back to snappy, and stay with it.
            unpack = Z.snappy_unpack_blob
            gts = unpack(row['gts'])

        # v.gts is the last selected column, so stop one short of the end.
        # The prefix is the same for every sample: build it once per
        # variant rather than once per genotype.
        variant_fields = sep.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            print(sep.join([variant_fields, idx_to_sample[idx], gt]))
def sample_lof_variants(c, args, samples):
    """Report LoF gene interactions per sample.

    Queries cursor ``c`` for the position, genotype and gene columns of
    variants with ``is_lof='1'``, prints the tab-separated header and
    hands the cursor, ``args``, the index-to-sample map and ``samples``
    to ``sample_lof_interactions``.  The header carries additional
    per-variant columns when ``args.var_mode`` is truthy.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT chrom, start, end, \
                    gt_types, gts, gene \
             FROM variants \
             WHERE is_lof='1'"
    c.execute(query)

    # header
    base_cols = ['sample', 'lof_gene', 'order_of_interaction',
                 'interacting_gene']
    variant_cols = ['var_id', 'chrom', 'start', 'end', 'impact',
                    'biotype', 'in_dbsnp', 'clinvar_sig',
                    'clinvar_disease_name', 'aaf_1kg_all', 'aaf_esp_all']
    columns = base_cols + variant_cols if args.var_mode else base_cols
    print("\t".join(columns))

    sample_lof_interactions(c, args, idx_to_sample, samples)
def sample_variants(c, args):
    """Run the all-variants query on ``c`` and act on ``args.command``.

    'interactions': prints the tab-separated header, extended with
    per-variant columns when ``args.var_mode`` is truthy, then calls
    ``sample_gene_interactions``.

    'lof_interactions': returns the result of ``get_variant_genes``.

    Returns None in every case other than 'lof_interactions'.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT variant_id, gt_types, gts, gene, impact, biotype, \
                    in_dbsnp, clinvar_sig, clinvar_disease_name, aaf_1kg_all, aaf_esp_all, chrom, \
                    start, end  \
             FROM variants"
    c.execute(query)

    if args.command == "interactions":
        # header
        base_cols = ["sample", "gene", "order_of_interaction",
                     "interacting_gene"]
        variant_cols = ["var_id", "chrom", "start", "end", "impact",
                        "biotype", "in_dbsnp", "clinvar_sig",
                        "clinvar_disease_name", "aaf_1kg_all",
                        "aaf_esp_all"]
        columns = base_cols + variant_cols if args.var_mode else base_cols
        print("\t".join(columns))
        sample_gene_interactions(c, args, idx_to_sample)
    elif args.command == "lof_interactions":
        return get_variant_genes(c, args, idx_to_sample)
def get_gtcounts_by_sample(c, args):
    """
    Report the count of each genotype class
    observed for each sample.

    Output is tab-separated: a header line, then for every row of
    ``sample_genotype_counts`` the sample name followed by the hom-ref,
    het, hom-alt and unknown counts and their sum.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    # report.
    header = ('sample', 'num_hom_ref', 'num_het', 'num_hom_alt',
              'num_unknown', 'total')
    print('\t'.join(header))

    query = "SELECT *, \
             (num_hom_ref + num_het + num_hom_alt + num_unknown) as total \
             FROM sample_genotype_counts"
    c.execute(query)

    # Every header column after 'sample' is read straight from the row.
    value_cols = header[1:]
    for row in c:
        line = [idx_to_sample[row['sample_id']]]
        for col in value_cols:
            line.append(row[col])
        print("\t".join([str(item) for item in line]))
def get_ind_lof(c, args):
    """Report every LoF SNP carried by each sample, one line per carrier.

    Joins ``variants`` with ``variant_impacts`` on cursor ``c``, keeping
    impacts with ``is_lof='1'`` on variants of type 'snp'.  For each result
    row it extracts the position embedded in ``aa_change`` and divides it
    by ``aa_length``, then prints one tab-separated line for every sample
    whose genotype type is ``HET`` or ``HOM_ALT``.  Missing values are
    printed as the text "None".
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             v.impact, v.aa_change, v.aa_length, \
                             v.gt_types, v.gts, i.gene, \
                             i.transcript,  i.biotype\
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1' \
             AND v.type = 'snp'"
    c.execute(query)

    # header
    print("\t".join(
        [
            "chrom",
            "start",
            "end",
            "ref",
            "alt",
            "highest_impact",
            "aa_change",
            "var_trans_pos",
            "trans_aa_length",
            "var_trans_pct",
            "sample",
            "genotype",
            "gene",
            "transcript",
            "trans_type",
        ]
    ))

    for r in c:
        # NOTE(review): blobs are unpickled as-is; only safe for databases
        # produced by a trusted loader.
        gt_types = np.array(cPickle.loads(zlib.decompress(r["gt_types"])))
        gts = np.array(cPickle.loads(zlib.decompress(r["gts"])))
        gene = str(r["gene"])
        trans = str(r["transcript"])

        # str() turns a NULL column into the text "None".
        aa_change = str(r["aa_change"])
        aa_length = str(r["aa_length"])
        transcript_pos = None
        transcript_pct = None
        if aa_change != "None":
            # First run of digits that has a non-space character on each
            # side.  When nothing matches, the position stays unset rather
            # than raising IndexError.
            matches = re.findall(r"\S(\d+)\S", aa_change)
            if matches:
                transcript_pos = matches[0]
        # The fraction needs both a position and a length.
        if transcript_pos is not None and aa_length != "None":
            transcript_pct = float(transcript_pos) / float(aa_length)

        for idx, gt_type in enumerate(gt_types):
            if gt_type == HET or gt_type == HOM_ALT:
                print("\t".join(
                    [
                        r["chrom"],
                        str(r["start"]),
                        str(r["end"]),
                        r["ref"],
                        r["alt"],
                        r["impact"],
                        r["aa_change"] or "None",
                        transcript_pos or "None",
                        r["aa_length"] or "None",
                        str(transcript_pct),
                        idx_to_sample[idx],
                        gts[idx],
                        gene,
                        trans,
                        # a NULL biotype would make join() raise
                        r["biotype"] or "None",
                    ]
                ))
def get_ind_lof(conn, metadata, args):
    """Report every LoF SNP carried by each sample, one line per carrier.

    Joins ``variants`` with ``variant_impacts`` through ``conn``, keeping
    impacts with ``is_lof='1'`` on variants of type 'snp'.  For each result
    row it derives the variant's position within the transcript and that
    position as a fraction of the amino-acid length, then prints one
    tab-separated line for every sample whose genotype type is ``HET`` or
    ``HOM_ALT``.

    Genotype blobs are first unpacked with ``Z.unpack_genotype_blob``.
    On the first ``zlib.error`` the function switches to
    ``Z.snappy_unpack_blob`` for that row and all later rows.
    """
    import zlib

    idx_to_sample = util.map_indices_to_samples(metadata)

    query = "SELECT v.chrom, v.start, v.end, v.ref, v.alt, \
                             v.impact, v.aa_change, v.aa_length, \
                             v.gt_types, v.gts, i.gene, \
                             i.transcript,  i.biotype\
             FROM variants v, variant_impacts i \
             WHERE v.variant_id = i.variant_id \
             AND i.is_lof='1' \
             AND v.type = 'snp'"
    res = conn.execute(sql.text(query))

    # header
    print('\t'.join([
        'chrom', 'start', 'end', 'ref', 'alt', 'highest_impact',
        'aa_change', 'var_trans_pos', 'trans_aa_length', 'var_trans_pct',
        'sample', 'genotype', 'gene', 'transcript', 'trans_type'
    ]))

    unpack = Z.unpack_genotype_blob
    for r in res:
        try:
            gt_types = unpack(r['gt_types'])
            gts = unpack(r['gts'])
        except zlib.error:
            # Catch only the decompression failure: a bare ``except``
            # would also swallow KeyboardInterrupt and real bugs.
            unpack = Z.snappy_unpack_blob
            gt_types = unpack(r['gt_types'])
            gts = unpack(r['gts'])

        gene = str(r['gene'])
        trans = str(r['transcript'])

        # str() turns a NULL column into the text 'None'.
        aa_change = str(r['aa_change'])
        aa_length = str(r['aa_length'])
        transcript_pos = None
        transcript_pct = None
        if aa_change != 'None':
            try:
                # transcript_pos for snpEff annotated VCF
                transcript_pos = re.findall(r'\S(\d+)\S', aa_change)[0]
            except IndexError:
                # transcript_pos for VEP annotated VCF ("pos/length")
                if aa_length != 'None' and \
                        aa_length.split("/")[0] != "-":
                    transcript_pos = aa_length.split("/")[0]

            # NOTE(review): this chain is kept inside the aa_change branch;
            # confirm against the intended nesting.
            # handle non exonic variants
            if transcript_pos is None:
                transcript_pct = '/'
            # transcript_pct for snpEff annotated VCF
            elif aa_length != 'None' and "/" not in aa_length:
                transcript_pct = float(transcript_pos) / float(aa_length)
            # transcript_pct for VEP annotated VCF
            elif aa_length != 'None' and "/" in aa_length:
                transcript_pct = float(transcript_pos) / float(
                    aa_length.split("/")[1])

        for idx, gt_type in enumerate(gt_types):
            if gt_type == HET or gt_type == HOM_ALT:
                print("\t".join([
                    r['chrom'], str(r['start']), str(r['end']),
                    r['ref'], r['alt'], r['impact'],
                    r['aa_change'] or 'None',
                    transcript_pos or 'None',
                    r['aa_length'] or 'None',
                    str(transcript_pct),
                    idx_to_sample[idx], gts[idx],
                    gene, trans,
                    r['biotype'] or 'None'
                ]))