def rvs_anno(chrom, pos, ref, alt, timeout=None):
    """Fetch the RVS 'impact' and 'prediction' annotations for one variant.

    A variant key is built with ``VarCharKey.v2k`` and one GET request per
    annotation mode is sent to the RVS REST service. The key list and each
    request URL are printed as a debugging aid.

    Args:
        chrom: Chromosome, passed through to ``VarCharKey.v2k``.
        pos: Variant position; converted with ``int()`` and used as both the
            start and the end coordinate of the key.
        ref: Reference allele. Accepted for interface compatibility but not
            used when building the key.
        alt: Alternate allele, passed through to ``VarCharKey.v2k``.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` keeps the historical behaviour of waiting without a
            limit; pass a number of seconds to bound each request.

    Returns:
        dict: Maps each mode name ('impact', 'prediction') to the decoded
        JSON body of the corresponding response.
    """
    # NOTE(review): start and end are both ``pos`` and ``ref`` is ignored, so
    # a multi-base deletion gets a single-position key -- confirm that this
    # is what the RVS service expects.
    position = int(pos)
    vkeys = [VarCharKey.v2k(chrom, position, position, alt)]
    print(vkeys)
    joined_vkeys = ','.join(vkeys)

    rvs_vars = {}
    for mode in ('impact', 'prediction'):
        url = 'https://rvs.u.hpc.mssm.edu/rest/{}/vkey/{}'.format(
            mode, joined_vkeys)
        print(url)
        response = requests.get(
            url,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        )
        rvs_vars[mode] = response.json()
    return rvs_vars
def rvs_anno(chrom, pos, ref, alt, timeout=None):
    """Fetch the RVS 'impact' and 'prediction' annotations for one variant.

    A variant key is built with ``VarCharKey.v2k`` and one GET request per
    annotation mode is sent to the RVS REST service. The key list and each
    request URL are printed as a debugging aid.

    Args:
        chrom: Chromosome, passed through to ``VarCharKey.v2k``.
        pos: Variant position; converted with ``int()`` and used as both the
            start and the end coordinate of the key.
        ref: Reference allele. Accepted for interface compatibility but not
            used when building the key.
        alt: Alternate allele, passed through to ``VarCharKey.v2k``.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` keeps the historical behaviour of waiting without a
            limit; pass a number of seconds to bound each request.

    Returns:
        dict: Maps each mode name ('impact', 'prediction') to the decoded
        JSON body of the corresponding response.
    """
    # NOTE(review): start and end are both ``pos`` and ``ref`` is ignored, so
    # a multi-base deletion gets a single-position key -- confirm that this
    # is what the RVS service expects.
    position = int(pos)
    vkeys = [VarCharKey.v2k(chrom, position, position, alt)]
    print(vkeys)
    joined_vkeys = ','.join(vkeys)

    rvs_vars = {}
    for mode in ('impact', 'prediction'):
        url = 'https://rvs.u.hpc.mssm.edu/rest/{}/vkey/{}'.format(
            mode, joined_vkeys)
        print(url)
        response = requests.get(
            url,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        )
        rvs_vars[mode] = response.json()
    return rvs_vars
# Query for annotated variants from the VCF: for every alternate allele of
# every unfiltered record, build its vkey and remember, per resource, the
# vkeys for which the collection holds no document matching any of this VCF's
# samples.
vcfr = vcf.Reader(args.vcf)
for v in vcfr:
    # Skip records that carry a failing filter. An empty list and None are
    # both treated as "not filtered". PyVCF is expected to report FILTER "."
    # as None, on which a len() check would raise TypeError.
    # TODO(review): confirm that records with FILTER "." should be processed
    # rather than skipped.
    if v.FILTER:
        continue

    chrom = v.CHROM.replace('chr', '')
    start = v.POS
    for alt in v.ALT:
        if alt is None:
            # No alternate allele at this site (presumably ALT "." in the
            # VCF); str(None) must not be used as an allele in a vkey.
            continue
        alt = str(alt)

        # Choose the end coordinate per allele, not from record-level flags:
        # those describe the record as a whole, so they cannot classify each
        # allele of a multi-allelic record, and they can leave `end` unset or
        # carried over from the previous allele when no branch applies.
        # An allele shorter than REF is a deletion and extends the span by the
        # number of removed bases; every other allele keeps end == start.
        # NOTE(review): symbolic or breakend ALT strings are not special-cased
        # and also get end == start -- confirm whether they should be skipped.
        removed = len(v.REF) - len(alt)
        if removed > 0:
            end = start + removed
        else:
            end = start

        vkey = VarCharKey.v2k(chrom, start, end, alt)
        for resource in resources:
            query = {
                'vkey': vkey,
                'samples': {'$in': vcfr.samples},
                'resource': resource,
            }
            if rvs_collection.count(query) == 0:
                missing_vkeys[resource].add(vkey)