Exemple #1
0
def rvs_anno(chrom, pos, ref, alt):
    """Fetch the RVS 'impact' and 'prediction' annotations for one variant.

    The RVS variant key is built with ``VarCharKey.v2k(chrom, pos, pos, alt)``
    and one GET request per annotation mode is sent to the RVS REST service.

    Args:
        chrom: Chromosome name, passed unchanged to ``VarCharKey.v2k``.
        pos: Variant position; coerced with ``int()`` and used as both the
            start and the end coordinate of the key.
        ref: Reference allele. Accepted for call compatibility; not used.
        alt: Alternate allele, passed unchanged to ``VarCharKey.v2k``.

    Returns:
        dict: ``{'impact': <decoded JSON>, 'prediction': <decoded JSON>}``,
        holding whatever JSON the service returned for each mode.

    Raises:
        requests.exceptions.RequestException: If a request cannot be
            completed, including when the timeout below expires.
        ValueError: (or a subclass) if a response body is not valid JSON.
    """
    # NOTE(review): start and end are both `pos` and `ref` is ignored, so a
    # multi-base deletion gets a single-position key -- confirm this matches
    # the key convention the RVS service expects.
    rvs_id = VarCharKey.v2k(chrom, int(pos), int(pos), alt)

    # Kept as a list so the debug output and the comma-joined URL segment
    # stay exactly as before (the endpoint is given comma-separated keys).
    all_vkeys_list = [rvs_id]
    # Single-argument print(...) prints the same text under Python 2 and is
    # also valid Python 3 syntax.
    print(all_vkeys_list)
    all_vkeys = ','.join(all_vkeys_list)

    rvs_vars = {}
    for mode in ('impact', 'prediction'):
        url = 'https://rvs.u.hpc.mssm.edu/rest/{}/vkey/{}'.format(
            mode, all_vkeys)
        print(url)
        # Without a timeout, requests waits forever on a stalled server.
        r = requests.get(url,
                         headers={"Content-Type": "application/json"},
                         timeout=30)
        # NOTE(review): the HTTP status is not checked; an error response
        # with a JSON body is returned to the caller like any other record.
        rvs_vars[mode] = r.json()
    return rvs_vars
Exemple #2
0
def rvs_anno(chrom, pos, ref, alt):
    """Fetch the RVS 'impact' and 'prediction' annotations for one variant.

    The RVS variant key is built with ``VarCharKey.v2k(chrom, pos, pos, alt)``
    and one GET request per annotation mode is sent to the RVS REST service.

    Args:
        chrom: Chromosome name, passed unchanged to ``VarCharKey.v2k``.
        pos: Variant position; coerced with ``int()`` and used as both the
            start and the end coordinate of the key.
        ref: Reference allele. Accepted for call compatibility; not used.
        alt: Alternate allele, passed unchanged to ``VarCharKey.v2k``.

    Returns:
        dict: ``{'impact': <decoded JSON>, 'prediction': <decoded JSON>}``,
        holding whatever JSON the service returned for each mode.

    Raises:
        requests.exceptions.RequestException: If a request cannot be
            completed, including when the timeout below expires.
        ValueError: (or a subclass) if a response body is not valid JSON.
    """
    # NOTE(review): start and end are both `pos` and `ref` is ignored, so a
    # multi-base deletion gets a single-position key -- confirm this matches
    # the key convention the RVS service expects.
    rvs_id = VarCharKey.v2k(chrom, int(pos), int(pos), alt)

    # Kept as a list so the debug output and the comma-joined URL segment
    # stay exactly as before (the endpoint is given comma-separated keys).
    all_vkeys_list = [rvs_id]
    # Single-argument print(...) prints the same text under Python 2 and is
    # also valid Python 3 syntax.
    print(all_vkeys_list)
    all_vkeys = ','.join(all_vkeys_list)

    rvs_vars = {}
    for mode in ('impact', 'prediction'):
        url = 'https://rvs.u.hpc.mssm.edu/rest/{}/vkey/{}'.format(
            mode, all_vkeys)
        print(url)
        # Without a timeout, requests waits forever on a stalled server.
        r = requests.get(url,
                         headers={"Content-Type": "application/json"},
                         timeout=30)
        # NOTE(review): the HTTP status is not checked; an error response
        # with a JSON body is returned to the caller like any other record.
        rvs_vars[mode] = r.json()
    return rvs_vars
# Scan the VCF and record, per resource, the vkey of every unfiltered variant
# allele that has no matching document in rvs_collection for this VCF's
# samples. Results accumulate in missing_vkeys[resource].
vcfr = vcf.Reader( args.vcf )
for v in vcfr:
    # Assuming `vcf` is PyVCF: FILTER is [] for PASS but None when the column
    # is '.', so len(v.FILTER) raised TypeError on unfiltered records.
    # Truthiness accepts both and still skips records that failed a filter.
    if not v.FILTER:
        for alt in v.ALT:
            chrom = v.CHROM.replace('chr','')
            start = v.POS
            alt = str(alt)

            if v.is_snp or (not v.is_deletion and v.is_indel):
                # snps or insertions: the key covers a single position
                end = v.POS
            elif v.is_deletion:
                # deletions: extend the end by the number of removed bases
                end = v.POS + (len(v.REF) - len(alt))
            else:
                # Neither SNP nor indel (e.g. a symbolic/structural allele):
                # previously `end` was left unset here, which raised
                # NameError or silently reused the previous allele's value.
                # No end coordinate is defined for these, so skip the allele.
                continue

            vkey = VarCharKey.v2k(chrom, start, end, alt)

            for resource in resources:
                # NOTE(review): if rvs_collection is a PyMongo collection,
                # count() is deprecated in favour of count_documents();
                # left unchanged because the driver version is not visible.
                if rvs_collection.count({ 'vkey'     : vkey,
                                          'samples'  : {'$in': vcfr.samples},
                                          'resource' : resource}) == 0:
                    missing_vkeys[resource].add(vkey)