Ejemplo n.º 1
0
def load():
    """Upload one FHIR ``Sequence`` resource per GA4GH variant of the sample.

    Reads ``sample_id`` and ``access_token`` from the session, fetches the
    sample's variants with ``ga4gh.search_variants`` (Google repository),
    builds a ``Sequence`` resource for each variant from the coordinates in
    ``snps.COORDINATES`` and spawns one ``upload_seq`` greenlet per resource.

    Returns:
        A redirect to ``<config.API_BASE>/Sequence`` once every upload
        greenlet has finished.
    """
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES,
        ga4gh.OKG,
        callSetIds=[sample_id],
        repo_id='google')

    uploads = []
    # Only the rsid is needed; the variant payload itself is not uploaded.
    for rsid, _variant in variants:
        coord = snps.COORDINATES[rsid]
        seq = {
            'resourceType': 'Sequence',
            # DNA is expected to be defined at module level -- TODO confirm.
            'type': DNA,
            'coordinate': [{
                # One-base span ending at the SNP position.
                'start': coord['pos'] - 1,
                'end': coord['pos'],
                'chromosome': {'text': coord['chromosome']},
                'genomeBuild': {'text': 'GRCh37'},
            }],
            'variation': {'text': rsid},
            'species': {'text': 'Homo sapiens'},
            'repository': [{
                'url': 'https://www.googleapis.com/genomics/v1beta2',
                'variantId': variantset_ids,
            }],
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has completed.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
Ejemplo n.º 2
0
def get_snps(sample_id):
    '''
    Return, as JSON, the alleles called for ``sample_id`` at each SNP
    returned by ``ga4gh.search_variants`` for ``GENOTYPES``
    (per the original note: the sequences mentioned in SNPData.csv).

    The response maps each rsid to a list with one entry per genotype index
    of the sample's call. Index 0 is the reference allele and index ``k > 0``
    is the k-th alternate allele. A negative index (the API's "no call"
    marker) yields ``None`` (JSON ``null``) instead of silently wrapping
    around to the last alternate allele.
    '''
    variants = ga4gh.search_variants(GENOTYPES,
                                     ga4gh.OKG,
                                     callSetIds=[sample_id],
                                     repo_id='google')
    genotypes = {}
    for rsid, variant in variants:
        # alleles[0] is the reference, alleles[1:] are the alternates.
        alleles = [variant['referenceBases']]
        alleles.extend(variant['alternateBases'])
        for call in variant['calls']:
            if call.get('callSetId') != sample_id:
                continue
            genotypes[rsid] = [
                alleles[i] if i >= 0 else None for i in call['genotype']
            ]
    return jsonify(genotypes)
Ejemplo n.º 3
0
def get_snps(sample_id):
    '''
    Return, as JSON, the alleles called for ``sample_id`` at each SNP
    returned by ``ga4gh.search_variants`` for ``GENOTYPES``
    (per the original note: the sequences mentioned in SNPData.csv).

    The response maps each rsid to a list with one entry per genotype index
    of the sample's call. Index 0 is the reference allele and index ``k > 0``
    is the k-th alternate allele. A negative index (the API's "no call"
    marker) yields ``None`` (JSON ``null``) instead of silently wrapping
    around to the last alternate allele.
    '''
    variants = ga4gh.search_variants(
            GENOTYPES,
            ga4gh.OKG,
            callSetIds=[sample_id],
            repo_id='google')
    genotypes = {}
    for rsid, variant in variants:
        # alleles[0] is the reference, alleles[1:] are the alternates.
        alleles = [variant['referenceBases']]
        alleles.extend(variant['alternateBases'])
        for call in variant['calls']:
            if call.get('callSetId') != sample_id:
                continue
            genotypes[rsid] = [
                alleles[i] if i >= 0 else None for i in call['genotype']
            ]
    return jsonify(genotypes)
Ejemplo n.º 4
0
def load():
    """Upload one FHIR ``Sequence`` resource per GA4GH variant of the sample.

    Reads ``sample_id`` and ``access_token`` from the session, fetches the
    sample's variants with ``ga4gh.search_variants`` (Google repository),
    builds a ``Sequence`` resource for each variant from the coordinates in
    ``snps.COORDINATES`` and spawns one ``upload_seq`` greenlet per resource.

    Returns:
        A redirect to ``<config.API_BASE>/Sequence`` once every upload
        greenlet has finished.
    """
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(GENOTYPES,
                                                     ga4gh.OKG,
                                                     callSetIds=[sample_id],
                                                     repo_id='google')

    uploads = []
    # Only the rsid is needed; the variant payload itself is not uploaded.
    for rsid, _variant in variants:
        coord = snps.COORDINATES[rsid]
        seq = {
            'resourceType': 'Sequence',
            # DNA is expected to be defined at module level -- TODO confirm.
            'type': DNA,
            'coordinate': [{
                # One-base span ending at the SNP position.
                'start': coord['pos'] - 1,
                'end': coord['pos'],
                'chromosome': {'text': coord['chromosome']},
                'genomeBuild': {'text': 'GRCh37'},
            }],
            'variation': {'text': rsid},
            'species': {'text': 'Homo sapiens'},
            'repository': [{
                'url': 'https://www.googleapis.com/genomics/v1beta2',
                'variantId': variantset_ids,
            }],
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has completed.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
Ejemplo n.º 5
0
Archivo: web.py Proyecto: DSrcl/ga2fhir
def load():
    """Upload one annotated FHIR ``Sequence`` per GA4GH variant of the sample.

    Reads ``sample_id``, ``patient`` and ``access_token`` from the session,
    fetches the sample's variants with ``ga4gh.search_variants`` (Google
    repository) and, for each variant, builds a ``Sequence`` resource that
    carries the SNP coordinates from ``snps.COORDINATES``, a disease-risk
    analysis derived from ``snps.DATA`` and the GA4GH provenance fields.
    Each resource is uploaded by its own ``upload_seq`` greenlet.

    Returns:
        A redirect to ``<config.API_BASE>/Sequence`` once every upload
        greenlet has finished.
    """
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES,
        ga4gh.OKG,
        callSetIds=[sample_id],
        repo_id='google')
    pid = session['patient']

    uploads = []
    # Only the rsid is needed; the variant payload itself is not uploaded.
    for rsid, _variant in variants:
        coord = snps.COORDINATES[rsid]
        snp_data = snps.DATA[rsid]
        # A risk value above 1 is reported as a positive interpretation.
        interp = 'positive' if float(snp_data['Risk']) > 1 else 'negative'
        seq = {
            'resourceType': 'Sequence',
            'chromosome': {'text': coord['chromosome']},
            # One-base span ending at the SNP position.
            'start': coord['pos'] - 1,
            'end': coord['pos'],
            'genomeBuild': {'text': 'GRCh37'},
            'type': 'DNA',
            'source': {'text': 'somatic'},
            'patient': {'reference': '/Patient/%s' % pid},
            'variation': {'text': rsid},
            'species': {'text': 'Homo sapiens'},
            'analysis': [{
                'target': {'text': snp_data['disease']},
                'type': {'text': 'Disease Risk Analysis'},
                'interpretation': {'text': interp},
                'confidence': 'reviewing',
            }],
            'gaRepository': ga4gh.REPOSITORIES['google'],
            'gaVariantSet': variantset_ids,
            # The sample id doubles as the GA4GH call set id.
            'gaCallSet': sample_id,
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has completed.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
Ejemplo n.º 6
0
    # NOTE: excerpt from the body of a larger function; `snps`, `drug_info`,
    # `populations` and `WARNING` are defined outside this excerpt.
    # Python 2 syntax (print statement, dict.iteritems).

    # Collect chromosome/position for every SNP that appears in `snps` or
    # `drug_info`, reading rows from the tabix file.
    f = TabixFile('snps.sorted.txt.gz', parser=asTuple())
    snp_table = {}
    for row in f.fetch():
        # Rows have four fields; the first one is ignored.
        _, snp, chrom, pos = row
        if snp in snps or snp in drug_info:
            snp_table[snp] = {'chromosome': chrom, 'pos': int(pos)}
    # Generate snps.py: each table is written as a Python assignment using
    # the %s formatting of the dict.
    with open('snps.py', 'w') as dump:
        dump.write(WARNING)
        dump.write('COORDINATES = %s\n' % snp_table)
        dump.write('DATA = %s\n' % snps)
        dump.write('DRUG_INFO = %s\n' % drug_info)
    print 'Data written to snps.py'
    print 'Determining allele frequencies (using data from 1000 Genomes)'
    # Map each SNP id to its 'Code' entry from `snps`.
    genotypes = {snp: snp_data['Code'] for snp, snp_data in snps.iteritems()}
    # Materialize the search result because it is reused for every
    # population below and again for the whole-population pass.
    variants = list(
        ga4gh.search_variants(genotypes, dataset=ga4gh.OKG, repo_id='google'))
    # determine allele frequencies for different population
    # NOTE(review): the lambda closes over `pop`; this is only correct if
    # get_frequencies evaluates it before the comprehension moves on to the
    # next population -- confirm against ga4gh.get_frequencies.
    freqs = {
        pop: ga4gh.get_frequencies(variants,
                                   genotypes,
                                   population=lambda call: populations.get(
                                       call.get('callSetName')) == pop)
        for pop in set(populations.values())
    }
    # allele frequencies for 1000 Genomes' whole population
    freqs['1kg'] = ga4gh.get_frequencies(variants, genotypes)
    # Generate freq.py the same way as snps.py.
    with open('freq.py', 'w') as dump:
        dump.write(WARNING)
        dump.write('FREQUENCIES = %s\n' % freqs)
    print 'Data written to freq.py.'
Ejemplo n.º 7
0
    # NOTE: excerpt from the body of a larger function; `f`, `snps`,
    # `drug_info`, `populations` and `WARNING` are defined outside this
    # excerpt. Python 2 syntax (print statement, dict.iteritems).

    # Collect chromosome/position for every SNP that appears in `snps` or
    # `drug_info`, reading rows from `f`.
    snp_table = {}
    for row in f.fetch():
        # Rows have four fields; the first one is ignored.
        _, snp, chrom, pos = row
        if snp in snps or snp in drug_info:
            snp_table[snp] = {
                'chromosome': chrom,
                'pos': int(pos)
            } 
    # Generate snps.py: each table is written as a Python assignment using
    # the %s formatting of the dict.
    with open('snps.py', 'w') as dump:
        dump.write(WARNING)
        dump.write('COORDINATES = %s\n'% snp_table)
        dump.write('DATA = %s\n'% snps)
        dump.write('DRUG_INFO = %s\n'% drug_info)
    print 'Data written to snps.py' 
    print 'Determining allele frequencies (using data from 1000 Genomes)'
    # Map each SNP id to its 'Code' entry from `snps`.
    genotypes = {snp: snp_data['Code'] for snp, snp_data in snps.iteritems()} 
    # Materialize the search result because it is reused for every
    # population below and again for the whole-population pass.
    variants = list(ga4gh.search_variants(genotypes, dataset=ga4gh.OKG, repo_id='google'))
    # determine allele frequencies for different population
    # NOTE(review): the lambda closes over `pop`; this is only correct if
    # get_frequencies evaluates it before the comprehension moves on to the
    # next population -- confirm against ga4gh.get_frequencies.
    freqs = {
            pop: ga4gh.get_frequencies(
                variants,
                genotypes,
                population=lambda call: populations.get(call.get('callSetName'))==pop)
            for pop in set(populations.values())} 
    # allele frequencies for 1000 Genomes' whole population
    freqs['1kg'] = ga4gh.get_frequencies(variants, genotypes)
    # Generate freq.py the same way as snps.py.
    with open('freq.py', 'w') as dump:
        dump.write(WARNING)
        dump.write('FREQUENCIES = %s\n'% freqs) 
    print 'Data written to freq.py.'
Ejemplo n.º 8
0
def load():
    """Upload one annotated FHIR ``Sequence`` per GA4GH variant of the sample.

    Reads ``sample_id``, ``patient`` and ``access_token`` from the session,
    fetches the sample's variants with ``ga4gh.search_variants`` (Google
    repository) and, for each variant, builds a ``Sequence`` resource that
    carries the SNP coordinates from ``snps.COORDINATES``, a disease-risk
    analysis derived from ``snps.DATA`` and the GA4GH provenance fields.
    Each resource is uploaded by its own ``upload_seq`` greenlet.

    Returns:
        A redirect to ``<config.API_BASE>/Sequence`` once every upload
        greenlet has finished.
    """
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(GENOTYPES,
                                                     ga4gh.OKG,
                                                     callSetIds=[sample_id],
                                                     repo_id='google')
    pid = session['patient']

    uploads = []
    # Only the rsid is needed; the variant payload itself is not uploaded.
    for rsid, _variant in variants:
        coord = snps.COORDINATES[rsid]
        snp_data = snps.DATA[rsid]
        # A risk value above 1 is reported as a positive interpretation.
        interp = 'positive' if float(snp_data['Risk']) > 1 else 'negative'
        seq = {
            'resourceType': 'Sequence',
            'chromosome': {'text': coord['chromosome']},
            # One-base span ending at the SNP position.
            'start': coord['pos'] - 1,
            'end': coord['pos'],
            'genomeBuild': {'text': 'GRCh37'},
            'type': 'DNA',
            'source': {'text': 'somatic'},
            'patient': {'reference': '/Patient/%s' % pid},
            'variation': {'text': rsid},
            'species': {'text': 'Homo sapiens'},
            'analysis': [{
                'target': {'text': snp_data['disease']},
                'type': {'text': 'Disease Risk Analysis'},
                'interpretation': {'text': interp},
                'confidence': 'reviewing',
            }],
            'gaRepository': ga4gh.REPOSITORIES['google'],
            'gaVariantSet': variantset_ids,
            # The sample id doubles as the GA4GH call set id.
            'gaCallSet': sample_id,
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has completed.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)