Example #1
0
def get_descriptions(ensid):
    """Return formatted cross-reference descriptions for an Ensembl ID.

    Requests ``/xrefs/id/<ensid>`` through ``ensembl_rest.get_endpoint``
    (using the module-level ``server``) and iterates over the result.

    Args:
        ensid: Identifier interpolated into the ``/xrefs/id/`` request path.

    Returns:
        A list of strings of the form ``"DB: <dbname>\\tDescription: <description>"``,
        one for each returned entry whose ``description`` and ``dbname``
        values are both truthy. Entries failing that check are left out.
    """
    xrefs = ensembl_rest.get_endpoint(server, '/xrefs/id/%s' % ensid)
    return [
        "DB: %s\tDescription: %s" % (entry['dbname'], entry['description'])
        for entry in xrefs
        if entry['description'] and entry['dbname']
    ]
Example #2
0
import json, ensembl_rest

# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = 'http://rest.ensembl.org'

# Request paths; a '{}' placeholder, where present, is filled via str.format.
vep_endpoint = '/vep/human/id/{}'
vep_post_endpoint = '/vep/human/id'
overlap_endpoint = '/overlap/region/human/{}?feature=variation'

# 1) Print VEP results for rs189863975
# a) For each overlapping transcript (transcript_consequences) print
#          the variant_allele,
#          the transcript_id,
#          the consequence_terms
#          and, if available, the polyphen_score and polyphen_prediction

variant_effects = ensembl_rest.get_endpoint(
    server, vep_endpoint.format('rs189863975'))

for entry in variant_effects:
    for consequence in entry['transcript_consequences']:
        # One summary line per transcript consequence; the consequence terms
        # are collapsed into a single comma-separated field.
        print("Variant allele: {}, Transcript ID: {}, Consequence terms: {}".
              format(consequence['variant_allele'],
                     consequence['transcript_id'],
                     ','.join(consequence['consequence_terms'])))

        # PolyPhen data is optional: the placeholder default marks a
        # consequence without a score, in which case nothing more is printed.
        polyphen_score = consequence.get('polyphen_score', 'no polyphen score')
        if polyphen_score == 'no polyphen score':
            continue
        print("   PolyPhen score: {}, PolyPhen prediction: {}".format(
            polyphen_score,
            consequence.get('polyphen_prediction', 'no polyphen prediction')))
Example #3
0
import json, ensembl_rest
# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = "http://rest.ensembl.org"

# Gene tree endpoint exercises

# CG-6a: Get the information for the protein gene tree with the stable ID
# ENSGT00390000003602. The output should be in the OrthoXML format.

# Request path for the gene tree; the trailing '?' leaves the query string empty.
ext = "/genetree/id/ENSGT00390000003602?"
# Passed as the third argument to get_endpoint to ask for OrthoXML
# (presumably sent as the request's content type -- confirm in ensembl_rest).
content_type = "text/x-orthoxml+xml"
endpoint = ensembl_rest.get_endpoint(server, ext, content_type)
# The result is printed as returned by get_endpoint (no json.dumps formatting).
print (endpoint)
Example #4
0
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import requests, json, sys, ensembl_rest

# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = "http://rest.ensembl.org"

## Export all microarray platforms that are annotated for human in Ensembl,
## together with their associated information.
request = '/regulatory/species/homo_sapiens/microarray'
decoded = ensembl_rest.get_endpoint(server, request)
# Pretty-print the result: 4-space indentation, keys in sorted order.
print(json.dumps(decoded, indent=4, sort_keys=True))
"""
  You have performed a microarray experiment with the array HumanWG_6_V2. The following probes gave you a positive signal: 
  ILMN_1763508, ILMN_1861090, ILMN_1890175, ILMN_1749304, ILMN_1894173, ILMN_1911643, ILMN_1891089, ILMN_1859810, ILMN_1843473,  ILMN_1770856
    a) Which transcripts do they map to? 
    b) Which genes do these transcripts belong to?
"""

# Transcript and gene mappings for different probes.
# `array` and each entry of `probes` are interpolated into the per-probe
# request path built in the loop that follows.
array = 'HumanWG_6_V2'
# Probe names that gave a positive signal in the exercise described above.
probes = [
    'ILMN_1763508', 'ILMN_1861090', 'ILMN_1890175', 'ILMN_1749304',
    'ILMN_1894173', 'ILMN_1911643', 'ILMN_1891089', 'ILMN_1859810',
    'ILMN_1843473', 'ILMN_1770856'
]
for probe in probes:
    print(probe)
    request = '/regulatory/species/homo_sapiens/microarray/%s/probe/%s?content-type=application/json;gene=1;transcript=1' % (
        array, probe)
    decoded = ensembl_rest.get_endpoint(server, request)
    if decoded:
        print("Probe length: %sbp Sequence: %s" %
import json, ensembl_rest
# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = "http://rest.ensembl.org"  # alternate server noted by the author: http://ebi-cli-003:3000

# Alignment endpoint exercise

# CG-1a: Get, in JSON format, the LastZ pairwise alignment of
# taeniopygia_guttata vs. gallus_gallus for region 2:106041430-106041480:1

# Request path: the region is given for taeniopygia_guttata, the method is
# LASTZ_NET, and species_set is supplied once per species in the pair.
ext = "/alignment/region/taeniopygia_guttata/2:106041430-106041480:1?method=LASTZ_NET;species_set=taeniopygia_guttata;species_set=gallus_gallus"
endpoint = ensembl_rest.get_endpoint(
    server, ext
)  # a third parameter 'content_type' defaults to 'application/json', so there is no need to pass it here
# Pretty-print the result: 4-space indentation, keys in sorted order.
print(json.dumps(endpoint, indent=4, sort_keys=True))
import json, ensembl_rest
# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = "http://rest.ensembl.org"

# Family endpoint exercises

# CG-4: Get the information for the families predicted for the human gene
# ENSG00000283087. What do you notice?

# Request path for the family lookup; the trailing '?' leaves the query
# string empty.
ext = "/family/member/id/ENSG00000283087?"
endpoint = ensembl_rest.get_endpoint(server, ext)
# Pretty-print the result: 4-space indentation, keys in sorted order.
print(json.dumps(endpoint, indent=4, sort_keys=True))
import json, ensembl_rest

# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = 'http://rest.ensembl.org'

# Request paths; the '{}' placeholder is filled via str.format.
phenotype_endpoint = '/phenotype/term/homo_sapiens/{}'
variation_post_endpoint = '/variation/human?pops=1'

# 1) Get all variants that are associated with the phenotype 'Coffee consumption'.
#    For each variant print
# a) the p-value for the association
# b) the PMID of the publication that describes the association between that
#    variant and coffee consumption
# c) the risk allele and the associated gene

# NOTE(review): the phenotype term contains a space and is inserted into the
# path as-is -- confirm that ensembl_rest.get_endpoint (or the HTTP layer it
# uses) URL-encodes it.
request = phenotype_endpoint.format('coffee consumption')
associations = ensembl_rest.get_endpoint(server, request)

# Maps a variation name to its risk allele; the loop below adds an entry only
# for associations whose attributes provide a non-empty 'risk_allele'.
variation2risk_allele = {}
for association in associations:
    variation = association['Variation']
    desc = association['description']
    source = association['source']
    mapped_to_accession = association['mapped_to_accession']
    attributes = association['attributes']
    p_value = attributes['p_value']
    external_reference = attributes['external_reference']
    associated_gene = attributes['associated_gene']
    risk_allele = attributes.get('risk_allele', '')
    if risk_allele != '':
        variation2risk_allele[variation] = risk_allele
    print(
        "Variation: {}, Phenotype: {}, p-value: {}, PMID: {}, Associated gene(s): {}, Risk allele: {}"
        .format(variation, desc, p_value, external_reference, associated_gene,
import json, ensembl_rest

# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = 'http://rest.ensembl.org'

# Request paths; '{}' placeholders are filled via str.format.
overlap_region_endpoint = '/overlap/region/human/{}?feature={}'
variation_post_endpoint = '/variation/human'
lookup_id_endpoint = '/lookup/id/{}?expand=1'
overlap_id_endpoint = '/overlap/id/{}?feature={}'

# 1) Print all variants that are located on chromosome 17 between 80348215
#    and 80348333. Use the overlap endpoint to get the location
#    (seq_region_name, start, end), alleles, consequence_type and
#    clinical_significance for each variant in the region.

request = overlap_region_endpoint.format('17:80348215..80348333', 'variation')
variants = ensembl_rest.get_endpoint(server, request)

# One report line per variant; the alleles are joined with '/' and the
# remaining fields are printed exactly as they appear in the response.
report_template = (
    "Location: {}:{}:{}-{}, Alleles: {}, Consequence: {}, Clinical significance: {}"
)
for variant in variants:
    print(report_template.format(
        variant['assembly_name'],
        variant['seq_region_name'],
        variant['start'],
        variant['end'],
        '/'.join(variant['alleles']),
        variant['consequence_type'],
        variant['clinical_significance']))

# 2) Get the variant class, evidence attributes, source and the most_severe_consequence
#    for all variants in that region from the variant endpoint.
Example #9
0
    for t in efo['_embedded']['terms']:
        print("Link(IRI): %s" % (t['iri']))
        if t['description']:
            for d in t['description']:
                print("Description: %s" % (d))
        else:
            print('No description provided')
    print()


## main

# 1. List all epigenomes available in Ensembl Regulation
server = "http://rest.ensembl.org"
endpoint = '/regulatory/species/homo_sapiens/epigenome'
# The content type is passed explicitly here as the third argument.
decoded = ensembl_rest.get_endpoint(server, endpoint, 'application/json')
# Pretty-print the result: 4-space indentation, keys in sorted order.
print(json.dumps(decoded, indent=4, sort_keys=True))

# 2. Find additional information (where available) for each epigenome using
#    the Ontology Lookup Service
# URL prefix for the lookup; an epigenome's 'efo_id' is appended to it as the
# value of the obo_id query parameter.
efo_server = "http://www.ebi.ac.uk/ols/api/ontologies/efo/terms?obo_id="
for r in decoded:

    print("Epigenome name: %s" % r['name'])
    # No EFO ID assigned to this epigenome: report its scientific name and
    # move on to the next one without querying the lookup service.
    if not r['efo_id']:
        print("No EFO ID assigned: %s\n" % (r['scientific_name']))
        continue

    # NOTE(review): `request` already starts with `efo_server`, and
    # `efo_server` is also passed as the first argument -- confirm that
    # get_endpoint_efo does not prepend the server to the request again.
    request = efo_server + r['efo_id']
    # NOTE(review): `efo` is not used in the lines visible here; presumably
    # the loop body continues and consumes it -- verify against the full file.
    efo = ensembl_rest.get_endpoint_efo(efo_server, request)
import json, ensembl_rest

# Base URL handed to ensembl_rest.get_endpoint as its first argument.
server = 'http://rest.ensembl.org'

# Request paths; '{}' placeholders are filled via str.format.
ld_region_endpoint = '/ld/human/region/{}/{}'
ld_endpoint = '/ld/human/{}/{}'

# 1) Compute LD in the region 3:196064297-196068186
# for the population 1000GENOMES:phase_3:CEU.
# Print all results with r2=1 and d_prime=1.

region = '3:196064297-196068186'
population = '1000GENOMES:phase_3:CEU'
ld_values = ensembl_rest.get_endpoint(
    server, ld_region_endpoint.format(region, population))

# Lazily keep only the records whose d_prime and r2 both convert to exactly
# 1.0; the generator is consumed by the loop below, so filtering and printing
# are interleaved.
high_ld_pairs = (
    candidate for candidate in ld_values
    if float(candidate['d_prime']) == 1.0 and float(candidate['r2']) == 1.0)

for pair in high_ld_pairs:
    print("Pair: {}-{}".format(pair['variation1'], pair['variation2']))

# Blank line to separate this output from whatever is printed next.
print('')

# Compute pairwise LD for all variants located within 500 kb of
# rs535797132.
# Print all variants that are in LD (d_prime >= 0.8) with rs535797132.
# For each pair of variants also print d_prime and r2.
# Use 1000GENOMES:phase_3:FIN as the population.