예제 #1
0
 def helper(symbolset=annot[colname][1]):
     """Map each gene symbol in ``symbolset`` to its Ensembl description.

     Each symbol is looked up as a human gene via
     ``ensembl_rest.symbol_lookup`` and the ``'description'`` field of the
     response is stored under that symbol.

     Args:
         symbolset: Iterable of gene-symbol strings. The default,
             ``annot[colname][1]``, is evaluated once, when this ``def``
             statement runs, not on every call.

     Returns:
         dict: ``{symbol: description}``. A symbol whose lookup fails for
         any reason maps to a "No valid lookup found ..." message instead.
         An empty ``symbolset`` yields an empty dict.
     """
     descriptions = {}
     for sym in symbolset:
         try:
             # The species must be a name Ensembl recognises; the previous
             # literal was a word-filtered string that made every lookup
             # fail and land in the fallback branch below.
             description = ensembl_rest.symbol_lookup(
                 'homo sapiens', sym, expand=1)['description']
         except Exception:
             # Deliberate best effort: one bad symbol (unknown name, HTTP
             # error, missing 'description' key) must not abort the batch.
             description = 'No valid lookup found for symbol ' + sym
         descriptions[sym] = description
     return descriptions
예제 #2
0
def getSequence(gene):
    """Get the protein sequence coded by the given gene.

    The gene symbol is looked up as a human gene with the ``expand``
    parameter set, so the response carries its transcripts. The transcript
    with the highest ``is_canonical`` value is selected (``np.argmax``
    returns the first one on ties, so the first transcript is used when
    none is flagged), and the sequence for that transcript's translation
    id is fetched.

    Args:
        gene: Gene symbol to look up.

    Returns:
        str: The ``"seq"`` field of the sequence response, or ``""`` when
        the transcript/translation cannot be resolved or the sequence
        request fails.

    Raises:
        Whatever ``ensembl_rest.symbol_lookup`` raises: the initial lookup
        sits outside the ``try`` and its errors propagate to the caller.
    """
    # The species must be a name Ensembl recognises; the previous literal
    # was a word-filtered string that no lookup could match.
    q = ensembl_rest.symbol_lookup(species='homo sapiens',
                                   symbol=gene,
                                   params={'expand': True})
    try:
        transcripts = q["Transcript"]
        is_canonical = [t["is_canonical"] for t in transcripts]
        canonical = transcripts[np.argmax(is_canonical)]
        return ensembl_rest.sequence_id(canonical["Translation"]["id"])["seq"]
    except Exception:
        # Best effort: missing keys, an empty transcript list or a failed
        # sequence request all mean "no sequence". `Exception` rather than
        # a bare except so Ctrl-C / SystemExit are not swallowed.
        return ""
예제 #3
0
def get_coords(x, args):
    """Resolve ``x.gene`` to a ``pd.Series([chrom, start, stop])``.

    ``x.gene`` is interpreted in one of two ways:

    * If it contains both ``':'`` and ``'-'`` it is parsed as an explicit
      region ``<chrom>:<start>-<stop>``; the chromosome text is returned
      verbatim and the two bounds are converted to ``int``.
    * Otherwise it is treated as a gene symbol and looked up through
      ``ensembl_rest.symbol_lookup`` for ``args.species``. The returned
      window is the gene's ``'start'`` coordinate +/- 5000, on chromosome
      ``'chr' + seq_region_name``.

    Args:
        x: Object with a ``gene`` attribute (region string or gene symbol).
        args: Object with a ``species`` attribute; only read on the
            gene-symbol path.

    Returns:
        pd.Series: ``[chrom, start, stop]``.

    Raises:
        SystemExit: If the Ensembl lookup fails.
        ValueError: If a region string does not split into exactly one
            chromosome and two integer bounds.
    """
    flank = 5000  # half-width of the window placed around the gene start

    g = x.gene

    # Explicit region: "<chrom>:<start>-<stop>".
    if '-' in g and ':' in g:
        chrom, loc = g.split(':')
        start, stop = loc.split('-')
        return pd.Series([chrom, int(start), int(stop)])

    # Gene name: ask Ensembl where the gene lives.
    try:
        gene = ensembl_rest.symbol_lookup(species=args.species, symbol=g)
    except Exception:
        # `Exception` rather than a bare except, so that Ctrl-C is not
        # reported as an Ensembl failure.
        sys.exit('Something went wrong with ENSEMBL gene name query')

    chrom = 'chr' + gene['seq_region_name']
    # NOTE(review): the original comment calls gene['start'] the TSS;
    # presumably that only holds for forward-strand genes - confirm.
    # NOTE(review): start goes negative when gene['start'] < 5000 -
    # confirm downstream consumers tolerate that.
    start = gene['start'] - flank
    stop = gene['start'] + flank
    return pd.Series([chrom, start, stop])
예제 #4
0
import ensembl_rest

# Look up the human BRCA2 gene by symbol and print the raw response.
# The species must be a name Ensembl recognises; the previous literal was a
# word-filtered string that no lookup could match.
r = ensembl_rest.symbol_lookup(species='homo sapiens', symbol='BRCA2')

print(r)

# Run the variant recoder on one variant identifier and print the raw
# response.
r = ensembl_rest.variant_recoder(species='homo sapiens',
                                 id='NC_000007.13:101837173:C:T')

print(r)
import ensembl_rest 
import requests, sys
from pyensembl import EnsemblRelease
from collections import OrderedDict
import json
# pyensembl handle for Ensembl release 95, and every gene name it knows.
data = EnsemblRelease(95)
ids = data.gene_names()

# Fields copied out of each symbol-lookup response ...
keys = ['id', 'description', 'display_name', 'biotype']
# ... and the subset kept for the per-gene length summary.
keys1 = ['display_name', 'len']

# Accumulators filled by the loop below: full records and length summaries.
out = []
dom = []
for id in ids:
  try:
    a=ensembl_rest.symbol_lookup('h**o sapiens', id)
    res = dict((k, a[k]) for k in keys if k in a) 
    inp=res['id']
    server = "https://rest.ensembl.org"
    ext = "/sequence/id/"+inp+"?"
 
    r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"})
    if not r.ok:
        r.raise_for_status()
        sys.exit()
    a=r.text
    res['seq']=a
    res['len']=len(res['seq'])
    gene_len = dict((k, res[k]) for k in keys1 if k in res)
    print(gene_len)
    out.append(res)
    dom.append(gene_len)
  except :