def helper(symbolset=annot[colname][1]):
    """Map each gene symbol in ``symbolset`` to its Ensembl description.

    The default value ``annot[colname][1]`` is evaluated once, when this
    ``def`` statement executes, so ``annot`` and ``colname`` must already be
    bound in the enclosing scope at that moment.

    Args:
        symbolset: Sized iterable of gene symbols (must support ``len``).

    Returns:
        dict: ``{symbol: description}``. A symbol whose lookup raises any
        ``Exception`` is mapped to
        ``'No valid lookup found for symbol ' + symbol`` instead. An empty
        ``symbolset`` yields an empty dict.
    """
    def describe(symbol):
        # Best-effort: a failed lookup must not abort the whole batch.
        # NOTE(review): the species literal looks masked/censored; confirm
        # the intended value before relying on these lookups.
        try:
            record = ensembl_rest.symbol_lookup(
                'h**o sapiens', symbol, expand=1)
            return record['description']
        except Exception:
            return 'No valid lookup found for symbol ' + symbol

    # Explicit length test (rather than truthiness) so array-like inputs
    # behave the same as plain lists.
    if len(symbolset) == 0:
        return {}
    return {symbol: describe(symbol) for symbol in symbolset}
def getSequence(gene):
    """Get the protein sequence coded by the given gene.

    The gene symbol is looked up with ``ensembl_rest.symbol_lookup``
    (requesting expanded output), the transcript with the highest
    ``is_canonical`` value is selected (first one on ties), and the
    sequence for that transcript's translation id is fetched with
    ``ensembl_rest.sequence_id``.

    Args:
        gene: Gene symbol, passed as ``symbol`` to the lookup.

    Returns:
        The ``"seq"`` field of the sequence response, or ``""`` when the
        transcript selection or the sequence fetch fails (e.g. no
        ``"Transcript"`` key, no ``"Translation"`` on the chosen transcript).

    Raises:
        Any exception raised by the initial ``symbol_lookup`` call; that call
        is intentionally left outside the ``try`` so its failure mode is
        unchanged.
    """
    # NOTE(review): the species literal looks masked/censored; confirm the
    # intended value before relying on this lookup.
    lookup = ensembl_rest.symbol_lookup(
        species='h**o sapiens', symbol=gene, params={'expand': True})
    try:
        transcripts = lookup["Transcript"]
        canonical_flags = [t["is_canonical"] for t in transcripts]
        # argmax returns the index of the first maximal flag.
        chosen = transcripts[np.argmax(canonical_flags)]
        return ensembl_rest.sequence_id(chosen["Translation"]["id"])["seq"]
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit. Ordinary errors still fall back to "".
        return ""
def get_coords(x, args):
    """Resolve ``x.gene`` to a genomic region.

    ``x.gene`` is interpreted in one of two ways:

    * If it contains both ``'-'`` and ``':'`` it is parsed as literal
      coordinates ``"<chrom>:<start>-<stop>"`` and returned as given.
    * Otherwise it is treated as a gene symbol and looked up with
      ``ensembl_rest.symbol_lookup`` for ``args.species``. The region is the
      gene's reported ``start`` plus/minus 5000, on chromosome
      ``'chr' + seq_region_name``. The lower bound is not clamped, so it can
      be negative for genes within 5000 of the sequence start.

    An alternative that was previously sketched here in commented-out code
    (whole gene body, optionally widened by ``args.radius``) is not
    implemented.

    Args:
        x: Object with a ``gene`` string attribute.
        args: Object with a ``species`` attribute; only read on the
            gene-symbol path.

    Returns:
        pandas.Series: ``[chrom, start, stop]``.

    Raises:
        SystemExit: If the Ensembl lookup raises any ``Exception``.
        ValueError: If a coordinate string does not split into exactly
            ``chrom:start-stop`` or the bounds are not integers.
    """
    flank = 5000  # half-width of the window around the gene's start

    g = x.gene
    if '-' in g and ':' in g:
        # Literal coordinates.
        chrom, loc = g.split(':')
        start_text, stop_text = loc.split('-')
        start, stop = int(start_text), int(stop_text)
    else:
        # Gene name.
        try:
            gene = ensembl_rest.symbol_lookup(species=args.species, symbol=g)
        except Exception:
            # Was a bare ``except:``, which also turned Ctrl-C into this
            # (misleading) message.
            sys.exit('Something went wrong with ENSEMBL gene name query')
        chrom = 'chr' + gene['seq_region_name']
        # start - flank < start + flank always holds, so the former
        # min()/max() over the two bounds was redundant.
        start = gene['start'] - flank
        stop = gene['start'] + flank

    return pd.Series([chrom, start, stop])
import ensembl_rest

# Demo script: run two Ensembl REST queries and print each raw response.
# NOTE(review): the species literal looks masked/censored; confirm the
# intended value before running.

# 1) Look up a gene by its symbol.
r = ensembl_rest.symbol_lookup(
    species='h**o sapiens',
    symbol='BRCA2',
)
print(r)

# 2) Recode a variant given by its identifier string.
r = ensembl_rest.variant_recoder(
    species='h**o sapiens',
    id='NC_000007.13:101837173:C:T',
)
print(r)
import ensembl_rest import requests, sys from pyensembl import EnsemblRelease from collections import OrderedDict import json data=EnsemblRelease(95) keys=['id','description','display_name','biotype'] ids=data.gene_names() keys1=['display_name','len'] out=[] dom=[] for id in ids: try: a=ensembl_rest.symbol_lookup('h**o sapiens', id) res = dict((k, a[k]) for k in keys if k in a) inp=res['id'] server = "https://rest.ensembl.org" ext = "/sequence/id/"+inp+"?" r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"}) if not r.ok: r.raise_for_status() sys.exit() a=r.text res['seq']=a res['len']=len(res['seq']) gene_len = dict((k, res[k]) for k in keys1 if k in res) print(gene_len) out.append(res) dom.append(gene_len) except :