async def chromosomes(request: Request, response: Response, release: str,
                      species: str):
    """
    Get the chromosomes for a particular Ensembl release and species.

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **chromosomes** element containing a
    **list** of chromosomes consisting of the following items:

    | Element | Type | Description |
    |--|--|--|
    | chromosome | str | the chromosome |
    | length | int | length in base pairs |
    | order | int | order in the genome |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(
            release, species, request.app.state.dbs_dict
        )
        payload = {
            'meta': meta.db_meta(database),
            'chromosomes': meta.chromosomes(database),
        }
    except Exception as err:
        # Every exception raised above is reported to the client as a 404
        # whose body carries the exception text.
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
def info(release: str = typer.Option(
             None, '--release', '-r', help='release'),
         species: str = typer.Option(
             None, '--species', '-s', help='valid values are Mm and Hs'),
         verbose: int = typer.Option(
             0, '--verbose', '-v', count=True)):
    """
    Show the release, species and statistics of an annotation database.
    """
    # NOTE: the docstring above is kept short on purpose; it describes only
    # what this command prints and must not mention arguments the command
    # does not take.
    try:
        configure_logging(verbose)
        LOG = get_logger()
        LOG.debug('Stats database...')

        db = dbs.get_database(release, species, ensimpl_dbs_dict)
        db_meta = meta.db_meta(db)
        statistics = meta.stats(db)

        print(f'Release: {db_meta["release"]}')
        print(f'Species: {db_meta["species"]}, {db_meta["assembly_patch"]}')

        # One [name, value] row per statistic, ordered by statistic name.
        rows = [[name, statistics[name]] for name in sorted(statistics)]
        print(tabulate(rows))
    except Exception as e:
        # CLI boundary: report the problem as text instead of a traceback.
        print(e)
async def stats(request: Request, response: Response, release: str,
                species: str):
    """
    Get the information for a particular Ensembl release and species.

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **stats** element. The existing
    documentation lists the following items for the response:

    | Element | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | assembly | str | the genome assembly information |
    | assembly_patch | str | the genome assembly patch number |
    | stats | dict | various stats about the database |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(
            release, species, request.app.state.dbs_dict
        )
        payload = {
            'meta': meta.db_meta(database),
            'stats': meta.stats(database),
        }
    except Exception as err:
        # Every exception raised above is reported to the client as a 404
        # whose body carries the exception text.
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def releases(request: Request, response: Response):
    """
    Get all the release and species information.

    If successful, a JSON **list** will be returned with one item per
    database registered in the application state. The existing
    documentation lists the following elements for each item:

    | Element | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | assembly | str | the genome assembly information |
    | assembly_patch | str | the genome assembly patch number |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        # Each registered entry supplies the release/species pair used to
        # resolve its database, whose metadata becomes one list item.
        all_meta = [
            meta.db_meta(
                dbs.get_database(
                    entry['release'],
                    entry['species'],
                    request.app.state.dbs_dict,
                )
            )
            for entry in request.app.state.dbs
        ]
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(all_meta)
async def genes_json(request: Request, response: Response):
    """
    Get gene information for the ids supplied in a JSON request body.

    The body is read with `request.json()` and the following keys are used:

    | Key | Required | Description |
    |--|--|--|
    | release | yes | used to select the database |
    | species | yes | used to select the database |
    | ids | yes | the ids to look up; **ids[]** is accepted instead |
    | details | no | converted with `utils.str2bool`, defaults to false |

    If successful, a JSON response will be returned with a **meta** element
    and a **genes** element.

    If the body is not valid JSON, a status code of **404** is sent with a
    **message** and a **detail** element. Any other error (including an
    empty result) is sent back with just a **message** element and a status
    code of **404**.
    """
    try:
        body = await request.json()

        if 'release' not in body:
            raise Exception('release value is missing')
        release = body['release']

        if 'species' not in body:
            raise Exception('species value is missing')
        species = body['species']

        if 'ids' in body:
            ids = body['ids']
        elif 'ids[]' in body:
            # this is for backwards compatibility with ensimplR
            ids = body['ids[]']
        else:
            raise Exception('ids value is missing')

        details = (
            utils.str2bool(body['details']) if 'details' in body else False
        )

        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        db_info = meta.db_meta(db)

        found = genesdb.get(db, ids=ids, details=details)
        if len(found) == 0:
            raise Exception('No results found')

        payload = {'meta': db_info, 'genes': found}
    except JSONDecodeError as err:
        # Must stay ahead of the generic handler so malformed bodies get the
        # more specific message.
        response.status_code = status.HTTP_404_NOT_FOUND
        return {
            'message': 'Received data is not a valid JSON',
            'detail': str(err)
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def genes_model(request: Request, response: Response, gq: GQ):
    """
    Get gene information for the query described by a **GQ** model.

    The `release` and `species` attributes of `gq` select the database; its
    `ids` and `details` attributes are passed to the gene lookup.

    If successful, a dictionary with a **meta** element and a **genes**
    element is returned.

    If an error occurs (including an empty result), a dictionary with just
    one element called **message** is returned along with a status code of
    **404**.
    """
    try:
        db = dbs.get_database(
            gq.release, gq.species, request.app.state.dbs_dict
        )
        db_info = meta.db_meta(db)

        found = genesdb.get(db, ids=gq.ids, details=gq.details)
        if len(found) == 0:
            raise Exception('No results found')
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    # Returned as a plain dict (not wrapped in a response object).
    return {'meta': db_info, 'genes': found}
async def exon_info(request: Request, response: Response, release: str,
                    species: str, chrom: Optional[str] = None,
                    compress: Optional[bool] = False):
    """
    Get the exon information for a particular Ensembl release and species.

    `chrom` and `compress` are handed unchanged to
    `genesdb.get_exon_info`.

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **genes** element holding the value
    returned by `genesdb.get_exon_info`.

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(
            release, species, request.app.state.dbs_dict
        )
        payload = {
            'meta': meta.db_meta(database),
            'genes': genesdb.get_exon_info(database, chrom, compress),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def randomids(request: Request, response: Response, release: str,
                    species: str, source_db: str = 'Ensembl',
                    limit: int = 10):
    """
    Get random ids. Mostly useful for examples.

    The following parameters are accepted:

    | Param | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | source_db | str | defaults to 'Ensembl', but others are valid, please see external_dbs() |
    | limit | int | number of ids to return, defaults to 10 |

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and an **ids** element.

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(
            release, species, request.app.state.dbs_dict
        )
        payload = {
            'meta': meta.db_meta(database),
            'ids': genesdb.random_ids(database, source_db, limit),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def external_dbs(request: Request, response: Response, release: str,
                       species: str):
    """
    Get the external database information.

    The following is a list of the valid parameters:

    | Param | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and an **external_dbs** element containing a
    **list** of external databases consisting of the following items:

    | Element | Type | Description |
    |--|--|--|
    | external_db_id | str | unique external db identifier |
    | external_db_name | str | external db name |
    | ranking_id | str | internal ranking id |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(
            release, species, request.app.state.dbs_dict
        )
        payload = {
            'meta': meta.db_meta(database),
            'external_dbs': meta.external_dbs(database),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def genes_form(request: Request, response: Response,
                     release: str = Form(...),
                     species: str = Form(...),
                     ids: List[str] = Form(...),
                     details: Optional[bool] = Form(...)):
    """
    Get gene information for the ids supplied as form fields.

    The form fields **release** and **species** select the database;
    **ids** and **details** are passed to the gene lookup.

    If successful, a dictionary with a **meta** element and a **genes**
    element is returned.

    If an error occurs (including an empty result), a dictionary with just
    one element called **message** is returned along with a status code of
    **404**.
    """
    try:
        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        db_info = meta.db_meta(db)

        found = genesdb.get(db, ids=ids, details=details)
        if len(found) == 0:
            raise Exception('No results found')
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    # Returned as a plain dict (not wrapped in a response object).
    return {'meta': db_info, 'genes': found}
def gene(id: str = typer.Argument(
             ..., help='Ensembl ID'),
         release: str = typer.Option(
             None, '--release', '-r', help='release'),
         species: str = typer.Option(
             None, '--species', '-s', help='valid values are Mm and Hs'),
         format: SearchOutput = typer.Option(
             SearchOutput.pretty, '--format', '-f', case_sensitive=False),
         verbose: int = typer.Option(
             0, '--verbose', '-v', count=True)):
    """
    Get gene information from annotation database.
    """
    # NOTE: `id` and `format` shadow builtins, but they are the public
    # parameter names of this command and therefore stay as they are.
    try:
        configure_logging(verbose)
        LOG = get_logger()
        LOG.debug(f'Release: {release}')
        LOG.debug(f'Species: {species}')
        LOG.debug(f'Format: {format}')
        LOG.debug(f'ID: {id}')

        db = dbs.get_database(release, species, ensimpl_dbs_dict)

        # Only the lookup itself is timed.
        tstart = time.time()
        results = genesdb.get(db, ids=[id], details=True)
        tend = time.time()

        headers = [
            'ID', 'VERSION', 'SPECIES', 'SYMBOL', 'NAME', 'SYNONYMS',
            'EXTERNAL_IDS', 'CHR', 'START', 'END', 'STRAND'
        ]

        output = format.value
        delimited = output in ('tab', 'csv')

        # The delimiter carries the closing/opening quotes so that joining
        # the fields and wrapping the line in quotes quotes every field.
        delim = '"\t"' if output == 'tab' else '","'
        if delimited:
            print('"{}"'.format(delim.join(headers)))

        tbl = []
        for key in results:
            r = results[key]

            # `or []` also covers a key that is present but set to None.
            synonyms = '||'.join(r.get('synonyms') or [])
            external_ids_str = '||'.join(
                '{}/{}'.format(ext['db'], ext['db_id'])
                for ext in (r.get('external_ids') or [])
            )

            line = [
                r['id'],
                r.get('ensembl_version', ''),
                r['species_id'],
                r.get('symbol', ''),
                r.get('name', ''),
                synonyms,
                external_ids_str,
                r['chromosome'],
                r['start'],
                r['end'],
                r['strand'],
            ]

            if delimited:
                # Delimited output is streamed row by row.
                print('"{}"'.format(delim.join(map(str, line))))
            elif output == 'json':
                tbl.append(r)
            else:
                tbl.append(line)

        # JSON and pretty output are emitted once all rows are collected.
        if output == 'json':
            print(json.dumps({'data': tbl}, indent=2))
        elif not delimited:
            print(tabulate(tbl, headers))

        # This used to be a plain (non f-) string with an unbalanced
        # placeholder, so the literal text was logged instead of the timing.
        LOG.info(f'Search time: {format_time(tstart, tend)}')
    except Exception as e:
        # CLI boundary: report the problem as text instead of a traceback.
        print(e)
def search(term: str = typer.Argument(
               ..., help='search term'),
           release: str = typer.Option(
               None, '--release', '-r', help='release'),
           species: str = typer.Option(
               None, '--species', '-s', help='valid values are Mm and Hs'),
           exact: bool = typer.Option(
               False, help='exact match or not'),
           format: SearchOutput = typer.Option(
               SearchOutput.pretty, '--format', '-f', case_sensitive=False),
           max: int = typer.Option(
               -1, '--max', '-m', help='limit the number of matches'),
           verbose: int = typer.Option(
               0, '--verbose', '-v', count=True)):
    """
    Search ensimpl database <filename> for <term>
    """
    configure_logging(verbose)
    LOG = get_logger()
    LOG.info('Search database...')

    # A negative --max means "no limit" for the database search.
    maximum = None if max < 0 else max

    try:
        db = dbs.get_database(release, species, ensimpl_dbs_dict)
        LOG.debug(f'Database: {db}')

        # Only the search itself is timed.
        tstart = time.time()
        results = searchdb.search(db, term, exact, maximum)
        tend = time.time()

        LOG.debug(f'Number of Results: {results.num_results}')

        if len(results.matches) == 0:
            print('No results found')
            # SystemExit is not an Exception, so it is not caught below.
            sys.exit()

        headers = [
            'ID', 'SYMBOL', 'IDS', 'POSITION', 'MATCH_REASON', 'MATCH_VALUE'
        ]

        output = format.value
        delimited = output in ('tab', 'csv')

        if delimited:
            delim = '\t' if output == 'tab' else ','
            print(delim.join(headers))

        rows = []
        for seen, match in enumerate(results.matches, start=1):
            line = [
                match.ensembl_gene_id,
                match.symbol,
                '||'.join(
                    f'{ids["db"]}/{ids["db_id"]}'
                    for ids in match.external_ids
                ) if match.external_ids else '',
                f'{match.chromosome}:{match.position_start}-{match.position_end}',
                match.match_reason,
                match.match_value,
            ]

            if delimited:
                # Delimited output is streamed row by row.
                print(delim.join(map(str, line)))
            elif output == 'json':
                rows.append(dict(zip(headers, line)))
            else:
                rows.append(line)

            # Stop once a positive --max has been reached.
            if 0 < max <= seen:
                break

        # JSON and pretty output are emitted once all rows are collected.
        if output == 'json':
            print(json.dumps({'data': rows}, indent=4))
        elif not delimited:
            print(tabulate(rows, headers))

        LOG.info(f'Search time: {format_time(tstart, tend)}')
    except Exception as e:
        print(e)
async def search(request: Request, response: Response, term: str,
                 release: str, species: str,
                 exact: Optional[bool] = False,
                 limit: Optional[int] = 100000):
    """
    Perform a search of an Ensimpl database.

    The following is a list of the valid parameters:

    | Param | Type | Description |
    |--|--|--|
    | term | str | the term to search for |
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | exact | bool | to exact match or not, defaults to false |
    | limit | int | max number of items to return, defaults to 100,000 |

    A JSON response will be returned with the following elements:

    | Element | Type | Description |
    |--|--|--|
    | meta | dict | the database metadata |
    | request | dict | the request parameters listed above |
    | result | dict | the results |

    The **result** dictionary will have the following elements:

    | Element | Type | Description |
    |--|--|--|
    | num_results | int | the total number of matches |
    | num_matches | int | the number of matches returned (limited by limit) |
    | matches | list | a list of match objects, `null` when nothing matched |

    Each match object is the output of the match's `dict()` method. The
    existing documentation lists these fields (TODO confirm against the
    match model):

    | Element | Type | Description |
    |--|--|--|
    | match_reason | str | reason of the match: name, synonym, id, etc |
    | match_value | str | value that matched |
    | ensembl_gene_id | str | Ensembl gene identifier |
    | ensembl_version | int | version of the identifier |
    | chromosome | str | the chromosome |
    | position_start | int | start position in base pairs |
    | position_end | int | end position in base pairs |
    | strand | str | '+' or '-' |
    | species | str | species identifier: 'Mm', 'Hs', etc |
    | name | str | name of the gene |
    | symbol | str | gene symbol |
    | synonyms | list | list of strings |
    | external_ids | list | each having keys of 'db' and 'db_id' |

    Errors do not change the status code: whatever part of the response was
    assembled before the failure is returned, and the failure is logged.
    """
    import logging

    ret = {}
    try:
        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        ret['meta'] = meta.db_meta(db)
        ret['request'] = {
            'term': term,
            'species': species,
            'exact': exact,
            'limit': limit,
            'release': release
        }

        # Default shape, returned unchanged when nothing matches.
        ret['result'] = {'num_results': 0, 'num_matches': 0, 'matches': None}

        results = searchdb.search(db, term, exact, limit)
        if results.matches:
            # Build the complete result first and swap it in at the end, so
            # a failure while converting matches leaves the default intact.
            ret['result'] = {
                'num_results': results.num_results,
                'num_matches': results.num_matches,
                'matches': [match.dict() for match in results.matches],
            }
    except Exception:
        # TODO: better handling. The response deliberately stays a 200 with
        # the partially filled payload, but the failure is now logged
        # instead of being dropped silently.
        logging.getLogger(__name__).exception(
            'Search failed for term %r (release=%r, species=%r)',
            term, release, species
        )

    return CustomORJSONResponse(ret)
async def external_ids(request: Request, response: Response):
    """
    Get the ids for a list of identifiers supplied in a JSON request body.

    The body is read with `request.json()` and the following keys are used:

    | Key | Required | Description |
    |--|--|--|
    | release | yes | the Ensembl release |
    | species | yes | the species identifier (example 'Hs', 'Mm') |
    | ids | yes | the ids to look up |
    | source_db | no | defaults to 'Ensembl', but others are valid, please see external_dbs() |

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and an **ids** element holding the value
    returned by `genesdb.get_ids`.

    If an error occurs (including an empty result), a JSON response will be
    sent back with just one element called **message** along with a status
    code of **404**.
    """
    try:
        body = await request.json()

        if 'release' not in body:
            raise Exception('release value is missing')
        release = body['release']

        if 'species' not in body:
            raise Exception('species value is missing')
        species = body['species']

        if 'ids' not in body:
            raise Exception('ids value is missing')
        ids = body['ids']

        source_db = body['source_db'] if 'source_db' in body else 'Ensembl'

        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        db_info = meta.db_meta(db)

        found = genesdb.get_ids(db, ids=ids, source_db=source_db)
        if len(found) == 0:
            raise Exception('No results found')

        payload = {'meta': db_info, 'ids': found}
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
async def gene(request: Request, response: Response, ensembl_id: str,
               release: str, species: str,
               details: Optional[bool] = False):
    """
    Get the information for an Ensembl gene.

    The following is a list of the valid parameters:

    | Param | Type | Description |
    |--|--|--|
    | ensembl_id | str | the id to look up |
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | details | bool | passed to the gene lookup, defaults to false |

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **gene** element holding the value
    returned by `genesdb.get`. The existing documentation lists these
    items for a gene (TODO confirm against `genesdb.get`):

    | Element | Type | Description |
    |--|--|--|
    | id | str | Ensembl gene identifier |
    | ensembl_version | int | version of the identifier |
    | species_id | str | species identifier: 'Mm', 'Hs', etc |
    | chromosome | str | the chromosome |
    | start | int | start position in base pairs |
    | end | int | end position in base pairs |
    | strand | str | '+' or '-' |
    | name | str | name of the gene |
    | symbol | str | gene symbol |
    | synonyms | list | list of strings |
    | external_ids | list | each having keys of 'db' and 'db_id' |
    | homolog_ids | list | each having keys of 'homolog_id' and 'homolog_symbol' |
    | transcripts | list | each having a transcript element |

    with each transcript element containing:

    | Element | Type | Description |
    |--|--|--|
    | id | str | Ensembl transcript identifier |
    | ensembl_version | int | version of the identifier |
    | symbol | str | transcript symbol |
    | start | int | start position in base pairs |
    | end | int | end position in base pairs |
    | exons | list | dict of: number, id, start, end, ensembl_version |
    | protein | dict | id, start, end, ensembl_version |

    If no gene or more than one gene is found, or any other error occurs,
    a JSON response will be sent back with just one element called
    **message** along with a status code of **404**.
    """
    try:
        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        db_info = meta.db_meta(db)

        found = genesdb.get(db, ids=[ensembl_id], details=details)
        how_many = len(found)

        # Exactly one gene is expected for a single id.
        if how_many == 0:
            raise Exception(f'No results found for: {ensembl_id}')
        if how_many > 1:
            raise ValueError(f'Too many genes found for: {ensembl_id}')

        payload = {'meta': db_info, 'gene': found}
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)