예제 #1
0
async def chromosomes(request: Request, response: Response, release: str,
                      species: str):
    """
    Get the chromosomes for a particular Ensembl release and species.

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **chromosomes** element containing a
    **list** of chromosomes consisting of the following items:

    | Element | Type | Description |
    |--|--|--|
    | chromosome | str | the chromosome |
    | length | int | length in base pairs |
    | order | int | order in the genome |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {
            'meta': meta.db_meta(database),
            'chromosomes': meta.chromosomes(database),
        }
    except Exception as err:
        # Every failure in the lookup or the queries is reported to the
        # client as a 404 carrying the exception text.
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #2
0
def info(release: str = typer.Option(
             None,
             '--release', '-r',
             help='release'),
         species: str = typer.Option(
             None,
             '--species', '-s',
             help='valid values are Mm and Hs'),
         verbose: int = typer.Option(
             0, '--verbose', '-v', count=True)):
    """
    Print the metadata and statistics of an annotation database.
    """
    try:
        configure_logging(verbose)
        LOG = get_logger()
        LOG.debug('Stats database...')

        db = dbs.get_database(release, species, ensimpl_dbs_dict)
        db_meta = meta.db_meta(db)
        counts = meta.stats(db)

        print(f'Release: {db_meta["release"]}')
        print(f'Species: {db_meta["species"]}, {db_meta["assembly_patch"]}')

        # One row per statistic, ordered by statistic name.
        rows = [[key, counts[key]] for key in sorted(counts)]
        print(tabulate(rows))
    except Exception as err:
        # Errors are printed rather than raised so the CLI shows only the
        # message, not a traceback.
        print(err)
예제 #3
0
async def stats(request: Request, response: Response, release: str,
                species: str):
    """
    Get the information for a particular Ensembl release and species.

    If successful, a JSON response will be returned with a **meta** element
    (the database metadata) and a **stats** element. Together they carry
    the following items:

    | Element | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | assembly | str | the genome assembly information |
    | assembly_patch | str | the genome assembly patch number |
    | stats | dict | various stats about the database |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {
            'meta': meta.db_meta(database),
            'stats': meta.stats(database),
        }
    except Exception as err:
        # Every failure is reported to the client as a 404 with the
        # exception text as the message.
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #4
0
async def releases(request: Request, response: Response):
    """
    Get all the release and species information.

    If successful, a JSON response will be returned containing a **list**
    of releases, one per configured database, each consisting of the
    following items:

    | Element | Type | Description |
    |--|--|--|
    | release | str | the Ensembl release |
    | species | str | the species identifier (example 'Hs', 'Mm') |
    | assembly | str | the genome assembly information |
    | assembly_patch | str | the genome assembly patch number |

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        state = request.app.state
        # Resolve each configured (release, species) pair to its database
        # and collect that database's metadata.
        found = [
            meta.db_meta(
                dbs.get_database(entry['release'], entry['species'],
                                 state.dbs_dict))
            for entry in state.dbs
        ]
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(found)
예제 #5
0
async def genes_json(request: Request, response: Response):
    """
    Look up genes from a JSON request body.

    The body will accept the following keys:

    | Key | Required | Description |
    |--|--|--|
    | release | yes | release used to select the database |
    | species | yes | species used to select the database |
    | ids | yes | ids to look up; **ids[]** is accepted as an alternative |
    | details | no | converted with `utils.str2bool`, defaults to false |

    If successful, a JSON response will be returned with a **meta** element
    and a **genes** element holding the lookup results.

    If the body is not valid JSON, a response with **message** and
    **detail** elements is sent back; any other error produces a response
    with just a **message** element. Both use a status code of **404**.
    """
    try:
        body = await request.json()

        # Required keys are validated in a fixed order so the first missing
        # one determines the error message.
        if 'release' not in body:
            raise Exception('release value is missing')
        release = body['release']

        if 'species' not in body:
            raise Exception('species value is missing')
        species = body['species']

        if 'ids' in body:
            ids = body['ids']
        elif 'ids[]' in body:
            # this is for backwards compatibility with ensimplR
            ids = body['ids[]']
        else:
            raise Exception('ids value is missing')

        details = (utils.str2bool(body['details'])
                   if 'details' in body else False)

        db = dbs.get_database(release, species, request.app.state.dbs_dict)
        payload = {'meta': meta.db_meta(db)}

        found = genesdb.get(db, ids=ids, details=details)

        if len(found) == 0:
            raise Exception('No results found')

        payload['genes'] = found
    except JSONDecodeError as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {
            'message': 'Received data is not a valid JSON',
            'detail': str(err)
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #6
0
async def genes_model(request: Request, response: Response, gq: GQ):
    """
    Look up genes described by a ``GQ`` request model.

    The ``release``, ``species``, ``ids`` and ``details`` attributes of
    *gq* are used for the database selection and the gene lookup.

    If successful, a dict with a **meta** element and a **genes** element is
    returned.

    If an error occurs (including an empty result), a dict with just one
    element called **message** is returned along with a status code of
    **404**.
    """
    try:
        database = dbs.get_database(gq.release, gq.species,
                                    request.app.state.dbs_dict)
        payload = {'meta': meta.db_meta(database)}

        found = genesdb.get(database, ids=gq.ids, details=gq.details)

        # An empty result is reported through the same 404 path as errors.
        if len(found) == 0:
            raise Exception('No results found')

        payload['genes'] = found
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return payload
예제 #7
0
async def exon_info(request: Request,
                    response: Response,
                    release: str,
                    species: str,
                    chrom: Optional[str] = None,
                    compress: Optional[bool] = False):
    """
    Get the exon information for a particular Ensembl release and species.

    ``chrom`` and ``compress`` are forwarded unchanged to
    ``genesdb.get_exon_info``.

    If successful, a JSON response will be returned with a **meta** element
    and a **genes** element.

    If an error occurs, a JSON response will be sent back with just one
    element called **message** along with a status code of **404**.
    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {
            'meta': meta.db_meta(database),
            'genes': genesdb.get_exon_info(database, chrom, compress),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #8
0
async def randomids(request: Request,
                    response: Response,
                    release: str,
                    species: str,
                    source_db: str = 'Ensembl',
                    limit: int = 10):
    """
    Get random ids.  Mostly useful for examples.

    The following is a list of the valid parameters:

    ========= =======  ===================================================
    Param     Type     Description
    ========= =======  ===================================================
    release   string   the Ensembl release
    species   string   the species identifier (example 'Hs', 'Mm')
    limit     integer  Number of ids to return, defaults to 10.
    source_db string   Defaults to 'Ensembl', but other are valid, please
                       see external_dbs().
    ========= =======  ===================================================

    If successful, a JSON response will be returned with a ``meta`` element
    and an ``ids`` element holding the random IDs.

    If an error occurs, a JSON response will be sent back with just one
    element called ``message`` along with a status code of 404.
    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {
            'meta': meta.db_meta(database),
            'ids': genesdb.random_ids(database, source_db, limit),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #9
0
async def external_dbs(request: Request, response: Response, release: str,
                       species: str):
    """Get the external database information.

    The following is a list of the valid parameters:

    =======  =======  ===================================================
    Param    Type     Description
    =======  =======  ===================================================
    release  string   the Ensembl release
    species  string   the species identifier (example 'Hs', 'Mm')
    =======  =======  ===================================================

    If successful, a JSON response will be returned with a ``meta`` element
    and an ``external_dbs`` element containing a ``list`` of external
    databases consisting of the following items:

    =================  =======  ============================================
    Element            Type     Description
    =================  =======  ============================================
    external_db_id     string   unique external db identifier
    external_db_name   string   external db name
    ranking_id         string   internal ranking id
    =================  =======  ============================================

    If an error occurs, a JSON response will be sent back with just one
    element called ``message`` along with a status code of 404.

    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {
            'meta': meta.db_meta(database),
            'external_dbs': meta.external_dbs(database),
        }
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #10
0
async def genes_form(request: Request,
                     response: Response,
                     release: str = Form(...),
                     species: str = Form(...),
                     ids: List[str] = Form(...),
                     details: Optional[bool] = Form(...)):
    """
    Look up genes from submitted form fields.

    The ``release`` and ``species`` fields select the database; ``ids`` and
    ``details`` are passed to the gene lookup.

    If successful, a dict with a **meta** element and a **genes** element is
    returned.

    If an error occurs (including an empty result), a dict with just one
    element called **message** is returned along with a status code of
    **404**.
    """
    # NOTE(review): ``details`` is annotated Optional but declared with
    # Form(...), which marks the field as required -- confirm this is
    # intended.
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {'meta': meta.db_meta(database)}

        found = genesdb.get(database, ids=ids, details=details)

        # An empty result is reported through the same 404 path as errors.
        if len(found) == 0:
            raise Exception('No results found')

        payload['genes'] = found
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return payload
예제 #11
0
def gene(id: str = typer.Argument(
             ...,
             help='Ensembl ID'),
         release: str = typer.Option(
             None,
             '--release', '-r',
             help='release'),
         species: str = typer.Option(
             None,
             '--species', '-s',
             help='valid values are Mm and Hs'),
         format: SearchOutput = typer.Option(
             SearchOutput.pretty,
             '--format', '-f',
             case_sensitive=False),
         verbose: int = typer.Option(
             0, '--verbose', '-v', count=True)):
    """
    Get gene information from annotation database.
    """
    # NOTE: ``id`` and ``format`` shadow builtins, but they are part of the
    # command's interface and therefore keep their names.
    try:
        configure_logging(verbose)
        LOG = get_logger()
        LOG.debug(f'Release: {release}')
        LOG.debug(f'Species: {species}')
        LOG.debug(f'Format: {format}')
        LOG.debug(f'ID: {id}')

        db = dbs.get_database(release, species, ensimpl_dbs_dict)

        tstart = time.time()
        results = genesdb.get(db, ids=[id], details=True)
        tend = time.time()

        headers = [
            'ID',
            'VERSION',
            'SPECIES',
            'SYMBOL',
            'NAME',
            'SYNONYMS',
            'EXTERNAL_IDS',
            'CHR',
            'START',
            'END',
            'STRAND',
        ]

        output = format.value
        delimited = output in ('tab', 'csv')

        # Delimited output quotes every field: the delimiter carries the
        # inner quotes and the outer pair is added when printing.
        delim = '"\t"' if output == 'tab' else '","'

        if delimited:
            print('"{}"'.format(delim.join(headers)))

        tbl = []

        for key in results:
            r = results[key]

            # A missing or empty external id list yields an empty column.
            external_ids_str = '||'.join(
                '{}/{}'.format(ext['db'], ext['db_id'])
                for ext in (r.get('external_ids') or []))

            line = [
                r['id'],
                r.get('ensembl_version', ''),
                r['species_id'],
                r.get('symbol', ''),
                r.get('name', ''),
                '||'.join(r.get('synonyms', [])),
                external_ids_str,
                r['chromosome'],
                r['start'],
                r['end'],
                r['strand'],
            ]

            if delimited:
                # Delimited rows are streamed as they are produced.
                print('"{}"'.format(delim.join(map(str, line))))
            elif output == 'json':
                tbl.append(r)
            else:
                tbl.append(line)

        if output == 'json':
            print(json.dumps({'data': tbl}, indent=2))
        elif not delimited:
            print(tabulate(tbl, headers))

        # The original logged a literal, un-interpolated template here.
        LOG.info(f'Search time: {format_time(tstart, tend)}')
    except Exception as e:
        # Errors are printed rather than raised so the CLI shows only the
        # message, not a traceback.
        print(e)
예제 #12
0
def search(term: str = typer.Argument(
               ...,
               help='search term'),
           release: str = typer.Option(
               None,
               '--release', '-r',
               help='release'),
           species: str = typer.Option(
               None,
               '--species', '-s',
               help='valid values are Mm and Hs'),
           exact: bool = typer.Option(
               False,
               help='exact match or not'),
           format: SearchOutput = typer.Option(
               SearchOutput.pretty,
               '--format', '-f',
               case_sensitive=False),
           max: int = typer.Option(
               -1,
               '--max', '-m',
               help='limit the number of matches'),
           verbose: int = typer.Option(
               0, '--verbose', '-v', count=True)):
    """
    Search an ensimpl database for <term>
    """
    configure_logging(verbose)
    LOG = get_logger()
    LOG.info('Search database...')

    # A negative --max means "no limit" for the database search.
    maximum = None if max < 0 else max

    try:
        db = dbs.get_database(release, species, ensimpl_dbs_dict)

        LOG.debug(f'Database: {db}')

        tstart = time.time()
        results = searchdb.search(db, term, exact, maximum)
        tend = time.time()

        LOG.debug(f'Number of Results: {results.num_results}')

        if len(results.matches) == 0:
            print('No results found')
            # SystemExit is not an Exception subclass, so it passes through
            # the handler below and ends the command.
            sys.exit()

        headers = [
            'ID',
            'SYMBOL',
            'IDS',
            'POSITION',
            'MATCH_REASON',
            'MATCH_VALUE',
        ]
        tbl = []

        delimited = format.value in ('tab', 'csv')
        as_json = format.value == 'json'

        if delimited:
            delim = '\t' if format.value == 'tab' else ','
            print(delim.join(headers))

        for count, match in enumerate(results.matches, start=1):
            line = [match.ensembl_gene_id, match.symbol]

            if match.external_ids:
                line.append('||'.join(
                    f'{ids["db"]}/{ids["db_id"]}'
                    for ids in match.external_ids))
            else:
                line.append('')

            line.extend([
                f'{match.chromosome}:{match.position_start}-{match.position_end}',
                match.match_reason,
                match.match_value,
            ])

            if delimited:
                # Delimited rows are streamed as they are produced.
                print(delim.join(map(str, line)))
            elif as_json:
                tbl.append(dict(zip(headers, line)))
            else:
                tbl.append(line)

            # Stop once a positive --max has been reached.
            if count >= max > 0:
                break

        if as_json:
            print(json.dumps({'data': tbl}, indent=4))
        elif not delimited:
            print(tabulate(tbl, headers))

        LOG.info(f'Search time: {format_time(tstart, tend)}')

    except Exception as err:
        print(err)
예제 #13
0
async def search(request: Request,
                 response: Response,
                 term: str,
                 release: str,
                 species: str,
                 exact: Optional[bool] = False,
                 limit: Optional[int] = 100000):
    """
    Perform a search of a Ensimpl database.

    The following is a list of the valid parameters:

    =======  =======  ===================================================
    Param    Type     Description
    =======  =======  ===================================================
    term     string   the term to search for
    release  string   the Ensembl release
    species  string   the species identifier (example 'Hs', 'Mm')
    exact    string   to exact match or not, defaults to 'False'
    limit    string   max number of items to return, defaults to 100,000
    =======  =======  ===================================================

    If successful, a JSON response will be returned with the following
    elements:

    =======  =======  ===================================================
    Element  Type     Description
    =======  =======  ===================================================
    meta     dict     the database metadata
    request  dict     the request parameters
    result   dict     the results
    =======  =======  ===================================================

    The ``request`` dictionary will have the same values as listed above in the
    valid parameters.

    The ``result`` dictionary will have the following elements:

    ============  =======  ===================================================
    Element       Type     Description
    ============  =======  ===================================================
    num_results   int      the total number of matches
    num_matches   int      the number of matches returned (limited by limit)
    matches       list     a list of match objects
    ============  =======  ===================================================

    Each match object will contain:

    ================  =======  ===============================================
    Element           Type     Description
    ================  =======  ===============================================
    match_reason      string   reason of the match: name, synonym, id, etc
    match_value       string   value that matched
    ensembl_gene_id   string   Ensembl gene identifier
    ensembl_version   integer  version of the identifier
    chromosome        string   the chromosome
    position_start    integer  start position in base pairs
    position_end      integer  end position in base pairs
    strand            string   '+' or '-'
    species           string   species identifier: 'Mm', 'Hs', etc
    name              string   name of the gene
    symbol            string   gene symbol
    synonyms          list     list of strings
    external_ids      list     each having keys of 'db' and 'db_id'
    ================  =======  ===============================================

    If nothing matches, ``result`` keeps its placeholder values (both counts
    ``0`` and ``matches`` set to ``null``).

    If an error occurs, the failure is logged and whatever part of the
    response was assembled before the error is still returned; the status
    code is not changed.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    ret = {}

    try:
        db = dbs.get_database(release, species, request.app.state.dbs_dict)

        ret['meta'] = meta.db_meta(db)

        ret['request'] = {
            'term': term,
            'species': species,
            'exact': exact,
            'limit': limit,
            'release': release
        }

        # Placeholder that is kept when there are no matches or when the
        # search itself fails.
        ret['result'] = {'num_results': 0, 'num_matches': 0, 'matches': None}

        results = searchdb.search(db, term, exact, limit)

        if results.matches:
            # Replaced in one step so a failure while converting matches
            # leaves the placeholder rather than a half-filled result.
            ret['result'] = {
                'num_results': results.num_results,
                'num_matches': results.num_matches,
                'matches': [match.dict() for match in results.matches],
            }

    except Exception:
        # TODO: better handling and logging
        # The response stays best-effort (no status change), but the
        # failure is no longer dropped silently.
        logging.getLogger(__name__).exception(
            'search failed (term=%r, release=%r, species=%r)',
            term, release, species)

    return CustomORJSONResponse(ret)
예제 #14
0
async def external_ids(request: Request, response: Response):
    """
    Look up ids through ``genesdb.get_ids`` from a JSON request body.

    The following is a list of the valid keys of the JSON body:

    ========= =======  ===================================================
    Param     Type     Description
    ========= =======  ===================================================
    ids       list     repeated id elements, one per Ensembl id
    source_db string   Defaults to 'Ensembl', but other are valid, please
                       see external_dbs().
    release   string   the Ensembl release
    species   string   the species identifier (example 'Hs', 'Mm')
    ========= =======  ===================================================

    If successful, a JSON response will be returned with a ``meta`` element
    and an ``ids`` element, the latter holding multiple entries each
    consisting of the following items:

    =================  =======  ============================================
    Element            Type     Description
    =================  =======  ============================================
    id                 string   Ensembl gene identifier
    ensembl_version    integer  version of the identifier
    =================  =======  ============================================


    If an id is not found, the gene will still be returned but have
    ``null`` for a value.

    If an error occurs (a required key is missing, or nothing at all is
    found), a JSON response will be sent back with just one element called
    ``message`` along with a status code of 404.
    """
    try:
        body = await request.json()

        # Required keys are validated in a fixed order so the first missing
        # one determines the error message.
        if 'release' not in body:
            raise Exception('release value is missing')
        release = body['release']

        if 'species' not in body:
            raise Exception('species value is missing')
        species = body['species']

        if 'ids' not in body:
            raise Exception('ids value is missing')
        ids = body['ids']

        source_db = body['source_db'] if 'source_db' in body else 'Ensembl'

        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {'meta': meta.db_meta(database)}

        found = genesdb.get_ids(database, ids=ids, source_db=source_db)

        if len(found) == 0:
            raise Exception('No results found')

        payload['ids'] = found
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)
예제 #15
0
async def gene(request: Request,
               response: Response,
               ensembl_id: str,
               release: str,
               species: str,
               details: Optional[bool] = False):
    """
    Get the information for an Ensembl gene.

    The following is a list of the valid query parameters:

    =======  =======  ===================================================
    Param    Type     Description
    =======  =======  ===================================================
    release  string   the Ensembl release
    species  string   the species identifier (example 'Hs', 'Mm')
    details  string   true, false, T, F, 0, 1
    =======  =======  ===================================================

    If successful, a JSON response will be returned with a ``meta`` element
    and a single ``gene`` element consisting of the following items:

    =================  =======  ============================================
    Element            Type     Description
    =================  =======  ============================================
    id                 string   Ensembl gene identifier
    ensembl_version    integer  version of the identifier
    species_id         string   species identifier: 'Mm', 'Hs', etc
    chromosome         string   the chromosome
    start              integer  start position in base pairs
    end                integer  end position in base pairs
    strand             string   '+' or '-'
    name               string   name of the gene
    symbol             string   gene symbol
    synonyms           list     list of strings
    external_ids       list     each having keys of 'db' and 'db_id'
    homolog_ids        list     each having keys of 'homolog_id' and
                                'homolog_symbol'
    transcripts        list     each having a ``transcript`` element
    =================  =======  ============================================

    ``transcript_element``, with each item containing:

    =================  =======  ============================================
    Element            Type     Description
    =================  =======  ============================================
    id                 string   Ensembl transcript identifier
    ensembl_version    integer  version of the identifier
    symbol             string   transcript symbol
    start              integer  start position in base pairs
    end                integer  end position in base pairs
    exons              list     dict of: number,id,start,end,ensembl_version
    protein            dict     id, start, end, ensembl_version
    =================  =======  ============================================

    If the id is not found, the gene will still be returned but have
    ``null`` for a value.

    If an error occurs (including an empty result or more than one gene
    for the id), a JSON response will be sent back with just one element
    called ``message`` along with a status code of 404.
    """
    try:
        database = dbs.get_database(release, species,
                                    request.app.state.dbs_dict)
        payload = {'meta': meta.db_meta(database)}

        found = genesdb.get(database, ids=[ensembl_id], details=details)

        # Exactly one gene is expected for a single id; anything else is
        # reported through the 404 path below.
        if len(found) == 0:
            raise Exception(f'No results found for: {ensembl_id}')

        if len(found) > 1:
            raise ValueError(f'Too many genes found for: {ensembl_id}')

        payload['gene'] = found
    except Exception as err:
        response.status_code = status.HTTP_404_NOT_FOUND
        return {'message': str(err)}

    return CustomORJSONResponse(payload)