def get_gene_info(ensembl_ids=None, hgnc_symbols=None):
    """Return gene information for the given gene identifiers.

    Every distinct, non-empty Ensembl id and HGNC symbol is looked up with
    ``query_gene`` and each returned record is converted into a ``Gene``.
    Genes found through the Ensembl ids come first, followed by the genes
    found through the HGNC symbols.

    Args:
        ensembl_ids (Optional[list]): list of Ensembl gene ids
        hgnc_symbols (Optional[list]): list of HGNC gene symbols

    Returns:
        list: a list with `Gene` objects, empty when no usable identifier
            was supplied
    """
    # Drop duplicates as well as empty identifiers (None, ''): there is
    # nothing to look up for those.
    # NOTE(review): query_gene appears to reject calls made without an
    # identifier -- confirm against its implementation.
    uniq_ensembl_ids = {
        ensembl_id for ensembl_id in (ensembl_ids or ()) if ensembl_id
    }
    uniq_hgnc_symbols = {
        hgnc_symbol for hgnc_symbol in (hgnc_symbols or ()) if hgnc_symbol
    }

    # The inner lists are built eagerly so that all queries are issued
    # before any result is consumed.
    ensembl = itertools.chain.from_iterable(
        [query_gene(ensembl_id=ensembl_id) for ensembl_id in uniq_ensembl_ids]
    )
    hgnc = itertools.chain.from_iterable(
        [query_gene(hgnc_symbol=hgnc_symbol) for hgnc_symbol in uniq_hgnc_symbols]
    )

    return [
        Gene(
            symbol=gene['hgnc_symbol'],
            hgnc_id=gene['hgnc_id'],
            ensembl_id=gene['ensembl_id'],
            description=gene['description'],
            chrom=gene['chrom'],
            start=gene['start'],
            stop=gene['stop'],
            location=get_cytoband_coord(gene['chrom'], gene['start']),
            hi_score=gene['hi_score'],
            constraint_score=gene['constraint_score'],
            omim_number=get_omim_number(gene['hgnc_symbol']),
        )
        for gene in itertools.chain(ensembl, hgnc)
    ]
def get_gene_info(ensembl_ids=None, hgnc_symbols=None):
    """Return gene information for the given gene identifiers.

    Ensembl ids take precedence: when at least one non-empty Ensembl id is
    given only those are queried and the HGNC symbols are ignored. Otherwise
    every non-empty HGNC symbol is queried; a symbol that yields no result is
    still represented by a placeholder gene carrying just the symbol.

    Args:
        ensembl_ids (Optional[list]): list of Ensembl gene ids
        hgnc_symbols (Optional[list]): list of HGNC gene symbols

    Returns:
        list: a list with `Gene` objects, empty when no usable identifier
            was supplied
    """
    # Drop duplicates as well as empty identifiers (None, ''): there is
    # nothing to look up for those, and a placeholder gene without a symbol
    # would carry no information.
    # NOTE(review): query_gene appears to reject calls made without an
    # identifier -- confirm against its implementation.
    uniq_ensembl_ids = {
        ensembl_id for ensembl_id in (ensembl_ids or ()) if ensembl_id
    }
    uniq_hgnc_symbols = {
        hgnc_symbol for hgnc_symbol in (hgnc_symbols or ()) if hgnc_symbol
    }

    gene_data = []
    if uniq_ensembl_ids:
        for ensembl_id in uniq_ensembl_ids:
            gene_data.extend(query_gene(ensembl_id=ensembl_id))
    elif uniq_hgnc_symbols:
        for hgnc_symbol in uniq_hgnc_symbols:
            query_res = query_gene(hgnc_symbol=hgnc_symbol)
            if query_res:
                gene_data.extend(query_res)
            else:
                # If no result we add just the symbol
                gene_data.append({
                    'hgnc_symbol': hgnc_symbol,
                    'hgnc_id': None,
                    'ensembl_id': None,
                    'description': None,
                    'chrom': 'unknown',
                    'start': 0,
                    'stop': 0,
                    'hi_score': None,
                    'constraint_score': None,
                })

    return [
        Gene(
            symbol=gene['hgnc_symbol'],
            hgnc_id=gene['hgnc_id'],
            ensembl_id=gene['ensembl_id'],
            description=gene['description'],
            chrom=gene['chrom'],
            start=gene['start'],
            stop=gene['stop'],
            location=get_cytoband_coord(gene['chrom'], gene['start']),
            hi_score=gene['hi_score'],
            constraint_score=gene['constraint_score'],
            omim_number=get_omim_number(gene['hgnc_symbol']),
        )
        for gene in gene_data
    ]
def _symbol_only_record(hgnc_symbol):
    """Build a placeholder gene record that carries nothing but the symbol."""
    return {
        'hgnc_symbol': hgnc_symbol,
        'hgnc_id': None,
        'ensembl_id': None,
        'description': None,
        'chrom': 'unknown',
        'start': 0,
        'stop': 0,
        'hi_score': None,
        'constraint_score': None,
    }


def get_gene_info(ensembl_ids=None, hgnc_symbols=None):
    """Return gene information for the given gene identifiers.

    When at least one non-empty Ensembl id is supplied, only the Ensembl ids
    are queried. Otherwise the non-empty HGNC symbols are queried, and a
    symbol without any query result is kept as a placeholder record so that
    it still shows up in the output.

    Args:
        ensembl_ids (Optional[list]): list of Ensembl gene ids
        hgnc_symbols (Optional[list]): list of HGNC gene symbols

    Returns:
        list: a list with `Gene` objects, empty when no usable identifier
            was supplied
    """
    # Empty identifiers (None, '') are discarded together with duplicates;
    # they cannot be looked up.
    # NOTE(review): query_gene appears to reject calls made without an
    # identifier -- confirm against its implementation.
    wanted_ensembl_ids = {
        ensembl_id for ensembl_id in (ensembl_ids or ()) if ensembl_id
    }
    wanted_symbols = {
        hgnc_symbol for hgnc_symbol in (hgnc_symbols or ()) if hgnc_symbol
    }

    gene_data = []
    if wanted_ensembl_ids:
        for ensembl_id in wanted_ensembl_ids:
            gene_data.extend(query_gene(ensembl_id=ensembl_id))
    elif wanted_symbols:
        for hgnc_symbol in wanted_symbols:
            hits = query_gene(hgnc_symbol=hgnc_symbol)
            if hits:
                gene_data.extend(hits)
            else:
                # If no result we add just the symbol
                gene_data.append(_symbol_only_record(hgnc_symbol))

    genes = []
    for gene in gene_data:
        genes.append(
            Gene(
                symbol=gene['hgnc_symbol'],
                hgnc_id=gene['hgnc_id'],
                ensembl_id=gene['ensembl_id'],
                description=gene['description'],
                chrom=gene['chrom'],
                start=gene['start'],
                stop=gene['stop'],
                location=get_cytoband_coord(gene['chrom'], gene['start']),
                hi_score=gene['hi_score'],
                constraint_score=gene['constraint_score'],
                omim_number=get_omim_number(gene['hgnc_symbol']),
            )
        )
    return genes
def get_gene_info(ensembl_ids=None, hgnc_symbols=None):
    """Return the genes matching the given gene identifiers.

    Ensembl ids take precedence: when ``ensembl_ids`` is non-empty only those
    are queried, otherwise ``hgnc_symbols`` is used. Duplicate identifiers
    are queried once and empty identifiers are skipped.

    Args:
        ensembl_ids (iterable): Ensembl gene ids
        hgnc_symbols (iterable): HGNC gene symbols

    Returns:
        genes (list): A list with Genes
    """
    # Collapse duplicates in whichever identifier collection was supplied.
    if ensembl_ids:
        ensembl_ids = set(ensembl_ids)
    elif hgnc_symbols:
        hgnc_symbols = set(hgnc_symbols)

    # Pick the query_gene keyword and the identifiers to look up.
    if ensembl_ids:
        keyword, identifiers = 'ensembl_id', ensembl_ids
    elif hgnc_symbols:
        keyword, identifiers = 'hgnc_symbol', hgnc_symbols
    else:
        return []

    found = []
    for identifier in identifiers:
        if not identifier:
            continue
        for record in query_gene(**{keyword: identifier}):
            location = get_cytoband_coord(record['chrom'], record['start'])
            omim_number = get_omim_number(record['hgnc_symbol'])
            found.append(Gene(
                symbol=record['hgnc_symbol'],
                hgnc_id=record['hgnc_id'],
                ensembl_id=record['ensembl_id'],
                description=record['description'],
                chrom=record['chrom'],
                start=record['start'],
                stop=record['stop'],
                location=location,
                hi_score=record['hi_score'],
                constraint_score=record['constraint_score'],
                omim_number=omim_number,
            ))
    return found
def test_query_ensembl_id(database):
    """Querying by Ensembl id returns the expected gene as the first hit."""
    expected_ensembl_id = 'ENSG00000070814'
    expected_symbol = 'TCOF1'

    hits = query_gene(ensembl_id=expected_ensembl_id, connection=database)
    first_hit = hits[0]

    assert first_hit['ensembl_id'] == expected_ensembl_id
    assert first_hit['hgnc_symbol'] == expected_symbol
def test_query_non_existing_term(database):
    """Querying a term with no matching gene returns an empty list."""
    missing_term = 'ENSG00000000004'

    hits = query_gene(missing_term, connection=database)

    assert hits == []
def test_query_wrong_term(database):
    """Querying with the invalid term 'HPO' must raise ``ValueError``."""
    term = 'HPO'
    with pytest.raises(ValueError):
        # Only the raised exception matters, so the return value is not kept.
        query_gene(term, connection=database)
def test_query_no_term(database):
    """Calling ``query_gene`` without any term must raise ``SyntaxError``."""
    with pytest.raises(SyntaxError):
        # Only the raised exception matters, so the return value is not kept.
        query_gene(connection=database)