def get_ids_for_gene(hgnc_name, **kwargs): """Get the curated set of articles for a gene in the Entrez database. Search parameters for the Gene database query can be passed in as keyword arguments. Parameters ---------- hgnc_name : string The HGNC name of the gene. This is used to obtain the HGNC ID (using the hgnc_client module) and in turn used to obtain the Entrez ID associated with the gene. Entrez is then queried for that ID. """ # Get the HGNC ID for the HGNC name hgnc_id = hgnc_client.get_hgnc_id(hgnc_name) if hgnc_id is None: raise ValueError('Invalid HGNC name.') # Get the Entrez ID entrez_id = hgnc_client.get_entrez_id(hgnc_id) if entrez_id is None: raise ValueError('Entrez ID not found in HGNC table.') # Query the Entrez Gene database params = {'db': 'gene', 'retmode': 'xml', 'id': entrez_id} params.update(kwargs) tree = send_request(pubmed_fetch, params) if tree is None: return [] if tree.find('ERROR') is not None: logger.error(tree.find('ERROR').text) return [] # Get all PMIDs from the XML tree id_terms = tree.findall('.//PubMedId') if id_terms is None: return [] # Use a set to remove duplicate IDs ids = list(set([idt.text for idt in id_terms])) return ids
def get_ids_for_gene(hgnc_name, **kwargs): """Get the curated set of articles for a gene in the Entrez database. Search parameters for the Gene database query can be passed in as keyword arguments. Parameters ---------- hgnc_name : string The HGNC name of the gene. This is used to obtain the HGNC ID (using the hgnc_client module) and in turn used to obtain the Entrez ID associated with the gene. Entrez is then queried for that ID. """ # Get the HGNC ID for the HGNC name hgnc_id = hgnc_client.get_hgnc_id(hgnc_name) if hgnc_id is None: raise ValueError('Invalid HGNC name.') # Get the Entrez ID entrez_id = hgnc_client.get_entrez_id(hgnc_id) if entrez_id is None: raise ValueError('Entrez ID not found in HGNC table.') # Query the Entrez Gene database params = {'db': 'gene', 'retmode': 'xml', 'id': entrez_id} params.update(kwargs) tree = send_request(pubmed_fetch, params) if tree is None: return [] if tree.find('ERROR') is not None: logger.error(tree.find('ERROR').text) return [] # Get all PMIDs from the XML tree id_terms = tree.findall('.//PubMedId') if id_terms is None: return [] # Use a set to remove duplicate IDs ids = list(set([idt.text for idt in id_terms])) return ids