def test_get_ids():
    """Text-word-restricted JUN search should return fewer hits than the broad one."""
    time.sleep(0.3)
    broad = pubmed_client.get_ids('JUN', use_text_word=False)
    narrow = pubmed_client.get_ids('JUN', use_text_word=True)
    assert len(broad) > len(narrow)
    assert unicode_strs(broad)
    assert unicode_strs(narrow)
def test_get_ids():
    """Compare broad vs. text-word-limited PubMed searches for JUN."""
    time.sleep(0.3)
    # Query in the same order as before: broad first, then restricted
    results = {tw: pubmed_client.get_ids('JUN', use_text_word=tw)
               for tw in (False, True)}
    assert len(results[False]) > len(results[True])
    assert unicode_strs(results[False])
    assert unicode_strs(results[True])
def test_get_pmc_ids():
    """Query the PMC database for 'braf' and check the returned ID prefixes."""
    time.sleep(0.3)
    pmc_ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(pmc_ids) == 10
    # All ten IDs are expected to start with '5' or '6'
    assert sum(1 for i in pmc_ids if i.startswith(('6', '5'))) == 10
    assert unicode_strs(pmc_ids)
def test_get_pmc_ids():
    """All ten PMC hits for 'braf' should carry a 5- or 6-prefixed ID."""
    time.sleep(0.3)
    ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(ids) == 10
    matching = [pmcid for pmcid in ids
                if pmcid.startswith('6') or pmcid.startswith('5')]
    assert len(matching) == 10
    assert unicode_strs(ids)
def search_literature(self, date_limit=None):
    """Search for the model's search terms in the literature.

    Parameters
    ----------
    date_limit : Optional[int]
        The number of days to search back from today.

    Returns
    -------
    pmid_to_terms : dict
        A dict representing all the PMIDs returned by the searches as
        keys, and the search terms for which the given PMID was produced
        as values.
    """
    pmid_to_terms = {}
    # Run one PubMed query per search term and invert the mapping on
    # the fly: PMID -> list of terms that produced it.
    for search_term in self.search_terms:
        for pmid in pubmed_client.get_ids(search_term, reldate=date_limit):
            pmid_to_terms.setdefault(pmid, []).append(search_term)
    return pmid_to_terms
def get_text_refs_for_pubmed_search_term(search_term, **kwargs): """"Returns text ref IDs for PMIDs obtained using a PubMed search.""" print('Searching for %s' % search_term) pmids = pubmed_client.get_ids(search_term, **kwargs) print('Getting TextRefs for %d PMIDs' % len(pmids)) db = get_primary_db() tr_pmids = db.select_all(db.TextRef.id, db.TextRef.pmid_in(pmids)) trids = {res.id for res in tr_pmids} return trids
def get_drug_pmids():
    """Return PMIDs for all the drugs and their synonyms."""
    # Accumulate into a set to deduplicate PMIDs shared across synonyms
    pmid_set = set()
    for synonyms in drug_names.values():
        for synonym in synonyms:
            pmid_set.update(pubmed_client.get_ids(synonym, retmax=5000))
    drugs_pmid_list = list(pmid_set)
    print('Found %d PMIDs for drugs' % len(drugs_pmid_list))
    return drugs_pmid_list
def get_searchterm_pmids(search_terms, num_days):
    """Map each search term to the PMIDs found for it within num_days."""
    pmids = {}
    for term in search_terms:
        # Special cases: ambiguous gene symbols get their 'c-' prefixed form
        upper = term.upper()
        if upper == 'MET':
            term = 'c-MET'
        elif upper == 'JUN':
            term = 'c-JUN'
        pmids[term] = pubmed_client.get_ids(term, reldate=num_days)
    return pmids
def get_searchterm_pmids(search_terms, num_days):
    """Map each search term to its PMIDs, pausing between queries."""
    import time
    results = {}
    for term in search_terms:
        # Special cases: rewrite ambiguous gene symbols
        key = term
        if term.upper() == 'MET':
            key = 'c-MET'
        elif term.upper() == 'JUN':
            key = 'c-JUN'
        results[key] = pubmed_client.get_ids(key, reldate=num_days)
        # Pause between queries (presumably to respect NCBI rate limits)
        time.sleep(1)
    return results
def test_readme_using_indra3():
    """Run the README example: read recent BRAF abstracts with REACH.

    Fix: the local variable 'abs' shadowed the builtin abs(); renamed to
    'abstract'.
    """
    from indra.sources import reach
    from indra.literature import pubmed_client
    # Search for 10 most recent abstracts in PubMed on 'BRAF'
    pmids = pubmed_client.get_ids('BRAF', retmax=10)
    all_statements = []
    for pmid in pmids:
        abstract = pubmed_client.get_abstract(pmid)
        if abstract is not None:
            reach_processor = reach.process_text(abstract,
                                                 url=reach.local_text_url)
            if reach_processor is not None:
                all_statements += reach_processor.statements
    assert len(all_statements) > 0
def get_ids():
    """Search PubMed for references for the Ras 227 gene set.

    Results are cached in reading/pmids.pkl and
    reading/pmids_from_gene.pkl; if both exist they are loaded instead
    of re-querying.

    Returns
    -------
    tuple of OrderedDict
        (pmids, pmids_from_gene): per-gene PMID sets from a free-text
        PubMed query and from an Entrez gene query, respectively.
    """
    # Check if we've got the files already
    if os.path.isfile('reading/pmids.pkl') and \
            os.path.isfile('reading/pmids_from_gene.pkl'):
        # Fix: pickle files must be opened in binary mode ('rb') under
        # Python 3; the dump side below already used 'wb'.
        with open('reading/pmids.pkl', 'rb') as pmids_file:
            pmids = pickle.load(pmids_file)
        with open('reading/pmids_from_gene.pkl', 'rb') as pmids_from_gene_file:
            pmids_from_gene = pickle.load(pmids_from_gene_file)
        return (pmids, pmids_from_gene)
    # STEP 0: Get gene list
    gene_list = []
    # Get gene list from ras_pathway_proteins.csv
    fname = os.path.join(indra.__path__[0], 'resources',
                         'ras_pathway_proteins.csv')
    with open(fname) as f:
        csvreader = csv.reader(f, delimiter='\t')
        for row in csvreader:
            gene_list.append(row[0].strip())
    pmids = OrderedDict()
    pmids_from_gene = OrderedDict()
    for gene in gene_list:
        print("Querying for %s" % gene)
        ids_gene = set(pubmed_client.get_ids_for_gene(gene))
        print("Found %d in gene query" % len(ids_gene))
        # Hack to deal with excessive number of names
        if gene == 'MET':
            query_gene = 'CMET'
        elif gene == 'JUN':
            query_gene = 'CJUN'
        else:
            query_gene = gene
        # Idiom fix: pass retmax directly instead of **{'retmax': ...}
        ids_pubmed = set(pubmed_client.get_ids(query_gene, retmax=100000))
        print("Found %d in string query" % len(ids_pubmed))
        pmids[gene] = ids_pubmed
        pmids_from_gene[gene] = ids_gene
    with open('reading/pmids.pkl', 'wb') as f:
        pickle.dump(pmids, f)
    with open('reading/pmids_from_gene.pkl', 'wb') as f:
        pickle.dump(pmids_from_gene, f)
    return pmids, pmids_from_gene
def get_ids():
    """Search PubMed for references for the Ras 227 gene set.

    Results are cached in reading/pmids.pkl and
    reading/pmids_from_gene.pkl; if both exist they are loaded instead
    of re-querying.

    Fixes: Python 2 print statements converted to print() calls (the
    originals are syntax errors under Python 3), and pickle files are
    opened in binary mode ('rb'/'wb') as pickle requires on Python 3.
    The printed output is unchanged.
    """
    # Check if we've got the files already
    if os.path.isfile('reading/pmids.pkl') and \
            os.path.isfile('reading/pmids_from_gene.pkl'):
        with open('reading/pmids.pkl', 'rb') as pmids_file:
            pmids = pickle.load(pmids_file)
        with open('reading/pmids_from_gene.pkl', 'rb') as pmids_from_gene_file:
            pmids_from_gene = pickle.load(pmids_from_gene_file)
        return (pmids, pmids_from_gene)
    # STEP 0: Get gene list
    gene_list = []
    # Get gene list from ras_pathway_proteins.csv
    with open('../../data/ras_pathway_proteins.csv') as f:
        csvreader = csv.reader(f, delimiter='\t')
        for row in csvreader:
            gene_list.append(row[0].strip())
    pmids = OrderedDict()
    pmids_from_gene = OrderedDict()
    for gene in gene_list:
        print("Querying for %s" % gene)
        ids_gene = set(pubmed_client.get_ids_for_gene(gene))
        print("Found %d in gene query" % len(ids_gene))
        # Hack to deal with excessive number of names
        if gene == 'MET':
            query_gene = 'CMET'
        elif gene == 'JUN':
            query_gene = 'CJUN'
        else:
            query_gene = gene
        ids_pubmed = set(pubmed_client.get_ids(query_gene, retmax=100000))
        print("Found %d in string query" % len(ids_pubmed))
        pmids[gene] = ids_pubmed
        pmids_from_gene[gene] = ids_gene
    with open('reading/pmids.pkl', 'wb') as f:
        pickle.dump(pmids, f)
    with open('reading/pmids_from_gene.pkl', 'wb') as f:
        pickle.dump(pmids_from_gene, f)
    return (pmids, pmids_from_gene)
def search_pubmed(search_terms, date_limit):
    """Search PubMed for given search terms.

    Parameters
    ----------
    search_terms : list[emmaa.priors.SearchTerm]
        A list of SearchTerm objects to search PubMed for.
    date_limit : int
        The number of days to search back from today.

    Returns
    -------
    terms_to_pmids : dict
        A dict mapping each given search term to the PMIDs its PubMed
        query returned.
    """
    terms_to_pmids = {}
    for search_term in search_terms:
        query = search_term.search_term
        hits = pubmed_client.get_ids(query, reldate=date_limit)
        logger.info(f'{len(hits)} PMIDs found for {query}')
        terms_to_pmids[search_term] = hits
        # Pause between queries (presumably to respect NCBI rate limits)
        time.sleep(1)
    return terms_to_pmids
def test_get_ids():
    """The unrestricted JUN query should return more hits than the text-word one."""
    all_hits = pubmed_client.get_ids('JUN', use_text_word=False)
    text_word_hits = pubmed_client.get_ids('JUN', use_text_word=True)
    assert len(all_hits) > len(text_word_hits)
    assert unicode_strs(all_hits)
    assert unicode_strs(text_word_hits)
def test_get_no_ids():
    """A nonsense search term should produce an empty result."""
    hits = pubmed_client.get_ids('UUuXNWMCusRpcVTX', retmax=10, db='pubmed')
    assert not hits
def test_get_no_ids():
    """A gibberish query must come back empty."""
    time.sleep(0.5)
    result = pubmed_client.get_ids('UUuXNWMCusRpcVTX', retmax=10, db='pubmed')
    assert not result
def test_get_ids():
    """retmax=10 should cap the 'braf' PubMed query at exactly ten PMIDs."""
    time.sleep(0.3)
    braf_ids = pubmed_client.get_ids('braf', retmax=10, db='pubmed')
    assert len(braf_ids) == 10
    assert unicode_strs(braf_ids)
def test_get_pmc_ids():
    """Ten PMC hits for 'braf', all with a 4- or 5-prefixed ID."""
    hits = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(hits) == 10
    prefixed = [h for h in hits if h.startswith(('5', '4'))]
    assert len(prefixed) == 10
    assert unicode_strs(hits)
def test_get_ids():
    """A capped PubMed 'braf' query honors retmax and returns unicode IDs."""
    found = pubmed_client.get_ids('braf', retmax=10, db='pubmed')
    assert len(found) == 10
    assert unicode_strs(found)
def test_get_no_ids():
    """A random-string query should return nothing."""
    time.sleep(0.3)
    hits = pubmed_client.get_ids('UUuXNWMCusRpcVTX', retmax=10, db='pubmed')
    assert not hits
def get_ids(search_term, retmax=1000):
    """Search the PMC database for *search_term*, returning at most *retmax* IDs."""
    return pubmed_client.get_ids(search_term, db='pmc', retmax=retmax)
def test_get_ids():
    """A capped 'braf' search yields exactly ten unicode PMIDs."""
    pmid_list = pubmed_client.get_ids('braf', retmax=10, db='pubmed')
    assert len(pmid_list) == 10
    assert unicode_strs(pmid_list)
def test_get_pmc_ids():
    """Recent PMC IDs (leading digit >= 5) are expected for 'braf'."""
    time.sleep(0.5)
    pmc_ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(pmc_ids) == 10
    for pmc_id in pmc_ids:
        assert int(pmc_id[0]) >= 5, pmc_ids
reads the abstracts corresponding to each PMID with Eidos. It is complementary to the pipeline which starts with the CORD19 document set.""" import os import time import pickle from tqdm import tqdm from indra.sources import eidos from indra.literature import pubmed_client root = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir) keywords = ['covid19', 'covid-19', 'sars-cov-2', 'sars-cov2'] ids = [] for kw in keywords: ids += pubmed_client.get_ids(kw) stmts = {} for pmid in tqdm(ids): time.sleep(3) abst = pubmed_client.get_abstract(pmid) if not abst: continue ep = eidos.process_text(abst, webservice='http://localhost:9000/') for stmt in ep.statements: stmt.evidence[0].pmid = pmid stmts[pmid] = ep.statements with open(os.path.join(root, 'stmts', 'eidos_abstract_stmts.pkl'), 'wb') as fh: pickle.dump(stmts, fh)
def test_get_no_ids():
    """An empty query string should produce no results."""
    empty_query_hits = pubmed_client.get_ids('', retmax=10, db='pubmed')
    assert not empty_query_hits
def get_ids(search_term, retmax=1000):
    """Thin wrapper around pubmed_client.get_ids targeting the PMC database."""
    hits = pubmed_client.get_ids(search_term, retmax=retmax, db='pmc')
    return hits
def test_get_no_ids():
    """'xkcd' is expected to match nothing in PubMed."""
    hits = pubmed_client.get_ids('xkcd', retmax=10, db='pubmed')
    assert not hits
def test_get_ids2():
    """Restricting to text words should shrink the JUN result set."""
    time.sleep(0.5)
    unrestricted = pubmed_client.get_ids('JUN', use_text_word=False)
    restricted = pubmed_client.get_ids('JUN', use_text_word=True)
    assert len(unrestricted) > len(restricted)
def test_get_pmc_ids():
    """PMC 'braf' query: exactly ten IDs, each starting with '5' or '4'."""
    ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(ids) == 10
    assert sum(i.startswith('5') or i.startswith('4') for i in ids) == 10
    assert unicode_strs(ids)
def test_get_ids1():
    """A retmax-capped 'braf' PubMed query yields ten PMIDs."""
    time.sleep(0.5)
    result = pubmed_client.get_ids('braf', retmax=10, db='pubmed')
    assert len(result) == 10
def get_pmids_text(kinase):
    """Fetch PMIDs for *kinase* via get_ids, then pause before returning."""
    pmid_list = get_ids(kinase)
    # Pause after the query (presumably for NCBI rate limiting)
    time.sleep(1)
    return pmid_list