예제 #1
0
def test_get_ids():
    """Check that 'JUN' yields more IDs without text-word mode than with it.

    Both result lists must also pass the ``unicode_strs`` check.
    """
    # Short pause before querying -- presumably to stay under the remote
    # service's rate limit when tests run back to back.
    time.sleep(0.3)
    term = 'JUN'
    plain_ids = pubmed_client.get_ids(term, use_text_word=False)
    text_word_ids = pubmed_client.get_ids(term, use_text_word=True)
    assert len(plain_ids) > len(text_word_ids)
    for id_list in (plain_ids, text_word_ids):
        assert unicode_strs(id_list)
예제 #2
0
def test_get_ids():
    """Compare 'JUN' search results with and without ``use_text_word``.

    The search with ``use_text_word=False`` must return strictly more IDs
    than the one with ``use_text_word=True``; each list must satisfy
    ``unicode_strs``.
    """
    time.sleep(0.3)  # presumably throttles successive service calls
    results = {
        flag: pubmed_client.get_ids('JUN', use_text_word=flag)
        for flag in (False, True)
    }
    assert len(results[False]) > len(results[True])
    assert unicode_strs(results[False])
    assert unicode_strs(results[True])
예제 #3
0
def test_get_pmc_ids():
    """Check a 'braf' search in the 'pmc' database with ``retmax=10``.

    Exactly 10 IDs must come back, each starting with '6' or '5', and the
    list must pass the ``unicode_strs`` check.
    """
    time.sleep(0.3)
    expected_count = 10
    pmc_ids = pubmed_client.get_ids('braf', retmax=expected_count, db='pmc')
    assert len(pmc_ids) == expected_count
    # str.startswith accepts a tuple of alternative prefixes.
    matching = [pmc_id for pmc_id in pmc_ids
                if pmc_id.startswith(('6', '5'))]
    assert len(matching) == expected_count
    assert unicode_strs(pmc_ids)
예제 #4
0
def test_get_pmc_ids():
    """Verify that a capped 'braf' PMC search returns 10 well-formed IDs.

    Every ID is required to begin with '6' or '5'; the whole list must
    satisfy ``unicode_strs``.
    """
    time.sleep(0.3)
    found = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(found) == 10
    # Count the IDs whose first character is one of the accepted digits.
    n_matching = sum(1 for pmc_id in found
                     if pmc_id.startswith('6') or pmc_id.startswith('5'))
    assert n_matching == 10
    assert unicode_strs(found)
예제 #5
0
파일: model.py 프로젝트: cmluria/emmaa
    def search_literature(self, date_limit=None):
        """Query PubMed with each of the model's search terms.

        Parameters
        ----------
        date_limit : Optional[int]
            The number of days to search back from today. Passed to
            ``pubmed_client.get_ids`` as ``reldate``.

        Returns
        -------
        pmid_to_terms : dict
            Maps every PMID returned by any search to the list of search
            terms whose query produced it.
        """
        # One query per search term; a repeated term just overwrites its
        # own entry.
        hits_by_term = {
            term: pubmed_client.get_ids(term, reldate=date_limit)
            for term in self.search_terms
        }
        # Invert the mapping: PMID -> terms that returned it.
        pmid_to_terms = {}
        for term, hits in hits_by_term.items():
            for pmid in hits:
                pmid_to_terms.setdefault(pmid, []).append(term)
        return pmid_to_terms
예제 #6
0
def get_text_refs_for_pubmed_search_term(search_term, **kwargs):
    """Return text ref IDs for PMIDs obtained using a PubMed search.

    ``search_term`` and any extra keyword arguments are forwarded to
    ``pubmed_client.get_ids``. The resulting PMIDs are looked up in the
    primary database and the IDs of the matching TextRef rows are returned
    as a set. Progress is reported with ``print``.
    """
    print('Searching for %s' % search_term)
    found_pmids = pubmed_client.get_ids(search_term, **kwargs)
    print('Getting TextRefs for %d PMIDs' % len(found_pmids))
    primary_db = get_primary_db()
    rows = primary_db.select_all(primary_db.TextRef.id,
                                 primary_db.TextRef.pmid_in(found_pmids))
    return {row.id for row in rows}
예제 #7
0
def get_drug_pmids():
    """Return the de-duplicated PMIDs found for every drug synonym.

    Each synonym in the values of ``drug_names`` is searched with
    ``retmax=5000``; all hits are pooled, duplicates are removed via a set
    and the result is returned as a list.
    """
    all_hits = [
        pmid
        for synonyms in drug_names.values()
        for synonym in synonyms
        for pmid in pubmed_client.get_ids(synonym, retmax=5000)
    ]
    unique_pmids = list(set(all_hits))
    print('Found %d PMIDs for drugs' % len(unique_pmids))
    return unique_pmids
예제 #8
0
파일: machine.py 프로젝트: djmilstein/indra
def get_searchterm_pmids(search_terms, num_days):
    """Fetch PubMed IDs for each search term.

    Parameters
    ----------
    search_terms : iterable of str
        Terms to search for. 'MET' and 'JUN' (any letter case) are
        replaced by 'c-MET' and 'c-JUN' respectively.
    num_days : int
        Passed to ``pubmed_client.get_ids`` as ``reldate``.

    Returns
    -------
    dict
        Maps each term actually queried (after the replacement above) to
        the result of its search.
    """
    # Special cases: these terms are queried, and keyed, under an alias.
    aliases = {'MET': 'c-MET', 'JUN': 'c-JUN'}
    results = {}
    for term in search_terms:
        query = aliases.get(term.upper(), term)
        results[query] = pubmed_client.get_ids(query, reldate=num_days)
    return results
예제 #9
0
def get_searchterm_pmids(search_terms, num_days):
    """Fetch PubMed IDs for each search term, pausing 1 s after each query.

    'MET' and 'JUN' (compared case-insensitively) are searched as 'c-MET'
    and 'c-JUN'. ``num_days`` is passed to ``pubmed_client.get_ids`` as
    ``reldate``. The returned dict is keyed by the term that was actually
    queried.
    """
    import time

    def _effective_term(term):
        # Special cases: substitute the prefixed form for these two names.
        upper = term.upper()
        if upper == 'MET':
            return 'c-MET'
        if upper == 'JUN':
            return 'c-JUN'
        return term

    found = {}
    for raw_term in search_terms:
        query = _effective_term(raw_term)
        found[query] = pubmed_client.get_ids(query, reldate=num_days)
        time.sleep(1)
    return found
예제 #10
0
def test_readme_using_indra3():
    """Read abstracts of 'BRAF' search hits with REACH and expect statements.

    Up to 10 PubMed IDs are requested (``retmax=10``). Each available
    abstract is sent to ``reach.process_text``, and the statements of every
    non-None processor are pooled. The pool must be non-empty.
    """
    from indra.sources import reach
    from indra.literature import pubmed_client
    # Search PubMed on 'BRAF', limited to 10 results
    hit_pmids = pubmed_client.get_ids('BRAF', retmax=10)
    collected = []
    for pmid in hit_pmids:
        abstract = pubmed_client.get_abstract(pmid)
        if abstract is None:
            continue
        processor = reach.process_text(abstract, url=reach.local_text_url)
        if processor is None:
            continue
        collected += processor.statements
    assert len(collected) > 0
예제 #11
0
def get_ids():
    """Search PubMed for references for the Ras 227 gene set.

    Results are cached in ``reading/pmids.pkl`` and
    ``reading/pmids_from_gene.pkl``. When both files exist they are loaded
    and returned without querying PubMed.

    Returns
    -------
    tuple
        ``(pmids, pmids_from_gene)``. When freshly computed, both are
        OrderedDicts keyed by gene name: ``pmids`` holds the set of IDs
        from the string query, ``pmids_from_gene`` the set of IDs from
        ``pubmed_client.get_ids_for_gene``.
    """
    pmids_path = 'reading/pmids.pkl'
    pmids_from_gene_path = 'reading/pmids_from_gene.pkl'

    # Check if we've got the files already
    if os.path.isfile(pmids_path) and os.path.isfile(pmids_from_gene_path):
        # Pickles must be read in binary mode (they are written with 'wb'
        # below); pickle.load on a text-mode file fails on Python 3.
        # NOTE: unpickling can execute arbitrary code, so these cache files
        # should only ever be ones produced by this function.
        with open(pmids_path, 'rb') as pmids_file:
            pmids = pickle.load(pmids_file)
        with open(pmids_from_gene_path, 'rb') as pmids_from_gene_file:
            pmids_from_gene = pickle.load(pmids_from_gene_file)
        return (pmids, pmids_from_gene)

    # STEP 0: Get gene list from ras_pathway_proteins.csv (first column of
    # the tab-delimited file shipped in indra's resources folder)
    fname = os.path.join(indra.__path__[0], 'resources',
                         'ras_pathway_proteins.csv')
    with open(fname) as f:
        gene_list = [row[0].strip()
                     for row in csv.reader(f, delimiter='\t')]

    pmids = OrderedDict()
    pmids_from_gene = OrderedDict()

    for gene in gene_list:
        print("Querying for %s" % gene)
        ids_gene = set(pubmed_client.get_ids_for_gene(gene))
        print("Found %d in gene query" % len(ids_gene))
        # Hack to deal with excessive number of names
        if gene == 'MET':
            query_gene = 'CMET'
        elif gene == 'JUN':
            query_gene = 'CJUN'
        else:
            query_gene = gene
        ids_pubmed = set(pubmed_client.get_ids(query_gene, retmax=100000))
        print("Found %d in string query" % len(ids_pubmed))
        pmids[gene] = ids_pubmed
        pmids_from_gene[gene] = ids_gene

    # Cache the results so the next call can skip the queries.
    with open(pmids_path, 'wb') as f:
        pickle.dump(pmids, f)
    with open(pmids_from_gene_path, 'wb') as f:
        pickle.dump(pmids_from_gene, f)
    return pmids, pmids_from_gene
예제 #12
0
def get_ids():
    """Search PubMed for references for the Ras 227 gene set.

    Results are cached in ``reading/pmids.pkl`` and
    ``reading/pmids_from_gene.pkl``. When both files exist they are loaded
    and returned without querying PubMed.

    Returns
    -------
    tuple
        ``(pmids, pmids_from_gene)``. When freshly computed, both are
        OrderedDicts keyed by gene name: ``pmids`` holds the set of IDs
        from the string query, ``pmids_from_gene`` the set of IDs from
        ``pubmed_client.get_ids_for_gene``.
    """
    pmids_path = 'reading/pmids.pkl'
    pmids_from_gene_path = 'reading/pmids_from_gene.pkl'

    # Check if we've got the files already
    if os.path.isfile(pmids_path) and os.path.isfile(pmids_from_gene_path):
        # Binary mode is required for pickle on Python 3 and is harmless
        # on Python 2.
        # NOTE: unpickling can execute arbitrary code, so these cache files
        # should only ever be ones produced by this function.
        with open(pmids_path, 'rb') as pmids_file:
            pmids = pickle.load(pmids_file)
        with open(pmids_from_gene_path, 'rb') as pmids_from_gene_file:
            pmids_from_gene = pickle.load(pmids_from_gene_file)
        return (pmids, pmids_from_gene)

    # STEP 0: Get gene list from ras_pathway_proteins.csv (first column of
    # the tab-delimited file)
    with open('../../data/ras_pathway_proteins.csv') as f:
        gene_list = [row[0].strip()
                     for row in csv.reader(f, delimiter='\t')]

    pmids = OrderedDict()
    pmids_from_gene = OrderedDict()

    for gene in gene_list:
        # Single-argument print(...) with %-formatting emits the same text
        # as the old print statements and parses on Python 2 and 3 alike.
        print("Querying for %s" % gene)
        ids_gene = set(pubmed_client.get_ids_for_gene(gene))
        print("Found %d in gene query" % len(ids_gene))
        # Hack to deal with excessive number of names
        if gene == 'MET':
            query_gene = 'CMET'
        elif gene == 'JUN':
            query_gene = 'CJUN'
        else:
            query_gene = gene
        ids_pubmed = set(pubmed_client.get_ids(query_gene, retmax=100000))
        print("Found %d in string query" % len(ids_pubmed))
        pmids[gene] = ids_pubmed
        pmids_from_gene[gene] = ids_gene

    # Cache the results so the next call can skip the queries.
    with open(pmids_path, 'wb') as f:
        pickle.dump(pmids, f)
    with open(pmids_from_gene_path, 'wb') as f:
        pickle.dump(pmids_from_gene, f)
    return (pmids, pmids_from_gene)
예제 #13
0
    def search_pubmed(search_terms, date_limit):
        """Run a PubMed search for each of the given search terms.

        Parameters
        ----------
        search_terms : list[emmaa.priors.SearchTerm]
            SearchTerm objects; each one's ``search_term`` attribute is
            used as the query string.
        date_limit : int
            The number of days to search back from today. Passed to
            ``pubmed_client.get_ids`` as ``reldate``.

        Returns
        -------
        terms_to_pmids : dict
            Maps each SearchTerm object to the PMIDs its search returned.
        """
        results = {}
        for term in search_terms:
            pmids = pubmed_client.get_ids(
                term.search_term, reldate=date_limit)
            logger.info(f'{len(pmids)} PMIDs found for {term.search_term}')
            results[term] = pmids
            # Wait a second before the next query.
            time.sleep(1)
        return results
예제 #14
0
def test_get_ids():
    """Check that text-word mode narrows the 'JUN' search.

    The ``use_text_word=False`` result must be strictly longer than the
    ``use_text_word=True`` one, and both must satisfy ``unicode_strs``.
    """
    broad_ids = pubmed_client.get_ids('JUN', use_text_word=False)
    narrow_ids = pubmed_client.get_ids('JUN', use_text_word=True)
    assert len(broad_ids) > len(narrow_ids)
    for id_list in (broad_ids, narrow_ids):
        assert unicode_strs(id_list)
예제 #15
0
def test_get_no_ids():
    """Check that searching 'UUuXNWMCusRpcVTX' yields a falsy result."""
    query = 'UUuXNWMCusRpcVTX'
    result = pubmed_client.get_ids(query, retmax=10, db='pubmed')
    assert not result
예제 #16
0
def test_get_no_ids():
    """Check that a 'pubmed' search for 'UUuXNWMCusRpcVTX' is falsy."""
    # Pause first -- presumably to throttle calls to the remote service.
    time.sleep(0.5)
    search_args = dict(retmax=10, db='pubmed')
    result = pubmed_client.get_ids('UUuXNWMCusRpcVTX', **search_args)
    assert not result
예제 #17
0
def test_get_ids():
    """Check that a 'braf' PubMed search with ``retmax=10`` returns 10 IDs.

    The returned list must also satisfy ``unicode_strs``.
    """
    time.sleep(0.3)
    limit = 10
    pubmed_ids = pubmed_client.get_ids('braf', retmax=limit, db='pubmed')
    assert len(pubmed_ids) == limit
    assert unicode_strs(pubmed_ids)
예제 #18
0
def test_get_pmc_ids():
    """Check a 'braf' search in the 'pmc' database with ``retmax=10``.

    Exactly 10 IDs must be returned, all beginning with '5' or '4', and
    the list must satisfy ``unicode_strs``.
    """
    pmc_ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(pmc_ids) == 10
    # str.startswith accepts a tuple of alternative prefixes.
    with_expected_prefix = [pmc_id for pmc_id in pmc_ids
                            if pmc_id.startswith(('5', '4'))]
    assert len(with_expected_prefix) == 10
    assert unicode_strs(pmc_ids)
예제 #19
0
def test_get_ids():
    """Check that searching 'braf' in 'pubmed' with ``retmax=10`` gives 10 IDs.

    The result must also pass the ``unicode_strs`` check.
    """
    search_args = {'retmax': 10, 'db': 'pubmed'}
    found = pubmed_client.get_ids('braf', **search_args)
    assert len(found) == 10
    assert unicode_strs(found)
예제 #20
0
def test_get_no_ids():
    """Check that the search term 'UUuXNWMCusRpcVTX' produces no IDs."""
    time.sleep(0.3)  # presumably spaces out requests to the service
    unmatched_term = 'UUuXNWMCusRpcVTX'
    found = pubmed_client.get_ids(unmatched_term, retmax=10, db='pubmed')
    assert not found
예제 #21
0
def get_ids(search_term, retmax=1000):
    """Search the 'pmc' database for ``search_term``.

    ``retmax`` (default 1000) is forwarded unchanged to
    ``pubmed_client.get_ids``, whose return value is returned as is.
    """
    query_options = {'retmax': retmax, 'db': 'pmc'}
    return pubmed_client.get_ids(search_term, **query_options)
예제 #22
0
def test_get_ids():
    """Check the size and string type of a capped 'braf' PubMed search.

    With ``retmax=10`` exactly 10 IDs are expected, and the list must
    satisfy ``unicode_strs``.
    """
    wanted = 10
    braf_ids = pubmed_client.get_ids('braf', retmax=wanted, db='pubmed')
    assert len(braf_ids) == wanted
    assert unicode_strs(braf_ids)
예제 #23
0
def test_get_pmc_ids():
    """Check a 'braf' search in the 'pmc' database with ``retmax=10``.

    Exactly 10 IDs must be returned and the first character of each,
    read as an integer, must be at least 5. On failure of the second
    check the ID list is shown as the assertion message.
    """
    time.sleep(0.5)
    pmc_ids = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(pmc_ids) == 10
    # Lazily evaluated, so checking stops at the first offending ID.
    leading_digits = (int(pmc_id[0]) for pmc_id in pmc_ids)
    assert all(digit >= 5 for digit in leading_digits), pmc_ids
예제 #24
0
reads the abstracts corresponding to each PMID with Eidos. It is
complementary to the pipeline which starts with the CORD19 document set."""
import os
import time
import pickle
from tqdm import tqdm
from indra.sources import eidos
from indra.literature import pubmed_client

# Two directory levels above the folder containing this script.
root = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir,
                    os.pardir)

keywords = ['covid19', 'covid-19', 'sars-cov-2', 'sars-cov2']
# Collect the PMIDs for all keywords. A PMID can be returned for more than
# one keyword; keep only its first occurrence so that each abstract is
# fetched and read once (every entry costs a 3 s sleep plus two service
# calls). Results are keyed by PMID below, so the saved output is
# unaffected.
ids = []
seen_ids = set()
for kw in keywords:
    for pmid in pubmed_client.get_ids(kw):
        if pmid not in seen_ids:
            seen_ids.add(pmid)
            ids.append(pmid)

# Maps PMID -> statements extracted from that PMID's abstract.
stmts = {}
for pmid in tqdm(ids):
    # Wait between requests.
    time.sleep(3)
    abst = pubmed_client.get_abstract(pmid)
    # Skip entries for which no abstract text came back.
    if not abst:
        continue
    ep = eidos.process_text(abst, webservice='http://localhost:9000/')
    # Record the source PMID on the first evidence of each statement.
    for stmt in ep.statements:
        stmt.evidence[0].pmid = pmid
    stmts[pmid] = ep.statements

with open(os.path.join(root, 'stmts', 'eidos_abstract_stmts.pkl'), 'wb') as fh:
    pickle.dump(stmts, fh)
예제 #25
0
def test_get_no_ids():
    """Check that searching 'pubmed' with an empty string yields no IDs."""
    empty_query = ''
    found = pubmed_client.get_ids(empty_query, retmax=10, db='pubmed')
    assert not found
예제 #26
0
def get_ids(search_term, retmax=1000):
    """Return the result of a ``pubmed_client.get_ids`` search on 'pmc'.

    Parameters
    ----------
    search_term : str
        The term to search for.
    retmax : int
        Forwarded as the ``retmax`` argument of the underlying call.
        Default: 1000
    """
    pmc_hits = pubmed_client.get_ids(search_term, db='pmc', retmax=retmax)
    return pmc_hits
예제 #27
0
def test_get_no_ids():
    """Check that searching 'pubmed' for 'xkcd' yields a falsy result."""
    search_args = {'retmax': 10, 'db': 'pubmed'}
    found = pubmed_client.get_ids('xkcd', **search_args)
    assert not found
예제 #28
0
def test_get_ids2():
    """Check that 'JUN' returns more IDs without text-word mode than with."""
    # Pause first -- presumably to throttle calls to the remote service.
    time.sleep(0.5)
    count_by_flag = {}
    for flag in (False, True):
        hits = pubmed_client.get_ids('JUN', use_text_word=flag)
        count_by_flag[flag] = len(hits)
    assert count_by_flag[False] > count_by_flag[True]
예제 #29
0
def test_get_pmc_ids():
    """Verify that a capped 'braf' PMC search returns 10 well-formed IDs.

    Each ID is required to begin with '5' or '4'; the list as a whole must
    satisfy ``unicode_strs``.
    """
    accepted_prefixes = ('5', '4')
    found = pubmed_client.get_ids('braf', retmax=10, db='pmc')
    assert len(found) == 10
    n_matching = sum(1 for pmc_id in found
                     if pmc_id.startswith(accepted_prefixes))
    assert n_matching == 10
    assert unicode_strs(found)
예제 #30
0
def test_get_ids1():
    """Check that a 'braf' PubMed search with ``retmax=10`` returns 10 IDs."""
    time.sleep(0.5)  # presumably spaces out requests to the service
    limit = 10
    braf_ids = pubmed_client.get_ids('braf', retmax=limit, db='pubmed')
    assert len(braf_ids) == limit
예제 #31
0
def get_pmids_text(kinase):
    """Return what ``get_ids`` finds for ``kinase``, after a 1 s pause.

    The pause happens after the lookup and before returning -- presumably
    to throttle successive queries made by the caller.
    """
    found = get_ids(kinase)
    time.sleep(1)
    return found