def test_complete_studies(self):
    """Locking a review marks its included trials verified; unlocking reverts it."""
    review_id = 28795402
    ncts = [
        'NCT00031265', 'NCT02199847', 'NCT00902980', 'NCT01266824',
        'NCT03418909'
    ]
    included, relevant = ncts[:3], ncts[3:]

    client = Client()
    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    for nct in included:
        crud.review_trial(review_id, nct, False, 'included', 'testuser_1', 1)
    for nct in relevant:
        crud.review_trial(review_id, nct, False, 'relevant', 'testuser_1', 1)

    # lock the review: only the 'included' links are expected to be verified
    crud.complete_studies(review_id, True)
    metadata = crud.review_medtadata_db(review_id)
    self.assertEqual(metadata['included_complete'], True)
    for trial in crud.get_review_trials_fast(review_id)['reg_trials']:
        if trial['nct_id'] in included:
            self.assertEqual(trial['verified'], True)
            self.assertEqual(trial['relationship'], 'included')
        if trial['nct_id'] in relevant:
            self.assertEqual(trial['verified'], False)
            self.assertEqual(trial['relationship'], 'relevant')

    # unlock the review: included links stay 'included' but lose verification
    crud.complete_studies(review_id, False)
    for trial in crud.get_review_trials_fast(review_id)['reg_trials']:
        if trial['nct_id'] in included:
            self.assertEqual(trial['verified'], False)
            self.assertEqual(trial['relationship'], 'included')
def test_pubmedarticle_to_db(self):
    """Fetched PubMed articles are stored with metadata matching the source record."""
    pmids = [28616955, 28800192, 28797191]
    client = Client()
    for pmid in pmids:
        self.assertIsNone(crud.review_medtadata_db(pmid))

    fetched = client.efetch(db='pubmed', id=pmids)
    for position, record in enumerate(fetched):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
        pmid = pmids[position]
        self.assertIsNotNone(crud.review_medtadata_db(pmid))

        def stored(column, pmid=pmid):
            # every check re-reads the row from the database
            return crud.review_medtadata_db(pmid)[column]

        self.assertEqual(stored('title'), record.title)
        self.assertEqual(stored('review_id'), int(record.pmid))
        self.assertEqual(stored('abstract'), record.abstract)
        self.assertEqual(stored('source'), record.jrnl)
        self.assertEqual(stored('doi'), record.doi)
        self.assertEqual(stored('publish_date'), int(record.year))
        self.assertEqual(stored('authors'), ', '.join(record.authors))
        self.assertEqual(stored('included_complete'), False)
        self.assertEqual(stored('verified_review'), None)
def review_publication(review_id, publication_id, user_id):
    """
    create a new record linking the specified review to the specified publication

    If the first insert raises an IntegrityError, the publication is downloaded
    from PubMed, saved to trial_publications, and the insert is retried once.
    The database connection is always closed, even when an error propagates.

    @param review_id: pmid of review
    @param publication_id: pmid of trial publication
    @param user_id: id of user submitting this publication
    """
    insert_link = (
        "INSERT INTO review_trialpubs (review_id, trialpub_id, user_id) "
        "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING;")
    params = (review_id, publication_id, user_id)
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_link, params)
            conn.commit()
        except psycopg2.IntegrityError as e:
            print(e)
            # the failed statement aborted the transaction; reset before retrying
            conn.rollback()
            ec = Client(api_key=eutils_key)
            article = ec.efetch(db='pubmed', id=publication_id)
            for a in article:
                pubmedarticle_to_db(a, 'trial_publications')
            cur.execute(insert_link, params)
            conn.commit()
    finally:
        # previously skipped whenever anything other than IntegrityError was raised
        conn.close()
def job_get_mesh_terms_for_pmid(pmid, queue, api_key=None):
    """
    Fetch the PubMed record(s) for ``pmid`` and put their MeSH headings on ``queue``.

    One ``[pmid, mesh_headings]`` item is queued per article returned by efetch.

    :param pmid: PubMed ID passed to efetch
    :param queue: object with a ``put`` method that receives the results
    :param api_key: (optional) NCBI API key; when omitted, the ``NCBI_API_KEY``
        environment variable is used, then the legacy embedded key
    """
    import os

    if api_key is None:
        # NOTE(review): the embedded key is kept only so existing callers keep
        # working; credentials should not live in source -- rotate it and
        # supply the key via the argument or the environment instead.
        api_key = os.environ.get('NCBI_API_KEY',
                                 "fa081c19a44e9bfe267689cd45c7d31bae08")
    ec = Client(api_key=api_key)
    for article in ec.efetch(db='pubmed', id=pmid):
        queue.put([pmid, article.mesh_headings])
def update_trial_publications(period):
    """
    Pull the newest pubmed articles that reference ct.gov IDs and save them to the database
    Should be run every period number of days
    @param period: number of days back to start search
    @return: None
    """
    ec = Client(api_key=eutils_key)
    query = {
        'db': 'pubmed',
        'term': 'clinicaltrials.gov[si]',
        'format': 'json',
        'retmax': 10000,
        'email': crud.eutils_email,
        'tool': crud.eutils_tool,
        'api_key': eutils_key,
        'date_type': 'edat',
        'mindate': (datetime.now().date() -
                    timedelta(days=period)).strftime('%Y/%m/%d'),
        'maxdate': 3000
    }
    response = utils.retry_get(
        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        params=query)
    print(response.url)
    pmids = response.json()['esearchresult']['idlist']
    print(pmids)

    for batch in utils.chunks(pmids, 100):
        # keep retrying the batch until efetch succeeds
        fetched = False
        while not fetched:
            try:
                articles = ec.efetch(db='pubmed', id=batch)
                fetched = True
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)

        for a in articles:
            print(a.pmid)
            if not a.nct_ids:
                continue
            nct_ids = a.nct_ids
            crud.pubmedarticle_to_db(a, 'trial_publications')
            for nct_id in nct_ids:
                # user id 9 is passed as the submitter of every link
                crud.publication_trial(a.pmid, nct_id, 9)
def get_eutils_client(cache_path, email=DEFAULT_EMAIL, api_key=None):
    """
    Build an eutils ``Client``, optionally backed by a cache file.

    :param cache_path: valid filesystem path to SQLite cache file, or None
        to create the client without a ``cache`` argument
    :param email: (optional) email address; accepted but not used by this
        function body
    :param api_key: (optional) NCBI API key forwarded to the client
    :return: eutils QueryService client object
    """
    from eutils import Client

    client_kwargs = {'api_key': api_key}
    if cache_path is not None:
        client_kwargs['cache'] = cache_path
    return Client(**client_kwargs)
def test_pulication_trial(self):
    """A stored publication has no linked trials until publication_trial adds them."""
    trialpub_ids = [29871025, 29859785, 29866619]
    nct_ids = ['NCT02317328', 'NCT02317874', 'NCT02317887', 'NCT02330055']
    client = Client()
    publications = client.efetch(db='pubmed', id=trialpub_ids)
    for publication in publications:
        crud.pubmedarticle_to_db(publication, 'trial_publications')
        self.assertIsNone(crud.linked_nctids(publication.pmid))
        for nct_id in nct_ids:
            crud.publication_trial(publication.pmid, nct_id, 2)
        # all links are expected back, in insertion order
        self.assertEqual(crud.linked_nctids(publication.pmid), nct_ids)
def test_get_link_id(self):
    """get_link_id finds an existing review-trial link and returns None otherwise."""
    review_id = 28934560
    nct_id = 'NCT00678431'
    client = Client()
    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    crud.review_trial(review_id, nct_id, False, 'relevant', 'testuser_2', 2)

    self.assertIsNotNone(crud.get_link_id(nct_id, review_id))
    # a pair that was never linked
    self.assertIsNone(crud.get_link_id('NCT02064179', 28931939))
def test_convert_id(self):
    """convert_id maps between PMID and DOI, giving None when no DOI is stored."""
    review_id = 28795402
    expected_doi = '10.1002/ijc.30922'
    client = Client()

    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    self.assertEqual(crud.convert_id(review_id, 'doi'), expected_doi)
    self.assertEqual(crud.convert_id(expected_doi, 'pmid'), review_id)

    # second article: the lookup is expected to yield no DOI
    for record in client.efetch(db='pubmed', id=24829965):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    self.assertEqual(crud.convert_id(24829965, 'doi'), None)
def test_check_existing_review_trial(self):
    """check_existing_review_trial reports a saved link and None for an unknown pair."""
    review_id = 28934560
    nct_id = 'NCT00678431'
    client = Client()
    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    crud.review_trial(review_id, nct_id, False, 'relevant', 'testuser_2', 2)

    self.assertIsNotNone(crud.check_existing_review_trial(review_id, nct_id))
    # a pair that was never linked
    self.assertIsNone(
        crud.check_existing_review_trial(5464824, 'NCT00000000'))
def test_review_lock_status(self):
    """review_lock_status follows the flag set through complete_studies."""
    pmids = [28616955, 28800192, 28797191]
    client = Client()
    for pmid in pmids:
        self.assertIsNone(crud.review_medtadata_db(pmid))

    fetched = client.efetch(db='pubmed', id=pmids)
    for position, record in enumerate(fetched):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
        pmid = pmids[position]
        # a freshly stored review starts unlocked
        self.assertEqual(crud.review_lock_status(pmid), False)
        for locked in (True, False):
            crud.complete_studies(pmid, locked)
            self.assertEqual(crud.review_lock_status(pmid), locked)
def test_get_review_trials_fast(self):
    """Every trial linked to a review is returned under 'reg_trials'."""
    review_id = 28795402
    ncts = [
        'NCT00031265', 'NCT02199847', 'NCT00902980', 'NCT01266824',
        'NCT03418909'
    ]
    client = Client()
    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    for nct in ncts:
        crud.review_trial(review_id, nct, False, 'included', 'testuser_1', 1)

    registered = crud.get_review_trials_fast(review_id)['reg_trials']
    retrieved_ncts = []
    for trial in registered:
        retrieved_ncts.append(trial['nct_id'])
    for nct in ncts:
        self.assertTrue(nct in retrieved_ncts)
def main(author_name,
         affiliations=None,
         api_key=None,
         style='default',
         highlight_names=None,
         highlight_journal=True):
    """Search PubMed via eutils and print the formatted bibliography of each hit.

    All records are fetched and wrapped in ``PubMedArticle`` before anything
    is printed.
    """
    client = Client(api_key=api_key)
    search_result = search_pubmed_by_author(client, author_name, affiliations)
    records = list(client.efetch(db='pubmed', id=search_result.ids))
    publications = []
    for record in records:
        publications.append(PubMedArticle(record))
    for publication in publications:
        entry = publication.bibliography(style=style,
                                         highlight_names=highlight_names,
                                         highlight_journal=highlight_journal)
        print(entry)
def test_get_locked(self):
    """get_locked lists exactly the reviews currently marked complete."""
    pmids = [28569363, 29202845, 28933578]
    first, second, third = pmids
    client = Client()
    for pmid in pmids:
        self.assertIsNone(crud.review_medtadata_db(pmid))
    for record in client.efetch(db='pubmed', id=pmids):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')

    # nothing is locked yet
    self.assertIsNone(crud.get_locked())

    crud.complete_studies(first, True)
    self.assertEqual(crud.get_locked(), [first])
    crud.complete_studies(second, True)
    self.assertEqual(crud.get_locked(), [first, second])
    crud.complete_studies(third, True)
    self.assertEqual(crud.get_locked(), [first, second, third])

    # unlocking the middle review removes only that one
    crud.complete_studies(second, False)
    self.assertEqual(crud.get_locked(), [first, third])
def test_review_publication(self):
    """review_publication stores a review -> trial publication link row."""
    trialpub_ids = [29871025, 29859785, 29866619]
    review_ids = [28775712, 28549125, 29929949]
    client = Client()
    # both result sets are requested before anything is written
    trialpubs = client.efetch(db='pubmed', id=trialpub_ids)
    reviews = client.efetch(db='pubmed', id=review_ids)

    for publication in trialpubs:
        crud.pubmedarticle_to_db(publication, 'trial_publications')

    for position, review in enumerate(reviews):
        crud.pubmedarticle_to_db(review, 'systematic_reviews')
        crud.review_publication(review.pmid, trialpub_ids[position], 1)
        conn = self.mock_conn(True)
        cur = conn.cursor()
        cur.execute(
            "SELECT trialpub_id from review_trialpubs where review_id = %s;",
            (review.pmid, ))
        row = cur.fetchone()
        self.assertEqual(row[0], trialpub_ids[position])
        conn.close()
def test_vote(self):
    """An extra up-vote is counted and attributed without altering the link."""
    review_id = 28934560
    nct_id = 'NCT00678431'
    client = Client()
    for record in client.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
    crud.review_trial(review_id, nct_id, False, 'relevant', 'testuser_2', 2)
    link_id = crud.get_link_id(nct_id, review_id)
    crud.vote(link_id, 'up', 1)

    registered = crud.get_review_trials_fast(review_id)['reg_trials']
    for trial in registered:
        if trial['nct_id'] != nct_id:
            continue
        self.assertEqual(trial['nct_id'], nct_id)
        self.assertEqual(trial['upvotes'], 2)
        self.assertEqual(set(trial['voters'].split(', ')),
                         {'testuser_2', 'testuser_1'})
        self.assertEqual(trial['downvotes'], 0)
        self.assertEqual(trial['verified'], False)
        self.assertEqual(trial['relationship'], 'relevant')
def test_add_trial_to_locked(self):
    """A trial cannot be added to a locked review, but can once it is unlocked."""
    pmids = [28616955, 28800192, 28797191]
    nct_ids = ['NCT00195624', 'NCT00200889', 'NCT00207688']
    test_nct = 'NCT00695409'
    client = Client()
    for pmid in pmids:
        self.assertIsNone(crud.review_medtadata_db(pmid))

    fetched = client.efetch(db='pubmed', id=pmids)
    for position, record in enumerate(fetched):
        crud.pubmedarticle_to_db(record, 'systematic_reviews')
        pmid = pmids[position]
        crud.review_trial(pmid, nct_ids[position], False, 'included',
                          'testuser_1', 1, 'up')

        # while locked, the new link must not be created
        crud.complete_studies(pmid, True)
        crud.review_trial(pmid, test_nct, False, 'included', 'testuser_1', 1,
                          'up')
        self.assertIsNone(crud.check_existing_review_trial(pmid, test_nct))

        # after unlocking, the same call succeeds
        crud.complete_studies(pmid, False)
        crud.review_trial(pmid, test_nct, False, 'included', 'testuser_1', 1,
                          'up')
        self.assertIsNotNone(
            crud.check_existing_review_trial(pmid, test_nct))
def check_trialpubs_nctids(review_id, review_doi=None, sess_id=None):
    """
    resolve the references of a review to PMIDs and NCTIDs

    The review's Crossref record supplies the reference list. Reference DOIs
    are matched to PubMed records in chunks of 250, remaining citations are
    matched through ecitmatch, still-unresolved DOIs go through
    batch_doi2pmid, and NCT IDs are scraped from unstructured citations.

    @param review_id: PubMed ID of review
    @param review_doi: DOI of review
    @param sess_id: session ID if transitting progress via websocket
    @return: namedtuple with found PMIDs and NCTIDs; None when no DOI is
             available or the Crossref lookup fails; False when Crossref
             returns no reference list
    """
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')
    ec = Client(api_key=eutils_key)
    cr = Crossref(mailto=config.MAIL_USERNAME)
    if not review_doi:
        while True:
            try:
                paset = ec.efetch(db='pubmed', id=review_id)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        # an empty result set is treated as "no DOI" instead of letting
        # StopIteration escape to the caller
        pa = next(iter(paset), None)
        if hasattr(pa, 'doi'):
            review_doi = pa.doi
        if not review_doi:
            if sess_id:
                socketio.emit('crossrefbot_update',
                              {'msg': 'No trials found. Crossrefbot complete'},
                              room=sess_id)
            return
    try:
        # drop a trailing period from the DOI before querying Crossref
        if review_doi[-1] == '.':
            review_doi = review_doi[:-1]
        resp = cr.works(ids=[str(review_doi)])
    except requests.HTTPError as e:
        if sess_id:
            socketio.emit('crossrefbot_update',
                          {'msg': 'No trials found. Crossrefbot complete'},
                          room=sess_id)
        print(e)
        return
    if resp['status'] != 'ok' or "reference" not in resp['message']:
        return False

    parsed = resp['message']
    references = parsed['reference']
    if sess_id:
        socketio.emit('crossrefbot_update', {
            'msg': str(len(references)) +
            ' references found in crossref. trying to resolve these to PubMed articles...'
        }, room=sess_id)
        eventlet.sleep(0)
    print(str(len(references)) + ' references found in crossref')

    to_resolve = []
    dois = [ref["DOI"] for ref in references if 'DOI' in ref]
    # DOIs not yet matched to a PubMed record. Tracked across ALL chunks: the
    # loop used to rebind `dois` itself, so with more than 250 DOIs only the
    # last chunk's leftovers reached the later steps.
    unresolved = list(dois)
    if dois:
        # if we get pubmed metadata for these DOIs, we can cross-check which
        # dois match the ones in our set of references
        for chunk in utils.chunks(dois, 250):
            term = ' OR '.join(['"' + doi + '"[AID]' for doi in chunk])
            while True:
                try:
                    esr = ec.esearch(db='pubmed', term=term)
                    break
                except (eutils.exceptions.EutilsNCBIError,
                        eutils.exceptions.EutilsRequestError,
                        requests.exceptions.SSLError,
                        requests.exceptions.ConnectionError,
                        lxml.etree.XMLSyntaxError) as e:
                    print(e)
                    time.sleep(5)
            if esr.ids:
                while True:
                    try:
                        paset = ec.efetch(db='pubmed', id=esr.ids)
                        break
                    except (eutils.exceptions.EutilsNCBIError,
                            eutils.exceptions.EutilsRequestError,
                            requests.exceptions.SSLError,
                            requests.exceptions.ConnectionError) as e:
                        print(e)
                        time.sleep(5)
                for pma in paset:
                    if pma.doi is not None and pma.doi in unresolved:
                        unresolved.remove(pma.doi)
                        to_resolve.append(pma.pmid)

    # references without a DOI, or whose DOI is still unresolved, that carry
    # at least one citation field ecitmatch can work with
    still_open = set(unresolved)
    citation_fields = ('first-page', 'author', 'article-title', 'volume',
                       'journal-title', 'year')
    remaining = [
        x for x in references
        if ('DOI' not in x or x['DOI'] in still_open)
        and any(field in x for field in citation_fields)
    ]
    if remaining:
        citation_pmids = ecitmatch_tools.batch_pmids_for_citation(
            remaining, debug=False)
        check_metadata = []
        if citation_pmids:
            for citation in citation_pmids:
                if utils.RepresentsInt(citation):
                    to_resolve.append(citation)
                    check_metadata.append(citation)
                elif citation.startswith('AMBIGUOUS'):
                    # the candidate PMIDs follow the 10-character prefix
                    cand = citation[10:].split(',')
                    if utils.RepresentsInt(cand[0]):
                        to_resolve.extend(cand)
                        # keep the efetch id list flat (was a nested list)
                        check_metadata.extend(cand)
        if check_metadata:
            while True:
                try:
                    paset = ec.efetch(db='pubmed', id=check_metadata)
                    break
                except (eutils.exceptions.EutilsNCBIError,
                        eutils.exceptions.EutilsRequestError,
                        requests.exceptions.SSLError,
                        requests.exceptions.ConnectionError) as e:
                    print(e)
                    time.sleep(5)
            for pma in paset:
                if pma.doi is not None and pma.doi in unresolved:
                    unresolved.remove(pma.doi)
                    to_resolve.append(pma.pmid)

    try_doi = batch_doi2pmid(unresolved)
    if try_doi:
        for doi in try_doi:
            if utils.RepresentsInt(str(doi)):
                to_resolve.append(doi)

    nct_ids = []
    for citation in references:
        if 'unstructured' in citation:
            for token in citation['unstructured'].split(' '):
                # exactly "NCT" + 8 digits: the prefix match plus the length
                # check rejects tokens with trailing characters
                if len(token) == 11 and re.match(r"(NCT|nct)[0-9]{8}", token):
                    nct_ids.append(token)

    to_resolve = list(set(str(x) for x in to_resolve))
    content = collections.namedtuple('ids', ['pmids', 'nctids'])
    return content(to_resolve, nct_ids)
# Get the ontologies from the `ontologies` link ontologies = get_json(resources["links"]["ontologies"]) # Get the name and ontology id from the returned list ontology_output = [] ontology_names = [] print("There are {} ontologies".format(len(ontologies))) for ontology in ontologies: ontology_names += [ontology['name']] ontology_output.append(f"{ontology['name']}\n{ontology['@id']}\n") showUids = False ec = Client(api_key=api.apikey) #problem entires #International Classification of Diseases, Version 9 - Clinical Modification Ontology #Funding, Research Administration and Projects Ontology #Neomark Oral Cancer Ontology, version 3 #Ontology of Alternative Medicine, French #Neomark Oral Cancer Ontology, version 4 #Devices, Experimental scaffolds and Biomaterials Ontology #Gender, Sex, and Sexual Orientation Ontology #Systematized Nomenclature of Medicine, International Version Ontology onts = ontology_names t0 = time.time() with open('ncbo_citations.json', 'r') as cite_file: cits = json.load(cite_file) with open('ncbo_newcits.json', 'w') as newcites:
def populate_reviews(period):
    """
    download all new reviews made available on pubmed in the last <period> # days & save to db if they have trials in
    CrossRef or Cochrane

    For reviews whose journal name contains 'Cochrane', the Cochrane bots are
    run; afterwards the review and its links are deleted again unless user 17
    or 9 has a vote on one of its trial links.

    @param period: number of days back to start the PubMed search
    """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term': 'systematic review[ti] OR meta analysis[ti] OR cochrane database of systematic reviews[ta]',
            'format': 'json',
            'retmax': 300000,
            'email': crud.eutils_email,
            'tool': crud.eutils_tool,
            'api_key': eutils_key,
            'date_type': 'edat',
            'mindate': (datetime.now().date() -
                        timedelta(days=period)).strftime('%Y/%m/%d'),
            'maxdate': '3000'
        })
    pmids = r.json()['esearchresult']['idlist']
    print(len(pmids))
    segments = utils.chunks(pmids, 100)
    ec = Client(api_key=eutils_key)
    for s in segments:
        # retry the batch until efetch succeeds
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        for article in articles:
            print('-----------------' + article.pmid +
                  '-------------------------')
            # article.doi may be None, which equals the callee's default
            ids = bot.check_trialpubs_nctids(article.pmid, article.doi)
            if ids:
                if ids.pmids:
                    print(ids.pmids)
                    count = crud.articles_with_nctids(tuple(ids.pmids))
                    print(count)
                    if count:
                        print('articles with links = ' + str(len(count)))
                        print('inserting ' + str(article.pmid))
                        crud.pubmedarticle_to_db(article, 'systematic_reviews')
                        for trialpub in count:
                            crud.review_publication(article.pmid, trialpub, 9)
                            # linked_nctids yields None when nothing is linked
                            for nct in crud.linked_nctids(trialpub) or []:
                                crud.review_trial(article.pmid, nct, False,
                                                  'included', user_id=9,
                                                  nickname='crossrefbot')
                if ids.nctids:
                    crud.pubmedarticle_to_db(article, 'systematic_reviews')
                    print('nct ids in crossref = ' + str(len(ids.nctids)))
                    for nct_id in ids.nctids:
                        crud.review_trial(article.pmid, nct_id, False,
                                          'included', 'crossrefbot', 9)
                if not ids.nctids and not ids.pmids:
                    print('found nothing')
            else:
                print('nothing')

            if 'Cochrane' in article.jrnl:
                print('Cochrane')
                crud.pubmedarticle_to_db(article, 'systematic_reviews')
                bot.cochranebot(article.doi, article.pmid)
                bot.cochrane_ongoing_excluded(article.doi, article.pmid)
                conn = dblib.create_con(VERBOSE=True)
                try:
                    cur = conn.cursor(
                        cursor_factory=psycopg2.extras.DictCursor)
                    cur.execute(
                        "select rt.review_id, json_agg(distinct v.user_id) as users from review_rtrial rt"
                        " inner join votes v on rt.id = v.link_id where rt.review_id = %s group by"
                        " rt.review_id;", (article.pmid, ))
                    new_users = cur.fetchone()
                    voters = new_users['users'] if new_users else []
                    if not {17, 9} & set(voters):
                        print('deleting %s %s' % (voters, article.pmid))
                        # votes and link rows go before the review row itself;
                        # one commit keeps the removal all-or-nothing
                        cur.execute(
                            "delete from votes where link_id in (select id from review_rtrial where review_id = %s);",
                            (article.pmid, ))
                        cur.execute(
                            "delete from review_trialpubs where review_id = %s;",
                            (article.pmid, ))
                        cur.execute(
                            "delete from review_rtrial where review_id = %s;",
                            (article.pmid, ))
                        cur.execute(
                            "delete from systematic_reviews where review_id = %s;",
                            (article.pmid, ))
                        conn.commit()
                finally:
                    # previously leaked if any statement above raised
                    conn.close()
            else:
                print('not cochrane')
def search(json):
    """
    conduct a search

    A numeric id is looked up as a PMID and a DOI is looked up in
    systematic_reviews; anything else is run as a keyword search. A review
    missing from the local database is fetched from PubMed (authenticated
    users only), saved, and the bots are triggered. Progress and page
    sections are sent to the requesting client's room.

    @param json: JSON object specifying serch keywords
    """

    def emit_no_results(term):
        # render the "no results" page for the given search term
        emit('page_content', {
            'section': 'no_results',
            'data': render_template('noresults.html', id=term)
        }, room=request.sid)

    query_id = json['review_id']
    emit('search_update', {'msg': 'Searching...'}, room=request.sid)
    eventlet.sleep(0)
    if not query_id:
        emit_no_results(query_id)
        return

    # try to retrieve review with matching PMID if id is int
    if utils.RepresentsInt(query_id):
        review = crud.review_medtadata_db(query_id)
    # try to retrieve review with matching DOI if id is DOI
    elif utils.is_doi(query_id):
        # the connection is opened only on this path and always closed; it
        # used to be opened for every search and leaked on the PMID path
        conn = dblib.create_con(VERBOSE=True)
        try:
            cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cur.execute("SELECT * FROM systematic_reviews WHERE doi = %s;",
                        (query_id, ))
            review = cur.fetchone()
        finally:
            conn.close()
    # if not int or DOI, fall back to a keyword search
    else:
        emit('search_update',
             {'msg': 'Searching for keyword matches in our database'},
             room=request.sid)
        search_result = request_data.advanced_search(query_id)
        if not search_result:
            emit_no_results(query_id)
            return
        emit('page_content', {
            'section': 'search_results',
            'data': render_template('searchresult.html',
                                    reviews=search_result,
                                    searchterm=query_id)
        }, room=request.sid)
        return

    # if there is no match in our DB
    if review is None:
        if not current_user.is_authenticated:
            emit_no_results(query_id)
            return
        emit('search_update', {
            'msg': 'Not found in local database. Searching PubMed for article'
        }, room=request.sid)
        eventlet.sleep(0)
        if utils.is_doi(query_id):
            # try to retrieve PMID if DOI
            convert = crud.convert_id(query_id, 'pmid')
            if not convert:
                # return no result if no results
                emit('search_update', {'msg': 'Not found in Pubmed :('},
                     room=request.sid)
                emit_no_results(query_id)
                return
            query_id = convert
        # try to retrieve the review from pubmed
        ec = Client(api_key=eutils_key)
        article = ec.efetch(db='pubmed', id=query_id)
        found_review = None
        for art in article:
            # compare as strings: a PMID converted from a DOI may be an int,
            # which would never equal str(art.pmid)
            if art and str(art.pmid) == str(query_id):
                found_review = art
                break
        if not found_review:
            print('no result')
            emit_no_results(query_id)
            return
        if not found_review.pmid:
            flash(
                'Unable to retrieve metadata for this article. Please try again later'
            )
            abort(404)
        emit('search_update',
             {'msg': 'Found article on PubMed. Downloading metadata...'},
             room=request.sid)
        eventlet.sleep(0)
        crud.pubmedarticle_to_db(found_review, 'systematic_reviews')
        review = crud.review_medtadata_db(query_id)
        emit('page_content', {
            'data': render_template('review_data.html', review=review),
            'section': 'review_data'
        }, room=request.sid)
        eventlet.sleep(0)
        emit('search_update', {'msg': 'Saved metadata... triggering bots'},
             room=request.sid)
        bot.docsim.delay(query_id, sess_id=request.sid)
        eventlet.sleep(0)
        if 'cochrane' in review['source'].lower() and 'doi' in review:
            cb_bb = bot.cochrane_ongoing_excluded.si(review['doi'], query_id,
                                                     sess_id=request.sid)
            cb_bb.link(bot.basicbot2.si(review_id=query_id,
                                        sess_id=request.sid))
            chord(
                (bot.cochranebot.s(review['doi'], query_id,
                                   sess_id=request.sid),
                 bot.check_citations.s(query_id, sess_id=request.sid)),
                cb_bb).delay()
        else:
            chord((bot.check_citations.s(query_id, sess_id=request.sid)),
                  bot.basicbot2.si(review_id=query_id,
                                   sess_id=request.sid)).delay()
        return

    # if there IS a match in our DB
    print('emitting found review')
    eventlet.sleep(0)
    emit('search_update',
         {'msg': 'Found review in our database! Retrieving data..'},
         room=request.sid)
    eventlet.sleep(0)
    print('emitting review content')
    emit('page_content', {
        'data': render_template(
            'review_data.html',
            review=review,
            starred=crud.is_starred(review['review_id'], current_user.db_id)
            if current_user.is_authenticated else False),
        'section': 'review_data',
        'related_reviews': render_template(
            'related_reviews.html',
            related_reviews=crud.related_reviews(review['review_id']))
    }, room=request.sid)
    eventlet.sleep(0)
    trials = crud.get_review_trials_fast(
        review[0],
        usr=current_user if current_user.is_authenticated else None)
    verified = [
        trial['nct_id'] for trial in trials['reg_trials']
        if trial['relationship'] == 'included'
    ]
    emit('search_update', {'msg': 'Generating cool plots...'},
         room=request.sid)
    eventlet.sleep(0)
    formatted = utils.trials_to_plotdata(trials['reg_trials'])
    socketio.emit('page_content', {
        'section': 'plot',
        'data': formatted,
        'page': 'reviewdetail',
        'review_id': review[0]
    }, room=request.sid)
    emit('page_content', {
        'section': 'rel_trials',
        'data': render_template('rel_trials.html',
                                reg_trials=trials['reg_trials'],
                                locked=review['included_complete'])
    }, room=request.sid)
    eventlet.sleep(0)
    if verified:
        emit('page_content', {
            'section': 'incl_trials',
            'data': render_template('incl_trials.html',
                                    reg_trials=trials['reg_trials'],
                                    locked=review['included_complete'])
        }, room=request.sid)
        eventlet.sleep(0)
    else:
        # no included trials: render the section empty and unlocked
        emit('page_content', {
            'section': 'incl_trials',
            'data': render_template('incl_trials.html', reg_trials=[],
                                    locked=False)
        }, room=request.sid)
def check_trialpubs_nctids(review_id, review_doi=None, sess_id=None):
    """
    resolve the references of a review to PMIDs and NCTIDs

    The review's Crossref record supplies the reference list. Reference DOIs
    are matched to PubMed records in chunks of 250, remaining citations are
    matched through ecitmatch, still-unresolved DOIs go through
    batch_doi2pmid, and NCT IDs are scraped from unstructured citations.

    @param review_id: PubMed ID of review
    @param review_doi: DOI of review
    @param sess_id: session ID if transitting progress via websocket
    @return: namedtuple with found PMIDs and NCTIDs; None when no DOI is
             available or Crossref answers 404; False when Crossref returns
             no reference list
    @raise Exception: after repeated connection errors talking to Crossref
    """
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')
    ec = Client(api_key=eutils_key)
    cr = Crossref(mailto=config.MAIL_USERNAME)
    print('bp1')
    if not review_doi:
        while True:
            try:
                paset = ec.efetch(db='pubmed', id=review_id)
                break
            except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        try:
            pa = next(iter(paset))
        except StopIteration as e:
            # empty efetch result: wait, then start over from scratch
            print('##EMPTY ITERATOR', e)
            print('retrying...')
            time.sleep(60)
            return check_trialpubs_nctids(review_id, review_doi, sess_id)
        if hasattr(pa, 'doi'):
            review_doi = pa.doi
        if not review_doi:
            if sess_id:
                socketio.emit('crossrefbot_update',
                              {'msg': 'No trials found. Crossrefbot complete'},
                              room=sess_id)
            return
    print('bp2')
    retry_attempts = 0
    while True:
        try:
            # drop a trailing period from the DOI before querying Crossref
            if review_doi[-1] == '.':
                review_doi = review_doi[:-1]
            resp = cr.works(ids=[str(review_doi)])
            break
        except requests.HTTPError as e:
            if e.response.status_code == 404:
                if sess_id:
                    socketio.emit(
                        'crossrefbot_update',
                        {'msg': 'No trials found. Crossrefbot complete'},
                        room=sess_id)
                print(e)
                return
            # any other HTTP status is retried without a limit
            time.sleep(5)
            print('UNHANDLED HTTP ERROR', e)
            print('retrying...')
        except requests.exceptions.ConnectionError as e:
            print(e)
            time.sleep(10)
            print('connection error, retrying...')
            if retry_attempts >= 6:
                raise Exception('failed too many times')
            retry_attempts += 1
    print('bp3')
    if resp['status'] != 'ok' or "reference" not in resp['message']:
        return False

    parsed = resp['message']
    references = parsed['reference']
    if sess_id:
        socketio.emit('crossrefbot_update', {
            'msg': '%s references in crossref. trying to resolve to PubMed articles' % len(references)
        }, room=sess_id)
        eventlet.sleep(0)
    print('%s references found in crossref' % len(references))

    to_resolve = []
    dois = [ref["DOI"] for ref in references if 'DOI' in ref]
    # DOIs not yet matched to a PubMed record. Tracked across ALL chunks: the
    # loop used to rebind `dois` itself, so with more than 250 DOIs only the
    # last chunk's leftovers reached the later steps.
    unresolved = list(dois)
    print('bp4')
    if dois:
        # if we get pubmed metadata for these DOIs, we can cross-check which
        # dois match the ones in our set of references
        # TODO: eutils truncates replies at 250 results; see
        # https://github.com/biocommons/eutils/issues/124/
        for chunk in utils.chunks(dois, 250):
            term = ' OR '.join(['"' + doi + '"[AID]' for doi in chunk])
            while True:
                print('bp4.1', term)
                try:
                    with eventlet.Timeout(300):
                        esr = ec.esearch(db='pubmed', term=term)
                    break
                except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                        requests.exceptions.SSLError,
                        requests.exceptions.ConnectionError,
                        lxml.etree.XMLSyntaxError,
                        eventlet.timeout.Timeout) as e:
                    print('possible timeout?', e)
                    time.sleep(5)
            if esr.ids:
                while True:
                    print('bp4.2', esr.ids)
                    try:
                        paset = ec.efetch(db='pubmed', id=esr.ids)
                        break
                    except (eutils.EutilsNCBIError,
                            eutils.EutilsRequestError,
                            requests.exceptions.SSLError,
                            requests.exceptions.ConnectionError,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.ChunkedEncodingError) as e:
                        print(e)
                        time.sleep(5)
                for pma in paset:
                    if pma.doi is not None and pma.doi in unresolved:
                        unresolved.remove(pma.doi)
                        to_resolve.append(pma.pmid)
    print('bp5')

    # references without a DOI, or whose DOI is still unresolved, that carry
    # at least one citation field ecitmatch can work with
    still_open = set(unresolved)
    citation_fields = ('first-page', 'author', 'article-title', 'volume',
                       'journal-title', 'year')
    remaining = [
        x for x in references
        if ('DOI' not in x or x['DOI'] in still_open)
        and any(field in x for field in citation_fields)
    ]
    if remaining:
        citation_pmids = ecitmatch_tools.batch_pmids_for_citation(
            remaining, debug=True)
        check_metadata = []
        if citation_pmids:
            for citation in citation_pmids:
                if utils.RepresentsInt(citation):
                    to_resolve.append(citation)
                    check_metadata.append(citation)
                elif citation.startswith('AMBIGUOUS'):
                    # the candidate PMIDs follow the 10-character prefix
                    cand = citation[10:].split(',')
                    if utils.RepresentsInt(cand[0]):
                        to_resolve.extend(cand)
                        # keep the efetch id list flat (was a nested list)
                        check_metadata.extend(cand)
        if check_metadata:
            while True:
                try:
                    with eventlet.Timeout(300):
                        paset = ec.efetch(db='pubmed', id=check_metadata)
                    break
                except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                        requests.exceptions.SSLError,
                        requests.exceptions.ConnectionError,
                        eventlet.timeout.Timeout) as e:
                    print('possible timeout?')
                    print(e)
                    time.sleep(5)
            for pma in paset:
                if pma.doi is not None and pma.doi in unresolved:
                    unresolved.remove(pma.doi)
                    to_resolve.append(pma.pmid)
    print('bp6')

    try_doi = batch_doi2pmid(unresolved)
    if try_doi:
        for doi in try_doi:
            if utils.RepresentsInt(str(doi)):
                to_resolve.append(doi)

    nct_ids = []
    for citation in references:
        if 'unstructured' in citation:
            for token in citation['unstructured'].split(' '):
                # exactly "NCT" + 8 digits: the prefix match plus the length
                # check rejects tokens with trailing characters
                if len(token) == 11 and re.match(r"(NCT|nct)[0-9]{8}", token):
                    nct_ids.append(token)
    print('bp11')

    to_resolve = list({str(x) for x in to_resolve})
    content = collections.namedtuple('ids', ['pmids', 'nctids'])
    return content(to_resolve, nct_ids)
def check_citations(review_id, sess_id=None, review_doi=None):
    """
    Resolve the references of a review to PubMed articles / trial registry
    IDs and store every automatic link that is found
    @param review_id: PubMed ID of review
    @param sess_id: session ID if transmitting progress via websocket
    @param review_doi: DOI of review (not read by this function)
    @return: None
    """
    # A message-queue backed SocketIO handle is only created when there is a
    # client session to report progress to.
    socketio = SocketIO(message_queue='amqp://localhost') if sess_id else None

    def notify(msg):
        # Progress updates are silently dropped when no session is attached.
        if sess_id:
            socketio.emit('crossrefbot_update', {'msg': msg}, room=sess_id)

    ec = Client(api_key=eutils_key)
    retryable = (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                 requests.exceptions.SSLError,
                 requests.exceptions.ConnectionError)
    # Keep retrying the fetch until NCBI answers; transient errors are
    # printed and followed by a 5 second pause.
    while True:
        try:
            articles = ec.efetch(db='pubmed', id=review_id)
        except retryable as e:
            print(e)
            time.sleep(5)
        else:
            break

    for article in articles:
        print('-----------------' + article.pmid + '-------------------------')
        if article.doi is None:
            ids = check_trialpubs_nctids(article.pmid, sess_id=sess_id)
        else:
            ids = check_trialpubs_nctids(article.pmid, article.doi,
                                         sess_id=sess_id)

        # Nothing could be resolved for this article at all.
        if not ids:
            notify('No trials found. Crossrefbot complete')
            continue

        if ids.pmids:
            notify('crossrefbot found references to ' + str(len(ids.pmids)) +
                   ' PubMed articles. Checking articles for links to included trials...')
            count = crud.articles_with_nctids(ids.pmids)
            if count and len(count) > 0:
                notify(str(len(count)) +
                       ' articles have links to included trials')
                for trialpub in count:
                    # Record the publication against the review, then add
                    # every trial linked to that publication as 'included'.
                    crud.review_publication(article.pmid, trialpub, 9)
                    for nct in crud.linked_nctids(trialpub):
                        crud.review_trial(review_id, nct, False, 'included',
                                          user_id=9, nickname='crossrefbot')

        if ids.nctids:
            print('nct ids in crossref = ' + str(len(ids.nctids)))
            notify(str(len(ids.nctids)) +
                   ' included trials were listed directly in crossref')
            for nct_id in ids.nctids:
                crud.review_trial(article.pmid, nct_id, False, 'included',
                                  'crossrefbot', 9)

        if ids.nctids or ids.pmids:
            notify('crossrefbot complete')
        else:
            notify('No trials found. Crossrefbot complete')
#!/usr/bin/python3
# Install eutils: pip3 install --user eutils AND pip install eutils
"""Minimal example of creating an NCBI E-utilities client with `eutils`."""
from eutils import Client

# NOTE(review): the API key is hard-coded in the source; consider loading it
# from the environment or a config file instead of committing it.
eclient = Client(api_key="f7bba41c57271397bbc91985bce382bf6***")
print("\nUsing NCBI E-utilities in Python\n")
'''#does work
gene_esearch = eclient.esearch(db='gene',term='TNF')
#does not work
assembly_esearch = eclient.esearch(db='assembly',term='klebsiella pneumoniae')
'''
# The line below is an EDirect *shell* command, not Python; it is kept as a
# comment so that the script remains syntactically valid:
# esearch -db assembly -query '573[txid] AND "complete genome"[filter] AND "latest refseq"[filter]'
result_writer.writerow([ row[2], i, ends1[j], row[3], row[12], ex, transcriptvar, row[1], str(row[4]) + "-" + str(row[5]) ]) j += 1 ex += 1 print("\nSearch results: {}\n".format(no_rows)) ### Close csv file result_transcripts.close() print("Results are in '" + searchUCSC[key] + "'\n") ######################################################################################################### ##### API-key (NCBI) eclient = Client(api_key="8ecce891e7fa036ff84bccc7c74e5138dc09") #gene_efetch = eclient.efetch(db='gene', id=91039) Entrez.email = "*****@*****.**" ######################################################################################################### ##### nucleotide search ### Setting up query mRNAtranscripts = [] transcriptmRNA_esearch = eclient.esearch( db='nucleotide', term='(' + gene + '[gene] AND "H**o sapiens"[Primary Organism] AND refseq[filter]) NOT biomol_genomic[PROP]' ) print("\nLoading currently available ids from Entrez nucleotide...") print("=" * 70)
def update_trial_publications(period):
    """
    Pull the newest pubmed articles that reference ct.gov IDs and save them to the database
    Should be run every period number of days
    @param period: number of days back to start search
    @return: None
    """
    # edge cases
    # 32601120 NCT0282152 -- nct given with missing digit
    # 31899823 NCT00020085 -- nct is an alias for NCT00004635
    ec = Client(api_key=eutils_key)
    pmids = []
    page = 0
    page_size = 10000
    print('update_trial_publications, gathering pmids')
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    # Page through the ESearch results until a request fails or a page comes
    # back empty.
    while True:
        r = utils.retry_get(base_url,
                            params={
                                'db': 'pubmed',
                                'term': 'clinicaltrials.gov[si]',
                                'format': 'json',
                                'retmax': page_size,
                                'retstart': page * page_size,
                                'email': crud.eutils_email,
                                'tool': crud.eutils_tool,
                                'api_key': eutils_key,
                                # ESearch names this parameter 'datetype';
                                # the previous 'date_type' spelling is not a
                                # documented parameter, so the Entrez-date
                                # restriction was presumably never applied.
                                'datetype': 'edat',
                                'mindate': (datetime.now() - timedelta(
                                    days=period)).strftime('%Y/%m/%d'),
                                'maxdate': 3000
                            })
        if not r:
            break
        # Named 'payload' so that a module-level 'json' import is not shadowed.
        payload = r.json()
        current_pmids = payload['esearchresult']['idlist']
        if not current_pmids:
            break
        pmids = pmids + current_pmids
        print('page %s, pmid count: %s' % (page, len(pmids)))
        page += 1

    # Loop-invariant: selects the ClinicalTrials.gov accession numbers that
    # are registered in an article's DataBankList.
    xpath = 'MedlineCitation/Article/DataBankList/DataBank[DataBankName = "ClinicalTrials.gov"]/AccessionNumberList/AccessionNumber/text()'
    for s in utils.chunks(pmids, 100):
        # Retry each batch until it succeeds; transient NCBI / network
        # errors are printed and followed by a 5 second pause.
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        for a in articles:
            nct_ids = a._xml_root.xpath(xpath)
            print('nct_ids found for pmid %s = [%s]' %
                  (a.pmid, ', '.join(nct_ids)))
            if not nct_ids:
                continue
            crud.pubmedarticle_to_db(a, 'trial_publications')
            for nct_id in nct_ids:
                # Only IDs that are exactly 11 characters long are linked;
                # anything else is reported and skipped.
                if len(nct_id) != 11:
                    print(
                        '##WARNING!: ignoring %s (%s) - not the expected 11 chars long, possible missing digit'
                        % (nct_id, a.pmid))
                    continue
                crud.publication_trial(a.pmid, nct_id, 9)
"--max", help="number of words to test", nargs='?', const=1, type=int, default=50) parser.add_argument("-c", "--corpus", help="the corpus (brown,webtext,gutenberg)", default="brown") args = parser.parse_args() print(args.corpus) corpus = eval(args.corpus) ec = Client(api_key=api.apikey) #replace with your NCBI apikey frequency_list = FreqDist(i.lower() for i in corpus.words()) print("word\tcorpusFreq\tpubmedFreq") for word in random.sample(set(corpus.words()), args.max): freq = frequency_list[word.lower()] #let's focus on somewhat common words if (freq > 1): try: a = ec.esearch(db='pubmed', term=word) print("{}\t{}\t{}".format(word, freq, a.count)) except (TimeoutError): time.sleep(5) #slow down buddy ec = Client(api_key=api.apikey) time.sleep(0.1) #ncbi will complain otherwise
except UnicodeEncodeError: r[key] = joiner.join([_.encode('utf-8') for _ in r[key]]) else: try: r[key] = str(r[key]) except UnicodeEncodeError: r[key] = r[key].encode('utf-8') if callable(record): r = record(r) elif record is not None: raise ValueError('Unknown record transform function (args.record).') if r: writer.write(r) client = Client(api_key = apikey) if prog == 'esearch': sret = client.esearch(db = db, term = term) try: error = list(sret._xml_root.find('ErrorList').iterchildren()) except: error = None print sret.count if not error else 0 if not sret.ids: rets = [] else: rets = client.efetch(db = db, id = sret.ids) rets = list(iter(rets)) writerResults(rets)
def test_review_trial(self):
    """Exercise crud.review_trial: inserting, voting, moving and locking."""
    ec = Client()
    review_id = 28616955
    first_nct, second_nct, third_nct = [
        'NCT00195624', 'NCT00200889', 'NCT00207688'
    ]
    both_testers = {'testuser_1', 'testuser_2'}

    def verify(nct_id, *expectations):
        # Re-read the review's trials and check every row matching nct_id.
        # Each expectation is a (field, expected) pair, checked in the order
        # given; a set as the expected value means the stored
        # comma-separated string is compared as a set.
        rows = crud.get_review_trials_fast(review_id)['reg_trials']
        for row in rows:
            if row['nct_id'] != nct_id:
                continue
            self.assertEqual(row['nct_id'], nct_id)
            for field, expected in expectations:
                actual = row[field]
                if isinstance(expected, set):
                    actual = set(actual.split(', '))
                self.assertEqual(actual, expected)

    for a in ec.efetch(db='pubmed', id=review_id):
        crud.pubmedarticle_to_db(a, 'systematic_reviews')
    self.assertEqual(
        len(crud.get_review_trials_fast(review_id)['reg_trials']), 0)

    # trial is inserted with correct values
    crud.review_trial(review_id, first_nct, False, 'relevant', 'testuser_1', 1)
    verify(first_nct,
           ('upvotes', 1),
           ('downvotes', 0),
           ('voters', 'testuser_1'),
           ('verified', False),
           ('relationship', 'relevant'))

    # when the trial is added again by another user, it should receive an upvote
    crud.review_trial(review_id, first_nct, False, 'relevant', 'testuser_2', 2)
    verify(first_nct,
           ('upvotes', 2),
           ('voters', both_testers),
           ('downvotes', 0),
           ('verified', False),
           ('relationship', 'relevant'))

    # adding an existing trial from the relevant column as included will move it
    crud.review_trial(review_id, first_nct, False, 'included', 'testuser_2', 2)
    verify(first_nct,
           ('upvotes', 2),
           ('voters', both_testers),
           ('downvotes', 0),
           ('verified', False),
           ('relationship', 'included'))

    # test included trial
    crud.review_trial(review_id, second_nct, False, 'included', 'testuser_2', 2)
    verify(second_nct,
           ('upvotes', 1),
           ('voters', 'testuser_2'),
           ('downvotes', 0),
           ('verified', False),
           ('relationship', 'included'))

    # trying to insert a relevant trial when it's already included will give a vote but not move the trial
    crud.review_trial(review_id, second_nct, False, 'relevant', 'testuser_1', 1)
    verify(second_nct,
           ('upvotes', 2),
           ('voters', both_testers),
           ('downvotes', 0),
           ('verified', False),
           ('relationship', 'included'))

    # except for user_id 17 which can move included to relevant
    crud.review_trial(review_id, second_nct, False, 'relevant', 'cochranebot',
                      17, vote_type='down')
    verify(second_nct,
           ('upvotes', 2),
           ('voters', {'cochranebot', 'testuser_1', 'testuser_2'}),
           ('downvotes', 1),
           ('verified', False),
           ('relationship', 'relevant'))

    # if the review is locked and the trial is included, allow a vote
    crud.review_trial(review_id, third_nct, False, 'included', 'testuser_1', 1)
    crud.complete_studies(review_id, True)
    crud.review_trial(review_id, third_nct, False, 'included', 'testuser_2', 2)
    verify(third_nct,
           ('upvotes', 2),
           ('voters', both_testers),
           ('downvotes', 0),
           ('verified', True),
           ('relationship', 'included'))