Ejemplo n.º 1
0
 def test_convert_id(self):
     """
     convert_id should translate a saved review between PMID and DOI in both directions

     A second article is saved afterwards, and its PMID is expected to convert to None.
     """
     client = Client()
     pmid = 28795402
     for fetched in client.efetch(db='pubmed', id=pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     # round trip: PMID -> DOI, then DOI -> PMID
     self.assertEqual(crud.convert_id(pmid, 'doi'), '10.1002/ijc.30922')
     self.assertEqual(crud.convert_id('10.1002/ijc.30922', 'pmid'), pmid)
     for fetched in client.efetch(db='pubmed', id=24829965):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     self.assertEqual(crud.convert_id(24829965, 'doi'), None)
Ejemplo n.º 2
0
def review_publication(review_id, publication_id, user_id):
    """
    create a new record linking the specified review to the specified publication

    If the insert raises an IntegrityError, the transaction is rolled back, the
    publication is fetched from PubMed and saved with pubmedarticle_to_db, and
    the insert is attempted a second time. The connection is always closed,
    whether or not either attempt succeeds.

    @param review_id: pmid of review
    @param publication_id: pmid of trial publication
    @param user_id: id of user submitting this publication
    """
    insert_link = (
        "INSERT INTO review_trialpubs (review_id, trialpub_id, user_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING;"
    )
    link_values = (review_id, publication_id, user_id)
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute(insert_link, link_values)
        conn.commit()
    except psycopg2.IntegrityError as e:
        print(e)
        conn.rollback()
        # save the publication first, then retry the link insert once
        ec = Client(api_key=eutils_key)
        article = ec.efetch(db='pubmed', id=publication_id)
        for a in article:
            pubmedarticle_to_db(a, 'trial_publications')
        cur.execute(insert_link, link_values)
        conn.commit()
    finally:
        # release the connection even when the retry (or an unrelated error) raises
        conn.close()
Ejemplo n.º 3
0
 def test_pubmedarticle_to_db(self):
     """
     saving fetched PubMed articles as systematic reviews should store their metadata

     Each review must be absent before the save; afterwards the stored row is
     compared field by field with the fetched article.
     """
     ec = Client()
     ids = [28616955, 28800192, 28797191]
     for pmid in ids:
         self.assertIsNone(crud.review_medtadata_db(pmid))
     article = ec.efetch(db='pubmed', id=ids)
     for i, a in enumerate(article):
         crud.pubmedarticle_to_db(a, 'systematic_reviews')
         # look the stored row up once instead of re-querying for every field
         metadata = crud.review_medtadata_db(ids[i])
         self.assertIsNotNone(metadata)
         self.assertEqual(metadata['title'], a.title)
         self.assertEqual(metadata['review_id'], int(a.pmid))
         self.assertEqual(metadata['abstract'], a.abstract)
         self.assertEqual(metadata['source'], a.jrnl)
         self.assertEqual(metadata['doi'], a.doi)
         self.assertEqual(metadata['publish_date'], int(a.year))
         self.assertEqual(metadata['authors'], ', '.join(a.authors))
         self.assertEqual(metadata['included_complete'], False)
         self.assertIsNone(metadata['verified_review'])
Ejemplo n.º 4
0
 def test_complete_studies(self):
     """
     complete_studies should toggle included_complete and the verified flag of included trials

     Three trials are linked as 'included' and two as 'relevant'. After
     completing, included trials are expected to be verified and relevant ones
     not; after un-completing, included trials are expected to be unverified.
     """
     client = Client()
     review_pmid = 28795402
     ncts = [
         'NCT00031265', 'NCT02199847', 'NCT00902980', 'NCT01266824',
         'NCT03418909'
     ]
     included = ncts[:3]
     relevant = ncts[3:]
     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     for nct in included:
         crud.review_trial(review_pmid, nct, False, 'included', 'testuser_1', 1)
     for nct in relevant:
         crud.review_trial(review_pmid, nct, False, 'relevant', 'testuser_1', 1)

     # mark the review complete
     crud.complete_studies(review_pmid, True)
     stored = crud.review_medtadata_db(review_pmid)
     self.assertEqual(stored['included_complete'], True)
     for trial in crud.get_review_trials_fast(review_pmid)['reg_trials']:
         if trial['nct_id'] in included:
             self.assertEqual(trial['verified'], True)
             self.assertEqual(trial['relationship'], 'included')
         if trial['nct_id'] in relevant:
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'relevant')

     # and undo it again
     crud.complete_studies(review_pmid, False)
     for trial in crud.get_review_trials_fast(review_pmid)['reg_trials']:
         if trial['nct_id'] in included:
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'included')
def job_get_mesh_terms_for_pmid(pmid, queue):
    """
    Fetch the PubMed record(s) for pmid and put [pmid, mesh_headings] on queue for each one

    @param pmid: PubMed ID passed to efetch
    @param queue: object whose put() receives one [pmid, mesh_headings] list per record
    """
    # NOTE(review): the API key is hard-coded here - consider loading it from configuration
    client = Client(api_key="fa081c19a44e9bfe267689cd45c7d31bae08")

    for record in client.efetch(db='pubmed', id=pmid):
        queue.put([pmid, record.mesh_headings])
Ejemplo n.º 6
0
def update_trial_publications(period):
    """
    Pull the newest pubmed articles that reference ct.gov IDs and save them to the database
    Should be run every period number of days

    PMIDs come from an esearch for 'clinicaltrials.gov[si]' (at most 10000 are
    requested) and are fetched in segments of 100. A failed fetch is retried
    every 5 seconds until it succeeds. Each article that carries NCT IDs is
    saved as a trial publication and linked to every one of those IDs.

    @param period: number of days back to start search
    @return: None
    """
    ec = Client(api_key=eutils_key)
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    mindate = (datetime.now().date() -
               timedelta(days=period)).strftime('%Y/%m/%d')
    r = utils.retry_get(base_url,
                        params={
                            'db': 'pubmed',
                            'term': 'clinicaltrials.gov[si]',
                            'format': 'json',
                            'retmax': 10000,
                            'email': crud.eutils_email,
                            'tool': crud.eutils_tool,
                            'api_key': eutils_key,
                            'date_type': 'edat',
                            'mindate': mindate,
                            'maxdate': 3000
                        })
    print(r.url)
    # named "payload" so the json module name is not shadowed
    payload = r.json()
    pmids = payload['esearchresult']['idlist']
    print(pmids)
    segments = utils.chunks(pmids, 100)
    for s in segments:
        # keep retrying this segment until the fetch goes through
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        for a in articles:
            print(a.pmid)
            if a.nct_ids:
                nct_ids = a.nct_ids
                crud.pubmedarticle_to_db(a, 'trial_publications')
                for nct_id in nct_ids:
                    crud.publication_trial(a.pmid, nct_id, 9)
Ejemplo n.º 7
0
 def test_review_publication(self):
     """
     review_publication should store a review_trialpubs row linking each review to its trial publication
     """
     client = Client()
     trialpub_ids = [29871025, 29859785, 29866619]
     review_ids = [28775712, 28549125, 29929949]
     fetched_pubs = client.efetch(db='pubmed', id=trialpub_ids)
     fetched_reviews = client.efetch(db='pubmed', id=review_ids)
     for pub in fetched_pubs:
         crud.pubmedarticle_to_db(pub, 'trial_publications')
     for idx, review in enumerate(fetched_reviews):
         crud.pubmedarticle_to_db(review, 'systematic_reviews')
         crud.review_publication(review.pmid, trialpub_ids[idx], 1)
         # read the link back directly from the table
         conn = self.mock_conn(True)
         cursor = conn.cursor()
         cursor.execute(
             "SELECT trialpub_id from review_trialpubs where review_id = %s;",
             (review.pmid, ))
         row = cursor.fetchone()
         self.assertEqual(row[0], trialpub_ids[idx])
         conn.close()
Ejemplo n.º 8
0
 def test_pulication_trial(self):
     """
     publication_trial should link NCT IDs to a trial publication so linked_nctids returns them

     Before any link is made, linked_nctids is expected to return None.
     """
     client = Client()
     trialpub_ids = [29871025, 29859785, 29866619]
     nct_ids = ['NCT02317328', 'NCT02317874', 'NCT02317887', 'NCT02330055']
     for pub in client.efetch(db='pubmed', id=trialpub_ids):
         crud.pubmedarticle_to_db(pub, 'trial_publications')
         self.assertIsNone(crud.linked_nctids(pub.pmid))
         for nct in nct_ids:
             crud.publication_trial(pub.pmid, nct, 2)
         self.assertEqual(crud.linked_nctids(pub.pmid), nct_ids)
Ejemplo n.º 9
0
 def test_get_link_id(self):
     """
     get_link_id should return a value for a linked review/trial pair and None for a pair never linked
     """
     client = Client()
     review_pmid = 28934560
     nct = 'NCT00678431'
     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     crud.review_trial(review_pmid, nct, False, 'relevant', 'testuser_2', 2)
     self.assertIsNotNone(crud.get_link_id(nct, review_pmid))
     # this pair was never linked in the test
     self.assertIsNone(crud.get_link_id('NCT02064179', 28931939))
Ejemplo n.º 10
0
 def test_check_existing_review_trial(self):
     """
     check_existing_review_trial should find a link created by review_trial and return None otherwise
     """
     client = Client()
     review_pmid = 28934560
     nct = 'NCT00678431'
     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     crud.review_trial(review_pmid, nct, False, 'relevant', 'testuser_2', 2)
     existing = crud.check_existing_review_trial(review_pmid, nct)
     self.assertIsNotNone(existing)
     # this pair was never linked in the test
     missing = crud.check_existing_review_trial(5464824, 'NCT00000000')
     self.assertIsNone(missing)
Ejemplo n.º 11
0
 def test_review_lock_status(self):
     """
     review_lock_status should follow complete_studies: False at first, True once completed, False when undone
     """
     client = Client()
     ids = [28616955, 28800192, 28797191]
     for pmid in ids:
         self.assertIsNone(crud.review_medtadata_db(pmid))
     for idx, fetched in enumerate(client.efetch(db='pubmed', id=ids)):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
         pmid = ids[idx]
         self.assertEqual(crud.review_lock_status(pmid), False)
         crud.complete_studies(pmid, True)
         self.assertEqual(crud.review_lock_status(pmid), True)
         crud.complete_studies(pmid, False)
         self.assertEqual(crud.review_lock_status(pmid), False)
Ejemplo n.º 12
0
 def test_get_review_trials_fast(self):
     """
     get_review_trials_fast should list every trial linked to the review under 'reg_trials'
     """
     client = Client()
     review_pmid = 28795402
     ncts = [
         'NCT00031265', 'NCT02199847', 'NCT00902980', 'NCT01266824',
         'NCT03418909'
     ]
     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     for nct in ncts:
         crud.review_trial(review_pmid, nct, False, 'included', 'testuser_1', 1)
     reg_trials = crud.get_review_trials_fast(review_pmid)['reg_trials']
     retrieved_ncts = [trial['nct_id'] for trial in reg_trials]
     for nct in ncts:
         self.assertTrue(nct in retrieved_ncts)
Ejemplo n.º 13
0
 def test_get_locked(self):
     """
     get_locked should return the IDs of completed reviews, or None when no review is completed
     """
     client = Client()
     ids = [28569363, 29202845, 28933578]
     first, second, third = ids
     for pmid in ids:
         self.assertIsNone(crud.review_medtadata_db(pmid))
     for fetched in client.efetch(db='pubmed', id=ids):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     self.assertIsNone(crud.get_locked())
     # lock the reviews one at a time
     crud.complete_studies(first, True)
     self.assertEqual(crud.get_locked(), [first])
     crud.complete_studies(second, True)
     self.assertEqual(crud.get_locked(), [first, second])
     crud.complete_studies(third, True)
     self.assertEqual(crud.get_locked(), [first, second, third])
     # unlocking the middle one removes only that review
     crud.complete_studies(second, False)
     self.assertEqual(crud.get_locked(), [first, third])
Ejemplo n.º 14
0
def main(author_name,
         affiliations=None,
         api_key=None,
         style='default',
         highlight_names=None,
         highlight_journal=True):
    """Search PubMed for an author via eutils and print each retrieved article as a bibliography entry"""
    client = Client(api_key=api_key)
    search_result = search_pubmed_by_author(client, author_name, affiliations)
    # read the whole result set first, then wrap every record
    records = list(client.efetch(db='pubmed', id=search_result.ids))
    publications = [PubMedArticle(record) for record in records]

    for publication in publications:
        entry = publication.bibliography(style=style,
                                         highlight_names=highlight_names,
                                         highlight_journal=highlight_journal)
        print(entry)
Ejemplo n.º 15
0
 def test_vote(self):
     """
     an 'up' vote by user 1 on a link created by testuser_2 should give two upvotes and both voters
     """
     client = Client()
     review_pmid = 28934560
     nct = 'NCT00678431'
     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     crud.review_trial(review_pmid, nct, False, 'relevant', 'testuser_2', 2)
     crud.vote(crud.get_link_id(nct, review_pmid), 'up', 1)
     for trial in crud.get_review_trials_fast(review_pmid)['reg_trials']:
         if trial['nct_id'] != nct:
             continue
         self.assertEqual(trial['nct_id'], nct)
         self.assertEqual(trial['upvotes'], 2)
         self.assertEqual(set(trial['voters'].split(', ')),
                          {'testuser_2', 'testuser_1'})
         self.assertEqual(trial['downvotes'], 0)
         self.assertEqual(trial['verified'], False)
         self.assertEqual(trial['relationship'], 'relevant')
Ejemplo n.º 16
0
 def test_add_trial_to_locked(self):
     """
     a new trial must not be linked while a review is completed, but is linked once it is un-completed
     """
     client = Client()
     ids = [28616955, 28800192, 28797191]
     nct_ids = ['NCT00195624', 'NCT00200889', 'NCT00207688']
     test_nct = 'NCT00695409'
     for pmid in ids:
         self.assertIsNone(crud.review_medtadata_db(pmid))
     for idx, fetched in enumerate(client.efetch(db='pubmed', id=ids)):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
         crud.review_trial(ids[idx], nct_ids[idx], False, 'included',
                           'testuser_1', 1, 'up')
         # while completed: adding test_nct is expected to leave no link
         crud.complete_studies(ids[idx], True)
         crud.review_trial(ids[idx], test_nct, False, 'included',
                           'testuser_1', 1, 'up')
         blocked = crud.check_existing_review_trial(ids[idx], test_nct)
         self.assertIsNone(blocked)
         # once un-completed: the same call is expected to create the link
         crud.complete_studies(ids[idx], False)
         crud.review_trial(ids[idx], test_nct, False, 'included',
                           'testuser_1', 1, 'up')
         added = crud.check_existing_review_trial(ids[idx], test_nct)
         self.assertIsNotNone(added)
Ejemplo n.º 17
0
def check_trialpubs_nctids(review_id, review_doi=None, sess_id=None):
    """
    resolve the references of a review to PMIDs and NCTIDs

    The reference list is read from Crossref using the review's DOI (looked up
    in PubMed when not supplied). References are resolved to PMIDs in three
    passes: a PubMed search on their DOIs, citation matching through
    ecitmatch_tools, and a final batch_doi2pmid call on the DOIs still
    unresolved. NCT IDs are collected from the 'unstructured' reference text.

    @param review_id: PubMed ID of review
    @param review_doi: DOI of review
    @param sess_id: session ID if transitting progress via websocket
    @return: namedtuple with found PMIDs and NCTIDs; None when the review has
             no DOI or Crossref answers 404; False when the Crossref response
             is not 'ok' or carries no reference list
    """
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')
    ec = Client(api_key=eutils_key)
    cr = Crossref(mailto=config.MAIL_USERNAME)
    print('bp1')
    if not review_doi:
        # no DOI supplied: look the review up in PubMed to find one
        while True:
            try:
                paset = ec.efetch(db='pubmed', id=review_id)
                break
            except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        try:
            pa = next(iter(paset))
        except StopIteration as e:
            print('##EMPTY ITERATOR', e)
            print('retrying...')
            time.sleep(60)
            return check_trialpubs_nctids(review_id, review_doi, sess_id)

        if hasattr(pa, 'doi'):
            review_doi = pa.doi
        if not review_doi:
            if sess_id:
                socketio.emit('crossrefbot_update',
                              {'msg': 'No trials found. Crossrefbot complete'},
                              room=sess_id)
            return
    print('bp2')
    retry_attempts = 0
    while True:
        try:
            if review_doi[-1] == '.':
                review_doi = review_doi[:-1]
            resp = cr.works(ids=[str(review_doi)])
            break
        except requests.HTTPError as e:
            if e.response.status_code == 404:
                if sess_id:
                    socketio.emit(
                        'crossrefbot_update',
                        {'msg': 'No trials found. Crossrefbot complete'},
                        room=sess_id)
                print(e)
                return
            else:
                time.sleep(5)
                print('UNHANDLED HTTP ERROR', e)
                print('retrying...')
                continue
        except requests.exceptions.ConnectionError as e:
            print(e)
            time.sleep(10)
            print('connection error, retrying...')
            if retry_attempts >= 6:
                raise Exception('failed too many times')
            retry_attempts += 1
    print('bp3')
    if resp['status'] == 'ok':
        parsed = resp['message']
        if "reference" in parsed:
            if sess_id:
                socketio.emit('crossrefbot_update', {
                    'msg':
                    '%s references in crossref. trying to resolve to PubMed articles'
                    % len(parsed['reference'])
                },
                              room=sess_id)
                eventlet.sleep(0)
            print('%s references found in crossref' % len(parsed['reference']))
            to_resolve = []
            references = parsed['reference']
            dois = [doi["DOI"] for doi in references if 'DOI' in doi]
            print('bp4')
            if dois:
                # if we get pubmed metadata for these DOIs, we can cross-check which dois match the ones in our set of references
                # what if > 250 TODO: WARNING:eutils._internal.client:NCBI found 251 results, but we truncated the reply at 250 results; see https://github.com/biocommons/eutils/issues/124/
                # The full DOI list stays bound to `dois`; each chunk gets its
                # own name so the later passes see every unresolved DOI, not
                # just the leftovers of the last chunk.
                resolved_dois = set()
                for chunk in utils.chunks(dois, 250):
                    term = ' OR '.join(
                        ['"' + doi + '"[AID]' for doi in chunk])
                    while True:
                        print('bp4.1', term)
                        try:
                            with eventlet.Timeout(300):
                                esr = ec.esearch(db='pubmed', term=term)
                            break
                        except (eutils.EutilsNCBIError,
                                eutils.EutilsRequestError,
                                requests.exceptions.SSLError,
                                requests.exceptions.ConnectionError,
                                lxml.etree.XMLSyntaxError,
                                eventlet.timeout.Timeout) as e:
                            print('possible timeout?', e)
                            time.sleep(5)
                    if esr.ids:
                        while True:
                            print('bp4.2', esr.ids)
                            try:
                                paset = ec.efetch(db='pubmed', id=esr.ids)
                                break
                            except (eutils.EutilsNCBIError,
                                    eutils.EutilsRequestError,
                                    requests.exceptions.SSLError,
                                    requests.exceptions.ConnectionError,
                                    requests.exceptions.ReadTimeout,
                                    requests.exceptions.ChunkedEncodingError
                                    ) as e:
                                print(e)
                                time.sleep(5)
                        for pma in paset:
                            if (pma.doi is not None and pma.doi in chunk
                                    and pma.doi not in resolved_dois):
                                resolved_dois.add(pma.doi)
                                to_resolve.append(pma.pmid)
                # keep only the DOIs that PubMed could not resolve
                dois = [doi for doi in dois if doi not in resolved_dois]
            print('bp5')
            # references without a DOI, or whose DOI is still unresolved, that
            # carry at least one field usable for citation matching
            remaining = [
                x for x in references
                if ('DOI' not in x or x['DOI'] in dois) and (
                    'first-page' in x or 'author' in x or 'article-title' in x
                    or 'volume' in x or 'journal-title' in x or 'year' in x)
            ]
            if remaining:
                citation_pmids = ecitmatch_tools.batch_pmids_for_citation(
                    remaining, debug=True)
                check_metadata = []
                if citation_pmids:
                    for citation in citation_pmids:
                        if utils.RepresentsInt(citation):
                            to_resolve.append(citation)
                            check_metadata.append(citation)
                        elif citation.startswith('AMBIGUOUS'):
                            # candidates follow the 10-character prefix, comma separated
                            cand = citation[10:].split(',')
                            if utils.RepresentsInt(cand[0]):
                                to_resolve.extend(cand)
                                # extend, not append: efetch needs a flat list of IDs
                                check_metadata.extend(cand)
                if check_metadata:
                    while True:
                        try:
                            with eventlet.Timeout(300):
                                paset = ec.efetch(db='pubmed',
                                                  id=check_metadata)
                            break
                        except (eutils.EutilsNCBIError,
                                eutils.EutilsRequestError,
                                requests.exceptions.SSLError,
                                requests.exceptions.ConnectionError,
                                eventlet.timeout.Timeout) as e:
                            print('possible timeout?')
                            print(e)
                            time.sleep(5)
                    for pma in paset:
                        if pma.doi is not None and pma.doi in dois:
                            dois.remove(pma.doi)
                            to_resolve.append(pma.pmid)
            print('bp6')
            try_doi = batch_doi2pmid(dois)
            if try_doi:
                for doi in try_doi:
                    if utils.RepresentsInt(str(doi)):
                        to_resolve.append(doi)
            nct_ids = []
            for citation in references:
                if 'unstructured' in citation:
                    for token in citation['unstructured'].split(' '):
                        # exactly "NCT"/"nct" followed by 8 digits, nothing trailing
                        if len(token) == 11 and re.match(
                                r"(NCT|nct)[0-9]{8}", token):
                            nct_ids.append(token)
            print('bp11')
            to_resolve = [str(x) for x in to_resolve]
            to_resolve = list(set(to_resolve))
            content = collections.namedtuple('ids', ['pmids', 'nctids'])
            return content(to_resolve, nct_ids)
    return False
Ejemplo n.º 18
0
def check_citations(review_id, sess_id=None, review_doi=None):
    """
    check IDs obtained from the references of a review for automatic links, and save these links

    For every article returned by PubMed for review_id, the references are
    resolved with check_trialpubs_nctids. Referenced publications that
    crud.articles_with_nctids returns are linked to the review together with
    their NCT IDs, and NCT IDs found directly in the references are linked as
    included trials. Progress messages are emitted when sess_id is given.

    @param review_id: PubMed ID of review
    @param sess_id: session ID if transitting progress via websocket
    @param review_doi: DOI of review (not used by this function)
    @return: None
    """
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')
    ec = Client(api_key=eutils_key)
    # retry the fetch every 5 seconds until it succeeds
    while True:
        try:
            articles = ec.efetch(db='pubmed', id=review_id)
            break
        except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError) as e:
            print(e)
            time.sleep(5)
    for article in articles:
        print('-----------------' + article.pmid + '-------------------------')
        if article.doi is not None:
            ids = check_trialpubs_nctids(article.pmid,
                                         article.doi,
                                         sess_id=sess_id)
        else:
            ids = check_trialpubs_nctids(article.pmid, sess_id=sess_id)
        if ids:
            if ids.pmids:
                if sess_id:
                    socketio.emit('crossrefbot_update', {
                        'msg':
                        'crossrefbot found references to ' +
                        str(len(ids.pmids)) +
                        ' PubMed articles. Checking articles for links to included trials...'
                    },
                                  room=sess_id)
                count = crud.articles_with_nctids(ids.pmids)
                if count:
                    if sess_id:
                        socketio.emit('crossrefbot_update', {
                            'msg':
                            str(len(count)) +
                            ' articles have links to included trials'
                        },
                                      room=sess_id)
                    for trialpub in count:
                        crud.review_publication(article.pmid, trialpub, 9)
                        # linked_nctids may return None rather than an empty
                        # list, so fall back to [] before iterating
                        linked_ncts = crud.linked_nctids(trialpub) or []
                        for nct in linked_ncts:
                            crud.review_trial(review_id,
                                              nct,
                                              False,
                                              'included',
                                              user_id=9,
                                              nickname='crossrefbot')
            if ids.nctids:
                print('nct ids in crossref = ' + str(len(ids.nctids)))
                if sess_id:
                    socketio.emit('crossrefbot_update', {
                        'msg':
                        str(len(ids.nctids)) +
                        ' included trials were listed directly in crossref'
                    },
                                  room=sess_id)
                for nct_id in ids.nctids:
                    crud.review_trial(article.pmid, nct_id, False, 'included',
                                      'crossrefbot', 9)
            if not ids.nctids and not ids.pmids:
                if sess_id:
                    socketio.emit(
                        'crossrefbot_update',
                        {'msg': 'No trials found. Crossrefbot complete'},
                        room=sess_id)
            elif sess_id:
                socketio.emit('crossrefbot_update',
                              {'msg': 'crossrefbot complete'},
                              room=sess_id)
        elif sess_id:
            socketio.emit('crossrefbot_update',
                          {'msg': 'No trials found. Crossrefbot complete'},
                          room=sess_id)
Ejemplo n.º 19
0
 def test_review_trial(self):
     """
     walk review_trial through inserting, repeat submissions, moves between 'relevant' and 'included', and a locked review

     After every call the matching entry of get_review_trials_fast()['reg_trials']
     is checked for vote counts, voters, verified flag and relationship.
     """
     client = Client()
     review_pmid = 28616955
     nct_ids = ['NCT00195624', 'NCT00200889', 'NCT00207688']
     first_nct, second_nct, third_nct = nct_ids

     def reg_trials():
         # current registered trials of the review under test
         return crud.get_review_trials_fast(review_pmid)['reg_trials']

     for fetched in client.efetch(db='pubmed', id=review_pmid):
         crud.pubmedarticle_to_db(fetched, 'systematic_reviews')
     self.assertEqual(len(reg_trials()), 0)

     # trial is inserted with correct values
     crud.review_trial(review_pmid, first_nct, False, 'relevant', 'testuser_1', 1)
     for trial in reg_trials():
         if trial['nct_id'] == first_nct:
             self.assertEqual(trial['nct_id'], first_nct)
             self.assertEqual(trial['upvotes'], 1)
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['voters'], 'testuser_1')
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'relevant')

     # when the trial is added again by another user, it should receive an upvote
     crud.review_trial(review_pmid, first_nct, False, 'relevant', 'testuser_2', 2)
     for trial in reg_trials():
         if trial['nct_id'] == first_nct:
             self.assertEqual(trial['nct_id'], first_nct)
             self.assertEqual(trial['upvotes'], 2)
             self.assertEqual(set(trial['voters'].split(', ')),
                              {'testuser_1', 'testuser_2'})
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'relevant')

     # adding an existing trial from the relevant column as included will move it
     crud.review_trial(review_pmid, first_nct, False, 'included', 'testuser_2', 2)
     for trial in reg_trials():
         if trial['nct_id'] == first_nct:
             self.assertEqual(trial['nct_id'], first_nct)
             self.assertEqual(trial['upvotes'], 2)
             self.assertEqual(set(trial['voters'].split(', ')),
                              {'testuser_1', 'testuser_2'})
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'included')

     # test included trial
     crud.review_trial(review_pmid, second_nct, False, 'included', 'testuser_2', 2)
     for trial in reg_trials():
         if trial['nct_id'] == second_nct:
             self.assertEqual(trial['nct_id'], second_nct)
             self.assertEqual(trial['upvotes'], 1)
             self.assertEqual(trial['voters'], 'testuser_2')
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'included')

     # trying to insert a relevant trial when it's already included will give a vote but not move the trial
     crud.review_trial(review_pmid, second_nct, False, 'relevant', 'testuser_1', 1)
     for trial in reg_trials():
         if trial['nct_id'] == second_nct:
             self.assertEqual(trial['nct_id'], second_nct)
             self.assertEqual(trial['upvotes'], 2)
             self.assertEqual(set(trial['voters'].split(', ')),
                              {'testuser_1', 'testuser_2'})
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'included')

     # except for user_id 17 which can move included to relevant
     crud.review_trial(review_pmid,
                       second_nct,
                       False,
                       'relevant',
                       'cochranebot',
                       17,
                       vote_type='down')
     for trial in reg_trials():
         if trial['nct_id'] == second_nct:
             self.assertEqual(trial['nct_id'], second_nct)
             self.assertEqual(trial['upvotes'], 2)
             self.assertEqual(set(trial['voters'].split(', ')),
                              {'cochranebot', 'testuser_1', 'testuser_2'})
             self.assertEqual(trial['downvotes'], 1)
             self.assertEqual(trial['verified'], False)
             self.assertEqual(trial['relationship'], 'relevant')

     # if the review is locked and the trial is included, allow a vote
     crud.review_trial(review_pmid, third_nct, False, 'included', 'testuser_1', 1)
     crud.complete_studies(review_pmid, True)
     crud.review_trial(review_pmid, third_nct, False, 'included', 'testuser_2', 2)
     for trial in reg_trials():
         if trial['nct_id'] == third_nct:
             self.assertEqual(trial['nct_id'], third_nct)
             self.assertEqual(trial['upvotes'], 2)
             self.assertEqual(set(trial['voters'].split(', ')),
                              {'testuser_1', 'testuser_2'})
             self.assertEqual(trial['downvotes'], 0)
             self.assertEqual(trial['verified'], True)
             self.assertEqual(trial['relationship'], 'included')
Ejemplo n.º 20
0
def search(json):
    """
    Conduct a search and stream the results to the requesting client.

    The search term is tried as a PMID first, then as a DOI, and is otherwise
    treated as free-text keywords. A PMID/DOI that is not in the local database
    is looked up on PubMed (authenticated users only), saved, and handed to the
    bots. All output is sent with socket emits to room request.sid; nothing is
    returned.

    @param json: JSON object specifying search keywords (read from json['review_id'])
    """
    term = json['review_id']
    emit('search_update', {'msg': 'Searching...'}, room=request.sid)
    eventlet.sleep(0)
    if not term:
        _emit_no_results(term)
        return
    found = True
    if utils.RepresentsInt(term):
        # try to retrieve review with matching PMID
        review = crud.review_medtadata_db(term)
    elif utils.is_doi(term):
        # try to retrieve review with matching DOI. The connection is only
        # needed on this path, so it is opened here and always closed again.
        conn = dblib.create_con(VERBOSE=True)
        try:
            cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cur.execute("SELECT * FROM systematic_reviews WHERE doi = %s;",
                        (term, ))
            review = cur.fetchone()
        finally:
            conn.close()
    else:
        # neither PMID nor DOI: fall back to a keyword search
        emit('search_update',
             {'msg': 'Searching for keyword matches in our database'},
             room=request.sid)
        search_result = request_data.advanced_search(term)
        if not search_result:
            _emit_no_results(term)
            return
        emit('page_content', {
            'section': 'search_results',
            'data': render_template('searchresult.html',
                                    reviews=search_result,
                                    searchterm=term)
        },
             room=request.sid)
        return
    # if there is no match in our DB
    if review is None:
        found = False
        if not current_user.is_authenticated:
            _emit_no_results(term)
            return
        emit('search_update', {
            'msg': 'Not found in local database. Searching PubMed for article'
        },
             room=request.sid)
        eventlet.sleep(0)
        if utils.is_doi(term):
            # try to retrieve PMID if DOI
            convert = crud.convert_id(term, 'pmid')
            if convert:
                term = convert
            # return no result if the DOI cannot be converted
            else:
                emit('search_update', {'msg': 'Not found in Pubmed :('},
                     room=request.sid)
                _emit_no_results(term)
                return
        # try to retrieve the review from pubmed
        ec = Client(api_key=eutils_key)
        article = ec.efetch(db='pubmed', id=term)
        found_review = None
        for art in article:
            # compare as strings: after a DOI -> PMID conversion the term may
            # be an int, which would never equal str(art.pmid)
            if art and str(art.pmid) == str(term):
                found_review = art
                break
        if found_review:
            result = found_review.pmid
            if not result:
                flash(
                    'Unable to retrieve metadata for this article. Please try again later'
                )
                abort(404)
            emit('search_update',
                 {'msg': 'Found article on PubMed. Downloading metadata...'},
                 room=request.sid)
            eventlet.sleep(0)
            crud.pubmedarticle_to_db(found_review, 'systematic_reviews')
            review = crud.review_medtadata_db(term)
            emit('page_content', {
                'data': render_template('review_data.html', review=review),
                'section': 'review_data'
            },
                 room=request.sid)
            eventlet.sleep(0)
            emit('search_update', {'msg': 'Saved metadata... triggering bots'},
                 room=request.sid)
            bot.docsim.delay(term, sess_id=request.sid)
            eventlet.sleep(0)
            if 'cochrane' in review['source'].lower() and 'doi' in review:
                # Cochrane reviews additionally run the Cochrane-specific bots
                cb_bb = bot.cochrane_ongoing_excluded.si(review['doi'],
                                                         term,
                                                         sess_id=request.sid)
                cb_bb.link(
                    bot.basicbot2.si(review_id=term, sess_id=request.sid))
                chord(
                    (bot.cochranebot.s(review['doi'],
                                       term,
                                       sess_id=request.sid),
                     bot.check_citations.s(term, sess_id=request.sid)),
                    cb_bb).delay()
            else:
                chord((bot.check_citations.s(term, sess_id=request.sid)),
                      bot.basicbot2.si(review_id=term,
                                       sess_id=request.sid)).delay()
        else:
            print('no result')
            _emit_no_results(term)
            return
    # if there IS a match in our DB
    if found:
        print('emitting found review')
        eventlet.sleep(0)
        emit('search_update',
             {'msg': 'Found review in our database! Retrieving data..'},
             room=request.sid)
        eventlet.sleep(0)
        print('emitting review content')
        emit('page_content', {
            'data':
            render_template('review_data.html',
                            review=review,
                            starred=crud.is_starred(review['review_id'],
                                                    current_user.db_id)
                            if current_user.is_authenticated else False),
            'section':
            'review_data',
            'related_reviews':
            render_template('related_reviews.html',
                            related_reviews=crud.related_reviews(
                                review['review_id']))
        },
             room=request.sid)
        eventlet.sleep(0)
        trials = crud.get_review_trials_fast(
            review[0],
            usr=current_user if current_user.is_authenticated else None)
        has_included = any(trial['relationship'] == 'included'
                           for trial in trials['reg_trials'])
        emit('search_update', {'msg': 'Generating cool plots...'},
             room=request.sid)
        eventlet.sleep(0)
        formatted = utils.trials_to_plotdata(trials['reg_trials'])
        socketio.emit('page_content', {
            'section': 'plot',
            'data': formatted,
            'page': 'reviewdetail',
            'review_id': review[0]
        },
                      room=request.sid)
        emit('page_content', {
            'section':
            'rel_trials',
            'data':
            render_template('rel_trials.html',
                            reg_trials=trials['reg_trials'],
                            locked=review['included_complete'])
        },
             room=request.sid)
        eventlet.sleep(0)
        if has_included:
            emit('page_content', {
                'section':
                'incl_trials',
                'data':
                render_template('incl_trials.html',
                                reg_trials=trials['reg_trials'],
                                locked=review['included_complete'])
            },
                 room=request.sid)
            eventlet.sleep(0)
        else:
            # no included trials: render the section empty and unlocked
            emit('page_content', {
                'section':
                'incl_trials',
                'data':
                render_template(
                    'incl_trials.html', reg_trials=[], locked=False)
            },
                 room=request.sid)


def _emit_no_results(term):
    """Send the 'no results' page for the given search term to the requesting client."""
    emit('page_content', {
        'section': 'no_results',
        'data': render_template('noresults.html', id=term)
    },
         room=request.sid)
Ejemplo n.º 21
0
def populate_reviews(period):
    """
    Download all new reviews made available on PubMed in the last <period> days
    and save them to the db if they have trials in CrossRef or Cochrane.

    @param period: number of days back to start the PubMed search
    """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db':
            'pubmed',
            'term':
            'systematic review[ti] OR meta analysis[ti] OR cochrane database of systematic reviews[ta]',
            'format':
            'json',
            'retmax':
            300000,
            'email':
            crud.eutils_email,
            'tool':
            crud.eutils_tool,
            'api_key':
            eutils_key,
            'date_type':
            'edat',
            'mindate': (datetime.now().date() -
                        timedelta(days=period)).strftime('%Y/%m/%d'),
            'maxdate':
            '3000'
        })
    pmids = r.json()['esearchresult']['idlist']
    print(len(pmids))
    ec = Client(api_key=eutils_key)
    # fetch the article metadata in batches of 100 PMIDs
    for segment in utils.chunks(pmids, 100):
        # keep retrying the batch until NCBI answers
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=segment)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        for article in articles:
            print('-----------------' + article.pmid +
                  '-------------------------')
            if article.doi is not None:
                ids = bot.check_trialpubs_nctids(article.pmid, article.doi)
            else:
                ids = bot.check_trialpubs_nctids(article.pmid)
            if ids:
                if ids.pmids:
                    print(ids.pmids)
                    count = crud.articles_with_nctids(tuple(ids.pmids))
                    print(count)
                    if count:
                        print('articles with links = ' + str(len(count)))
                        print('inserting ' + str(article.pmid))
                        crud.pubmedarticle_to_db(article, 'systematic_reviews')
                        for trialpub in count:
                            crud.review_publication(article.pmid, trialpub, 9)
                            for nct in crud.linked_nctids(trialpub):
                                crud.review_trial(article.pmid,
                                                  nct,
                                                  False,
                                                  'included',
                                                  user_id=9,
                                                  nickname='crossrefbot')
                if ids.nctids:
                    crud.pubmedarticle_to_db(article, 'systematic_reviews')
                    print('nct ids in crossref = ' + str(len(ids.nctids)))
                    for nct_id in ids.nctids:
                        crud.review_trial(article.pmid, nct_id, False,
                                          'included', 'crossrefbot', 9)
                if not ids.nctids and not ids.pmids:
                    print('found nothing')
            else:
                print('nothing')
            if 'Cochrane' in article.jrnl:
                print('Cochrane')
                crud.pubmedarticle_to_db(article, 'systematic_reviews')
                bot.cochranebot(article.doi, article.pmid)
                bot.cochrane_ongoing_excluded(article.doi, article.pmid)
                _delete_review_without_bot_votes(article.pmid)
            else:
                print('not cochrane')


def _delete_review_without_bot_votes(review_id):
    """Delete a review and its links unless user 17 or user 9 has voted on one of its trials."""
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "select rt.review_id, json_agg(distinct v.user_id) as users from review_rtrial rt"
            " inner join votes v on rt.id = v.link_id where rt.review_id = %s group by"
            " rt.review_id;", (review_id, ))
        new_users = cur.fetchone()
        if not new_users:
            new_users = {'users': []}
        # user 9 is used with the 'crossrefbot' nickname in this file; 17 is
        # presumably the cochranebot account - confirm against the users table
        if not {17, 9} & set(new_users['users']):
            print('deleting ' + str(new_users['users']) + ' ' +
                  str(review_id))
            # each statement is committed on its own, dependants first
            for statement in (
                    "delete from votes where link_id in (select id from review_rtrial where review_id = %s);",
                    "delete from review_trialpubs where review_id = %s;",
                    "delete from review_rtrial where review_id = %s;",
                    "delete from systematic_reviews where review_id = %s;"):
                cur.execute(statement, (review_id, ))
                conn.commit()
    finally:
        # close the connection even if one of the queries raises
        conn.close()
Ejemplo n.º 22
0
		if callable(record):
			r = record(r)
		elif record is not None:
			raise ValueError('Unknown record transform function (args.record).')
		if r:
			writer.write(r)

# Query NCBI E-utilities (esearch followed by efetch, or efetch alone) and pass
# the resulting records to writerResults.
client = Client(api_key = apikey)
if prog == 'esearch':
	sret  = client.esearch(db = db, term = term)
	try:
		# presumably find() yields None when the response has no ErrorList
		# element, so the attribute access fails and error stays None
		error = list(sret._xml_root.find('ErrorList').iterchildren())
	except Exception:
		# narrowed from a bare except so Ctrl-C / SystemExit still propagate
		error = None

	# report a count of 0 when NCBI returned errors
	print(sret.count if not error else 0)

	if not sret.ids:
		rets = []
	else:
		rets = list(client.efetch(db = db, id = sret.ids))
	writerResults(rets)

else:
	fetches = client.efetch(db = db, id = term)
	# the records are read from the first public attribute of the result object
	key     = [name for name in dir(fetches) if not name.startswith('_')][0]
	rets    = getattr(fetches, key)
	writerResults(rets)

sleep (sleepsec)
Ejemplo n.º 23
0
def check_trialpubs_nctids(review_id, review_doi=None, sess_id=None):
    """
    Resolve the references of a review to PMIDs and NCT IDs.

    The reference list is read from Crossref. References are resolved to PMIDs
    by DOI search on PubMed, by citation matching, and finally by
    batch_doi2pmid for DOIs still unresolved. NCT IDs are picked out of the
    unstructured citation text.

    @param review_id: PubMed ID of review
    @param review_doi: DOI of review; looked up on PubMed from review_id when omitted
    @param sess_id: session ID if transmitting progress via websocket
    @return: namedtuple (pmids, nctids) with found PMIDs and NCT IDs;
             None when the review has no DOI or the Crossref request fails;
             False when Crossref returns no reference list
    """
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')
    ec = Client(api_key=eutils_key)
    cr = Crossref(mailto=config.MAIL_USERNAME)
    if not review_doi:
        paset = _retry_eutils(lambda: ec.efetch(db='pubmed', id=review_id))
        # an empty result set yields None instead of raising StopIteration
        pa = next(iter(paset), None)
        if hasattr(pa, 'doi'):
            review_doi = pa.doi
        if not review_doi:
            if sess_id:
                socketio.emit('crossrefbot_update', {'msg': 'No trials found. Crossrefbot complete'}, room=sess_id)
            return
    try:
        # strip a trailing full stop from the DOI
        if review_doi[-1] == '.':
            review_doi = review_doi[:-1]
        resp = cr.works(ids=[str(review_doi)])
    except requests.HTTPError as e:
        if sess_id:
            socketio.emit('crossrefbot_update', {'msg': 'No trials found. Crossrefbot complete'}, room=sess_id)
        print(e)
        return
    if resp['status'] != 'ok' or "reference" not in resp['message']:
        return False
    references = resp['message']['reference']
    if sess_id:
        socketio.emit('crossrefbot_update', {'msg': str(len(
            references)) + ' references found in crossref. trying to resolve these to PubMed articles...'},
                      room=sess_id)
        eventlet.sleep(0)
    print(str(len(references)) + ' references found in crossref')
    to_resolve = []
    all_dois = [ref["DOI"] for ref in references if 'DOI' in ref]
    # DOIs not yet matched to a PubMed article. Kept separate from the
    # per-request chunks so DOIs from every chunk (not only the last one)
    # reach the later resolution steps.
    unresolved_dois = list(all_dois)
    if all_dois:
        # if we get pubmed metadata for these DOIs, we can cross-check which
        # dois match the ones in our set of references
        for chunk in utils.chunks(all_dois, 250):
            esr = _retry_eutils(
                lambda: ec.esearch(db='pubmed', term=' OR '.join(['"' + doi + '"[AID]' for doi in chunk])),
                extra_errors=(lxml.etree.XMLSyntaxError,))
            if esr.ids:
                paset = _retry_eutils(lambda: ec.efetch(db='pubmed', id=esr.ids))
                for pma in paset:
                    if pma.doi is not None and pma.doi in unresolved_dois:
                        unresolved_dois.remove(pma.doi)
                        to_resolve.append(pma.pmid)
    # references still unresolved that carry enough metadata for a citation match
    citation_fields = ('first-page', 'author', 'article-title', 'volume', 'journal-title', 'year')
    remaining = [x for x in references
                 if ('DOI' not in x or x['DOI'] in unresolved_dois)
                 and any(field in x for field in citation_fields)]
    if remaining:
        citation_pmids = ecitmatch_tools.batch_pmids_for_citation(remaining, debug=False)
        check_metadata = []
        if citation_pmids:
            for citation in citation_pmids:
                if utils.RepresentsInt(citation):
                    to_resolve.append(citation)
                    check_metadata.append(citation)
                elif citation.startswith('AMBIGUOUS'):
                    # the candidate PMIDs follow the 10-character prefix, comma separated
                    cand = citation[10:].split(',')
                    if utils.RepresentsInt(cand[0]):
                        to_resolve.extend(cand)
                        # extend, not append: efetch needs a flat list of ids
                        check_metadata.extend(cand)
        if check_metadata:
            paset = _retry_eutils(lambda: ec.efetch(db='pubmed', id=check_metadata))
            for pma in paset:
                if pma.doi is not None and pma.doi in unresolved_dois:
                    unresolved_dois.remove(pma.doi)
                    to_resolve.append(pma.pmid)
    try_doi = batch_doi2pmid(unresolved_dois)
    if try_doi:
        for doi in try_doi:
            if utils.RepresentsInt(str(doi)):
                to_resolve.append(doi)
    nct_ids = []
    for citation in references:
        if 'unstructured' in citation:
            for token in citation['unstructured'].split(' '):
                # only whitespace-delimited tokens that are exactly NCT + 8 digits
                if len(token) == 11 and re.match(r"(NCT|nct)[0-9]{8}", token):
                    nct_ids.append(token)
    # de-duplicate the PMIDs as strings
    to_resolve = list(set(str(x) for x in to_resolve))
    content = collections.namedtuple('ids', ['pmids', 'nctids'])
    return content(to_resolve, nct_ids)


def _retry_eutils(call, extra_errors=()):
    """Invoke call() until it succeeds, printing the error and sleeping 5 s after each eutils/network failure."""
    errors = (eutils.exceptions.EutilsNCBIError, eutils.exceptions.EutilsRequestError,
              requests.exceptions.SSLError, requests.exceptions.ConnectionError) + tuple(extra_errors)
    while True:
        try:
            return call()
        except errors as e:
            print(e)
            time.sleep(5)
Ejemplo n.º 24
0
def update_trial_publications(period):
    """
    Fetch the newest PubMed articles that reference ClinicalTrials.gov IDs and
    store them, together with their article-to-trial links, in the database.
    Meant to be run once every <period> days.
    @param period: number of days back to start search
    @return: None
    """
    # known edge cases
    # 32601120 NCT0282152 -- nct given with missing digit
    # 31899823 NCT00020085 -- nct is an alias for NCT00004635
    ec = Client(api_key=eutils_key)

    esearch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    page_size = 10000
    accession_xpath = 'MedlineCitation/Article/DataBankList/DataBank[DataBankName = "ClinicalTrials.gov"]/AccessionNumberList/AccessionNumber/text()'

    print('update_trial_publications, gathering pmids')
    pmids = []
    page = 0
    # page through the esearch results until a request fails or a page is empty
    while True:
        window_start = datetime.now() - timedelta(days=period)
        query = {
            'db': 'pubmed',
            'term': 'clinicaltrials.gov[si]',
            'format': 'json',
            'retmax': page_size,
            'retstart': page * page_size,
            'email': crud.eutils_email,
            'tool': crud.eutils_tool,
            'api_key': eutils_key,
            'date_type': 'edat',
            'mindate': window_start.strftime('%Y/%m/%d'),
            'maxdate': 3000
        }
        r = utils.retry_get(esearch_url, params=query)
        if not r:
            break
        current_pmids = r.json()['esearchresult']['idlist']
        if not current_pmids:
            break
        pmids.extend(current_pmids)
        print('page %s, pmid count: %s' % (page, len(pmids)))
        page += 1

    # download the article metadata 100 PMIDs at a time
    for batch in utils.chunks(pmids, 100):
        articles = None
        fetched = False
        while not fetched:
            try:
                articles = ec.efetch(db='pubmed', id=batch)
                fetched = True
            except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                # report the failure, wait and try the same batch again
                print(e)
                time.sleep(5)
        for a in articles:
            nct_ids = a._xml_root.xpath(accession_xpath)
            print('nct_ids found for pmid %s = [%s]' %
                  (a.pmid, ', '.join(nct_ids)))
            if not nct_ids:
                continue
            crud.pubmedarticle_to_db(a, 'trial_publications')
            for nct_id in nct_ids:
                if len(nct_id) == 11:
                    crud.publication_trial(a.pmid, nct_id, 9)
                else:
                    print(
                        '##WARNING!: ignoring %s (%s) - not the expected 11 chars long, possible missing digit'
                        % (nct_id, a.pmid))