コード例 #1
0
 def test_review_publication(self):
     """Verify that ``crud.review_publication`` persists review/publication links.

     Three trial publications and three reviews are fetched from PubMed and
     stored via ``crud.pubmedarticle_to_db``.  The i-th review is then linked
     to the i-th trial publication and the link is read back from the
     ``review_trialpubs`` table.
     """
     ec = Client()
     trialpub_ids = [29871025, 29859785, 29866619]
     review_ids = [28775712, 28549125, 29929949]
     trialpubs = ec.efetch(db='pubmed', id=trialpub_ids)
     reviews = ec.efetch(db='pubmed', id=review_ids)
     for article in trialpubs:
         crud.pubmedarticle_to_db(article, 'trial_publications')
     for i, review in enumerate(reviews):
         crud.pubmedarticle_to_db(review, 'systematic_reviews')
         # third argument is presumably the ID of the linking user -- TODO confirm
         crud.review_publication(review.pmid, trialpub_ids[i], 1)
         conn = self.mock_conn(True)
         try:
             cur = conn.cursor()
             cur.execute(
                 "SELECT trialpub_id from review_trialpubs where review_id = %s;",
                 (review.pmid, ))
             trialpub = cur.fetchone()
             # Fail with a clear message instead of a TypeError on None[0].
             self.assertIsNotNone(trialpub)
             self.assertEqual(trialpub[0], trialpub_ids[i])
         finally:
             # Close the connection even when an assertion or the query fails.
             conn.close()
コード例 #2
0
def _drop_review_without_bot_votes(review_id):
    """Delete a stored review unless user 9 or user 17 has voted on one of its links.

    Looks up the distinct voters on the review's ``review_rtrial`` rows.  When
    neither user 17 nor user 9 (the IDs passed with the 'cochranebot' and
    'crossrefbot' nicknames elsewhere in this file) is among them, the
    review's votes, ``review_trialpubs`` rows, ``review_rtrial`` rows and its
    ``systematic_reviews`` row are deleted, committing after each statement.

    @param review_id: PMID of the review
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "select rt.review_id, json_agg(distinct v.user_id) as users from review_rtrial rt"
            " inner join votes v on rt.id = v.link_id where rt.review_id = %s group by"
            " rt.review_id;", (review_id, ))
        row = cur.fetchone()
        voters = row['users'] if row else []
        if {17, 9} & set(voters):
            return
        print('deleting ' + str(voters) + ' ' + str(review_id))
        # Order matters: rows referencing the review go before the review itself.
        for statement in (
                "delete from votes where link_id in (select id from review_rtrial where review_id = %s);",
                "delete from review_trialpubs where review_id = %s;",
                "delete from review_rtrial where review_id = %s;",
                "delete from systematic_reviews where review_id = %s;"):
            cur.execute(statement, (review_id, ))
            conn.commit()
    finally:
        # Release the connection even if one of the statements raises.
        conn.close()


def populate_reviews(period):
    """Download reviews added to PubMed in the last ``period`` days and save those with trial links.

    Runs an ESearch for systematic reviews / meta-analyses, fetches the
    matching articles 100 at a time and, for each article:

    * asks ``bot.check_trialpubs_nctids`` for referenced PMIDs and NCT IDs and
      stores the review plus its links (user 9, 'crossrefbot') when any exist;
    * if the journal name contains 'Cochrane', stores the review, runs
      ``bot.cochranebot`` and ``bot.cochrane_ongoing_excluded`` and finally
      removes the review again when neither bot user has voted on its links.

    Written so that it runs unchanged on Python 2 and Python 3 (single-argument
    ``print(...)`` calls and the ``for`` protocol instead of ``.next()``).

    @param period: number of days to look back
    """
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    r = utils.requests_retry_session().get(
        base_url,
        params={
            'db': 'pubmed',
            'term':
            'systematic review[ti] OR meta analysis[ti] OR cochrane database of systematic reviews[ta]',
            'format': 'json',
            'retmax': 300000,
            'email': crud.eutils_email,
            'tool': crud.eutils_tool,
            'api_key': eutils_key,
            # NOTE(review): the E-utilities documentation names this parameter
            # 'datetype'; confirm that 'date_type' is actually honoured.
            'date_type': 'edat',
            'mindate': (datetime.now().date() -
                        timedelta(days=period)).strftime('%Y/%m/%d'),
            'maxdate': '3000'
        })
    search_result = r.json()
    pmids = search_result['esearchresult']['idlist']
    print(len(pmids))
    segments = utils.chunks(pmids, 100)
    ec = Client(api_key=eutils_key)
    for s in segments:
        # Retry the batch until it succeeds; only transient NCBI/network
        # errors are swallowed.
        while True:
            try:
                articles = ec.efetch(db='pubmed', id=s)
                break
            except (eutils.exceptions.EutilsNCBIError,
                    eutils.exceptions.EutilsRequestError,
                    requests.exceptions.SSLError,
                    requests.exceptions.ConnectionError) as e:
                print(e)
                time.sleep(5)
        for article in articles:
            print('-----------------' + article.pmid + '-------------------------')
            if article.doi is not None:
                ids = bot.check_trialpubs_nctids(article.pmid, article.doi)
            else:
                ids = bot.check_trialpubs_nctids(article.pmid)
            if ids:
                if ids.pmids:
                    print(ids.pmids)
                    count = crud.articles_with_nctids(tuple(ids.pmids))
                    print(count)
                    if count:
                        print('articles with links = ' + str(len(count)))
                        print('inserting ' + str(article.pmid))
                        crud.pubmedarticle_to_db(article, 'systematic_reviews')
                        for trialpub in count:
                            crud.review_publication(article.pmid, trialpub, 9)
                            linked_ncts = crud.linked_nctids(trialpub)
                            for nct in linked_ncts:
                                crud.review_trial(article.pmid,
                                                  nct,
                                                  False,
                                                  'included',
                                                  user_id=9,
                                                  nickname='crossrefbot')
                if ids.nctids:
                    crud.pubmedarticle_to_db(article, 'systematic_reviews')
                    print('nct ids in crossref = ' + str(len(ids.nctids)))
                    for nct_id in ids.nctids:
                        crud.review_trial(article.pmid, nct_id, False,
                                          'included', 'crossrefbot', 9)
                if not ids.nctids and not ids.pmids:
                    print('found nothing')
            else:
                print('nothing')
            if 'Cochrane' in article.jrnl:
                print('Cochrane')
                # Stored unconditionally so the bots can attach links to it;
                # removed again below when neither bot linked anything.
                crud.pubmedarticle_to_db(article, 'systematic_reviews')
                bot.cochranebot(article.doi, article.pmid)
                bot.cochrane_ongoing_excluded(article.doi, article.pmid)
                _drop_review_without_bot_votes(article.pmid)
            else:
                print('not cochrane')
コード例 #3
0
def cochranebot(doi, review_id, sess_id=None):
    """
    Extract & save included trial IDs for a review from the Cochrane Library website text.

    The review's ``/references`` page is downloaded and every
    ``references_includedStudies`` section is scanned for NCT IDs and PubMed
    IDs.  PubMed IDs are resolved to linked trials through ``crud``; every
    resulting trial is saved as 'included' for user 17 ('cochranebot').
    Nothing is saved when the request is redirected too often, the response is
    not 200, the DOI's third dot-separated segment does not contain 'CD', or
    the page has no included-studies section.

    @param doi: DOI of review
    @param review_id: PMID of review
    @param sess_id: session ID if transmitting progress via websocket
    @return: None
    """
    socketio = None
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')

    def notify(msg, pause=True):
        """Emit one progress message to the session room; no-op without a session."""
        if not sess_id:
            return
        socketio.emit('cochranebot_update', {'msg': msg}, room=sess_id)
        if pause:
            socketio.sleep(0)

    def notify_nothing_found():
        """Emit the 'nothing found' / 'complete' pair used by every early exit."""
        notify('nothing found by cochranebot')
        notify('cochranebot complete', pause=False)

    notify('searching cochrane for included studies')
    base_url = "https://www.cochranelibrary.com/cdsr/doi/{}/references".format(
        doi)
    # Retry indefinitely on chunked-encoding errors; give up on redirect loops.
    while True:
        try:
            r = requests.get(
                base_url,
                headers={
                    'User-Agent':
                    'Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36'
                })
            break
        except requests.exceptions.TooManyRedirects:
            notify_nothing_found()
            return
        except requests.exceptions.ChunkedEncodingError as e:
            print(e)
            print('retrying... chunked encoding error, ')
            time.sleep(10)
    if r.status_code == 200:
        soup = bs4.BeautifulSoup(r.content, 'html.parser')
        # A missing DOI, or one with fewer than three dot-separated parts, is
        # treated as "not a Cochrane review" instead of raising.
        doi_parts = doi.split('.') if doi else []
        if len(doi_parts) < 3 or 'CD' not in doi_parts[2]:
            notify_nothing_found()
            return
        included_studies = soup.find_all(
            "div", {"class": "references_includedStudies"})
        if not included_studies:
            notify_nothing_found()
            return
        nct_ids = []
        pmids = []
        for section in included_studies:
            section_html = str(section)
            for match in re.finditer(r"(NCT|nct)[0-9]{8}", section_html):
                nct_ids.append(match.group().upper())
                notify('found nct ID ' + nct_ids[-1])
            # NOTE(review): both PMID patterns require exactly 8 digits, so
            # shorter PMIDs are skipped -- confirm this is intended.
            for match in re.finditer(r"pubmed/[0-9]{8}", section_html):
                pmids.append(match.group().split('/')[1])
                notify('found PMID ' + pmids[-1])
            for match in re.finditer(r"PUBMED: [0-9]{8}", section_html):
                pmids.append(match.group().split(' ')[1])
                notify('found PMID ' + pmids[-1])
        notify('trying to resolve automatic links from PubMed IDs')
        if pmids:
            count = crud.articles_with_nctids(pmids)
            print('cochrane included articles with links = ' + str(count))
            if count:
                for trialpub in count:
                    crud.review_publication(review_id, trialpub, 17)
                    linked_ncts = crud.linked_nctids(trialpub)
                    for nct in linked_ncts:
                        crud.review_trial(review_id,
                                          nct,
                                          False,
                                          'included',
                                          user_id=17,
                                          nickname='cochranebot',
                                          vote_type='up')
                        # Guarded so the message is only built when it is sent.
                        if sess_id:
                            notify(
                                'cochranebot found included trials with IDs '
                                + ', '.join(linked_ncts),
                                pause=False)
        nct_ids = list(set(nct_ids))
        print('cochrane nct_ids ' + str(nct_ids))

        for nct_id in nct_ids:
            crud.review_trial(review_id, nct_id, False, 'included',
                              'cochranebot', 17)
    # Reached after a successful scan and for any non-200 response.
    notify('cochranebot complete')
コード例 #4
0
def cochrane_ongoing_excluded(doi, review_id, sess_id=None):
    """
    Extract & save ongoing and excluded trial IDs for a review from the Cochrane Library website text.

    The review's ``/references`` page is downloaded once and three kinds of
    section are processed:

    * ``references_excludedStudies``: NCT IDs and trials linked to the listed
      PubMed IDs are saved as 'relevant' with ``vote_type='down'``;
    * ``references_ongoingStudies`` and
      ``references_awaitingAssessmentStudies``: NCT IDs are saved as
      'relevant' without an explicit vote type.

    All records are written for user 17 ('cochranebot').

    NOTE(review): unlike ``cochranebot`` this function does not retry on
    ``ChunkedEncodingError`` and sends no completion message when the response
    status is not 200 -- confirm whether that difference is intended.

    @param doi: DOI of review
    @param review_id: PMID of review
    @param sess_id: session ID if transmitting progress via websocket
    @return: None
    """
    socketio = None
    if sess_id:
        socketio = SocketIO(message_queue='amqp://localhost')

    def notify(msg, pause=True):
        """Emit one progress message to the session room; no-op without a session."""
        if not sess_id:
            return
        socketio.emit('cochranebot_update', {'msg': msg}, room=sess_id)
        if pause:
            socketio.sleep(0)

    def notify_nothing_found():
        """Emit the 'nothing found' / 'complete' pair used by every early exit."""
        notify('nothing found by cochranebot')
        notify('cochranebot complete', pause=False)

    def save_relevant(sections):
        """Save every NCT ID found in ``sections`` as 'relevant' for the review."""
        relevant_nct = []
        for section in sections:
            for match in re.finditer(r"(NCT|nct)[0-9]{8}", str(section)):
                relevant_nct.append(match.group().upper())
                notify('found nct ID ' + relevant_nct[-1], pause=False)
        relevant_nct = list(set(relevant_nct))
        print(relevant_nct)
        for nct in relevant_nct:
            # TODO ensure that already included gets moved to relevant
            crud.review_trial(review_id, nct, False, 'relevant',
                              'cochranebot', 17)

    notify('searching cochrane for ongoing or excluded studies')
    base_url = "https://www.cochranelibrary.com/cdsr/doi/{}/references".format(
        doi)
    try:
        r = requests.get(
            base_url,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36'
            })
    except requests.exceptions.TooManyRedirects:
        notify_nothing_found()
        return
    if r.status_code != 200:
        return
    soup = bs4.BeautifulSoup(r.content, 'html.parser')
    # A missing DOI, or one with fewer than three dot-separated parts, is
    # treated as "not a Cochrane review" instead of raising.
    doi_parts = doi.split('.') if doi else []
    if len(doi_parts) < 3 or 'CD' not in doi_parts[2]:
        notify_nothing_found()
        return
    excluded_studies = soup.find_all(
        "div", {"class": "references_excludedStudies"})
    if excluded_studies:
        nct_ids = []
        pmids = []
        for section in excluded_studies:
            section_html = str(section)
            for match in re.finditer(r"(NCT|nct)[0-9]{8}", section_html):
                nct_ids.append(match.group().upper())
                notify('found nct ID ' + nct_ids[-1])
            # NOTE(review): both PMID patterns require exactly 8 digits, so
            # shorter PMIDs are skipped -- confirm this is intended.
            for match in re.finditer(r"pubmed/[0-9]{8}", section_html):
                pmids.append(match.group().split('/')[1])
                notify('found PMID ' + pmids[-1])
            for match in re.finditer(r"PUBMED: [0-9]{8}", section_html):
                pmids.append(match.group().split(' ')[1])
                notify('found PMID ' + pmids[-1])
        # if included by crossrefbot, move it
        if pmids:
            count = crud.articles_with_nctids(pmids)
            print('cochrane excluded articles with links = ' + str(count))
            if count:
                for trialpub in count:
                    crud.review_publication(review_id, trialpub, 17)
                    linked_ncts = crud.linked_nctids(trialpub)
                    for nct in linked_ncts:
                        crud.review_trial(review_id,
                                          nct,
                                          False,
                                          'relevant',
                                          user_id=17,
                                          nickname='cochranebot',
                                          vote_type='down')
                        # Guarded so the message is only built when it is sent.
                        if sess_id:
                            notify(
                                'cochranebot found excluded trials with IDs '
                                + ', '.join(linked_ncts),
                                pause=False)
        nct_ids = list(set(nct_ids))
        print('excluded: ' + ', '.join(nct_ids))
        for nct_id in nct_ids:
            # if included by crossrefbot, move it
            crud.review_trial(review_id,
                              nct_id,
                              False,
                              'relevant',
                              'cochranebot',
                              17,
                              vote_type='down')
    ongoing_studies = soup.find_all("div",
                                    {"class": "references_ongoingStudies"})
    if ongoing_studies:
        save_relevant(ongoing_studies)
    awaiting_studies = soup.find_all(
        "div", {"class": "references_awaitingAssessmentStudies"})
    if awaiting_studies:
        save_relevant(awaiting_studies)
    if not excluded_studies and not awaiting_studies and not ongoing_studies:
        notify_nothing_found()
        return
    if sess_id:
        # The final message carries an extra 'refresh_both' flag, so it is
        # emitted directly rather than through notify().
        socketio.emit('cochranebot_update', {
            'msg': 'cochranebot complete',
            'refresh_both': True
        },
                      room=sess_id)
        socketio.sleep(0)
コード例 #5
0
def check_citations(review_id, sess_id=None, review_doi=None):
    """
    Look up the IDs referenced by a review and save any automatic links they yield.

    The review is fetched from PubMed (retrying every 5 seconds on NCBI or
    connection errors).  For each returned article the referenced PMIDs and
    NCT IDs are obtained from ``check_trialpubs_nctids``; trials linked to the
    PMIDs and the NCT IDs themselves are saved as 'included' for user 9
    ('crossrefbot').

    @param review_id: PubMed ID of review
    @param sess_id: session ID if transmitting progress via websocket
    @param review_doi: DOI of review (not read by this function)
    @return: None
    """
    socketio = SocketIO(message_queue='amqp://localhost') if sess_id else None
    ec = Client(api_key=eutils_key)

    fetched = None
    while fetched is None:
        try:
            fetched = ec.efetch(db='pubmed', id=review_id)
        except (eutils.EutilsNCBIError, eutils.EutilsRequestError,
                requests.exceptions.SSLError,
                requests.exceptions.ConnectionError) as e:
            print(e)
            time.sleep(5)

    for article in fetched:
        print('-----------------' + article.pmid + '-------------------------')
        # Pass the DOI positionally only when the article actually has one.
        lookup_args = (article.pmid, )
        if article.doi is not None:
            lookup_args = (article.pmid, article.doi)
        ids = check_trialpubs_nctids(*lookup_args, sess_id=sess_id)

        if not ids:
            if sess_id:
                socketio.emit('crossrefbot_update',
                              {'msg': 'No trials found. Crossrefbot complete'},
                              room=sess_id)
            continue

        if ids.pmids:
            if sess_id:
                found_msg = ('crossrefbot found references to ' +
                             str(len(ids.pmids)) +
                             ' PubMed articles. Checking articles for links to included trials...')
                socketio.emit('crossrefbot_update', {'msg': found_msg},
                              room=sess_id)
            linked_pubs = crud.articles_with_nctids(ids.pmids)
            if linked_pubs and len(linked_pubs) > 0:
                if sess_id:
                    linked_msg = (str(len(linked_pubs)) +
                                  ' articles have links to included trials')
                    socketio.emit('crossrefbot_update', {'msg': linked_msg},
                                  room=sess_id)
                for trialpub in linked_pubs:
                    crud.review_publication(article.pmid, trialpub, 9)
                    for nct in crud.linked_nctids(trialpub):
                        crud.review_trial(review_id,
                                          nct,
                                          False,
                                          'included',
                                          user_id=9,
                                          nickname='crossrefbot')

        if ids.nctids:
            direct_total = str(len(ids.nctids))
            print('nct ids in crossref = ' + direct_total)
            if sess_id:
                socketio.emit('crossrefbot_update', {
                    'msg':
                    direct_total +
                    ' included trials were listed directly in crossref'
                },
                              room=sess_id)
            for nct_id in ids.nctids:
                crud.review_trial(article.pmid, nct_id, False, 'included',
                                  'crossrefbot', 9)

        # Closing message: which one depends on whether anything was found.
        found_nothing = not ids.nctids and not ids.pmids
        if sess_id:
            if found_nothing:
                closing_msg = 'No trials found. Crossrefbot complete'
            else:
                closing_msg = 'crossrefbot complete'
            socketio.emit('crossrefbot_update', {'msg': closing_msg},
                          room=sess_id)