def publication_trial(publication_id, nct_id, user_id):
    """
    create a new record linking the specified publication to the specified trial registry entry

    If the first insert raises an IntegrityError the trial is treated as missing from the
    database: it is fetched with add_missing_trial and the insert is retried once using the
    nct id that call returns.
    @param publication_id: pmid of publication
    @param nct_id: nct id of trial registry entry
    @param user_id: of submitting user
    """
    sql = "INSERT INTO trialpubs_rtrial (trialpub_id, nct_id, user_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING ;"
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (publication_id, nct_id, user_id))
            conn.commit()
        except psycopg2.IntegrityError:
            print(nct_id, 'not found in db, grabbing it from ct api')
            # the failed statement aborted the transaction; reset it before retrying
            conn.rollback()
            # Returns redirected nct_id if the argument nct is actually an alias for another trial
            true_nct_id = add_missing_trial(nct_id)
            if true_nct_id:
                cur.execute(sql, (publication_id, true_nct_id, user_id))
                conn.commit()
    finally:
        # always release the connection, including when the retry path raises
        conn.close()
def convert_id(known_id, desired_id):
    """
    convert from PMID to DOI or vice versa

    The local systematic_reviews table is consulted first; when it has no matching row the
    conversion is delegated to request_data.pubmed_convert_id.
    @param known_id: id we have (pmid/doi)
    @param desired_id: type of id we want (pmid/doi)
    @return: desired id (in pmid/doi form), or None if known_id is not valid input for the query
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            if desired_id == 'doi':
                cur.execute("SELECT doi FROM systematic_reviews WHERE review_id = %s;", (known_id,))
            elif desired_id == "pmid":
                cur.execute("SELECT review_id FROM systematic_reviews WHERE doi = %s;", (known_id,))
        except psycopg2.DataError:
            # known_id could not be used as a value for the queried column
            return None
        # NOTE(review): for any other desired_id no query has been executed and fetchone()
        # raises, exactly as before this change
        existing = cur.fetchone()
    finally:
        # release the connection on every path, including the DataError early return
        conn.close()
    if existing:
        return existing[0]
    # previously the result of the remote lookup was computed and then dropped
    return request_data.pubmed_convert_id(known_id, desired_id)
def review_publication(review_id, publication_id, user_id):
    """
    create a new record linking the specified review to the specified publication

    If the first insert raises an IntegrityError, the publication is fetched from PubMed,
    stored via pubmedarticle_to_db in 'trial_publications', and the insert is retried once.
    @param review_id: pmid of review
    @param publication_id: pmid of trial publication
    @param user_id: id of user submitting this publication
    """
    sql = "INSERT INTO review_trialpubs (review_id, trialpub_id, user_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING;"
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (review_id, publication_id, user_id))
            conn.commit()
        except psycopg2.IntegrityError as e:
            # print(e) is valid on both Python 2 and 3, unlike the statement form
            print(e)
            # the failed statement aborted the transaction; reset it before retrying
            conn.rollback()
            ec = Client(api_key=eutils_key)
            article = ec.efetch(db='pubmed', id=publication_id)
            for a in article:
                pubmedarticle_to_db(a, 'trial_publications')
            cur.execute(sql, (review_id, publication_id, user_id))
            conn.commit()
    finally:
        # always release the connection, including when the fetch or retry raises
        conn.close()
def _matfac_trials():
    """
    regenerate the 'matfacbot' trial recommendations from the most recent matfac output

    Removes the existing votes of bot user 11, then loads the most recent results matrix with
    its PMID and NCT id label arrays. For every column of the results whose review has more than
    two included trials, the 100 highest scoring trials are taken; when they cover at least half
    of the included trials, the remaining ones are submitted as 'relevant' links.
    """
    print(utils.most_recent_matfac())
    print(utils.most_recent_matfac_pmids())
    print(utils.most_recent_matfac_nctids())
    remote_tasks.remove_bot_votes(11)
    results = np.load(utils.most_recent_matfac())
    pmid_arr = np.load(utils.most_recent_matfac_pmids())
    nct_ids = np.load(utils.most_recent_matfac_nctids())
    con = dblib.create_con(VERBOSE=True)
    try:
        cur = con.cursor()
        for c, col in enumerate(results.T):
            cur.execute(
                "SELECT nct_id from review_rtrial where relationship = 'included' and review_id = %s;",
                (pmid_arr[c], ))
            rows = cur.fetchall()
            if not rows:
                continue
            incl = [row[0] for row in rows]
            if len(incl) <= 2:
                continue
            # indices of the 100 highest scores, best first (renamed: no longer shadows sorted())
            ranked = col.argsort()[::-1][:100]
            top_trials = nct_ids[ranked].flatten()
            # NOTE(review): "/" is integer division on Python 2 and true division on Python 3,
            # so the threshold differs for odd counts; left as written
            if len(set(top_trials) & set(incl)) >= len(incl) / 2:
                for trial in set(top_trials[:100]) - set(incl):
                    print(pmid_arr[c], trial)
                    crud.review_trial(pmid_arr[c], trial, False, 'relevant', 'matfacbot', 11)
    finally:
        # release the connection even if a query or an insert fails part-way through
        con.close()
def change_relationship(id, relationship):
    """
    set relationship of review-trial link with specified ID
    @param id: id of the review_rtrial row to update
    @param relationship: new value for the relationship column
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("UPDATE review_rtrial SET relationship = %s WHERE id = %s;", (relationship, id))
        conn.commit()
    finally:
        # release the connection even when the update fails
        conn.close()
def link_ftext_trial(review_id, nct_id):
    """
    create a new link between the specified freetext review and a trial

    If the first insert raises an IntegrityError, the trial is fetched with add_missing_trial
    and the insert is retried once using the nct id that call returns.
    @param review_id: PMID of review
    @param nct_id: NCTID of trial
    """
    sql = """
        INSERT INTO freetext_review_rtrial(review_id, nct_id)
        VALUES (%s, %s) ON CONFLICT (review_id, nct_id) DO NOTHING;
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (review_id, nct_id))
            conn.commit()
        except psycopg2.IntegrityError as e:
            print(e)
            # the failed statement aborted the transaction; reset it before retrying
            conn.rollback()
            # Returns redirected nct_id if the argument nct is actually an alias for another trial
            true_nct_id = add_missing_trial(nct_id)
            if true_nct_id:
                cur.execute(sql, (review_id, true_nct_id))
                conn.commit()
    finally:
        # always release the connection, including when the retry path raises
        conn.close()
def category_counts(only_verified):
    """
    get the # reviews with linked trials in each category
    @param only_verified: when truthy, count only reviews whose included_complete flag is set
    @return: list of dicts with keys 'name', 'code' and 'count', one per category
    """
    sql = """
        SELECT ct.category, ct.code, count(distinct r.review_id) AS review_count
        FROM ct_categories ct
        INNER JOIN category_condition cd ON cd.category_id = ct.id
        INNER JOIN trial_conditions tc ON tc.condition_id = cd.condition_id
        INNER JOIN review_rtrial r ON r.nct_id = tc.nct_id
        %s
        GROUP BY ct.category;
    """
    # both fragments are constants, so interpolating them into the statement is safe
    if only_verified:
        restriction = """
        INNER JOIN systematic_reviews sr ON r.review_id = sr.review_id
        WHERE sr.included_complete and r.relationship = 'included'
        """
    else:
        restriction = "WHERE r.relationship = 'included' "
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(sql % restriction)
        categories = cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
    return [{
        'name': c['category'],
        "code": c['code'],
        "count": c['review_count']
    } for c in categories]
def __init__(self, email, nickname, password, permissions):
    """
    Init user from kwargs

    Looks the user up by email. If no row exists a new one is inserted from the arguments;
    otherwise the instance is populated from the stored row and the nickname, password and
    permissions arguments are ignored.
    @param email: unique email address used to login
    @param nickname: alias that is visible by other users
    @param password: used to login
    @param permissions: admin or standard
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT * FROM users WHERE user_name = %s", (email, ))
        user = cur.fetchone()
        if user is None:
            self.id = str(email)
            password, salt = self.set_password(password)
            self.permissions = permissions
            self.nickname = nickname
            cur.execute(
                "INSERT INTO users (user_name, nickname, user_type, salt, salted_password) VALUES (%s,%s,%s,%s,%s) ON CONFLICT (user_name) DO NOTHING RETURNING ID;",
                (email, nickname, permissions, salt, password))
            conn.commit()
            # NOTE(review): this stores the whole returned row (or None when the insert hit the
            # ON CONFLICT clause), while the branch below stores the scalar id - confirm what
            # callers expect before normalising
            self.db_id = cur.fetchone()
        else:
            self.id = user['user_name']
            self.password = user['salted_password']
            self.salt = user['salt']
            self.permissions = user['user_type']
            self.nickname = user['nickname']
            self.db_id = user['id']
    finally:
        # release the connection even when a query or set_password raises
        conn.close()
def link_review_trial(review_id, nct_id, verified, relationship, nickname, user_id):
    """
    create a new link between the specified review & trial

    If the first insert raises an IntegrityError, the trial is fetched with add_missing_trial
    and the insert is retried once using the nct id that call returns.
    @param review_id: PMID of review
    @param nct_id: NCTID of trial
    @param verified: T or F
    @param relationship: included or relevant
    @param nickname: of user submitting trial
    @param user_id: of user submitting trial
    """
    sql = ("INSERT INTO review_rtrial(review_id, nct_id, verified, upvotes, downvotes, relationship, nickname, user_id) VALUES (%s, %s, %s, %s,"
           " %s , %s, %s, %s) ON CONFLICT (review_id, nct_id) DO NOTHING;")
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (review_id, nct_id, verified, 0, 0, relationship, nickname, user_id))
            conn.commit()
        except psycopg2.IntegrityError as e:
            # print(e) is valid on both Python 2 and 3, unlike the statement form
            print(e)
            # the failed statement aborted the transaction; reset it before retrying
            conn.rollback()
            # add_missing_trial returns the redirected nct_id when nct_id is an alias for another
            # trial, so the retry has to use the returned id rather than the original argument
            true_nct_id = add_missing_trial(nct_id)
            if true_nct_id:
                cur.execute(sql, (review_id, true_nct_id, verified, 0, 0, relationship, nickname, user_id))
                conn.commit()
    finally:
        # always release the connection, including when the retry path raises
        conn.close()
def get_ftext_trials_fast(review_id):
    """
    retrieve all ftext trials related to a review
    @param review_id: pmid of review
    @return: dict with key 'reg_trials' holding the list of matching registry entry rows
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            """
            SELECT tr.nct_id, tr.brief_title, tr.overall_status, tr.brief_summary, tr.enrollment, tr.completion_date
            FROM tregistry_entries tr
            INNER JOIN freetext_review_rtrial rt ON tr.nct_id = rt.nct_id
            LEFT JOIN trialpubs_rtrial t on tr.nct_id = t.nct_id
            WHERE rt.review_id = %s
            GROUP BY tr.nct_id, rt.review_id, rt.nct_id
            """, (review_id, ))
        reg_trials = list(cur.fetchall())
    finally:
        # the connection was previously never closed by this function
        conn.close()
    return {'reg_trials': reg_trials}
def get_categories():
    """
    get the complete list of trial categories
    @return: rows of ct_categories with columns category, code and id
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT category, code, id from ct_categories;")
        return cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
def linked_nctids(pmid):
    """
    Get the linked NCTIDs for the specified PMID
    @param pmid: pmid of the trial publication
    @return: list of nct ids, or None when the publication has no links
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("select nct_id from trialpubs_rtrial where trialpub_id = %s;", (pmid,))
        ids = cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
    # take the first column of every row; indexing zip(...) directly fails on Python 3
    return [row[0] for row in ids] if ids else None
def articles_with_nctids(pmid_list):
    """
    get subset of PMIDs in pmid_list that have links to ClinicalTrials.gov
    @param pmid_list: iterable of PMIDs to check
    @return: list of the PMIDs that appear in trialpubs_rtrial, or None when there are none
    """
    pmids = tuple(pmid_list)
    if not pmids:
        # an empty tuple would render as "IN ()", which is not valid SQL
        return None
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("select distinct(trialpub_id) from trialpubs_rtrial where trialpub_id in %s;", (pmids,))
        matches = cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
    # take the first column of every row; indexing zip(...) directly fails on Python 3
    return [row[0] for row in matches] if matches else None
def review_medtadata_db(pmid):
    """
    get metadata for review with specified PMID
    @param pmid: PMID of review
    @return: the matching systematic_reviews row, or None when there is no such review
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT * FROM systematic_reviews WHERE review_id = %s;", (pmid,))
        return cur.fetchone()
    finally:
        # release the connection even when the query fails
        conn.close()
def is_starred(review_id, user_id):
    """
    retrieve starred status of review for specified user
    @param review_id: PMID of review
    @param user_id: id of user
    @return: True when a user_reviews row exists for this user and review, otherwise False
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("select * from user_reviews where user_id = %s and review_id=%s;", (user_id, review_id))
        res = cur.fetchone()
    finally:
        # release the connection even when the query fails
        conn.close()
    return bool(res)
def get_link_id(nct_id, review_id):
    """
    get the link id of the specified review-trial link
    @param nct_id: NCTID of trial
    @param review_id: PMID of review
    @return: id of the review_rtrial row, or None when no such link exists
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("SELECT id from review_rtrial where review_id = %s and nct_id = %s;", (review_id, nct_id))
        link_id = cur.fetchone()
    finally:
        # release the connection even when the query fails
        conn.close()
    return link_id[0] if link_id else None
def check_existing_review_trial(review_id, nct_id):
    """
    check whether there exists a link between the specified review & trial
    @param review_id: PMID of review
    @param nct_id: NCTID of trial
    @return: row with the link's id and relationship, or None when no link exists
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT id, relationship FROM review_rtrial WHERE nct_id = %s AND review_id = %s;", (nct_id, review_id))
        existing = cur.fetchone()
    finally:
        # release the connection even when the query fails
        conn.close()
    return existing or None
def basicbot2(review_id=None, sess_id=None):
    """
    use document similarity to recommend trials for a review based on similarity to current included trials

    The bot's previous votes for this review are deleted, the tf-idf vectors of the included
    trials are compared against the whole tf-idf matrix, and the highest-scoring trials that
    are not already included are submitted as 'relevant' links from 'basicbot2' (user 10).
    @param review_id: PMID of review
    @param sess_id: session ID if transmitting progress via websocket
    @return: False when the review has no included trials, otherwise None
    """
    socketio = SocketIO(message_queue='amqp://localhost') if sess_id else None

    def _notify(msg):
        # progress is only reported when a websocket session id was supplied
        if socketio is not None:
            socketio.emit('basicbot2_update', {'msg': msg}, room=sess_id)

    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute(
            "SELECT nct_id FROM review_rtrial WHERE relationship = 'included' AND review_id = %s;",
            (review_id, ))
        trials = cur.fetchall()
        if len(trials) < 1:
            print('no trials for basicbot2')
            return False
        cur.execute(
            "delete from votes where link_id in (select id from review_rtrial where review_id = %s) and user_id = %s;",
            (review_id, 10))
        conn.commit()
        # NOTE(review): this delete is not restricted to review_id, so it removes every unvoted
        # link of user 10 across all reviews - confirm that is intended
        cur.execute(
            "delete from review_rtrial where upvotes = 0 and downvotes = 0 and user_id = 10;"
        )
        conn.commit()
    finally:
        # release the connection on the early return and on any query failure
        conn.close()
    _notify('triggering basicbot2')
    tfidf_matrix = utils.most_recent_tfidf()
    ids = np.load(utils.most_recent_tfidf_labels())
    trials = [row[0] for row in trials]
    ix = np.isin(ids, trials)
    trial_indices = np.where(ix)[0]
    _notify('vectorizing stuff')
    trial_vecs = tfidf_matrix[trial_indices, :]
    cos_sim = linear_kernel(trial_vecs, tfidf_matrix)
    _notify('calculating cosine similarity')
    # summed similarity of every document to the included trials
    final = cos_sim.sum(axis=0)
    # indices of the 100 largest scores (unordered)
    top = np.argpartition(final, -100)[-100:]
    top_ranked = set(ids[np.array(top)]) - set(ids[trial_indices])
    _notify('inserting basicbot 2 predictions')
    for nct_id in top_ranked:
        crud.review_trial(review_id, nct_id, False, 'relevant', 'basicbot2', 10)
    _notify('basicbot2 complete!')
def get_reviews_with_ids(ids):
    """
    get a list of reviews from their PMIDs
    @param ids: iterable of review PMIDs
    @return: rows with review_id, title and year, or None when nothing matches
    """
    id_tuple = tuple(ids)
    if not id_tuple:
        # an empty tuple would render as "IN ()", which is not valid SQL
        return None
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT review_id, title, publish_date as year from systematic_reviews where review_id in %s;",
                    (id_tuple,))
        matches = cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
    return matches or None
def category_name(category_id):
    """
    get the name of category from its ID
    @param category_id: id of the ct_categories row
    @return: list of matching rows, each holding the category column (empty when none match)
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("select category from ct_categories where id = %s;", (category_id,))
        return cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
def category_counts():
    """
    get the # reviews with linked trials in each category
    @return: list of dicts with keys 'name', 'code' and 'count', in the order the query returns them
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "SELECT ct.category, ct.code, count(distinct r.review_id) as review_count "
            "from ct_categories ct "
            "inner join category_condition cd on cd.category_id = ct.id "
            "inner join trial_conditions tc on tc.condition_id = cd.condition_id "
            "inner join review_rtrial r on r.nct_id = tc.nct_id "
            "where r.relationship = 'included' "
            "group by ct.category ORDER BY count(*) desc;")
        categories = cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
    return [{'name': c['category'], "code": c['code'], "count": c['review_count']} for c in categories]
def unique_reviews_trials():
    """
    get the count of unique reviews and unique trials that have links
    @return: dict with keys 'reviews' and 'trials', counted over 'included' links only
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "select count(distinct review_id) as reviews, count(distinct nct_id) as trials from review_rtrial where relationship = 'included';")
        counts = cur.fetchone()
    finally:
        # release the connection even when the query fails
        conn.close()
    return {"reviews": counts['reviews'], "trials": counts['trials']}
def complete_studies(review_id, value):
    """
    set the completeness of the list of trials for the specified review (T or F)

    Updates the review's included_complete flag and the verified flag of its 'included' links
    in a single transaction.
    @param review_id: PMID of review
    @param value: T or F
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("UPDATE systematic_reviews SET included_complete = %s WHERE review_id = %s;", (value, review_id))
        cur.execute("UPDATE review_rtrial SET verified = %s WHERE relationship = "
                    " 'included' AND review_id = %s;", (value, review_id))
        conn.commit()
    finally:
        # release the connection even when an update fails; nothing is committed in that case
        conn.close()
def remove_bot_votes(bot_id):
    """
    remove all votes from the specified bot
    @param bot_id: user id of the bot whose votes are deleted
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute(
            "DELETE FROM srss.public.votes WHERE srss.public.votes.user_id = %s;",
            (bot_id, ))
        conn.commit()
    finally:
        # release the connection even when the delete fails
        conn.close()
def get_saved_reviews(user_id):
    """
    Retrieve all saved reviews for user
    @param user_id: id of user
    @return: result of get_reviews_with_ids for the saved PMIDs, or None when nothing is saved
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute("SELECT review_id from user_reviews where user_id = %s;", (user_id,))
        res = cur.fetchall()
    finally:
        # the connection was previously never closed on either return path
        conn.close()
    if not res:
        return None
    # take the first column of every row; indexing zip(...) directly fails on Python 3
    return get_reviews_with_ids([row[0] for row in res])
def get_all(cls):
    """
    @return: a list of all User instances (empty when there are no users)
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cursor = conn.cursor()
        cursor.execute("select user_name from users;")
        usrs = cursor.fetchall()
    finally:
        # the connection was previously never closed
        conn.close()
    # a list comprehension returns a real list on Python 3 and handles an empty users table,
    # where indexing the transposed rows raised IndexError
    return [cls.get(row[0]) for row in usrs]
def reviews_for_condition(condition):
    """
    get a list of reviews with linked trials with the specified condition
    @param condition: id of the condition (compared against trial_conditions.condition_id)
    @return: rows with review_id, title, year and condition, newest publish_date first
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "SELECT r.review_id, sr.title, sr.publish_date as year, ct.condition "
            "from ct_conditions ct "
            "inner join trial_conditions tc on tc.condition_id = ct.id "
            "inner join review_rtrial r on r.nct_id = tc.nct_id "
            "inner join systematic_reviews sr on r.review_id = sr.review_id "
            "where r.relationship = 'included' and tc.condition_id = %s "
            "group by r.review_id, sr.title, ct.condition, sr.publish_date "
            "order by sr.publish_date desc;",
            (condition,))
        return cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
def check_included():
    """
    re-run basicbot2 for every review that has more than 4 'included' trial links

    The votes of bot user 10 are removed first, then bot.basicbot2 is called once per review.
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor()
        cur.execute(
            "select review_id from review_rtrial where relationship = 'included' GROUP BY review_id HAVING count(*) > 4;"
        )
        reviews = cur.fetchall()
    finally:
        # release the connection before the bot work starts, and also when the query fails
        conn.close()
    bot.remove_bot_votes(10)
    for review in reviews:
        bot.basicbot2(review[0])
def related_reviews_from_trials(nct_ids):
    """
    get the reviews that include any of the specified trials, ordered by # of shared trials
    @param nct_ids: iterable of NCTIDs
    @return: up to 10 rows with review_id, title and count (empty list when nct_ids is empty)
    """
    trial_ids = tuple(nct_ids)
    if not trial_ids:
        # an empty tuple would render as "IN ()", which is not valid SQL
        return []
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "SELECT r.review_id, sr.title, count(r.*) FROM review_rtrial r INNER JOIN systematic_reviews sr ON r.review_id = "
            "sr.review_id WHERE r.relationship = 'included' AND r.nct_id IN %s GROUP BY r.review_id, sr.title "
            " ORDER BY count(*) DESC LIMIT 10;", (trial_ids,))
        return cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()
def get_conditions(category):
    """
    get complete list of conditions for the specified category

    Only conditions that have at least one row in trial_conditions are returned.
    @param category: id of the category
    @return: rows with condition and id, ordered by condition
    """
    conn = dblib.create_con(VERBOSE=True)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute(
            "select condition, id from ct_conditions where id in (select distinct condition_id from category_condition where category_id = %s)"
            "and exists (select 1 from trial_conditions tr where tr.condition_id = ct_conditions.id) order by condition;",
            (category,))
        return cur.fetchall()
    finally:
        # release the connection even when the query fails
        conn.close()