def get_journal_predictions(prediction_type, site_id): """ Returns all predictions of the given type on the jouransl of the given site. :param prediction_type: :param site_id: :return: List of (data, prob) pairs for all journals on the given site. Can be empty. """ db = get_db() cursor = db.execute( """SELECT data, probability FROM journalPrediction WHERE prediction_type = ? AND site_id = ? GROUP BY journal_oid ORDER BY id DESC""", (prediction_type, site_id)) predictions = [] results = cursor.fetchall() if results is not None: for result in results: data = result['data'] if 'probability' in result and result['probability'] is not None: prob = float(result['probability']) else: prob = None predictions.append((data, prob)) return predictions
def handle_journal_discussion_post(phase, evidence_user, reconsider_user, responsibility, discussion_id): # save the value from the request batch_id = int(request.form['batch_id']) site_id = int(request.form['site_id']) journal_oid = request.form['journal_oid'] highlighted_text = request.form['highlighted_text'] additional_discussion = request.form['additional_discussion'] is_annotation_changed = request.form['is_annotation_changed'] == "true" is_annotation_changed = 1 if is_annotation_changed else 0 if phase == "evidence" \ and is_annotation_changed == 0 \ and additional_discussion.strip().startswith(IS_ANNOTATION_CHANGED_EVIDENCE_DESCRIPTION.strip()): print("WARNING: Annotator indicated Evidence task shouldn't change annotation, but they had previously indicated that they WOULD change the annotation.") db = get_db() db.execute( """INSERT INTO discussionEntry (site_id, journal_oid, responsibility, phase, batch_id, discussion_id, evidence_username, reconsider_username, highlighted_text, additional_discussion, is_annotation_changed) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", (site_id, journal_oid, responsibility, phase, batch_id, discussion_id, evidence_user, reconsider_user, highlighted_text, additional_discussion, is_annotation_changed) ) db.commit() return make_response("OK", 200)
def get_responsibility_annotations_by_username(): db = get_db() created_at = '2018-08-23' cursor = db.execute(""" SELECT a.site_id, a.journal_oid, a.data, a.username, c.correct_username FROM journalAnnotation a LEFT JOIN journalAnnotationConflictResolution c ON a.site_id = c.site_id AND a.journal_oid = c.journal_oid AND a.annotation_type = c.annotation_type WHERE a.annotation_type = "journal_patient_responsibilities" AND a.data <> "" AND a.created >= ? GROUP BY a.site_id, a.journal_oid, a.username ORDER BY a.id DESC """, (created_at,)) responsibility_annotations = [] rows = cursor.fetchall() for row in rows: site_id, journal_oid = row['site_id'], row['journal_oid'] username = row['username'] is_corrected = row['correct_username'] is not None and row['correct_username'] != "" responsibilities = get_labels_from_responsibility_string(row['data']) responsibility_annotation_data = {'site_id': site_id, 'journal_oid': journal_oid, 'username': username, 'responsibilities': responsibilities, 'is_corrected': is_corrected} responsibility_annotations.append(responsibility_annotation_data) return responsibility_annotations
def register(): if request.method == 'POST': username = request.form['username'] password = request.form['password'] db = get_db() error = None if not username: error = 'Username is required.' elif not password: error = 'Password is required.' elif db.execute( 'SELECT id FROM user WHERE username = ?', (username,) ).fetchone() is not None: error = 'User {} is already registered.'.format(username) if error is None: db.execute( 'INSERT INTO user (username, password) VALUES (?, ?)', (username, generate_password_hash(password)) ) db.commit() return redirect(url_for('auth.login')) flash(error) return render_template('auth/register.html')
def load_logged_in_user(): user_id = session.get('user_id') if user_id is None: g.user = None else: g.user = get_db().execute( 'SELECT * FROM user WHERE id = ?', (user_id,) ).fetchone()
def get_existing_tasks(responsibility, current_user, other_user, phase): """ Gets all of the existing tasks (both Evidence and Reconsider) :param responsibility: :param current_user: :param other_user: :param phase: :return: """ evidence_username = current_user if phase == "evidence" else other_user reconsider_username = other_user if phase == "evidence" else current_user db = get_db() # First, we query to identify this phase's tasks in the database task_df = pd.read_sql_query( """SELECT * FROM discussionTask WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = ? AND batch_id = (SELECT MAX(batch_id) FROM discussionTask WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = ?) GROUP BY discussion_id ORDER BY id DESC""", db, index_col="id", params=(responsibility, evidence_username, reconsider_username, phase, responsibility, evidence_username, reconsider_username, phase)) if len(task_df) == 0: return None # Next, we want to fill in if each task is complete or not # In other words, is there a DiscussionEntry corresponding to this DiscussionTask batch_id = int(task_df.iloc[0]['batch_id']) # pull the batch_id to match on cursor = db.execute( """SELECT discussion_id, highlighted_text, additional_discussion, is_annotation_changed FROM discussionEntry WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = ? AND batch_id = ? GROUP BY discussion_id ORDER BY id DESC""", (responsibility, evidence_username, reconsider_username, phase, batch_id)) results = cursor.fetchall() assert len(results) <= len(task_df) task_df["is_complete"] = False for row in results: if phase == "evidence": # we only consider a task to be complete # if it has provided at least one piece of evidence highlighted_text, additional_discussion, is_annotation_changed = row['highlighted_text'], row['additional_discussion'], row['is_annotation_changed'] if (highlighted_text is None or highlighted_text.strip() == "") and (additional_discussion is None or additional_discussion.strip() == "") and is_annotation_changed == 0: # In other words, they didn't change their mind on the annotation but also didn't highlight any text or provide any additional discussion. continue discussion_id = row['discussion_id'] task_df.loc[task_df["discussion_id"] == discussion_id, "is_complete"] = True return task_df
def get_discussion_entry(responsibility, phase, discussion_id, evidence_user, reconsider_user, batch_id): db = get_db() cursor = db.execute( """SELECT * FROM discussionEntry WHERE responsibility = ? AND phase = ? AND discussion_id = ? AND evidence_username = ? AND reconsider_username = ? AND batch_id = ? ORDER BY id DESC""", (responsibility, phase, discussion_id, evidence_user, reconsider_user, batch_id) ) results = cursor.fetchall() if results is None or len(results) == 0: return None return results[0]
def get_discussion_task(responsibility, phase, discussion_id, evidence_user, reconsider_user): db = get_db() cursor = db.execute( """SELECT * FROM discussionTask WHERE responsibility = ? AND phase = ? AND discussion_id = ? AND evidence_username = ? AND reconsider_username = ? AND batch_id = (SELECT MAX(batch_id) FROM discussionTask WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = ?) ORDER BY id DESC""", (responsibility, phase, discussion_id, evidence_user, reconsider_user, responsibility, evidence_user, reconsider_user, phase) ) results = cursor.fetchall() # assert len(results) == 1 # unknown if this is a required condition... return results[0]
def get_discussion_entries(responsibility, phase, evidence_user, reconsider_user): db = get_db() cursor = db.execute( """SELECT * FROM discussionEntry WHERE responsibility = ? AND phase = ? AND evidence_username = ? AND reconsider_username = ? GROUP BY site_id, journal_oid ORDER BY id DESC""", (responsibility, phase, evidence_user, reconsider_user) ) results = cursor.fetchall() if results is None or len(results) == 0: return [] return results
def generate_reconsider_batch(responsibility, evidence_username, reconsider_username, batch_size=20): discussion_entries = get_discussion_entries(responsibility, "evidence", evidence_username, reconsider_username) print(f"Prior to removal of unfilled and annotation-changing journals, identified {len(discussion_entries)} Evidence task entries.") discussion_candidates = [{"site_id": entry['site_id'], "journal_oid": entry['journal_oid'], "batch_id": entry['batch_id'], "discussion_id": entry['discussion_id']} for entry in discussion_entries if is_evidence_discussion_entry_filled(entry) and entry['is_annotation_changed'] == 0] # TODO Need to trim Evidence entries with existing Reconsider entries print(f"Explored completed Evidence tasks, finding {len(discussion_candidates)} relevant disagreements.") if len(discussion_candidates) == 0: print("After trimming, no candidate journals were identified for Reconsider tasks.") return True random.shuffle(discussion_candidates) new_batch = discussion_candidates[:batch_size] # Create a new batch in the database db = get_db() cursor = db.execute("""SELECT MAX(batch_id) AS max_batch_id FROM discussionTask WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = 'reconsider' """, (responsibility, evidence_username, reconsider_username)) result = cursor.fetchone() prev_batch_id = result['max_batch_id'] if prev_batch_id is None: batch_id = 0 else: batch_id = prev_batch_id + 1 for discussion_id, disagreement in enumerate(new_batch): next_discussion_id = discussion_id + 1 if next_discussion_id == len(new_batch): next_discussion_id = 0 db.execute( """INSERT INTO discussionTask (site_id, journal_oid, responsibility, phase, batch_id, discussion_id, next_discussion_id, evidence_username, reconsider_username) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", (disagreement['site_id'], disagreement['journal_oid'], responsibility, "reconsider", batch_id, discussion_id, next_discussion_id, evidence_username, reconsider_username) ) db.commit() print(f"Created new batch of {len(new_batch)} Reconsider tasks with batch_id {batch_id}.") return True
def login(): if request.method == 'POST': username = request.form['username'] password = request.form['password'] db = get_db() error = None user = db.execute( 'SELECT * FROM user WHERE username = ?', (username,) ).fetchone() if user is None: error = 'Incorrect username.' elif not check_password_hash(user['password'], password): error = 'Incorrect password.' if error is None: session.clear() session['user_id'] = user['id'] return redirect(url_for('index')) flash(error) return render_template('auth/login.html')
def get_journal_prediction(prediction_type, site_id, journal_oid, default=None, return_prob=True): db = get_db() cursor = db.execute( """SELECT data, probability FROM journalPrediction WHERE prediction_type = ? AND site_id = ? AND journal_oid = ? ORDER BY id DESC""", (prediction_type, site_id, journal_oid)) latest_prediction = cursor.fetchone() if latest_prediction is not None and latest_prediction['data'] != "": data = latest_prediction['data'] prob = float(latest_prediction['probability']) else: data = default prob = None if return_prob: return data, prob else: return data
def generate_evidence_batch(responsibility, evidence_username, reconsider_username, batch_size=20): responsibility_annotations = get_responsibility_annotations_by_username() resp_df = pd.DataFrame(responsibility_annotations) resp_df = resp_df[resp_df['is_corrected'] == False] disagreement_list = [] journals_evaluated = 0 for key, group in resp_df.groupby(by=("site_id", "journal_oid")): if len(group) <= 1: continue annotating_users = set(group['username']) if evidence_username not in annotating_users or reconsider_username not in annotating_users: continue journals_evaluated += 1 evidence_user_responsibilities = group[group['username'] == evidence_username].iloc[0]['responsibilities'] reconsider_user_responsibilities = group[group['username'] == reconsider_username].iloc[0]['responsibilities'] evidence_user_present = responsibility in evidence_user_responsibilities reconsider_user_present = responsibility in reconsider_user_responsibilities if evidence_user_present and not reconsider_user_present: site_id, journal_oid = key disagreement = { "site_id": int(site_id), "journal_oid": journal_oid, "evidence_username": evidence_username, "reconsider_username": reconsider_username, } disagreement_list.append(disagreement) print(f"Explored {journals_evaluated} journals for Evidence disagreements, finding {len(disagreement_list)} relevant disagreements.") if len(disagreement_list) == 0: # No failure generating the list, but there weren't any disagreements to include return True disagreement_list = remove_completed_disagreement_tasks( disagreement_list, evidence_username, reconsider_username, responsibility) if len(disagreement_list) == 0: # No failure generating the list, but there weren't any disagreements to include return True random.shuffle(disagreement_list) new_batch = disagreement_list[:batch_size] # Create a new batch in the database db = get_db() cursor = db.execute("""SELECT MAX(batch_id) AS max_batch_id FROM discussionTask WHERE responsibility = ? AND evidence_username = ? AND reconsider_username = ? AND phase = 'evidence' """, (responsibility, evidence_username, reconsider_username)) result = cursor.fetchone() prev_batch_id = result['max_batch_id'] if prev_batch_id is None: batch_id = 0 else: batch_id = prev_batch_id + 1 for discussion_id, disagreement in enumerate(new_batch): next_discussion_id = discussion_id + 1 if next_discussion_id == len(new_batch): next_discussion_id = 0 db.execute( """INSERT INTO discussionTask (site_id, journal_oid, responsibility, phase, batch_id, discussion_id, next_discussion_id, evidence_username, reconsider_username) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", (disagreement['site_id'], disagreement['journal_oid'], responsibility, "evidence", batch_id, discussion_id, next_discussion_id, evidence_username, reconsider_username) ) db.commit() print(f"Created new batch of {len(new_batch)} Evidence tasks with batch_id {batch_id}.") return True