def _clear_this_user_locks(self, all_assignments):
    """Release every Priority row this user currently holds a lock on.

    Marks each locked priority as no longer checked out and clears its
    owner, then persists all changes in a single commit.
    """
    for priority in self._get_all_priorities_locked_by_this_user(all_assignments):
        priority.is_out = 0
        priority.locked_by = None
        Session.add(priority)
    # one commit covers every released lock
    Session.commit()
def customize_citations(self):
    """Persist the user's citation-display preferences and re-render the account page.

    The form posts "Show"/"Hide" strings for the journal, authors and
    keywords toggles; these are mapped to booleans on the User row.
    """
    show_or_hide = {"Show": True, "Hide": False}

    # Toggle values submitted with the request.
    journal_choice = request.params['toggle_journal']
    authors_choice = request.params['toggle_authors']
    keywords_choice = request.params['toggle_keywords']

    # Obtain the model User object (as opposed to the auth.User object).
    cur_user = controller_globals._get_user_from_email(
        request.environ.get('repoze.who.identity')['user'].email)

    # Flip the display booleans on the user row.
    cur_user.show_journal = show_or_hide[journal_choice]
    cur_user.show_authors = show_or_hide[authors_choice]
    cur_user.show_keywords = show_or_hide[keywords_choice]

    # Persist the changes.
    Session.commit()

    # These messages appear in their designated separate locations,
    # i.e. next to the citation-settings panel rather than as a
    # general message elsewhere on the screen.
    c.account_msg = ""
    c.account_msg_citation_settings = "Citation Settings changes have been applied."
    return render("/accounts/account.mako")
def __call__(self, environ, start_response):
    """Invoke the Controller.

    WSGIController.__call__ dispatches to the Controller method the
    request is routed to; this routing information is available in
    environ['pylons.routes_dict'].  The scoped Session is removed after
    every request, whether dispatch succeeded or raised.
    """
    try:
        response = WSGIController.__call__(self, environ, start_response)
    finally:
        # always detach the thread-local session, even on error
        Session.remove()
    return response
def create_account_handler(self):
    '''
    Create a new user account from the registration form, send a welcome
    email (best-effort), log the new user in, and redirect.

    Note that the verification goes on in model/form.py.
    '''
    # create the new user; post to db via sqlalchemy
    new_user = model.User()
    new_user.username = request.params['username']
    new_user.fullname = " ".join([request.params['first_name'],
                                  request.params['last_name']])
    new_user.experience = request.params['experience']
    new_user._set_password(request.params['password'])
    new_user.email = request.params['email']

    # These are for citation settings, initialized to True to make
    # everything in the citation visible by default.
    new_user.show_journal = True
    new_user.show_authors = True
    new_user.show_keywords = True

    Session.add(new_user)
    Session.commit()

    # send out an email
    greeting_message = """
    Hi, %s.\n
    Thanks for signing up at abstrackr (%s). You should be able to sign up
    now with username %s (only you know your password).

    This is just a welcome email to say hello, and that we've got your
    email. Should you ever need to reset your password, we'll send you
    instructions to this email.

    In the meantime, happy screening!

    -- The Brown EPC.
    """ % (new_user.fullname, url('/', qualified=True), new_user.username)
    try:
        self.send_email_to_user(new_user, "welcome to abstrackr", greeting_message)
    except Exception:
        # best-effort: mail isn't configured everywhere (e.g. the Windows
        # dev box), and a send failure must not abort account creation
        pass

    ###
    # log this user in programmatically (issue #28)
    rememberer = request.environ['repoze.who.plugins']['cookie']
    identity = {'repoze.who.userid': new_user.username}
    # fix: remember() was previously called a second time after its
    # headers had already been appended; a single call suffices.
    response.headerlist = response.headerlist + \
        rememberer.remember(request.environ, identity)

    # if they were originally trying to join a review prior to
    # registering, then join them now. (issue #8).
    if 'then_join' in request.params and request.params['then_join'] != '':
        redirect(url(controller="review", action="join",
                     review_code=request.params['then_join']))
    else:
        redirect(url(controller="account", action="login"))
def change_password(self):
    """Update the logged-in user's password when both form entries match."""
    current_user = request.environ.get('repoze.who.identity')['user']
    new_password = request.params["password"]
    if new_password == request.params["password_confirm"]:
        current_user._set_password(new_password)
        Session.commit()
        c.account_msg = "ok, your password has been changed."
    else:
        # entries disagreed -- leave the password untouched
        c.account_msg = "whoops -- the passwords didn't match! try again."
    c.account_msg_citation_settings = ""
    return render("/accounts/account.mako")
def my_projects(self):
    """Render the dashboard 'my projects' view for the logged-in user.

    Populates template context (c) with the projects the user leads and
    participates in, plus per-project prediction status flags.
    """
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person

    # Get user object from db.
    user = controller_globals._get_user_from_email(person.email)

    # Default any unset (NULL) citation-display preference to True.
    # fix: previously the else-branches set user.show_* but never assigned
    # c.show_*, leaving the template variables undefined when a preference
    # was NULL.
    if user.show_journal is None:
        user.show_journal = True
    c.show_journal = user.show_journal
    if user.show_authors is None:
        user.show_authors = True
    c.show_authors = user.show_authors
    if user.show_keywords is None:
        user.show_keywords = True
    c.show_keywords = user.show_keywords

    c.leading_projects = user.leader_of_projects
    leading_project_ids = [proj.id for proj in c.leading_projects]
    c.participating_projects = [p for p in user.member_of_projects
                                if p.id not in leading_project_ids]
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    statuses_q = Session.query(model.PredictionsStatus)
    c.statuses = {}
    c.do_we_have_a_maybe = {}
    for project_id in leading_project_ids:
        predictions_for_review = statuses_q.filter(
            model.PredictionsStatus.project_id == project_id).all()
        # predictions are available only if a status row says they exist
        if len(predictions_for_review) > 0 and \
                predictions_for_review[0].predictions_exist:
            c.statuses[project_id] = True
        else:
            c.statuses[project_id] = False
        c.do_we_have_a_maybe[project_id] = False

    # Flag projects that have locked priorities
    c.projects_w_locked_priorities = self._get_projects_w_locked_priorities(
        leading_project_ids)

    c.my_work = False
    c.my_projects = True
    return render('/accounts/dashboard.mako')
def _get_users_labels_for_assignment(self, project_id, user_id, assignment_id):
    """Returns a user's list of labels across a specific project"""
    return Session.query(model.Label).filter_by(
        project_id=project_id,
        user_id=user_id,
        assignment_id=assignment_id).all()
def _get_username_from_id(self, id):
    """Resolve a user id to a username, caching lookups in self.user_dict.

    The sentinel CONSENSUS_USER maps to the literal name "consensus".
    """
    if id == CONSENSUS_USER:
        return "consensus"
    if id not in self.user_dict:
        # cache miss -- hit the database once and remember the result
        user = Session.query(model.User).filter(model.User.id == id).one()
        self.user_dict[id] = user.username
    return self.user_dict[id]
def _build_tags_dict(self):
    """Fill citation_to_tags_dict with the tag texts attached to each citation."""
    tags = Session.query(model.Tag, model.TagType)\
        .filter(model.Tag.citation_id.in_(self.all_citations))\
        .join(model.TagType, model.Tag.tag_id == model.TagType.id).all()
    # guarantee an (possibly empty) entry for every citation
    for citation_id in self.all_citations:
        self.citation_to_tags_dict.setdefault(citation_id, [])
    # each row pairs a Tag with its TagType; record the type's text
    for tag, tag_type in tags:
        self.citation_to_tags_dict[tag.citation_id].append(tag_type.text)
def gen_token_to_reset_pwd(self, user):
    """Generate a unique random password-reset token for *user*.

    The token is stored in the ResetPassword table alongside the user's
    email and returned to the caller.
    """
    # NOTE(review): `random` is not cryptographically secure; a security
    # review might prefer a stronger source for reset tokens.
    make_token = lambda N: ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for x in range(N))
    reset_pwd_q = Session.query(model.ResetPassword)
    existing_tokens = [entry.token for entry in reset_pwd_q.all()]
    token_length = 10
    cur_token = make_token(token_length)
    # redraw until the token is unique.
    # fix: this retry path previously called undefined `make_code`,
    # raising NameError on any collision.
    while cur_token in existing_tokens:
        cur_token = make_token(token_length)
    reset = model.ResetPassword()
    reset.token = cur_token
    reset.user_email = user.email
    Session.add(reset)
    Session.commit()
    return cur_token
def _get_notes_for_citation(self, citation_id, user_id):
    """Return the first Note this user made on the citation, or None."""
    matching_notes = Session.query(model.Note).filter(and_(
        model.Note.citation_id == citation_id,
        model.Note.creator_id == user_id)).all()
    return matching_notes[0] if matching_notes else None
def _row_unique(row, project_id, user_id):
    # Look up whether a (term, label) pair from an imported row already
    # exists as a LabeledFeature for this user and project.
    # NOTE(review): on the success path this falls through and implicitly
    # returns None rather than a (bool, code) pair -- callers presumably
    # unpack two values, so verify this function isn't truncated.
    # NOTE(review): the IndexError guard suggests `row` may have fewer
    # than two columns; (False, 2) looks like an error code -- confirm.
    try:
        labeledfeatures = Session.query(model.LabeledFeature).\
            filter_by(term = row[0]).\
            filter_by(label = row[1]).\
            filter_by(project_id = project_id).\
            filter_by(user_id = user_id).all()
    except IndexError, e:
        return False, 2
def confirm_password_reset(self, id):
    """Handle a password-reset link: validate the token and reset the password.

    The token itself becomes the temporary password; all outstanding
    reset tokens for the user are consumed.
    """
    token = str(id)
    # we pull all in case they've tried to reset their pwd a few times
    # by the way, these should time-expire...
    matches = Session.query(model.ResetPassword).filter(
        model.ResetPassword.token == token).all()
    if not matches:
        return """ Hrmm... It looks like you're trying to reset your password, but I can't match the provided token. Please go back to the email that was sent to you and make sure you've copied the URL correctly. """
    user = controller_globals._get_user_from_email(matches[0].user_email)
    # consume every outstanding token for this user
    for match in matches:
        Session.delete(match)
    user._set_password(token)
    Session.commit()
    return ''' ok! your password has been set to %s (you can change it once you've logged in).\n <a href="%s">log in here</a>.''' % (token, url('/', qualified=True))
def _get_all_priorities_locked_by_this_user(self, all_assignments):
    """Collect the distinct Priority rows locked by the owners of these assignments."""
    locked = []
    for assignment in all_assignments:
        rows = Session.query(model.Priority).\
            filter_by(project_id=assignment.project_id).\
            filter_by(locked_by=assignment.user_id).all()
        locked.extend(rows)
    # de-duplicate (the same priority can surface via several assignments)
    return list(set(locked))
def _build_notes_dict(self):
    """Fill citation_to_notes_dict: citation id -> {username: Note or None}."""
    notes = Session.query(model.Note).filter(
        model.Note.citation_id.in_(self.all_citations)).all()
    # start every (citation, labeler) cell at None
    for citation_id in self.all_citations:
        if citation_id not in self.citation_to_notes_dict:
            self.citation_to_notes_dict[citation_id] = dict(
                (labeler.username, None) for labeler in self.all_labelers)
    # then drop each note into its creator's cell
    for note in notes:
        author = self._get_username_from_id(note.creator_id)
        self.citation_to_notes_dict[note.citation_id][author] = note
def _get_tag_types_for_review(self, review_id, only_for_user_id=None):
    """Return the tag-type texts defined for a review.

    When only_for_user_id is given, restrict to tag types that user created.
    """
    query = Session.query(model.TagType).filter(
        model.TagType.project_id == review_id)
    if only_for_user_id:
        query = query.filter(model.TagType.creator_id == only_for_user_id)
    return [tag_type.text for tag_type in query.all()]
def _project_has_locked_priorities(self, project_id):
    """Returns True if project has any locked priorities, else False

    Integer -> Boolean
    """
    first_locked = Session.query(model.Priority).\
        filter_by(project_id=project_id).\
        filter_by(is_out=1).first()
    # first() yields a row or None; any row means a lock exists
    return first_locked is not None
def _get_tag_types_for_citation(self, citation_id, objects=False):
    """Return the TagType texts (or objects, if objects=True) for a citation's tags.

    fix: the result list previously shadowed the fetched tags
    (`tags = []` clobbered the query result), so the loop never ran and
    this always returned []. The tags are now fetched as Tag objects
    (texts_only=False) so their .tag_id can be mapped to TagType rows.
    """
    citation_tags = self._get_tags_for_citation(citation_id, texts_only=False)
    # now map those tag ids to TagType rows / names
    tag_type_q = Session.query(model.TagType)
    results = []
    for tag in citation_tags:
        tag_obj = tag_type_q.filter(model.TagType.id == tag.tag_id).one()
        if objects:
            results.append(tag_obj)
        else:
            results.append(tag_obj.text)
    return results
def _get_tags_for_citation(self, citation_id, texts_only=True, only_for_user_id=None):
    """Return the tags on a citation.

    By default all users' tags are returned as texts; pass
    texts_only=False for the Tag objects, or only_for_user_id to
    restrict to one creator's tags.
    """
    query = Session.query(model.Tag).filter(
        model.Tag.citation_id == citation_id)
    if only_for_user_id:
        # filter on the study *and* the user
        query = query.filter(model.Tag.creator_id == only_for_user_id)
    tags = query.all()
    if texts_only:
        return self._tag_ids_to_texts([tag.tag_id for tag in tags])
    return tags
def parse_csv_row(row, users, assignments, project): source_id = row[0] citation = Session.query(model.Citation).filter_by( project_id=project.id).filter_by(pmid=source_id).first() for index, label_val in enumerate(row[1:]): if label_val: user = users[index] assignment = assignments[index] label = model.Label() label.project_id = project.id label.study_id = citation.id label.assignment_id = assignment.id label.user_id = user.id label.labeling_time = 1 label.first_labeled = datetime.datetime.utcnow() label.label_last_updated = datetime.datetime.utcnow() label.label = label_val model.Session.add(label) print "Created label " + label_val + " by user " + user.username + " for " + str( citation.id)
def init_model(engine):
    """Call me before using any of the tables or classes in the model"""
    # Bind the scoped Session factory to the given engine so that all
    # subsequent queries go through this engine's connection pool.
    Session.configure(bind=engine)
def _set_assignment_done_status(self, all_assignments):
    """Recompute and persist the .done flag on each given assignment."""
    for assignment in all_assignments:
        assignment.done = controller_globals._check_assignment_done(assignment)
        Session.add(assignment)
    # a single commit covers every updated assignment
    Session.commit()
def _create_reviews(p_id, iter_size, which_iter):
    # Experiment driver: repeatedly clones project p_id into a fresh
    # Project, seeds it with a random labeled sample, then simulates
    # active-learning screening increments, exporting prediction
    # snapshots to CSV after each increment.
    # NOTE(review): formatting reconstructed from a whitespace-mangled
    # source; per-increment commit placement and the final `return`
    # level should be double-checked against history.

    # one-time cleanup: strip every other project's citations/labels,
    # guarded by a lock file so it only ever runs once
    lock_file_path = join(dirname(abspath(__file__)), '_delete_lock.lck')
    if not isfile(lock_file_path):
        Session.query(
            model.Citation).filter(model.Citation.project_id != p_id).delete()
        Session.query(
            model.Label).filter(model.Label.project_id != p_id).delete()
        Session.commit()
        open(lock_file_path, 'w+').close()

    u_id = 2629        # hard-coded experimenter user id
    k_init = 400       # size of the initial random labeled sample
    c_count = len(
        Session.query(model.Citation).filter_by(project_id=p_id).all())
    k_inc = 100        # citations added per active-learning increment

    for itercount in range(iter_size * which_iter,
                           iter_size * which_iter + iter_size):
        ### THIS is the code for one run of the experiment
        ## labeled citation counter
        labeled_citation_counter = 0
        labels = Session.query(model.Label).filter_by(project_id=p_id).all()
        user = Session.query(model.User).filter_by(id=u_id).first()
        citations = Session.query(
            model.Citation).filter_by(project_id=p_id).all()
        print len(citations)
        c_count = len(citations)

        # draw the initial random sample and gather its source labels
        r_sample = defaultdict(list)
        sample_indexes = sample(range(c_count), k_init)
        C_r = []
        for ii in sample_indexes:
            C_r.append(citations[ii])
        for cc in C_r:
            for ll in Session.query(model.Label).filter_by(
                    project_id=p_id).filter_by(study_id=cc.id).all():
                r_sample[ll.study_id].append(ll)

        # fresh clone project for this experiment run
        new_review = model.Project()
        new_review.leaders.append(user)
        new_review.initial_round_size = 0
        new_review.tag_privacy = True
        Session.add(new_review)
        Session.flush()

        # state_dict: clone-citation id -> 0 unlabeled / 1 seed / 2 active
        state_dict = defaultdict(int)
        # citation_dict: clone-citation id -> source-citation id
        citation_dict = {}
        for c in citations:
            citation = model.Citation()
            citation.project_id = new_review.id
            citation.title = c.title
            citation.abstract = c.abstract
            citation.keywords = c.keywords
            citation.refman = c.refman
            model.Session.add(citation)
            Session.flush()
            citation_dict[citation.id] = c.id
            if c.id in r_sample:
                # seed citation: copy its labels into the clone
                labeled_citation_counter += 1
                state_dict[citation.id] = 1
                for t in r_sample[c.id]:
                    label = model.Label()
                    label.project_id = new_review.id
                    label.study_id = citation.id
                    label.label = t.label
                    model.Session.add(label)
        print new_review.id
        Session.commit()

        ## i is a counter for the current increment
        i = 0
        while True:
            ## we want to change the increment size if there are a certain number of citations is labeled
            #if labeled_citation_counter > 15000:
            #    k_inc = 2000
            #elif labeled_citation_counter > 5000:
            #    k_inc = 1000
            #else:
            #    k_inc = 500
            r_sample = defaultdict(list)
            print "EXPERIMENT NO: " + str(itercount)
            make_predictions(new_review.id)

            ######################## here's where I record the results
            preds_for_review = Session.query(model.Prediction).filter(
                model.Prediction.project_id == new_review.id).all()
            path_to_preds_out = os.path.join(
                "_exports",
                "predictions_%d_%d_of_%d.csv" % (p_id, i, itercount))
            with open(path_to_preds_out, 'w+') as fout:
                csv_out = csv.writer(fout)
                preds_file_headers = [
                    "citation_id", "refman", "title",
                    "predicted p of being relevant",
                    "'hard' screening prediction*", "state"
                ]
                csv_out.writerow(preds_file_headers)
                sorted_preds = sorted(preds_for_review,
                                      key=lambda x: x.predicted_probability,
                                      reverse=True)
                for pred in sorted_preds:
                    citation = Session.query(model.Citation).filter(
                        model.Citation.id == pred.study_id).first()
                    #citation = self._get_citation_from_id(pred.study_id)
                    citation_title = citation.title.encode('ascii', 'ignore')
                    row_str = [
                        citation.id, citation.refman, citation_title,
                        pred.predicted_probability, pred.prediction,
                        state_dict[citation.id]
                    ]
                    csv_out.writerow(row_str)
            ######################### ---------------------------

            i += 1
            # stop once every clone citation carries a label
            if labeled_citation_counter >= c_count:
                break

            # pick the top-k_inc still-unlabeled predictions
            P_a = []
            for pa in Session.query(model.Prediction).filter_by(
                    project_id=new_review.id).order_by(
                        model.Prediction.predicted_probability.desc()).all():
                if state_dict[pa.study_id] == 0:
                    P_a.append(pa)
                if len(P_a) == k_inc:
                    break

            if len(P_a) == 0:
                # no usable predictions: fall back to any unlabeled citations
                print "~~~NO PREDS!!!"
                ccc = [
                    label for label in Session.query(model.Citation.id).filter_by(
                        project_id=new_review.id).filter(
                            ~model.Citation.labels.any()).limit(k_inc)
                ]
                print len(ccc)
                for cc in ccc:
                    labeled_citation_counter += 1
                    state_dict[cc.id] = 1
                    for ll in Session.query(model.Label).filter_by(
                            study_id=citation_dict[cc.id]).all():
                        label = model.Label()
                        label.project_id = new_review.id
                        label.study_id = cc.id
                        label.label = ll.label
                        model.Session.add(label)
            else:
                # copy the source labels for each selected prediction
                for pp in P_a:
                    labeled_citation_counter += 1
                    state_dict[pp.study_id] = 2
                    for ll in Session.query(
                            model.Label).filter_by(project_id=p_id).filter_by(
                                study_id=citation_dict[pp.study_id]).all():
                        label = model.Label()
                        label.project_id = new_review.id
                        label.study_id = pp.study_id
                        label.label = ll.label
                        model.Session.add(label)
            Session.commit()
            print len(
                Session.query(
                    model.Label).filter_by(project_id=new_review.id).all())
    return
def my_projects(self):
    """Render the dashboard 'my projects' view for the logged-in user.

    Populates template context (c) with led/participating projects and,
    for each led project, prediction status, the date predictions were
    last made, the count of labels added since then, and whether every
    citation has been screened.
    """
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person

    # Get user object from db.
    user = controller_globals._get_user_from_email(person.email)

    # Default any unset (NULL) citation-display preference to True.
    # fix: previously the else-branches set user.show_* but never assigned
    # c.show_*, leaving the template variables undefined when a preference
    # was NULL.
    if user.show_journal is None:
        user.show_journal = True
    c.show_journal = user.show_journal
    if user.show_authors is None:
        user.show_authors = True
    c.show_authors = user.show_authors
    if user.show_keywords is None:
        user.show_keywords = True
    c.show_keywords = user.show_keywords

    c.leading_projects = user.leader_of_projects
    leading_project_ids = [proj.id for proj in c.leading_projects]
    c.participating_projects = [
        p for p in user.member_of_projects if p.id not in leading_project_ids
    ]
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    statuses_q = Session.query(model.PredictionsStatus)
    c.statuses = {}
    c.prediction_dates = {}
    c.labels_since_prediction = {}
    c.screened_all = {}
    c.do_we_have_a_maybe = {}
    for project_id in leading_project_ids:
        predictions_for_review = statuses_q.filter(
            model.PredictionsStatus.project_id == project_id).all()

        # labels added/updated since predictions were last generated
        labels_since_prediction = Session.query(
            model.Label).filter(model.Label.project_id == project_id)
        if len(predictions_for_review) > 0 and \
                predictions_for_review[0].predictions_last_made:
            labels_since_prediction = labels_since_prediction.filter(
                model.Label.label_last_updated >
                predictions_for_review[0].predictions_last_made)
        labels_since_prediction = labels_since_prediction.order_by(
            model.Label.label_last_updated.desc())

        if len(predictions_for_review) > 0 and \
                predictions_for_review[0].predictions_exist:
            c.statuses[project_id] = True
            c.prediction_dates[project_id] = \
                predictions_for_review[0].predictions_last_made
            c.labels_since_prediction[project_id] = \
                labels_since_prediction.count()
            # True when no prediction rows remain for the project
            c.screened_all[project_id] = Session.query(
                model.Prediction).filter(
                    model.Prediction.project_id == project_id).count() == 0
        else:
            c.statuses[project_id] = False
        c.do_we_have_a_maybe[project_id] = False

    # Flag projects that have locked priorities
    c.projects_w_locked_priorities = self._get_projects_w_locked_priorities(
        leading_project_ids)

    c.my_work = False
    c.my_projects = True
    return render('/accounts/dashboard.mako')
def my_work(self):
    """Render the dashboard 'my work' view: the user's outstanding and
    finished assignments, releasing any stale priority locks first."""
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person
    user = controller_globals._get_user_from_email(c.person.email)
    if not user:
        log.error('''\
Hum...fetching user from the database returned False. We need to
investigate. Go remove the catch all in controller_globals.py, method
_get_user_from_email() to see which OperationalError is being raised
''')

    # If somehow the user's citation settings variables don't get
    # initialized yet, default them to True here to avoid null errors.
    # fix: show_authors/show_keywords previously left c.show_* unassigned
    # in the NULL case; use the same None-check as show_journal.
    c.show_journal = user.show_journal if user.show_journal is not None else True
    if user.show_authors is None:
        user.show_authors = True
    c.show_authors = user.show_authors
    if user.show_keywords is None:
        user.show_keywords = True
    c.show_keywords = user.show_keywords

    # pull all assignments for this person
    all_assignments = Session.query(model.Assignment).filter(
        model.Assignment.user_id == person.id).all()

    # This process is incredibly slow. Take it out for now and find out
    # why the .done and .done_so_far field on assignment is off sometimes.
    #self._set_assignment_done_status(all_assignments)
    self._clear_this_user_locks(all_assignments)

    # Build assignment completion status dictionary
    c.d_completion_status = self._get_assignment_completion_status(
        all_assignments)

    c.outstanding_assignments = [a for a in all_assignments if not a.done]

    #####
    # for any review that has an initial assignment, we will show
    # *only* that assignment, thereby forcing participants to
    # finish initial assignments before moving on to other
    # assignments. fix for issue #5.
    ####
    # which reviews have (outstanding) initial assignments?
    reviews_with_initial_assignments = [
        a.project_id for a in c.outstanding_assignments
        if a.assignment_type == "initial"]

    # now remove other (non-initial) assignments for reviews
    # that have an initial assignment
    c.outstanding_assignments = [
        a for a in c.outstanding_assignments
        if a.project_id not in reviews_with_initial_assignments or
        a.assignment_type == "initial"]

    c.finished_assignments = [a for a in all_assignments if a.done]

    c.participating_projects = user.member_of_projects
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    c.my_work = True
    c.my_projects = False
    return render('/accounts/dashboard.mako')
# One-off maintenance script: verify which citations of a project can be
# matched by title against a tab-separated export file.
# fix: `csv` and `appconfig` were used below without being imported,
# so the script raised NameError at runtime.
import csv

from paste.deploy import appconfig
from pylons import config

from abstrackr.config.environment import load_environment
from abstrackr.model.meta import Session
import abstrackr.model as model
from sqlalchemy import and_

conf = appconfig('config:production.ini', relative_to='.')
load_environment(conf.global_conf, conf.local_conf)

### This is fixing Emily's project. The refman ids weren't saved when she
### imported the project
PROJECT_ID = 219
FILE_PATH = './Abstraktr_Update_Lit_Review_11.12.13.txt'

citations_q = Session.query(model.Citation)

found = 0
not_found = 0
with open(FILE_PATH, 'r') as f:
    reader = csv.DictReader(f, delimiter='\t')
    for row in reader:
        #print(row['id'], row['title'], row['abstract'])
        citation = citations_q.filter_by(title=row['title'],
                                         project_id=PROJECT_ID).first()
        if not citation:
            print('could not find title matching with %s' % row['title'])
            not_found += 1
        else:
            found += 1
def my_work(self):
    """Render the dashboard 'my work' view for the logged-in user.

    Releases stale priority locks, then builds the outstanding/finished
    assignment lists for the dashboard template.
    """
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person
    user = controller_globals._get_user_from_email(c.person.email)
    if not user:
        log.error('''\
Hum...fetching user from the database returned False. We need to
investigate. Go remove the catch all in controller_globals.py, method
_get_user_from_email() to see which OperationalError is being raised
''')

    # If somehow the user's citation settings variables don't get
    # initialized yet, default them to True here to avoid null errors.
    # fix: show_authors/show_keywords previously left c.show_* unassigned
    # in the NULL case; use the same None-check as show_journal.
    c.show_journal = user.show_journal if user.show_journal is not None else True
    if user.show_authors is None:
        user.show_authors = True
    c.show_authors = user.show_authors
    if user.show_keywords is None:
        user.show_keywords = True
    c.show_keywords = user.show_keywords

    # pull all assignments for this person
    all_assignments = Session.query(model.Assignment).filter(
        model.Assignment.user_id == person.id).all()

    # This process is incredibly slow. Take it out for now and find out
    # why the .done and .done_so_far field on assignment is off sometimes.
    #self._set_assignment_done_status(all_assignments)
    self._clear_this_user_locks(all_assignments)

    # Build assignment completion status dictionary
    c.d_completion_status = self._get_assignment_completion_status(all_assignments)

    c.outstanding_assignments = [a for a in all_assignments if not a.done]

    #####
    # for any review that has an initial assignment, we will show
    # *only* that assignment, thereby forcing participants to
    # finish initial assignments before moving on to other
    # assignments. fix for issue #5.
    ####
    # which reviews have (outstanding) initial assignments?
    reviews_with_initial_assignments = [
        a.project_id for a in c.outstanding_assignments
        if a.assignment_type == "initial"]

    # now remove other (non-initial) assignments for reviews
    # that have an initial assignment
    c.outstanding_assignments = [
        a for a in c.outstanding_assignments
        if a.project_id not in reviews_with_initial_assignments or
        a.assignment_type == "initial"]

    c.finished_assignments = [a for a in all_assignments if a.done]

    c.participating_projects = user.member_of_projects
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(c.participating_projects)

    c.my_work = True
    c.my_projects = False
    return render('/accounts/dashboard.mako')
def _get_citations_for_review(self, review_id):
    """Return every Citation belonging to the given review/project."""
    return Session.query(model.Citation).filter(
        model.Citation.project_id == review_id).all()
def write_labels(self):
    """Export this project's labels (and optionally notes/tags) to a CSV
    file under STATIC_FILES_PATH/exports and return its public URL.

    NOTE(review): fields_to_export aliases self.fields and is appended to
    below, so a second call on the same instance would accumulate
    labeler/notes columns -- confirm whether a copy was intended.
    """
    # get fields
    review_q = Session.query(model.Project)
    review = review_q.filter(model.Project.id == self.project_id).one()
    self.all_labelers = self._get_participants_for_review(self.project_id)

    ## some helpers
    none_to_str = lambda x: "" if x is None else x
    zero_to_none = lambda x: "none" if x==0 else x

    fields_to_export = self.fields

    # map citation ids to dictionaries that, in turn, map
    # usernames to labels
    citation_to_lbls_dict = {}
    self.all_citations = [cit.id for cit in self._get_citations_for_review(self.project_id)]
    # tracks which citations received at least one label
    citations_labeled_dict = {}
    for cit in self.all_citations:
        citations_labeled_dict[cit]=False

    # likewise, for notes
    # citation_to_notes_dict = {}
    if "notes" in fields_to_export:
        self._build_notes_dict()
    if "tags" in fields_to_export:
        self._build_tags_dict()

    # we filter the citations list (potentially)
    citations_to_export = []

    # for efficiency reasons, we keep track of whether we need
    # create a new empty dictionary for the current citation
    last_citation_id = None
    labeler_names = ["consensus"] # always export the consensus

    # first collect labels for all citations that pass our
    # filtering criteria
    for citation, label in Session.query(\
            model.Citation, model.Label).filter(model.Citation.id==model.Label.study_id).\
            filter(model.Label.project_id==self.project_id).order_by(model.Citation.id).all():
        # the above gives you all labeled citations for this review
        # i.e., citations that have at least one label
        citations_labeled_dict[citation.id]=True
        if self.lbl_filter_f(label):
            cur_citation_id = citation.id
            if last_citation_id != cur_citation_id:
                citation_to_lbls_dict[citation.id] = {}
                # citation_to_notes_dict[cur_citation_id] = {}
                citations_to_export.append(citation)
            # NOTE that we are assuming unique user names per-review
            labeler = self._get_username_from_id(label.user_id)
            if not labeler in labeler_names:
                labeler_names.append(labeler)
            citation_to_lbls_dict[cur_citation_id][labeler] = label.label
            last_citation_id = cur_citation_id

    # note that this will only contain entries for reviews that have
    # been labeled! i.e., notes made on unlabeled citations are not
    # reflected here.
    # if "notes" in fields_to_export
    #     citation_to_notes_dict[cur_citation_id][labeler] = \
    #         self._get_notes_for_citation(cur_citation_id, label.user_id)

    # we automatically export all labeler's labels
    for labeler in labeler_names:
        fields_to_export.append(labeler)

    # finally, export notes (if asked)
    notes_fields = ["general", "population", "intervention/comparator", "outcome"]
    if "notes" in fields_to_export:
        fields_to_export.remove("notes")
        # we append all labelers notes
        for labeler in labeler_names:
            if labeler != "consensus":
                for notes_field in notes_fields:
                    fields_to_export.append("%s notes (%s)" % (notes_field, labeler))

    # header row first, then one CSV row per exported citation
    self.write_buffer = [",".join(fields_to_export)]
    for citation in citations_to_export:
        cur_line = []
        for field in fields_to_export:
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                # double quotes are swapped for singles so the field can
                # be wrapped in double quotes
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
            elif field == "tags":
                #cur_tags = self._get_tags_for_citation(citation.id)
                cur_tags = self.citation_to_tags_dict[citation.id]
                cur_line.append('"%s"' % ",".join(cur_tags))
            elif field in labeler_names:
                cur_labeler = field
                cur_lbl = "o"
                cit_lbl_d = citation_to_lbls_dict[citation.id]
                if cur_labeler in cit_lbl_d:
                    cur_lbl = str(cit_lbl_d[cur_labeler])
                # create a consensus label automagically in cases where
                # there is unanimous agreement
                elif cur_labeler == "consensus":
                    if len(set(cit_lbl_d.values()))==1:
                        if len(cit_lbl_d) > 1:
                            # if at least two people agree (and none disagree), set the
                            # consensus label to reflect this
                            cur_lbl = str(cit_lbl_d.values()[0])
                        else:
                            # then only one person has labeled it --
                            # consensus is kind of silly
                            cur_lbl = "o"
                    else:
                        # no consensus!
                        cur_lbl = "x"
                cur_line.append(cur_lbl)
            elif "notes" in field:
                # notes field
                # this is kind of hacky -- we first parse out the labeler
                # name from the column header string assembled above and
                # then get a user id from this.
                get_labeler_name_from_str = lambda x: x.split("(")[1].split(")")[0]
                cur_labeler = get_labeler_name_from_str(field)
                # @TODO not sure what we should do in consensus case...
                if cur_labeler == "consensus":
                    cur_line.append("")
                else:
                    cur_note = None
                    cur_notes_d = self.citation_to_notes_dict[citation.id]
                    if cur_labeler in cur_notes_d:
                        cur_note = cur_notes_d[cur_labeler]
                    if cur_note is None:
                        cur_line.append("")
                    else:
                        notes_field = field
                        if "general" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.general.replace('"', "'"))
                        elif "population" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.population.replace('"', "'"))
                        elif "outcome" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.outcome.replace('"', "'"))
                        else:
                            # intervention/comparator
                            cur_line.append("\"%s\"" % cur_note.ic.replace('"', "'"))
        self.write_buffer.append(",".join(cur_line))

    # exporting *all* (including unlabeled!) citations, per Ethan's request
    #-- may want to make this optional
    self.write_buffer.append("citations that are not yet labeled by anyone")
    # jj 2014-08-20: Request to include citation information even for those citations that have not
    # been labeled yet.
    unlabeled_citation_ids = [cit for cit in citations_labeled_dict if not citations_labeled_dict[cit]]
    unlabeled_citations = Session.query(model.Citation).filter(model.Citation.id.in_(unlabeled_citation_ids)).all()
    for citation in unlabeled_citations:
        cur_line = []
        for field in fields_to_export:
            # only bibliographic fields apply here; label/notes columns
            # are left out for unlabeled citations
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
        self.write_buffer.append(",".join(cur_line))

    path_to_export = os.path.join(STATIC_FILES_PATH, "exports",
                                  "labels_%s.csv" % self.project_id)
    try:
        fout = open(path_to_export, 'w')
    except IOError:
        # exports directory doesn't exist yet -- create it and retry
        os.makedirs(os.path.dirname(path_to_export))
        fout = open(path_to_export, 'w')
    lbls_str = "\n".join(self.write_buffer)
    lbls_str = lbls_str.encode("utf-8", "ignore")
    fout.write(lbls_str)
    fout.close()
    return "%sexports/labels_%s.csv" % (url('/', qualified=True), self.project_id)
def __init__(self, id, lbl_filter_f = None):
    """
    Build an in-memory XML tree (self.root) describing the abstrackr
    project `id`: project meta data, its members/leaders, and every
    citation together with its keywords, authors, tags, labels and notes.

    id => id of the abstrackr project
    lbl_filter_f => optional predicate applied to each Label; only
                    labels for which it returns True are exported.
                    Defaults to exporting everything.
    """
    self.project_id = id
    # default to exporting everything
    if lbl_filter_f is None:
        lbl_filter_f = lambda label: True

    # get fields
    # NOTE(review): `conf` is not used below -- presumably appconfig()
    # is called for its side effects; confirm before removing.
    conf = appconfig('config:development.ini', relative_to='.')
    #load_environment(conf.global_conf, conf.local_conf)

    # .one() raises if the project does not exist
    review_q = Session.query(model.Project)
    review = review_q.filter(model.Project.id == id).one()

    """ create XML root and add meta information -Birol """
    self.root = ET.Element('project')
    et_project_name = ET.SubElement(self.root, 'name')
    et_project_name.text = review.name
    et_project_id = ET.SubElement(self.root, 'id')
    et_project_id.text = str(review.id)

    # NOTE(review): labeler_dict appears to be unused in this method.
    labeler_dict = {}
    member_list = review.members
    leader_list = review.leaders

    # one <member> element per member *and* per leader of the project
    et_project_member_list = ET.SubElement(self.root, 'member_list')
    for user in member_list + leader_list:
        et_project_member = ET.SubElement(et_project_member_list, 'member')
        et_project_member_id = ET.SubElement(et_project_member, 'id')
        et_project_member_id.text = str(user.id)
        et_project_member_username = ET.SubElement(et_project_member, 'username')
        et_project_member_username.text = user.username
        et_project_member_email = ET.SubElement(et_project_member, 'email')
        et_project_member_email.text = user.email

    """ create an ET subelement to hold the citations -Birol """
    et_citation_list = ET.SubElement(self.root, "citation_list")
    citations_to_export = Session.query(model.Citation).filter_by(project_id = id).all()
    for citation in citations_to_export:
        """ create ET subelement for each citation, then add the relevant fields -Birol """
        ## some helpers
        none_to_str = lambda x: "" if x is None else x
        # ids/pmids of 0 are a sentinel; render them as the string "none"
        zero_to_none = lambda x: "none" if x==0 else str(x)

        et_citation = ET.SubElement(et_citation_list, "citation")
        et_citation_internal = ET.SubElement(et_citation, "internal_id")
        et_citation_internal.text = zero_to_none(citation.id)
        et_citation_source = ET.SubElement(et_citation, "source_id")
        et_citation_source.text = zero_to_none(citation.refman)
        et_citation_pubmed = ET.SubElement(et_citation, "pubmed_id")
        et_citation_pubmed.text = zero_to_none(citation.pmid)

        """ We replace double quotes with single quotes here, so we do not have to do it later. -Birol """
        et_citation_abstract = ET.SubElement(et_citation, "abstract")
        et_citation_abstract.text = none_to_str(citation.abstract).replace('"', "'")
        et_citation_title = ET.SubElement(et_citation, "title")
        et_citation_title.text = citation.title.replace('"', "'")

        """ Not sure if all the keywords are separated by commas. If not, this would cause problems. -Birol """
        kw_list = citation.keywords.replace('"', "'").split(',')
        et_citation_keyword_list = ET.SubElement(et_citation, "keyword_list")
        for kw in kw_list:
            if kw == "":
                continue
            et_citation_keyword = ET.SubElement(et_citation_keyword_list, "keyword")
            et_citation_keyword.text = kw

        et_citation_journal = ET.SubElement(et_citation, "journal")
        et_citation_journal.text = none_to_str(citation.journal)

        """ Also not sure if all the authors are separated by " and ".  If not, there will be suffering. -Birol """
        auth_list = citation.authors.split(' and ')
        et_citation_author_list = ET.SubElement(et_citation, "author_list")
        for auth in auth_list:
            if auth == "":
                continue
            et_citation_author = ET.SubElement(et_citation_author_list, "author")
            et_citation_author.text = auth

        # tags attached to this citation (TagType rows joined through Tag)
        tag_list = Session.query(model.TagType).join(model.Tag, model.Tag.tag_id == model.TagType.id).filter_by(citation_id=citation.id).all()
        et_citation_tag_list = ET.SubElement(et_citation, "tag_list")
        for tag in tag_list:
            et_citation_tag = ET.SubElement(et_citation_tag_list, "tag")
            et_citation_tag.text = tag.text

        # labels for this citation, filtered through lbl_filter_f
        label_list = Session.query(model.Label).filter_by(study_id=citation.id).all()
        et_citation_label_list = ET.SubElement(et_citation, "label_list")
        for label in label_list:
            if not lbl_filter_f(label):
                continue
            et_citation_label = ET.SubElement(et_citation_label_list, "label")
            et_citation_label_labeler = ET.SubElement(et_citation_label, "labeler")
            et_citation_label_labeler.text = str(label.user_id)
            et_citation_label_decision = ET.SubElement(et_citation_label, "decision")
            et_citation_label_decision.text = str(label.label)

        # free-text notes (general / population / intervention-comparator / outcome)
        notes_list = Session.query(model.Note).filter_by(citation_id = citation.id).all()
        et_citation_notes_list = ET.SubElement(et_citation, "notes_list")
        for note in notes_list:
            et_citation_note = ET.SubElement(et_citation_notes_list, "note")
            et_citation_note_creator = ET.SubElement(et_citation_note, "user")
            et_citation_note_creator.text = str(note.creator_id)
            et_citation_note_general = ET.SubElement(et_citation_note, "general")
            et_citation_note_general.text = note.general
            et_citation_note_population = ET.SubElement(et_citation_note, "population")
            et_citation_note_population.text = note.population
            # NOTE(review): the element tag is "ec" but it holds the
            # intervention/comparator ("ic") note -- looks like a typo;
            # confirm downstream consumers before renaming the tag.
            et_citation_note_ic = ET.SubElement(et_citation_note, "ec")
            et_citation_note_ic.text = note.ic
            et_citation_note_outcome = ET.SubElement(et_citation_note, "outcome")
            et_citation_note_outcome.text = note.outcome
    return
def _text_for_tag(self, tag_id):
    """Return the display text of the TagType row with the given id.

    Raises if no (or more than one) matching row exists, courtesy of
    Query.one().
    """
    matching_tag = Session.query(model.TagType).filter(
        model.TagType.id == tag_id).one()
    return matching_tag.text
def write_labels(self): # get fields review_q = Session.query(model.Project) review = review_q.filter(model.Project.id == self.project_id).one() all_labelers = self._get_participants_for_review(self.project_id) ## some helpers none_to_str = lambda x: "" if x is None else x zero_to_none = lambda x: "none" if x==0 else x fields_to_export = self.fields # map citation ids to dictionaries that, in turn, map # usernames to labels citation_to_lbls_dict = {} all_citations = [cit.id for cit in self._get_citations_for_review(self.project_id)] citations_labeled_dict = {} for cit in all_citations: citations_labeled_dict[cit]=False # likewise, for notes citation_to_notes_dict = {} # we filter the citations list (potentially) citations_to_export = [] # for efficiency reasons, we keep track of whether we need # create a new empty dictionary for the current citation last_citation_id = None labeler_names = ["consensus"] # always export the consensus # first collect labels for all citations that pass our # filtering criteria for citation, label in Session.query(\ model.Citation, model.Label).filter(model.Citation.id==model.Label.study_id).\ filter(model.Label.project_id==self.project_id).order_by(model.Citation.id).all(): # the above gives you all labeled citations for this review # i.e., citations that have at least one label citations_labeled_dict[citation.id]=True if self.lbl_filter_f(label): cur_citation_id = citation.id if last_citation_id != cur_citation_id: citation_to_lbls_dict[citation.id] = {} citation_to_notes_dict[cur_citation_id] = {} citations_to_export.append(citation) # NOTE that we are assuming unique user names per-review labeler = self._get_username_from_id(label.user_id) if not labeler in labeler_names: labeler_names.append(labeler) citation_to_lbls_dict[cur_citation_id][labeler] = label.label last_citation_id = cur_citation_id # note that this will only contain entries for reviews that have # been labeled! 
i.e., notes made on unlabeled citations are not # reflected here. citation_to_notes_dict[cur_citation_id][labeler] = \ self._get_notes_for_citation(cur_citation_id, label.user_id) # we automatically export all labeler's labels for labeler in labeler_names: fields_to_export.append(labeler) # finally, export notes (if asked) notes_fields = ["general", "population", "intervention/comparator", "outcome"] if "notes" in fields_to_export: fields_to_export.remove("notes") # we append all labelers notes for labeler in labeler_names: if labeler != "consensus": for notes_field in notes_fields: fields_to_export.append("%s notes (%s)" % (notes_field, labeler)) self.write_buffer = [",".join(fields_to_export)] for citation in citations_to_export: cur_line = [] for field in fields_to_export: if field == "(internal) id": cur_line.append("%s" % citation.id) elif field == "(source) id": cur_line.append("%s" % citation.refman) elif field == "pubmed id": cur_line.append("%s" % zero_to_none(citation.pmid)) elif field == "abstract": cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'")) elif field == "title": cur_line.append('"%s"' % citation.title.replace('"', "'")) elif field == "keywords": cur_line.append('"%s"' % citation.keywords.replace('"', "'")) elif field == "journal": cur_line.append('"%s"' % none_to_str(citation.journal)) elif field == "authors": cur_line.append('"%s"' % "".join(citation.authors)) elif field == "tags": cur_tags = self._get_tags_for_citation(citation.id) cur_line.append('"%s"' % ",".join(cur_tags)) elif field in labeler_names: cur_labeler = field cur_lbl = "o" cit_lbl_d = citation_to_lbls_dict[citation.id] if cur_labeler in cit_lbl_d: cur_lbl = str(cit_lbl_d[cur_labeler]) # create a consensus label automagically in cases where # there is unanimous agreement elif cur_labeler == "consensus": if len(set(cit_lbl_d.values()))==1: if len(cit_lbl_d) > 1: # if at least two people agree (and none disagree), set the # consensus label to reflect this 
cur_lbl = str(cit_lbl_d.values()[0]) else: # then only one person has labeled it -- # consensus is kind of silly cur_lbl = "o" else: # no consensus! cur_lbl = "x" cur_line.append(cur_lbl) elif "notes" in field: # notes field # this is kind of hacky -- we first parse out the labeler # name from the column header string assembled above and # then get a user id from this. get_labeler_name_from_str = lambda x: x.split("(")[1].split(")")[0] cur_labeler = get_labeler_name_from_str(field) # @TODO not sure what we should do in consensus case... if cur_labeler == "consensus": cur_line.append("") else: cur_note = None cur_notes_d = citation_to_notes_dict[citation.id] if cur_labeler in cur_notes_d: cur_note = cur_notes_d[cur_labeler] if cur_note is None: cur_line.append("") else: notes_field = field if "general" in notes_field: cur_line.append("\"%s\"" % cur_note.general.replace('"', "'")) elif "population" in notes_field: cur_line.append("\"%s\"" % cur_note.population.replace('"', "'")) elif "outcome" in notes_field: cur_line.append("\"%s\"" % cur_note.outcome.replace('"', "'")) else: # intervention/comparator cur_line.append("\"%s\"" % cur_note.ic.replace('"', "'")) self.write_buffer.append(",".join(cur_line)) # exporting *all* (including unlabeled!) citations, per Ethan's request #-- may want to make this optional self.write_buffer.append("citations that are not yet labeled by anyone") # jj 2014-08-20: Request to include citation information even for those citations that have not # been labeled yet. 
unlabeled_citation_ids = [cit for cit in citations_labeled_dict if not citations_labeled_dict[cit]] unlabeled_citations = Session.query(model.Citation).filter(model.Citation.id.in_(unlabeled_citation_ids)).all() for citation in unlabeled_citations: cur_line = [] for field in fields_to_export: if field == "(internal) id": cur_line.append("%s" % citation.id) elif field == "(source) id": cur_line.append("%s" % citation.refman) elif field == "pubmed id": cur_line.append("%s" % zero_to_none(citation.pmid)) elif field == "abstract": cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'")) elif field == "title": cur_line.append('"%s"' % citation.title.replace('"', "'")) elif field == "keywords": cur_line.append('"%s"' % citation.keywords.replace('"', "'")) elif field == "journal": cur_line.append('"%s"' % none_to_str(citation.journal)) elif field == "authors": cur_line.append('"%s"' % "".join(citation.authors)) self.write_buffer.append(",".join(cur_line)) path_to_export = "~/labels_%s.csv" % self.project_id try: fout = open(path_to_export, 'w') except IOError: os.makedirs(os.path.dirname(path_to_export)) fout = open(path_to_export, 'w') lbls_str = "\n".join(self.write_buffer) lbls_str = lbls_str.encode("utf-8", "ignore") fout.write(lbls_str) fout.close() print "finished" + str(self.project_id) return
def _get_participants_for_review(self, project_id):
    """Return the members of the project identified by project_id."""
    project_q = Session.query(model.Project)
    matching_project = project_q.filter(model.Project.id == project_id).one()
    return matching_project.members