def insert_citation(review_id, ref_id, citation_d):
    """Create a Citation row for a review from a parsed citation dict.

    The new citation is added to the session and committed immediately.

    Args:
        review_id: id of the project (review) the citation belongs to;
            stored in ``project_id``.
        ref_id: reference-manager id, stored as-is in ``refman``. No
            integer coercion is performed on it.
        citation_d: dict with the keys ``pmid``, ``title``, ``abstract``,
            ``authors``, ``keywords`` and ``journal``. ``authors`` and
            ``keywords`` are iterables of strings; any of ``pmid``,
            ``title``, ``abstract``, ``authors`` and ``keywords`` may be
            ``None``.

    Returns:
        The id of the newly committed citation.

    Raises:
        KeyError: if ``citation_d`` lacks one of the keys listed above.
    """
    # Abstracts longer than this are truncated.
    # NOTE(review): presumably sized to fit the DB column -- confirm.
    max_abstract_length = 9980

    citation = model.Citation()
    citation.project_id = review_id
    citation.refman = ref_id

    # A missing or empty pmid is stored as 0.
    pmid = citation_d['pmid']
    citation.pmid = pmid if (pmid is not None and pmid != '') else 0

    # Over-long titles are truncated; a missing title gets a placeholder.
    title = citation_d['title']
    if title is not None:
        citation.title = title[:MAX_TITLE_LENGTH]
    else:
        citation.title = "(no title found)"

    abstract = citation_d['abstract']
    if abstract is not None:
        citation.abstract = abstract[:max_abstract_length]
    else:
        citation.abstract = None

    # Treat a missing author/keyword list like an empty one instead of
    # letting str.join raise TypeError on None, in line with the None
    # handling of title and abstract above.
    citation.authors = " and ".join(citation_d['authors'] or [])
    citation.keywords = ','.join(citation_d['keywords'] or [])
    citation.journal = citation_d['journal']

    model.Session.add(citation)
    model.Session.commit()
    return citation.id
def test_delete_citation(self):
    """ Deleting Citation entry cascades

    Verify that entries in CitationTask table are destroyed when
    corresponding citation and/or task is deleted
    """
    def association_row_count():
        # Number of rows currently in the citation<->task link table.
        return len(Session.query(model.citations_tasks_table).all())

    # Create citation and task objects
    c1 = model.Citation()
    c2 = model.Citation()
    t1 = model.Task()
    t2 = model.Task()

    # Append tasks to citations; incidentally this verifies that the
    # relationships are properly set.
    c1.tasks.append(t1)
    c1.tasks.append(t2)
    c2.tasks.append(t2)

    # Persist the changes.
    Session.add(c1)
    Session.add(c2)
    Session.commit()

    # Verify first that the link rows actually exist:
    # (c1, t1), (c1, t2) and (c2, t2).
    assert association_row_count() == 3

    # Remove one of the citations and check the cascade: only (c2, t2)
    # should remain.
    Session.delete(c1)
    Session.commit()
    assert association_row_count() == 1

    # Do the same for when the task is removed. Commit explicitly, as in
    # the citation case, so the check does not depend on the session
    # autoflushing the pending delete before the query runs.
    Session.delete(t2)
    Session.commit()
    assert association_row_count() == 0
def _clone_labels_into_review(source_labels, review_id, study_id):
    """Add to the session a copy of each label, re-pointed at a review/citation."""
    for source_label in source_labels:
        label = model.Label()
        label.project_id = review_id
        label.study_id = study_id
        label.label = source_label.label
        model.Session.add(label)


def _export_experiment_predictions(review_id, path_to_preds_out, state_dict):
    """Write the review's predictions, most probable first, to a CSV file."""
    preds_for_review = Session.query(model.Prediction).filter(
        model.Prediction.project_id == review_id).all()

    with open(path_to_preds_out, 'w+') as fout:
        csv_out = csv.writer(fout)
        preds_file_headers = [
            "citation_id", "refman", "title",
            "predicted p of being relevant",
            "'hard' screening prediction*", "state"
        ]
        csv_out.writerow(preds_file_headers)

        sorted_preds = sorted(preds_for_review,
                              key=lambda x: x.predicted_probability,
                              reverse=True)
        for pred in sorted_preds:
            citation = Session.query(model.Citation).filter(
                model.Citation.id == pred.study_id).first()
            # Non-ASCII characters are dropped from the title on export.
            citation_title = citation.title.encode('ascii', 'ignore')
            row_str = [
                citation.id, citation.refman, citation_title,
                pred.predicted_probability, pred.prediction,
                state_dict[citation.id]
            ]
            csv_out.writerow(row_str)


def _create_reviews(p_id, iter_size, which_iter):
    """Run a batch of simulated screening experiments on project ``p_id``.

    WARNING: the first time this runs (i.e. while the ``_delete_lock.lck``
    file next to this module does not exist) it DELETES every citation and
    label that does not belong to project ``p_id``.

    Each experiment run:
      1. copies the project's citations into a brand-new review, seeding
         it with the labels of a random sample of up to ``k_init`` source
         citations;
      2. repeatedly calls ``make_predictions`` on the new review, exports
         the predictions to ``_exports/predictions_<p_id>_<i>_of_<run>.csv``
         and then reveals the source labels of the ``k_inc`` not-yet-picked
         citations with the highest predicted probability. When no such
         prediction exists, up to ``k_inc`` citations of the new review
         that have no labels are used instead;
      3. stops once the labeled-citation counter reaches the number of
         source citations.

    The "state" column of the export is 0 for a citation not picked yet,
    1 for one labeled from the initial sample or the no-predictions
    fallback, and 2 for one picked from the predictions.

    Args:
        p_id: id of the source project whose citations/labels are replayed.
        iter_size: number of experiment runs to perform.
        which_iter: batch index; runs are numbered from
            ``iter_size * which_iter`` upwards.
    """
    # One-off cleanup, guarded by a lock file so that it happens only once.
    lock_file_path = join(dirname(abspath(__file__)), '_delete_lock.lck')
    if not isfile(lock_file_path):
        Session.query(model.Citation).filter(
            model.Citation.project_id != p_id).delete()
        Session.query(model.Label).filter(
            model.Label.project_id != p_id).delete()
        Session.commit()
        open(lock_file_path, 'w+').close()

    u_id = 2629   # hard-coded user who is made leader of every new review
    k_init = 400  # size of the initially labeled random sample
    k_inc = 100   # number of citations labeled per round

    first_run = iter_size * which_iter
    for itercount in range(first_run, first_run + iter_size):
        ### THIS is the code for one run of the experiment
        labeled_citation_counter = 0

        user = Session.query(model.User).filter_by(id=u_id).first()
        citations = Session.query(
            model.Citation).filter_by(project_id=p_id).all()
        print(len(citations))
        c_count = len(citations)

        # Gather the source labels of a random sample of citations, keyed
        # by source citation id. The sample size is capped at the number
        # of citations because random.sample raises ValueError when asked
        # for more items than the population holds.
        r_sample = defaultdict(list)
        sample_indexes = sample(range(c_count), min(k_init, c_count))
        for ii in sample_indexes:
            cc = citations[ii]
            for ll in Session.query(model.Label).filter_by(
                    project_id=p_id).filter_by(study_id=cc.id).all():
                r_sample[ll.study_id].append(ll)

        new_review = model.Project()
        new_review.leaders.append(user)
        new_review.initial_round_size = 0
        new_review.tag_privacy = True
        Session.add(new_review)
        Session.flush()

        state_dict = defaultdict(int)  # new citation id -> state (0/1/2)
        citation_dict = {}             # new citation id -> source citation id
        for c in citations:
            citation = model.Citation()
            citation.project_id = new_review.id
            citation.title = c.title
            citation.abstract = c.abstract
            citation.keywords = c.keywords
            citation.refman = c.refman
            model.Session.add(citation)
            # Flush so that the new citation gets its id assigned.
            Session.flush()

            citation_dict[citation.id] = c.id
            if c.id in r_sample:
                labeled_citation_counter += 1
                state_dict[citation.id] = 1
                _clone_labels_into_review(
                    r_sample[c.id], new_review.id, citation.id)
        print(new_review.id)
        Session.commit()

        ## i is a counter for the current increment
        i = 0
        while True:
            print("EXPERIMENT NO: " + str(itercount))
            make_predictions(new_review.id)

            # Record the results of this round.
            path_to_preds_out = os.path.join(
                "_exports",
                "predictions_%d_%d_of_%d.csv" % (p_id, i, itercount))
            _export_experiment_predictions(
                new_review.id, path_to_preds_out, state_dict)

            i += 1
            if labeled_citation_counter >= c_count:
                break

            # Take the k_inc most probable predictions whose citation has
            # not been picked yet.
            P_a = []
            for pa in Session.query(model.Prediction).filter_by(
                    project_id=new_review.id).order_by(
                        model.Prediction.predicted_probability.desc()).all():
                if state_dict[pa.study_id] == 0:
                    P_a.append(pa)
                if len(P_a) == k_inc:
                    break

            if not P_a:
                print("~~~NO PREDS!!!")
                # Fall back to citations of the new review without labels.
                ccc = [
                    label for label in Session.query(
                        model.Citation.id).filter_by(
                            project_id=new_review.id).filter(
                                ~model.Citation.labels.any()).limit(k_inc)
                ]
                print(len(ccc))
                for cc in ccc:
                    labeled_citation_counter += 1
                    state_dict[cc.id] = 1
                    # NOTE(review): unlike the other two label lookups,
                    # this one is not restricted to project_id=p_id --
                    # confirm whether that is intentional.
                    _clone_labels_into_review(
                        Session.query(model.Label).filter_by(
                            study_id=citation_dict[cc.id]).all(),
                        new_review.id, cc.id)
            else:
                for pp in P_a:
                    labeled_citation_counter += 1
                    state_dict[pp.study_id] = 2
                    _clone_labels_into_review(
                        Session.query(model.Label).filter_by(
                            project_id=p_id).filter_by(
                                study_id=citation_dict[pp.study_id]).all(),
                        new_review.id, pp.study_id)

            Session.commit()
            print(len(
                Session.query(
                    model.Label).filter_by(project_id=new_review.id).all()))
    return