def generate_queries(queries):
    """Return one Query document per content string in *queries*.

    For each string the first existing Query with that content is reused;
    when none exists a new Query is constructed from the string, saved and
    used instead. The result list follows the order of *queries*.
    """
    def _fetch_or_create(content):
        # Reuse the first stored Query with this content, if any.
        existing = Query.objects(content=content).first()
        if existing:
            return existing
        created = Query(content)
        created.save()
        return created

    return [_fetch_or_create(content) for content in queries]
def get(self):
    """Return the scored documents of one query as a JSON list.

    Reads ``assignment_id`` and ``query_content`` from the parsed request
    arguments, then reports for every entry in the query's ``doc_scores``
    the document name, its score, and how many annotations judged it
    'relevant' / 'irrelevant' for this query. Entries are ordered by score,
    highest first.
    """
    args = parser.parse_args()
    assignment = Assignment.objects(id=args['assignment_id']).first()
    query = Query.objects(assignment=assignment,
                          content=args['query_content']).first()

    documents = []
    for doc_name, score in query.doc_scores.items():
        # Document records are looked up under the score key plus ".txt".
        full_doc_name = doc_name + ".txt"
        document = Document.objects(dataset=assignment.dataset,
                                    name=full_doc_name).first()
        # Count the judgements of each kind recorded for this document/query.
        judged = {
            label: Annotation.objects(document=document, query=query,
                                      judgement=label).count()
            for label in ('relevant', 'irrelevant')
        }
        documents.append({
            'name': full_doc_name,
            'score': score,
            'rel_num': judged['relevant'],
            'irrel_num': judged['irrelevant'],
        })

    documents.sort(key=lambda entry: entry['score'], reverse=True)
    return make_response(jsonify(documents), 200,
                         {'Content-Type': 'application/json'})
def get(self, owner_id, assignment_name):
    """Render ``assignment.html`` for the user stored in the session.

    The assignment is found by its name and owner. Display attributes
    (owner name, dataset name, dataset owner id) are attached to it for the
    template, and the page also receives the session user's queries for this
    assignment that are not yet submitted.
    """
    # The parsed arguments are not used here; the call itself is retained.
    parser.parse_args()

    owner = User.objects(id=owner_id).first()
    user = User.objects(id=session['user_id']).first()

    named = Assignment.objects(name=assignment_name)
    assignment = named.filter(owner=owner).first()

    # Re-submission is allowed, so there is no "already submitted" redirect.
    dataset = assignment.dataset
    assignment.owner_name = assignment.owner.name
    assignment.ds_name = dataset.name
    assignment.ds_owner_id = str(dataset.owner.id)

    pending_queries = Query.objects(assignment=assignment, creator=user,
                                    submitted=False)

    page = render_template("assignment.html",
                           user=user,
                           assignment=assignment,
                           queries=pending_queries)
    return make_response(page, 200, {'Content-Type': 'text/html'})
def post(self):
    """Save the session user's judgements for an assignment.

    The ``annotations`` argument maps query content to a mapping of
    file name -> label. One Annotation is saved per (query, file) pair,
    every query that appears is flagged as submitted, and the user's entry
    in the assignment's ``statuses`` is set to True.
    """
    args = parser.parse_args()
    assignment_id = args['assignment_id']
    assignment = Assignment.objects(id=assignment_id).first()
    labels_by_query = args['annotations']
    user_id = session['user_id']

    # Re-submission is allowed, so no "already submitted" check is made.
    annotator = User.objects(id=user_id).first()

    for query_content, labels_by_file in labels_by_query.items():
        query = Query.objects(assignment=assignment,
                              content=query_content).first()
        for file_name, label in labels_by_file.items():
            in_dataset = Document.objects(dataset=assignment.dataset)
            document = in_dataset.filter(name=file_name).first()

            annotation = Annotation()
            annotation.annotator = annotator
            annotation.document = document
            annotation.judgement = label
            annotation.query = query
            annotation.save()
        Query.objects(id=query.id).update(submitted=True)

    # Mark the assignment as completed by this user.
    statuses = assignment.statuses
    statuses[str(user_id)] = True
    Assignment.objects(id=assignment_id).update(statuses=statuses)

    return make_response(jsonify("succeed"), 200,
                         {'Content-Type': 'application/json'})
def post(self):
    """Create and save an unsubmitted Query from the request arguments.

    Uses the ``content``, ``dataset`` (id) and ``creator`` (id) arguments
    and answers with the JSON string "succeed".
    """
    args = parser.parse_args()
    matching_datasets = Dataset.objects(id=args['dataset'])
    matching_creators = User.objects(id=args['creator'])

    new_query = Query()
    new_query.content = args['content']
    # The first match is taken by index, so an unknown id presumably fails
    # here instead of storing None -- confirm against callers.
    new_query.data_set = matching_datasets[0]
    new_query.creator = matching_creators[0]
    new_query.submitted = False
    new_query.save()

    return make_response(jsonify("succeed"), 200,
                         {'Content-Type': 'application/json'})
def post(self):
    """Save a new, unsubmitted Query along with its document scores.

    Reads ``query``, ``assignment_id``, ``user_id`` and ``doc_scores`` from
    the parsed request arguments and returns the plain string "OK".
    """
    args = parser.parse_args()

    # Resolve the referenced documents before building the Query.
    owning_assignment = Assignment.objects(id=args['assignment_id']).first()
    author = User.objects(id=args['user_id']).first()

    new_query = Query()
    new_query.content = args['query']
    new_query.assignment = owning_assignment
    new_query.doc_scores = args['doc_scores']
    new_query.creator = author
    new_query.submitted = False
    new_query.save()

    return "OK"
def store_annotations(annotations, assignment_id):
    """Persist the current user's judgements for an assignment.

    Args:
        annotations: mapping of query id -> mapping of document id -> result
            flag; the flag ``'T'`` is stored as True, anything else as False.
        assignment_id: id of the Assignment the judgements belong to.

    Returns:
        False as soon as saving one Annotation raises NotUniqueError or
        ValidationError (annotations saved before that point are kept);
        True once everything is saved and
        ``AnnotatorAPI.query_nsa_filter`` has been run for the assignment.
    """
    assignment = Assignment.objects(id=assignment_id).first()
    # The annotating user is the same for every record: look it up once
    # instead of once per annotation.
    user = User.objects(email=current_user.email).first()

    for query_id in annotations:
        query = Query.objects(id=query_id).first()
        results_by_doc = annotations[query_id]
        for doc_id in results_by_doc:
            judgement = results_by_doc[doc_id] == 'T'
            doc = Document.objects(id=doc_id).first()
            try:
                Annotation(user, query, doc, judgement, assignment).save()
            except (NotUniqueError, ValidationError):
                return False

    AnnotatorAPI.query_nsa_filter(assignment)
    return True
def search(assignment, dataset_name, queries, ranker, params, num_results):
    """Run every query through the searcher and store one Score per hit.

    Args:
        assignment: assignment the saved Score records are attached to.
        dataset_name: dataset name handed to ``Searcher``.
        queries: iterable of query content strings.
        ranker, params, num_results: passed through to ``Searcher.search``.

    The data directory is ``<app root>/data/<current user's name>``. For each
    hit, the Document is looked up by its path under that directory and a
    Score linking assignment, query and document is saved.
    """
    author = User.objects(email=current_user.email).first()
    path = os.path.join(current_app.root_path, 'data', author.name)
    searcher = Searcher(dataset_name, path)

    for query in queries:
        results = searcher.search(query, ranker, params, num_results)['results']
        # The Query record depends only on the query text, so fetch it once
        # per query rather than once per hit.
        q = Query.objects(content=query).first()
        for result in results:
            # [2:] drops the first two bytes of the reported path
            # (presumably a leading "./" -- confirm against Searcher output).
            # NOTE(review): joining with the encoded (bytes) value looks
            # Python 2 specific -- confirm before running on Python 3.
            doc_path = str(
                os.path.join(path, result['path'].encode('utf8')[2:]))
            doc_score = result['score']
            document = Document.objects(path=doc_path).first()
            Score(result=doc_score, assignment=assignment,
                  query=q, document=document).save()
def _write_query_files(self, to_write, queries_filepath, qrels_filepath):
    """Rewrite the queries file and the qrels file from *to_write*.

    Args:
        to_write: list of dicts with a ``"query_id"`` and a ``"docs"`` list
            of ``(qnum, doc_id, judgement)`` tuples.
        queries_filepath: receives one line per entry, the content of the
            Query with that id.
        qrels_filepath: receives one ``"qnum doc_id judgement"`` line per
            docs tuple.

    Existing files at both paths are removed first. When *to_write* is empty
    nothing is written, so no file exists afterwards.
    """
    for stale_path in (queries_filepath, qrels_filepath):
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    if not to_write:
        return

    # Open each file once for the whole export instead of reopening both for
    # every entry; the context manager closes them, no explicit close needed.
    with open(queries_filepath, 'a') as queries_file, \
            open(qrels_filepath, 'a') as qrels_file:
        for entries in to_write:
            query = Query.objects(id=entries["query_id"]).first()
            queries_file.write(query.content + "\n")
            for qnum, doc_id, judgement in entries["docs"]:
                qrels_file.write(
                    str(qnum) + " " + str(doc_id) + " " + str(judgement) +
                    "\n")
def _collect_assignment_data(self, assignment):
    """Attach derived attributes to *assignment* and return it.

    Sets ``queries`` (the Query set filtered by this assignment),
    ``ds_name`` (its dataset's name) and ``owner_id`` (its owner's id as a
    string) on the same object that was passed in.
    """
    related_queries = Query.objects(assignment=assignment)
    dataset_name = assignment.dataset.name
    owner_id_text = str(assignment.owner.id)

    assignment.queries = related_queries
    assignment.ds_name = dataset_name
    assignment.owner_id = owner_id_text
    return assignment
def post(self):
    """Export a dataset's submitted queries and judgements as query/qrels files.

    For every submitted query of the dataset's assignments that has at least
    40 annotations, the per-document judgements are averaged ('relevant'
    counts 1, anything else 0) and rounded; documents whose rounded value is
    above 0 are kept. Queries with at least one kept document are numbered
    consecutively from 0. The dataset is then copied to the permanent
    location, its config updated, and both files written there. Returns a
    JSON response with the status and both file paths.
    """
    args = parser.parse_args()
    dataset_id = args['dataset']
    dataset = Dataset.objects(id=dataset_id).first()

    assignment_ids = [a.id for a in Assignment.objects(dataset=dataset_id)]
    submitted_queries = Query.objects(assignment__in=assignment_ids,
                                      submitted=True)
    query_ids = [q.id for q in submitted_queries]

    metadata_filepath = cfg["anno_dataset_base_path"] + str(
        dataset.owner.gitlab_id) + "/" + dataset.name + "/metadata.data"
    doc_ids = self._get_doc_ids(metadata_filepath)

    to_write = []
    valid_query_num = 0
    for query_id in query_ids:
        annotations = Annotation.objects(query=query_id)
        if len(annotations) < 40:
            continue

        # Group the 0/1 votes by document id.
        votes_by_doc = {}
        for annotation in annotations:
            doc_id = doc_ids[annotation.document.name]
            vote = 1 if annotation.judgement == "relevant" else 0
            votes_by_doc.setdefault(doc_id, []).append(vote)

        # Keep only documents whose rounded average vote is positive.
        relevant_docs = {}
        for doc_id, votes in votes_by_doc.items():
            verdict = int(round(float(sum(votes)) / len(votes)))
            if verdict > 0:
                relevant_docs[doc_id] = verdict

        if not relevant_docs:
            continue

        doc_entries = [(valid_query_num, doc_id, verdict)
                       for doc_id, verdict in relevant_docs.items()]
        to_write.append({"docs": doc_entries, "query_id": query_id})
        valid_query_num += 1

    old_dataset_path = cfg["anno_dataset_base_path"] + str(
        dataset.owner.gitlab_id) + "/" + dataset.name
    new_dataset_path = cfg["perm_dataset_base_path"]
    self._copy_dataset(old_dataset_path,
                       new_dataset_path + '/' + dataset.name)
    self._update_dataset_config(new_dataset_path, dataset.name)

    path = cfg["perm_dataset_base_path"] + '/' + dataset.name + "/"
    qrels_filepath = path + dataset.name + "-qrels.txt"
    queries_filepath = path + dataset.name + "-queries.txt"
    self._write_query_files(to_write, queries_filepath, qrels_filepath)

    return make_response(jsonify({
        "status": "success",
        "queries_filepath": queries_filepath,
        "qrels_filepath": qrels_filepath,
    }))