Exemple #1
0
    def get(self):
        """Return the scored documents of one query as a JSON list.

        Reads ``assignment_id`` and ``query_content`` from the parsed request
        arguments, looks up the matching assignment and query, and builds one
        entry per key of ``query.doc_scores``. Each entry holds the document
        name (key plus ``.txt``), its score, and how many 'relevant' and
        'irrelevant' annotations exist for that document and query.

        NOTE(review): neither the assignment nor the query lookup is checked
        for a missing result before it is used.

        Returns:
            A 200 JSON response with the entries ordered by descending score.
        """
        args = parser.parse_args()
        assignment_id = args['assignment_id']
        query_content = args['query_content']

        assignment = Assignment.objects(id=assignment_id).first()
        query = Query.objects(assignment=assignment,
                              content=query_content).first()

        def count_judged(document, verdict):
            # Number of annotations on this document/query with the verdict.
            return Annotation.objects(document=document,
                                      query=query,
                                      judgement=verdict).count()

        rows = []
        for short_name, score in query.doc_scores.items():
            stored_name = short_name + ".txt"
            document = Document.objects(dataset=assignment.dataset,
                                        name=stored_name).first()
            relevant_total = count_judged(document, 'relevant')
            irrelevant_total = count_judged(document, 'irrelevant')
            rows.append({
                'name': stored_name,
                'score': score,
                'rel_num': relevant_total,
                'irrel_num': irrelevant_total,
            })

        # Highest score first.
        rows.sort(key=lambda row: row['score'], reverse=True)

        return make_response(jsonify(rows), 200,
                             {'Content-Type': 'application/json'})
Exemple #2
0
    def get(self, owner_id, assignment_name):
        """Render the assignment page for the user stored in the session.

        Args:
            owner_id: id of the user who owns the assignment.
            assignment_name: name of the assignment to display.

        Returns:
            A 200 text/html response rendering ``assignment.html`` with the
            session user, the assignment (decorated with ``owner_name``,
            ``ds_name`` and ``ds_owner_id``) and that user's queries for the
            assignment that are not yet submitted.
        """
        # The parsed arguments are not used here; the call itself is kept
        # (presumably so request parsing still runs -- confirm).
        parser.parse_args()

        owner = User.objects(id=owner_id).first()
        user = User.objects(id=session['user_id']).first()

        named = Assignment.objects(name=assignment_name)
        assignment = named.filter(owner=owner).first()

        # Re-submission is allowed, so the user's existing status for this
        # assignment is deliberately not checked here.

        # Extra display attributes attached to the assignment object.
        assignment.owner_name = assignment.owner.name
        dataset = assignment.dataset
        assignment.ds_name = dataset.name
        assignment.ds_owner_id = str(dataset.owner.id)

        pending_queries = Query.objects(assignment=assignment,
                                        creator=user,
                                        submitted=False)
        page = render_template("assignment.html",
                               user=user,
                               assignment=assignment,
                               queries=pending_queries)
        return make_response(page, 200, {'Content-Type': 'text/html'})
Exemple #3
0
 def generate_queries(queries):
     """Return one Query document per entry of ``queries``.

     For each entry, the first Query whose ``content`` equals it is reused;
     when none is found a new Query is built from the entry and saved.

     Args:
         queries: iterable of query content values.

     Returns:
         List of Query documents, in the same order as ``queries``.
     """
     documents = []
     for content in queries:
         existing = Query.objects(content=content).first()
         if existing:
             documents.append(existing)
             continue
         # Nothing stored for this content yet: create and persist it.
         created = Query(content)
         created.save()
         documents.append(created)
     return documents
Exemple #4
0
    def post(self):
        """Store the annotations submitted by the session user.

        Reads ``assignment_id`` and ``annotations`` from the parsed request
        arguments. ``annotations`` is iterated as a mapping of query content
        to a mapping of file name to judgement label. One Annotation is saved
        per (query, file) pair, each processed query is flagged as submitted,
        and finally the user's entry in the assignment's ``statuses`` is set
        to True.

        Returns:
            A 200 JSON response carrying the string "succeed".
        """
        args = parser.parse_args()
        assignment_id = args['assignment_id']
        assignment = Assignment.objects(id=assignment_id).first()

        submitted = args['annotations']
        user_id = session['user_id']

        # Re-submission is allowed, so an existing status for this user does
        # not block the request.
        annotator = User.objects(id=user_id).first()

        for query_content, labels_by_file in submitted.items():
            query = Query.objects(assignment=assignment,
                                  content=query_content).first()

            for file_name, label in labels_by_file.items():
                in_dataset = Document.objects(dataset=assignment.dataset)
                document = in_dataset.filter(name=file_name).first()

                annotation = Annotation()
                annotation.annotator = annotator
                annotation.document = document
                annotation.judgement = label
                annotation.query = query
                annotation.save()

            Query.objects(id=query.id).update(submitted=True)

        # Record that this user has completed the assignment.
        statuses = assignment.statuses
        statuses[str(user_id)] = True
        Assignment.objects(id=assignment_id).update(statuses=statuses)

        return make_response(jsonify("succeed"), 200,
                             {'Content-Type': 'application/json'})
Exemple #5
0
 def store_annotations(annotations, assignment_id):
     """Save the current user's judgements for an assignment.

     Args:
         annotations: mapping of query id -> mapping of document id -> result
             code. The code 'T' is stored as a True judgement, any other
             value as False.
         assignment_id: id of the assignment the judgements belong to.

     Returns:
         False as soon as saving one annotation raises NotUniqueError or
         ValidationError (annotations saved before that point are not rolled
         back). True once every annotation has been saved and
         ``AnnotatorAPI.query_nsa_filter`` has been called on the assignment.
     """
     assignment = Assignment.objects(id=assignment_id).first()
     # The annotating user is the same for every judgement, so look it up
     # once instead of once per document.
     user = User.objects(email=current_user.email).first()

     for query_id, doc_results in annotations.items():
         query = Query.objects(id=query_id).first()
         for doc_id, result in doc_results.items():
             judgement = result == 'T'
             doc = Document.objects(id=doc_id).first()
             try:
                 Annotation(user, query, doc, judgement, assignment).save()
             except (NotUniqueError, ValidationError):
                 return False

     AnnotatorAPI.query_nsa_filter(assignment)
     return True
Exemple #6
0
 def search(assignment, dataset_name, queries, ranker, params, num_results):
     """Run each query through a Searcher and store one Score per result.

     The Searcher is built from ``dataset_name`` and the current user's data
     directory (``<app root>/data/<user name>``). For every result a Score
     is saved linking the assignment, the Query whose content equals the
     query, and the Document stored under the result's path.

     Args:
         assignment: assignment the scores are attached to.
         dataset_name: name handed to the Searcher.
         queries: iterable of query contents to search for.
         ranker: ranker argument forwarded to ``Searcher.search``.
         params: ranker parameters forwarded to ``Searcher.search``.
         num_results: result count forwarded to ``Searcher.search``.
     """
     author = User.objects(email=current_user.email).first()
     path = os.path.join(current_app.root_path, 'data', author.name)
     searcher = Searcher(dataset_name, path)

     for query in queries:
         results = searcher.search(query, ranker, params,
                                   num_results)['results']
         # The Query document depends only on the query content, so fetch it
         # once per query instead of once per result.
         q = Query.objects(content=query).first()

         for result in results:
             # Drops the first two bytes of the encoded result path
             # (presumably a leading "./" -- confirm).
             # NOTE(review): on Python 3 the encoded value is bytes, which
             # os.path.join cannot mix with a str path; this line looks
             # Python 2-only -- confirm the target interpreter.
             doc_path = str(
                 os.path.join(path, result['path'].encode('utf8')[2:]))
             doc_score = result['score']
             document = Document.objects(path=doc_path).first()
             Score(result=doc_score,
                   assignment=assignment,
                   query=q,
                   document=document).save()
Exemple #7
0
    def _write_query_files(self, to_write, queries_filepath, qrels_filepath):
        """Rewrite the queries file and the qrels file from ``to_write``.

        Any existing regular file at either path is removed first. For every
        entry, the content of the Query with the entry's ``query_id`` is
        written as one line of the queries file, and every
        ``(qnum, doc_id, judgement)`` tuple in the entry's ``docs`` is
        written as one space-separated line of the qrels file.

        Args:
            to_write: sequence of dicts with the keys ``"query_id"`` and
                ``"docs"``.
            queries_filepath: destination of the query contents.
            qrels_filepath: destination of the judgement lines.
        """
        for stale_path in (queries_filepath, qrels_filepath):
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        if not to_write:
            # Nothing to write: no file is created in this case.
            return

        # Open each file once for the whole run instead of re-opening both in
        # append mode for every entry; the with-statement closes them.
        with open(queries_filepath, 'a') as queries_file, \
                open(qrels_filepath, 'a') as qrels_file:
            for entries in to_write:
                query = Query.objects(id=entries["query_id"]).first()
                queries_file.write(query.content + "\n")

                qrels_file.writelines(
                    " ".join([str(qnum), str(doc_id), str(judgement)]) + "\n"
                    for qnum, doc_id, judgement in entries["docs"])
Exemple #8
0
 def _collect_assignment_data(self, assignment):
     """Attach display data to ``assignment`` and return the same object.

     Sets ``queries`` (the Query set filtered by this assignment),
     ``ds_name`` (the name of its dataset) and ``owner_id`` (its owner's id
     as a string).
     """
     related_queries = Query.objects(assignment=assignment)
     assignment.queries = related_queries

     dataset = assignment.dataset
     assignment.ds_name = dataset.name

     owner = assignment.owner
     assignment.owner_id = str(owner.id)

     return assignment
Exemple #9
0
    def post(self):
        """Export a dataset's submitted queries and their judgements.

        Steps, in order:
          1. Load the dataset named by the ``dataset`` request argument and
             collect the ids of all submitted queries of its assignments.
          2. For each query with at least 40 annotations, group the
             annotations per document (1 for "relevant", 0 otherwise) and
             keep the documents whose rounded mean vote is greater than 0.
             Queries that keep at least one document are numbered
             consecutively from 0.
          3. Copy the dataset from the annotation base path to the permanent
             base path and update its config.
          4. Write ``<name>-queries.txt`` and ``<name>-qrels.txt`` into the
             copied dataset directory.

        Returns:
            A JSON response with ``status`` "success" and the two file paths.
        """
        args = parser.parse_args()
        dataset_id = args['dataset']
        dataset = Dataset.objects(id=dataset_id).first()

        assignment_ids = [
            assignment.id
            for assignment in Assignment.objects(dataset=dataset_id)
        ]
        submitted_queries = Query.objects(assignment__in=assignment_ids,
                                          submitted=True)
        query_ids = [query.id for query in submitted_queries]

        # Directory of the dataset under the annotation base path.
        anno_dataset_dir = cfg["anno_dataset_base_path"] + str(
            dataset.owner.gitlab_id) + "/" + dataset.name
        doc_ids = self._get_doc_ids(anno_dataset_dir + "/metadata.data")

        to_write = []
        valid_query_num = 0
        for query_id in query_ids:
            annotations = Annotation.objects(query=query_id)
            # Queries with fewer than 40 annotations are not exported.
            if len(annotations) < 40:
                continue

            # doc id -> list of 0/1 votes, one per annotation.
            votes = {}
            for annotation in annotations:
                doc_id = doc_ids[annotation.document.name]
                vote = 1 if annotation.judgement == "relevant" else 0
                votes.setdefault(doc_id, []).append(vote)

            # Keep only documents whose rounded mean vote is positive.
            overall_judgements = {}
            for doc_id, doc_votes in votes.items():
                rounded_mean = int(
                    round(float(sum(doc_votes)) / len(doc_votes)))
                if rounded_mean > 0:
                    overall_judgements[doc_id] = rounded_mean

            if not overall_judgements:
                continue

            to_write.append({
                "docs": [(valid_query_num, doc_id, verdict)
                         for doc_id, verdict in overall_judgements.items()],
                "query_id": query_id,
            })
            valid_query_num += 1

        new_dataset_path = cfg["perm_dataset_base_path"]
        self._copy_dataset(anno_dataset_dir,
                           new_dataset_path + '/' + dataset.name)
        self._update_dataset_config(new_dataset_path, dataset.name)

        export_dir = cfg["perm_dataset_base_path"] + '/' + dataset.name + "/"
        qrels_filepath = export_dir + dataset.name + "-qrels.txt"
        queries_filepath = export_dir + dataset.name + "-queries.txt"

        self._write_query_files(to_write, queries_filepath, qrels_filepath)

        return make_response(jsonify({
            "status": "success",
            "queries_filepath": queries_filepath,
            "qrels_filepath": qrels_filepath,
        }))