예제 #1
0
    def create_evaluate_task_by_train_job_id(train_job_id, evaluate_task_name, evaluate_task_desc, mark_job_ids, doc_term_ids, doc_relation_ids, use_rule=0):
        """
        Create an evaluate task for the (single) train task of a train job,
        link it to the given mark jobs, and push it onto the evaluate queue.

        NOTE: if re-training is added later this must change — the evaluate
        task should then be created from a train_task_id rather than a
        train_job_id. Kept as-is for now because train_job and train_task are
        currently 1:1, so there is no practical difference.
        """
        # Resolve the train job, its doc type, and attach the doc type's terms.
        train_job = TrainJobModel().get_by_id(train_job_id)
        doc_type = DocTypeModel().get_by_id(train_job.doc_type_id)
        doc_type.doc_term_list = DocTermModel().get_by_filter(
            limit=99999, doc_type_id=doc_type.doc_type_id)

        nlp_task = NlpTaskEnum(doc_type.nlp_task_id)
        # train_job -> train_task is 1:1 today, so the first result is the one.
        _, train_tasks = TrainTaskModel().get_by_filter(train_job_id=train_job_id)
        train_task = train_tasks[0]

        # Persist the evaluate task in "processing" state.
        evaluate_task = EvaluateTaskModel().create(
            evaluate_task_name=evaluate_task_name,
            evaluate_task_desc=evaluate_task_desc,
            train_task_id=train_task.train_task_id,
            evaluate_task_status=int(StatusEnum.processing))

        # Link every selected mark job to the new evaluate task in one bulk insert.
        EvaluateM2mMarkModel().bulk_create([
            {"evaluate_task_id": evaluate_task.evaluate_task_id, "mark_job_id": mark_job_id}
            for mark_job_id in mark_job_ids
        ])

        # NOTE(review): the incoming doc_term_ids argument is discarded here and
        # recomputed from doc_relation_ids — presumably intentional for relation
        # tasks; confirm before relying on the parameter value.
        doc_term_ids = [
            str(m2m.doc_term_id)
            for m2m in RelationM2mTermModel().get_by_filter(
                limit=99999,
                doc_relation_ids=[int(rid) for rid in doc_relation_ids])
        ]
        push_evaluate_task_to_redis(nlp_task, evaluate_task, train_task, doc_type,
                                    mark_job_ids, doc_term_ids, doc_relation_ids, use_rule)
        session.commit()
        return evaluate_task
예제 #2
0
 def get_evaluate_task_list_by_train_job_id(train_job_id, order_by, order_by_desc, offset, limit):
     """Return (count, evaluate_task list) for a train job, paged and ordered."""
     count, tasks = EvaluateTaskModel().get_by_train_job_id(
         train_job_id=train_job_id, order_by=order_by,
         order_by_desc=order_by_desc, offset=offset, limit=limit)
     # Attach train_job_id and related mark job ids so each task can be
     # serialized with that context.
     for task in tasks:
         task.train_job_id = train_job_id
         m2m_rows = EvaluateM2mMarkModel().get_by_filter(
             limit=99999, evaluate_task_id=task.evaluate_task_id)
         task.mark_job_ids = [row.mark_job_id for row in m2m_rows]
     return count, tasks
    def get_doc_type_info_by_nlp_task_by_user(nlp_task_id, current_user: CurrentUser):
        """
        Build the doc_type overview for the management landing page: for each
        doc_type visible to the user, its schema dump, mark-job progress
        statistics, and the latest evaluation result when one exists.
        """
        # Doc types visible to this user, each paired with a CSV of term ids.
        _, doc_type_rows = DocTypeModel().get_by_nlp_task_id_by_user(
            nlp_task_id=nlp_task_id, current_user=current_user)
        for doc_type, terms in doc_type_rows:
            doc_type.doc_terms = [] if terms is None else [int(t) for t in terms.split(",")]
        dumped = [{"doc_type": DocTypeSchema().dump(row[0])} for row in doc_type_rows]

        # Per-doc_type mark-job status counters: all tasks vs. approved tasks.
        all_status, all_marked_status = MarkTaskModel().count_status_by_user(
            nlp_task_id=nlp_task_id, current_user=current_user)
        all_status_dict = Common().tuple_list2dict(all_status)
        all_marked_status_dict = Common().tuple_list2dict(all_marked_status)

        result = []
        for item in dumped:
            doc_type_id = item["doc_type"]["doc_type_id"]
            job_counts = all_status_dict.get(doc_type_id, {})
            marked_counts = all_marked_status_dict.get(doc_type_id, {})
            total_jobs = len(job_counts)
            # A mark job counts as labeled when its marked count equals its total.
            labeled_jobs = sum(
                1 for job_id, count_sum in job_counts.items()
                if count_sum == marked_counts.get(job_id, 0))
            item.update(progress_state={
                "job_num": total_jobs,
                "labeled_job_number": labeled_jobs,
                "progress_rate": round(labeled_jobs / total_jobs, 2) if total_jobs > 0 else 0,
            })

            # Attach the most recent evaluation, if any.
            latest_evaluate = EvaluateTaskModel().get_latest_evaluate_by_doc_type_id(
                nlp_task_id=nlp_task_id, doc_type_id=doc_type_id)
            if latest_evaluate:
                item.update(evaluate=EvaluateTaskSchema().dump(latest_evaluate))
            result.append(item)
        return result
예제 #4
0
 def delete_evaluate_task_by_id(evaluate_task_id):
     """Delete the evaluate task with the given id and commit immediately."""
     EvaluateTaskModel().delete(evaluate_task_id)
     session.commit()
예제 #5
0
 def update_evaluate_task_by_id(evaluate_task_id, args):
     """Apply the field updates in ``args`` to an evaluate task, commit, and return it."""
     updated_task = EvaluateTaskModel().update(evaluate_task_id, **args)
     session.commit()
     return updated_task
예제 #6
0
 def get_evaluate_task_by_id(evaluate_task_id):
     """Fetch an evaluate task and attach its owning train_job_id for serialization."""
     evaluate_task = EvaluateTaskModel().get_by_id(evaluate_task_id)
     parent_train_task = TrainTaskModel().get_by_id(evaluate_task.train_task_id)
     evaluate_task.train_job_id = parent_train_task.train_job_id
     return evaluate_task
예제 #7
0
    def get_by_nlp_task_id(nlp_task_id,
                           search,
                           current_user: CurrentUser,
                           order_by="created_time",
                           order_by_desc=True,
                           limit=10,
                           offset=0,
                           **kwargs):
        """
        Return (count, page) of train jobs for an NLP task.

        Each returned train_job carries its doc_type, model_version, the list
        of its train_tasks (``train_list``) and, when available, the first
        successful model evaluation. Paging is applied in Python after the
        query, so ``count`` is the total number of matching jobs.
        """
        # Only these kwargs are honoured as TrainJob column filters.
        accepted_filters = ("train_job_status", "doc_type_id")

        # One row per (train_task, train_job, doc_type); deleted rows excluded.
        query = session.query(TrainTask, TrainJob, DocType) \
            .outerjoin(TrainJob, TrainTask.train_job_id == TrainJob.train_job_id) \
            .outerjoin(DocType, DocType.doc_type_id == TrainJob.doc_type_id) \
            .filter(DocType.nlp_task_id == nlp_task_id,
                    ~DocType.is_deleted,
                    ~TrainJob.is_deleted,
                    ~TrainTask.is_deleted)

        # Managers and guests only see doc types belonging to their groups.
        if current_user.user_role in [RoleEnum.manager.value, RoleEnum.guest.value]:
            query = query.filter(DocType.group_id.in_(current_user.user_groups))

        for key, value in kwargs.items():
            if key in accepted_filters:
                query = query.filter(getattr(TrainJob, key) == value)
        if search:
            query = query.filter(TrainJob.train_job_name.like(f'%{search}%'))

        order_column = getattr(TrainJob, order_by)
        query = query.order_by(order_column.desc() if order_by_desc else order_column)

        # Group rows by train_job, collecting each job's train_tasks.
        train_job_list = []
        job_id_list = []
        for train_task, train_job, doc_type in query.all():
            train_task.mark_job_ids = [
                m2m.mark_job_id
                for m2m in TrainM2mMarkbModel().get_by_filter(
                    limit=99999, train_job_id=train_task.train_job_id)
            ]
            if train_task.train_job_id in job_id_list:
                # Job already seen: just append this task to its train_list.
                seen_job = train_job_list[job_id_list.index(train_task.train_job_id)]
                seen_job.train_list.append(train_task)
                continue
            # First task of this job: decorate the job for serialization.
            job_id_list.append(train_task.train_job_id)
            train_job.doc_type = doc_type
            _, model_evaluate_list = EvaluateTaskModel().get_by_train_job_id(
                train_job_id=train_job.train_job_id,
                evaluate_task_status=int(StatusEnum.success))
            if model_evaluate_list:
                train_job.model_evaluate = model_evaluate_list[0]
            train_job.model_version = train_task.model_version
            train_job.train_list = [train_task]
            train_job_list.append(train_job)

        count = len(train_job_list)
        return count, train_job_list[offset:offset + limit]