def creat_user_task():
    """Seed the user_task table with five labeled demo tasks (idempotent).

    Only runs when the table is empty. One user task is created for each of
    the seeded mark tasks (ids 1-5), all assigned to annotator 3 and already
    in the "labeled" state.

    NOTE(review): function name has a typo ("creat"); kept unchanged for
    caller compatibility.
    """
    from app.model import UserTaskModel

    if UserTaskModel().is_empty_table():
        # The five rows differ only in mark_task_id — build them in one pass.
        user_tasks = [
            dict(app_id=1, created_by=1, mark_task_id=mark_task_id,
                 annotator_id=3, user_task_status=int(StatusEnum.labeled))
            for mark_task_id in range(1, 6)
        ]
        UserTaskModel().bulk_create(user_tasks)
        session.commit()
def create_evaluate_task_by_train_job_id(train_job_id, evaluate_task_name, evaluate_task_desc, mark_job_ids, doc_term_ids, doc_relation_ids, use_rule=0): """ 如果后面要加重新训练的逻辑,这部分要改,不能根据train_job_id去创建评估任务,而是根据train_task_id, 目前先保留,因为目前train_job和train_task是一一对应,不会有影响 """ # get correspondent train_job, doc_type, train_task, nlp_task by train_job_id train_job = TrainJobModel().get_by_id(train_job_id) doc_type = DocTypeModel().get_by_id(train_job.doc_type_id) doc_term_list = DocTermModel().get_by_filter(limit=99999, doc_type_id=doc_type.doc_type_id) doc_type.doc_term_list = doc_term_list nlp_task = NlpTaskEnum(doc_type.nlp_task_id) _, train_task_list = TrainTaskModel().get_by_filter(train_job_id=train_job_id) train_task = train_task_list[0] # create evaluate_task evaluate_task = EvaluateTaskModel().create(evaluate_task_name=evaluate_task_name, evaluate_task_desc=evaluate_task_desc, train_task_id=train_task.train_task_id, evaluate_task_status=int(StatusEnum.processing)) # bulk create evaluate m2m mark evaluate_m2m_mark_list = [{"evaluate_task_id": evaluate_task.evaluate_task_id, "mark_job_id": _id} for _id in mark_job_ids] EvaluateM2mMarkModel().bulk_create(evaluate_m2m_mark_list) # push to evaluate redis queue doc_term_ids = [str(t.doc_term_id) for t in RelationM2mTermModel().get_by_filter(limit=99999, doc_relation_ids=[int(rl) for rl in doc_relation_ids])] push_evaluate_task_to_redis(nlp_task, evaluate_task, train_task, doc_type, mark_job_ids, doc_term_ids, doc_relation_ids, use_rule) session.commit() return evaluate_task
def create_doc():
    """Seed the doc table with five demo PDF documents (idempotent).

    Only runs when the table is empty; every row shares the display name
    "doc1.pdf" but gets a fresh UUID as its unique storage name.
    """
    from app.model import DocModel
    from uuid import uuid4

    if DocModel().is_empty_table():
        # The five rows differ only in doc_id — build them in one pass.
        docs = [
            dict(app_id=1, created_by=1, doc_id=doc_id,
                 doc_raw_name="doc1.pdf", doc_unique_name=str(uuid4()))
            for doc_id in range(1, 6)
        ]
        DocModel().bulk_create(docs)
        session.commit()
def import_mark_job(self, files, args, nlp_task):
    """Create an already-approved mark job from pre-labeled upload files.

    Validates the doc type, creates the job, imports each file with the
    importer matching the NLP task type, commits, and returns the job dump.

    Raises:
        TypeError: when nlp_task is not classify/extract/wordseg.
    """
    # Raises if the doc type does not exist — result intentionally unused.
    DocTypeModel().get_by_id(args['doc_type_id'])
    job = MarkJobModel().create(
        mark_job_name=args['mark_job_name'],
        mark_job_type=args['mark_job_type'],
        mark_job_desc=args.get('mark_job_desc'),
        doc_type_id=args['doc_type_id'],
        mark_job_status=int(StatusEnum.approved),
        assign_mode='average',
    )
    # Dispatch table instead of an if/elif chain.
    importers = {
        NlpTaskEnum.classify: self.import_labeled_classify_files,
        NlpTaskEnum.extract: self.import_labeled_extract_files,
        NlpTaskEnum.wordseg: self.import_labeled_wordseg_files,
    }
    tasks = []
    for upload in files:
        importer = importers.get(nlp_task)
        if importer is None:
            raise TypeError('nlp_task illegal')
        tasks.extend(importer(upload, job))
    session.commit()
    return MarkJobSchema().dump(job)
def create_mark_job(self, files, nlp_task: NlpTaskEnum, args):
    """Create a mark job from raw upload files and assign annotators.

    CSV uploads are treated as batches; txt/docx/doc/pdf as single docs.

    Raises:
        TypeError: for any other file extension.
    """
    reviewer_ids = [args['assessor_id']] if args.get('assessor_id') else []
    mark_job = MarkJobModel().create(
        mark_job_name=args['mark_job_name'],
        mark_job_type=args['mark_job_type'],
        mark_job_desc=args.get('mark_job_desc'),
        doc_type_id=args['doc_type_id'],
        assign_mode=args['assign_mode'],
        reviewer_ids=reviewer_ids,
        annotator_ids=args['labeler_ids'],
    )
    pipe = r.pipeline()
    unassigned_tasks = []
    for upload in files:
        ext = get_ext(upload.filename)
        if ext == 'csv':
            new_tasks = self.upload_batch_files(upload, mark_job, nlp_task)
        elif ext in ('txt', 'docx', 'doc', 'pdf'):
            new_tasks = self.upload_single_file(upload, mark_job, nlp_task)
        else:
            raise TypeError('file type illegal')
        unassigned_tasks.extend(new_tasks)
    if unassigned_tasks:
        # Distribute the new tasks among the configured annotators.
        self.assign_annotator(unassigned_tasks, args['assign_mode'], args['labeler_ids'])
    pipe.execute()
    session.commit()
    return MarkJobSchema().dump(mark_job)
def delete_relation_mapping(doc_relation_id):
    """Soft-delete every term mapping row of the given doc relation."""
    mapping_query = session.query(RelationM2mTerm).filter(
        ~RelationM2mTerm.is_deleted,
        RelationM2mTerm.doc_relation_id == doc_relation_id,
    )
    # Bulk UPDATE; 'fetch' keeps in-memory objects consistent with the DB.
    mapping_query.update({RelationM2mTerm.is_deleted: 1}, synchronize_session='fetch')
    session.commit()
def detele_task(task_id):
    """Delete one mark task, then refresh and return its parent mark job.

    NOTE(review): name has a typo ("detele"); kept for caller compatibility.
    """
    mark_job_id = MarkTaskModel().get_mark_job_id_by_id(task_id)
    MarkTaskModel().delete(task_id)
    session.commit()
    # Recompute the job status now that one of its tasks is gone.
    refreshed_status = MarkJobModel().check_mark_job_status(mark_job_id)
    updated_job = MarkJobModel().update(mark_job_id, mark_job_status=refreshed_status)
    session.commit()
    return updated_job
def create_new_rule(args):
    """Create a classify rule for a doc term; at most one rule per term.

    Raises:
        ValueError: when the term already has a rule.
    """
    if DocTermModel().check_exists_rule(args.get("doc_term_id")):
        raise ValueError("该标签的规则已存在,请勿重复创建")
    new_rule = DocTermModel().create_classify_rule(**args)
    session.commit()
    return ClassifyDocRuleSchema().dump(new_rule)
def create_classify_doc_term(args, doc_type_id, doc_rule_list):
    """Create a doc term and its classify rules in a single transaction."""
    doc_term = DocTermModel().create(**args, doc_type_id=doc_type_id)
    # Flush so doc_term_id is populated before the rules reference it.
    doc_term.flush()
    for rule_kwargs in doc_rule_list:
        ClassifyRuleModel().create(doc_term_id=doc_term.doc_term_id, **rule_kwargs)
    session.commit()
    return DocTermSchema().dump(doc_term)
def create_relation(doc_type_id: int, doc_term_ids: typing.List, doc_relation_name: str):
    """Create a named relation between exactly two terms of a doc type.

    Raises:
        ValueError: when the doc type is missing or the two terms cannot
            both be found.
    """
    if not DocTypeModel().get_by_id(doc_type_id):
        raise ValueError(f"DocType {doc_type_id} 不存在")
    if len(DocTermModel().get_by_filter(doc_term_ids=doc_term_ids)) != 2:
        raise ValueError(f"DocTerm 不存在或已被删除")
    relation = DocTermModel().create_relation(doc_relation_name, doc_term_ids, doc_type_id=doc_type_id)
    session.commit()
    return {
        "doc_relation_name": doc_relation_name,
        "doc_relation_id": relation.doc_relation_id,
    }
def update(self, doc_type_id, **kwargs) -> DocType:
    """Update whitelisted fields of a DocType row and commit.

    Unknown keyword arguments are silently ignored.
    """
    accept_keys = ("doc_type_name", "doc_type_desc", "is_favorite", "group_id")
    _doc_type = session.query(DocType).filter(
        DocType.doc_type_id == doc_type_id).one()
    # Apply only the whitelisted fields that were actually supplied.
    for key in accept_keys:
        if key in kwargs:
            setattr(_doc_type, key, kwargs[key])
    session.commit()
    return _doc_type
def delete(self: Resource, doc_type_id: int, doc_term_id: int) -> typing.Tuple[typing.Dict, int]:
    """Delete one doc term, refusing while it is still used in a relation.

    Returns a (message, 204) tuple for the framework to serialize.
    """
    # A term that still participates in relations must be detached first.
    if DocTermService().check_term_in_relation(doc_term_id):
        abort(400, message="该条款仍有关联关系,请确保条款没有关联关系后再做清除")
    DocTermService().remove_doc_term(doc_term_id)
    session.commit()
    return {"message": "删除成功"}, 204
def update(self, doc_rule_id, **kwargs):
    """Update a classify rule; the "state" key maps onto is_deleted.

    Only "rule_content" and "state" are honored; anything else is ignored.
    """
    classify_rule = session.query(ClassifyDocRule).filter(
        ClassifyDocRule.classify_rule_id == doc_rule_id).one()
    for key, val in kwargs.items():
        if key == "state":
            # "state" is exposed to callers but stored as the is_deleted flag.
            classify_rule.is_deleted = val
        elif key == "rule_content":
            classify_rule.rule_content = val
    session.commit()
    return classify_rule
def create_evaluate_task():
    """Seed evaluate_task with seven demo rows (idempotent).

    Only runs when the table is empty. The rows differ only in their ids and
    the train task each one belongs to.
    """
    from app.model import EvaluateTaskModel

    if EvaluateTaskModel().is_empty_table():
        # train_task_id for evaluate_task_id 1..7 respectively.
        train_task_ids = [1, 1, 2, 3, 3, 4, 5]
        for evaluate_task_id, train_task_id in enumerate(train_task_ids, start=1):
            EvaluateTaskModel().create(app_id=1, created_by=1,
                                       evaluate_task_id=evaluate_task_id,
                                       evaluate_task_name="test",
                                       evaluate_task_status=int(StatusEnum.success),
                                       train_task_id=train_task_id)
        session.commit()
def update_mark_task_status(current_user: CurrentUser, task_id, args):
    """Update a mark task from generic request args and return its dump.

    The request uses generic keys ('task_result'/'task_state'); they are
    renamed to the mark_task_* column names before the model update.
    current_user is accepted for interface symmetry but not used here.
    """
    if args.get('task_result'):
        args['mark_task_result'] = args.pop('task_result')
    if args.get('task_state'):
        # Translate the status string into its integer code.
        args['mark_task_status'] = status_str2int_mapper()[args.pop('task_state')]
    updated_task = MarkTaskModel().update(task_id, **args)
    session.commit()
    return MarkTaskSchema().dump(updated_task)
def update_user_task_status(current_user: CurrentUser, task_id, args):
    """Update the caller's user task and re-sync the parent mark task.

    Generic request keys ('task_result'/'task_state') are renamed to the
    user_task_* column names before the update.
    """
    if args.get('task_result'):
        args['user_task_result'] = args.pop('task_result')
    if args.get('task_state'):
        # Translate the status string into its integer code.
        args['user_task_status'] = status_str2int_mapper()[args.pop('task_state')]
    updated_task = UserTaskModel().update_by_annotator_id(current_user, task_id, **args)
    # Keep the aggregated mark task consistent with its user tasks.
    MarkTaskModel().check_user_task_and_update_mark_task(task_id)
    session.commit()
    return UserTaskSchema().dump(updated_task)
def update_mark_task_and_user_task_by_mark_task_id(mark_task_id, args):
    """Update a mark task and mirror its status/result onto its user tasks.

    Returns the updated mark task and the bulk-updated user task list.
    """
    # Update the mark task first; its new values drive the user tasks.
    mark_task = MarkTaskModel().update(mark_task_id, **args)
    user_tasks = UserTaskModel().get_by_filter(limit=99999, mark_task_id=mark_task_id)
    user_task_ids = [task.user_task_id for task in user_tasks]
    user_task_list = UserTaskModel().bulk_update(
        user_task_ids,
        user_task_status=mark_task.mark_task_status,
        user_task_result=mark_task.mark_task_result,
    )
    session.commit()
    return mark_task, user_task_list
def create_status():
    """Seed the status lookup table from StatusEnum (idempotent)."""
    from app.entity import Status
    from app.model import StatusModel

    if len(StatusModel().get_all()) == 0:
        # One row per enum member, mirroring its value and name.
        init_status = [
            Status(app_id=1, created_by=1, status_id=int(member), status_name=member.name)
            for member in StatusEnum
        ]
        StatusModel().bulk_create(init_status)
        session.commit()
        logger.info(" [x] Seeds status has been created. ")
def create_nlp_task():
    """Seed the nlp_task lookup table from NlpTaskEnum (idempotent)."""
    from app.entity import NlpTask
    from app.model import NlpTaskModel

    if len(NlpTaskModel().get_all()) == 0:
        # One row per enum member, mirroring its value and name.
        init_nlp_tasks = [
            NlpTask(app_id=1, created_by=1, nlp_task_id=int(member), nlp_task_name=member.name)
            for member in NlpTaskEnum
        ]
        NlpTaskModel().bulk_create(init_nlp_tasks)
        session.commit()
        logger.info(" [x] Seeds nlp_task has been created. ")
def create_doc_type(current_user: CurrentUser, args):
    """Create a doc type plus its terms under the caller's group.

    When no valid group_id is supplied the user's first group is used;
    users without any group are rejected with 403.
    """
    doc_term_list = args.pop('doc_term_list')
    if 'group_id' not in args or args['group_id'] < 1:
        if not current_user.user_groups:
            # Role without a group may not create projects.
            abort(403, message="当前角色禁止创建项目,请切换角色操作")
        args['group_id'] = current_user.user_groups[0]
    doc_type = DocTypeModel().create(**args)
    # Attach every submitted term to the freshly created doc type.
    for term in doc_term_list:
        term['doc_type_id'] = doc_type.doc_type_id
    doc_type.doc_term_list = DocTermModel().bulk_create(doc_term_list)
    session.commit()
    return DocTypeSchema().dumps(doc_type)
def create_seeds(self, debug=False):
    """Populate base lookup tables; with debug=True also load demo fixtures."""
    # Base tables are required in every environment.
    self.create_nlp_task()
    self.create_status()
    if debug:
        # Demo fixtures, run in dependency order.
        for seed_step in (
            self.create_doc_type,
            self.create_doc_term,
            self.create_mark_job,
            self.create_train_job,
            self.create_doc,
            self.create_mark_task,
            self.creat_user_task,
            self.create_train_task,
            self.create_evaluate_task,
        ):
            seed_step()
    session.commit()
def update_mark_job_status_by_mark_task(mark_task: MarkTask):
    """Recompute the parent mark job's status from its tasks' statuses.

    Any failed task fails the whole job; otherwise the job takes the
    smallest (least advanced) status among its tasks. The job row is only
    touched when the status actually changes.
    """
    status_list = MarkTaskModel().get_distinct_status_by_mark_job(mark_task.mark_job_id)
    failed = int(StatusEnum.fail)
    new_job_status = failed if failed in status_list else min(status_list)
    mark_job = MarkJobModel().get_by_id(mark_task.mark_job_id)
    if mark_job.mark_job_status != new_job_status:
        MarkJobModel().update(
            mark_task.mark_job_id,
            mark_job_status=new_job_status,
            updated_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        )
        session.commit()
def update_train_task_by_id(train_job_id, train_task_id, is_check_train_terms, model_type, args):
    """
    1. Derive the train status/result from the per-term task states, or
    2. Directly set the train status/result passed in args, and
    3. Handle model online-state transitions (classify and extract differ).
    """
    train_job = TrainJobModel().get_by_id(train_job_id)
    train_task = TrainTaskModel().get_by_id(train_task_id)
    if is_check_train_terms:
        # Whether the overall status must be derived from the term tasks.
        _, training_terms = TrainTermTaskModel().get_by_filter(limit=99999, train_task_id=train_task_id,
                                                               train_term_status=int(StatusEnum.training))
        _, failed_terms = TrainTermTaskModel().get_by_filter(limit=99999, train_task_id=train_task_id,
                                                             train_term_status=int(StatusEnum.fail))
        if not training_terms:
            # Nothing still training: success unless some term failed.
            if not failed_terms:
                args["train_status"] = int(StatusEnum.success)
            else:
                args["train_status"] = int(StatusEnum.fail)
        else:
            args["train_status"] = int(StatusEnum.training)
    else:
        # no limit to set model_train_state=success/failed
        if args["train_status"] == int(StatusEnum.online):
            # validation
            # NOTE(review): these compare train_status against enum members
            # while the rest of the file uses int(...) codes — confirm the
            # stored type makes these comparisons meaningful.
            if train_task.train_status == StatusEnum.online:
                abort(400, message="该模型已经上线")
            if train_task.train_status != StatusEnum.success:
                abort(400, message="只能上线训练成功的模型")
            # send model train http request
            service_url = _get("CLASSIFY_MODEL_ONLINE") if model_type == "classify" else _get("EXTRACT_MODEL_ONLINE")
            resp = requests.post(f"{service_url}?model_version={train_task.model_version}")
            if resp.status_code < 200 or resp.status_code >= 300:
                abort(500, message=f"上线服务 <{service_url}> 出现错误: {resp.text}")
            # find all online model under this doc_type_id
            online_models = TrainTaskModel().get_by_doc_type_id(doc_type_id=train_job.doc_type_id,
                                                                train_status=int(StatusEnum.online))
            # unload online models (demote them back to success)
            TrainTaskModel().bulk_update([train.train_task_id for train in online_models],
                                         train_status=int(StatusEnum.success))
    # update train task
    train_task = TrainTaskModel().update(train_task_id, **args)
    session.commit()
    return train_task
def update_doc_type(args, doc_type_id):
    """Update a doc type and synchronize its terms with the submitted list.

    Terms present in args["doc_term_list"] are bulk-updated; existing terms
    absent from that list are deleted.
    """
    item = DocTypeModel().update(doc_type_id, **args)
    existed_doc_term_ids = [dt.doc_term_id for dt in DocTermModel().get_by_filter(doc_type_id=doc_type_id)]
    updated_doc_term_ids = []
    if args.get("doc_term_list"):
        for i in args.get("doc_term_list"):
            i.update({"doc_type_id": doc_type_id})
            updated_doc_term_ids.append(i.get("doc_term_id", 0))
        DocTermModel().bulk_update(args.get("doc_term_list"))
        session.commit()
    # Remove doc terms
    # NOTE(review): when no doc_term_list is submitted, updated_doc_term_ids
    # stays empty, so EVERY existing term of this doc type is deleted below —
    # confirm this is intended rather than "leave terms untouched".
    for i in existed_doc_term_ids:
        if i not in updated_doc_term_ids:
            DocTermModel().delete(i)
    session.commit()
    return DocTypeSchema().dump(item)
def update_predict_job_status_by_predict_task(predict_task: PredictTask):
    """Recompute the parent predict job status from all of its tasks.

    One failed task fails the job; otherwise any task still processing keeps
    the job processing; otherwise the job is successful.
    """
    _, sibling_tasks = PredictTaskModel().get_by_filter(
        limit=99999, predict_job_id=predict_task.predict_job_id)
    states = [task.predict_task_status for task in sibling_tasks]
    if int(StatusEnum.fail) in states:
        new_job_status = int(StatusEnum.fail)
    elif int(StatusEnum.processing) in states:
        new_job_status = int(StatusEnum.processing)
    else:
        new_job_status = int(StatusEnum.success)
    PredictJobModel().update(
        predict_task.predict_job_id,
        predict_job_status=new_job_status,
        updated_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    session.commit()
def update_train_task_by_model_version(model_version, is_check_train_terms, args):
    """Update the train task identified by model_version.

    When is_check_train_terms is true, train_status is derived from the
    per-term tasks: training while any term trains, fail if any term
    failed, success otherwise.
    """
    train_task = TrainTaskModel().get_by_filter(model_version=model_version)[1][0]
    if is_check_train_terms:
        _, training_terms = TrainTermTaskModel().get_by_filter(
            limit=99999, train_task_id=train_task.train_task_id,
            train_term_status=int(StatusEnum.training))
        _, failed_terms = TrainTermTaskModel().get_by_filter(
            limit=99999, train_task_id=train_task.train_task_id,
            train_term_status=int(StatusEnum.fail))
        if training_terms:
            args["train_status"] = int(StatusEnum.training)
        elif failed_terms:
            args["train_status"] = int(StatusEnum.fail)
        else:
            args["train_status"] = int(StatusEnum.success)
    train_task = TrainTaskModel().update(train_task.train_task_id, **args)
    session.commit()
    return train_task
def create_doc_term():
    """Seed doc_term with the three demo NER labels (idempotent).

    Only runs when the table is empty; creates person/place/organization
    labels with their part-of-speech aliases under doc type 1.
    """
    from app.model import DocTermModel

    if len(DocTermModel().get_all()) == 0:
        # (doc_term_id, doc_term_name, doc_term_alias)
        seed_terms = [
            (1, "人名", "nr"),
            (2, "地名", "ns"),
            (3, "机构名", "nt"),
        ]
        for doc_term_id, doc_term_name, doc_term_alias in seed_terms:
            DocTermModel().create(app_id=1, created_by=1, doc_term_id=doc_term_id,
                                  doc_term_name=doc_term_name, doc_term_alias=doc_term_alias,
                                  doc_type_id=1)
        session.commit()
def create_export_task(current_user: CurrentUser, mark_job_ids, mark_type, export_type):
    """Create an export job row and enqueue the export for the offline worker.

    mark_job_ids is a comma-separated id string; the doc type is taken from
    the first mark job in the list.
    """
    # raise no result found exception
    redis_message = {}
    doc_type_id = MarkJobModel().get_by_id(int(mark_job_ids.split(',')[0])).doc_type_id
    doc_terms = [str(row.doc_term_id) for row in DocTermModel().get_by_filter(doc_type_id=doc_type_id)]
    if mark_type == 'wordseg':
        # NOTE(review): '10086' looks like a placeholder field id for wordseg
        # exports — confirm against the offline worker's expectations.
        doc_terms = ['10086']
    elif mark_type == 'relation':
        # relation exports also need the relation-id -> entity-term-ids map.
        relation_2_entity_mapping = [{i[0]: [d for d in i[1].split(",")]} for i in
                                     RelationM2mTermModel.get_relation_term_mapping(doc_type_id)]
        redis_message.update({
            'relation_2_entity_mapping': relation_2_entity_mapping,
        })
    # Version string: timestamp + short uuid + doc type + job ids.
    version = '{}{}_{}_{}'.format(datetime.now().strftime("%Y%m%d%H%M%S"), str(uuid.uuid4())[:4],
                                  doc_type_id, mark_job_ids)
    file_path = 'upload/export/{}.zip'.format(version)
    new_export_job = ExportJobModel().create(**{
        "export_file_path": file_path,
        "doc_type_id": doc_type_id,
        "created_by": current_user.user_id,
        "export_job_status": StatusEnum.processing.value,
        "export_mark_job_ids": [int(i) for i in mark_job_ids.split(',')]
    })
    export_id = new_export_job.export_job_id
    # Commit before enqueueing so the worker can see the job row.
    session.commit()
    # Hand the export off to the offline NLP worker via redis.
    redis_message.update({
        'export_id': export_id,
        'export_type': export_type,
        'file_path': file_path,
        'version': version,
        'doc_type': doc_type_id,
        'fields': ','.join(doc_terms),
        'mark_job_ids': mark_job_ids,
        'task_type': mark_type,
    })
    r.lpush(_get('DATA_EXPORT_QUEUE_KEY'), json.dumps(redis_message))
def export_custom_algorithm_by_id(custom_algorithm_id):
    """Ask a custom algorithm's predict service to export its docker image.

    On success the model row is marked processing with the new image URL;
    the raw service response is returned.
    """
    custom_algorithm = CustomAlgorithmModel().get_by_id(custom_algorithm_id)
    if not custom_algorithm:
        abort(400, message="该自定义模型不存在")
    endpoint = (f'http://{custom_algorithm.custom_algorithm_ip}:'
                f'{custom_algorithm.custom_algorithm_predict_port}/docker')
    resp = requests.post(
        endpoint,
        json={
            "custom_algorithm_id": custom_algorithm.custom_algorithm_id,
            "al_name": custom_algorithm.custom_algorithm_alias
        })
    result = json.loads(resp.text)
    if result['status'] == 'FAIL':
        abort(
            500,
            message=
            f"导出服务 <{custom_algorithm.custom_algorithm_ip}:{custom_algorithm.custom_algorithm_predict_port}> 出现错误"
        )
    CustomAlgorithmModel().update(custom_algorithm_id,
                                  export_url=result['image_name'],
                                  custom_state=int(StatusEnum.processing))
    session.commit()
    return result
def update_mark_task_or_user_task_status(current_user: CurrentUser, task_id, args):
    """Update the caller's task status/result, by role.

    Annotators update their own user task (which then re-syncs the parent
    mark task); every other role updates the mark task directly. Generic
    request keys ('task_result'/'task_state') are renamed to the
    role-specific column names before the model update.
    """
    if current_user.user_role in [RoleEnum.annotator.value]:
        _rename_task_args(args, 'user_task')
        item = UserTaskModel().update_by_annotator_id(current_user, task_id, **args)
        # Keep the aggregated mark task consistent with its user tasks.
        MarkTaskModel().check_user_task_and_update_mark_task(task_id)
        schema = UserTaskSchema
    else:
        _rename_task_args(args, 'mark_task')
        item = MarkTaskModel().update(task_id, **args)
        schema = MarkTaskSchema
    session.commit()
    return schema().dump(item)


def _rename_task_args(args, prefix):
    """Rename generic task_result/task_state keys in place to
    <prefix>_result / <prefix>_status; task_state is resolved to the
    StatusEnum integer code."""
    if args.get('task_result'):
        args[f'{prefix}_result'] = args.pop('task_result')
    if args.get('task_state'):
        args[f'{prefix}_status'] = int(getattr(StatusEnum, args.pop('task_state')))