예제 #1
0
class WordsegMarkJobImportResource(Resource):
    """Endpoint for importing already-annotated word-segmentation data."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "task_type": fields.String(
                required=True,
                validate=lambda value: value in ('machine', 'manual'),
            ),
        },
        locations=('form', 'files'),
    )
    def post(self: Resource,
             args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Upload already-annotated data.

        Each uploaded file must have the ``txt`` extension; any other
        extension triggers ``abort(400, ...)``. The files and the parsed
        arguments are then handed to ``MarkJobService.import_mark_job``
        for the word-segmentation task.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the service result.
        """
        uploads = args['files']
        # Only txt uploads are accepted for this import.
        for upload in uploads:
            extension = get_ext(upload.filename)
            if extension != "txt":
                abort(400, message="上传已标注分词数据仅支持txt格式。")
        result = MarkJobService().import_mark_job(
            uploads, args, nlp_task=NlpTaskEnum.wordseg)
        body = {"message": "创建成功", "result": result}
        return body, 201
예제 #2
0
class ClassifyMarkJobImportResource(Resource):
    """Endpoint for importing already-annotated classification data."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
        },
        locations=('form', 'files'),
    )
    def post(self: Resource,
             args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Import annotated classification data from uploaded csv files.

        Each uploaded file must have the ``csv`` extension; any other
        extension triggers ``abort(400, ...)``. A ``UnicodeDecodeError`` or
        ``KeyError`` raised by the import is also reported through
        ``abort(400, ...)``.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the service result.
        """
        uploads = args['files']
        # Only csv uploads are accepted for this import.
        for upload in uploads:
            extension = get_ext(upload.filename)
            if extension != "csv":
                abort(400, message="已标注分类数据仅支持csv格式。")
        try:
            result = MarkJobService().import_mark_job(
                uploads, args, nlp_task=NlpTaskEnum.classify)
        except UnicodeDecodeError:
            abort(400, message="文件编码错误 请上传utf-8编码文件")
        except KeyError:
            abort(400, message="文件格式不合规 请查看csv文件模版")
        else:
            body = {"message": "创建成功", "result": result}
            return body, 201
예제 #3
0
class WordsegMarkJobListResource(Resource):
    """List and create mark jobs for the word-segmentation task."""

    @parse(
        {
            "is_superuser": fields.Boolean(missing=False),
            "query": fields.String(missing=''),
            "offset": fields.Integer(missing=0),
            "limit": fields.Integer(missing=10),
            "doc_type_id": fields.Integer(missing=None),
            'order_by': fields.String(missing='-created_time'),
        }
    )
    def get(self: Resource,
            args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Return the word-segmentation mark jobs matching the parsed arguments.

        Args:
            args: Request arguments parsed by the ``@parse`` schema; passed
                unchanged to ``MarkJobService.get_mark_job_list_by_nlp_task``.

        Returns:
            A ``(body, 200)`` tuple whose body carries the result and count
            returned by the service.
        """
        service = MarkJobService()
        count, result = service.get_mark_job_list_by_nlp_task(
            args, nlp_task=NlpTaskEnum.wordseg)
        body = {"message": "请求成功", "result": result, "count": count}
        return body, 200

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "assign_mode": fields.String(
                required=True,
                validate=lambda value: value in ('average', 'together'),
            ),
            "assessor_id": fields.Integer(missing=0),
            "labeler_ids": fields.List(fields.Integer(), required=True),
        },
        locations=('form', 'files'),
    )
    def post(self: Resource,
             args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Create a word-segmentation mark job from the uploaded files.

        ``abort(400, ...)`` is called when ``assign_mode`` equals
        ``AssignModeEnum.together``, when the job type detected from the
        files is not ``"text"``, or when job creation raises ``TypeError``.
        The submitted ``mark_job_type`` is overwritten with the detected one.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the service result.
        """
        uploads = args["files"]
        if args["assign_mode"] == AssignModeEnum.together:
            abort(400, message="不支持共同标注")
        detected_type = Common().check_job_type_by_files(uploads)
        if detected_type == "text":
            # The type detected from the files replaces the submitted value.
            args['mark_job_type'] = detected_type
        else:
            abort(400, message="请上传纯文本文档(txt/csv)")
        try:
            result = MarkJobService().create_mark_job(
                uploads, NlpTaskEnum.wordseg, args)
        except TypeError:
            abort(400, message="上传文件类型错误")
        else:
            body = {"message": "创建成功", "result": result}
            return body, 201
예제 #4
0
class RelationMarkJobListResource(Resource):
    """List and create mark jobs for the relation task."""

    @parse(
        {
            "is_superuser": fields.Boolean(missing=False),
            "query": fields.String(missing=''),
            "offset": fields.Integer(missing=0),
            "limit": fields.Integer(missing=10),
            "doc_type_id": fields.Integer(missing=None),
            'order_by': fields.String(missing='-created_time'),
        }
    )
    def get(self: Resource,
            args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Return the relation mark jobs matching the parsed arguments.

        Args:
            args: Request arguments parsed by the ``@parse`` schema; passed
                unchanged to ``MarkJobService.get_mark_job_list_by_nlp_task``.

        Returns:
            A ``(body, 200)`` tuple whose body carries the result and count
            returned by the service.
        """
        service = MarkJobService()
        count, result = service.get_mark_job_list_by_nlp_task(
            args, NlpTaskEnum.relation)
        body = {"message": "请求成功", "result": result, "count": count}
        return body, 200

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "assign_mode": fields.String(
                required=True,
                validate=lambda value: value in ('average', 'together'),
            ),
            "assessor_id": fields.Integer(),
            "labeler_ids": fields.List(fields.Integer(), required=True),
            # Rules are used by default.
            "use_rule": fields.Integer(missing=1),
        },
        locations=('form', 'files'),
    )
    def post(self: Resource,
             args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Create a relation mark job from the uploaded files.

        ``abort(400, ...)`` is called when ``Common.check_job_type_by_files``
        returns a falsy value. Otherwise the submitted ``mark_job_type`` is
        overwritten with the detected one before the job is created.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the service result.
        """
        uploads = args['files']
        detected_type = Common().check_job_type_by_files(uploads)
        if detected_type:
            # The type detected from the files replaces the submitted value.
            args['mark_job_type'] = detected_type
        else:
            abort(400, message='请上传全部纯文本文档(txt/csv)或者全部电子文档(pdf/word文档)')

        service = MarkJobService()
        result = service.create_mark_job(uploads, NlpTaskEnum.relation, args)

        body = {"message": "创建成功", "result": result}
        return body, 201
예제 #5
0
class ExtractMarkJobImportResource(Resource):
    """Endpoint for importing already-annotated data for the extract task."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
        },
        locations=('form', 'files'),
    )
    def post(self: Resource, args: typing.Dict):
        """
        Import annotated data from uploaded txt files.

        ``task_type`` is not part of the request schema; it is always set to
        ``'manual'`` before the arguments reach the service. Each uploaded
        file must have the ``txt`` extension; any other extension triggers
        ``abort(400, ...)``.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the service result.
        """
        args['task_type'] = 'manual'
        uploads = args['files']
        # Only txt uploads are accepted for this import.
        for upload in uploads:
            extension = get_ext(upload.filename)
            if extension != "txt":
                abort(400, message="导入已标注序列标注数据仅支持txt格式。")
        result = MarkJobService().import_mark_job(
            uploads, args, nlp_task=NlpTaskEnum.extract)
        body = {"message": "创建成功", "result": result}
        return body, 201
예제 #6
0
class ExtractJobListResource(Resource, CurrentUserMixin):
    """List and create predict jobs for the NLP task resolved from the route."""

    @parse({
        "offset": fields.Integer(missing=0),
        "limit": fields.Integer(missing=10),
        "query": fields.String(missing=''),
        "doc_type_id": fields.Integer(missing=0),
        "order_by": fields.String(missing='-created_time'),
    })
    def get(self: Resource,
            args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Return a page of predict jobs visible to the current user.

        ``order_by`` is a field name with an optional direction prefix: a
        leading ``-`` requests descending order, anything else ascending.
        An empty value is treated like a missing one and falls back to the
        schema default ``-created_time``.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 200)`` tuple whose body carries the jobs serialized
            with ``PredictJobSchema`` and the count returned by the service.
        """
        nlp_task_id = Common().get_nlp_task_id_by_route()

        # Fall back to the declared default so an empty string cannot raise
        # IndexError when the prefix is inspected below.
        raw_order_by = args["order_by"] or "-created_time"
        order_by_desc = raw_order_by.startswith("-")
        # Strip only a real direction marker. Slicing unconditionally would
        # corrupt a bare field name ("created_time" -> "reated_time").
        # A leading space is also stripped because a literal "+" in a query
        # string is commonly decoded as a space; such values keep working.
        if raw_order_by[0] in "-+ ":
            order_by = raw_order_by[1:]
        else:
            order_by = raw_order_by

        count, predict_job_list = PredictService(
        ).get_predict_job_list_by_nlp_task_id(
            nlp_task_id=nlp_task_id,
            doc_type_id=args['doc_type_id'],
            search=args['query'],
            order_by=order_by,
            order_by_desc=order_by_desc,
            offset=args['offset'],
            limit=args['limit'],
            current_user=self.get_current_user())
        # get the serialized result
        result = PredictJobSchema().dump(predict_job_list, many=True)
        return {
            "message": "请求成功",
            "result": result,
            "count": count,
        }, 200

    @parse(
        {
            "extract_job_name":
            fields.String(required=True),
            "extract_job_type":
            fields.String(required=True),
            "extract_job_desc":
            fields.String(missing=""),
            "doc_type_id":
            fields.Integer(required=True),
            "files":
            fields.List(fields.File(), required=True),
            "task_type":
            fields.String(required=True,
                          validate=lambda x: x in ['machine', 'manual']),
            "use_rule":
            fields.Integer(missing=0)
        },
        locations=('form', 'files'))
    def post(self: Resource,
             args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Create a predict job for a document type from the uploaded files.

        ``task_type`` is required and validated by the schema but is not
        forwarded to ``PredictService.create_predict_job_by_doc_type_id``.

        Args:
            args: Request arguments parsed by the ``@parse`` schema.

        Returns:
            A ``(body, 201)`` tuple whose body carries the created job
            serialized with ``PredictJobSchema``.
        """
        predict_job = PredictService().create_predict_job_by_doc_type_id(
            doc_type_id=args["doc_type_id"],
            predict_job_name=args["extract_job_name"],
            predict_job_desc=args["extract_job_desc"],
            predict_job_type=args["extract_job_type"],
            files=args["files"],
            use_rule=args["use_rule"])
        result = PredictJobSchema().dump(predict_job)
        return {"message": "创建成功", "result": result}, 201