Beispiel #1
0
    def update_rule_to_redis(doc_type_id: int) -> None:
        """Publish the classify rules of one document type through ``r``.

        Builds a ``doc_term_id -> rule_content`` mapping from the
        ``(rule, term)`` pairs returned by
        ``ClassifyRuleModel().get_rule_with_term(doc_type_id)`` and stores it
        as JSON under ``queue:rule:<doc_type_id>``. The current local time
        (``YYYYmmddHHMMSS``) is then stored under ``queue:time:<doc_type_id>``.
        """
        pairs = ClassifyRuleModel().get_rule_with_term(doc_type_id)
        # When several pairs share a doc_term_id, the last one wins.
        term_to_rule = {term.doc_term_id: rule.rule_content for rule, term in pairs}

        rule_key = f'queue:rule:{doc_type_id}'
        time_key = f'queue:time:{doc_type_id}'
        r.set(rule_key, json.dumps(term_to_rule))
        # The timestamp is taken after the rules are written.
        r.set(time_key, time.strftime("%Y%m%d%H%M%S", time.localtime()))
        logger.info(term_to_rule)  # development / debugging output
Beispiel #2
0
 def create_status():
     """Seed the status table with one row per ``StatusEnum`` member.

     Does nothing when ``StatusModel().get_all()`` already returns rows.
     Otherwise one ``Status`` per enum member is bulk-created (with
     ``app_id=1`` and ``created_by=1``) and the session is committed.
     """
     from app.entity import Status
     from app.model import StatusModel

     # Seeds are written only into an empty table.
     if len(StatusModel().get_all()) != 0:
         return

     seeds = [
         Status(app_id=1,
                created_by=1,
                status_id=int(member),
                status_name=member.name)
         for member in StatusEnum
     ]
     StatusModel().bulk_create(seeds)
     session.commit()
     logger.info(" [x] Seeds status has been created. ")
Beispiel #3
0
 def create_nlp_task():
     """Seed the nlp_task table with one row per ``NlpTaskEnum`` member.

     Does nothing when ``NlpTaskModel().get_all()`` already returns rows.
     Otherwise one ``NlpTask`` per enum member is bulk-created (with
     ``app_id=1`` and ``created_by=1``) and the session is committed.
     """
     from app.entity import NlpTask
     from app.model import NlpTaskModel

     # Seeds are written only into an empty table.
     if len(NlpTaskModel().get_all()) != 0:
         return

     seeds = [
         NlpTask(app_id=1,
                 created_by=1,
                 nlp_task_id=int(member),
                 nlp_task_name=member.name)
         for member in NlpTaskEnum
     ]
     NlpTaskModel().bulk_create(seeds)
     session.commit()
     logger.info(" [x] Seeds nlp_task has been created. ")
Beispiel #4
0
def generate_classify_data(mark_job_ids):
    """Export the classification labels of the given mark jobs to a CSV file.

    One row ``["", label, uuid]`` is written per ``(mark_task, doc)`` pair
    returned by ``MarkTaskModel().get_mark_task_and_doc_by_mark_job_ids``:

    * ``uuid`` is the part of ``doc.doc_unique_name`` before the first ``.``.
    * ``label`` is the ``label_id`` of the first entry in
      ``mark_task.mark_task_result`` whose ``marked`` field equals 1, or ``""``
      when the result is missing, empty, or has no marked entry.

    Args:
        mark_job_ids: passed through unchanged to the model query.

    Returns:
        Whatever ``upload_fileset.export_to_csv`` returns (logged as the file
        path of the generated CSV).
    """
    rows = []
    pairs = MarkTaskModel().get_mark_task_and_doc_by_mark_job_ids(mark_job_ids)
    for mark_task, doc in pairs:
        # Local is not named ``uuid`` so it cannot shadow the stdlib module.
        doc_uuid = doc.doc_unique_name.split('.')[0]
        # ``next`` gets an explicit default: a result list without any marked
        # entry previously raised StopIteration and aborted the whole export.
        marked_label = next(
            (entry for entry in (mark_task.mark_task_result or [])
             if entry['marked'] == 1),
            None,
        )
        label = marked_label['label_id'] if marked_label is not None else ""
        rows.append(["", label, doc_uuid])

    file_path = upload_fileset.export_to_csv(results=rows,
                                             header=["text", "label", "uuid"])
    logger.info(f'save classify csv file to {file_path}')
    return file_path
Beispiel #5
0
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """
        Unified entry point for message-queue callbacks.

        ``args['message']['business']`` selects the handler. The ``*label``
        kinds update a mark task, its user tasks and the mark job status. The
        ``*extract`` kinds update a predict task. Both answer with the dumped
        record(s) and status code 201. Any other business value matches no
        branch, so the method falls off the end and returns ``None``.
        """
        message = args['message']
        logger.info(f"receive callback info from mq. response is: {json.dumps(args)}")

        business = message['business']
        task_state = args.get("task_state")
        task_result = args.get("task_result")

        pre_label_kinds = (
            'label',  # entity pre-labelling
            'classify_label',  # classification pre-labelling
            'relation_label',  # entity-relation pre-labelling
            'wordseg_label',  # word-segmentation pre-labelling
        )
        if business in pre_label_kinds:
            mark_changes = {}
            if task_state:
                # A successful pre-labelling run starts the task as "unlabel";
                # any other reported state marks it as "fail".
                new_status = StatusEnum.unlabel if task_state == 'success' else StatusEnum.fail
                mark_changes['mark_task_status'] = int(new_status)
            if task_result:
                mark_changes['mark_task_result'] = task_result
            mark_task, user_task_list = MarkJobService().update_mark_task_and_user_task_by_mark_task_id(
                mark_task_id=message["task_id"], args=mark_changes)
            MarkJobService().update_mark_job_status_by_mark_task(mark_task=mark_task)
            dumped_user_tasks = UserTaskSchema(many=True).dump(user_task_list)
            return {"message": "更新成功", "result": dumped_user_tasks}, 201

        extract_kinds = (
            'extract',  # entity extraction
            'classify_extract',  # classification extraction
            'relation_extract',  # entity-relation extraction
            'wordseg_extract',  # word-segmentation extraction
        )
        if business in extract_kinds:
            predict_changes = {}
            if task_state:
                predict_changes['predict_task_status'] = status_str2int_mapper()[args["task_state"]]
            if task_result:
                predict_changes['predict_task_result'] = task_result
            predict_task = PredictService().update_predict_task_by_id(
                predict_task_id=message["task_id"], args=predict_changes)
            dumped_predict_task = PredictTaskSchema().dump(predict_task)
            return {"message": "更新成功", "result": dumped_predict_task}, 201
Beispiel #6
0
 def do_ok(self):
     """Run the data processor on the file named in the entry widget.

     ``self.root.withdraw()`` is called first and ``self.root.deiconify()``
     always runs afterwards (presumably hiding and restoring the window while
     processing is in progress). The selected crops are handed to the
     processor; when none are selected, ``self.get_default_crops()`` is called
     first. Any exception is logged with its traceback and not re-raised.
     """
     try:
         self.root.withdraw()
         chosen_file = self.entry_open_file.get()
         self.data_processor.field_input_file = chosen_file
         if not self.crops_selected:
             self.get_default_crops()
         self.data_processor.crops_selected = self.crops_selected
         logger.info(f'开始处理文件:{self.data_processor.field_input_file}')
         self.data_processor.process()
         logger.info('处理完成')
     except Exception as err:
         # Failures are only logged: the message, the exception and its
         # traceback are logged as three separate error records.
         logger.error("运行出错,请确定选择了正确的文件和数据")
         logger.error(err)
         logger.error(traceback.format_exc())
     finally:
         self.root.deiconify()
0
 def before_request() -> None:
     """Populate ``g`` with the caller's identity for the current request.

     The ``User-Info`` header is expected to hold base64-encoded UTF-8 JSON;
     an absent or empty header yields an empty ``user_info``. For every field
     the query-string value takes precedence over the header value:

     * ``g.app_id``      <- ``app_id`` or ``user_info['app_id']`` (default 0)
     * ``g.user_id``     <- ``user_id`` or ``user_info['id']`` (default 0)
     * ``g.user_name``   <- ``username`` or ``user_info['username']`` (default '')
     * ``g.user_roles``  <- ``user_roles`` list or the ``name`` of each role
     * ``g.user_groups`` <- ``groups`` list or the ``id`` of each group,
       ``[-1]`` when the groups cannot be read

     The resolved identity (without groups) is logged, then
     ``bind_request_id_to_g()`` is called.

     Raises:
         Whatever ``base64.b64decode``, ``bytes.decode`` or ``json.loads``
         raise for a malformed ``User-Info`` header.
     """
     decoded = base64.b64decode(request.headers.get('User-Info', '')).decode('utf-8')
     user_info = json.loads(decoded) if decoded else {}

     g.app_id = request.args.get('app_id') or user_info.get('app_id', 0)
     g.user_id = request.args.get('user_id') or user_info.get('id', 0)
     g.user_name = request.args.get('username') or user_info.get('username', '')
     g.user_roles = request.args.getlist('user_roles') or [
         role.get('name') for role in user_info.get('roles', [])]
     try:
         g.user_groups = request.args.getlist('groups') or [
             group.get('id') for group in user_info.get('groups', [1])]
     except Exception:
         # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
         # SystemExit. Note the ``[1]`` default is not a list of dicts, so a
         # missing ``groups`` entry always lands here and yields ``[-1]``.
         g.user_groups = [-1]

     logger.info({
         "app_id": g.app_id,
         "user_id": g.user_id,
         "user_name": g.user_name,
         "user_roles": g.user_roles,
     })
     bind_request_id_to_g()
Beispiel #8
0
def add_user_tags():
    """Create or update the tag list stored for a user.

    Reads ``user_id`` and ``tags`` from ``request.values``; commas in ``tags``
    are replaced by ``|`` before storing. If a ``UserTags`` row with that
    ``user_id`` exists, its tags, update time and modify counter are updated;
    otherwise a new row is added. Either way the DB session is committed.

    Returns:
        A JSON-formatted string: code 1000 on success, code 1003 when the
        request method is neither GET nor POST.
    """
    logger.debug("function add_user_tags __enter__")

    if request.method not in ('POST', 'GET'):
        logger.warning("error, GET/POST wrong. ")
        # Previously the string was ``.encode``d before ``json.dumps``, which
        # raises TypeError on Python 3; return the text like the other paths.
        return '{"code": 1003, "msg": "%s" }' % ("请求方法不正确。")

    user_id = request.values.get("user_id", '', type=str)
    tags = request.values.get("tags", '', type=str).replace(',', '|')
    now = timestamp()

    existing = UserTags.query.filter(UserTags.user_id == user_id).first()
    if existing:
        logger.info("user exist, update. user_id: " + str(existing.user_id))
        existing.user_id = user_id
        existing.tags = tags
        existing.update_time = now
        # NOTE(review): the counter lives in the client session, so
        # ``modify_times`` counts updates made in this session, not all
        # updates to the row - confirm this is intended.
        update_count = session.get('user_tags_update_times', 0) + 1
        session['user_tags_update_times'] = update_count
        existing.modify_times = update_count
        db.session.commit()
        return '{"code": 1000, "msg": "%s" }' % ("用户标签已更新。")

    user_tags = UserTags()
    user_tags.user_id = user_id
    user_tags.tags = tags
    user_tags.create_time = now
    user_tags.update_time = now
    db.session.add(user_tags)
    db.session.commit()
    logger.info("user tags added, user_id: " + str(user_tags.user_id))
    return '{"code": 1000, "msg": "%s" }' % ("用户标签已添加。")
Beispiel #9
0
def get_simi_ids():
    """Return one page of article ids similar to the requested article.

    Request parameters (read from ``request.values``):

    * ``article_id`` - the article to look up (defaults to ``''``)
    * ``page_size``  - ids per page (default 10)
    * ``page``       - 1-based page number (default 1)

    The similar ids come from the ``|``-separated ``simi_ids`` field of the
    matching ``ArticlesSimi`` row, de-duplicated in their original order and
    stored in the session under ``<article_id>_simi_articles``.

    Returns:
        A JSON-formatted string: code 1000 with ``article_num`` and
        ``article_ids`` on success, code 1006 when the article is unknown,
        code 1003 when the request method is neither GET nor POST.
    """
    logger.debug("function get_simi_ids __enter__")

    if request.method not in ('POST', 'GET'):
        end = '{"code": 1003, "msg": "%s" }' % ("请求方法错误")
        logger.info(end)
        logger.debug("function get_simi_ids __exit__ 3")
        return end

    logger.debug(request.args)
    # Default to '' so a missing article_id no longer fails on ``None + str``;
    # it is simply reported as an unknown article below.
    item_id = request.values.get('article_id', '', type=str)
    page_size = request.values.get('page_size', 10, type=int)
    # Pages are 1-based in the API and 0-based for slicing.
    page = request.values.get('page', 1, type=int) - 1
    start, stop = page * page_size, (page + 1) * page_size

    # NOTE(review): clearing the session right before the cache lookup makes
    # the cached branch below unreachable and also wipes unrelated session
    # keys. Kept as in the original - confirm whether it is a debug leftover.
    session.clear()
    cache_key = item_id + "_simi_articles"
    if cache_key in session:
        article_ids = session[cache_key][start:stop]
        end = '{"code": 1000, "article_num": %d, "article_ids": %s }' % (
            len(article_ids), json.dumps(article_ids))
        logger.debug(
            'simi_articles in session: article %s got articles page %d' %
            (item_id, page))
        logger.debug(end)
        logger.debug("function get_simi_ids __exit__ 0")
        return end
    session[cache_key] = []

    item = ArticlesSimi.query.filter(
        ArticlesSimi.article_id == item_id).first()
    if not item:
        logger.warning("article not exist, item_id: " + str(item_id))
        end = '{"code": 1006, "msg": "%s" }' % ("请求的文章不存在")
        logger.debug(end)
        logger.debug("function get_simi_ids __exit__ 1")
        return end

    # Ids stay text: the former ``.encode('utf-8')`` produced bytes, which
    # json.dumps rejects on Python 3 and which never compare equal to the str
    # being de-duplicated.
    simi_ids = []
    for article in item.simi_ids.split('|'):
        if article not in simi_ids:
            simi_ids.append(article)
    session[cache_key] = simi_ids
    lastids = simi_ids[start:stop]

    end = '{"code": 1000, "article_num": %d, "article_ids": %s }' % (
        len(lastids), json.dumps(lastids))
    logger.debug('similarity article: ' + str(lastids))
    logger.debug(end)
    logger.debug("function get_simi_ids __exit__ 2")
    return end
Beispiel #10
0
def get_user_recomm_articles():
    """Return one page of recommended article ids for a user.

    Request parameters (read from ``request.values``):

    * ``user_id``   - the user to recommend for (defaults to ``''``)
    * ``page_size`` - ids per page (default 10)
    * ``page``      - 1-based page number (default 1)

    The user's ``|``-separated tags are looked up in ``TagArticles``; each
    row's ``article_ids`` field holds ``|``-separated ``id:score`` entries.
    Ids are ranked by score (highest first; when an id appears under several
    tags the last score seen wins), stored in the session under
    ``<user_id>_recomm_articles`` and sliced to the requested page. When the
    user has no tags row, or none of the tags has a ``TagArticles`` row, the
    result of ``_get_default_recomm_articles(page)`` is returned instead.

    Returns:
        A JSON-formatted string: code 1000 with ``article_num`` and
        ``article_ids``, or code 1003 when the request method is neither GET
        nor POST.
    """
    logger.debug("function get_user_recomm_articles __enter__")

    if request.method not in ('POST', 'GET'):
        end = '{"code": 1003, "msg": "%s" }' % ("请求方法错误")
        logger.info(end)
        logger.debug("function get_user_recomm_articles __exit__ 4")
        return end

    logger.debug(
        str(request.args) + '\n' + str(request.values) + '\n' +
        str(request.data) + '\n' + str(request.get_json()) + '\n' +
        str(request.json))
    user_id = request.values.get('user_id', '', type=str)
    page_size = request.values.get('page_size', 10, type=int)
    # Pages are 1-based in the API and 0-based for slicing / DB offsets.
    page = request.values.get('page', 1, type=int) - 1
    start, stop = page * page_size, (page + 1) * page_size
    logger.debug(request.values)

    # NOTE(review): clearing the session right before the cache lookup makes
    # the cached branch below unreachable and also wipes unrelated session
    # keys. Kept as in the original - confirm whether it is a debug leftover.
    session.clear()
    cache_key = user_id + "_recomm_articles"
    if cache_key in session:
        end = _recomm_articles_response(session[cache_key][start:stop])
        logger.debug(
            'recomm_articles in session: user %s got articles page %d' %
            (user_id, page))
        logger.debug("function get_user_recomm_articles __exit__ 0")
        return end
    session[cache_key] = []

    utag = UserTags.query.filter(UserTags.user_id == user_id).first()
    if not utag:
        logger.warning("user-tags not exist, user_id: " + str(user_id))
        end = _recomm_articles_response(_get_default_recomm_articles(page))
        logger.debug(end)
        logger.debug("function get_user_recomm_articles __exit__ 1")
        return end

    tags = utag.tags
    logger.debug('User tags found for ' + str(user_id) + ': ' + tags)
    scores = {}
    matched_tags = 0
    for tag in tags.split('|'):
        if not tag:
            logger.debug('Empty tag ' + str(tag))
            continue
        tagarticle = TagArticles.query.filter(TagArticles.tag == tag).first()
        if not tagarticle:
            logger.debug('No article found for tag ' + str(tag))
            continue
        matched_tags += 1
        for entry in tagarticle.article_ids.split('|'):
            parts = entry.split(':')
            if len(parts) < 2:
                # Skip empty or malformed entries (e.g. from a trailing '|')
                # instead of failing with IndexError.
                continue
            # Ids stay text: bytes keys cannot be serialised by json.dumps
            # on Python 3.
            scores[parts[0]] = float(parts[1])

    if not matched_tags:
        # No usable tag at all. The original compared against the
        # non-existent ``tags_arr.len`` and raised AttributeError here.
        end = _recomm_articles_response(_get_default_recomm_articles(page))
        logger.debug(end)
        logger.debug("function get_user_recomm_articles __exit__ 2")
        return end

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    logger.debug(ranked)
    ranked_ids = [article_id for article_id, _ in ranked]
    session[cache_key] = ranked_ids
    lastids = ranked_ids[start:stop]

    end = _recomm_articles_response(lastids)
    logger.debug('user ' + user_id + ' request article: ' + str(lastids))
    logger.debug(end)
    logger.debug("function get_user_recomm_articles __exit__ 3")
    return end


def _recomm_articles_response(article_ids):
    """Format the code-1000 response string for a page of article ids."""
    return '{"code": 1000, "article_num": %d, "article_ids": %s }' % (
        len(article_ids), json.dumps(article_ids))