def translate_any_2_anyone(article, target="en"):
    """
    Translate an article, sentence by sentence, into the target language.

    The language of the article is detected first. Japanese ('ja') and
    Simplified Chinese ('zh-cn') text is split on the ideographic full stop
    "。"; any other language is split on ".". Every non-blank sentence is
    posted to the translation service and each translated sentence is
    appended to the result followed by ". ". Sentences whose response has no
    "tgt_text" field are skipped.

    :param article: String. The article to translate.
    :param target: String. Target language code, e.g. "en" or "zh".
    :return: String. The translated article, or '' when an HTTPError is
        raised while translating.
    """
    url = config.translate_url
    try:
        # Only the sentence delimiter depends on the detected language.
        separator = "。" if detect(article) in ('ja', 'zh-cn') else "."
        translated = []
        for sentence in article.split(separator):
            # Splitting leaves empty fragments (e.g. after the final full
            # stop); there is nothing to translate in them.
            if not sentence.strip():
                continue
            data = {
                "from": "auto",
                "to": target,
                "apikey": config.translate_user_key,
                "src_text": sentence
            }
            # Log timestamps around the request so slow calls can be spotted.
            cur_time = date_util.sys_date("%Y-%m-%d %H:%M:%S")
            logger.info(f"-----------翻译句子开始----------- : {cur_time}")
            res = http_post(data, url)
            cur_time = date_util.sys_date("%Y-%m-%d %H:%M:%S")
            logger.info(f"-----------翻译句子结束----------- : {cur_time}")
            res_dict = json.loads(res)
            if "tgt_text" in res_dict:
                translated.append(res_dict['tgt_text'] + ". ")
        return "".join(translated)
    except HTTPError as e:
        # The exception is formatted into the message itself; passing it as a
        # separate argument without a placeholder never rendered it.
        logger.error(f'翻译时发生的错误:{e}')
        return ''
Esempio n. 2
0
def article_zdxj(article_id):
    """
    Run coreference resolution for one pending English article and store it.

    The article is read from t_article_msg_en (only rows with is_zdxj='0'),
    its content is posted to the coreference resolution service and, when
    the service reports success and a title exists in t_article_msg, the
    resolved text plus the title translated to "zh" are inserted into
    t_article_msg_zh and the source row is marked as processed
    (is_zdxj='1'). Only the first matching row is handled.

    :param article_id: Id of the article to process.
    :return: "success" when the coreference service reports success,
        "error" when it does not, and None when no pending row matches
        article_id.
    :raises RuntimeError: wraps any exception raised while processing; the
        transaction is rolled back first. The connection is always closed.
    """
    db = DatabaseWrapper()
    try:
        url = config.coref_interface_uri
        # Bind the id as a query parameter instead of formatting it into the
        # SQL text. str() keeps the string comparison the quoted literal
        # used to give.
        # NOTE(review): assumes the second positional argument of db.query is
        # the parameter tuple, as the title lookup below already passes one.
        article = db.query(
            "SELECT article_id,content FROM t_article_msg_en "
            "where is_zdxj='0' and article_id=%s", (str(article_id),))
        for row_id, content in zip(article.article_id, article.content):
            logger.info(f"ID = {row_id}")
            # Call the coreference resolution service.
            result = json.loads(http_post({'content': content}, url))
            logger.info(f"ID = {row_id}")
            if result["status"] != "success":
                logger.info(f"error article_id:{row_id}")
                return "error"
            article_title = db.query(
                "SELECT title FROM t_article_msg where article_id=%s",
                (str(row_id),), QueryResultType.PANDAS)
            if len(article_title.title) > 0:
                title = translate_any_2_anyone(article_title.title[0], "zh")
                # Store the resolved text and flag the source row as done.
                db.execute(
                    "INSERT INTO t_article_msg_zh(article_id, content,title) VALUES(%s,%s,%s)",
                    (row_id, result["coref"], title))
                db.execute(
                    "update t_article_msg_en set is_zdxj='1',finish_date=%s,finish_time=%s where article_id=%s",
                    (date_util.sys_date("%Y-%m-%d"),
                     date_util.sys_time("%H:%M:%S"), row_id))
                db.commit()
            # Success is reported even when no title row exists and nothing
            # was written.
            return "success"
    except Exception as e:
        db.rollback()
        raise RuntimeError(e) from e
    finally:
        db.close()
Esempio n. 3
0
def __check_event_info_by_nn(short_sentence, cameo):
    """
    Match an event phrase against known events via the similarity service.

    :param short_sentence: string. The event phrase.
    :param cameo: string. CAMEO code.
    :return: (True, matches) when the service returns a non-empty result;
        (False, None) when the call fails or nothing matches.
    """
    payload = {
        "short_sentence": short_sentence,
        "cameo": cameo,
        "threshold": config.event_similarly_threshold1
    }
    reply = json.loads(http_post(payload, config.event_similarly_uri))
    if reply["status"] != 'success':
        # The service call failed; the caller skips this event.
        logging.warning("调用事件相似度匹配接口失败,该事件跳过:" + short_sentence)
        return False, None
    matches = reply["result"]
    if matches is not None and len(matches) >= 1:
        return True, matches
    return False, None
Esempio n. 4
0
def get_events_by_search_helper(search, event_tag, start_date, end_date):
    """
    Find events similar to a search text, filtered by a date range.

    The search text is posted to the event similarity service. The ids of
    the returned matches (capped by config.event_similarly_topN) are then
    posted, together with the event tag and the date range, to the graph
    query service, whose answer is returned.

    :param search: String. The text to search for.
    :param event_tag: String. Event label in the graph database; None falls
        back to the module-level global_event_tag.
    :param start_date: String. Start date.
    :param end_date: String. End date.
    :return: tuple (status, events). ('error', []) when the similarity
        service call fails; otherwise the "status" and "events" fields of
        the graph query response. Per the original documentation, events
        look like [{"event_sentence": "", "event_id": "", "event_date": ""}].
    """
    # NOTE: earlier revisions first derived CAMEO codes for the search text
    # and ran one similarity / full-text match per code; those steps were
    # disabled in favour of the single similarity call below.
    data = {
        "short_sentence": search,
        "threshold": config.event_similarly_threshold2,
        "top_n": 10
    }
    res = http_post(data, config.event_similarly2_uri)
    response = json.loads(res)
    if response["status"] != 'success':
        logging.warning("调用事件相似度匹配接口失败,该事件搜索跳过:" + search)
        return 'error', []

    # A successful response may carry no result at all; treat that the same
    # as an empty match list instead of failing while iterating None.
    result = response["result"] or []
    event_id_list = []
    for item in result:
        event_id_list.append(item["event_id"])
        if len(event_id_list) >= config.event_similarly_topN:
            break

    # Resolve the matched event ids to event nodes through the graph query
    # service.
    if event_tag is None:
        event_tag = global_event_tag
    data = {
        "event_ids": event_id_list,
        "event_tag": event_tag,
        "start_date": start_date,
        "end_date": end_date
    }
    res = http_post(data, config.get_events_uri)
    res_dict = json.loads(res)

    return res_dict['status'], res_dict['events']
Esempio n. 5
0
def __parsed_sentence_and_insert2db(graph_db, content_id, content, event_tag,
                                    update_synonyms):
    """
    Extract the events contained in a text and store the analysis results.

    The text is stored as a sentence and posted to the event extraction
    service. Every extracted event that carries a CAMEO code and whose
    fields are not implausibly long is matched, by characters, against the
    stored events:

    * on a match, the event attributes are matched as well; a matching
      attribute is only linked to the sentence, otherwise a new attribute
      row is stored and linked;
    * without a match, the event is stored as a new event and then compared
      through the NN similarity service: similar events are recorded as
      copies, otherwise the event vector is stored.

    :param graph_db: object. Graph database connection, handed to
        __storage_new_event.
    :param content_id: String. Text id, handed to
        rdms_service.insert_event_sentence.
    :param content: String. The text.
    :param event_tag: String. Event node label, handed to
        __storage_new_event.
    :param update_synonyms: Passed through to __check_event_info_by_char.
    :return: (event_id_list, event_list). event_id_list holds the ids of the
        matched or newly stored events; event_list holds, in the same order,
        the 'shorten_sentence' of each matched event or the event value
        returned by __storage_new_event.
    """
    # Store the sentence; only its id is needed afterwards.
    _, sentence_id = rdms_service.insert_event_sentence(content_id, content)
    event_id_list = []
    event_list = []
    # Call the event extraction service. Each event in the response is read
    # below for subject/verb/object, named entities, date, location,
    # negative word, state and CAMEO code.
    res = http_post({"sentence": content}, config.event_extract_uri)
    response = json.loads(res)
    for sentence_parsed in response["data"]:
        for event in sentence_parsed["events"]:
            if not event["cameo"]:
                continue
            subject = event["subject"]
            verb = event["verb"]
            obj = event["object"]
            namedentity = event["namedentity"]
            namedentity_location = namedentity["location"]
            namedentity_organization = namedentity["organization"]
            namedentity_person = namedentity["person"]
            # Sentiment analysis is currently not taken from the response.
            sentiment_analysis = ""
            event_datetime = event["event_datetime"]
            event_location = event["event_location"]
            negative_word = event["negative_word"]
            state = event["state"]
            cameo = event["cameo"]
            triggerloc_index = event["triggerloc_index"]
            # Event extraction is not fully reliable: skip events whose
            # fields are implausibly long.
            if (len(subject) > 50 or len(verb) > 50 or len(obj) > 50
                    or len(event_datetime) > 20 or len(event_location) > 40):
                continue
            # The short sentence is assembled before the list-to-string
            # normalisation below, exactly as the stored data expects.
            short_sentence = subject + negative_word + verb + obj
            subject = ",".join(subject) if isinstance(subject, list) else subject
            verb = ",".join(verb) if isinstance(verb, list) else verb
            obj = ",".join(obj) if isinstance(obj, list) else obj
            # Character matching of subject, verb, object and negative word
            # against the event table.
            check_event_info, event_infos = __check_event_info_by_char(
                subject, verb, obj, negative_word, update_synonyms)
            namedentity_location_str = ",".join(namedentity_location)
            namedentity_organization_str = ",".join(namedentity_organization)
            namedentity_person_str = ",".join(namedentity_person)

            if check_event_info:
                # The event is known: match its attributes (character
                # matching) for every matched event.
                for event_info in event_infos:
                    event_id_list.append(event_info['event_id'])
                    event_list.append(event_info['shorten_sentence'])
                    event_id = event_info['event_id']
                    logging.info("字符匹配事件成功,事件id为:" + event_id)
                    check_event_attribute, ebm_event_attributes = __check_event_attribute_by_char(
                        event_id, event_datetime, event_location,
                        namedentity_organization, namedentity_person)
                    if check_event_attribute:
                        # The attributes already exist: only record the
                        # relation between the sentence and each attribute.
                        for event_attribute in ebm_event_attributes:
                            attribute_id = event_attribute['relation_id']
                            rdms_service.insert_sentattribute_rel(
                                short_sentence, attribute_id, sentence_id)
                    else:
                        # The attributes are new: store them and link them to
                        # the sentence.
                        attribute_id = rdms_service.insert_event_attribute(
                            sentiment_analysis, event_datetime,
                            event_location, state,
                            namedentity_location_str,
                            namedentity_organization_str,
                            namedentity_person_str, "", event_id)
                        rdms_service.insert_sentattribute_rel(
                            short_sentence, attribute_id, sentence_id)
                        # NN attribute matching is still invoked, but its
                        # outcome is not acted upon yet (the original notes
                        # it as an empty implementation).
                        __check_event_attribute_by_nn(
                            event_location, event_datetime,
                            namedentity_organization, namedentity_person)
                continue

            # Character matching failed: store the event as a new event
            # (event table, attribute table, sentence relation and graph
            # database are handled by __storage_new_event).
            event_id, event = __storage_new_event(
                subject, verb, obj, short_sentence, cameo,
                triggerloc_index, sentiment_analysis, event_datetime,
                negative_word, state, event_location,
                namedentity_location_str, namedentity_organization_str,
                namedentity_person_str, graph_db, sentence_id,
                event_tag)
            event_list.append(event)
            event_id_list.append(event_id)
            # Then try NN event matching.
            check_event_info, event_infos = __check_event_info_by_nn(
                short_sentence, cameo)
            if check_event_info:
                # NN matching succeeded: record every similar event in the
                # event copy (mirror) check table.
                copy_event_id = event_id
                for event_info in event_infos:
                    event_id = event_info["event_id"]
                    rdms_service.insert_event_copy(copy_event_id, event_id)
            else:
                # NN matching failed: store the event vector.
                data = {
                    "short_sentence": short_sentence,
                    "cameo": cameo,
                    "event_id": event_id
                }
                res = http_post(data, config.event_vector_uri)
                response = json.loads(res)
                if response["status"] != 'success':
                    # Lazy formatting: a missing or non-string "message"
                    # must not turn a warning into an exception.
                    logging.warning("调用事件向量存储接口失败:%s",
                                    response.get("message"))
    # No rollback is needed when the code above fails, so there is no
    # explicit commit here.

    return event_id_list, event_list
Esempio n. 6
0
from jdqd.a04.event_interface.config.project import Config

config = Config()

db = DatabaseWrapper()
try:
    # 1cd63e541ba6fe91c1b0483516f7dff0
    df = db.query(
        "select article_id,content from t_article_msg_zh where is_relation='0' "
    )
    for aid, content in zip(df.article_id, df.content):
        logger.info(f"ID = {aid}")
        cur_time = date_util.sys_date("%Y-%m-%d %H:%M:%S")
        logger.info(f"end---cur_time = {cur_time}")
        data = {"content_id": aid, "content": content}
        res = http_post(data, config.relextract_interface_uri)
        response = json.loads(res)
        if response["status"] == "success":
            db2 = DatabaseWrapper()
            db2.execute(
                f"update t_article_msg_zh set is_relation='1',finish_date=%s,finish_time=%s "
                f"where article_id=%s", (date_util.sys_date("%Y-%m-%d"),
                                         date_util.sys_time("%H:%M:%S"), aid))
            db2.commit()
            db2.close()
        else:
            logger.error(f"error,article_id:{aid}")
except Exception as e:
    db.rollback()
    raise RuntimeError(e)
finally: