Ejemplo n.º 1
0
def verify_auth(request):
    """
        验证用户-----现在是从固定列表中读取
        :param request:
        :return:
        """
    result = ini_result()
    user_list = {"user": "******"}
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if is_ajax and request.method == 'GET':
        user = request.GET.get("username", None)
        password = request.GET.get("pwd", None)
        if user is None or password is None:
            result[RESULT_CODE] = -1
            result[RESULT_MSG] = "invalid request parameters"
        else:
            pwd_in_database = user_list.get(user, None)
            if pwd_in_database is None:
                result[RESULT_CODE] = -1
                result[RESULT_MSG] = "no registered user"
            elif pwd_in_database != password:
                result[RESULT_CODE] = -1
                result[RESULT_MSG] = "unmatched password"
            else:
                result[RESULT_CODE] = 1
                result[RESULT_MSG] = "matched user and password"
                result[RESULT_DATA] = {"user": user, "pwd": password}
    else:
        result[RESULT_CODE] = -1
        result[RESULT_MSG] = "invalid request"
    return result
Ejemplo n.º 2
0
def parse_bib_file(file_name):
    """
    从bib文件中解析出文献信息. checked
    :param file_name:
    :return:
    """
    result = ini_result()
    if file_name is None or not isinstance(file_name, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result
    try:
        with open(file_name, encoding="utf-8") as bib_file:
            bib_database = bibtexparser.load(bib_file)
    except:
        result[RESULT_CODE] = -1100
        result[RESULT_MSG] = "文件打开失败:【" + file_name + "】"
        return result

    if bib_database is not None:
        result[RESULT_CODE] = 1100
        result[RESULT_MSG] = "成功解析bib文件【" + file_name + "】"
        result[RESULT_DATA] = bib_database.entries
    else:
        result[RESULT_CODE] = 1101
        result[RESULT_MSG] = "未从bib文件中解析出结果!【" + file_name + "】"
    return result
Ejemplo n.º 3
0
def generate_pub_node_from_file(driver, file_name, sheet_name, column_name):
    """
    从文件中提取bib信息,然后写入neo4j.checked
    :param column_name:
    :param sheet_name:
    :param file_name: bib文件或Excel文件
    :param driver: 数据库接口
    :return:
    """
    # 提取bib信息
    result = ini_result()
    extract_result = extract_bib_info_from_file(file_name, sheet_name, column_name)  # 结果是models类

    if extract_result[RESULT_CODE] == 1003:
        extracted_data = extract_result[RESULT_DATA]  # list of pubs
        db_result = create_or_match_nodes(extracted_data, driver)  # 写入节点
        if db_result[RESULT_CODE] != 1303:
            result[RESULT_CODE] = -1201
            result[RESULT_MSG] = extract_result[RESULT_MSG]
        else:
            result[RESULT_CODE] = 1200
            result[RESULT_MSG] = "success"
    else:
        print("不写入数据库:" + extract_result[RESULT_MSG])
        result[RESULT_CODE] = -1200
        result[RESULT_MSG] = extract_result[RESULT_MSG]

    return result
Ejemplo n.º 4
0
def upload_bib_add_record(request):
    result = ini_result()
    try:
        files = request.FILES.getlist('file')
    except:
        result[RESULT_MSG] = 'failed to retrieve file in the request'
        result[RESULT_CODE] = -701
        return wrap_result(result)

    if files is None or len(files) == 0:
        result[RESULT_MSG] = 'No file in the request'
        result[RESULT_CODE] = -702
        return wrap_result(result)

    driver = initialize_neo4j_driver()
    dir = os.path.join(os.path.dirname(__file__), 'upload_file')  # 拼装目录名称+文件名称

    file_not_processed = []
    for file in files:
        today = str(datetime.date.today())  # 获得今天日期
        filename = today + '_' + file.name  # 获得上传来的文件名称,加入下划线分开日期和名称

        file_path = save_file_stream_on_disk(file, dir, filename)  # 处理上传来的文件

        if file_path is None:
            file_not_processed.append(file)
            continue

        publication_info = extract_bib_info_from_file(file_path)  # dict

        pubs = []
        for entry in publication_info:
            # 解析文献
            tmp_result_pub = extract_publication_from_bib_info(entry)
            if tmp_result_pub[RESULT_CODE] == 1001:
                pubs.append(tmp_result_pub[RESULT_DATA])

        pubs = None if pubs == [] else pubs
        db_pub_result = create_or_match_nodes(driver,
                                              pubs,
                                              return_type="class",
                                              to_create=True)
        if db_pub_result[RESULT_CODE] != 1303:
            result[RESULT_CODE] = 00
            result[RESULT_MSG] = "Publication节点生成失败"
        else:
            result[RESULT_CODE] = 00
            result[RESULT_MSG] = "Publication节点生成成功"

        os.remove(file_path)

    if len(file_not_processed) > 0:
        result[RESULT_CODE] = -201
        result[RESULT_MSG] = "not all files are written into database"
        result[RESULT_DATA] = file_not_processed
    else:
        result[RESULT_CODE] = 200
        result[RESULT_MSG] = "success"

    return wrap_result(result)
Ejemplo n.º 5
0
def query_by_multiple_field(driver,
                            node_info,
                            node_type,
                            page=None,
                            limit=None,
                            identifier="m"):
    """
    通用方法:给予节点的一个或多个字段来搜索节点信息。checked
    :param identifier:
    :param limit:
    :param page:
    :param driver: 数据库信息
    :param node_info: dict
    :param node_type:数据库节点类型名
    :return:返回的是dict,其中code:-1:没有传入数据;0:未搜索到数据;2:搜索到多条记录;1:搜索到1条记录
    """
    # 输入数据检测
    result = ini_result()
    # 输入数据检查
    if driver is None or node_info is None or not isinstance(node_info, dict) or \
            (page is not None and not isinstance(page, int)) or (limit is not None and not isinstance(limit, int)):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # 查询条件
    cond = generate_cypher_for_multi_field_condition(node_info, node_type,
                                                     identifier)
    if cond is None:
        result[RESULT_CODE] = -904
        result[RESULT_MSG] = "No cypher has been generated"
        return result

    if page is not None and limit is not None:
        paging = " skip " + str((page - 1) * limit) + " limit " + str(limit)

    cypher = "match ({IF}:{NODE_TYPE}) ".format(
        IF=identifier,
        NODE_TYPE=node_type) + cond + " return " + identifier + " " + paging
    # 查询数据库

    try:
        with driver.session() as session:
            records = session.run(cypher)
            records = records.data()
    except:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    records = process_neo4j_result(records, identifier, flag=1)  # 用match时
    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 904
    result[RESULT_DATA] = records
    if records is None:
        result["count"] = 0
    else:
        result["count"] = len(records)
    return result
Ejemplo n.º 6
0
def query_by_multiple_field_count(driver,
                                  node_info,
                                  node_type,
                                  identifier="m"):
    """
    通用方法:根据给定的节点信息,查询有多少条数据满足条件。checked
    :param identifier:
    :param driver:
    :param node_info: dict
    :param node_type:数据库节点类型名
    :return:返回的是dict,其中code:-1:没有传入数据;0:未搜索到数据;2:搜索到多条记录;1:搜索到1条记录
    """

    result = ini_result()
    result["count"] = -1

    # 输入数据检测
    if driver is None or node_info is None or not isinstance(
            node_info, dict) or node_type is None or node_type not in [
                "PUBLICATION", "PERSON", "VENUE"
            ]:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result
    # 查询条件
    cond = generate_cypher_for_multi_field_condition(node_info, node_type,
                                                     identifier)  # 指定的查询条件
    if cond is None:
        result[RESULT_CODE] = -904
        result[RESULT_MSG] = "No cypher has been generated"
        return result

    cypher = "match ({IF}:{NODE_TYPE}) ".format(
        IF=identifier,
        NODE_TYPE=node_type) + cond + " return count({IF})".format(
            IF=identifier)

    # 查询数据库

    try:
        with driver.session() as session:
            records = session.run(cypher)
            records = records.data()
    except:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 906

    if records is None or len(records) == 0:
        result["count"] = 0
    elif len(records) > 1:
        result["count"] = len(records)
    else:
        result["count"] = records[0]["count(m)"]
    return result
Ejemplo n.º 7
0
def add_relation(request):
    """
    向cypher添加relation
    :param request:
    :return: {RESULT_MSG: "", RESULT_CODE: 0}, 0:缺少参数;-1:参数格式错误;-10:请求方式错误,-2~-5见create_or_match_persons方法
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if is_ajax and request.method == 'POST':
        node_info = request.body  # 处理后是dict,直接传到后台写入数据库就可以了

        if node_info is None or node_info == "":
            result[RESULT_CODE] = -301
            result[RESULT_MSG] = "no data is given"
            return wrap_result(result)

        try:
            node_info = bytes.decode(node_info)
            node_info = json.loads(node_info)
        except json.JSONDecodeError or TypeError:
            result[RESULT_CODE] = -1
            result[RESULT_MSG] = "given data is not a json string"
            return HttpResponse(json.dumps(result, ensure_ascii=False),
                                content_type='application/json',
                                charset='utf-8')
        # 调方法写数据库
        source_id = node_info["sourceID"]
        source_type = node_info["sourceType"]
        target_id = node_info["targetID"]
        target_type = node_info["targetType"]
        rel_type = node_info["relType"]

        driver = initialize_neo4j_driver()
        query_result = query_or_create_relation(driver, source_type, source_id,
                                                target_type, target_id,
                                                rel_type)

        if query_result[RESULT_CODE] == 1306:
            result[RESULT_CODE] = 1
            result[RESULT_MSG] = "successfully write into database"
            return HttpResponse(json.dumps(result, ensure_ascii=False),
                                content_type='application/json',
                                charset='utf-8')
        else:
            result[RESULT_CODE] = 0
            result[
                RESULT_MSG] = "error when writing into database: " + query_result[
                    RESULT_MSG]
            return HttpResponse(json.dumps(result, ensure_ascii=False),
                                content_type='application/json',
                                charset='utf-8')
    else:
        result[RESULT_CODE] = -10
        result[RESULT_MSG] = "not supported request form"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')
Ejemplo n.º 8
0
def read_from_excel(file, sheet_name, column_specified):
    result = ini_result()
    try:
        wb = xlrd.open_workbook(filename=None,
                                file_contents=file.read())  # 打开文件
    except xlrd.XLRDError:
        result["code"] = -703
        result[RESULT_MSG] = '打开Excel文件失败'
        return result

    # 读取工作表
    try:
        sheet_content = wb.sheet_by_name(sheet_name)  # 通过名字获取表格
    except ValueError:
        result["code"] = -704
        result[RESULT_MSG] = '读取Excel指定工作簿失败'
        return result

    # 读取表头
    try:
        sheet_title = sheet_content.row_values(0)
    except:
        result["code"] = -705
        result[RESULT_MSG] = '读取Excel工作簿表头失败'
        return result

    # 按列读取车站信息
    if column_specified is not None and isinstance(column_specified, list):
        correspondence = {}
        for column_name in column_specified:
            try:
                col_index = sheet_title.index(column_name)
            except ValueError:
                print("Excel文件中未包含<" + column_name + ">列")
                continue
            correspondence[col_index] = column_name
    else:
        correspondence = {i: j for i, j in enumerate(sheet_title)}

    # 读取内容
    content = []
    for iii in range(1, sheet_content.nrows):  # index是行下标
        tmp = {"index": iii}
        for index, column_name in correspondence:
            value = sheet_content.cell_value(iii, index)
            if value == "":
                print("第" + str(iii) + "行文献没有" + column_name + "信息【" +
                      str(sheet_content.row_values(iii)) + "】")
                tmp[column_name] = None
            else:
                tmp[column_name] = value
        content.append(tmp)

    result["code"] = 700
    result[RESULT_MSG] = '读取Excel工作簿成功'
    result[RESULT_DATA] = content
Ejemplo n.º 9
0
def extract_rel_author_by_from_pub_info(publications):
    """
    从数据库中读取到的Publication信息的author字段,提取person信息,然后构建Publication-Authored_by-Person关系。checked
    :param publications: list of publication
    :return:
    """
    result = ini_result()
    if publications is None or not isinstance(publications, list):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    flag = [
        isinstance(publication, Publication) for publication in publications
    ]
    if not all(flag):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    unprocessed_pub_uuid = []
    processed = []
    for publication in publications:
        author_names = process_author_str(publication.author)
        if author_names is None:
            unprocessed_pub_uuid.append(publication)
            continue
        for author_name in author_names:
            person = Person("", author_name["full_name"],
                            author_name["first_name"],
                            author_name["middle_name"],
                            author_name["last_name"])

            processed.append({
                "pub": publication,
                "venue": person,
                "index": author_name["index"]
            })

    unprocessed_pub_uuid = None if unprocessed_pub_uuid == [] else unprocessed_pub_uuid
    processed = None if processed == [] else processed

    if unprocessed_pub_uuid is not None:
        result[RESULT_CODE] = 1010
        result[RESULT_MSG] = "partially filtered"
    else:
        result[RESULT_CODE] = 1009
        result[RESULT_MSG] = "success"

    result[RESULT_DATA] = {
        "failed": unprocessed_pub_uuid,
        "success": processed
    }

    return result
Ejemplo n.º 10
0
def search_publication_count(request):
    """
    根据搜索条件,计算有多少条数据满足条件. should be post and ajax
    :param request:
    :return:
    """
    result = ini_result()
    result[RESULT_COUNT] = -1
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':
        result[RESULT_CODE] = -103
        result[RESULT_MSG] = "not support request method, should be post"
        result["count"] = 0
        return wrap_result(result)

    pub_info = request.body
    if pub_info is None or pub_info == "":
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no query condition is given"
        return wrap_result(result)

    try:
        pub_info = bytes.decode(pub_info)
        pub_info = json.loads(pub_info)
    except json.JSONDecodeError or TypeError:
        result[RESULT_CODE] = -603
        result[RESULT_MSG] = "query condition should be json string"
        return wrap_result(result)

    if pub_info is None:
        result[RESULT_CODE] = -601
        result[RESULT_MSG] = "no query condition is given"
        return wrap_result(result)

    # 封装数据为后台数据库能够接收的格式
    parameters = process_search_condition(pub_info)
    if parameters is None:
        result[RESULT_CODE] = -604
        result[RESULT_MSG] = "搜索条件解析失败,请重试"
        result["count"] = 0
        return wrap_result(result)

    driver = initialize_neo4j_driver()
    query_result = utils.db_util.operations.query_by_multiple_field_count(
        driver, parameters,
        "PUBLICATION")  # -1:没有传入数据;0:未搜索到数据;2:搜索到多条记录;1:搜索到1条记录

    result[RESULT_CODE] = query_result[RESULT_CODE]
    result[RESULT_MSG] = query_result.get(RESULT_MSG, "")
    result["count"] = query_result.get("count", -1)
    return wrap_result(result)
Ejemplo n.º 11
0
def extract_rel_publish_in_from_pub_info(publications):
    """
    从数据库中读取到的Publication信息的journal或booktitle字段,提取venue信息,然后构建Publication-Published_in-Venue关系。checked
    :param publications: list of publication
    :return:
    """
    result = ini_result()
    if publications is None or not isinstance(publications, list):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    flag = [
        isinstance(publication, Publication) for publication in publications
    ]
    if not all(flag):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not valid"
        return result

    unprocessed_pub_uuid = []
    processed = []
    for publication in publications:
        if publication.node_type not in FIELD_OF_PUBLICATION_FOR_VENUE.keys():
            unprocessed_pub_uuid.append(publication.uuid)
            continue

        field = FIELD_OF_PUBLICATION_FOR_VENUE.get(publication.node_type)
        venue_name = publication.__dict__[field]
        venue_type = VENUE_TYPE_FOR_NODE_TYPE.get(publication.node_type)
        venue = Venue("", venue_type, venue_name)
        processed.append({"pub": publication, "venue": venue})

    unprocessed_pub_uuid = None if unprocessed_pub_uuid == [] else unprocessed_pub_uuid
    processed = None if processed == [] else processed

    if unprocessed_pub_uuid is not None:
        result[RESULT_CODE] = 1010
        result[RESULT_MSG] = "partially filtered"
    else:
        result[RESULT_CODE] = 1009
        result[RESULT_MSG] = "success"

    result[RESULT_DATA] = {
        "failed": unprocessed_pub_uuid,
        "success": processed
    }

    return result
Ejemplo n.º 12
0
def extract_bib_info_from_file(file_path, sheet_name=None, column_name=None):
    """
    从文件中提取文献信息,并返回dict. checked
    :param column_name: 列名
    :param file_path: 文件路径
    :param sheet_name: 解析excel时需要的参数, 工作表名
    :return:
    """
    result = ini_result()
    # 解析文件后缀
    ext = os.path.splitext(file_path)[-1]
    if ext is None:
        result[RESULT_CODE] = -1004
        result[
            RESULT_MSG] = "unable to extract he suffix of the file provided."
        return result
    # 按后缀名处理文件,获得文献的字符信息
    if ext == '.bib':
        parse_result = parse_bib_file(file_path)
    elif ext in ['.xls', '.xlsx']:
        if sheet_name is None or column_name is None:
            result[RESULT_CODE] = -707
            result[RESULT_MSG] = "excel未指定sheet and head"
            return result
        parse_result = parse_bib_file_excel(file_path, sheet_name, column_name)
    else:
        result[RESULT_CODE] = -1005
        result[RESULT_MSG] = "无法处理的的文件类型!【" + file_path + "】"
        return result

    if parse_result[RESULT_CODE] not in [1100, 1101, 1103, 1104]:
        result[RESULT_CODE] = -1006
        result[RESULT_MSG] = parse_result[RESULT_MSG]
        return result

    # 解析获得的bib信息
    bib_data = parse_result[RESULT_DATA]

    if bib_data is None or len(bib_data) == 0:
        result[RESULT_CODE] = 1007
        result[RESULT_MSG] = "文件中没有有效的文献数据"
        return result
    result[RESULT_CODE] = 1008
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = bib_data
    return result
Ejemplo n.º 13
0
def revise_venue(request):
    """
    利用cypher修改person
    :param request:
    :return:{RESULT_MSG: "no data is given", RESULT_CODE: 0} 0:无参数,-1:参数格式错误,
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if is_ajax and request.method == 'POST':
        node_info = request.body

        if node_info is None or node_info == "":
            result[RESULT_CODE] = -301
            result[RESULT_MSG] = "no data is given"
            return wrap_result(result)

        try:
            node_info = bytes.decode(node_info)
            node_info = json.loads(node_info)
        except json.JSONDecodeError or TypeError:
            return HttpResponse(
                json.dumps({
                    RESULT_MSG: "given data is not a json string",
                    RESULT_CODE: -1
                }))
        # 调方法写数据库
        driver = initialize_neo4j_driver()
        flag = db_operation.revise_venues(driver, node_info)
        if flag == 1:
            return HttpResponse(
                json.dumps({
                    RESULT_MSG: "successfully write into database",
                    RESULT_CODE: 1
                }))
        else:
            return HttpResponse(
                json.dumps({
                    RESULT_MSG: "error when writing into database",
                    RESULT_CODE: flag * 3
                }))
    else:
        return HttpResponse(
            json.dumps({
                RESULT_MSG: "not supported request form",
                RESULT_CODE: -2
            }))
Ejemplo n.º 14
0
def add_venue(request):
    """
    向cypher添加venue
    :param request:
    :return: {RESULT_MSG: "", RESULT_CODE: 0}, 0:缺少参数;-1:参数格式错误;-10:请求方式错误,-2~-5见create_or_match_persons方法
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if is_ajax and request.method == 'POST':
        node_info = request.body  # 处理后是dict,直接传到后台写入数据库就可以了

        if node_info is None or node_info == "":
            result[RESULT_CODE] = -301
            result[RESULT_MSG] = "no data is given"
            return wrap_result(result)

        try:
            node_info = bytes.decode(node_info)
            node_info = json.loads(node_info)
        except json.JSONDecodeError or TypeError:
            result[RESULT_CODE] = -1
            result[RESULT_MSG] = "given data is not a json string"
            return HttpResponse(json.dumps(result, ensure_ascii=False),
                                content_type='application/json',
                                charset='utf-8')
        # 调方法写数据库
        driver = initialize_neo4j_driver()

        # 应先从node_info构建Venue,然后创建
        venue = [Venue("", node_info)]

        db_ven_result = create_or_match_nodes(driver,
                                              venue,
                                              return_type="class",
                                              to_create=True)
        if db_ven_result[RESULT_CODE] != 1303:
            result[RESULT_CODE] = 00
            result[RESULT_MSG] = "Venue节点生成失败"
        else:
            result[RESULT_CODE] = 1
            result[RESULT_MSG] = "Venue节点生成成功"

    else:
        result[RESULT_CODE] = -10
        result[RESULT_MSG] = "not supported request form"
    return wrap_result(result)
Ejemplo n.º 15
0
def parse_excel_stations(request):
    """
    从Excel文件中解析出车站信息,提取的结果是excel表中的各个数据
    :param request:
    :return:
    """
    result = ini_result()
    try:
        file = request.FILES.getlist('file')
    except:
        result[RESULT_MSG] = 'failed to retrieve file in the request'
        result[RESULT_CODE] = -701
        return wrap_result(result)
    if file is None or len(file) == 0:
        result[RESULT_MSG] = 'No file in the request'
        result[RESULT_CODE] = -702
        return wrap_result(result)

    # 打开Excel文件# todo support multiple files
    result = read_from_excel(file[0], SHEET_NAME, SHEET_TITLE)

    return wrap_result(result)
Ejemplo n.º 16
0
def extract_venue_from_bib_info(entry):
    """
    从节点信息中,提取出venue节点.checked
    :param entry:
    :return:
    """
    result = ini_result()

    entry = capitalize_dict_keys(entry)

    if "VENUE_NAME" not in entry.keys() or len(
            entry["VENUE_NAME"]) == 0 or "VENUE_TYPE" not in entry.keys(
            ) or len(entry["VENUE_TYPE"]) == 0:
        result[RESULT_CODE] = -1007
        result[RESULT_MSG] = "缺少必填字段"
        return result

    info = {
        field_name: process_special_character(entry.get(field_name, ""))
        for field_name in FIELD_NAMES_VENUE
    }

    node = Venue(uuid="",
                 venue_name=info["VENUE_NAME"],
                 abbr=info["ABBR"],
                 venue_type=info["VENUE_TYPE"],
                 publisher=info["PUBLISHER"],
                 year=info["YEAR"],
                 address=info["ADDRESS"],
                 sci_index=info["SCI_INDEX"],
                 ei_index=info["EI_INDEX"],
                 ssci_index=info["SSCI_INDEX"],
                 note=info["NOTE"],
                 start_year=info["START_YEAR"])
    result[RESULT_CODE] = 1005
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = node
    return result
Ejemplo n.º 17
0
def extract_person_from_bib_info(entry):
    """
    从节点信息中,提取出Person节点。checked
    :param entry:
    :return:
    """
    result = ini_result()

    entry = capitalize_dict_keys(entry)

    if "FULL_NAME" not in entry.keys() or len(entry["VENUE_NAME"]) == 0:
        result[RESULT_CODE] = -1008
        result[RESULT_MSG] = "缺少必填字段"
        return result

    info = {
        field_name: process_special_character(entry.get(field_name, ""))
        for field_name in FIELD_NAMES_PERSON
    }

    node = Person(uuid="",
                  full_name=info["FULL_NAME"],
                  first_name=info["FIRST_NAME"],
                  middle_name=info["MIDDLE_NAME"],
                  last_name=info["LAST_NAME"],
                  name_ch=info["NAME_CN"],
                  first_name_ch=info["FIRST_NAME_CN"],
                  last_name_ch=info["LAST_NAME_CN"],
                  institution=info["INSTITUTION"],
                  research_interest=info["RESEARCH_INTEREST"],
                  note=info["NOTE"],
                  added_by=info["ADDED_BY"],
                  added_date=info["ADDED_DATE"])
    result[RESULT_CODE] = 1006
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = node
    return result
Ejemplo n.º 18
0
def query_or_create_relation(driver,
                             source_type,
                             source_id,
                             target_type,
                             target_id,
                             relation_type,
                             to_create=True,
                             parameters=None):
    """
    先根据起终点的uuid查询关系是否存在,若存在,直接返回关系id,否则,创建关系并返回id。todo 设计边的属性,现在只有uuid和added_date
    :param relation_type:
    :param target_id:
    :param target_type:
    :param source_id:
    :param source_type:
    :param driver:
    :param to_create:
    :param parameters: dict,边属性
    :return: -1:参数不完整,1:已存在,-2:不存在,且不创建,-3:指定边属性无效,-4:创建失败,2:生成成功
    """
    result = ini_result()
    # 检查输入数据
    if source_type is None or source_id is None or target_id is None or target_type is None or relation_type is None:
        result["code"] = -901
        result["msg"] = "输入参数不完整"
        return result
    if source_type not in NODE_TYPES or source_type not in NODE_TYPES or target_type not in NODE_TYPES or target_type not in NODE_TYPES:
        result[RESULT_CODE] = -1202
        result[RESULT_MSG] = "node type is not valid"
        return result

    if relation_type not in EDGE_TYPES:
        result[RESULT_CODE] = -1203
        result[RESULT_MSG] = "edge type is not valid"
        return result

    if parameters is not None and not isinstance(parameters, dict):
        result["code"] = -901
        result["msg"] = "指定边属性不是dict,取消生成节点"
        return result

    # 生成查询语句
    cypher = "MATCH (s:{source}) -[r:{rel}]-> (t:{target}) where s.uuid='{IDs}' and t.uuid='{IDt}'  " \
             "return r" .format(source=source_type, target=target_type, IDs=source_id,
                                      IDt=target_id, rel=relation_type.upper())
    try:
        with driver.session() as session:
            edges = session.run(cypher)
    except:
        result["code"] = -910
        result["msg"] = "数据库连接失败"
        return result

    edges = edges.data()
    if len(edges) > 0:
        tmp = process_neo4j_result(edges, "r", 1)
        if tmp is None:
            result["code"] = -1305
            result["msg"] = "边查询成功,但结果处理失败"
        else:
            result["code"] = 1304
            result["data"] = tmp
            result["msg"] = "{IDs}和{IDt}的{REL}关系已经存在".format(IDs=source_id,
                                                             IDt=target_id,
                                                             REL=relation_type)
        return result

    if to_create:
        # 解析属性
        tmp_uuid = uuid.uuid1()
        added_date = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(time.time()))
        cypher_cond = "{uuid:'" + tmp_uuid.hex + "', added_date:'" + added_date + "', "
        if parameters is not None:  # 生成查询语句
            for (key, value) in parameters.items():
                cypher_cond += "" + key + ":'" + str(value) + "', "
        cypher_cond = cypher_cond[:-2] + "}"
        cypher = "MATCH (s:{source}), (t:{target}) where s.uuid='{IDs}' and t.uuid='{IDt}' CREATE (s) -[r:{REL}{COND}]->(t) " \
                 "return r" .format(source=source_type, target=target_type, IDs=source_id,
                                          IDt=target_id, REL=relation_type.upper(), COND=cypher_cond)
        # 执行查询+生成过程
        try:
            with driver.session() as session:
                edges = session.run(cypher)
        except:
            result["code"] = -910
            result["msg"] = "数据库连接失败"
            return result

        edges = edges.data()
        if len(edges) > 0:
            tmp = process_neo4j_result(edges, "r", 2)
            if tmp is None:
                result["code"] = -1305
                result["msg"] = "边新建成功,但结果处理失败"
            else:
                result["code"] = 1306
                result["data"] = tmp
                result["msg"] = "{IDs}和{IDt}的{REL}关系创建成功".format(
                    IDs=source_id, IDt=target_id, REL=relation_type)

        else:
            result["code"] = -1306
            result["msg"] = "{IDs}和{IDt}的{REL}关系创建关系失败,数据库操作失败".format(
                IDs=source_id, IDt=target_id, REL=relation_type)

    else:
        result["code"] = 1305
        result["msg"] = "数据库无记录,已选择不创建新节点"
    return result
Ejemplo n.º 19
0
def query_bib_node_by_multiple_field(driver,
                                     node_type,
                                     node_info,
                                     page=None,
                                     limit=None):
    """
    利用Publication/venue/person的多个字段来进行查询,并对处理结果进行解析。checked
    :param limit:
    :param page:
    :param node_type:
    :param driver:
    :param node_info:pub uuid
    :return:
    """
    # 根据提供的文献信息,查询数据库,并整理结果

    result = ini_result()

    if driver is None or node_info is None or not isinstance(
            node_info, dict) or node_type not in [
                "PUBLICATION", "PERSON", "VENUE"
            ]:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    identifier = "m"
    tmp = query_by_multiple_field(driver, node_info, node_type, page, limit,
                                  identifier)

    if tmp[RESULT_CODE] != 904:
        result[RESULT_CODE] = tmp[RESULT_CODE]
        result[RESULT_MSG] = tmp[RESULT_MSG]
        return result

    records = tmp[RESULT_DATA]

    result[RESULT_CODE] = 905
    result[RESULT_MSG] = "success"

    if node_type in ["VENUE", "PERSON"]:
        result[RESULT_DATA] = records
        return result

    pubs = []
    for record in records:
        page1, page2 = split_pages(record["pages"])  # 处理页码

        pub = {
            "node_type": record["node_type"],
            "book_title": null_string(record["book_title"]),
            "how_published": null_string(record["how_published"]),
            "title": null_string(record["title"]),
            "author": null_string(record["author"]),
            "editor": null_string(record["editor"]),
            "keywords": null_string(record["keywords"]),
            "edition": null_string(record["edition"]),
            "year": null_int(record["year"]),
            "month": null_string(record["month"]),
            "journal": null_string(record["journal"]),
            "volume": null_string(record["volume"]),
            "type": null_string(record["type"]),
            "chapter": null_string(record["chapter"]),
            "number": null_string(record["number"]),
            "pages1": null_string(page1),
            "pages2": null_string(page2),
            "publisher": null_string(record["publisher"]),
            "organization": null_string(record["organization"]),
            "institution": null_string(record["institution"]),
            "school": null_string(record["school"]),
            "address": null_string(record["address"]),
            "series": null_string(record["series"]),
            "indexing": 0,
            "id": null_string(record["id"]),
            "uuid": null_string(record["uuid"]),
            "note": null_string(record["note"])
        }
        pubs.append(pub)
    result[RESULT_DATA] = pubs
    return result
Ejemplo n.º 20
0
def create_or_match_nodes(driver,
                          node_list,
                          return_type="class",
                          to_create=True):
    """
    根据指定的数据信息,建立节点,若节点已存在,则不操作, 要返回新生成或查询到的节点的,且要有对应关系(直接填到节点信息里).checked
    :param node_list: 是Publication/Venue/Person类的list
    :param driver: 数据库信息
    :param return_type:class/dict
    :param to_create: 查询后若没有记录,是否新建
    :return:list of Publication/Venue/Person
    """
    # 输入数据检查
    result = ini_result()

    if node_list is None or not isinstance(node_list,
                                           list) or len(node_list) == 0:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "the given data is not of type list"
        return result

    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "the database is not configured!"
        return result

    if return_type != 'class' and return_type != "dict":
        result[RESULT_CODE] = -1300
        result[RESULT_MSG] = "返回值类型指定错误!"
        return result

    # 在数据库中查询数据
    counter_processed = 0
    has_failed = False
    bib_data_new = []
    try:
        with driver.session() as session:
            for entry in node_list:
                query_result = query_or_create_node(
                    session, entry, to_create=to_create,
                    match_field=None)  # 查询或创建节点,返回是list of dict
                if query_result[RESULT_CODE] not in [1301, 1302]:
                    print(query_result[RESULT_MSG])
                    has_failed = True
                else:
                    counter_processed += 1
                    result_content = query_result[RESULT_DATA][
                        0]  # todo 有多个的时候怎么办
                    if return_type == 'class':  # todo 这里要
                        entry.uuid = result_content["uuid"]
                        bib_data_new.append(entry)
                    elif return_type == 'dict':
                        bib_data_new.append(result_content)
    except:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result
    # 查询结果分析
    if has_failed:
        result[RESULT_CODE] = -1304
        result[RESULT_MSG] = "查询/新建节点出现错误"
    else:
        result[RESULT_CODE] = 1303
        result[RESULT_MSG] = "成功"
        result[RESULT_DATA] = bib_data_new
    return result
Ejemplo n.º 21
0
def query_or_create_node(tx, node, to_create=True, match_field=None):
    """
    先查询节点是否存在,若存在,直接返回节点所有字段,否则,创建节点并返回所有数据。checked
    :param tx: neo4j连接
    :param node:节点信息,是models.py中定义的类
    :param to_create:当节点不在数据库中时是否创建新节点
    :param match_field: 用来匹配数据库中记录的条件,dict
    :return: 若查到数据,返回uuid,否则,若to_create为真,返回新建节点uuid,否则,返回0;出错返回-1
    """
    # 取回查询语句
    result = ini_result()
    if tx is None or node is None:
        result["code"] = -901
        result["msg"] = "未提供数据查询/新建节点或未提供数据库信息"
        return result

    if match_field is None:  # 利用默认的mandatory field来进行搜索
        cypher = node.get_match_cypher()
    else:
        if not isinstance(match_field, dict):
            result["code"] = -901
            result["msg"] = "提供查询的条件格式错误"
            return result
        else:
            cypher = node.get_match_cypher(match_field)  # todo 暂未实现
    # 查询数据库
    try:
        nodes = tx.run(cypher)
    except:
        result["code"] = -910
        result["msg"] = "数据库连接错误"
        return result

    nodes = nodes.data()
    # 分析结果
    if nodes is not None and len(nodes) > 0:
        tmp = process_neo4j_result(nodes, "node", 1)
        if tmp is not None:
            result["data"] = tmp
            result["code"] = 1301
            result["msg"] = "查询成功,有" + str(len(tmp["data"])) + "个匹配节点:"
        else:
            result["code"] = -1301
            result["msg"] = "异常!"
        return result

    if not to_create:
        result["code"] = 1300
        result["msg"] = "查询成功,无记录,且不创建新节点"
        return result

    node.uuid = uuid.uuid1()
    node.added_date = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
    create_cypher = node.get_create_cypher()
    try:
        nodes = tx.run(create_cypher)
    except:
        result["code"] = -910
        result["msg"] = "查询无记录,创建过程中数据库连接失败!"
        return result

    if nodes is None:
        result["code"] = -1303
        result["msg"] = "创建失败!"
        return result

    if len(nodes) > 1:
        result["code"] = -1302
        result["msg"] = "异常!"
        return result

    tmp = process_neo4j_result(nodes, "node", 2)
    if tmp is not None:
        result["data"] = tmp
        result["code"] = 152
        result["msg"] = "查询无记录,创建成功!"
    else:
        result["code"] = -1301
        result["msg"] = "异常!"

    return result
Ejemplo n.º 22
0
def parse_bib_file_excel(file_name, sheet_name, column_name):
    """
    从Excel文件中解析出文献信息,提取的结果是excel表中的各个数据,这里并不处理数据
    :param column_name: 列名, e.g. bib
    :param sheet_name: 要分析的工作表名称, e.g. deep learning
    :param file_name: str 文件名
    :return:
    """
    result = ini_result()

    if file_name is None or not isinstance(file_name, str) or sheet_name is None or not isinstance(sheet_name, str) \
            or column_name is None or not isinstance(column_name, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    try:
        wb = xlrd.open_workbook(filename=file_name)  # 打开文件
    except:
        result[RESULT_CODE] = -703
        result[RESULT_MSG] = '打开Excel文件失败'
        return result

    try:
        sheet_content = wb.sheet_by_name(sheet_name)  # 通过名字获取表格
    except:
        result[RESULT_CODE] = -704
        result[RESULT_MSG] = '读取Excel指定工作簿失败'
        return result
    try:
        sheet_title = sheet_content.row_values(0)
    except:
        result[RESULT_CODE] = -705
        result[RESULT_MSG] = '读取Excel工作簿表头失败'
        return result

    try:
        col_index = sheet_title.index(column_name)  # 列下标
    except ValueError:
        result[RESULT_CODE] = -706
        result[RESULT_MSG] = '指定的工作表中没有' + column_name + '列'
        return result

    content = []
    counter_bib = 0  # 成功解析出的行数
    counter_all = sheet_content.nrows - 1  # 总信息行数
    counter_null = 0  # 空白行个数
    for index in range(1, sheet_content.nrows):  # index是行下标
        value = sheet_content.cell_value(index, col_index)
        if value == "":
            print("第" + str(index) + "行文献没有bib信息【" +
                  str(sheet_content.row_values(index)) + "】")
            counter_null += 1
            continue
        try:
            tmp = bibtexparser.loads(value)
        except:
            print("第" + str(index) + "行文献解析bib过程失败【" +
                  str(sheet_content.row_values(index)) + "】")  # 内部错误,不返回错误码
            continue
        if tmp is not None:
            content.append(tmp.entries[0])
            counter_bib += counter_bib
        else:
            print("第" + str(index) + "行文献解析bib过程失败2【" +
                  str(sheet_content.row_values(index)) + "】")  # 内部错误,不返回错误码

    if counter_bib == (counter_all - counter_null):
        result[RESULT_MSG] = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name + \
                        ">列全部信息(除空白行)均有效解析,共" + str(counter_all) + "行," + str(counter_null) + "空白行," + \
                        str(counter_bib) + "成功解析"
        result[RESULT_CODE] = 1102
        result[RESULT_DATA] = content
        return result

    if counter_null == counter_all:
        result[
            RESULT_MSG] = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name + ">列全部为空白"
        result[RESULT_CODE] = 1103
        return result

    if (counter_all - counter_null) > counter_bib > 0:
        result[RESULT_MSG] = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name + \
                        ">列全部信息(除空白行)部分有效解析,共" + str(counter_all) + "行," + str(counter_null) + "空白行," + \
                        str(counter_bib) + "成功解析"
        result[RESULT_CODE] = 1104
        result[RESULT_DATA] = content
        return result

    if counter_all > counter_null and counter_bib == 0:
        result[
            RESULT_MSG] = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name + ">列(除空白外)全部解析失败"
        result[RESULT_CODE] = -1101
        return result

    result[
        RESULT_MSG] = "【总结】Excel文件<" + file_name + ">的<" + sheet_name + ">页<" + column_name + ">列。结果未考虑!"
    result[RESULT_CODE] = -1102
    return result
Ejemplo n.º 23
0
def query_person_pub_venue_by_person_name(driver,
                                          person_list,
                                          skip=None,
                                          limit=None):
    """
    利用person name来获取Publication、venue等数据,用来展示关系图。checked
    :param limit:
    :param skip:
    :param person_list: list of person name
    :param driver:
    :return:
    """
    result = ini_result()
    if driver is None or person_list is None or not isinstance(
            person_list, list):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # 查询是否存在数据
    cypher_total = "match (n:Person)-[r:Write]->(m:Publication)  where n.name in " + str(person_list) + \
                   " return count(m)"
    with driver.session() as session:
        records = session.run(cypher_total)
        total = records.value()[0]

    if total == 0:  # no data found
        result[RESULT_CODE] = 901
        result[RESULT_MSG] = "no matched data"
        return result

    # get details of matched Publication and Person
    cypher = "match (m:Person)-[r:Write]->(n:Publication)  where m.name in " + str(person_list) + \
             " return n, m order by n.year"

    if skip is not None:
        cypher += " skip " + str(skip) + " limit " + str(limit)

    nodes = []
    node_ids = []
    relations = []
    with driver.session() as session:
        records = session.run(cypher)
        for record in records:
            print("查询person-->publication结果成功")
            # 组装结果
            if record["m"]["uuid"] not in node_ids:
                node = {
                    "id": record["m"]["uuid"],
                    "label": record["m"]["name"],
                    "type": "person"
                }
                nodes.append(node)
                node_ids.append(record["m"]["uuid"])
            if record["n"]["uuid"] not in node_ids:
                node = {
                    "id": record["n"]["uuid"],
                    "label": record["n"]["title"],
                    "type": "publication"
                }
                nodes.append(node)
                node_ids.append(record["n"]["uuid"])
            relation = {"from": record["m"]["uuid"], "to": record["n"]["uuid"]}
            relations.append(relation)

    if not nodes:
        result[RESULT_CODE] = -902
        result[
            RESULT_MSG] = "failed to get matched data (Publication<-Write<-Person)"
        return result

    # get details of matched Publication and Venue
    cypher = "match (m:Publication)-[r:PUBLISH_IN]->(n:Venue)  where m.uuid in " + str(node_ids) + \
             " return n, m"

    with driver.session() as session:
        records = session.run(cypher)
        for record in records:
            print("查询publication-->venue结果成功")
            # 组装结果
            if record["m"]["uuid"] not in node_ids:
                node = {
                    "id": record["m"]["uuid"],
                    "label": record["m"]["title"],
                    "type": "publication"
                }
                nodes.append(node)
                node_ids.append(record["m"]["uuid"])
            if record["n"]["uuid"] not in node_ids:
                node = {
                    "id": record["n"]["uuid"],
                    "label": record["n"]["venue_name"],
                    "type": "venue"
                }
                nodes.append(node)
                node_ids.append(record["n"]["uuid"])
            relation = {"from": record["m"]["uuid"], "to": record["n"]["uuid"]}
            relations.append(relation)

    result[RESULT_CODE] = 900
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = {"relation": relations, "nodes": nodes}

    return result
Ejemplo n.º 24
0
def query_one_pub_by_uuid(driver, pub_id):
    """
    利用Publication的uuid来查询其详情,checked,todo 当前有多个匹配的时候,只选取第一个
    :param driver:
    :param pub_id:str, pub uuid
    :return:
    """
    result = ini_result()
    if driver is None or pub_id is None or not isinstance(pub_id, str):
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    # 查询是否存在数据
    cypher = "match (m:Publication {uuid:'" + pub_id + "'}) <-[r:Write]- (n:Person) return m, n"

    with driver.session() as session:
        records = session.run(cypher)
        records = records.data()

    if records is None or len(records) == 0:
        result[RESULT_MSG] = "no matched data"
        result[RESULT_CODE] = 902
        return result

    # 处理特殊数据
    pages = records[0]["m"]["pages"]
    page1, page2 = split_pages(pages)

    # 处理author
    authors = [item["n"]["name"] for item in records]
    authors = [split_name(name, authors) for name in authors]

    pub = {
        "paperTypeEdit": records[0]["m"]["node_type"],
        "title": null_string(records[0]["m"]["title"]),
        "booktitle": null_string(records[0]["m"]["book_title"]),
        "author": authors,
        "editor": null_string(records[0]["m"]["editor"]),
        "keywords": null_string(records[0]["m"]["keywords"]),
        "edition": null_string(records[0]["m"]["edition"]),
        "year": null_string(records[0]["m"]["year"]),
        "month": null_string(records[0]["m"]["month"]),
        "journal": null_string(records[0]["m"]["journal"]),
        "volume": null_string(records[0]["m"]["volume"]),
        "type": null_string(records[0]["m"]["type"]),
        "chapter": null_string(records[0]["m"]["chapter"]),
        "number": null_string(records[0]["m"]["number"]),
        "pages1": null_string(page1),
        "pages2": null_string(page2),
        "publisher": null_string(records[0]["m"]["publisher"]),
        "organization": null_string(records[0]["m"]["organization"]),
        "institution": null_string(records[0]["m"]["institution"]),
        "school": null_string(records[0]["m"]["school"]),
        "address": null_string(records[0]["m"]["address"]),
        "series": null_string(records[0]["m"]["series"]),
        "howpublished": null_string(records[0]["m"]["how_published"]),
        "indexing": 0,
        "note": null_string(records[0]["m"]["note"])
    }

    result[RESULT_MSG] = "success"
    result[RESULT_CODE] = 903
    result[RESULT_DATA] = pub

    return result
Ejemplo n.º 25
0
def build_relation_from_node_attribute(driver, source_node_type="PUBLICATION", target_node_type="VENUE",
                                       rel_type="PUBLISHED_IN", filters={"node_type": "ARTICLE"},
                                       info_field='JOURNAL', use_source=1, do_split=False):
    """
    实现了从某一类节点的指定字段中提取信息,新建其他节点并建立指定的连接,这个可以处理info_field中包含多个节点信息的情况,现在只支持人的
    多个信息处理
    :param driver: neo4j 连接信息
    :param source_node_type: 边起点类型
    :param target_node_type: 边终点类型
    :param rel_type: 边类型
    :param filters:dict,分析时,对接点进行过滤的条件,key为字段名,value为选出的可行值
    :param info_field: 待分析的字段名
    :param use_source: boolean,当为1时,使用起点节点进行数据分析,当为0时,使用终点节点进行数据分析
    :param do_split
    :return:json格式,其中
             code:-1:输入filters无效,-2:没查询到Publication节点,-3:Publication节点中没有指定信息,-4:创建/查询新节点失败,
                  -5:未识别的新节点类型,-6:部分边创建失败,1:创建成功
             msg:
    """
    result = ini_result()

    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "driver is not given"
        return result

    if source_node_type not in NODE_TYPES or target_node_type not in NODE_TYPES:
        result[RESULT_CODE] = -1202
        result[RESULT_MSG] = "node type is not valid"
        return result

    if rel_type not in EDGE_TYPES:
        result[RESULT_CODE] = -1203
        result[RESULT_MSG] = "edge type is not valid"
        return result

    if info_field not in FIELD_NAMES_PUB:
        result[RESULT_CODE] = -1204
        result[RESULT_MSG] = "field is not valid"
        return result

    if (filters is not None and not isinstance(filters, dict)) or \
            (filters is not None and len(set(filters.keys()) & set(FIELD_NAMES_PUB)) == 0) or \
            use_source is None or do_split is None:
        result[RESULT_CODE] = -901
        result[RESULT_MSG] = "invalid arguments"
        return result

    identifier = 'node'
    # 解析filters的有效性,并生成查询条件语句
    if filters is not None:
        tmp_filter_str = ""
        for (key, value) in filters.items():
            tmp_filter_str += identifier + "." + key + "='" + value + "' and "
        tmp_filter_str = tmp_filter_str[:-5]
        # 生成完整查询语句
        if use_source:
            cypher = "match ({IF}:{NODE}) where {FILTER} return {IF}".format(IF=identifier, NODE=source_node_type, FILTER=tmp_filter_str)
        else:
            cypher = "match ({IF}:{NODE}) where {FILTER} return {IF}".format(IF=identifier, NODE=target_node_type, FILTER=tmp_filter_str)
    else:
        # 生成完整查询语句
        if use_source:
            cypher = "match ({IF}:{NODE}) return {IF}".format(IF=identifier, NODE=source_node_type)
        else:
            cypher = "match ({IF}:{NODE}) return {IF}".format(IF=identifier, NODE=target_node_type)

    # 查询在指定条件下的指定类型节点
    data_pair = []  # 查询之后的输出 source: source_node_type的uuid, target:venue的name, parameter:其他属性,如作者排序等
    if use_source:
        new_type = target_node_type
    else:
        new_type = source_node_type
    try:
        with driver.session() as session:
            nodes = session.run(cypher)
    except:
        result[RESULT_CODE] = -910
        result[RESULT_MSG] = "数据库连接失败"
        return result

    counter_has_content = 0
    counter_all = 0
    # counter_processed = 0  todo 这里没有检查有数据的记录是否成功处理,后面要加
    if use_source:
        for record in nodes:  # 把各节点的info_field字段提取出来,组成dict,key是节点的uuid,value是info_field字段值
            record_id = record[identifier][FIELD_NAMES_PUB[31]]  # uuid
            print("提取{NODE}与{NODE2}之间关系{REL}过程:查询到节点:".format(NODE=source_node_type, NODE2=target_node_type,
                                                              REL=rel_type) + str(record_id))
            counter_all += 1
            record_field = record[identifier][info_field]
            if not string_util(record_field):
                print("{ID} has empty {FIELD} field".format(ID=record_id, FIELD=info_field))
            else:
                if do_split:  # 需要将字段进行分割,然后生成多个节点
                    if new_type.upper() == NODE_TYPES[2]:  # person # todo 这是是什么情况??
                        names = process_person_names([record_field])  # 这里拆分成了多个,返回值:dict, original authors: list of dict of authors
                        names = names[record_field]  # list of dict = {name, index}
                        for name in names:
                            tmp = {"source": record_id, "target": name["name"], "parameter": {"index": name["index"]}}
                            data_pair.append(tmp)
                    else:
                        tmp = {"source": record_id, "target": record_field, "parameter": None}
                        data_pair.append(tmp)
                        result[RESULT_MSG] += "暂不支持针对【" + new_type + "】的拆分"
                        print("暂不支持针对【" + new_type + "】的拆分")
                else:
                    tmp = {"source": record_id, "target": record_field, "parameter": None}
                    data_pair.append(tmp)
                counter_has_content += 1
    else:
        for record in nodes:  # 把各节点的info_field字段提取出来,组成dict,key是节点的uuid,value是info_field字段值
            record_id = record[identifier][FIELD_NAMES_PUB[31]]  # uuid
            print("提取{NODE}与{NODE2}之间关系{REL}过程:查询到节点:".format(NODE=source_node_type, NODE2=target_node_type,
                                                              REL=rel_type) + str(record_id))
            counter_all += 1
            record_field = record[identifier][info_field]
            if not string_util(record_field):
                print("{ID} has empty {FIELD} field".format(ID=record_id, FIELD=info_field))
            else:
                if do_split:  # 需要将字段进行分割,然后生成多个节点
                    if new_type.upper() == NODE_TYPES[2]:  # person
                        names = process_person_names([record_field])  # 这里拆分成了多个,返回值:dict, original authors: list of dict of authors
                        names = names[record_field]  # list of dict = {name, index}
                        for name in names:
                            tmp = {"target": record_id, "source": name["name"], "parameter": {"index": name["index"]}}
                            data_pair.append(tmp)
                    else:
                        tmp = {"target": record_id, "source": record_field}
                        data_pair.append(tmp)
                        result[RESULT_MSG] += "暂不支持针对【" + new_type + "】的拆分"
                        print("暂不支持针对【" + new_type + "】的拆分")
                else:
                    tmp = {"target": record_id, "source": record_field}
                    data_pair.append(tmp)
                counter_has_content += 1
    if counter_all == 0:
        result[RESULT_CODE] = 125
        result[RESULT_MSG] += "\n 提取{NODE}与{NODE2}之间关系{REL}过程:未查询到{NODE_Q}节点中满足条件{FILTER}的节点".format(
            NODE=source_node_type, FILTER=str(filters), NODE2=target_node_type, REL=rel_type, NODE_Q=new_type)
        print(result[RESULT_MSG])
        return result
    if counter_has_content == 0:
        result[RESULT_CODE] = 126
        result[RESULT_MSG] += "\n 提取{NODE}与{NODE2}之间关系{REL}过程:在满足条件{FILTER}的{NODE_Q}节点的字段{FIELD}中没有有效信息".format(
            NODE=source_node_type, FILTER=str(filters), NODE2=target_node_type, REL=rel_type, FIELD=info_field,
            NODE_Q=new_type)
        print(result[RESULT_MSG])
        return result
    # 先将要生成的节点数据筛选出来 todo 没有检查是否所有记录都成功处理
    data_switched = []
    if use_source:
        for tmp in data_pair:
            data_switched.append(tmp["target"])
    else:
        for tmp in data_pair:
            data_switched.append(tmp["source"])
    data_switched = list(set(data_switched))
    # 将要建立节点的信息进行封装,models的类
    info = {"name": data_switched, "type": new_type}
    nodes = wrap_info_to_model(info, filters)  # node是封装后的节点类
    # 查询/创建节点
    create_result = create_or_match_nodes(nodes, database_info, to_create=True, return_type="class")
    if create_result[RESULT_CODE] < 0:
        result[RESULT_CODE] = -128
        result[RESULT_MSG] = create_result[RESULT_MSG] + "\t 查询/新建节点信息失败,停止创建关系"
        return result
    # 解析出新生成节点的uuid和关键字的对应关系,key是关键字,value是uuid
    nodes = create_result[RESULT_DATA]
    mapping = {}
    for datum in nodes:
        if new_type == NODE_TYPES[0]:  # Publication
            mapping[datum.id] = datum.uuid
        elif new_type == NODE_TYPES[1]:  # Venue
            mapping[datum.venue_name] = datum.uuid
        elif new_type == NODE_TYPES[2]:  # Person
            mapping[datum.full_name] = datum.uuid
    # 更新data_pair,将其中关键字部分改成uuid
    if use_source:
        for pair in data_pair:
            tmp_id = mapping[pair["target"]]
            pair["target"] = tmp_id
    else:
        for pair in data_pair:
            tmp_id = mapping[pair["source"]]
            pair["source"] = tmp_id
    # 查询/建立边
    counter_has_content = 0
    counter_all = len(data_pair)
    with driver.session() as session:
        for pair in data_pair:
            tmp = query_or_create_relation(session, source_node_type, pair["source"], target_node_type, pair["target"], rel_type,
                                           to_create=True, parameters=pair["parameter"])
            if tmp[RESULT_CODE] == 170 or tmp[RESULT_CODE] == 171:
                counter_has_content += 1
    if counter_has_content < counter_all:
        result[RESULT_CODE] = -131
        result[RESULT_MSG] = "部分边创建失败"
        return result
    else:
        result[RESULT_CODE] = 127
        result[RESULT_MSG] = "全部边创建成功"
        return result
Ejemplo n.º 26
0
def resolve_deprel(request):
    """
    由“开始解析”按钮触发的对给定句子的解析,对外返回解析结果。

    注:采用ajax + post方式,header指定{"X-Requested-With":"XMLHttpRequest", "Content-Type":"application/x-www-form-urlencoded"}

    :param request: 包含待解析sentence
    :return:
    """
    result = ini_result()
    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':
        result[RESULT_CODE] = -103
        result[
            RESULT_MSG] = "not supported request form (should be post and with ajax)"
        return wrap_result(result)

    data = request.body  #
    if data is None or data == "":
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    try:
        data = bytes.decode(data)
        data = json.loads(data)
        sentence = data["sentence"]

        # 调用ddparser处理结果-命名规则:sentence+timestamp
        resolve_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        words, head, relation, pos_tag = dd_parser_caller(sentence)

        # 生成依存关系图
        graph_name = sentence + '_' + resolve_time  # 拼装:句子+时间

        g = generate_dep_rel_graph(
            os.path.join(current_dir, 'static/images/cache'), graph_name,
            words, relation, head)
        if g is None:
            print("failed to generate dependency graph")
            result[RESULT_CODE] = -303
            result[RESULT_MSG] = "failed to generate dependency graph"
            result[RESULT_DATA] = {
                'sentence': sentence,
                'deprel': None,
                'relation': str(relation),
                'words': str(words),
                'head': str(head)
            }
        else:
            print('successfully generate dependency graph')
            result[RESULT_CODE] = 300
            result[RESULT_MSG] = "success"
            result[RESULT_DATA] = {
                'sentence': sentence,
                'deprel': graph_name + ".png",
                'relation': str(relation),
                'words': str(words),
                'head': str(head)
            }

    except TypeError:
        result[RESULT_CODE] = -302
        result[RESULT_MSG] = "no valid data is given"

    return wrap_result(result)
Ejemplo n.º 27
0
def save_deprel_result(request):
    """
    存储页面标注结果,将修改信息存储在txt文件中,格式为:id 提交评论时间 句子 解析文件路径(名) relation words head 是否有问题 comments(\t 隔开)
    :param request:
    :return:
    """

    result = ini_result()

    data = request.body

    if data is None or data == "":
        result[RESULT_CODE] = -301
        result[RESULT_MSG] = "no data is given"
        return wrap_result(result)

    is_ajax = request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
    if not is_ajax or request.method != 'POST':

        result[RESULT_CODE] = -103
        result[
            RESULT_MSG] = "not supported request form (should be post and with ajax)"
        return wrap_result(result)

    try:
        data = bytes.decode(data)
        data = json.loads(data)

        record_uuid = str(uuid.uuid1())
        record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        sentence = data.get('sentence', '-')

        result_file_name = data.get('image_path', '-')

        words = data.get('words', '-')
        relation = data.get('relation', '-')
        head = data.get('head', '-')

        is_correct = data.get('is_problematic', '-')
        comments = data.get('comments', '-')

        tmp = [
            record_uuid, record_time, sentence, result_file_name, words,
            relation, head, is_correct, comments
        ]
        tmp = '\t'.join(tmp)

        with open(log_dir.joinpath('dep_rel_service_log.log'),
                  'a+',
                  encoding="utf-8") as f:
            f.write(tmp + '\n')

        result[RESULT_MSG] = "successfully saved"
        result[RESULT_CODE] = 400
        # result["field"] = data

    except TypeError:
        result[RESULT_CODE] = -402
        result[RESULT_MSG] = "no valid data is given"

    return wrap_result(result)
Ejemplo n.º 28
0
def command_resolve(request):
    """
    基本功能,按照模板匹配的方式处理指令,为进行grounding
    :return:
    """
    request_method = request.method
    result = ini_result()
    if request_method != 'GET':
        result[RESULT_CODE] = -101
        result["message"] = "request method should be get"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    command = request.GET.get("command", None)
    session_id = request.GET.get("sessionId", '')
    robot_id = request.GET.get("robotId", '')

    if not command or not session_id or not robot_id:
        result[RESULT_CODE] = -802
        result["message"] = "No command found"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    result = resolve(command)
    if not result["success"]:
        result[RESULT_CODE] = -803
        result["message"] = "failed to resolve"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    task = result.get("task", None)
    if task is None:
        result[RESULT_CODE] = -804
        result["message"] = "success without task information"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    task["sessionId"] = session_id
    task["robotId"] = robot_id

    # todo send to reasoning service
    # ret = requests.post(PLANNING_URL, json=task).text

    msg_to_front = {'input': command, 'output': task}
    msg_to_front = json.dumps(msg_to_front, ensure_ascii=False)

    mqtt_client.publish(topic, msg_to_front)

    ret = '{RESULT_CODE: 200}'
    try:
        ret_content = json.loads(ret)
        reason_code = ret_content.get(RESULT_CODE, 0)
    except json.JSONDecodeError or TypeError:
        result[RESULT_CODE] = 801
        result["message"] = "success but failed to send to reasoning module"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    if reason_code != 200:
        result[RESULT_CODE] = 802
        result["message"] = "success but reasoning failed"
        return HttpResponse(json.dumps(result, ensure_ascii=False),
                            content_type='application/json',
                            charset='utf-8')

    result[RESULT_CODE] = 800
    result["message"] = "success"

    print_log(request, result)

    return HttpResponse(json.dumps(result, ensure_ascii=False),
                        content_type='application/json',
                        charset='utf-8')
Ejemplo n.º 29
0
def extract_publication_from_bib_info(entry):
    """
    从bib info中构造出PUBLICATION节点. checked
    :param entry: dict of bib info
    :return: PUBLICATION
    """
    result = ini_result()

    if entry is None or not isinstance(entry, dict) or entry.keys() is None:
        result[RESULT_CODE] = -1001
        result[RESULT_MSG] = "输入数据错误!"
        return result

    entry = capitalize_dict_keys(entry)
    node_type = entry.get("ENTRYTYPE", None)

    if node_type is None or node_type not in PUBLICATION_TYPES:
        result[RESULT_CODE] = -1002
        result[RESULT_MSG] = "unrecognized entry type:" + str(entry)
        return result

    entry_processed = {
        field_name: entry.get(field_name).upper()
        for field_name in FIELD_NAMES_PUB if field_name in entry.keys()
    }

    core_field_check = check_core_fields(node_type, entry_processed)

    if not core_field_check:
        result[RESULT_CODE] = -1003
        result[RESULT_MSG] = "缺少必填字段" + str(entry)
        return result

    author = entry_processed.get("AUTHOR", None)
    editor = entry_processed.get("EDITOR", None)
    title = entry_processed.get("TITLE", None)
    journal = entry_processed.get("JOURNAL", None)
    year = entry_processed.get("YEAR", None)
    volume = entry_processed.get("VOLUME", None)
    number = entry_processed.get("NUMBER", None)
    series = entry_processed.get("SERIES", None)
    address = entry_processed.get("ADDRESS", None)
    pages = entry_processed.get("PAGES", None)
    month = entry_processed.get("MONTH", None)
    note = entry_processed.get("NOTE", None)
    publisher = entry_processed.get("PUBLISHER", None)
    edition = entry_processed.get("EDITION", None)
    book_title = entry_processed.get("BOOKTITLE", None)
    organization = entry_processed.get("ORGANIZATION", None)
    chapter = entry_processed.get("CHAPTER", None)
    school = entry_processed.get("SCHOOL", None)
    field_type = entry_processed.get("type", None)
    how_published = entry_processed.get("HOWPUBLISHED", None)
    keywords = entry_processed.get("KEYWORDS", None)
    abstract = entry_processed.get("ABSTRACT", None)
    note_id = entry_processed.get("NOTEID", None)
    institution = entry_processed.get("INSTITUTION", None)
    added_by = entry_processed.get("ADDEDBY", None)
    added_date = entry_processed.get("ADDEDDATE", None)
    sci_index = entry_processed.get("SCIINDEX", None)
    ei_index = entry_processed.get("EIINDEX", None)
    ssci_index = entry_processed.get("SSCIINDEX", None)
    modified_date = entry_processed.get("MODIFIEDDATE", None)
    field_id = entry_processed.get("ID", None)

    node = Publication("",
                       node_type,
                       author=author,
                       editor=editor,
                       title=title,
                       journal=journal,
                       year=year,
                       volume=volume,
                       number=number,
                       series=series,
                       address=address,
                       pages=pages,
                       month=month,
                       note=note,
                       publisher=publisher,
                       edition=edition,
                       book_title=book_title,
                       organization=organization,
                       chapter=chapter,
                       school=school,
                       type=field_type,
                       how_published=how_published,
                       keywords=keywords,
                       abstract=abstract,
                       note_id=note_id,
                       institution=institution,
                       added_by=added_by,
                       added_date=added_date,
                       sci_index=sci_index,
                       ei_index=ei_index,
                       ssci_index=ssci_index,
                       modified_date=modified_date,
                       id=field_id)

    result[RESULT_CODE] = 1001
    result[RESULT_MSG] = "success"
    result[RESULT_DATA] = node
    return result
Ejemplo n.º 30
0
def build(driver, file_name, sheet_name=None, column_name=None):
    """
    从文件中提取文献信息,并生成Publication、Venue、Person和他们之间的Published_in、Authored_by关系。 ---checked
    :param driver:
    :param file_name:
    :param sheet_name:
    :param column_name:
    :return:
    """
    result = ini_result()
    if file_name is None or not os.path.exists(file_name):
        result["code"] = -504
        result["msg"] = "file does not exist"
        return result
    if driver is None:
        result[RESULT_CODE] = -500
        result[RESULT_MSG] = "the database is not configured!"
        return result

    # 从文件中提取info
    publication_info = extract_bib_info_from_file(file_name, sheet_name, column_name)  # dict

    pubs, venues, persons = [], [], []
    fail_pub, fail_venue, fail_person = [], [], []

    for entry in publication_info:
        # 解析文献
        tmp_result_pub = extract_publication_from_bib_info(entry)
        if tmp_result_pub[RESULT_CODE] == 1001:
            pubs.append(tmp_result_pub[RESULT_DATA])
        else:
            fail_pub.append(entry)
        # 解析venue
        tmp_result_venue = extract_venue_from_bib_info(entry)
        if tmp_result_venue[RESULT_CODE] == 1005:
            venues.append(tmp_result_venue[RESULT_DATA])
        else:
            fail_venue.append(entry)
        # 解析person
        tmp_result_person = extract_person_from_bib_info(entry)
        if tmp_result_person[RESULT_CODE] == 1006:
            persons.append(tmp_result_person[RESULT_DATA])
        else:
            fail_person.append(entry)

    pubs = None if pubs == [] else pubs
    venues = None if venues == [] else venues
    persons = None if persons == [] else persons
    # 利用提取的Publication、Venue、Person写入数据库
    db_pub_result = create_or_match_nodes(driver, pubs, return_type="class", to_create=True)
    if db_pub_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] = "Publication节点生成失败"
    db_ven_result = create_or_match_nodes(driver, venues, return_type="class", to_create=True)
    if db_ven_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] += "。Venue节点生成失败"
    db_ppl_result = create_or_match_nodes(driver, persons, return_type="class", to_create=True)
    if db_ppl_result[RESULT_CODE] != 1303:
        result[RESULT_CODE] = -1205
        result[RESULT_MSG] += "。Person节点生成失败"

    if result[RESULT_CODE] == -1205:
        return result

    # 生成边 published in
    publish_in_info_result = extract_rel_publish_in_from_pub_info(db_pub_result[RESULT_DATA])  # 获取published_in信息

    failed_pair = []
    if publish_in_info_result[RESULT_CODE] in [1009, 1010]:  # 若提取成功,则创建边
        pairs = publish_in_info_result[RESULT_DATA]["success"]
        for entry in pairs:
            venue_info = [entry]
            venue_result = create_or_match_nodes(driver, venue_info, "VENUE")
            if venue_result[RESULT_CODE] != 904:
                failed_pair.append(entry)
                continue
            tmp_result = query_or_create_relation(driver, "PUBLICATION", entry["pub"].uuid, "VENUE", venue_result[RESULT_DATA][0].uuid, "PUBLISHED_IN")
            if tmp_result[RESULT_CODE] not in [1304, 1306]:
                failed_pair.append(entry)

    # 生成边 AUTHORED_BY
    author_by_info_result = extract_rel_author_by_from_pub_info(db_pub_result[RESULT_DATA])  # 获取authored_by信息
    if author_by_info_result[RESULT_CODE] in [1009, 1010]:  # 若提取成功,则创建边
        pairs = author_by_info_result[RESULT_DATA]["success"]
        for entry in pairs:
            person_info = [entry]
            person_result = create_or_match_nodes(driver, person_info, "PERSON")
            if person_result[RESULT_CODE] != 904:
                failed_pair.append(entry)
                continue
            tmp_result = query_or_create_relation(driver, "PUBLICATION", entry["pub"].uuid, "PERSON",
                                                  person_result[RESULT_DATA][0].uuid, "AUTHORED_BY")
            if tmp_result[RESULT_CODE] not in [1304, 1306]:
                failed_pair.append(entry)
    #
    failed_pair = None if failed_pair == [] else failed_pair

    if failed_pair is not None:
        result[RESULT_CODE] = 1400
        result[RESULT_MSG] = "success"
    else:
        result[RESULT_CODE] = -1400
        result[RESULT_MSG] = "partially or fully failed"

    return result