Exemple #1
0
def indirect_search(name, keywords, type, depth=2):
    data = []
    # 生成Cypher语句,并进行查找
    gdb = get_db()

    # MARK:: TEST

    # 根据关键词生成 CONTAINS 子句
    phase = 'WHERE '
    for i, keyword in enumerate(keywords):
        if i == 0:
            phase += 'node.' + labels[
                name].key_prop + ' CONTAINS \'' + keyword + '\''
        else:
            phase += ' OR node.' + labels[
                name].key_prop + ' CONTAINS \'' + keyword + '\''

    # 构造查询语句
    query = 'MATCH(node:' + labels[name].label + ')' + '-[*1..' + str(
        depth) + ']-' + '(result:' + type + ') ' + phase + ' RETURN result'

    print(query)

    # -------------在下面这条语句可以设置返回的数据类型,包括是否返回关系等----------------
    result = gdb.query(q=query, data_contents=True)

    if len(result) > 0:
        # 将查找到的数据整理为 Pandas 的 DataFrame
        data = [row[0] for row in result.rows]

    return data
Exemple #2
0
def init_link_of_disease(request):
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # keywords 为输入的查找关键词
    params = req_param['params']
    gdb = get_db()
    query = 'MATCH(source:Disease{mimnumber:\'' + params[0] + '\'})--(path:Symptom)--(target:Disease{mimnumber:\'' + \
            params[1] + '\'}) return source,path,target'
    result = gdb.query(q=query, data_contents=True)

    items = []
    nodes = set()
    links = set()
    for row in result.rows:
        source = Node(
            "disease", "mimnumber:" + row[0]["mimnumber"] + "</br>" +
            "prefferred title:" + row[0]["preferredTitle"])
        symptom = Node("symptom", "symptom:" + row[1]["symptom"])
        target = Node(
            "disease", "mimnumber:" + row[2]["mimnumber"] + "</br>" +
            "prefferred title:" + row[2]["preferredTitle"])

        link1 = Link(source.name, symptom.name, "Behave")
        link2 = Link(target.name, symptom.name, "Behave")
        nodes.add(source)
        nodes.add(symptom)
        nodes.add(target)
        links.add(link1)
        links.add(link2)
        item = {"source": row[0], "path": row[1], 'target': row[2]}
        items.append(item)
    res_data = {'nodes': list(nodes), "links": list(links)}

    response = HttpResponse(json.dumps(res_data, cls=MyEncoder))
    # response = HttpResponse(json.dumps(items))
    return response
Exemple #3
0
def links(req_params, name):

    # 假设目前搜索的参数为:在 symptom 中根据 keywords 进行搜索
    # keywords 为输入的查找关键词
    keywords = req_params.get('params')
    # types为用户勾选的想要查找的间接的信息
    types = req_params.get('types')
    # 最大搜索深度
    depth = req_params.get('depth')

    # 比如勾选了 "疾病,基因"
    # 业务处理逻辑为:
    # 1. 根据关键词查找关键词有关的symptom
    # 2. 遍历每个症状,查找与他有关的 疾病,基因 信息

    data = direct_search(name, keywords)
    gdb = get_db()

    for node in data:
        node['neighbors'] = {}
        for type in types:
            node_match = 'MATCH(node:' + labels[name].label + '{id:\'' + node[
                'id'] + '\'})'
            rela_match = '-[*1..' + str(depth) + ']-'
            resu_match = '(result:' + type + ') RETURN result'
            query = node_match + rela_match + resu_match
            result = gdb.query(q=query, data_contents=True)

            if len(result) > 0:
                neighbors = [row[0] for row in result.rows]
                node['neighbors'][type] = neighbors

    # 返回行数据数组,每个节点的 neighbors 字段存储与他相关的节点
    return json.dumps(data)
Exemple #4
0
def get_statistics(params, name):
    # keywords 为输入的查找关键词
    search_type = 'Disease'
    keywords = params['keywords']
    depth = 2

    # 搜索疾病相关信息
    diseases = indirect_search(name, keywords, search_type, depth)
    inheris = []
    response_data = {}
    gdb = get_db()
    for disease in diseases:
        query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
            'mimnumber'] + '\'})-->(inheri:Inheritance) RETURN inheri'
        result = gdb.query(q=query, data_contents=True)
        inheris.append(result.rows[0][0]['name'])

    ss = Series(inheris).value_counts()
    inheris = {}
    try:
        inheris['ar'] = int(ss['Autosomal recessive'])
    except:
        inheris['ar'] = 0
    try:
        inheris['ad'] = int(ss['Autosomal dominant'])
    except:
        inheris['ad'] = 0

    response_data['inheris'] = inheris
    # 对每个疾病提取相关有用信息,构造返回信息

    return json.dumps(response_data)
Exemple #5
0
def get_data(querys):
    gdb = get_db()
    results = []

    for query in querys:
        result = gdb.query(query, data_contents=True)
        results.append(result.graph)

    return results
Exemple #6
0
def get_disease_list2(request):
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # keywords 为输入的查找关键词
    keywords = req_param['params']

    # 设置搜索参数
    name = 'symptom'
    search_type = 'Disease'
    depth = 2
    gdb = get_db()
    # 搜索疾病相关信息
    diseases = indirect_search(name, keywords, search_type, depth)
    items = []
    response_data = {}

    for disease in diseases:
        item = {}
        count = 0
        item['mimnumber'] = disease['mimnumber']
        item['preferredTitle'] = disease['preferredTitle']
        item['shorteningTitle'] = disease['shorteningTitle']
        query = 'MATCH(node:Disease)--(s:Symptom)--(t:Type)' \
                'MATCH(node:Disease)--(i:Inheritance)' \
                'WHERE node.mimnumber =\'' + disease['mimnumber'] \
                + '\' RETURN count(s) as cnt ,t,i' \
                  '   ORDER BY cnt DESC'
        result = gdb.query(q=query, data_contents=True)
        inheri = result.rows[0][2]['name']
        position = [row[1]['name'] for row in result.rows][0:5]
        all_position = [row[1]['name'] for row in result.rows]
        item['inheritance'] = inheri
        item['position'] = position
        for row in result.rows:
            count += row[0]
        item['symptomCount'] = count
        item['allPosition'] = all_position
        items.append(item)

    positions = set()
    inheris = set()

    # 统计所有发病部位和遗传方式
    for disease in diseases:
        query = 'MATCH(node:Disease)--(s:Symptom)--(t:Type)' \
                'MATCH(node:Disease)--(i:Inheritance)' \
                'WHERE node.mimnumber =\'' + disease['mimnumber'] \
                + '\' RETURN t,i'
        result = gdb.query(q=query, data_contents=True)
        for row in result.rows:
            positions.add(row[0]['name'])
            inheris.add(row[1]['name'])
    response_data['list'] = items
    response_data['positions'] = list(positions)
    response_data['inheris'] = list(inheris)
    response = HttpResponse(json.dumps(response_data))
    return response
Exemple #7
0
def get_top5item(request):
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # keywords 为输入的查找关键词
    mimnumber = req_param['params']
    gdb = get_db()
    query = 'MATCH(d:Disease{mimnumber:\'' + mimnumber + '\'})-->(s:Symptom)--(result:Disease) WITH d,result, count(*) AS path order by path desc RETURN d,result,path limit 100'
    result = gdb.query(q=query, data_contents=True)
    res_data = []
    for row in result.rows:
        item = {"source": row[0], "target": row[1], 'path': row[2]}
        res_data.append(item)
    return HttpResponse(json.dumps(res_data))
Exemple #8
0
def get_disease_info():
    gdb = get_db()
    query = "MATCH(node:Disease)-[r]-()  where node.shorteningTitle <> \'null\' return node, count(r) order by count(r)  desc limit 20"
    result = gdb.query(q=query, data_contents=True)
    disease = []
    count = []

    for row in result.rows:
        disease.append(row[0]['shorteningTitle'])
        count.append(row[1])
    data = {'disease': disease, 'count': count}

    return json.dumps(data)
Exemple #9
0
def direct_search(name, keywords):
    # 根据关键词生成正则表达式
    reg = '.*|.*'.join(keywords)
    reg = '\'.*' + reg + '.*\''
    # 生成Cypher语句,并进行查找
    gdb = get_db()
    query = 'MATCH(node:' + labels[name].label + ') WHERE node.' + labels[
        name].key_prop + ' =~ ' + reg + ' RETURN node'

    # -------------在下面这条语句可以设置返回的数据类型,包括是否返回关系等----------------
    result = gdb.query(q=query, data_contents=True)

    # 将查找到的数据整理为 Pandas 的 DataFrame
    data = [row[0] for row in result.rows]
    return data
Exemple #10
0
def get_json_res(querys):
    gdb = get_db()
    results = []
    try:
        for query in querys:
            result = gdb.query(query, data_contents=True)
            results.append(result.graph)
        info = {'msg': '查询成功', 'code': 1}
        res = {'info': info, 'data': results}
    except:
        info = {'msg': '查询失败', 'code': 0}
        res = {'info': info, 'data': ''}

    return HttpResponse(json.dumps(obj=res, ensure_ascii=False),
                        content_type='application/json')
Exemple #11
0
def get_graph(request):
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # mimnumber:614172 为输入的查找的疾病的id
    mimnumber = req_param['params']
    res_data = {}
    # 生成Cypher语句,并进行查找
    gdb = get_db()

    query = 'MATCH(node:Disease)--(s:Symptom)--(t:Type)' \
            'MATCH(node:Disease)--(g:Gene)' \
            'MATCH(node:Disease)--(i:Inheritance)' \
            'WHERE node.mimnumber =\'' + mimnumber + '\'RETURN node,s,t,g,i'
    result = gdb.query(q=query, data_contents=True)

    # disease = [row for row in result.rows]
    nodes = set()
    links = set()
    for row in result.rows:
        disease = Node(
            "disease", "mimnumber:" + row[0]["mimnumber"] + "</br>" +
            "prefferred title:" + row[0]["preferredTitle"])
        symptom = Node("symptom", "symptom:" + row[1]["symptom"])
        type = Node("type", "type:" + row[2]["name"])
        gene = Node(
            "gene",
            "name:" + row[3]["name"] + "</br>" + "id:" + row[3]["number"])
        inheri = Node("inheritance", "inheritance:" + row[4]["name"])
        nodes.add(disease)
        nodes.add(symptom)
        nodes.add(type)
        nodes.add(gene)
        nodes.add(inheri)

        link1 = Link(disease.name, symptom.name, "Behave")
        link2 = Link(symptom.name, type.name, "Belong")
        link3 = Link(gene.name, disease.name, "Cause")
        link4 = Link(disease.name, inheri.name, "Observe")
        links.add(link1)
        links.add(link2)
        links.add(link3)
        links.add(link4)
    res_data = {'nodes': list(nodes), "links": list(links)}

    response = HttpResponse(json.dumps(res_data, cls=MyEncoder))

    return response
Exemple #12
0
def get_inheritance_info():
    gdb = get_db()
    query = "MATCH(disease)--(result:Inheritance) return result,count(*) order by(count(*)) asc"
    result = gdb.query(q=query, data_contents=True)
    inheri = []
    count = []
    items = []
    for row in result.rows:

        if 'name' in row[0].keys():
            inheri.append(row[0]['name'])
            count.append(row[1])
            item = {"value": row[1], "name": row[0]['name']}
            items.append(item)
    data = {'inheritance': inheri, 'items': items}

    return json.dumps(data)
Exemple #13
0
def indirect_search(name, keywords, type, depth=2):
    # 根据关键词生成正则表达式
    reg = '.*|.*'.join(keywords)
    reg = '\'.*' + reg + '.*\''
    data = []
    # 生成Cypher语句,并进行查找
    gdb = get_db()
    query = 'MATCH(node:' + labels[name].label + ')' + '-[*1..' + str(
        depth) + ']-' + '(result:' + type + ')' + ' WHERE node.' + labels[
            name].key_prop + ' =~ ' + reg + ' RETURN result'

    # -------------在下面这条语句可以设置返回的数据类型,包括是否返回关系等----------------
    result = gdb.query(q=query, data_contents=True)
    if len(result) > 0:
        # 将查找到的数据整理为 Pandas 的 DataFrame
        data = [row[0] for row in result.rows]

    return data
Exemple #14
0
def get_detail(request):
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # mimnumber 为输入的查找的疾病的id
    mimnumber = req_param['params']
    res_data = {}
    # 生成Cypher语句,并进行查找
    gdb = get_db()
    query = 'MATCH(node:Disease) WHERE node.mimnumber = \'' + mimnumber + '\'RETURN node'
    result = gdb.query(q=query, data_contents=True)

    disease = [row[0] for row in result.rows][0]
    res_data['mimnumber'] = disease['mimnumber']
    res_data['preferredTitle'] = disease['preferredTitle']
    res_data['shorteningTitle'] = disease['shorteningTitle']

    # 查询疾病的遗传方式
    query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
        'mimnumber'] + '\'})-->(inheri:Inheritance) RETURN inheri'
    result = gdb.query(q=query, data_contents=True)
    res_data['inheritance'] = result.rows[0][0]['name']

    # 查询疾病症状并按类别分
    query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
        'mimnumber'] + '\'})-->(symp:Symptom)-->(type:Type)  RETURN symp,type'
    result = gdb.query(q=query, data_contents=True)
    symptoms = []
    temp = {}
    for r in result.rows:
        type = r[1]['name']
        if type in temp:
            temp[type].append(r[0]['symptom'])
        else:
            temp[type] = []
            temp[type].append(r[0]['symptom'])

    for key in temp:
        item = {'type': key, 'symptom': temp[key]}
        symptoms.append(item)

    res_data['symptoms'] = symptoms

    response = HttpResponse(json.dumps(res_data))
    return response
Exemple #15
0
def direct_search(name, keywords):

    # 生成Cypher语句,并进行查找
    gdb = get_db()
    phase = 'WHERE '
    for i, keyword in enumerate(keywords):
        if i == 0:
            phase += 'node.' + labels[
                name].key_prop + ' CONTAINS \'' + keyword + '\''
        else:
            phase += ' OR node.' + labels[
                name].key_prop + ' CONTAINS \'' + keyword + '\''

    query = 'MATCH(node:' + labels[name].label + ') ' + phase + ' RETURN node'

    # -------------在下面这条语句可以设置返回的数据类型,包括是否返回关系等----------------
    result = gdb.query(q=query, data_contents=True)

    # 将查找到的数据整理为 Pandas 的 DataFrame
    data = [row[0] for row in result.rows]
    return data
Exemple #16
0
def links(request):
    # 获取请求参数
    req_param = json.loads(str(request.body, encoding='utf-8'))

    # keywords 为输入的查找关键词
    keywords = req_param.get('keywords')
    # types为用户勾选的想要查找的间接的信息
    types = req_param.get('types')
    # 最大搜索深度
    depth = req_param.get('depth')

    name = 'gene'
    # 比如勾选了 "疾病,基因"
    # 业务处理逻辑为:
    # 1. 根据关键词查找关键词有关的symptom
    # 2. 遍历每个症状,查找与他有关的 疾病,基因 信息

    data = direct_search(name, keywords)
    gdb = get_db()

    for node in data:
        node['neighbors'] = {}
        for type in types:
            node_match = 'MATCH(node:' + labels[name].label + '{' + labels[
                name].key + ':\'' + node[labels[name].key] + '\'})'
            rela_match = '-[*1..' + str(depth) + ']-'
            resu_match = '(result:' + type + ') RETURN result'

            query = node_match + rela_match + resu_match

            result = gdb.query(q=query, data_contents=True)

            if len(result) > 0:
                neighbors = [row[0] for row in result.rows]
                node['neighbors'][type] = neighbors

    # 返回行数据数组,每个节点的neighbors 字段存储与他相关的节点
    response = HttpResponse(json.dumps(data))
    return response
Exemple #17
0
def get_statistics(request):
    print('to get statistics data')
    # 获取请求参数
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # keywords 为输入的查找关键词
    keywords = req_param['params']
    name = 'gene'
    search_type = 'Disease'
    depth = 2

    # 搜索疾病相关信息
    diseases = indirect_search(name, keywords, search_type, depth)
    inheris = []
    response_data = {}
    gdb = get_db()
    for disease in diseases:
        query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
            'mimnumber'] + '\'})-->(inheri:Inheritance) RETURN inheri'
        result = gdb.query(q=query, data_contents=True)
        inheris.append(result.rows[0][0]['name'])

    ss = Series(inheris).value_counts()
    inheris = {}
    try:
        inheris['ar'] = int(ss['Autosomal recessive'])
    except:
        inheris['ar'] = 0
    try:
        inheris['ad'] = int(ss['Autosomal dominant'])
    except:
        inheris['ad'] = 0

    response_data['inheris'] = inheris
    HttpResponse(json.dumps(response_data))
    # 对每个疾病提取相关有用信息,构造返回信息

    return HttpResponse(json.dumps(response_data))
Exemple #18
0
def get_disease_list_old(request):
    # 获取请求参数
    req_param = json.loads(str(request.body, encoding='utf-8'))

    # keywords 为输入的查找关键词
    keywords = req_param['params']

    name = 'gene'
    data = direct_search(name, keywords)

    depth = 2
    gdb = get_db()
    response_data = []
    type = 'Disease'
    for node in data:

        # 构造query语句查询相关的疾病
        node_match = 'MATCH(node:' + labels[name].label + '{' + labels[
            name].key + ':\'' + node[labels[name].key] + '\'})'
        rela_match = '-[*1..' + str(depth) + ']-'
        resu_match = '(result:' + type + ') RETURN result'
        query = node_match + rela_match + resu_match
        result = gdb.query(q=query, data_contents=True)

        item = {}
        if len(result) > 0:
            diseases = [row[0] for row in result.rows]
        # 每个疾病提取相关有用信息
        for disease in diseases:
            item['mimnumber'] = disease['mimnumber']
            item['preferredTitle'] = disease['preferredTitle']
            item['shorteningTitle'] = disease['shorteningTitle']
            # 查询疾病的遗传方式
            query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
                'mimnumber'] + '\'})-->(inheri:Inheritance) RETURN inheri'
            result = gdb.query(q=query, data_contents=True)
            item['inheritance'] = result.rows[0][0]['name']
            # 统计症状
            query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
                'mimnumber'] + '\'})-->(symp:Symptom) RETURN COUNT(symp)'
            result = gdb.query(q=query, data_contents=True)
            # 查询发病部位
            item['symptomCount'] = result.rows[0][0]
            query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
                'mimnumber'] + '\'})--> (symp:Symptom) -->(type:Type) return type'
            result = gdb.query(q=query, data_contents=True)
            positions = [row[0] for row in result.rows]
            positions_dict = {}
            # 统计发病部位数量并排序,取top4
            for p in positions:
                if p['name'] not in positions_dict:
                    positions_dict[p['name']] = 1
                else:
                    positions_dict[p['name']] += 1
            positions_dict = sorted(positions_dict.items(),
                                    key=lambda x: x[1],
                                    reverse=True)
            item['position'] = [p[0] for p in positions_dict[0:5]]

            response_data.append(item)

    response = HttpResponse(json.dumps(response_data))
    return response
Exemple #19
0
# Create your tests here.
from pandas import DataFrame

from symptom.views import direct_search
from tools.neo4j import get_db, labels

# 假设目前搜索的参数为:在 symptom 中根据 keywords 进行搜索
name = 'symptom'
# keywords 为输入的查找关键词
keywords = ['adult male', '151']
types = ['Disease', 'Gene']
data = direct_search(name, keywords)

# 最大搜索深度
depth = 2
gdb = get_db()
for node in data:
    node['neighbors'] = []
    for type in types:
        node_match = 'MATCH(node:' + labels[name].label + '{id:\'' + node[
            'id'] + '\'})'
        rela_match = '-[*1..' + str(depth) + ']-'
        resu_match = '(result:' + type + ') RETURN result'
        query = node_match + rela_match + resu_match
        print(query)
        result = gdb.query(q=query, data_contents=True)

        if len(result) > 0:
            neighbors = [row[0] for row in result.rows]
            node['neighbors'].extend(neighbors)
Exemple #20
0
def get_disease_list(request):
    # 获取请求参数
    req_param = json.loads(str(request.body, encoding='utf-8'))
    # keywords 为输入的查找关键词
    keywords = req_param['params']

    # 设置搜索参数
    name = 'gene'
    search_type = 'Disease'
    depth = 2

    # 搜索疾病相关信息
    diseases = indirect_search(name, keywords, search_type, depth)

    gdb = get_db()
    response_data = {}
    items = []
    positions = []
    inheris = []

    # 对每个疾病提取相关有用信息,构造返回信息
    for disease in diseases:
        item = {}
        item['mimnumber'] = disease['mimnumber']
        item['preferredTitle'] = disease['preferredTitle']
        item['shorteningTitle'] = disease['shorteningTitle']

        # 查询疾病的遗传方式
        query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
            'mimnumber'] + '\'})-->(inheri:Inheritance) RETURN inheri'
        result = gdb.query(q=query, data_contents=True)
        item['inheritance'] = result.rows[0][0]['name']
        inheris.append(result.rows[0][0]['name'])
        # 统计症状
        query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
            'mimnumber'] + '\'})-->(symp:Symptom) RETURN COUNT(symp)'
        result = gdb.query(q=query, data_contents=True)

        # 查询发病部位
        item['symptomCount'] = result.rows[0][0]
        query = 'MATCH(d:Disease{mimnumber:' + '\'' + disease[
            'mimnumber'] + '\'})--> (symp:Symptom) -->(type:Type) return type'
        result = gdb.query(q=query, data_contents=True)

        # 统计发病部位数量并排序,取top5
        position = [row[0]['name'] for row in result.rows]
        item['allPosition'] = position
        positions.extend(position)

        ss = Series(position).value_counts()[0:5]
        item['position'] = list(ss.index)

        items.append(item)

    data = DataFrame({'p': positions}).drop_duplicates()
    positions = data['p'].values.tolist()

    data = DataFrame({'i': inheris}).drop_duplicates()
    inheris = data['i'].drop_duplicates().values.tolist()

    response_data['list'] = items
    response_data['positions'] = positions
    response_data['inheris'] = inheris

    response = HttpResponse(json.dumps(response_data))
    return response