def indirect_search(name, keywords, type, depth=2):
    """Find ``type`` nodes reachable from nodes matching any keyword.

    A start node matches when its key property (``labels[name].key_prop``)
    CONTAINS at least one of ``keywords``. Every ``type`` node within
    ``depth`` relationships (any direction) of a match is returned.

    Args:
        name: Key into the module-level ``labels`` mapping; selects the
            start label and its searchable property.
        keywords: Iterable of substrings to search for.
        type: Label of the nodes to return. Labels cannot be sent as
            parameters, so this is interpolated and must be trusted.
        depth: Maximum path length; coerced with ``int``.

    Returns:
        A list holding the first column of each result row; empty when
        there are no keywords or nothing matches.
    """
    keywords = list(keywords)
    if not keywords:
        # An empty OR-chain would yield the invalid Cypher "WHERE  RETURN".
        return []

    key_prop = labels[name].key_prop
    params = {}
    conditions = []
    for index, keyword in enumerate(keywords):
        param = 'kw' + str(index)
        params[param] = keyword
        # {name} is the parameter placeholder of the legacy REST Cypher API
        # (assumes a neo4jrestclient-style client; newer servers use $name).
        conditions.append('node.' + key_prop + ' CONTAINS {' + param + '}')

    query = ('MATCH (node:' + labels[name].label + ')'
             '-[*1..' + str(int(depth)) + ']-'
             '(result:' + type + ') '
             'WHERE ' + ' OR '.join(conditions) + ' RETURN result')

    gdb = get_db()
    result = gdb.query(q=query, params=params, data_contents=True)
    if len(result) > 0:
        return [row[0] for row in result.rows]
    return []
def init_link_of_disease(request):
    """Return the symptoms shared by two diseases as a node/link graph.

    The request body is JSON whose ``params`` entry holds two mimnumbers
    (``params[0]`` is the source disease, ``params[1]`` the target).

    Returns:
        An ``HttpResponse`` whose body is ``{"nodes": [...], "links": [...]}``
        serialized with ``MyEncoder``.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    params = req_param['params']

    # Values travel as Cypher parameters instead of being concatenated into
    # the statement ({name} is the legacy REST placeholder syntax; assumes a
    # neo4jrestclient-style client).
    query = ('MATCH (source:Disease)--(path:Symptom)--(target:Disease) '
             'WHERE source.mimnumber = {source_mim} '
             'AND target.mimnumber = {target_mim} '
             'RETURN source, path, target')
    gdb = get_db()
    result = gdb.query(
        q=query,
        params={'source_mim': params[0], 'target_mim': params[1]},
        data_contents=True)

    # Sets de-duplicate repeated nodes/links; this relies on Node and Link
    # being hashable, as the original code already did.
    nodes = set()
    links = set()
    for row in (result.rows or []):
        source = Node(
            "disease",
            "mimnumber:" + row[0]["mimnumber"] + "</br>" +
            "prefferred title:" + row[0]["preferredTitle"])
        symptom = Node("symptom", "symptom:" + row[1]["symptom"])
        target = Node(
            "disease",
            "mimnumber:" + row[2]["mimnumber"] + "</br>" +
            "prefferred title:" + row[2]["preferredTitle"])
        nodes.update((source, symptom, target))
        links.add(Link(source.name, symptom.name, "Behave"))
        links.add(Link(target.name, symptom.name, "Behave"))

    res_data = {'nodes': list(nodes), "links": list(links)}
    return HttpResponse(json.dumps(res_data, cls=MyEncoder))
def links(req_params, name):
    """Search nodes by keyword and attach their neighbours grouped by label.

    Args:
        req_params: Mapping with ``params`` (search keywords), ``types``
            (labels of the neighbours to collect) and ``depth`` (maximum
            path length; defaults to 2 when absent).
        name: Key into the module-level ``labels`` mapping selecting the
            label that is searched.

    Returns:
        A JSON string: the list of matched nodes, each with a ``neighbors``
        dict mapping a requested label to the nodes found for it. Labels
        without any result are omitted.

    Raises:
        ValueError: If a requested label is not a plain identifier.
    """
    keywords = req_params.get('params')
    types = req_params.get('types') or []
    depth = req_params.get('depth')
    depth = 2 if depth is None else int(depth)

    # Labels cannot be passed as Cypher parameters, so they are validated
    # before being interpolated into the statement.
    for label in types:
        if not (isinstance(label, str) and label.isidentifier()):
            raise ValueError('invalid node label: %r' % (label,))

    data = direct_search(name, keywords)
    gdb = get_db()
    start_label = labels[name].label
    for node in data:
        neighbors = {}
        for label in types:
            # {node_id} is a Cypher parameter (legacy REST placeholder
            # syntax; assumes a neo4jrestclient-style client).
            query = ('MATCH (node:' + start_label + ')'
                     '-[*1..' + str(depth) + ']-'
                     '(result:' + label + ') '
                     'WHERE node.id = {node_id} RETURN result')
            result = gdb.query(q=query, params={'node_id': node['id']},
                               data_contents=True)
            if len(result) > 0:
                neighbors[label] = [row[0] for row in result.rows]
        node['neighbors'] = neighbors
    return json.dumps(data)
def get_statistics(params, name):
    """Count inheritance modes of the diseases related to the keywords.

    Args:
        params: Mapping whose ``keywords`` entry lists the search keywords.
        name: Key into the module-level ``labels`` mapping selecting the
            label that is searched.

    Returns:
        A JSON string ``{"inheris": {"ar": <int>, "ad": <int>}}`` with the
        number of related diseases whose first inheritance row is named
        'Autosomal recessive' / 'Autosomal dominant'.
    """
    keywords = params['keywords']
    diseases = indirect_search(name, keywords, 'Disease', 2)

    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client).
    query = ('MATCH (d:Disease)-->(inheri:Inheritance) '
             'WHERE d.mimnumber = {mimnumber} RETURN inheri')
    gdb = get_db()
    names = []
    for disease in diseases:
        result = gdb.query(q=query,
                           params={'mimnumber': disease['mimnumber']},
                           data_contents=True)
        rows = result.rows or []
        if rows:  # a disease without an inheritance node is skipped
            names.append(rows[0][0]['name'])

    inheris = {
        'ar': names.count('Autosomal recessive'),
        'ad': names.count('Autosomal dominant'),
    }
    return json.dumps({'inheris': inheris})
def get_data(querys):
    """Run every Cypher statement in ``querys`` and collect the graphs.

    Returns:
        A list with the ``graph`` attribute of each query result, in the
        same order as ``querys``.
    """
    db = get_db()
    return [db.query(cypher, data_contents=True).graph for cypher in querys]
def get_disease_list2(request):
    """List the diseases related to symptom keywords, with summary facets.

    The request body is JSON whose ``params`` entry holds the keywords.
    Diseases are looked up from the ``symptom`` label with depth 2.

    Returns:
        An ``HttpResponse`` with JSON ``{"list": [...], "positions": [...],
        "inheris": [...]}``. Each list item carries the disease titles, its
        first inheritance name (``None`` when absent), the five type names
        with the highest symptom counts, the summed symptom count and all
        type names. ``positions``/``inheris`` are the distinct type and
        inheritance names over all listed diseases.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    keywords = req_param['params']
    diseases = indirect_search('symptom', keywords, 'Disease', 2)

    # One statement per disease yields both the per-item data and the
    # global facets, so no second round of identical queries is needed.
    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client).
    query = ('MATCH (node:Disease)--(s:Symptom)--(t:Type) '
             'MATCH (node:Disease)--(i:Inheritance) '
             'WHERE node.mimnumber = {mimnumber} '
             'RETURN count(s) AS cnt, t, i ORDER BY cnt DESC')
    gdb = get_db()

    items = []
    positions = set()
    inheris = set()
    for disease in diseases:
        result = gdb.query(q=query,
                           params={'mimnumber': disease['mimnumber']},
                           data_contents=True)
        rows = result.rows or []
        type_names = [row[1]['name'] for row in rows]
        inheri_names = [row[2]['name'] for row in rows]

        items.append({
            'mimnumber': disease['mimnumber'],
            'preferredTitle': disease['preferredTitle'],
            'shorteningTitle': disease['shorteningTitle'],
            # Rows may be empty when the disease lacks typed symptoms or
            # an inheritance node; report that instead of crashing.
            'inheritance': inheri_names[0] if inheri_names else None,
            'position': type_names[:5],
            'symptomCount': sum(row[0] for row in rows),
            'allPosition': type_names,
        })
        positions.update(type_names)
        inheris.update(inheri_names)

    response_data = {
        'list': items,
        'positions': list(positions),
        'inheris': list(inheris),
    }
    return HttpResponse(json.dumps(response_data))
def get_top5item(request):
    """Return diseases sharing symptoms with the requested disease.

    The request body is JSON whose ``params`` entry is the mimnumber of the
    disease of interest.

    Returns:
        An ``HttpResponse`` with a JSON list of
        ``{"source": ..., "target": ..., "path": <shared count>}`` objects,
        ordered by ``path`` descending and capped at 100 rows by the query.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    mimnumber = req_param['params']

    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client), so the request value is
    # never spliced into the statement text.
    query = ('MATCH (d:Disease)-->(s:Symptom)--(result:Disease) '
             'WHERE d.mimnumber = {mimnumber} '
             'WITH d, result, count(*) AS path ORDER BY path DESC '
             'RETURN d, result, path LIMIT 100')
    gdb = get_db()
    result = gdb.query(q=query, params={'mimnumber': mimnumber},
                       data_contents=True)

    res_data = [
        {"source": row[0], "target": row[1], 'path': row[2]}
        for row in (result.rows or [])
    ]
    return HttpResponse(json.dumps(res_data))
def get_disease_info():
    """Return the 20 most connected diseases and their relationship counts.

    Only Disease nodes whose ``shorteningTitle`` differs from the string
    'null' are considered.

    Returns:
        A JSON string ``{"disease": [<shorteningTitle>...],
        "count": [<relationship count>...]}`` with parallel lists.
    """
    cypher = ("MATCH(node:Disease)-[r]-() where node.shorteningTitle <> 'null' "
              "return node, count(r) order by count(r) desc limit 20")
    outcome = get_db().query(q=cypher, data_contents=True)

    # Single pass over the rows, then split into the two parallel lists.
    pairs = [(row[0]['shorteningTitle'], row[1]) for row in outcome.rows]
    titles = [title for title, _ in pairs]
    totals = [total for _, total in pairs]
    return json.dumps({'disease': titles, 'count': totals})
def direct_search(name, keywords):
    """Return the nodes whose key property matches any keyword (regex).

    The keywords are joined into ``.*kw1.*|.*kw2.*`` and matched with the
    Cypher ``=~`` operator, so each keyword is treated as a regular
    expression fragment. An empty keyword list gives ``.*.*``, which
    matches every node that has the property.

    Args:
        name: Key into the module-level ``labels`` mapping; selects the
            label and the property (``key_prop``) that is searched.
        keywords: Iterable of strings.

    Returns:
        A plain list with the first column of each result row.
    """
    pattern = '.*' + '.*|.*'.join(keywords) + '.*'

    # The pattern is sent as a Cypher parameter rather than pasted into a
    # quoted literal, so quotes in a keyword cannot break the statement.
    # ({name} is the legacy REST placeholder syntax; assumes a
    # neo4jrestclient-style client.)
    query = ('MATCH (node:' + labels[name].label + ') '
             'WHERE node.' + labels[name].key_prop + ' =~ {pattern} '
             'RETURN node')
    gdb = get_db()
    result = gdb.query(q=query, params={'pattern': pattern},
                       data_contents=True)
    return [row[0] for row in (result.rows or [])]
def get_json_res(querys):
    """Run the Cypher statements and wrap their graphs in a JSON envelope.

    Args:
        querys: Iterable of Cypher statements.

    Returns:
        An ``HttpResponse`` (``application/json``) whose body is
        ``{"info": {"msg": ..., "code": 1}, "data": [<graph>...]}`` on
        success, or ``{"info": {"msg": ..., "code": 0}, "data": ""}`` when
        any query raises.
    """
    import logging

    gdb = get_db()
    try:
        results = [gdb.query(query, data_contents=True).graph
                   for query in querys]
    except Exception:
        # Keep the best-effort envelope, but record the failure and do not
        # swallow SystemExit/KeyboardInterrupt as a bare ``except`` would.
        logging.getLogger(__name__).exception('Cypher query failed')
        res = {'info': {'msg': '查询失败', 'code': 0}, 'data': ''}
    else:
        res = {'info': {'msg': '查询成功', 'code': 1}, 'data': results}
    return HttpResponse(json.dumps(obj=res, ensure_ascii=False),
                        content_type='application/json')
def get_graph(request):
    """Return the neighbourhood graph of one disease.

    The request body is JSON whose ``params`` entry is the disease
    mimnumber. The graph links the disease to its symptoms, each symptom
    to its type, each gene to the disease and the disease to its
    inheritance.

    Returns:
        An ``HttpResponse`` whose body is ``{"nodes": [...], "links": [...]}``
        serialized with ``MyEncoder``.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    mimnumber = req_param['params']

    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client), so the request value is
    # never spliced into the statement text.
    query = ('MATCH (node:Disease)--(s:Symptom)--(t:Type) '
             'MATCH (node:Disease)--(g:Gene) '
             'MATCH (node:Disease)--(i:Inheritance) '
             'WHERE node.mimnumber = {mimnumber} '
             'RETURN node, s, t, g, i')
    gdb = get_db()
    result = gdb.query(q=query, params={'mimnumber': mimnumber},
                       data_contents=True)

    # Sets de-duplicate repeated nodes/links; this relies on Node and Link
    # being hashable, as the original code already did.
    nodes = set()
    links = set()
    for row in (result.rows or []):
        disease = Node(
            "disease",
            "mimnumber:" + row[0]["mimnumber"] + "</br>" +
            "prefferred title:" + row[0]["preferredTitle"])
        symptom = Node("symptom", "symptom:" + row[1]["symptom"])
        type_node = Node("type", "type:" + row[2]["name"])
        gene = Node(
            "gene",
            "name:" + row[3]["name"] + "</br>" + "id:" + row[3]["number"])
        inheri = Node("inheritance", "inheritance:" + row[4]["name"])
        nodes.update((disease, symptom, type_node, gene, inheri))

        links.add(Link(disease.name, symptom.name, "Behave"))
        links.add(Link(symptom.name, type_node.name, "Belong"))
        links.add(Link(gene.name, disease.name, "Cause"))
        links.add(Link(disease.name, inheri.name, "Observe"))

    res_data = {'nodes': list(nodes), "links": list(links)}
    return HttpResponse(json.dumps(res_data, cls=MyEncoder))
def get_inheritance_info():
    """Return how many nodes are connected to each Inheritance node.

    Inheritance nodes without a ``name`` property are left out.

    Returns:
        A JSON string ``{"inheritance": [<name>...],
        "items": [{"value": <count>, "name": <name>}...]}``, in the order
        produced by the query (ascending count).
    """
    gdb = get_db()
    query = ("MATCH(disease)--(result:Inheritance) "
             "return result,count(*) order by(count(*)) asc")
    result = gdb.query(q=query, data_contents=True)

    inheri = []
    items = []
    for row in (result.rows or []):
        node, total = row[0], row[1]
        if 'name' in node:
            inheri.append(node['name'])
            items.append({"value": total, "name": node['name']})
    return json.dumps({'inheritance': inheri, 'items': items})
def indirect_search(name, keywords, type, depth=2):
    """Find ``type`` nodes reachable from nodes matching any keyword (regex).

    The keywords are joined into ``.*kw1.*|.*kw2.*`` and matched against
    the start node's key property with the Cypher ``=~`` operator, so each
    keyword is treated as a regular expression fragment.

    Args:
        name: Key into the module-level ``labels`` mapping; selects the
            start label and its searchable property (``key_prop``).
        keywords: Iterable of strings.
        type: Label of the nodes to return. Labels cannot be sent as
            parameters, so this is interpolated and must be trusted.
        depth: Maximum path length; coerced with ``int``.

    Returns:
        A list holding the first column of each result row; empty when
        nothing matches.
    """
    pattern = '.*' + '.*|.*'.join(keywords) + '.*'

    # The pattern is sent as a Cypher parameter rather than pasted into a
    # quoted literal, so quotes in a keyword cannot break the statement.
    # ({name} is the legacy REST placeholder syntax; assumes a
    # neo4jrestclient-style client.)
    query = ('MATCH (node:' + labels[name].label + ')'
             '-[*1..' + str(int(depth)) + ']-'
             '(result:' + type + ') '
             'WHERE node.' + labels[name].key_prop + ' =~ {pattern} '
             'RETURN result')
    gdb = get_db()
    result = gdb.query(q=query, params={'pattern': pattern},
                       data_contents=True)
    if len(result) > 0:
        return [row[0] for row in result.rows]
    return []
def get_detail(request):
    """Return the details of one disease: titles, inheritance, symptoms.

    The request body is JSON whose ``params`` entry is the disease
    mimnumber.

    Returns:
        An ``HttpResponse`` with JSON containing ``mimnumber``,
        ``preferredTitle``, ``shorteningTitle``, ``inheritance`` (``None``
        when the disease has no inheritance node) and ``symptoms``, a list
        of ``{"type": <type name>, "symptom": [<symptom>...]}`` groups.
        When no disease has that mimnumber, an empty JSON object is
        returned with status 404.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    mimnumber = req_param['params']
    gdb = get_db()

    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client).
    result = gdb.query(
        q='MATCH (node:Disease) WHERE node.mimnumber = {mimnumber} '
          'RETURN node',
        params={'mimnumber': mimnumber},
        data_contents=True)
    rows = result.rows or []
    if not rows:
        # Unknown id: answer explicitly instead of failing with IndexError.
        return HttpResponse(json.dumps({}), status=404)

    disease = rows[0][0]
    params = {'mimnumber': disease['mimnumber']}
    res_data = {
        'mimnumber': disease['mimnumber'],
        'preferredTitle': disease['preferredTitle'],
        'shorteningTitle': disease['shorteningTitle'],
    }

    # Inheritance mode of the disease.
    result = gdb.query(
        q='MATCH (d:Disease)-->(inheri:Inheritance) '
          'WHERE d.mimnumber = {mimnumber} RETURN inheri',
        params=params,
        data_contents=True)
    rows = result.rows or []
    res_data['inheritance'] = rows[0][0]['name'] if rows else None

    # Symptoms grouped by type, in first-seen type order.
    result = gdb.query(
        q='MATCH (d:Disease)-->(symp:Symptom)-->(type:Type) '
          'WHERE d.mimnumber = {mimnumber} RETURN symp, type',
        params=params,
        data_contents=True)
    grouped = {}
    symptoms = []
    for row in (result.rows or []):
        type_name = row[1]['name']
        if type_name not in grouped:
            grouped[type_name] = []
            symptoms.append({'type': type_name,
                             'symptom': grouped[type_name]})
        grouped[type_name].append(row[0]['symptom'])
    res_data['symptoms'] = symptoms

    return HttpResponse(json.dumps(res_data))
def direct_search(name, keywords):
    """Return the nodes whose key property CONTAINS any of the keywords.

    Args:
        name: Key into the module-level ``labels`` mapping; selects the
            label and the property (``key_prop``) that is searched.
        keywords: Iterable of substrings.

    Returns:
        A plain list with the first column of each result row; empty when
        there are no keywords or nothing matches.
    """
    keywords = list(keywords)
    if not keywords:
        # An empty OR-chain would yield the invalid Cypher "WHERE  RETURN".
        return []

    key_prop = labels[name].key_prop
    params = {}
    conditions = []
    for index, keyword in enumerate(keywords):
        param = 'kw' + str(index)
        params[param] = keyword
        # {name} is the parameter placeholder of the legacy REST Cypher API
        # (assumes a neo4jrestclient-style client; newer servers use $name).
        conditions.append('node.' + key_prop + ' CONTAINS {' + param + '}')

    query = ('MATCH (node:' + labels[name].label + ') '
             'WHERE ' + ' OR '.join(conditions) + ' RETURN node')
    gdb = get_db()
    result = gdb.query(q=query, params=params, data_contents=True)
    return [row[0] for row in (result.rows or [])]
def links(request):
    """Search genes by keyword and attach their neighbours grouped by label.

    The request body is JSON with ``keywords`` (search keywords), ``types``
    (labels of the neighbours to collect) and ``depth`` (maximum path
    length; defaults to 2 when absent).

    Returns:
        An ``HttpResponse`` with a JSON list of the matched nodes, each
        with a ``neighbors`` dict mapping a requested label to the nodes
        found for it. Labels without any result are omitted.

    Raises:
        ValueError: If a requested label is not a plain identifier.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    keywords = req_param.get('keywords')
    types = req_param.get('types') or []
    depth = req_param.get('depth')
    depth = 2 if depth is None else int(depth)
    name = 'gene'

    # Labels cannot be passed as Cypher parameters, so they are validated
    # before being interpolated into the statement.
    for label in types:
        if not (isinstance(label, str) and label.isidentifier()):
            raise ValueError('invalid node label: %r' % (label,))

    data = direct_search(name, keywords)
    gdb = get_db()
    start_label = labels[name].label
    key = labels[name].key
    for node in data:
        neighbors = {}
        for label in types:
            # {node_key} is a Cypher parameter (legacy REST placeholder
            # syntax; assumes a neo4jrestclient-style client).
            query = ('MATCH (node:' + start_label + ')'
                     '-[*1..' + str(depth) + ']-'
                     '(result:' + label + ') '
                     'WHERE node.' + key + ' = {node_key} RETURN result')
            result = gdb.query(q=query, params={'node_key': node[key]},
                               data_contents=True)
            if len(result) > 0:
                neighbors[label] = [row[0] for row in result.rows]
        node['neighbors'] = neighbors
    return HttpResponse(json.dumps(data))
def get_statistics(request):
    """Count inheritance modes of the diseases related to gene keywords.

    The request body is JSON whose ``params`` entry holds the keywords.
    Diseases are looked up from the ``gene`` label with depth 2.

    Returns:
        An ``HttpResponse`` with JSON
        ``{"inheris": {"ar": <int>, "ad": <int>}}``: the number of related
        diseases whose first inheritance row is named 'Autosomal recessive'
        / 'Autosomal dominant'.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    keywords = req_param['params']
    diseases = indirect_search('gene', keywords, 'Disease', 2)

    # {mimnumber} is a Cypher parameter (legacy REST placeholder syntax;
    # assumes a neo4jrestclient-style client).
    query = ('MATCH (d:Disease)-->(inheri:Inheritance) '
             'WHERE d.mimnumber = {mimnumber} RETURN inheri')
    gdb = get_db()
    names = []
    for disease in diseases:
        result = gdb.query(q=query,
                           params={'mimnumber': disease['mimnumber']},
                           data_contents=True)
        rows = result.rows or []
        if rows:  # a disease without an inheritance node is skipped
            names.append(rows[0][0]['name'])

    inheris = {
        'ar': names.count('Autosomal recessive'),
        'ad': names.count('Autosomal dominant'),
    }
    return HttpResponse(json.dumps({'inheris': inheris}))
def get_disease_list_old(request):
    """List the diseases related to gene keywords (older implementation).

    The request body is JSON whose ``params`` entry holds the keywords.
    Genes are searched directly; for each matching gene every Disease
    within two relationships is reported.

    Returns:
        An ``HttpResponse`` with a JSON list, one entry per (gene, disease)
        pair, carrying the disease titles, its first inheritance name
        (``None`` when absent), its symptom count and the five most
        frequent symptom type names.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    keywords = req_param['params']
    name = 'gene'
    depth = 2
    data = direct_search(name, keywords)
    gdb = get_db()
    key = labels[name].key

    # All values are Cypher parameters (legacy REST {name} placeholder
    # syntax; assumes a neo4jrestclient-style client).
    disease_query = ('MATCH (node:' + labels[name].label + ')'
                     '-[*1..' + str(depth) + ']-(result:Disease) '
                     'WHERE node.' + key + ' = {node_key} RETURN result')
    inheri_query = ('MATCH (d:Disease)-->(inheri:Inheritance) '
                    'WHERE d.mimnumber = {mimnumber} RETURN inheri')
    count_query = ('MATCH (d:Disease)-->(symp:Symptom) '
                   'WHERE d.mimnumber = {mimnumber} RETURN COUNT(symp)')
    type_query = ('MATCH (d:Disease)-->(symp:Symptom)-->(type:Type) '
                  'WHERE d.mimnumber = {mimnumber} RETURN type')

    response_data = []
    for node in data:
        result = gdb.query(q=disease_query, params={'node_key': node[key]},
                           data_contents=True)
        if len(result) > 0:
            diseases = [row[0] for row in result.rows]
        else:
            diseases = []

        for disease in diseases:
            params = {'mimnumber': disease['mimnumber']}
            # A fresh dict per disease: sharing one dict made every entry
            # of the response alias the last disease processed.
            item = {
                'mimnumber': disease['mimnumber'],
                'preferredTitle': disease['preferredTitle'],
                'shorteningTitle': disease['shorteningTitle'],
            }

            rows = gdb.query(q=inheri_query, params=params,
                             data_contents=True).rows or []
            item['inheritance'] = rows[0][0]['name'] if rows else None

            rows = gdb.query(q=count_query, params=params,
                             data_contents=True).rows or []
            item['symptomCount'] = rows[0][0] if rows else 0

            # Rank symptom types by frequency and keep the top five.
            rows = gdb.query(q=type_query, params=params,
                             data_contents=True).rows or []
            counts = {}
            for row in rows:
                type_name = row[0]['name']
                counts[type_name] = counts.get(type_name, 0) + 1
            ranked = sorted(counts.items(), key=lambda pair: pair[1],
                            reverse=True)
            item['position'] = [type_name for type_name, _ in ranked[:5]]

            response_data.append(item)

    return HttpResponse(json.dumps(response_data))
# Create your tests here.
# Exploratory script: search symptoms by keyword, then collect the Disease
# and Gene nodes within `depth` relationships of every hit.
from pandas import DataFrame

from symptom.views import direct_search
from tools.neo4j import get_db, labels

# Label to search in.
name = 'symptom'
# Keywords entered for the search.
keywords = ['adult male', '151']
# Labels of the related nodes to collect.
types = ['Disease', 'Gene']
data = direct_search(name, keywords)
# Maximum search depth.
depth = 2
gdb = get_db()

for node in data:
    node['neighbors'] = []
    for type in types:
        query = ''.join([
            'MATCH(node:', labels[name].label,
            '{id:\'', node['id'], '\'})',
            '-[*1..', str(depth), ']-',
            '(result:', type, ') RETURN result',
        ])
        print(query)
        result = gdb.query(q=query, data_contents=True)
        if len(result) > 0:
            node['neighbors'].extend([row[0] for row in result.rows])
def get_disease_list(request):
    """List the diseases related to gene keywords, with summary facets.

    The request body is JSON whose ``params`` entry holds the keywords.
    Diseases are looked up from the ``gene`` label with depth 2.

    Returns:
        An ``HttpResponse`` with JSON ``{"list": [...], "positions": [...],
        "inheris": [...]}``. Each list item carries the disease titles, its
        first inheritance name (``None`` when absent), its symptom count,
        all symptom type names and the five most frequent ones.
        ``positions``/``inheris`` are the distinct type and inheritance
        names over all listed diseases, in first-seen order.
    """
    req_param = json.loads(str(request.body, encoding='utf-8'))
    keywords = req_param['params']
    diseases = indirect_search('gene', keywords, 'Disease', 2)
    gdb = get_db()

    # All values are Cypher parameters (legacy REST {name} placeholder
    # syntax; assumes a neo4jrestclient-style client).
    inheri_query = ('MATCH (d:Disease)-->(inheri:Inheritance) '
                    'WHERE d.mimnumber = {mimnumber} RETURN inheri')
    count_query = ('MATCH (d:Disease)-->(symp:Symptom) '
                   'WHERE d.mimnumber = {mimnumber} RETURN COUNT(symp)')
    type_query = ('MATCH (d:Disease)-->(symp:Symptom)-->(type:Type) '
                  'WHERE d.mimnumber = {mimnumber} RETURN type')

    items = []
    positions = []
    inheris = []
    for disease in diseases:
        params = {'mimnumber': disease['mimnumber']}
        item = {
            'mimnumber': disease['mimnumber'],
            'preferredTitle': disease['preferredTitle'],
            'shorteningTitle': disease['shorteningTitle'],
        }

        # Inheritance mode; a disease without one no longer aborts the
        # whole listing.
        rows = gdb.query(q=inheri_query, params=params,
                         data_contents=True).rows or []
        inheritance = rows[0][0]['name'] if rows else None
        item['inheritance'] = inheritance
        if inheritance is not None:
            inheris.append(inheritance)

        # Number of symptoms.
        rows = gdb.query(q=count_query, params=params,
                         data_contents=True).rows or []
        item['symptomCount'] = rows[0][0] if rows else 0

        # Symptom types: all of them, plus the five most frequent.
        rows = gdb.query(q=type_query, params=params,
                         data_contents=True).rows or []
        position = [row[0]['name'] for row in rows]
        item['allPosition'] = position
        positions.extend(position)
        item['position'] = list(Series(position).value_counts()[0:5].index)

        items.append(item)

    response_data = {
        'list': items,
        # drop_duplicates keeps the first occurrence, preserving order.
        'positions': Series(positions).drop_duplicates().tolist(),
        'inheris': Series(inheris).drop_duplicates().tolist(),
    }
    return HttpResponse(json.dumps(response_data))