class TwitterGraph(object):
    '''A class for interfacing with the Neo4j Twitter network database.'''

    # Initial setup and linking into the database
    def __init__(self, host_port, user, password):
        '''Makes connection to Neo4j database.

        :param host_port: "host:port" string for the Neo4j REST endpoint.
        :param user: Neo4j username.
        :param password: Neo4j password.
        '''
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except Exception:  # ConstraintViolationException: constraint already present
            print('Unique id on Node User already exists')

    # Functions to add data to the database
    def add_following(self, user_id, following_ids, rec_count):
        '''Given a unique user id, adds the relationship for who they follow.
        Adds a User Node with the id if it doesn't exist.

        :param user_id: id of the follower.
        :param following_ids: iterable of followed ids, most recent first.
        :param rec_count: offset for the recency counter (1 + rec_count = most recent).
        '''
        user = Node('User', id=user_id)
        self.graph.merge(user)  # important to merge before doing anything
        rec = 1 + rec_count  # preserving the order of the following. 1 = most recent
        for fid in following_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user, 'FOLLOWS', user2, rec=rec))
            rec += 1
        user['following_added'] = True  # mark this user's following list as fetched
        self.graph.push(user)

    def add_followers(self, user_id, follower_ids, rec_count):
        '''Given a unique user id, adds the relationship for who follows them.
        Adds a User Node with the id if it doesn't exist.'''
        user = Node('User', id=user_id)
        self.graph.merge(user)
        rec = 1 + rec_count
        for fid in follower_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user2, 'FOLLOWS', user, rec=rec))
            rec += 1
        user['followers_added'] = True  # mark this user's follower list as fetched
        self.graph.push(user)

    def add_user_properties(self, user):
        '''Given a user object (or an error dict for a bad id), adds
        properties to the existing user Node.'''
        try:
            user_id = user.id
            existing_user = Node('User', id=user_id)
            clean_prop_dict = self.__clean_user_dict(user.__dict__)
            self.graph.merge(existing_user)
            for k, v in clean_prop_dict.items():
                existing_user[k] = v
            # add additional label to verified accounts
            if clean_prop_dict['verified']:
                print(True)
                existing_user.add_label('Verified')
        except Exception:
            # bad user id: `user` is an error dict, not a user object
            user_id = user['user_id']
            error = user['error']
            existing_user = Node('User', id=user_id)
            self.graph.merge(existing_user)
            existing_user['screen_name'] = 'INVALID'
            existing_user['error'] = error
            print('Found invalid user id')
        self.graph.push(existing_user)

    def __clean_user_dict(self, user_prop_dict):
        '''Return a filtered copy of a raw user-attribute dict suitable for
        inserting as Node properties.

        Keeps only whitelisted keys, normalises the avatar URL, and converts
        the datetime to an ordinal plus a formatted string.
        '''
        keep = ['contributors_enabled', 'created_at', 'default_profile',
                'default_profile_image', 'description', 'favourites_count',
                'followers_count', 'friends_count', 'geo_enabled', 'id',
                'id_str', 'is_translator', 'lang', 'listed_count', 'location',
                'name', 'profile_image_url_https', 'protected', 'screen_name',
                'statuses_count', 'time_zone', 'utc_offset', 'verified',
                'withheld_in_countries', 'withheld_scope']
        # only keep the above keys for inserting
        clean = {k: v for k, v in user_prop_dict.items() if k in keep}
        image, ext = os.path.splitext(clean['profile_image_url_https'])
        # BUGFIX: str.rstrip('_normal') strips any trailing run of those
        # characters (e.g. 'animal_normal' -> 'ani'); remove the exact
        # '_normal' size suffix instead.
        if image.endswith('_normal'):
            image = image[:-len('_normal')]
        clean['profile_image_url_https'] = image + ext
        # convert date time to string (plus ordinal for range queries)
        clean['created_at_ord'] = clean['created_at'].toordinal()
        clean['created_at'] = clean['created_at'].strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # Functions to query database
    def get_nodes_missing_props(self, limit=100):
        '''Returns the first `limit` ids of nodes without user properties.'''
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where("_.screen_name IS NULL").limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        '''Ids of followers of BernieSanders that still lack profile
        properties. NOTE: `limit` is currently unused (query hard-codes 100).'''
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m) WHERE m.screen_name = 'BernieSanders' AND NOT EXISTS(n.screen_name) RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        '''Returns ids missing the follower or following relationships.
        Valid inputs for rel are FOLLOWING or FOLLOWERS.'''
        selector = NodeSelector(self.graph)
        if rel == 'FOLLOWING':
            selected = selector.select('User').where("_.following_added IS NULL").limit(limit)
        elif rel == 'FOLLOWERS':
            selected = selector.select('User').where("_.followers_added IS NULL").limit(limit)
        else:
            # ValueError is still caught by callers handling Exception
            raise ValueError("rel must be 'FOLLOWING' or 'FOLLOWERS', got {!r}".format(rel))
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        '''Ids of popular (>= 1000 followers) Bernie followers whose own
        following list has not been fetched. NOTE: `rel` is currently unused.'''
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User) WHERE n.followers_count >= 1000 AND NOT EXISTS(n.following_added) AND m.screen_name = 'BernieSanders' RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        '''Ids of accounts BernieSanders follows whose following list is
        missing. NOTE: `rel` is currently unused.'''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User) WHERE m.screen_name = 'BernieSanders' AND NOT EXISTS(n.following_added) RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        '''Same query as get_nodes_missing_rels_bfriends but fetches 500 and
        returns the last 100 -- presumably to step past ids that repeatedly
        fail (confirm against the calling crawler).'''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User) WHERE m.screen_name = 'BernieSanders' AND NOT EXISTS(n.following_added) RETURN n.id LIMIT 500;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()[-100:]]
def get_graph():
    """Open a connection to the Neo4j database and return the Graph handle.

    Credentials come from the module-level NEO4J_* settings ('user/password'
    in NEO4J_AUTH); a trivial query is issued first so a bad connection
    fails here rather than later.
    """
    global NEO4J_URL, NEO4J_HOST, NEO4J_PORT, NEO4J_AUTH
    # Connect to graph
    parts = NEO4J_AUTH.split('/')
    connection = Graph(user=parts[0], password=parts[1], host=NEO4J_HOST)
    connection.run('match (t:Tweet) return COUNT(t)')  # smoke-test the link
    return connection
def computeShortestPathCoherence(node1, node2, w):
    """Connects to graph database, then creates and sends query to graph
    database. Returns the shortest path between two nodes.
    Format: (67149)-[:'LINKS_TO']->(421)

    Returns `w` scaled by 1/len(shortest path), caching the coherence in
    Redis in both directions; 0.0 when no path of length <= 10 exists.
    """
    # identical pages are maximally coherent
    if node1.strip() == node2.strip():
        return w

    fromCache = rds.get("%s:%s" % (node1, node2))
    if fromCache:
        return float(fromCache) * w

    g = Graph()
    # NOTE(review): node names are interpolated straight into the Cypher
    # text; a name containing a double quote breaks (or injects into) the
    # query -- consider parameterized queries.
    q = "MATCH path=shortestPath((m:Page {name:\"%s\"})-[LINKS_TO*1..10]-(n:Page {name:\"%s\"})) RETURN LENGTH(path) AS length, path, m, n" % (node1, node2)
    cursor = g.run(q)
    path = None
    for c in cursor:  # keep only the last record of the cursor
        path = c
    if path:
        # BUGFIX: use true (float) division -- under Python 2,
        # 1/path["length"] is integer division and cached 0 for every
        # path longer than one hop.
        coherence = 1.0 / path["length"]
        rds.set("%s:%s" % (node1, node2), coherence)
        rds.set("%s:%s" % (node2, node1), coherence)
        return float(w) / path["length"]
    # no path found: cache the miss in both directions
    rds.set("%s:%s" % (node1, node2), 0.0)
    rds.set("%s:%s" % (node2, node1), 0.0)
    return 0.0
def make_sequence(self):
    """Query Neo4j for non-repeating Jaccard-linked video paths and dump
    them to CSV.

    Appends (sequence id, video id, rating) rows to self.seq_ids,
    self.video_ids and self.ratings, writes
    '<VideosDirPath>/<game>/sequences.csv' plus a per-sequence rating
    standard-deviation summary.
    NOTE(review): assumes self.seq_ids / self.video_ids / self.ratings /
    self.game are initialised elsewhere in the class -- confirm.
    Indentation below is reconstructed from a whitespace-mangled source.
    """
    authenticate(settings.NeoHost, settings.NeoLog, settings.NeoPass)
    graph = Graph("{0}/db/data/".format(settings.NeoHost))
    # paths of 5-10 Jaccard hops with no repeated node, returned as
    # [id, rating] pairs per node
    query = """MATCH (start:Video)-[:Jaccard*5..10]->(sequence:Video) WHERE start<>sequence MATCH p=shortestPath((start:Video)-[:Jaccard*]->(sequence:Video)) WHERE NONE (n IN nodes(p) WHERE size(filter(x IN nodes(p) WHERE n = x))> 1) RETURN EXTRACT(n IN NODES(p)|[n.id, n.rating]) LIMIT 100000"""
    r1 = graph.run(query).data()
    k = 0
    for i in r1:
        #print(i.values)
        for video in i['EXTRACT(n IN NODES(p)|[n.id, n.rating])']:
            #print(video)
            self.seq_ids.append(k)
            self.video_ids.append(video[0])
            self.ratings.append(video[1])
        # advance the sequence id once per returned path
        # (placement reconstructed from mangled indentation -- TODO confirm)
        k+=1
    data = {'sequence': self.seq_ids, 'video': self.video_ids, 'rating': self.ratings}
    df = pd.DataFrame(data)
    df = df[pd.notnull(df['video'])]  # drop rows with missing video ids
    print(df)
    # spread of ratings within each sequence
    dz = df.groupby('sequence')['rating'].std()
    print(dz)
    path = '{0}/{1}/'.format(settings.VideosDirPath, self.game)
    if not os.path.exists(path):
        os.makedirs(path)
    file_name = '{0}/sequences.csv'.format(path)
    df.to_csv(file_name, encoding='utf-8')
    summary_data = '{0}/summary.csv'.format(path)
    dz.to_csv(summary_data, encoding='utf-8')
    return
def graph():
    """Flask view: build a D3-style {nodes, links} JSON graph of :brexit
    tweeters, sized by betweenness centrality."""
    #topic = str(request.args.get('topic'))
    # NOTE(review): `topic` is read from the form but never used below.
    topic = str(request.form['topic'])
    graph = Graph("http://ec2-52-205-15-39.compute-1.amazonaws.com:7474/db/data/")
    node_results = graph.run("MATCH (n:brexit) where has(n.betweenness_centrality) RETURN n.ScreenName as ScreenName, n.TweetId as TweetId, n.FollowerCount as FollowerCount, n.betweenness_centrality as BetweennessCentrality;")
    linksMap = {}  # ScreenName -> index into `nodes`
    nodes = []
    links = []
    for index,node_result in enumerate(node_results):
        linksMap[node_result['ScreenName']]=index
        # group = follower count mod 5; nodeSize scaled from centrality
        nodes.append({'tweetId': str(node_result['TweetId']), 'name' : str(node_result['ScreenName']), 'group' : int(node_result['FollowerCount'])%5,'nodeSize':int(node_result['BetweennessCentrality'])/25+4 })
    rels_results = graph.run("MATCH (a:brexit)-[r:FOLLOWEDBY]-(b:brexit) RETURN a.ScreenName, b.ScreenName")
    for rels_result in rels_results:
        #print(rels_result)
        # NOTE(review): linksMap is keyed by the raw ScreenName value, but
        # lookups here use .encode('utf-8') (bytes) -- these only match
        # under Python 2 with ASCII names; under Python 3 every lookup
        # would raise KeyError. Verify the intended runtime.
        links.append({'source':linksMap[rels_result['a.ScreenName'].encode('utf-8')],'target':linksMap[rels_result['b.ScreenName'].encode('utf-8')],'value':2})
    json_object = { "nodes": nodes, "links": links }
    return jsonify(json_object)
def reach():
    """Flask view: return total reach (tweeter count) and potential reach
    (sum of follower counts) for the posted topic label."""
    #topic = str(request.args.get('topic'))
    topic = str(request.form['topic'])
    graph = Graph("http://ec2-52-205-15-39.compute-1.amazonaws.com:7474/db/data/")
    # NOTE(review): `topic` is concatenated into the Cypher label -- a
    # crafted form value can inject arbitrary Cypher; sanitise upstream.
    # Coerce FollowerCount to integer so sum() works.
    graph.run("MATCH (n:"+topic+") SET n.FollowerCount = toInt(n.FollowerCount)")
    reach_count_cursor = graph.run("MATCH (n:"+topic+") RETURN count(n)")
    preach_count_cursor = graph.run("MATCH (n:"+topic+") RETURN sum(n.FollowerCount)")
    # single-row cursors: each loop just extracts the lone value
    for reach_count_record in reach_count_cursor:
        reach_count = reach_count_record[0]
    for preach_count_record in preach_count_cursor:
        preach_count = preach_count_record[0]
    #json_object = { "metric": ["Reach", "Potencial Reach"], "frequency": ["reach_count","reach_count"] }
    json_array = []
    json_array.append({'metric':'Reach', 'value':int(reach_count)})
    json_array.append({'metric':'PotencialReach','value':int(preach_count)})
    json_object = {'records':json_array}
    return jsonify(json_object)
def get_requests(item_name, tier=1, enchantment=0, quality=0, after_ts=1000):
    """Return a py2neo Cursor of recently-viewed request/offer pairs for one
    item, ordered by the price spread.

    Matches buy requests against sell offers for the item identified by
    (item_name group, tier, enchantment, quality) where both sides were
    viewed within `after_ts` seconds.
    NOTE(review): the WHERE clause keeps rows with r.UnitPriceSilver <
    o.UnitPriceSilver, so `profit` (r - o) is always negative here, while
    get_profitable_trades filters the opposite way -- confirm which
    comparison is intended.
    """
    graph = Graph(password='******')
    current_ts = datetime.timestamp(datetime.now())
    # values are interpolated into the query text via the f-string
    query = f''' MATCH (:Character)-[r:request]->(i:Item)<-[o:offer]-(:Character) WHERE i.Group = "{item_name}" AND i.Tier = {tier} AND i.Enchantment = {enchantment} AND i.Quality = {quality} AND ({current_ts} - r.LastViewed) < {after_ts} AND ({current_ts} - o.LastViewed) < {after_ts} AND (r.UnitPriceSilver < o.UnitPriceSilver) RETURN i, (r.UnitPriceSilver - o.UnitPriceSilver) as profit ORDER BY profit '''
    return graph.run(query)
def get_profitable_trades(after_ts=100000):
    """Print every request/offer pair seen within `after_ts` seconds whose
    spread (request price minus offer price) is positive, ordered by profit.

    Relies on the module-level `item_dict` for display names.
    NOTE(review): prices are rendered with str(x)[:-4], i.e. the last four
    digits are treated as sub-silver decimals -- confirm the stored unit.
    """
    graph = Graph(password='******')
    current_ts = datetime.timestamp(datetime.now())
    query = f''' MATCH (:Character)-[r:request]->(i:Item)<-[o:offer]-(:Character) WHERE ({current_ts} - r.LastViewed) < {after_ts} AND ({current_ts} - o.LastViewed) < {after_ts} AND (r.UnitPriceSilver - o.UnitPriceSilver) > 0 RETURN i, r.UnitPriceSilver as sell_price, o.UnitPriceSilver as buy_price, (r.UnitPriceSilver - o.UnitPriceSilver) as profit ORDER BY profit '''
    response = graph.run(query)
    for r in response:
        data = r.data()
        item = data['i']
        buy_price = data['buy_price']
        sell_price = data['sell_price']
        profit = data['profit']
        item_name = item_dict[item['Group']]
        # human-readable trade report
        print()
        print('__' * 20)
        print('>>> ', item_name)
        print('T: ', item['Tier'])
        print('E: ', item['Enchantment'])
        print('Q: ', item['Quality'])
        print('Buy for: $', str(buy_price)[:-4])
        print('Sell for: $', str(sell_price)[:-4])
        print('PROFIT = $', str(profit)[:-4])
        print('__' * 20)
from py2neo import Graph, Node
import os

# Batch-link Provider nodes to their Prescription nodes by NPI.
if __name__ == "__main__":
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    ## readme need to document setting environment variable in pycharm
    tx = g.begin()
    q1 = '''MATCH (p:Provider) RETURN id(p), p.npi '''
    providers = g.run(q1)
    #======= RETURN provider object: list of dics, key: npi, id ======#
    provider_lst = []
    for provider in providers:
        provider_dic = {}
        provider_dic['id'] = provider['id(p)']
        provider_dic['npi'] = provider['p.npi']
        provider_lst.append(provider_dic)
    # ===================== Create relation, Iterate Provider (faster, about 5000000 interations)====================#
    q2 = ''' MATCH (p:Provider) where id(p) = {id_p} MATCH (pc:Prescription) where pc.npi = {p_npi} CREATE (p)-[:WRITES]->(pc)'''
    match_num = 0 #2407851
    for p in provider_lst:
        p_npi = p['npi']
        id_p = p['id']
        # NOTE(review): chunk ends here -- the loop body that actually runs
        # q2 with {id_p, p_npi} appears to be truncated in this view.
# Config file with graph location details
with open("neo4jconfig.yml", 'r') as ymlfile:
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input; consider yaml.safe_load for this config.
    cfg = yaml.load(ymlfile)

graph = Graph(cfg["graph"] + "/db/data")

# Script to batch import the user nodes into neo4j
load_script = """ USING PERIODIC COMMIT 1000 load csv with headers from %s as row merge (:brexit {TweetId:row.TweetId,CreatedAt:row.CreatedAt,ScreenName:row.ScreenName,FollowerCount:row.FollowerCount}) """
# BUGFIX: the original call was missing its closing parenthesis.
graph.run(load_script % (cfg["users"]))

# Script to batch import the relationships into neo4j
# BUGFIX: the relationship pattern was '[:FOLLOWEDBY(]', which is invalid
# Cypher; corrected to '[:FOLLOWEDBY]'.
rels_script = """ load csv with headers from %s as row2 MATCH (u1:brexit {ScreenName:row2.Users}) MATCH(u2:brexit {ScreenName:row2.Followers}) CREATE (u1)-[:FOLLOWEDBY]->(u2)"""
graph.run(rels_script % (cfg["relationships"]))

# Mazerunner/Spark-Neo4j HTTP GET request to calculate betweenness centrality
url = cfg["graph"] + "/service/mazerunner/analysis/betweenness_centrality/FOLLOWEDBY"
# create street in database street = Node("Street", nodeids=waynodes, nameslug=wayname) tx = g.begin() tx.create(street) tx.commit() else: # know this street, add the way id street['nodeids'] = street['nodeids'] + waynodes street.push() # now add relationships to nodes in the way for node in waynodes: if(node in knownnodes): streetnames = g.run("MATCH (n:Street) WHERE {nodeid} IN n.nodeids RETURN n.nameslug LIMIT 25", nodeid=node) for streetrecord in streetnames: streetname = streetrecord['n.nameslug'] if streetname == wayname: continue print('matching ' + streetname + ' and ' + wayname) street2 = g.find_one("Street", "nameslug", streetname) if street2 is None: continue intersect = Relationship(street, "MEETS", street2) intersect2 = Relationship(street2, "MEETS", street) tx = g.begin() tx.create(intersect) tx.create(intersect2) tx.commit()
from string_converter import string_filter
from fuzzywuzzy import fuzz

#add 2301 rel
# Collect Drug and DrugFirm nodes for fuzzy name matching.
# NOTE(review): `os` and `Graph` are used below but not imported in this
# chunk -- presumably imported in a part of the file not visible here.
if __name__ == "__main__":
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    ## readme need to document setting environment variable in pycharm
    tx = g.begin()
    #======= RETURN Drug object: list of dics, key: labelerName, id ======#
    q1 = ''' MATCH (d: Drug) RETURN id(d), d.labelerName '''
    drug_obj = g.run(q1)
    drugs_lst = []
    # NOTE(review): `object` shadows the builtin of the same name.
    for object in drug_obj:
        drug_dic = {}
        drug_dic['id'] = object['id(d)']
        drug_dic['labelerName'] = object['d.labelerName']
        drugs_lst.append(drug_dic)
    #======= RETURN DrugFirm object: list of dics, key: firmName, id ======#
    q2 = ''' MATCH (df:DrugFirm) RETURN id(df), df.firmName'''
    df_obj = g.run(q2)
    df_lst = []
    for object in df_obj:
        df_dic = {}
        # NOTE(review): chunk truncated here -- the rest of this loop is
        # not visible in this view.
# NOTE(review): the leading triple-quote below closes a Cypher string
# literal (likely index3) opened in a part of the file not visible here.
'''
index4 = ''' CREATE INDEX ON :Issue(issueNumber) '''
index5 = ''' CREATE INDEX ON :Lobbyist(firstName) '''
index6 = ''' CREATE INDEX ON :Lobbyist(lastName) '''
index7 = ''' CREATE INDEX ON :Lobbyist(position); '''
# create all lobbying-disclosure indexes before loading the quarterly files
g.run(index1)
g.run(index2)
g.run(index3)
g.run(index4)
g.run(index5)
g.run(index6)
g.run(index7)
# gather the four quarterly XML dumps for 2013
f1 = get_file_path('2013_1stQuarter_XML')
f2 = get_file_path('2013_2ndQuarter_XML')
f3 = get_file_path('2013_3rdQuarter_XML')
f4 = get_file_path('2013_4thQuarter_XML')
files = f1 + f2 + f3 + f4
for file in files:
    # NOTE(review): loop body truncated in this view.
class AnswerClassifier:
    """Classifies a patient's answer in a depression-screening dialogue and
    decides the next question type ('new_symptom', 'spec_symptom', ...),
    backed by a symptom knowledge graph in Neo4j.

    NOTE(review): indentation throughout this class is reconstructed from a
    whitespace-mangled source -- confirm nesting against the original.
    """

    def __init__(self):
        # connection to the local symptom knowledge graph
        self.g = Graph(
            host="127.0.0.1",
            http_port=7474,
            user="******",
            password="******")
        # top-level ("coarse") symptoms recognised directly, without mapping
        self.fit_up_symptom = ['情绪低落', '兴趣减退', '精力丧失', '注意力降低', '自信心丧失', '自责自罪', '前途问题', '自杀',
                               '睡眠障碍', '食欲改变', '疼痛', '月经问题', '头晕', '虚弱', '心脏问题', '胸闷问题', '性功能障碍',
                               '排泄不适', '消化不适', '妄想', '幻觉', '抑郁性木僵']

    '''分类主函数'''
    # Main classification entry point.
    def classify1(self, word_dict, preques_infor, down_symptom):
        """Classify the patient's answer and decide the next question.

        :param word_dict: extracted words/scores from the patient's answer.
        :param preques_infor: state of the previous question (type,
            diagnosis info, mismatch counters).
        :param down_symptom: sub-symptoms already used in this dialogue.
        :return: (data, preques_infor, word_dict, down_symptom).
        """
        medical_dict,down_symptom = self.word_analysis(word_dict,down_symptom)
        data = {}
        if not medical_dict:
            types = []
        else:
            types = list(medical_dict.keys())
        # From here on is the dialogue state that must be tracked; how to
        # encapsulate it is still an open question.
        preques_type = preques_infor['question_type']
        prediagnosis_infor = preques_infor['diagnosis_infor']
        # Decide the type of the next question.
        ''' [在诊断完成之后,再进行修改] 目前的方案是分为上一个问题是'new_symptom',还是'spec_symptom',还是'ques_degree' 如果是'new_symptom',就要将症状词语加上,但结果中如果有两个以上(除症状外两个以上的词)并包括频率的其中一种,就为'new_symptom'。 否则如果没有频率词就问'ques_degree',如果只有一个词,那就'spec_symptom' 如果是'spec_symptom',则需要将症状词语, 程度词语。因为用户肯定会有否定或者肯定回答,因此直接采取'new_symptom'。 '''
        question_type = ''
        if preques_type == 'new_symptom':
            keynum = len(types) - 1
            # The patient described a complete symptom, but the answer may
            # not match the symptom that was asked about.
            if 'symptom' in types and ('time' in types or 'frequent' in types):
                if 'symptom' in list(preques_infor['diagnosis_infor'].keys()):
                    current_symptom = preques_infor['diagnosis_infor']['symptom']
                    if medical_dict['symptom'] != current_symptom:
                        preques_infor['mis_num'] += 1
                        preques_infor['mis_symptom'].append([current_symptom,medical_dict['symptom']])
                        print("答非所问:咨询%s症状, 患者回答%s症状" %(current_symptom,medical_dict['symptom']))
                # Maintain the list of completed diagnoses here.
                question_type = 'new_symptom'
                preques_infor['question_type'] = question_type
                preques_infor['diagnosis_infor'] = medical_dict
            # If the patient reported no symptom, record the symptom the
            # dialogue system had set.
            elif bool(1 - ('symptom' in types)):
                if bool(1 -('symptom' in list(prediagnosis_infor.keys()))):
                    prediagnosis_infor['symptom'] = '情绪低落'
                medical_dict['symptom'] = prediagnosis_infor['symptom']
                word_dict['symptoms'].append(medical_dict['symptom'])
                word_dict['syms_score'].append(1)
                types.append('symptom')
                keynum += 1
            # The patient described a complete symptom, but the answer may
            # still be off-topic relative to the question asked.
            else:
                if 'symptom' in list(preques_infor['diagnosis_infor'].keys()):
                    current_symptom = preques_infor['diagnosis_infor']['symptom']
                    if medical_dict['symptom'] == current_symptom:
                        preques_infor['mis_num'] += 1
                        preques_infor['mis_symptom'].append([current_symptom, medical_dict['symptom']])
                        print("答非所问:咨询%s症状, 患者回答%s症状" % (current_symptom, medical_dict['symptom']))
            if question_type == '':
                if 'time' in types or 'frequent' in types:
                    # Maintain the list of completed diagnoses here.
                    question_type = 'new_symptom'
                    preques_infor['question_type'] = question_type
                    preques_infor['diagnosis_infor'] = medical_dict
                else:
                    question_type = 'spec_symptom'
                    preques_infor['question_type'] = question_type
                    # For 'spec_symptom', look up the sub-symptoms to ask about.
                    sub_symptom = self.search('up_down_symptom', medical_dict['symptom'],down_symptom)
                    data['sub_symptom'] = sub_symptom
                    # Diagnosis cannot happen here, so stash the info in
                    # preques_infor for the next turn.
                    medical_dict['sub_symptom'] = sub_symptom
                    medical_dict['time'] = '大部分时间'
                    preques_infor['diagnosis_infor'] = medical_dict
        elif preques_type == 'spec_symptom':
            pre_medical_dict = preques_infor['diagnosis_infor']
            # An off-topic answer may occur here too.
            if 'symptom' in types:
                if medical_dict['symptom'] != pre_medical_dict['symptom']:
                    preques_infor['mis_num'] += 1
                    preques_infor['mis_symptom'].append([pre_medical_dict['symptom'], medical_dict['symptom']])
                    print("答非所问:咨询%s症状, 患者回答%s症状" % (pre_medical_dict['symptom'], medical_dict['symptom']))
            # carry forward previous findings the answer did not mention
            for k in pre_medical_dict.keys():
                if bool(1-(k in types)) and k != 'denyword':
                    medical_dict[k] = pre_medical_dict[k]
            word_dict['symptom'] = medical_dict['symptom']
            word_dict['sub_symptom'] = medical_dict['sub_symptom']
            # Default (diagnosable) directly to 'new_symptom'; maintain the
            # completed-diagnosis list here.
            question_type = 'new_symptom'
        elif preques_type == 'ques_degree':
            pre_medical_dict = preques_infor['diagnosis_infor']
            for k in pre_medical_dict.keys():
                if bool(1-(k in types)):
                    medical_dict[k] = pre_medical_dict[k]
            # Default (diagnosable) directly to 'new_symptom'; maintain the
            # completed-diagnosis list here.
            question_type = 'new_symptom'
        # print('preques_type: %s, question_types: %s.' % (preques_infor['question_type'], question_type))
        # Merge the classification results into one dict.
        data['question_type'] = question_type
        preques_infor['question_type'] = question_type
        return data, preques_infor, word_dict,down_symptom

    def word_analysis(self,word_result,down_symptom):
        """Turn the extracted words into a medical_dict keyed by
        symptom/time/frequent/degree/denyword."""
        medical_dict = {}
        if word_result['symptoms'] != []:
            sub_symptom = word_result['symptoms'][0]
            print('sub_symptom:' + sub_symptom)
            # Map the fine-grained symptom to a coarse symptom
            # (via the knowledge graph).
            if sub_symptom in self.fit_up_symptom:
                medical_dict['symptom'] = sub_symptom
            else:
                down_symptom.append(sub_symptom)
                symptom = self.search('down_up_symptom',sub_symptom,down_symptom)[0]
                medical_dict['symptom'] = symptom
        # other_words is positional: [time, frequent, degree, denyword]
        tmplist = ['time','frequent','degree','denyword']
        for i, word in enumerate(word_result['other_words']):
            if word != {}:
                medical_dict[tmplist[i]] = list(word.keys())[0]
        return medical_dict,down_symptom

    def search(self,sql_type, data,down_symptom):
        """Run a knowledge-graph lookup.

        'up_down_symptom': random sample (max 4) of unused sub-symptoms of
        `data`. 'down_up_symptom': parent symptoms of sub-symptom `data`.
        """
        sql = self.build_sql(sql_type, data)[0]
        ress = self.g.run(sql).data()
        keyword = ''
        if sql_type == 'up_down_symptom':
            down_symptoms = list(set([i['n.name'] for i in ress]))
            if len(down_symptoms) == 1:
                symptoms_num = 1
            else:
                # Drop sub-symptoms that have already been used.
                # NOTE(review): removing while iterating the same list can
                # skip elements -- confirm intended.
                for w in down_symptoms:
                    if w in down_symptom:
                        down_symptoms.remove(w)
                if down_symptoms == []:
                    down_symptoms.append(data)
                symptoms_num = min(random.randint(1, len(down_symptoms)), 4)
            random.shuffle(down_symptoms)
            keyword = down_symptoms[:symptoms_num]
            # select_symptoms = down_symptoms[:symptoms_num]
            # keyword = '、'.join(select_symptoms)
        elif sql_type == 'down_up_symptom':
            keyword = list(set([i['m.name'] for i in ress]))
        return keyword

    def build_sql(self, sql_type, data):
        """Build the Cypher statement(s) for `search`; returns a list."""
        sql = []
        datas = [data]
        if sql_type=='down_up_symptom':
            sql = ["MATCH (m:down_symptom)-[r:sub_sub_symptom]->(n:down2_symptom) where n.name = '{0}' return m.name".format(i) for i in datas]
        elif sql_type=='up_down_symptom':
            sql = ["MATCH (m:down_symptom)-[r:sub_sub_symptom]->(n:down2_symptom) where m.name = '{0}' return n.name".format(i) for i in datas]
        return sql

    # def symptom_match(self,types):
    #     # 这里的主要的逻辑是诊断过程中,是否可以匹配上。
    #     if ('symptom' in types) and ('frequent' in types):
    #         return 'full'
    #     elif ('symptom' in types):
    #         return 'no_frequent'
    #     else:
    #         return 'no_symptom'

    '''构造词对应的类型'''
    # def build_wdtype_dict(self):
    #     wd_dict = dict()
    #     for wd in self.region_words:
    #         wd_dict[wd] = []
    #         if wd in self.disease_wds:
    #             wd_dict[wd].append('disease')
    #         if wd in self.drug_wds:
    #             wd_dict[wd].append('drug')
    #         if wd in self.symptom_wds:
    #             wd_dict[wd].append('symptom')
    #         if wd in self.degree_wds:
    #             wd_dict[wd].append('degree')
    #         if wd in self.frequent_wds:
    #             wd_dict[wd].append('frequent')
    #         if wd in self.time_wds:
    #             wd_dict[wd].append('time')
    #         if wd in self.yesno_wds:
    #             wd_dict[wd].append('yesno')
    #
    #     return wd_dict

    '''构造actree,加速过滤'''
from string_converter import uniq_elem
from fuzzywuzzy import fuzz

# Build legislator indexes and bulk-load legislator data into Neo4j.
# NOTE(review): `os` and `Graph` are used below but not imported in this
# chunk -- presumably imported in a part of the file not visible here.
if __name__ == "__main__":
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    tx = g.begin()
    idx1 = ''' CREATE INDEX ON: Legislator(name) '''
    idx2 = ''' CREATE INDEX ON: LegislatorInfo(wikipediaID) '''
    g.run(idx1)
    g.run(idx2)
    # NOTE(review): the Cypher literal below is opened but never closed in
    # this chunk -- the script continues past the visible text.
    create_legislatorInfo = ''' LOAD CSV WITH HEADERS FROM 'https://dl.dropboxusercontent.com/u/67572426/legislators-current.csv' AS line MERGE (legislator:LegislatorInfo { thomasID: line.thomasID }) ON CREATE SET legislator = line ON MATCH SET legislator = line MERGE (s:State {code: line.state}) CREATE UNIQUE (legislator)-[:REPRESENTS]->(s) MERGE (p:Party {name: line.currentParty}) CREATE UNIQUE (legislator)-[:IS_MEMBER_OF]->(p) MERGE (b:Body {type: line.type}) CREATE UNIQUE (legislator)-[:ELECTED_TO]->(b);
class GraphHeroSummoner():
    """Loads crawled hero pages and writes hero/summoner-spell and
    hero/hero relationship edges into a local Neo4j graph."""

    def __init__(self):
        print("GraphHeroSummoner init...")
        # crawled hero pages, one JSON object per line
        self.hero_page_path = "../spiderData/hero_page.json"
        self.g = Graph("http://localhost:7474", username="******", password="******")
        # relation labels indexed by position in the page's relation list
        # (1-2: best partner, 3-4: counters, 5-6: countered by)
        self.relation = {
            '1': "最佳搭档",
            '2': "最佳搭档",
            '3': "压制英雄",
            '4': "压制英雄",
            '5': "被压制英雄",
            '6': "被压制英雄"
        }

    def read_hero_page(self):
        """Parse each crawled hero page and create its partner edges."""
        for item in open(self.hero_page_path, encoding='utf-8'):
            data = json.loads(item)
            summoner_ids = data["hero_summoner"][0].split("|")
            # for summoner_id in summoner_ids:
            #     self.create_hero_summoner_relationship("hero", "summoner", [data["name"], summoner_id], "推荐", "召唤师技能推荐")
            # extract the related hero's ename from its image URL
            ex = 'img201606/heroimg/(.*?).jpg'
            num = 1
            for relation_url in data["relation_uri"]:
                hero_ename = re.findall(ex, relation_url, re.S)[0].split("/")[0]
                self.create_hero_partner_relationship(
                    "hero", "hero", [data["name"], hero_ename],
                    self.relation[f'{num}'], self.relation[f'{num}'],
                    data["relation_desc"][num - 1])
                num += 1

    '''创建英雄铭文关联边'''
    # Create a hero -> summoner-spell recommendation edge.
    def create_hero_summoner_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """Create `(hero)-[rel_type {name}]->(summoner)` for edges=[hero
        name, summoner id]. Errors are printed, not raised."""
        print(
            f"create_relationship start_node:{start_node} end_node:{end_node} edges:{edges} rel_type:{rel_type} rel_name:{rel_name}"
        )
        p = edges[0]
        q = edges[1]
        # NOTE(review): values are interpolated into the Cypher text --
        # quoting characters in names would break/inject the query.
        query = "match(p:%s),(q:%s) where p.name='%s'and q.summoner_id=%s create (p)-[rel:%s{name:'%s'}]->(q)" % (
            start_node, end_node, p, q, rel_type, rel_name)
        try:
            self.g.run(query)
            print(p, rel_type, q)
        except Exception as e:
            print(e)
        return

    def create_hero_partner_relationship(self, start_node, end_node, edges, rel_type, rel_name, explain):
        """Create `(hero)-[rel_type {name, desc}]->(hero)` for edges=[hero
        name, related hero ename]. Errors are printed, not raised."""
        print(
            f"create_relationship start_node:{start_node} end_node:{end_node} edges:{edges} rel_type:{rel_type} rel_name:{rel_name}"
        )
        p = edges[0]
        q = edges[1]
        query = "match(p:%s),(q:%s) where p.name='%s'and q.hero_ename='%s' create (p)-[rel:%s{name:'%s',desc:'%s'}]->(q)" % (
            start_node, end_node, p, q, rel_type, rel_name, explain)
        try:
            # print(query)
            self.g.run(query)
            print(p, rel_type, q)
        except Exception as e:
            print(e)
        return

    def start(self):
        # entry point: ingest all crawled hero pages
        self.read_hero_page()
class Neo4j(object):
    """Thin query wrapper around a movie knowledge graph in Neo4j.

    Maps a question-template label (0-13) to a Cypher template, fills in
    the entities from `query_dict`, and returns a de-duplicated list of
    single-column results.
    """

    def __init__(self):
        '''
        Cypher query templates keyed by question-model label.
        Placeholders: nm = movie title, nnt = person name, ng = genre,
        x = rating threshold.
        '''
        self.query_models = {
            0: "match(n:Movie) where n.title='{title}' return n.rating",  # nm
            1: "match(n:Movie) where n.title='{title}' return n.releasedate",  # nm
            2: "match(n:Movie)-[r:is]->(b:Genre) where n.title='{title}' return b.name",  # nm
            3: "match(n:Movie) where n.title ='{title}' return n.introduction",  # nm
            4: "match(n:Person)-[:actedin]-(m:Movie) where m.title ='{title}' return n.name",  # nm
            5: "match(n:Person) where n.name='{name}' return n.birthplace",  # nnt
            6: "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' match(g:Genre)-[:is]-(m) where g.name=~'{gname}' return distinct m.title",  # nnt, ng
            7: "match(n:Person)-[:actedin]->(m:Movie) where n.name='{name}' return m.title",  # nnt
            8: "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' and m.rating > {score} return m.title",  # nnt, x
            9: "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' and m.rating < {score} return m.title",  # nnt, x
            10: "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' match(p:Genre)-[:is]-(m) return distinct p.name",  # nnt
            # 11: handled separately (intersection of two label-7 queries)
            12: "match(n)-[:actedin]-(m) where n.name ='{name}' return count(*)",  # nnt
            13: "match(n:Person) where n.name='{name}' return n.birth",  # nnt
        }
        self.graph = Graph('http://localhost:7474', username="******", password="******")

    def query(self, model_label, query_dict):
        """Run the template for `model_label` with entities from
        `query_dict`; returns a list of results, or [] on any error."""
        try:
            if model_label == 11:
                # label 11 = movies two actors appeared in together:
                # intersect the filmographies from template 7.
                data_x = self._query(model_label=7, query_dict={"nnt": query_dict["nnt"]})
                data_y = self._query(model_label=7, query_dict={"nnt": query_dict["nnr"]})
                result = list(data_x.intersection(data_y))
                return result
            else:
                result = list(self._query(model_label, query_dict))
                return result
        except Exception:
            # BUGFIX: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; still returns [] on failure.
            return []

    def _query(self, model_label, query_dict):
        """Format and run a single template; returns the set of
        first-column values from the result rows."""
        query_str = ""
        if model_label in [0, 1, 2, 3, 4]:
            query_str = self.query_models[model_label].format(
                title=query_dict["nm"])
        elif model_label in [5, 7, 10, 12, 13]:
            query_str = self.query_models[model_label].format(
                name=query_dict["nnt"])
        elif model_label == 6:
            query_str = self.query_models[model_label].format(
                name=query_dict["nnt"], gname=query_dict["ng"])
        elif model_label in [8, 9]:
            query_str = self.query_models[model_label].format(
                name=query_dict["nnt"], score=query_dict["x"])
        data = self.graph.run(query_str).data()
        result = set()
        for dic in data:
            temp = [item[1] for item in dic.items()]  # single-column rows
            result.add(temp[0])
        return result
from py2neo import Graph, Node, Relationship

# Small py2neo demo: create Person nodes, one relationship, and query them.
# NOTE(review): `url` is not defined in this chunk -- as written this raises
# NameError; confirm where the connection URL is meant to come from.
graph = Graph(url)
graph.delete_all()  # wipes the entire database (demo only!)

# Creating Nodes
manas = Node('Person', name='Manas', age='22')
lakshya = Node('Person', name='Lakshya', age='101 NOT OUT')
divya = Node('Person', name='Divya', age='21')
# graph = Graph()
graph.create(manas)
graph.create(lakshya)
graph.create(divya)

# CREATING UNI-DIRECTIONAL RELATIONSHIPS
graph.create(Relationship(lakshya, 'BULLIES😢', manas))

# EXECUTING AND VIEWING QUERIES
query = 'match (person:Person) return person'
records = graph.run(query)
for record in records:
    node = record.get('person')
    print(node)

# MATCHING USING 'match' method
results = graph.nodes.match('Person')
for node in results:
    print(node)
def ne4jquery(query):
    """Execute a Cypher statement against the configured Neo4j server.

    :param query: Cypher text to run.
    :return: the query result as a pandas DataFrame.
    """
    connection = Graph(NEO4J_SERVER)
    result = connection.run(query)
    return result.to_data_frame()
class FanGraph(object): """ This object provides a set of helper methods for creating and retrieving Nodes and relationship from a Neo4j database. """ # Connects to the DB and sets a Graph instance variable. # Also creates a NodeMatcher, which is a py2neo class. def __init__( self, auth, host, port, secure=False, ): self._graph = Graph(secure=secure, bolt=True, auth=auth, host=host, port=port) self._node_matcher = NodeMatcher(self._graph) def run_match(self, labels=None, properties=None): """ Uses a NodeMatcher to find a node matching a "template." :param labels: A list of labels that the node must have. :param properties: A parameter list of the form prop1=value1, prop2=value2, ... :return: An array of Node objects matching the pattern. """ #ut.debug_message("Labels = ", labels) #ut.debug_message("Properties = ", json.dumps(properties)) if labels is not None and properties is not None: result = self._node_matcher.match(labels, **properties) elif labels is not None and properties is None: result = self._node_matcher.match(labels) elif labels is None and properties is not None: result = self._node_matcher.match(**properties) else: raise ValueError( "Invalid request. Labels and properties cannot both be None.") # Convert NodeMatch data into a simple list of Nodes. full_result = [] for r in result: full_result.append(r) return full_result def find_nodes_by_template(self, tmp): """ :param tmp: A template defining the label and properties for Nodes to return. An example is { "label": "Fan", "template" { "last_name": "Ferguson", "first_name": "Donald" }} :return: A list of Nodes matching the template. """ labels = tmp.get('label') props = tmp.get("template") result = self.run_match(labels=labels, properties=props) return result # Create and save a new node for a 'Fan.' 
    def create_fan(self, uni, last_name, first_name):
        """Create and immediately commit a Fan node keyed by UNI.
        NOTE(review): unlike create_player/create_team, this does not
        return the created node -- confirm whether intentional."""
        n = Node("Fan", uni=uni, last_name=last_name, first_name=first_name)
        tx = self._graph.begin(autocommit=True)
        tx.create(n)

    # Given a UNI, return the node for the Fan.
    def get_fan(self, uni):
        """Return the first Fan node matching `uni`, or None."""
        n = self.find_nodes_by_template({
            "label": "Fan",
            "template": {
                "uni": uni
            }
        })
        if n is not None and len(n) > 0:
            n = n[0]
        else:
            n = None
        return n

    def create_player(self, player_id, last_name, first_name):
        """Create and commit a Player node; returns the node."""
        n = Node("Player",
                 player_id=player_id,
                 last_name=last_name,
                 first_name=first_name)
        tx = self._graph.begin(autocommit=True)
        tx.create(n)
        return n

    def get_player(self, player_id):
        """Return the first Player node matching `player_id`, or None."""
        n = self.find_nodes_by_template({
            "label": "Player",
            "template": {
                "player_id": player_id
            }
        })
        if n is not None and len(n) > 0:
            n = n[0]
        else:
            n = None
        return n

    def create_team(self, team_id, team_name):
        """Create and commit a Team node; returns the node."""
        n = Node("Team", team_id=team_id, team_name=team_name)
        tx = self._graph.begin(autocommit=True)
        tx.create(n)
        return n

    def get_team(self, team_id):
        """Return the first Team node matching `team_id`, or None."""
        n = self.find_nodes_by_template({
            "label": "Team",
            "template": {
                "team_id": team_id
            }
        })
        if n is not None and len(n) > 0:
            n = n[0]
        else:
            n = None
        return n

    def create_supports(self, uni, team_id):
        """
        Create a SUPPORTS relationship from a Fan to a Team.
        :param uni: The UNI for a fan.
        :param team_id: An ID for a team.
        :return: The created SUPPORTS relationship from the Fan to the Team
        """
        f = self.get_fan(uni)
        t = self.get_team(team_id)
        r = Relationship(f, "SUPPORTS", t)
        tx = self._graph.begin(autocommit=True)
        tx.create(r)
        return r

    # Create an APPEARED relationship from a player to a Team
    def create_appearance(self, player_id, team_id):
        """Best-effort: failures are printed, not raised."""
        try:
            f = self.get_player(player_id)
            t = self.get_team(team_id)
            r = Relationship(f, "APPEARED", t)
            tx = self._graph.begin(autocommit=True)
            tx.create(r)
        except Exception as e:
            print("create_appearances: exception = ", e)

    # Create a FOLLOWS relationship from a Fan to another Fan.
def create_follows(self, follower, followed): f = self.get_fan(follower) t = self.get_fan(followed) r = Relationship(f, "FOLLOWS", t) tx = self._graph.begin(autocommit=True) tx.create(r) def get_comment(self, comment_id): n = self.find_nodes_by_template({ "label": "Comment", "template": { "comment_id": comment_id } }) if n is not None and len(n) > 0: n = n[0] else: n = None return n def create_comment(self, uni, comment, team_id=None, player_id=None): """ Creates a comment :param uni: The UNI for the Fan making the comment. :param comment: A simple string. :param team_id: A valid team ID or None. team_id and player_id cannot BOTH be None. :param player_id: A valid player ID or None :return: The Node representing the comment. """ if comment: if 'sucks' in comment or 'fuc*' in comment or 'sh*t' in comment: comment_id = str(uuid.uuid4()) com = Node("Comment", comment_id=comment_id, team_id=team_id, player_id=player_id, comment='****') else: comment_id = str(uuid.uuid4()) com = Node("Comment", comment_id=comment_id, team_id=team_id, player_id=player_id, comment=comment) tx = self._graph.begin(autocommit=True) tx.create(com) fan = self.get_fan(uni) # fan and comment are necessary here # define relation shape with COMMENT_BY: r_fan_com = Relationship(fan, "COMMENT_BY", com) tx = self._graph.begin(autocommit=True) tx.create(r_fan_com) # Then define two kinds of COMMENT_ON if team_id is not None: tm = self.get_team(team_id) r_com_tm = Relationship(com, "COMMENT_ON", tm) tx = self._graph.begin(autocommit=True) tx.create(r_com_tm) if player_id is not None: plr = self.get_player(player_id) r_com_plr = Relationship(com, "COMMENT_ON", plr) tx = self._graph.begin(autocommit=True) tx.create(r_com_plr) return com else: raise NameError('There is no valid comments,please check it again') def examine_sub_comment(self, uni, origin_comment_id, comment): ''' This function designed to examine whether our input is valid or not ''' examine_ori = self.get_comment(origin_comment_id) 
examine_fan = self.get_fan(uni) if not comment: raise NameError('There are some problems about new_comment') if examine_ori is None: raise NameError('There are some problems about origin_comment_id') if examine_fan is None: raise NameError('There are some problems about fans') def create_sub_comment(self, uni, origin_comment_id, comment): """ Create a sub-comment (response to a comment or response) and links with parent in thread. :param uni: ID of the Fan making the comment. :param origin_comment_id: Id of the comment to which this is a response. :param comment: Comment string :return: Created comment. """ #examine the correctness of my function and input: self.examine_sub_comment(uni, origin_comment_id, comment) if 'sucks' in comment or 'fuc*' in comment or 'sh*t' in comment: comment_id = str(uuid.uuid4()) com = Node("Comment", comment_id=comment_id, comment='****') else: comment_id = str(uuid.uuid4()) com = Node("Comment", comment_id=comment_id, comment=comment) existed_comment = self.get_comment(origin_comment_id) tx = self._graph.begin(autocommit=True) tx.create(com) fan = self.get_fan(uni) # Response to : #r_fan_com = Relationship(fan, "RESPONSE_TO", existed_comment) r_com_ori = Relationship(com, "RESPONSE_TO", existed_comment) tx = self._graph.begin(autocommit=True) tx.create(r_com_ori) #comments From fan to ''' r_fan_ori = Relationship(fan, "COMMENT_BY", existed_comment) tx = self._graph.begin(autocommit=True) tx.create(r_fan_ori) ''' # Response by ( from new to fan to new r_fan_com = Relationship(fan, "RESPONSE_BY", com) tx = self._graph.begin(autocommit=True) tx.create(r_fan_com) return com def get_player_comments(self, player_id): """ Gets all of the comments associated with a player, all of the comments on the comment and comments on the comments, etc. Also returns the Nodes for people making the comments. :param player_id: ID of the player. :return: Graph containing comment, comment streams and commenters. 
""" q = 'match (player:Player{player_id:' + "'" + player_id + "'" + '})-[on:COMMENT_ON*]-(comment:Comment)- \ [response_to:RESPONSE_TO*]-(sub_comment:Comment)-[response_by:RESPONSE_BY*]-(fan2:Fan) with \ player,on,comment,response_to,sub_comment,response_by,fan2 match (comment)-[comment_by:COMMENT_BY*] \ -(fan:Fan) return player,on,comment,comment_by,fan,response_to,sub_comment,response_by,fan2' res = self._graph.run(q) res = res.data( ) #Checking through google and apply in this way can help me to collect complete data #res = list(res) #print('q:',q) return res def get_team_comments(self, team_id): """ Gets all of the comments associated with a teams, all of the comments on the comment and comments on the comments, etc. Also returns the Nodes for people making the comments. :param player_id: ID of the team. :return: Graph containing comment, comment streams and commenters. """ q = 'match (team:Team{team_id:' + "'" + team_id + "'" + '})-[on:COMMENT_ON*]-(comment:Comment)- \ [response_to:RESPONSE_TO*]-(sub_comment:Comment)-[response_by:RESPONSE_BY*]-(fan2:Fan) with \ team,on,comment,response_to,sub_comment,response_by,fan2 match (comment)-[comment_by:COMMENT_BY*] \ -(fan:Fan) return team,on,comment,comment_by,fan,response_to,sub_comment,response_by,fan2' res = self._graph.run(q) res = res.data( ) #Checking through google and apply in this way can help me to collect complete data #res = list(res) print('q:', q) return res
# slice the dataframe for the indexes and rows of choice
# questions = [val for val in data.columns[1:5]]
# answers = data.iloc[1:4, 1:5]
# respondents = data.iloc[1:4, 0]
# print(data[0].columns)

# Build one CREATE statement per row of the first sheet.
# FIX: the accumulator was named `str`, shadowing the builtin (which the
# profanity/uuid code elsewhere in this file relies on), and a no-op
# `str = str` statement was left behind. Renamed to `cypher` and removed it.
col = data[0].columns
for indexs in data[0].index:
    notes = data[0].loc[indexs].values
    # strip spaces/dots so the node variable is a legal Cypher identifier
    labels = notes[1].replace(' ', '').replace('.', '')
    cypher = ("CREATE (" + labels + ":" + notes[0] + "{" + col[1] + ":'" +
              notes[1] + "'," + col[2] + ":'" + notes[2] + "'," + col[3] +
              ":'" + notes[3] + "'})")
    print(cypher)
    graph.run(cypher)

# graph.run("CREATE (n:BBB{name:'ssss'}) return n")

# Build one MATCH ... CREATE relationship statement per row of the second sheet.
col = data[1].columns
for indexs in data[1].index:
    notes = data[1].loc[indexs].values
    # str="CREATE (n:"+notes[0]+"{"+col[3]+":'"+notes[3]+"',"+col[2]+":'"+notes[2]+"',"+col[1]+":'"+notes[1]+"'}) return n"
    cypher = ("MATCH (a:" + notes[0] + "),(b:" + notes[3] + ") WHERE a." +
              col[1] + " = '" + notes[1] + "' AND b." + col[1] + "= '" +
              notes[4] + "' CREATE (a)-[" + col[2] + ":" + notes[2] + " {" +
              col[5] + ":['" + notes[5] + "']}]->(b)")
    print(cypher)
    graph.run(cypher)
# print(data.columns[cols]+"="+notes[cols])
class NeoTalentos():
    """Imports talentos1.csv into Neo4j: uniqueness constraints first, then a
    fixed sequence of LOAD CSV steps that create Talento nodes plus the
    Idioma/Habilidad/AlmaMater/Grado/Ubicacion nodes and their relationships."""

    file_loc = ''
    g = ''

    def __init__(self):
        # CSV must live in the Neo4j import directory to be reachable by LOAD CSV.
        self.file_location = "file:///talentos1.csv"
        self.g = Graph("http://neo4j:7474/db/data/")

    def RunAll(self):
        """Run the whole import pipeline in order: constraints, then each
        LOAD CSV statement (node merges before the relationships that use them)."""
        constraint_statements = (
            "CREATE CONSTRAINT ON (t:Talento) ASSERT t.nombre IS UNIQUE",
            "CREATE CONSTRAINT ON (u:Ubicacion) ASSERT u.lugar IS UNIQUE",
            "CREATE CONSTRAINT ON (g:Grado) ASSERT g.grados IS UNIQUE",
            "CREATE CONSTRAINT ON (h:Habilidad) ASSERT h.habilidades IS UNIQUE",
            "CREATE CONSTRAINT ON (a:AlmaMater) ASSERT a.almaMater IS UNIQUE",
            "CREATE CONSTRAINT ON (i:Idioma) ASSERT i.idiomas IS UNIQUE",
        )
        for statement in constraint_statements:
            self.g.run(statement)

        # Every template takes the CSV location via '%s'; order matters
        # (each MERGE of relationship endpoints precedes the MATCH that uses them).
        load_csv_templates = (
            # 1. Talento nodes with all scalar columns as properties.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            MERGE (t:Talento {id: row.ID, nombre: row.ID_NOMBRE})
            ON CREATE SET t.idiomas = row.IDI_NOMBRE,
            t.habilidades = row.HAB_HABILIDAD, t.almaMater = row.EDU_INST,
            t.area = row.EDU_AREA, t.grados = row.EDU_TITULO,
            t.posicion = row.TRA_POSICION, t.empresa = row.TRA_EMPRESA,
            t.departamento = row.TRA_AREA, t.sector = row.TRA_TIPO,
            t.industria = row.TRA_IND, t.nacimiento = row.INF_FECHNAC,
            t.lugar = row.INF_UBICACION """,
            # 2. Idioma nodes (one per distinct comma-separated language).
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.IDI_NOMBRE, ",") AS idiomasId
            WITH DISTINCT idiomasId
            MERGE (i:Idioma {idiomas: idiomasId}) """,
            # 3. Talento -FLUYENTES_EN-> Idioma.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.IDI_NOMBRE, ",") AS idiomas, row.ID_NOMBRE AS Nombre
            UNWIND idiomas AS idiomasId
            WITH DISTINCT idiomasId, Nombre
            MATCH (t:Talento {nombre: Nombre})
            MATCH (i:Idioma {idiomas: idiomasId})
            MERGE (t)-[:FLUYENTES_EN]->(i) """,
            # 4. Habilidad nodes.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.HAB_HABILIDAD, ",") AS habilidadesId
            WITH DISTINCT habilidadesId
            MERGE (h:Habilidad {habilidades: habilidadesId}) """,
            # 5. Talento -TIENE_HABILIDADES-> Habilidad.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.HAB_HABILIDAD, ",") AS habilidades, row.ID AS proyectoNo
            UNWIND habilidades AS habilidadesId
            WITH DISTINCT habilidadesId, proyectoNo
            MATCH (t:Talento {id: proyectoNo})
            MATCH (h:Habilidad {habilidades: habilidadesId})
            MERGE (t)-[:TIENE_HABILIDADES]->(h) """,
            # 6. AlmaMater nodes.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.EDU_INST, ",") AS almaMaters
            WITH DISTINCT almaMaters
            MERGE (:AlmaMater {almaMater: almaMaters}) """,
            # 7. Talento -EGRESADO_DE-> AlmaMater.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.EDU_INST, ",") AS almas, row.ID AS No
            UNWIND almas AS almasId
            WITH DISTINCT almasId, No
            MATCH (t:Talento {id: No})
            MATCH (a:AlmaMater {almaMater: almasId})
            MERGE (t)-[:EGRESADO_DE]->(a) """,
            # 8. Grado nodes.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.EDU_TITULO, ",") AS Grado
            WITH DISTINCT Grado
            MERGE (:Grado {grados: Grado}) """,
            # 9. Talento -EGRESADO_EN-> Grado.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.EDU_TITULO, ",") AS grados, row.ID AS proyectoNo
            UNWIND grados AS gradosId
            WITH DISTINCT gradosId, proyectoNo
            MATCH (t:Talento {id: proyectoNo})
            MATCH (g:Grado {grados: gradosId})
            MERGE (t)-[:EGRESADO_EN]->(g) """,
            # 10. Ubicacion nodes.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            MERGE (u:Ubicacion {lugar: row.INF_UBICACION}) """,
            # 11. Talento -UBICADO_EN-> Ubicacion.
            """ LOAD CSV WITH HEADERS FROM '%s' AS row
            MATCH (t:Talento {id: row.ID})
            MATCH (u:Ubicacion {lugar: row.INF_UBICACION})
            MERGE (t)-[:UBICADO_EN]->(u) """,
        )
        for template in load_csv_templates:
            self.g.run(template % self.file_location)
# Cypher queries used by this report fragment.
query = "match(n) return n"  # every node (not used below)
machineQuery = "match (m:Machine) return m"  # all machines (not used below)
# machines attached to a department in plant 'Argus' (not used below)
argusQuery = "match(m:Machine)-[]->(:Department)-[:BELONGS_TO]->(p:Plant {name:'Argus'}) return m"
# distinct (machine id, corrective-action suggestion) pairs
suggestionQuery = "match (cA:CorrectiveAction)<-[r]-(f:Fault)<-[:HAD_FAULT]-(m:Machine) return DISTINCT m.id, cA.suggestion"
# machine details + fault message + suggestion, ordered by machine id
machineStatusQuery = "match (cA:CorrectiveAction)<-[r]-(f:Fault)<-[:HAD_FAULT]-(m:Machine) return DISTINCT m.id,m.maker,m.model,m.status,f.message,cA.suggestion ORDER BY m.id"

# Accumulators for the query results.
machine_Id_list = []  # NOTE(review): never filled or read in this fragment
nextActionText = []
machine_ids = []
machine_makers = []
machine_models = []
machine_statuses = []
machine_messages = []
machine_suggestions = []  # NOTE(review): never filled in this fragment

# Collect every suggestion text.
suggestions = graph.run(suggestionQuery)
for suggestion in suggestions:
    # NOTE(review): `id` shadows the builtin and is never used afterwards
    id = suggestion['m.id']
    nextActionText.append(suggestion['cA.suggestion'])

# Keep only the first row per machine id (rows arrive ORDERed BY m.id, so a
# change in id marks a new machine).
machines = graph.run(machineStatusQuery)
index = 0  # NOTE(review): never used
currentMachineId = 0
for machine in machines:
    if(currentMachineId != machine['m.id']):
        currentMachineId = machine['m.id']
        machine_ids.append(machine['m.id'])
        machine_makers.append(machine['m.maker'])
        machine_models.append(machine['m.model'])
        machine_statuses.append(machine['m.status'])
        machine_messages.append(machine['f.message'])
import sys
import json

from py2neo import Graph

# Look up one attraction near a city — both given on the command line with
# dashes standing in for spaces — and print the matching Place node as JSON.
graph = Graph(host='localhost', auth=('neo4j', 'abduabdu'))

city = sys.argv[1].replace('-', ' ')
attrac = sys.argv[2].replace('-', ' ')

# SECURITY FIX: the query was built with %-interpolation of argv values, which
# allowed Cypher injection. Use query parameters instead.
query = """
MATCH (p:Place)-[:Near]->(c:City)
WHERE c.name=$city and p.name=$attrac
RETURN p
"""
res = graph.run(query, city=city, attrac=attrac).data()

try:
    ans = json.dumps(res[0]['p'])
except (IndexError, KeyError, TypeError):
    # FIX: narrowed the bare except — no match (IndexError/KeyError) or a
    # non-serializable node (TypeError) yields an empty object, as before.
    ans = {}
print(ans, end='')
def main(args):
    """Report binary-hardening statistics per operating system.

    :param args: docopt-style dict; reads '--debug', '--list' and '<os_regex>'.
    :return: None on success, 1 when no OS matches the regex.
    """
    init_logger(args['--debug'])
    logging.info('connect to Neo4j DB')
    graph = Graph(password=DB_PASSWORD)
    # list ? — just print the available OS names and stop.
    if args['--list']:
        logging.info('available operating systems:')
        for os in OS.match(graph):  # NOTE(review): loop var `os` shadows the os module name
            logging.info('\t%s', os.name)
        return
    os_regex = args['<os_regex>']
    os_match = OS.match(graph).where("_.name =~ '{}'".format(os_regex))
    # NOTE(review): py2neo match objects are lazy — this None check likely never
    # fires for an empty result set; confirm against the OGM API in use.
    if os_match is None:
        logging.info(
            'unable to find OS that matches \'%s\' regex in the database',
            os_regex)
        return 1
    os_df_list = []
    # iterate over OS list, sorted by release date, converted from string to date object
    for os in sorted(
            os_match,
            key=lambda x: datetime.strptime(x.release_date, '%Y-%m-%d')):
        # TODO translate to py2neo API
        checksec_inodes = graph.run(OS_CHECKSEC_QUERY.format(os.name))
        # Tally how many binaries carry each protection flag.
        c = Counter()
        for node in checksec_inodes:
            inode = node['i']
            logging.debug('%s: %s', inode['name'], inode['mime_type'])
            c['total'] += 1
            if inode['relro']:
                c['relro'] += 1
            if inode['canary']:
                c['canary'] += 1
            if inode['nx']:
                c['nx'] += 1
            if inode['rpath']:
                c['rpath'] += 1
            if inode['runpath']:
                c['runpath'] += 1
            if inode['symtables']:
                c['symtables'] += 1
            if inode['fortify_source']:
                c['fortify_source'] += 1
        logging.info('Results for %s', os.name)
        logging.info('Total binaries: %d', c['total'])
        for feature in PROTECTIONS:
            logging.info('%s: %.1f%%', feature, c[feature] * 100 / c['total'])
        # fix matplotlib, uses agg by default, non-gui backend
        matplotlib.use('tkagg')
        sns.set_style('whitegrid')
        # Percentage of binaries with each protection, in PROTECTIONS order.
        per_data = []
        for feature in PROTECTIONS:
            value = c[feature] * 100 / c['total']
            per_data.append(value)
        # initialize OS Panda DataFrame
        df = pd.DataFrame({
            'Protections': PROTECTIONS,
            'Percentage': per_data,
            'OS': os.name
        })
        os_df_list.append(df)
    # concatenate all the individual DataFrames
    main_df = pd.concat(os_df_list, ignore_index=True)
    logging.info('Displaying results...')
    # Single OS: plain bar plot; several: hue-grouped by OS.
    if len(os_df_list) == 1:
        ax = sns.barplot(x="Protections", y="Percentage", data=main_df)
        ax.set_title('{} binary security overview'.format(os_regex))
    else:
        ax = sns.barplot(x="Protections", y="Percentage", hue="OS", data=main_df)
        ax.set_title(
            'binary security overview for regex "{}"'.format(os_regex))
    # show plot
    plt.legend(loc='upper right')
    plt.show()
class AnswerSearching:
    """Translates extracted medical entities + intents into Cypher queries,
    runs them against a Neo4j knowledge graph, and renders answer text."""

    def __init__(self):
        # NOTE(review): credentials are masked placeholders.
        self.graph = Graph("http://localhost:7474",
                           username="******",
                           password="******")
        # Maximum number of results per answer.
        # NOTE(review): answer_template hard-codes the literal 10 instead of
        # using this attribute — confirm which is intended.
        self.top_num = 10

    def question_parser_graph(self, data):
        # Input is the set of entities extracted from the question.
        """
        Build Cypher statements for the different entities and intents
        (graph-relationship flavor).
        :param data: {"Disease":[], "Alias":[], "Symptom":[], "Complication":[],“Intentions”:[]}
        :return: (list of Cypher strings, tag string)
        """
        sql = []
        tag = ""
        if data:
            # Only the first non-empty entity class is used, in priority order.
            if data.get("Disease"):
                sql, tag = self.transfor_to_sql_graph("Disease",
                                                      data["Disease"],
                                                      data["Intentions"])
            elif data.get("Alias"):
                sql, tag = self.transfor_to_sql_graph("Alias", data["Alias"],
                                                      data["Intentions"])
            elif data.get("Symptom"):
                sql, tag = self.transfor_to_sql_graph("Symptom",
                                                      data["Symptom"],
                                                      data["Intentions"])
            elif data.get("Complication"):
                sql, tag = self.transfor_to_sql_graph("Complication",
                                                      data["Complication"],
                                                      data["Intentions"])
        return sql, tag

    # Query relationship nodes — example patterns kept from the original author:
    # "match data=(na:company{id:'12399145'}) - [*1..3]->(nb:company) return data
    # "match(p)-[r]->(n:Person{Name:'%s'}) return p.Name,r.relation,n.Name,p.cate,n.cate\
    # Union all\
    # match(p:Person {Name:'%s'}) -[r]->(n) return p.Name, r.relation, n.Name, p.cate, n.cate" % (name, name)
    def transfor_to_sql_graph(self, label, entities, intent):
        """
        Turn the question into Cypher queries (relationship flavor).
        :param label: entity label
        :param entities: entity list
        :param intent: query intent (unused in this variant)
        :return: (list of Cypher queries, tag) — or a bare [] when entities is empty
        """
        if not entities:
            return []
        sqls = []
        tag = ""
        # NOTE(review): entity names are %-interpolated into the Cypher text;
        # parameterization would be safer for untrusted input.
        if label == "Disease":
            for e in entities:
                sql = [
                    "match (d:Disease{name:'%s'})<-[r]-(p) return r Union all match (d:Disease{name:'%s'})-[r]->(n) return r"
                    % (e, e)
                ]
                sqls += sql
            tag = "Disease"
        elif label == "Alias":
            for e in entities:
                sql = [
                    "match (d:Disease)-[:ALIAS_IS]->(a:Alias{name:'%s'}) with d (match (p)-[r]->(d:Disease) return r Union all match (d:Disease)-[r]->(n) return r)"
                    % e
                ]
                sqls += sql
            tag = "Disease"
        elif label == "Symptom":
            for e in entities:
                sql = [
                    "match (p)-[r]->(s:Symptom{name:'%s'}) return r Union all match (s:Symptom{name:'%s'})-[r]->(n) return r"
                    % (e, e)
                ]
                sqls += sql
            tag = "HAS_SYMPTOM"
        elif label == "Complication":
            for e in entities:
                sql = [
                    "match (p)-[r]->(c:Complication{name:'%s'}) return r Union all match (p:Complication{name:'%s'})-[r]->(n) return r"
                    % (e, e)
                ]
                sqls += sql
            tag = "Disease"
        return sqls, tag

    def searching_graph(self, sqls):
        """Run each Cypher string and concatenate the result records."""
        answers = []
        for sql in sqls:
            ress = self.graph.run(sql).data()
            answers += ress
        return answers

    def question_parser(self, data):
        # Input is the set of entities extracted from the question.
        """
        Build one Cypher query batch per intent.
        :param data: {"Disease":[], "Alias":[], "Symptom":[], "Complication":[],“Intentions”:[]}
        :return: list of {"intention": str, "sql": [cypher, ...]} dicts
        """
        sqls = []
        if data:
            for intent in data["Intentions"]:
                sql_ = {}
                sql_["intention"] = intent
                sql = []
                # Same priority order as question_parser_graph.
                if data.get("Disease"):
                    sql = self.transfor_to_sql("Disease", data["Disease"],
                                               intent)
                    # print(sql)
                elif data.get("Alias"):
                    sql = self.transfor_to_sql("Alias", data["Alias"], intent)
                elif data.get("Symptom"):
                    sql = self.transfor_to_sql("Symptom", data["Symptom"],
                                               intent)
                elif data.get("Complication"):
                    sql = self.transfor_to_sql("Complication",
                                               data["Complication"], intent)
                if sql:
                    sql_['sql'] = sql
                    sqls.append(sql_)
        return sqls

    def transfor_to_sql(self, label, entities, intent):
        """
        Turn the question into Cypher queries, one per entity, keyed on
        (intent, label).
        :param label: entity label
        :param entities: entity list
        :param intent: query intent
        :return: list of Cypher queries (empty when entities is empty or no
            intent/label combination matches)
        """
        if not entities:
            return []
        sql = []
        # print(type(entities))
        # for e in entities:
        #     print(e)
        # Query symptoms
        if intent == "query_symptom" and label == "Disease":
            sql = [
                "MATCH (d:Disease)-[:HAS_SYMPTOM]->(s) WHERE d.name='{0}' RETURN d.name,s.name"
                .format(e) for e in entities
            ]
        if intent == "query_symptom" and label == "Alias":
            sql = ["MATCH (a:Alias)<-[:ALIAS_IS]-(d:Disease)-[:HAS_SYMPTOM]->(s) WHERE a.name='{0}' return " \
                   "d.name,s.name".format(e) for e in entities]
        # Query treatments
        if intent == "query_cureway" and label == "Disease":
            sql = ["MATCH (d:Disease)-[:HAS_DRUG]->(n) WHERE d.name='{0}' return d.name,d.treatment," \
                   "n.name".format(e) for e in entities]
        if intent == "query_cureway" and label == "Alias":
            sql = ["MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(a:Alias) WHERE a.name='{0}' " \
                   "return d.name, d.treatment, n.name".format(e) for e in entities]
        if intent == "query_cureway" and label == "Symptom":
            sql = ["MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' " \
                   "return d.name,d.treatment, n.name".format(e) for e in entities]
        if intent == "query_cureway" and label == "Complication":
            sql = ["MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(c:Complication) WHERE c.name='{0}' " \
                   "return d.name,d.treatment, n.name".format(e) for e in entities]
        # Query treatment period
        if intent == "query_period" and label == "Disease":
            sql = [
                "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.period".
                format(e) for e in entities
            ]
        if intent == "query_period" and label == "Alias":
            sql = [
                "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.period"
                .format(e) for e in entities
            ]
        if intent == "query_period" and label == "Symptom":
            sql = [
                "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.period"
                .format(e) for e in entities
            ]
        if intent == "query_period" and label == "Complication":
            sql = ["MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name," \
                   "d.period".format(e) for e in entities]
        # Query cure rate
        if intent == "query_rate" and label == "Disease":
            sql = [
                "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.rate".
                format(e) for e in entities
            ]
        if intent == "query_rate" and label == "Alias":
            sql = [
                "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.rate"
                .format(e) for e in entities
            ]
        if intent == "query_rate" and label == "Symptom":
            sql = [
                "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.rate"
                .format(e) for e in entities
            ]
        if intent == "query_rate" and label == "Complication":
            sql = ["MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name," \
                   "d.rate".format(e) for e in entities]
        # Query medical-check items
        if intent == "query_checklist" and label == "Disease":
            sql = [
                "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.checklist"
                .format(e) for e in entities
            ]
        if intent == "query_checklist" and label == "Alias":
            sql = [
                "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.checklist"
                .format(e) for e in entities
            ]
        if intent == "query_checklist" and label == "Symptom":
            sql = ["MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name," \
                   "d.checklist".format(e) for e in entities]
        if intent == "query_checklist" and label == "Complication":
            sql = ["MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name," \
                   "d.checklist".format(e) for e in entities]
        # Query hospital department
        if intent == "query_department" and label == "Disease":
            sql = ["MATCH (d:Disease)-[:DEPARTMENT_IS]->(n) WHERE d.name='{0}' return d.name," \
                   "n.name".format(e) for e in entities]
        if intent == "query_department" and label == "Alias":
            sql = ["MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:ALIAS_IS]->(a:Alias) WHERE a.name='{0}' " \
                   "return d.name,n.name".format(e) for e in entities]
        if intent == "query_department" and label == "Symptom":
            sql = ["MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:HAS_SYMPTOM]->(s:Symptom) WHERE s.name='{0}' " \
                   "return d.name,n.name".format(e) for e in entities]
        if intent == "query_department" and label == "Complication":
            sql = ["MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:HAS_COMPLICATION]->(c:Complication) WHERE " \
                   "c.name='{0}' return d.name,n.name".format(e) for e in entities]
        # Query diseases
        if intent == "query_disease" and label == "Alias":
            sql = ["MATCH (d:Disease)-[]->(s:Alias) WHERE s.name='{0}' return " \
                   "d.name".format(e) for e in entities]
        if intent == "query_disease" and label == "Symptom":
            sql = ["MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return " \
                   "d.name".format(e) for e in entities]
        # Query disease description
        if intent == "disease_describe" and label == "Alias":
            sql = ["MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.age," \
                   "d.insurance,d.infection,d.checklist,d.period,d.rate,d.money".format(e) for e in entities]
        if intent == "disease_describe" and label == "Disease":
            sql = ["MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.age,d.insurance,d.infection," \
                   "d.checklist,d.period,d.rate,d.money".format(e) for e in entities]
        if intent == "disease_describe" and label == "Symptom":
            sql = ["MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.age," \
                   "d.insurance,d.infection,d.checklist,d.period,d.rate,d.money".format(e) for e in entities]
        if intent == "disease_describe" and label == "Complication":
            sql = ["MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name," \
                   "d.age,d.insurance,d.infection,d.checklist,d.period,d.rate,d.money".format(e) for e in entities]
        return sql

    def searching(self, sqls):
        """
        Execute the Cypher queries and render answers.
        :param sqls: output of question_parser (list of intent/sql dicts)
        :return: list of answer strings, one per intent with results
        """
        final_answers = []
        for sql_ in sqls:
            intent = sql_['intention']
            queries = sql_['sql']
            answers = []
            for query in queries:
                ress = self.graph.run(query).data()
                # print(ress)
                answers += ress
            final_answer = self.answer_template(intent, answers)
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    def answer_template(self, intent, answers):
        """
        Render an answer string from the raw query records, per intent.
        :param intent: query intent
        :param answers: knowledge-graph query records
        :return: str (empty when there are no records)
        """
        final_answer = ""
        if not answers:
            return ""
        # Symptoms of a disease
        if intent == "query_symptom":
            # Group symptom names by disease name.
            disease_dic = {}
            for data in answers:
                d = data['d.name']
                s = data['s.name']
                if d not in disease_dic:
                    disease_dic[d] = [s]
                else:
                    disease_dic[d].append(s)
            i = 0
            for k, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 的症状有:{1}\n".format(
                    k, ','.join(list(set(v))))
                i += 1
        # Candidate diseases, ranked by match frequency
        if intent == "query_disease":
            disease_freq = {}
            for data in answers:
                d = data["d.name"]
                disease_freq[d] = disease_freq.get(d, 0) + 1
            n = len(disease_freq.keys())
            freq = sorted(disease_freq.items(),
                          key=lambda x: x[1],
                          reverse=True)
            for d, v in freq[:10]:
                # NOTE(review): probability divides by the literal 10, not by
                # n or the number of query entities — confirm intended.
                final_answer += "疾病为 {0} 的概率为:{1}\n".format(d, v / 10)
        # Treatments
        if intent == "query_cureway":
            # First list element is the treatment text, the rest are drug names.
            disease_dic = {}
            for data in answers:
                disease = data['d.name']
                treat = data["d.treatment"]
                drug = data["n.name"]
                if disease not in disease_dic:
                    disease_dic[disease] = [treat, drug]
                else:
                    disease_dic[disease].append(drug)
            i = 0
            for d, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 的治疗方法有:{1};可用药品包括:{2}\n".format(
                    d, v[0], ','.join(v[1:]))
                i += 1
        # Treatment period
        if intent == "query_period":
            disease_dic = {}
            for data in answers:
                d = data['d.name']
                p = data['d.period']
                if d not in disease_dic:
                    disease_dic[d] = [p]
                else:
                    disease_dic[d].append(p)
            i = 0
            for k, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 的治愈周期为:{1}\n".format(
                    k, ','.join(list(set(v))))
                i += 1
        # Cure rate
        if intent == "query_rate":
            disease_dic = {}
            for data in answers:
                d = data['d.name']
                r = data['d.rate']
                if d not in disease_dic:
                    disease_dic[d] = [r]
                else:
                    disease_dic[d].append(r)
            i = 0
            for k, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 的治愈率为:{1}\n".format(
                    k, ','.join(list(set(v))))
                i += 1
        # Medical-check items
        if intent == "query_checklist":
            disease_dic = {}
            for data in answers:
                d = data['d.name']
                r = data['d.checklist']
                if d not in disease_dic:
                    disease_dic[d] = [r]
                else:
                    disease_dic[d].append(r)
            i = 0
            for k, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 的检查项目有:{1}\n".format(
                    k, ','.join(list(set(v))))
                i += 1
        # Hospital departments
        if intent == "query_department":
            disease_dic = {}
            for data in answers:
                d = data['d.name']
                r = data['n.name']
                if d not in disease_dic:
                    disease_dic[d] = [r]
                else:
                    disease_dic[d].append(r)
            i = 0
            for k, v in disease_dic.items():
                if i >= 10:
                    break
                final_answer += "疾病 {0} 所属科室有:{1}\n".format(
                    k, ','.join(list(set(v))))
                i += 1
        # Disease description
        if intent == "disease_describe":
            disease_infos = {}
            for data in answers:
                name = data['d.name']
                age = data['d.age']
                insurance = data['d.insurance']
                infection = data['d.infection']
                checklist = data['d.checklist']
                period = data['d.period']
                rate = data['d.rate']
                money = data['d.money']
                if name not in disease_infos:
                    disease_infos[name] = [
                        age, insurance, infection, checklist, period, rate,
                        money
                    ]
                else:
                    disease_infos[name].extend([
                        age, insurance, infection, checklist, period, rate,
                        money
                    ])
            i = 0
            for k, v in disease_infos.items():
                if i >= 10:
                    break
                message = "疾病 {0} 的描述信息如下:\n发病人群:{1}\n医保:{2}\n传染性:{3}\n检查项目:{4}\n" \
                          "治愈周期:{5}\n治愈率:{6}\n费用:{7}\n"
                final_answer += message.format(k, v[0], v[1], v[2], v[3], v[4],
                                               v[5], v[6])
                i += 1
        return final_answer
exit() # graph = Graph(password="******") graph = Graph(password=inp) print("Enter the symptoms with space") inp = [str(i).lower() for i in input().split()] if inp == []: print("Entered input is not valid.Please try again by entering symptoms with space") exit() for i in range(len(inp)): try: results = graph.run('''match (s:Symptom) Where toLower(s.name) = $symptom return DISTINCT s.name as Symptoms''', parameters={'symptom' : inp[i]}).data() except: print("Entered Neo4j graph password is wrong. Please try again") exit() if results == []: print(f"Symptom '{inp[i]}' does not exists in our database. Please try again") exit() results = graph.run('''with $in as symptoms match (s:Symptom) Where toLower(s.name) in symptoms with collect(s) as symptoms match (d:Disease) where all(s in symptoms Where (s)-[:CAUSES]->(d)) return DISTINCT d.name as Disease, d.id as Disease_id''', parameters={'in' : inp}).data()
import time
from py2neo import Graph, Node
import networkx as nx
# /var/lib/neo4j/import

graph = Graph(password="******")

# Aggregate previous-installment timing and bureau activity per application.
query = '''
optional match(a:Application)-[:HAS_PREVIOUS]->(p)-[:HAS_PREV_INSTALLMENTS]->(i)
with toInteger(i.DAYS_INSTALMENT)-toInteger(i.DAYS_ENTRY_PAYMENT) as DaysInstallMinusEntry, count(i) as TotalInstallments
optional match(a)-[:HAS_BUREAU]->(bureau)
with size(filter(x IN collect(bureau.CREDIT_ACTIVE) WHERE x="Active")) as TotalActiveBureau,a.SK_ID_CURR as ApplicationID,count(bureau) as TotalBureau, TotalInstallments,DaysInstallMinusEntry
return ApplicationID,TotalInstallments,DaysInstallMinusEntry,TotalBureau, toFloat(TotalActiveBureau)/toFloat(TotalBureau) as ActiveBureauByTotalBureauRatio order by ActiveBureauByTotalBureauRatio desc
'''

start = time.time()
# FIX: graph.run() returns a lazy Cursor, so timing it alone excludes the cost
# of streaming the result set. Consume the cursor inside the timed region so
# `total` measures the full query execution.
data = graph.run(query).data()
end = time.time()
total = end - start

print(f"It took {total} seconds to run the query")
class MAIN:
    """Reads a delimiter-encoded CSV ('@@' separates properties, '#' separates
    property name from value), translates property names via a lookup CSV, and
    MERGEs the resulting nodes/relationships into Neo4j."""

    def __init__(self):
        # Create the py2neo Graph instance.
        self.g = Graph(
            host="127.0.0.1",  # ip of the machine running neo4j (see ifconfig)
            http_port=7474,  # port the neo4j server listens on
            user="******",  # database user name; default is neo4j
            password="******")

    def write(self, csvfile, message):
        # Append rows to a csv file.
        # NOTE(review): the file handle is never closed — consider a `with` block.
        newfile = open(csvfile, 'a+', newline='')
        filewriter = csv.writer(newfile)
        filewriter.writerows(message)

    def open(self, file):
        # Load the whole CSV into self.messages (list of rows).
        self.file = file
        with open(self.file, 'r') as f:
            self.reader = csv.reader(f)
            self.messages = list(self.reader)
        print(self.messages)

    def tra_attribute(self):
        # Neo4j label names and property values may be Chinese, but property
        # names must be English — so collect every distinct property name here
        # for manual translation.
        list_attribute = []
        for messages in self.messages[1:]:
            for message in messages:
                if r'#' in message:
                    for i in message.split(r'@@'):
                        if r'#' in i:
                            node_attribute = i.split(r'#')
                            list_attribute.append(node_attribute[0])
        list_attribute = list(set(list_attribute))
        # Write one row per name with an empty second column for the translation.
        lists_attribute = []
        for i in list_attribute:
            j = []
            j.append(i)
            j.append('')
            lists_attribute.append(j)
        self.write(csvfile='C:\\chouquxinxi\\出行指南\\属性名.csv',
                   message=lists_attribute)
        # Afterwards open that file by hand and write the English translation in
        # column 2 (automatic translation was attempted but never worked).

    def cypher_make_send(self):
        # Core method: translate property names, then MERGE nodes/relationships.
        # Load the manual translation results: Chinese name -> English name.
        dict_attribute = {}
        with open('C:\\chouquxinxi\\出行指南\\属性名.csv', 'r') as f:
            reader = csv.reader(f)
            for l in reader:
                dict_attribute[l[0]] = l[1]
        # Classify each data row.
        for message in self.messages[1:]:
            if len(message) == 2 or message[2] == '':
                # Two-column rows describe a node.
                node_type = message[0]  # node label
                node_attributes = message[1]  # encoded node properties
                dict_node_attribute = {}  # property dict, built below
                for i in node_attributes.split(r'@@'):
                    # node_attribute[0] = property name, node_attribute[1] = value
                    node_attribute = i.split(r'#')
                    # translate the property name to English
                    node_attribute[0] = dict_attribute[node_attribute[0]]
                    dict_node_attribute[node_attribute[0]] = node_attribute[1]
                # print(dict_node_attribute)
                if '名称' in node_attributes:
                    # A "名称" (name) property means this row targets a single
                    # node: create it, then set the remaining properties one by one.
                    # pop() removes the 'name' pair and returns its value; the
                    # rest are the properties to create or update.
                    namevalue = dict_node_attribute.pop('name')
                    cypher = "MERGE (m:%s { name: '%s' }) RETURN m.name" % (
                        node_type, namevalue)
                    # The statement above creates the node if that label/name
                    # combination does not yet exist.
                    # NOTE(review): `re` shadows the re module name if imported.
                    re = self.g.run(cypher).data()
                    print("-->节点<--新建节点:%s" % (re))
                    for key, value in dict_node_attribute.items():
                        cypher = "MERGE (m:%s { name: '%s' }) \
                        ON MATCH SET m.%s = '%s' \
                        RETURN m.%s" % (node_type, namevalue, key, value, key)
                        re = self.g.run(cypher).data()
                        print("-->节点<--'%s':新建或修改属性:%s" % (namevalue, re))
                else:
                    # No "名称": the row applies to every node of this label.
                    for key, value in dict_node_attribute.items():
                        cypher = "MERGE (m:%s) \
                        ON MATCH SET m.%s = '%s' \
                        RETURN m.%s" % (node_type, key, value, key)
                        re = self.g.run(cypher).data()
                        print("-->节点<--新建或修改属性:%s" % (re))
            elif len(message) == 3:
                # Three-column rows describe a relationship.
                relationships = message[0]  # encoded relationship
                dict_relationships = {}  # relationship dict, built below
                if r'@@' in relationships:
                    # A separator means the relationship carries properties too.
                    list_relationships = relationships.split(r'@@')
                    dict_relationships['type'] = list_relationships[0]  # rel type
                    for i in list_relationships[1:]:
                        relationships_attribute = i.split(r'#')
                        # translate the property name to English
                        relationships_attribute[0] = dict_attribute[
                            relationships_attribute[0]]
                        dict_relationships[relationships_attribute[
                            0]] = relationships_attribute[1]
                else:
                    dict_relationships['type'] = relationships
                node_start = message[1]  # start node
                dict_node_start = {}  # start-node property dict, built below
                for i in node_start.split(r'@@'):
                    node1_attribute = i.split(r'#')
                    node1_attribute[0] = dict_attribute[node1_attribute[0]]
                    dict_node_start[node1_attribute[0]] = node1_attribute[1]
                node_end = message[2]  # end node
                dict_node_end = {}  # end-node property dict, built below
                for i in node_end.split(r'@@'):
                    # node2_attribute[0] = property name, [1] = value
                    node2_attribute = i.split(r'#')
                    node2_attribute[0] = dict_attribute[node2_attribute[0]]
                    dict_node_end[node2_attribute[0]] = node2_attribute[1]
                # Now assemble the Cypher fragments.
                start_type = ''
                start_attribute = ''
                if 'type' in dict_node_start:  # label present on start node
                    start_type = ":%s" % (dict_node_start['type'])
                    dict_node_start.pop('type')
                for key, value in dict_node_start.items():
                    start_attribute = start_attribute + "%s:'%s'" % (
                        key, value) + ','
                if start_attribute == '':
                    cypher_node_start = start_type
                else:
                    # [:-1] drops the trailing comma
                    cypher_node_start = start_type + '{' + start_attribute[:-1] + '}'
                end_type = ''
                end_attribute = ''
                if 'type' in dict_node_end:  # label present on end node
                    end_type = ":%s" % (dict_node_end['type'])
                    dict_node_end.pop('type')
                for key, value in dict_node_end.items():
                    end_attribute = end_attribute + "%s:'%s'" % (key,
                                                                 value) + ','
                if end_attribute == '':
                    cypher_node_end = end_type
                else:
                    cypher_node_end = end_type + '{' + end_attribute[:-1] + '}'
                relationships_type = ''
                relationships_attribute = ''
                if 'type' in dict_relationships:  # relationship type present
                    relationships_type = ":%s" % (dict_relationships['type'])
                    dict_relationships.pop('type')
                for key, value in dict_relationships.items():
                    relationships_attribute = relationships_attribute + "%s:'%s'" % (
                        key, value) + ','
                if relationships_attribute == '':
                    cypher_relationships = relationships_type
                else:
                    cypher_relationships = relationships_type + '{' + relationships_attribute[:-1] + '}'
                cypher = " MATCH(m" + cypher_node_start + "),(n" + cypher_node_end + ") MERGE(m)-[r" + cypher_relationships + "]->(n) RETURN m,n,r"
                re = self.g.run(cypher).data()
                print("-->关系<--新建或修改关系:%s" % (re))
import csv from py2neo import Graph, Node, Relationship import tqdm # /var/lib/neo4j/import if __name__ == "__main__": graph = Graph("bolt://127.0.0.1:7687", username="******", password="******") graph.run('MATCH ()-[r:HasSpeaker]->() DELETE r') graph.run('MATCH ()-[r:HasTrack]->() DELETE r') graph.run('MATCH (n:Title) DELETE n') graph.run('MATCH (n:Title) DELETE n') graph.run('MATCH (n:Speaker) DELETE n') graph.run('MATCH (n:Track) DELETE n') with open('blackhat2019.csv', 'r', encoding='utf-8') as fp: reader = csv.reader(fp) header = next(reader) for row in tqdm.tqdm(reader): title, speakers, tracks, url = row graph.run('MERGE (:Title {name:"' + title + '", url:"' + url + '"})') for speaker in speakers.split(','): tmp = speaker.replace('"', '') graph.run('MERGE (:Speaker {name:"' + tmp + '"})') graph.run('MATCH (t:Title {name:"' + title + '"}), (s:Speaker {name:"' + tmp + '"}) CREATE (t)-[:HasSpeaker]->(s)') for track in tracks.split(','): graph.run('MERGE (:Track {name:"' + track + '"})') graph.run('MATCH (ti:Title {name:"' + title +
def save_data():
    """Rebuild the Q&A knowledge graph in Neo4j from CSV exports.

    Clears every relationship and node, then loads Question, Answer and Tag
    nodes from the Neo4j import directory and wires up the ``has_answer``
    (Question -> Answer) and ``has_tag`` (Question -> Tag) relationships.

    Connection details and file locations are hard-coded; the function only
    performs side effects and returns ``None``.
    """
    graph = Graph(
        "http://localhost:7474",
        username="******",
        password="******"
    )

    # Relationships must be removed before nodes: Neo4j refuses to DELETE a
    # node that still has attached relationships.
    instr = "MATCH (n)-[r]-(m) DELETE r"  # delete all relationships
    graph.run(instr)
    instr = "MATCH (n) DELETE n"          # delete all nodes
    graph.run(instr)

    import_dir = ('C:/Users/lenovo/.Neo4jDesktop/neo4jDatabases/'
                  'database-6fcac2d7-02fa-494d-a484-e782161e2887/'
                  'installation-3.5.6/import/')

    # --- Question nodes --------------------------------------------------
    # ``with`` guarantees the handle is closed; the original opened each
    # file and never closed it.
    with open(import_dir + 'question.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows)  # skip the header line
        for count, line in enumerate(rows, start=1):
            print(str(count) + str(line))
            graph.create(Node("Question",
                              question_id=line[0],
                              question_title=line[1],
                              question_body=line[2],
                              question_vote=line[3]))

    # --- Answer nodes ----------------------------------------------------
    with open(import_dir + 'answer.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows)
        for count, line in enumerate(rows, start=1):
            print(str(count) + str(line))
            graph.create(Node("Answer",
                              answer_id=line[0],
                              answer_body=line[1],
                              answer_vote=line[2]))

    # --- Tag nodes (deduplicated on the tag name) ------------------------
    tag_list = []
    with open(import_dir + 'tag.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows)
        count = 1
        for line in rows:
            if line[0] in tag_list:
                continue
            print(str(count) + str(line))
            tag_list.append(line[0])
            graph.create(Node("Tag",
                              tag_name=line[0],
                              tag_description=line[1]))
            count = count + 1

    # --- Question -> Answer relationships --------------------------------
    # Parameterised Cypher instead of string concatenation: ids containing
    # quotes can no longer break (or inject into) the query text.
    with open(import_dir + 'answer.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows)
        for count, line in enumerate(rows, start=1):
            print(str(count) + " Relation: Question-->Answer" + str(line))
            graph.run(
                "MATCH (a:Question),(b:Answer) "
                "WHERE a.question_id = $qid AND b.answer_id = $aid "
                "CREATE (a)-[r:has_answer]->(b)",
                qid=line[3], aid=line[0])

    # --- Question -> Tag relationships -----------------------------------
    with open(import_dir + 'tag.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows)
        for count, line in enumerate(rows, start=1):
            print(str(count) + " Relation: Question-->Tag" + str(line))
            graph.run(
                "MATCH (a:Question),(b:Tag) "
                "WHERE a.question_id = $qid AND b.tag_name = $tname "
                "CREATE (a)-[r:has_tag]->(b)",
                qid=line[2], tname=line[0])
class AnswerSearcher:
    """Run Cypher queries against a medical knowledge graph and render the
    results as Chinese answer sentences.

    NOTE(review): credentials are redacted and the host is hard-coded to a
    local Neo4j instance — confirm before reuse.
    """

    def __init__(self):
        # py2neo v3-style keyword connection; the commented line is the old
        # py2neo 2.0.8 URL form.
        self.g = Graph(
            # "http://localhost:7474/db/data" # py2neo 2.0.8 style
            host="127.0.0.1",  # py2neo 3 style
            user="******",
            password="******")
        # Cap on how many distinct items get joined into one answer string.
        self.num_limit = 30

    '''执行cypher查询,并返回相应结果'''

    def search_main(self, sqls):
        """Execute each query bundle and prettify its rows.

        Each element of *sqls* is a dict with keys 'question_type' and
        'sql' (a list of Cypher strings).  Returns a list of the non-empty
        formatted answers.
        """
        final_answers = []
        for sql_ in sqls:
            question_type = sql_['question_type']
            queries = sql_['sql']
            answers = []
            for query in queries:
                # .data() yields a list of dicts keyed like 'm.name'.
                ress = self.g.run(query).data()
                answers += ress
            final_answer = self.answer_prettify(question_type, answers)
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    '''根据对应的qustion_type,调用相应的回复模板'''

    def answer_prettify(self, question_type, answers):
        """Format raw query rows into one Chinese answer string.

        *answers* is the list of record dicts returned by py2neo; returns
        '' when it is empty.  NOTE(review): when no branch matches, the
        initial ``[]`` is returned (a list, not a string) — callers only
        truth-test the result, so this is currently harmless.
        """
        final_answer = []
        if not answers:
            return ''
        # Convention in the rows below: 'm.*' / 'n.*' are the two node
        # endpoints of the matched pattern, 'r.*' the relationship.
        if question_type == 'disease_symptom':
            desc = [i['n.name'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}的症状包括:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'symptom_disease':
            desc = [i['m.name'] for i in answers]
            subject = answers[0]['n.name']
            final_answer = '症状{0}可能染上的疾病有:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_cause':
            desc = [i['m.cause'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}可能的成因有:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_prevent':
            desc = [i['m.prevent'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}的预防措施包括:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_lasttime':
            desc = [i['m.cure_lasttime'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}治疗可能持续的周期为:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_cureway':
            # m.cure_way is itself a list per row, hence the inner join.
            desc = [';'.join(i['m.cure_way']) for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}可以尝试如下治疗:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_cureprob':
            desc = [i['m.cured_prob'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}治愈的概率为(仅供参考):{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_getway':
            desc = [i['m.get_way'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}的传播方式为:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_easyget':
            desc = [i['m.easy_get'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}的易感人群包括:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_desc':
            desc = [i['m.desc'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0},熟悉一下:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_acompany':
            # Complications may appear on either end of the relationship, so
            # collect both sides and drop the queried disease itself.
            desc1 = [i['n.name'] for i in answers]
            desc2 = [i['m.name'] for i in answers]
            subject = answers[0]['m.name']
            desc = [i for i in desc1 + desc2 if i != subject]
            final_answer = '{0}的并发症包括:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_can_eat':
            # Only the first row's value is used here (unlike the other
            # branches, which aggregate every row).
            desc = [answers[0]['m.can_eat']]
            subject = answers[0]['m.name']
            if desc:
                final_answer = '{0}可以吃/喝:{1}'.format(
                    subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_not_food':
            desc = [i['n.name'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}忌食的食物包括有:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_do_food':
            # Split the rows by relationship name: "宜吃" (recommended food)
            # versus "推荐食谱" (recommended recipes).
            do_desc = [i['n.name'] for i in answers if i['r.name'] == '宜吃']
            recommand_desc = [
                i['n.name'] for i in answers if i['r.name'] == '推荐食谱'
            ]
            subject = answers[0]['m.name']
            final_answer = '{0}推荐{1}\n推荐食谱包括有:{2}'.format(
                subject, ';'.join(list(set(do_desc))[:self.num_limit]),
                ';'.join(list(set(recommand_desc))[:self.num_limit]))
        elif question_type == 'food_not_disease':
            desc = [i['m.name'] for i in answers]
            subject = answers[0]['n.name']
            final_answer = '患有{0}的人最好不要吃{1}'.format(
                ';'.join(list(set(desc))[:self.num_limit]), subject)
        elif question_type == 'food_do_disease':
            desc = [i['m.name'] for i in answers]
            subject = answers[0]['n.name']
            final_answer = '患有{0}的人建议多试试{1}'.format(
                ';'.join(list(set(desc))[:self.num_limit]), subject)
        elif question_type == 'disease_drug':
            desc = [i['n.name'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}通常的使用的药品包括:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'drug_disease':
            desc = [i['m.name'] for i in answers]
            subject = answers[0]['n.name']
            final_answer = '{0}主治的疾病有{1},可以试试'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'disease_check':
            desc = [i['n.name'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}通常可以通过以下方式检查出来:{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        elif question_type == 'check_disease':
            desc = [i['m.name'] for i in answers]
            subject = answers[0]['n.name']
            final_answer = '通常可以通过{0}检查出来的疾病有{1}'.format(
                subject, ';'.join(list(set(desc))[:self.num_limit]))
        return final_answer
"Wisconsin","Wyoming"] # Document type docs = ["fraudulent", "fortune_100", "pennywise", "sold", "startup", "broke"] doc_entries = [] for doc in docs: doc_entries.append({ "people": [random.choice(people) for x in range(0,7)], "orgs": [random.choice(orgs) for x in range(0,5)], "gpes": [random.choice(gpes) for x in range(0,4)] }) graph = Graph(password='******') graph.run("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r") # deleting existing data create_str = [] create_str += ["({0}:gpe {{ name:'{1}' }})".format(gpe,gpe) for gpe in gpes] create_str += ["({0}:org {{name:'{1}'}})".format(org,org) for org in orgs] create_str += ["({0}:person {{name:'{1}'}})".format(person,person) for person in people] create_str += ["({0}:doc {{name:'{1}'}})".format(doc,doc) for doc in docs] graph.run("create "+",".join(create_str)) #inserting individual entities for doc_name, entry in zip(docs, doc_entries): #associating gepes for gpe in entry["gpes"]: graph.run('MATCH (n:gpe {{name:"{0}"}}),(d:doc {{name:"{1}"}}) create (n)-[:indoc]->(d)'.format(gpe,doc_name)) #associating orgs for org in entry["orgs"]: graph.run('MATCH (n:org {{name:"{0}"}}),(d:doc {{name:"{1}"}}) create (n)-[:indoc]->(d)'.format(org,doc_name)) #associating people
class AnswerSearcher:
    """Run Cypher queries against a bank-product knowledge graph and render
    the results as Chinese answer sentences.

    Fixes relative to the previous revision (behavior otherwise preserved):
      * ``recommend_category`` / ``if_recommend_category`` used
        ``final_answer = + '{1}产品、'.format(desc)`` — a unary ``+`` on a
        string (TypeError) with a ``{1}`` index for a single argument
        (IndexError).  Now ``final_answer += '{0}产品、'.format(desc)``.
      * ``subbank_number`` reset ``count_all`` inside the loop, so the grand
        total only reflected the last area; it is now hoisted.
      * ``category_different`` passed (name1, rela, name2) into
        '{}与{}之间的差异为{}。', rendering the difference text as the second
        noun; arguments reordered to (name1, name2, rela).
      * ``notice_attribution`` / ``attribution_infos`` passed the category
        list into the "类别差异" slot and never used the difference text.
      * Bare ``except:`` narrowed to ``except Exception:``; the ``type``
        builtin is no longer shadowed.
    """

    def __init__(self):
        self.g = Graph(
            host="39.100.119.153",  # 127.0.0.1
            http_port=7474,  # 7687
            user="******",  # neo4j
            password="******")  # admin
        # Cap on how many distinct items get joined into one answer string.
        self.num_limit = 20

    def search_main(self, sqls):
        """Execute every query bundle in sqls['sqls'] and prettify the rows.

        Returns a list of the non-empty formatted answers.
        """
        final_answers = []
        for sql_ in sqls['sqls']:
            question_type = sql_['question_type']
            queries = sql_['sql']
            answers = []
            for query in queries:
                # .data() returns a list of dicts keyed like 'm.产品名称'.
                ress = self.g.run(query).data()
                answers += ress
            final_answer = self.answer_prettify(question_type, answers,
                                                sqls['question'])
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    def answer_prettify(self, question_type, answers, question):
        """Format raw query rows into one Chinese answer string.

        *answers* is the list of record dicts from py2neo ('' is returned
        when it is empty); *question* is the original user question (kept
        for interface compatibility, currently unused by the templates).
        """
        final_answer = ''
        if not answers:
            return ''
        if question_type == 'check':
            desc = answers[0]['m.产品名称']
            subject = answers[0]['m.登记编码']
            if desc:
                final_answer = '{0}在“全国银行业理财产品登记系统”的登记编号为:{1},是银行发行的正规理财产品。'.format(
                    desc, subject)
            else:
                final_answer = '该产品未在理财系统查询到登记编码,无登记编码均不属于正规银行理财产品!'
        elif question_type == 'explanation_category':
            name = answers[0]['m.名词']
            desc = ''
            nature = ''
            user = ''
            # Each row carries at most one of the three attributes; the last
            # non-empty value per attribute wins.
            for i in answers:
                if i['m.定义']:
                    desc = '定义为' + i['m.定义'] + '\n'
                elif i['m.特性']:
                    nature = '特性为' + i['m.特性'] + '\n'
                elif i['m.适用人群']:
                    user = '******' + i['m.适用人群'] + '\n'
            final_answer = '{0}:\n{1}{2}{3}'.format(name, desc, nature, user)
        elif question_type == 'explanation_noun':
            name = answers[0]['m.名词']
            desc = answers[0]['m.定义']
            final_answer = '{0}的定义为:{1}'.format(name, desc)
        elif question_type == 'notice_category':
            print(question_type)
            print(answers)
            name = answers[0]['m.名词']
            desc = ''
            nature = ''
            user = ''
            openform = ''
            ptype = ''
            # Best-effort: each attribute may be absent (KeyError) or None
            # (TypeError on concatenation); either way the field stays ''.
            for i in answers:
                try:
                    desc = '定义为' + i['m.定义'] + '\n'
                except Exception:
                    pass
                try:
                    nature = '特性为' + i['m.特性'] + '\n'
                except Exception:
                    pass
                try:
                    user = '******' + i['m.适用人群'] + '\n'
                except Exception:
                    pass
                try:
                    openform = '开放形态为' + i['m.开放形态'] + '\n'
                except Exception:
                    pass
                try:
                    ptype = '产品类型为' + i['m.产品类型']
                except Exception:
                    pass
            final_answer += '{0}产品需要注意的事项有:\n{1}{2}{3}{4}{5}'.format(
                name, desc, nature, user, openform, ptype)
        elif question_type == 'notice_attribution':
            attribution = answers[0]['m.名称']
            diff = answers[0]['m.类别差异']
            subject = [i['n.名词'] for i in answers]
            # BUG FIX: {2} is the "类别差异" slot — the original passed the
            # plain category value there and left `diff` unused.
            final_answer = '{0}需要注意的事项有:\n(1){0}含有的类别有:{1}。\n(2)类别差异为:{2}。'.format(
                attribution, '、'.join(list(set(subject))), diff)
        elif question_type == 'notice_product':
            name = answers[0]['m.产品名称']
            print(answers)
            desc = ''
            desc1 = ''
            nature = ''
            user = ''
            openform = ''
            ptype = ''
            for i in answers:
                try:
                    desc = i['n.名词']
                except Exception:
                    pass
                try:
                    desc1 = '的定义为' + i['n.定义'] + '\n'
                except Exception:
                    pass
                try:
                    nature = '特性为' + i['n.特性'] + '\n'
                except Exception:
                    pass
                try:
                    user = '******' + i['n.适用人群'] + '\n'
                except Exception:
                    pass
                try:
                    openform = '开放形态为' + i['n.开放形态'] + '\n'
                except Exception:
                    pass
                try:
                    ptype = '产品类型为' + i['n.产品类型']
                except Exception:
                    pass
            # Number only the non-empty sections (1), (2), ...
            order = 1
            if nature != '':
                nature = '({})'.format(order) + nature
                order = order + 1
            if user != '':
                user = '******'.format(order) + user
                order = order + 1
            if openform != '':
                openform = '({})'.format(order) + openform
                order = order + 1
            if ptype != '':
                ptype = '({})'.format(order) + ptype
                order = order + 1
            final_answer += '{0}属于{1}的产品,需要注意的事项有:\n{1}{2}{3}{4}{5}{6}'.format(
                name, desc, desc1, nature, user, openform, ptype)
        elif question_type == 'call_number':
            name = answers[0]['m.名称']
            subject = ''
            print(answers)
            try:
                subject = answers[0]['m.客服电话']
            except Exception:
                pass
            try:
                subject = answers[1]['m.咨询电话']
            except Exception:
                pass
            final_answer += '{0}的咨询电话为:{1}。'.format(name, subject)
        elif question_type == 'bank_product':
            bank = answers[0]['m.名称']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            product = [i['n.产品名称'] for i in answers]
            final_answer = '以下为{0}最新的5款产品:'.format(bank)
            list_zip = list(zip(product, state))
            order = 0
            for i in list_zip:
                order += 1
                final_answer += '\n({0}){1},产品状态为:{2}'.format(
                    order, i[0], i[1])
        elif question_type == 'bank_category_product':
            bank = answers[0]['m.名称']
            cate = answers[0]['p.名词']
            state = [i['n.产品状态'] for i in answers]
            product = [i['n.产品名称'] for i in answers]
            final_answer = '以下为{0}最新的5款{1}产品:'.format(bank, cate)
            list_zip = list(zip(product, state))
            order = 0
            for i in list_zip:
                order += 1
                final_answer += '\n({0}){1},产品状态为:{2}'.format(
                    order, i[0], i[1])
        elif question_type == 'product_desc':
            name = answers[0]['m.产品名称']
            djbm = answers[0]['m.登记编码']
            qxlx = answers[0]['m.期限类型']
            yjbjbz = answers[0]['m.业绩比较基准']
            # Rows 1-4 are optional related nodes; missing ones render ''.
            try:
                fxjg = answers[1]['n.名称']
            except Exception:
                fxjg = ''
            try:
                mjfs = answers[2]['n.名词']
            except Exception:
                mjfs = ''
            try:
                yxms = answers[3]['n.名词']
            except Exception:
                yxms = ''
            try:
                tzxz = answers[4]['n.名词']
            except Exception:
                tzxz = ''
            final_answer = '{0}的简介如下:\n登记编码:{1}\n期限类型:{2}\n业绩比较基准:{3}\n发行机构:{4}\n募集方式:{5}\n运作模式:{6}\n投资性质:{7}'.format(
                name, djbm, qxlx, yjbjbz, fxjg, mjfs, yxms, tzxz)
        elif question_type == 'bank_desc':
            name = answers[0]['m.名称']
            hours = answers[0]['m.营业时间']
            call = answers[0]['m.客服电话']
            url = answers[0]['m.官网链接']
            final_answer = '下面为您介绍{0}的相关信息:\n营业时间:{1}\n客服电话:{2}\n官网链接:{3}'.format(
                name, hours, call, url)
        elif question_type == 'url':
            name = answers[0]['m.名称']
            url = answers[0]['m.官网链接']
            final_answer = '{0}的官网链接为:\n{1}'.format(name, url)
        elif question_type == 'area_subbank_addr':
            bank = answers[0]['n.名称']
            areas = [i['m.区域'] for i in answers]
            area = set(areas)
            # NOTE(review): `area` is a set here, so this first header renders
            # the set repr; the per-area headers below then rebind it to a
            # string.  Looks unintended but is preserved as-is.
            final_answer += '{0}在{1}的网点分布如下:'.format(bank, area)
            subbank = [i['m.名称'] for i in answers]
            addr = [i['m.具体地址'] for i in answers]
            list_zip = list(zip(subbank, addr, areas))
            for a in set(areas):
                for i in list_zip:
                    if i[2] != a:
                        continue
                    if i[2] != area:
                        area = i[2]
                        final_answer += '\n{0}在{1}的网点分布如下:'.format(
                            bank, area)
                    final_answer += '\n{0},具体地址:{1}'.format(i[0], i[1])
        elif question_type == 'area_subbank':
            bank = answers[0]['n.名称']
            areas = [i['m.区域'] for i in answers]
            area = set(areas)  # NOTE(review): same set-vs-string quirk as above
            subbank = [i['m.名称'] for i in answers]
            final_answer += '{0}在{1}的支行有:'.format(bank, area)
            list_zip = list(zip(subbank, areas))
            for a in set(areas):
                for i in list_zip:
                    if i[1] != a:
                        continue
                    if i[1] != area:
                        area = i[1]
                        final_answer += '\n{0}在{1}的支行有:'.format(bank, area)
                    final_answer += '\n{0}'.format(i[0])
        elif question_type == 'attribution_infos':
            attribution = answers[0]['m.名称']
            diff = answers[0]['m.类别差异']
            subject = [i['n.名词'] for i in answers]
            # BUG FIX: same argument mix-up as 'notice_attribution'.
            final_answer = '{0}:\n(1){0}含有的类别有:{1}。\n(2)类别差异为:{2}。'.format(
                attribution, '、'.join(list(set(subject))), diff)
        elif question_type == 'product_number':
            bank = answers[0]['m.名称']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            count = [i['count(n)'] for i in answers]
            final_answer += '{0}:'.format(bank)
            list_zip = list(zip(state, count))
            count_all = 0
            for i in list_zip:
                count_all += i[1]
                final_answer += '\n{0}产品有{1}个,'.format(i[0], i[1])
            final_answer += '\n所有产品总数为{0}个。'.format(count_all)
        elif question_type == 'product_category_number':
            bank = answers[0]['m.名称']
            cate = answers[0]['p.名词']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            count = [i['count(n)'] for i in answers]
            final_answer += '{0}{1}产品情况:'.format(bank, cate)
            list_zip = list(zip(state, count))
            count_all = 0
            for i in list_zip:
                count_all += i[1]
                final_answer += '\n{0}产品有{1}个,'.format(i[0], i[1])
            final_answer += '\n所有产品总数为{0}个。'.format(count_all)
        elif question_type == 'subbank_number':
            print(answers)
            # BUG FIX: the original re-initialised count_all inside the loop,
            # so the grand total only reflected the last area's count.
            count_all = 0
            for i in answers:
                bank = i['n.名称']
                area = i['m.区域']
                count = i['count(r)']
                count_all += count
                if i == answers[0]:
                    # First row carries the bank name in the header.
                    final_answer += '{0}\n在{1}共有{2}个网点,\n'.format(
                        bank, area, count)
                else:
                    final_answer += '在{0}共有{1}个网点,\n'.format(area, count)
            final_answer += '{0}在所查询地区的网点总共有{1}个。'.format(bank, count_all)
        elif question_type == 'product_area':
            name = answers[0]['m.产品名称']
            area = [i['n.名称'] for i in answers]
            final_answer = '{}的销售区域为{}。'.format(name,
                                               '、'.join(list(set(area))))
        elif question_type == 'product_attribution':
            print(answers)
            desc = [list(i.values())[-1] for i in answers]
            desc1 = ['' if x is None else x for x in desc]
            final_answer = ';'.join(list(set(desc1))[:self.num_limit])
        elif question_type == 'product_user':
            print(answers)
            name = answers[0]['m.产品名称']
            user = [i['n.适用人群'] for i in answers if i['n.适用人群'] is not None]
            print(user)
            final_answer = '{0}的适用人群为:\n{1}'.format(
                name, '\n'.join(list(set(user))))
        elif question_type == 'investment_category':
            name = [i['m.名词'] for i in answers]
            desc = [i['n.名词'] for i in answers]
            # The original if/else branches were identical; collapsed.
            for i in list(zip(name, desc)):
                final_answer += '{0}所属类型为{1}\n'.format(i[0], i[1])
        elif question_type == 'if_investment_category':
            desc = answers[0]['n.名词']
            if desc:
                final_answer = '是的!'
            else:
                final_answer = '不对哦!'
            # TODO: still needs to return a precise answer (untested below).
            try:
                ansname = answers[1]['m.名词']
                ansdesc = answers[1]['n.名词']
                final_answer += '{0}所属类型为{1}。'.format(ansname, ansdesc)
            except Exception:
                pass
        elif question_type == 'institution_category':
            print(answers)
            name = answers[0]['m.名称']
            desc = answers[0]['n.名词']
            final_answer = '{}所属机构类别为{}。'.format(name, desc)
        elif question_type == 'institution_bank':
            name = answers[0]['m.名称']
            desc = answers[0]['n.名称']
            final_answer = '{}所属总行为{}。'.format(name, desc)
        elif question_type == 'if_category':
            desc = answers[0]['p.名词']
            if desc:
                final_answer = '是的!'
            else:
                final_answer = '不对哦!'
            # TODO: still needs to return a precise answer.
            try:
                ansname = answers[1]['m.产品名称']
                anstype = answers[1]['p.名词']
                ansattr = answers[1]['n.名称']
                final_answer += '{0}的{1}属性的类别为{2}。'.format(
                    ansname, ansattr, anstype)
            except Exception:
                pass
        elif question_type == 'bank_time':
            bank = answers[0]['m.名称']
            hours = answers[0]['m.营业时间']
            final_answer = '{}的营业时间为{}'.format(bank, hours)
        elif question_type == 'production_time':
            # The time-like field is the last projected column of the row.
            value = list(answers[0].values())[-1]
            final_answer = '{}'.format(value)
        elif question_type == 'attribution_different':
            name = answers[0]['m.名称']
            desc = answers[0]['m.类别差异']
            final_answer = '{}不同类别之间的差异如下:\n{}'.format(name, desc)
        elif question_type == 'category_different':
            for i in answers:
                name1 = i['m.名词']
                rela = i['r.different']
                name2 = i['n.名词']
                # BUG FIX: the original passed (name1, rela, name2), rendering
                # the difference text where the second noun belongs.
                if i != answers[-1]:
                    final_answer += '{}与{}之间的差异为{}。\n'.format(
                        name1, name2, rela)
                else:
                    final_answer += '{}与{}之间的差异为{}。'.format(name1, name2, rela)
        elif question_type == 'other_category':
            name = answers[0]['m.名词']
            desc = [i['n.名词'] for i in answers]
            final_answer = '除了{},还有:{}'.format(name,
                                              '、'.join(list(set(desc))))
        elif question_type == 'category_nature':
            name = answers[0]['m.名词']
            desc = answers[0]['m.特性']
            final_answer = '{}产品的特性为{}'.format(name, desc)
        elif question_type == 'recommend_category':
            final_answer = '为你推荐的产品有:'
            for i in answers:
                desc = i['m.名词']
                # BUG FIX: was `final_answer = + '{1}产品、'.format(desc)` —
                # unary + on a string (TypeError) and a {1} index with a
                # single argument (IndexError).
                if i != answers[-1]:
                    final_answer += '{0}产品、'.format(desc)
                else:
                    final_answer += '{0}产品。'.format(desc)
        elif question_type == 'if_recommend_category':
            desc = answers[0]['n.名词']
            if desc:
                final_answer = '是的!'
            else:
                final_answer = '不对哦!'
            final_answer += '为你推荐的产品有:'
            # Best-effort continuation; same `= +` fix as above.
            try:
                for i in answers:
                    desc = i['m.名词']
                    if i == answers[0]:
                        pass
                    else:
                        if i != answers[-1]:
                            final_answer += '{0}产品、'.format(desc)
                        else:
                            final_answer += '{0}产品。'.format(desc)
            except Exception:
                pass
        elif question_type == 'if_buy':
            name = answers[0]['m.产品名称']
            state = answers[0]['m.产品状态']
            final_answer = '{}目前状态为{},'.format(name, state)
            if state == '在售':
                final_answer += '现在可以购买哦!'
            else:
                final_answer += '现在不可以购买哦!'
        return final_answer
# #========================================== Get files ==========================================# if __name__ == "__main__": pw = os.environ.get('NEO4J_PASS') g = Graph("http://localhost:7474/", password=pw) ## readme need to document setting environment variable in pycharm # g.delete_all() tx = g.begin() index1 = ''' CREATE INDEX ON: Contribution(type) ''' index2 = ''' CREATE INDEX ON: Committee(name) ''' g.run(index1) g.run(index2) # root = os.getcwd() # path = os.path.join(root, "data") # disclosure_1st_path = os.path.join(path, "2013_MidYear_XML") # files = [f for f in os.listdir(disclosure_1st_path) if f.endswith('.xml')] # files = ['file:///Users/yaqi/Documents/health-graph/data/2013_MidYear_XML/700669542.xml'] # Return xml files def get_file_path(kind): root_dir = '/Users/yaqi/Documents/data/' + kind filenames = [f for f in os.listdir(root_dir) if f.endswith('.xml')] filepath = [] for file in filenames: path = 'file://' + os.path.join(root_dir, file) filepath.append(path)
def UserStory2(Neo4JPwd, Dbname):
    """Interactive report: find policies matching a premium range, disease
    cover and company, then show the min/max premium policies.

    Flow:
      1. Prompt for a premium range, a covered disease and a company name.
      2. Query Neo4j for policy names satisfying all three criteria.
      3. Aggregate the matching policies' premiums in MongoDB and print the
         cheapest / most expensive policy per company as a table.

    Args:
        Neo4JPwd: password for the local Neo4j instance.
        Dbname:   MongoDB database name holding the ``Insurance`` collection.
    """
    graph = Graph(password=Neo4JPwd)

    # -- Input collection and validation ----------------------------------
    valid_ranges = ("1000-50000", "50000-100000",
                    "100000-150000", "150000-200000")
    input1_PremiumRange = str(
        input(
            "Please enter the premium range you want from following options:\n"
            "1000-50000 \n"
            "50000-100000 \n"
            "100000-150000 \n"
            "150000-200000 \n"))
    # BUG FIX: the original also compared the string against [] (always
    # False); membership in the allowed set covers empty input too.
    if input1_PremiumRange not in valid_ranges:
        print(
            "Entered Premium Range is not from mentioned list.Please try again by entering the range from options given"
        )
        return

    input3_CoverFor = str(
        input("Please enter the disease you require in coverage:")).lower()
    # BUG FIX: was `== []` (a str is never equal to a list, so the check
    # could never fire).
    if not input3_CoverFor:
        print("Entered Covered disease is not valid.Please try again")
        return

    input2_CompanyName = str(
        input("Please enter the company name of the insurance:")).lower()
    if not input2_CompanyName:
        print("Entered Company Name is not valid.Please try again")
        return

    # -- Neo4j: policy names matching all three criteria -------------------
    # All user input now travels as Cypher parameters; the original spliced
    # the premium range and disease straight into the query text.
    results = graph.run(
        """MATCH (a:CompanyName)-[r:Has_Policy]->(b:PolicyName)
        WHERE toLower(a.CompanyName) = $CName
        WITH {PolicyName:b.PolicyName} AS Policies
        MATCH (t:PolicyName)-[r:PremiumRange_Of]->(u:PremiumRanges)
        WITH {PolicyName:t.PolicyName} AS PoliciesRange
        WHERE Policies.PolicyName = t.PolicyName AND u.PremiumRange = $PRange
        MATCH (a:PolicyName)-[r:Diseases_Covered]->(b:Disease)
        WHERE PoliciesRange.PolicyName = a.PolicyName AND toLower(b.name) = $Disease
        return PoliciesRange.PolicyName AS PolicyNames""",
        parameters={'CName': input2_CompanyName,
                    'PRange': input1_PremiumRange,
                    'Disease': input3_CoverFor}).data()

    if results != []:
        Policy_Name = [record['PolicyNames'] for record in results]

        # -- MongoDB: premiums of the matching policies --------------------
        # $out materialises the projection into the Policy_Premium
        # collection for the follow-up min/max grouping.
        client = MongoClient()
        db = client[Dbname]
        collection = db.Insurance
        collection.aggregate([
            {"$match": {"CompanyName": {"$ne": None}}},
            {"$unwind": "$Policies"},
            {"$match": {"Policies.PolicyName": {"$in": Policy_Name}}},
            {"$project": {
                "_id": 0,
                "CompanyName": {"$toLower": "$CompanyName"},
                "PolicyName": "$Policies.PolicyName",
                "Premium": "$Policies.Premium"
            }},
            {"$match": {"CompanyName": input2_CompanyName}},
            {"$out": "Policy_Premium"}
        ])
        query = db.Policy_Premium.aggregate([{
            "$group": {
                "_id": "$CompanyName",
                "minPremium": {"$min": "$Premium"},
                "maxPremium": {"$max": "$Premium"}
            }
        }])
        df = pd.DataFrame(query)
        # One group per company; the last row's values are used below
        # (in practice there is a single company after the $match).
        for index, row in df.iterrows():
            minPremium = row['minPremium']
            maxPremium = row['maxPremium']

        Minimum_Policy = db.Policy_Premium.find({"Premium": minPremium}, {
            "PolicyName": 1,
            "_id": 0
        })
        Maximum_Policy = db.Policy_Premium.find({"Premium": maxPremium}, {
            "PolicyName": 1,
            "_id": 0
        })
        # The original copied these lists element-by-element into fresh
        # lists before assignment; the copies were redundant.
        df['Minimum_PolicyName'] = [record['PolicyName']
                                    for record in Minimum_Policy]
        df['Maximum_PolicyName'] = [record['PolicyName']
                                    for record in Maximum_Policy]

        # -- Final output --------------------------------------------------
        print(tabulate(df, headers='keys', tablefmt='psql'))
    else:
        print("There is no Policy for this combination in our database.")
from py2neo import Node, Relationship, Graph
from random import randrange

# Wipe the database: delete every node together with its relationships.
g = Graph()
g.run("""MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r""")

people = ["Jake", "Emily", "Alex"]
people_two = ["Emily", "Alex", "Jake"]
pairs = zip(people, people_two)

# One transaction per pair.  Node() without MERGE means a name appearing in
# several pairs gets a fresh node each time (duplicates are possible).
for pair in pairs:
    tx = g.begin()
    a = Node("Person", name=pair[0])
    b = Node("Person", name=pair[1])
    # NOTE(review): randrange(0, 1) always returns 0, so every edge gets
    # weight 0 — presumably a wider range was intended; confirm.
    ab = Relationship(a, "KNOWS", b, weight=randrange(0, 1))
    tx.create(a)
    tx.create(b)
    tx.create(ab)
    tx.commit()

# BUG FIX: the original pattern contained a dangling ", -[]->()" term,
# which is a Cypher syntax error — the query could never execute.
print(g.run("""
    MATCH (a:Person {name :'Jake'})-[relab:KNOWS]->(b:Person)
    RETURN a.name, b.name
    """).data())
# 方式1: g = Graph(host="localhost", password='******',bolt=True, bolt_port=7689) print g.data('match (n) return count(*)') sys.exit(1) """ # 方式2: *****访问被代理或docker 容器中的 neo4j server的话,只能用这种方式 ********* # set up authentication parameters http_port = "7476" authenticate("localhost:"+http_port, "username", "password") # connect to authenticated graph database g = Graph("http://localhost:"+http_port+"/db/data/", bolt_port=7689) g.data('match (n) return count(*)') g.run('match (n) return count(*)').dump() # import data in one transaction tx = g.begin() a = Node("Person", name="Alice") b = Node("Person", name="Bob") tx.create(a) ab = Relationship(a, "KNOWS", b) tx.create(ab) #tx.commit() print g.exists(ab) # get nodes in one autocommit transaction g.run("MATCH (a:Person) RETURN a.name, a.born LIMIT 4").data()
import os import sys import time import requests from py2neo import Graph, Node, Relationship graph = Graph() graph.run("CREATE CONSTRAINT ON (u:User) ASSERT u.username IS UNIQUE") graph.run("CREATE CONSTRAINT ON (t:Tweet) ASSERT t.id IS UNIQUE") graph.run("CREATE CONSTRAINT ON (h:Hashtag) ASSERT h.name IS UNIQUE") TWITTER_BEARER = os.environ["TWITTER_BEARER"] headers = dict(accept="application/json", Authorization="Bearer " + TWITTER_BEARER) payload = dict( count=100, result_type="recent", lang="en", q=sys.argv[1] ) base_url = "https://api.twitter.com/1.1/search/tweets.json?" def find_tweets(since_id): payload["since_id"] = since_id url = base_url + "q={q}&count={count}&result_type={result_type}&lang={lang}&since_id={since_id}".format(**payload) r = requests.get(url, headers=headers)
server = conf["protocol"]+"://"+conf["host"]+":"+str( conf["port"] ) logging.basicConfig(level=logging.ERROR) numiter = 10000 graph = Graph(server) label = "TAXID" # Hashes for storing stuff parentid={} scientific_list={} names_list={} idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE") def process_relationship( statements, graph ): tx = graph.begin() #print statements logging.info('proc sent') for statement in statements: #print statement start = graph.nodes.match(statement[0], id=int( statement[1] )).first() end = graph.nodes.match(statement[0], id=int( statement[2] )).first() rel = Relationship( start, statement[3], end ) tx.create( rel )
import datetime  # BUG FIX: datetime.datetime.now() below was used without this import
import sys
import traceback

import psycopg2
import psycopg2.extras
from py2neo import Graph, Node, Relationship, watch, authenticate

# Neo4j connection (py2neo v3 style: authenticate(), then the REST URL).
http_port = 7474
bolt_port = 7687
host = "x.com.cn"
authenticate("%s:%s" % (host, http_port), "name", "password")

# connect to authenticated graph database
g = Graph("http://%s:%s/db/data/" % (host, http_port), bolt_port=bolt_port)
# Smoke-test the connection with a trivial count query.
g.data('match (n) return count(*)')
g.run('match (n) return count(*)').dump()

now_datetime = datetime.datetime.now()


def getDBConnection():
    """Return a new PostgreSQL connection (10-second connect timeout).

    NOTE(review): port 432 looks like a typo for the PostgreSQL default
    5432 — confirm against the actual server configuration.
    """
    conn = psycopg2.connect(
        host="x.com.cn",
        port=432,
        user="******",
        password="******",
        dbname="xx",
        connect_timeout=10)
    return conn


# Echo py2neo's HTTP traffic to stderr for debugging.
watch('httpstream')
conn = getDBConnection()
from py2neo import Graph, Node
import os


def create_DrugFirm_node(file, g):
    """Bulk-load DrugFirm nodes from a CSV source via LOAD CSV.

    Args:
        file: URL of the CSV source (e.g. ``file:///drls_reg.txt``).
        g:    connected ``py2neo.Graph`` instance.

    Returns:
        The result cursor over ``id(df), df.firmName`` for the created nodes.
    """
    cypher = '''
    USING PERIODIC COMMIT 500
    LOAD CSV WITH HEADERS FROM {file} AS line FIELDTERMINATOR ' '
    CREATE(df:DrugFirm {dunsNumber: line.DUNS_NUMBER, firmName: line.FIRM_NAME, address: line.ADDRESS, operations: line.OPERATIONS})
    RETURN id(df), df.firmName
    '''
    return g.run(cypher, file=file)


if __name__ == "__main__":
    # Password comes from the environment (readme should document setting
    # this variable, e.g. in PyCharm run configs).
    neo4j_password = os.environ.get('NEO4J_PASS')
    graph_db = Graph("http://localhost:7474/", password=neo4j_password)

    graph_db.begin()  # kept from the original; the transaction is never used
    graph_db.run(''' CREATE INDEX ON: DrugFirm(firmName)''')
    print("Create index on DrugFirm(firmName)")

    source_url = 'file:///drls_reg.txt'
    create_DrugFirm_node(source_url, graph_db)
    print("Finish loading DrugFirm")
def UserStory4(Neo4JPwd, Dbname, redisdb):
    """For a user-chosen month, find the searched diseases and report, per
    disease, the drug with the fewest side effects.

    Pipeline: Redis (search logs for the month) -> Mongo (filter searches
    that are diseases) -> Neo4j (drugs treating each disease) -> Mongo
    (side effects per drug) -> pandas (group/pick minimum, print top 10).

    Parameters
    ----------
    Neo4JPwd : str      Neo4j password (localhost connection).
    Dbname : str        MongoDB database name.
    redisdb : int       Redis logical database number.
    """
    ## Intializations ###
    result = []
    result2 = []
    disease = []
    disease_final = []
    disease_drug = []
    disease_drug2 = []
    disease_drug3 = []
    disease_drug4 = []
    temp_se = {}
    result40 = []
    graph = Graph(password=Neo4JPwd)
    r = redis.Redis(db=redisdb)
    pipeline = r.pipeline()
    ### Redis Search - for all seacrhed topics####
    val = input("Please enter Month(in 2 digit format) of your choice: ")
    if len(val) != 2:
        print(
            "You didn't enter a 2 digit month code hence terminating program")
        return
    if int(val) <= 0 or int(val) > 12:
        print("You didnt enter a valid month digit hence terminating program")
        return
    print("Please wait for few seconds...")
    # Keys look like "<prefix>-MM-<...>SEARCH"; fetch up to 101 entries each.
    pattern = "*-" + val + "-*SEARCH"
    x = r.keys(pattern=pattern)
    for y in x:
        pipeline.lrange(y, 0, 100)
    result = pipeline.execute()
    for y in result:
        # Only the first element of each list is used — presumably the
        # searched term; confirm against the writer side.
        result2.append(y[0].decode('utf-8'))
    # Order terms by frequency (most common first), then dedupe keeping order.
    result2 = list(
        chain.from_iterable(
            repeat(i, c) for i, c in Counter(result2).most_common()))
    result3 = list(dict.fromkeys(result2))
    ### Mongo Search - for filter out diseases from search topics####
    client = MongoClient()
    db = client[Dbname]
    for temp in result3:
        x = db.Disease.find({"Disease": temp}, {"Disease": 1, "_id": 0})
        for y in x:
            disease.append(y['Disease'])
    ### Neo Search - to find drug of the diseases#### 
    for temp2 in disease:
        # NOTE(review): Cypher built by string concatenation — injection risk
        # if disease names ever contain quotes.
        query = """match(n:Disease)<-[:Medicine_For]-(b) where n.name=\"""" + \
            temp2 + """\" return n.name as Disease,b.Drug_Name as Drug"""
        neo_result = graph.run(query).data()
        disease_drug.append(neo_result)
    # Drop diseases with no drugs.
    disease_drug2 = list2 = [x for x in disease_drug if x != []]
    ### Mongo Search for side effects of the drugs ###
    for irr1 in disease_drug2:
        for irr2 in irr1:
            drug = irr2.get('Drug')
            disease5 = irr2.get('Disease')
            # Aggregation maps a null Side_Effects field to the string "Blank".
            pipeline = [{
                "$match": {
                    "Drug_Name": drug
                }
            }, {
                "$project": {
                    "Drug_Name": 1,
                    "_id": 0,
                    "Side_Effects": {
                        "$switch": {
                            "branches": [{
                                "case": {
                                    "$eq": ["$Side_Effects", None]
                                },
                                "then": "Blank"
                            }],
                            "default": "$Side_Effects"
                        }
                    }
                }
            }]
            result4 = db.Drugs.aggregate(pipeline)
            for irr3 in result4:
                irr3['Disease'] = disease5
                disease_drug3.append(irr3)
    ### Grouping and filtering for final result ###
    for curr in disease_drug3:
        temp_se = {}
        side_effects = curr.get('Side_Effects')
        if side_effects == 'Blank':
            count_se = 0
        else:
            # Side-effect text after "following:" lists one effect per line.
            if "following:" in side_effects:
                side_effects2 = side_effects.split("following:")
                count_se = side_effects2[1].count('\n') - 1
            else:
                count_se = 1
        temp_se['Disease'] = curr.get('Disease')
        temp_se['Drug'] = curr.get('Drug_Name')
        temp_se['No_Of_SE'] = count_se
        disease_drug4.append(temp_se)
    df_se = pd.DataFrame(disease_drug4)
    # Minimum side-effect count per disease.
    df_se_2 = df_se.groupby(['Disease']).agg({
        'No_Of_SE': ['min']
    }).reset_index()
    df_se_2.columns = df_se_2.columns.droplevel(1)
    df_se_3 = []
    df_se_4 = pd.DataFrame([], columns=list(['Disease', 'Drug', 'No_Of_SE']))
    # Re-select the full rows matching each (disease, min-count) pair.
    for index, row in df_se_2.iterrows():
        filter_1 = df_se["Disease"] == row['Disease']
        filter_2 = df_se["No_Of_SE"] == row['No_Of_SE']
        df_se_4 = df_se[filter_1 & filter_2]
        for index, row in df_se_4.iterrows():
            temp_se_2 = {}
            temp_se_2['Disease'] = row['Disease']
            temp_se_2['Drug'] = row['Drug']
            temp_se_2['No_Of_SE'] = row['No_Of_SE']
            result40.append(temp_se_2)
    result_df = pd.DataFrame(result40)
    pd.set_option('display.max_colwidth', -1)
    result_df = result_df.drop_duplicates()
    # Ties broken alphabetically (min of Drug).
    final = result_df.groupby('Disease').agg({'Drug': ['min']}).reset_index()
    final.columns = final.columns.droplevel(1)
    for kor in disease_drug3:
        disease_final.append(kor['Disease'])
    # Order-preserving dedupe; keep the first 10 diseases encountered.
    seen = set()
    seen_add = seen.add
    gog = [x for x in disease_final if not (x in seen or seen_add(x))]
    gog2 = gog[:10]
    final2 = final[final['Disease'].isin(gog2)]
    print(final2.to_string(index=False, justify='left'))
class CivilAviationKnowledgeGraph:
    """Builds a civil-aviation knowledge graph in Neo4j from a nested
    JSON file: collects entities/attributes/relations recursively, then
    creates nodes and relationships via py2neo."""

    def __init__(self):
        self.data_path = "./data/data.json"
        self.export_dir = "./data/dicts/"
        self.graph = Graph("http://localhost:7474", auth=("neo4j", "shawn"))
        self.entities = {}  # collected entities, keyed by prefix
        self.attrs = {}  # entity attributes, keyed by entity name
        self.rels_structures = set()  # structural relations between entities
        self.rels_values = []  # value relations between entities
        self.cur_value_rel_src = None  # source node of the current value relation
        self.cur_index_name = None  # name of the current indicator ("I" prefix)

    def collect(self):
        """Read the JSON data file and recursively collect entities,
        attributes and relations."""
        print("开始收集数据...")
        with open(self.data_path, 'r', encoding='gbk') as fp:
            data = json.load(fp)
        # collect recursively
        self._travel(data, first_time=True)
        print("数据收集完毕!")

    def _travel(self, objs: dict, parent: tuple = None, first_time=False):
        """Walk the nested dict; keys are '<prefix>-<name>' fields, values
        hold optional 'attrs', 'rels' and child 'next' dicts."""
        for fields, entities in objs.items():
            # split into prefix and entity name
            prefix, name = fields.split('-')
            if first_time:
                self.cur_value_rel_src = (prefix, name)
            # remember the current indicator name for value relations below
            if prefix == 'I' and entities.get("next"):
                self.cur_index_name = name
            # entity attributes
            attrs = entities.get("attrs")
            if attrs:
                self.collect_attrs(name, attrs)
            # next level of entities
            next_ = entities.get("next")
            # relations
            rels = entities.get("rels")
            self.collect_entity(prefix, name)
            if parent:
                self.collect_structure_rel(parent, (prefix, name))
            if rels:
                self.collect_value_rel((prefix, name), rels)
            if next_:
                self._travel(next_, (prefix, name))

    def collect_entity(self, key: str, name: str):
        """Record an entity name under its prefix."""
        self.entities.setdefault(key, set()).add(name)

    def collect_attrs(self, name: str, attrs: dict):
        """Record the attribute dict of an entity."""
        self.attrs[name] = attrs

    def collect_structure_rel(self, src: tuple, dst: tuple):
        """Record a structural (parent-child) relation as
        ('SRC-DST' prefix pair, src name, dst name)."""
        self.rels_structures.add((src[0] + '-' + dst[0], src[1], dst[1]))

    def collect_value_rel(self, dst: tuple, attrs: dict):
        """Record a value relation; only Area ('A') targets are kept.
        The relation name is the current indicator name."""
        if dst[0] == 'A':
            # Year-Area($Index$)
            attrs['name'] = self.cur_index_name
            self.rels_values.append((self.cur_value_rel_src[0] + '-' + dst[0],
                                     self.cur_value_rel_src[1], dst[1], attrs))

    def build(self):
        """Build the knowledge graph from the collected data."""
        print("开始构建实体...")
        self.build_nodes()
        print("实体构建完毕!")
        print("开始构建关系...")
        self.build_relationships()
        print("关系构建完毕!")

    def build_nodes(self):
        """Create one node per collected entity; labels come from
        PREFIX_LABEL_MAP (defined elsewhere in the file)."""
        for prefix, nodes in self.entities.items():
            label = PREFIX_LABEL_MAP[prefix]
            for name in nodes:
                self.create_node(label, name, self.attrs.get(name))

    def build_relationships(self):
        """Create structural then value relationships; relation types come
        from PREFIX_S_REL_MAP / PREFIX_V_REL_MAP (defined elsewhere)."""
        for (prefix, src, dst) in self.rels_structures:
            a, b = prefix.split('-')
            la = PREFIX_LABEL_MAP[a]
            lb = PREFIX_LABEL_MAP[b]
            rel = PREFIX_S_REL_MAP[prefix]
            self.create_relationship(la, lb, src, dst, rel)
        for (prefix, src, dst, attrs) in self.rels_values:
            a, b = prefix.split('-')
            la = PREFIX_LABEL_MAP[a]
            lb = PREFIX_LABEL_MAP[b]
            rel = PREFIX_V_REL_MAP[prefix]
            # fall back to the indicator name when no mapped relation exists
            self.create_relationship(la, lb, src, dst,
                                     rel if rel else attrs['name'], attrs)

    def create_node(self, label: str, name: str, attrs=None):
        """Create a node with the given label, name and optional attributes."""
        if attrs is None:
            attrs = {}
        node = Node(label, name=name, **attrs)
        self.graph.create(node)

    def create_relationship(self, src_label: str, dst_label: str, src: str,
                            dst: str, rel: str, attrs=None):
        """Create a relationship between two named nodes via raw Cypher.

        NOTE(review): the query is built by f-string interpolation — names
        containing quotes would break it (injection risk); errors are only
        printed, not re-raised.
        """
        if attrs:
            rel_attrs = ", ".join([f"{k}: '{v}'" for k, v in attrs.items()])
        else:
            rel_attrs = "name:'%s'" % rel
        query = f"match(s:{src_label}),(d:{dst_label}) where s.name='{src}' and d.name='{dst}' " \
                f"create (s)-[rel:{rel} {{{rel_attrs}}}]->(d)"
        try:
            self.graph.run(query)
        except Exception as err:
            print(err)

    def export_collections(self):
        """Export collected entities, one dictionary file per label."""
        if not os.path.exists(self.export_dir):
            os.mkdir(self.export_dir)
        for key, values in self.entities.items():
            write_to_file(f"./data/dicts/{PREFIX_LABEL_MAP[key]}.txt", values)
        print("导出实体数据完毕.")
import json
from py2neo import Graph

# Open an (unauthenticated) connection to the remote Neo4j instance.
db = Graph('http://45.55.182.50:7474/')

# Make business-id and category-name lookups fast before bulk loading.
for _index_stmt in ("CREATE INDEX ON :Business(id)",
                    "CREATE INDEX ON :Category(name)"):
    db.run(_index_stmt)

# Upsert a business node with its attributes, then connect it to every
# category it belongs to (legacy {param} placeholder syntax).
create_business_query = '''
// MERGE ON categories
MERGE (b:Business {id: {business_id}})
ON CREATE SET b.name = {name},
b.latitude = {latitude},
b.longitude = {longitude},
b.stars = {stars},
b.review_count = {review_count}
WITH b
UNWIND {categories} AS category
MERGE (c:Category {name: category})
MERGE (b)-[:IS_IN]->(c)
'''

# Attach a single (possibly new) category to an existing business.
merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''
def UserStory1(Neo4JPwd, Dbname, redisdb):
    """Interactive symptom checker: given symptoms typed by the user, find
    matching diseases in Neo4j, log the search in Redis, and optionally show
    treatment details pulled from MongoDB.

    Parameters
    ----------
    Neo4JPwd : str      Neo4j password (localhost connection).
    Dbname : str        MongoDB database name.
    redisdb : int       Redis logical database number.
    """
    graph = Graph(password=Neo4JPwd)
    print("Enter the symptoms with space")
    inp = [str(i).lower() for i in input().split()]
    if inp == []:
        print(
            "Entered input is not valid.Please try again by entering symptoms with space"
        )
        return
    # Diseases caused by ALL of the entered symptoms (parameterized query).
    results = graph.run('''with $in as symptoms match (s:Symptom) Where toLower(s.name) in symptoms with collect(s) as symptoms match (d:Disease) where all(s in symptoms Where (s)-[:CAUSES]->(d)) return DISTINCT d.name as Disease, d.id as Disease_id''',
                        parameters={
                            'in': inp
                        }).data()
    now = datetime.now()
    current_time = now.strftime("%d-%m-%Y:%H:%M:%S")
    # Random username tags this search session in Redis.
    username = ''.join(map(str, generate_username(1)))
    r = redis.Redis(db=redisdb)
    client = MongoClient()
    db = client[Dbname]
    collection = db.Disease
    if results == []:
        # No disease matched: log the symptoms under a NODISEASE key.
        print(
            f"Tring.. Tring..... Tring............ database does not have any disease associated with mentioned symptoms {inp}"
        )
        print(
            "--------------------------------------------------------------------------------"
        )
        for i in range(len(inp)):
            key = 'NODISEASE' + ':' + current_time + ':' + username.upper()
            r.lpush(key, inp[i].upper())
            print(
                f"Search data {inp[i].upper()} inserted into Redis with key {key} for further validation"
            )
        print(
            "--------------------------------------------------------------------------------"
        )
        return
    record_ids = [record['Disease'].lower() for record in results]
    # Mongo _id values for the matched diseases.
    record_ids2 = [ObjectId(record['Disease_id']) for record in results]
    print(f"{inp} symptoms causes disease(s) {record_ids}")
    print(
        "--------------------------------------------------------------------------------"
    )
    # Log each symptom under a SYMPTOMS key.
    for i in range(len(inp)):
        key = 'SYMPTOMS' + ':' + current_time + ':' + username.upper()
        r.lpush(key, inp[i].upper())
        print(
            f"Search data {inp[i].upper()} inserted into Redis with key {key}")
    print(
        "--------------------------------------------------------------------------------"
    )
    inp1 = input(
        "You want to know more about Diseases and their treatments (y/n):"
    ).lower()
    if inp1 != 'y':
        return
    print(
        "Please enter Disease you want to know from above list, enter all if you want to know about all Diseases:"
    )
    disease = input().lower()
    print(disease)
    if disease != 'all' and disease in record_ids:
        # Single disease: case-insensitive regex lookup in Mongo.
        query = collection.find(
            {'Disease': re.compile(disease, re.IGNORECASE)})
        df = pd.DataFrame(query)
        for index, row in df.iterrows():
            print(
                "--------------------------------------------------------------------------------"
            )
            print(f"{inp} causes '{row['Disease']}'")
            print(f"please find some treatements for '{row['Disease']}''")
            print(row['Diagnosis_treatment'])
            if 'Specialization' in row:
                print(
                    f"This '{row['Disease']}' can be treated by doctors having specialization of {row['Specialization']['Name']}"
                )
            key = 'DISEASE' + ':' + current_time + ':' + username.upper()
            r.lpush(key, row['Disease'].upper())
            print(
                "--------------------------------------------------------------------------------"
            )
            print(
                f"Search data {row['Disease'].upper()} inserted into Redis with key {key}"
            )
    else:
        if disease == 'all':
            # All matched diseases: fetch by _id list.
            print(record_ids2)
            query = collection.find({'_id': {'$in': record_ids2}})
            df = pd.DataFrame(query)
            print(df)
            for index, row in df.iterrows():
                print(
                    "---------------------------------------------------------------------------------------------"
                )
                print(f"{inp} causes '{row['Disease']}'")
                print(f"please find some treatements for '{row['Disease']}''")
                print(row['Diagnosis_treatment'])
                if 'Specialization' in row:
                    # Specialization may be missing/NaN for some rows.
                    if not pd.isna(row['Specialization']):
                        print(
                            f"This '{row['Disease']}' can be treated by doctors with specialization of {row['Specialization']['Name']}"
                        )
                key = 'DISEASE' + ':' + current_time + ':' + username.upper()
                r.lpush(key, row['Disease'].upper())
                print(
                    "--------------------------------------------------------------------------------"
                )
                print(
                    f"Search data {row['Disease'].upper()} inserted into Redis with key {key}"
                )
            print(
                "--------------------------------------------------------------------------------"
            )
        else:
            print(
                f"Entered {disease} does not exist in our database. Thank you for choosing healthcare engine"
            )
class BaseContext(object):
    """
    Base CorpusContext class.  Inherit from this and extend to create
    more functionality.

    Parameters
    ----------
    args : arguments or :class:`~polyglotdb.config.CorpusConfig`
        If the first argument is not a CorpusConfig object, it is
        the name of the corpus
    kwargs : keyword arguments
        If a :class:`~polyglotdb.config.CorpusConfig` object is not specified,
        all arguments and keyword arguments are passed to a CorpusConfig object
    """

    def __init__(self, *args, **kwargs):
        if len(args) == 0:
            raise(CorpusConfigError('Need to specify a corpus name or CorpusConfig.'))
        if isinstance(args[0], CorpusConfig):
            self.config = args[0]
        else:
            self.config = CorpusConfig(*args, **kwargs)
        self.config.init()
        self.graph = Graph(**self.config.graph_connection_kwargs)
        self.corpus_name = self.config.corpus_name
        if self.corpus_name:
            self.init_sql()
        self.hierarchy = Hierarchy({})
        self.lexicon = Lexicon(self)
        self.census = Census(self)
        self._has_sound_files = None
        self._has_all_sound_files = None
        # When frozen (PyInstaller), bundled binaries live on sys.path[-1];
        # otherwise locate reaper/praat on PATH.
        if getattr(sys, 'frozen', False):
            self.config.reaper_path = os.path.join(sys.path[-1], 'reaper')
        else:
            self.config.reaper_path = shutil.which('reaper')
        if sys.platform == 'win32':
            praat_exe = 'praatcon.exe'
        else:
            praat_exe = 'praat'
        if getattr(sys, 'frozen', False):
            self.config.praat_path = os.path.join(sys.path[-1], praat_exe)
        else:
            self.config.praat_path = shutil.which(praat_exe)
        self.config.query_behavior = 'speaker'

    def load_variables(self):
        """ Loads variables into Hierarchy """
        try:
            with open(os.path.join(self.config.data_dir, 'variables'), 'rb') as f:
                var = pickle.load(f)
            self.hierarchy = var['hierarchy']
        except FileNotFoundError:
            # First run for this corpus: derive the hierarchy and persist it.
            if self.corpus_name:
                self.hierarchy = self.generate_hierarchy()
                self.save_variables()

    def save_variables(self):
        """ saves variables to hierarchy"""
        with open(os.path.join(self.config.data_dir, 'variables'), 'wb') as f:
            pickle.dump({'hierarchy': self.hierarchy}, f)

    def init_sql(self):
        """ initializes sql connection """
        self.engine = create_engine(self.config.sql_connection_string)
        Session.configure(bind=self.engine)
        if not os.path.exists(self.config.db_path):
            Base.metadata.create_all(self.engine)

    def execute_cypher(self, statement, **parameters):
        """ Executes a cypher query

        Parameters
        ----------
        statement : str
            the cypher statement
        parameters : dict
            keyword arguments to execute a cypher statement

        Returns
        -------
        query result :
        or raises error
        """
        # Each except clause maps a low-level py2neo/Neo4j error to a
        # friendlier PolyglotDB error; ConstraintError is deliberately
        # swallowed (duplicate inserts are treated as no-ops).
        try:
            return self.graph.run(statement, **parameters)
        except py2neo.packages.httpstream.http.SocketError:
            raise(ConnectionError('PolyglotDB could not connect to the server specified.'))
        except ClientError:
            raise
        except (Unauthorized):
            raise(AuthorizationError('The specified user and password were not authorized by the server.'))
        except Forbidden:
            raise(NetworkAddressError('The server specified could not be found. Please double check the server address for typos or check your internet connection.'))
        except (TransientError):
            raise(TemporaryConnectionError('The server is (likely) temporarily unavailable.'))
        except ConstraintError:
            pass
        except Exception:
            raise

    @property
    def discourses(self):
        '''
        Return a list of all discourses in the corpus.
        '''
        # SQL is the cache; on a miss, pull names from Neo4j and backfill SQL.
        q = self.sql_session.query(Discourse).all()
        if not len(q):
            res = self.execute_cypher('''MATCH (d:Discourse:{corpus_name}) RETURN d.name as discourse'''.format(corpus_name = self.corpus_name))
            discourses = []
            for d in res:
                instance = Discourse(name = d.discourse)
                self.sql_session.add(instance)
                discourses.append(d.discourse)
            self.sql_session.flush()
            return discourses
        return [x.name for x in q]

    @property
    def speakers(self):
        """
        Gets a list of speakers in the corpus

        Returns
        -------
        names : list
            all the speaker names
        """
        # Same SQL-cache-with-Neo4j-backfill pattern as `discourses`.
        q = self.sql_session.query(Speaker).all()
        if not len(q):
            res = self.execute_cypher('''MATCH (s:Speaker:{corpus_name}) RETURN s.name as speaker'''.format(corpus_name = self.corpus_name))
            speakers = []
            for s in res:
                print(s)
                instance = Speaker(name = s['speaker'])
                self.sql_session.add(instance)
                speakers.append(s['speaker'])
            self.sql_session.flush()
            return speakers
        return [x.name for x in q]

    def __enter__(self):
        self.sql_session = Session()
        self.load_variables()
        #if self.corpus_name:
        #    self.hierarchy = self.generate_hierarchy()
        return self

    def __exit__(self, exc_type, exc, exc_tb):
        # NOTE(review): returning True on the success path is redundant but
        # harmless; on error the exception propagates after rollback/cleanup.
        if exc_type is None:
            #try:
            #    shutil.rmtree(self.config.temp_dir)
            #except:
            #    pass
            self.sql_session.commit()
            return True
        else:
            self.sql_session.rollback()
        self.sql_session.expunge_all()
        self.sql_session.close()

    def __getattr__(self, key):
        # Unknown attributes are resolved as annotation-type accessors.
        if key == 'pause':
            return PauseAnnotation(corpus = self.corpus_name)
        if key + 's' in self.hierarchy.annotation_types:
            key += 's' # FIXME
        if key in self.hierarchy.annotation_types:
            return AnnotationAttribute(key, corpus = self.corpus_name, hierarchy = self.hierarchy)
        raise(GraphQueryError('The graph does not have any annotations of type \'{}\'.  Possible types are: {}'.format(key, ', '.join(sorted(self.hierarchy.annotation_types)))))

    @property
    def word_name(self):
        """
        Gets the word label

        Returns
        -------
        word : str
            word name
        """
        for at in self.hierarchy.annotation_types:
            if at.startswith('word'): #FIXME need a better way for storing word name
                return at
        return 'word'

    @property
    def phone_name(self):
        """
        Gets the phone label

        Returns
        -------
        phone : str
            phone name
        """
        name = self.hierarchy.lowest
        if name is None:
            name = 'phone'
        return name

    def reset_graph(self, call_back = None, stop_check = None):
        '''
        Remove all nodes and relationships in the graph
        that are apart of this corpus.
        '''
        if call_back is not None:
            call_back('Resetting database...')
            number = self.execute_cypher('''MATCH (n:%s)-[r]-() return count(*) as number ''' % (self.corpus_name)).evaluate()
            call_back(0, number * 2)
        num_deleted = 0
        # Delete relationships in 50k batches until none remain.
        deleted = 1000
        while deleted > 0:
            if stop_check is not None and stop_check():
                break
            deleted = self.execute_cypher('''MATCH (n:%s)-[r]-()
                with r LIMIT 50000
                DELETE r
                return count(r) as deleted_count ''' % (self.corpus_name)).evaluate()
            num_deleted += deleted
            if call_back is not None:
                call_back(num_deleted)
        # Then delete the now-disconnected nodes in 50k batches.
        deleted = 1000
        while deleted > 0:
            if stop_check is not None and stop_check():
                break
            deleted = self.execute_cypher('''MATCH (n:%s)
                with n LIMIT 50000
                DELETE n
                return count(n) as deleted_count ''' % (self.corpus_name)).evaluate()
            num_deleted += deleted
            if call_back is not None:
                call_back(num_deleted)
        self.reset_hierarchy()
        self.hierarchy = Hierarchy({})

    def reset(self, call_back = None, stop_check = None):
        '''
        Reset the graph and SQL databases for a corpus.
        '''
        self.reset_graph(call_back, stop_check)
        try:
            Base.metadata.drop_all(self.engine)
        except sqlalchemy.exc.OperationalError:
            pass
        Base.metadata.create_all(self.engine)

    def query_graph(self, annotation_type):
        '''
        Return a :class:`~polyglotdb.config.GraphQuery` for the specified annotation type.

        When extending :class:`~polyglotdb.config.GraphQuery` functionality,
        this function must be overwritten.

        Parameters
        ----------
        annotation_type : str
            The type of annotation to look for in the corpus
        '''
        if annotation_type.type not in self.hierarchy.annotation_types \
                and annotation_type.type != 'pause': #FIXME make more general
            raise(GraphQueryError('The graph does not have any annotations of type \'{}\'.  Possible types are: {}'.format(annotation_type.name, ', '.join(sorted(self.hierarchy.annotation_types)))))
        # Query class is chosen by configured behavior.
        if self.config.query_behavior == 'speaker':
            cls = SpeakerGraphQuery
        elif self.config.query_behavior == 'discourse':
            cls = DiscourseGraphQuery
        else:
            cls = GraphQuery
        return cls(self, annotation_type)

    @property
    def annotation_types(self):
        return self.hierarchy.annotation_types

    @property
    def lowest_annotation(self):
        '''
        Returns the annotation type that is the lowest in the hierarchy
        of containment.
        '''
        return self.hierarchy.lowest

    def remove_discourse(self, name):
        '''
        Remove the nodes and relationships associated with a single
        discourse in the corpus.

        Parameters
        ----------
        name : str
            Name of the discourse to remove
        '''
        self.execute_cypher('''MATCH (n:%s:%s)-[r]->()
                DELETE n, r''' % (self.corpus_name, name))

    def discourse(self, name, annotations = None):
        '''
        Get all words spoken in a discourse.

        Parameters
        ----------
        name : str
            Name of the discourse
        '''
        w = getattr(self, self.word_name) #FIXME make more general
        q = GraphQuery(self, w)
        q = q.filter(w.discourse.name == name)
        q = q.order_by(w.begin)
        return q.all()
def UserStory8(Neo4JPwd, Dbname, redisdb):
    """Show the top trending drugs for a user-chosen season.

    Pipeline: Redis (search logs matching the season's month pattern) ->
    Mongo (split searches into drugs vs. diseases) -> Neo4j (drugs treating
    the searched diseases) -> Counter (combined frequencies) ->
    matplotlib bar chart saved as '<season>.png'.

    Parameters
    ----------
    Neo4JPwd : str      Neo4j password (localhost connection).
    Dbname : str        MongoDB database name.
    redisdb : int       Redis logical database number.
    """
    ## Intializations ###
    result = []
    result2 = []
    result3 = []
    drug = []
    disease = []
    disease_drug = []
    disease_drug2 = []
    Drugsofdiseaseslist = []
    Sortedlistelements = []
    Top5Drugs = []
    # Neo Graph
    graph = Graph(password=Neo4JPwd)
    # Redis connection
    r = redis.Redis(db=redisdb)
    pipeline = r.pipeline()
    val = input("Please enter season number for your choice(1-4)\n"
                "(1 - Rainy,\n 2 - Summer,\n 3 - Winter,\n 4-Spring): ")
    # Map season choice to a glob over the month portion of the Redis keys.
    # NOTE(review): "[10][0-1]" matches '1'/'0' then '0'/'1' — likely intended
    # to be months 10-12/01; verify the patterns against the key format.
    if (val == "1"):
        patterns = "*-0[6-9]-*SEARCH*"
        season = "rainy"
    elif (val == "2"):
        patterns = "*-0[3-5]-*SEARCH*"
        season = "summer"
    elif (val == "3"):
        patterns = "*-[10][0-1]-*SEARCH*"
        season = "winter"
    else:
        patterns = "*-0[2-3]-*SEARCH*"
        season = "spring"
    ### Redis Search - for all seacrhed topics####
    x = r.keys(pattern=patterns)
    for y in x:
        pipeline.lrange(y, 0, 100)
    result = pipeline.execute()
    for y in result:
        result2.append(
            y[0].decode('utf-8'))  ###decoding the elements(removing eccoding)
    # sorted(result2, key=result2.count, reverse=True)
    result2 = list(
        chain.from_iterable(
            repeat(i, c) for i, c in Counter(result2).most_common())
    )  ###sorting the elemts on basis of count
    result3 = list(dict.fromkeys(result2))  ###removing duplicates
    ### Mongo Search - for filter out drugs from search topics####
    client = MongoClient()
    db = client[Dbname]
    drug = [
        y['Drug_Name'] for temp in result3
        for y in db.Drugs.find({"Drug_Name": temp}, {
            "Drug_Name": 1,
            "_id": 0
        })
    ]
    ## Mongo Search - for filter out diseases from search topics####
    disease = [
        y['Disease'] for temp in result3
        for y in db.Disease.find({"Disease": temp}, {
            "Disease": 1,
            "_id": 0
        })
    ]
    ### Neo Search - to find drug of the diseases####
    # NOTE(review): Cypher built by string concatenation — injection risk.
    disease_drug = [graph.run("""match(n:Disease)<-[:Medicine_For]-(b) where n.name=\"""" + \
        temp2 + """\" return b.Drug_Name as Drug""").data() for temp2 in disease]
    disease_drug2 = list2 = [x for x in disease_drug
                             if x != []]  ###remove null elements list
    for irr1 in disease_drug2:
        for irr2 in irr1:
            drugs = irr2.get('Drug')
            Drugsofdiseaseslist.append(drugs)

    def Sort_Tuple(tup):
        # Sort (drug, count) pairs by count, descending.
        return (sorted(tup, key=lambda x: x[1], reverse=True))

    # Combine direct drug-search counts with disease-derived drug counts.
    counts = Counter(drug)
    counts.update(Drugsofdiseaseslist)
    Sortedlistelements = Sort_Tuple(counts.items())
    # NOTE(review): despite the name, this keeps the top 10, not 5.
    Top5Drugs = Sortedlistelements[:10]
    newdict = dict(Top5Drugs)
    names = list(newdict.keys())
    values = list(newdict.values())
    # bar graph for top 5 drugs
    plt.figure(figsize=(12, 12))
    plt.bar(range(len(newdict)), values, tick_label=names)
    plt.xticks(range(len(newdict)), names, rotation=90)
    plt.xlabel('Drugs', fontsize=18)
    plt.ylabel('Count of Drugs', fontsize=16)
    plt.savefig(season + '.png', bbox_inches='tight')
    print(
        "Output Figure is saved with respective season name for more clarity")
    print("displaying top 10 trending drugs for", season + " season")
    plt.show()
import requests, re
from lxml import html
from py2neo import Graph, Node, Relationship

# find a numeric ID in a URL (without any other digits)
findid = re.compile(r"\d+")

# change the Neo4j password to yours
g = Graph(user="******", password="******")

# reset the graph
# take care to use an underscore in ARTIST_OF
# or in queries you will need to use tic quotes ``
g.run('MATCH () -[r:ARTIST_OF] -> () DELETE r;')
g.run('MATCH (n:Artist) DELETE n;')
g.run('MATCH (m:Artwork) DELETE m;')


def ScrapeCollection(workID):
    """Fetch a MoMA collection page and extract the work's title and date.

    NOTE(review): this chunk appears truncated — the date loop has no break
    and the function visibly neither returns nor stores the scraped values;
    confirm against the full file.
    """
    page = requests.get('http://moma.org/collection/works/' + str(workID))
    tree = html.fromstring(page.content)
    # the title is a complex, potentially italicized field
    titles = tree.cssselect('.short-caption h1.object-tile--gothic')
    for title in titles:
        full_title = title.text.strip()
        break
    # the date is a string field which can be a year, range of years, or approximation
    dates = tree.cssselect('.short-caption h3')
    for date in dates:
        first_date = date.text.strip()
def UserStory9(Neo4JPwd, redisdb):
    """Plot the monthly search trend of a drug against one competitor.

    Competitors are drugs that treat any of the same diseases (shared
    :Medicine_For targets in Neo4j); monthly counts come from Redis
    search-log keys.

    Parameters
    ----------
    Neo4JPwd : str      Neo4j password (localhost connection).
    redisdb : int       Redis logical database number.
    """
    graph = Graph(password=Neo4JPwd)
    drug_in = input("Please enter your Drug Name: ")
    # NOTE(review): Cypher built by concatenating raw user input — injection
    # risk; should use a query parameter instead.
    result3 = graph.run(
        "match (a:Drug)-[:Medicine_For]->()<-[:Medicine_For]-(b:Drug) where a.Drug_Name='"
        + drug_in + "' return b.Drug_Name").data()
    if not result3:
        print(
            "Drug Name entered didn't match any drug names in the database. Hence ending script."
        )
        return
    print("This will take around 30 seconds...")
    plt.style.use("ggplot")
    r = redis.Redis(db=redisdb)
    pipeline = r.pipeline()
    result = []
    disease_list = []
    trend_list = []
    result2 = []
    x = r.keys(pattern="*SEARCH*")
    for y in x:
        temp = {}
        # Key format assumed "<prefix>:DD-MM-YYYY...:..." — [3:] strips the
        # day part, leaving "MM-YYYY"; verify against the writer side.
        temp['Key'] = y.decode("utf-8").split(":")[1][3:]
        pipeline.lrange(y, 0, 100)
        result.append(temp)
    pip = pipeline.execute()
    # Pair each key's month with the first value stored under it.
    for var, car in zip(result, pip):
        temp = {}
        temp['Key'] = var['Key']
        temp['Value'] = car[0].decode("utf-8")
        result2.append(temp)
    df1 = pd.DataFrame(result2)
    df1['Key'] = pd.to_datetime(df1['Key'], format='%m-%Y')
    for z in result3:
        disease_list.append(z['b.Drug_Name'])
    disease_list = list(set(disease_list))
    print("Competitor Drugs: -")
    flag = 0
    for dl in disease_list:
        flag = flag + 1
        print(flag, ".", dl)
    comp_in = input(
        "Please enter name of Competitor Drug you wanna see trend with - ")
    if comp_in not in disease_list:
        print(
            "Entered Competitor Drug wasn't from the list. Hence ending program."
        )
        return
    trend_list.append(drug_in)
    trend_list.append(comp_in)
    # Monthly counts per drug, pivoted to one column per drug.
    df2 = df1[df1['Value'].isin(trend_list)]
    df3 = df2.groupby(['Key', 'Value']).size().reset_index(name='counts')
    final = pd.pivot_table(df3, index=['Key'], columns=['Value'], fill_value=0)
    final.columns = final.columns.droplevel(0)
    # NOTE(review): sort_values result is discarded (not in-place) — the plot
    # relies on the index already being in date order.
    final.sort_values(by=['Key'])
    final.plot(figsize=(20, 10)).legend(title='Drugs')
    plt.show()
from py2neo import Graph

graph = Graph(password='******')

# Fetch up to 100 ASN-to-ASN links; each record unpacks into
# (source name, relationship object, destination name).
results = graph.run(
    'MATCH (s:asn)-[r:TO]->(d:asn) '
    'RETURN s.name as source, r as relationship, d.name as dest '
    'LIMIT 100'
)
for source, relationship, dest in results:
    print('source: {} dest: {} relationship: {}'.format(source, dest, relationship))
def setUp(self):
    """Prepare each test: fresh Flask test client, empty Neo4j graph."""
    self.app = app.app.test_client()
    # Connection URL comes from the environment (GrapheneDB) with a
    # localhost fallback for local runs; bolt is disabled explicitly.
    neo_url = os.environ.get('GRAPHENEDB_URL', 'http://localhost:7474')
    db = Graph(neo_url, bolt=False)
    # Drop every node and its relationships so tests start from a clean slate.
    db.run("MATCH (a) DETACH DELETE a")
class MedicalGraph:
    """Builds a medical knowledge graph in Neo4j from disease.csv:
    parses entities (diseases, symptoms, drugs, ...) and their relations,
    then creates nodes and relationship edges via py2neo."""

    def __init__(self):
        cur_dir = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        self.data_path = os.path.join(cur_dir, 'disease.csv')
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def read_file(self):
        """
        Read the CSV file and extract entities and entity relations.
        :return: entity sets, relation lists and per-disease attribute dicts
        """
        # cols = ["name", "alias", "part", "age", "infection", "insurance", "department", "checklist", "symptom",
        #         "complication", "treatment", "drug", "period", "rate", "money"]
        # Entities
        diseases = []  # diseases
        aliases = []  # aliases
        symptoms = []  # symptoms
        parts = []  # body parts
        departments = []  # hospital departments
        complications = []  # complications
        drugs = []  # drugs
        # Disease attributes: age, infection, insurance, checklist, treatment, period, rate, money
        diseases_infos = []
        # Relations
        disease_to_symptom = []  # disease -> symptom
        disease_to_alias = []  # disease -> alias
        diseases_to_part = []  # disease -> body part
        disease_to_department = []  # disease -> department
        disease_to_complication = []  # disease -> complication
        disease_to_drug = []  # disease -> drug
        all_data = pd.read_csv(self.data_path, encoding='gb18030').loc[:, :].values
        for data in all_data:
            disease_dict = {}  # per-disease attribute dict
            # Disease name
            disease = str(data[0]).replace("...", " ").strip()
            disease_dict["name"] = disease
            # Aliases
            # NOTE(review): str(data[1]) is always truthy (even for NaN), so
            # the "未知" fallback here and below never triggers — verify.
            line = re.sub("[,、;,.;]", " ", str(data[1])) if str(data[1]) else "未知"
            for alias in line.strip().split():
                aliases.append(alias)
                disease_to_alias.append([disease, alias])
            # Body parts
            part_list = str(data[2]).strip().split() if str(data[2]) else "未知"
            for part in part_list:
                parts.append(part)
                diseases_to_part.append([disease, part])
            # Age group
            age = str(data[3]).strip()
            disease_dict["age"] = age
            # Infectious or not
            infect = str(data[4]).strip()
            disease_dict["infection"] = infect
            # Insurance coverage
            insurance = str(data[5]).strip()
            disease_dict["insurance"] = insurance
            # Departments
            department_list = str(data[6]).strip().split()
            for department in department_list:
                departments.append(department)
                disease_to_department.append([disease, department])
            # Checklist items
            check = str(data[7]).strip()
            disease_dict["checklist"] = check
            # Symptoms (trailing token dropped by [:-1])
            symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
            for symptom in symptom_list:
                symptoms.append(symptom)
                disease_to_symptom.append([disease, symptom])
            # Complications (trailing token dropped by [:-1])
            complication_list = str(data[9]).strip().split()[:-1] if str(data[9]) else "未知"
            for complication in complication_list:
                complications.append(complication)
                disease_to_complication.append([disease, complication])
            # Treatment (last 4 characters trimmed)
            treat = str(data[10]).strip()[:-4]
            disease_dict["treatment"] = treat
            # Drugs
            drug_string = str(data[11]).replace("...", " ").strip()
            for drug in drug_string.split()[:-1]:
                drugs.append(drug)
                disease_to_drug.append([disease, drug])
            # Recovery period
            period = str(data[12]).strip()
            disease_dict["period"] = period
            # Cure rate
            rate = str(data[13]).strip()
            disease_dict["rate"] = rate
            # Cost
            money = str(data[14]).strip() if str(data[14]) else "未知"
            disease_dict["money"] = money
            diseases_infos.append(disease_dict)
        return set(diseases), set(symptoms), set(aliases), set(parts), set(departments), set(complications), \
               set(drugs), disease_to_alias, disease_to_symptom, diseases_to_part, disease_to_department, \
               disease_to_complication, disease_to_drug, diseases_infos

    def create_node(self, label, nodes):
        """
        Create one node per name.
        :param label: node label
        :param nodes: iterable of node names
        :return:
        """
        count = 0
        for node_name in nodes:
            node = Node(label, name=node_name)
            self.graph.create(node)
            count += 1
            print(count, len(nodes))
        return

    def create_diseases_nodes(self, disease_info):
        """
        Create Disease nodes with their attributes.
        :param disease_info: list(Dict)
        :return:
        """
        count = 0
        for disease_dict in disease_info:
            node = Node("Disease", name=disease_dict['name'], age=disease_dict['age'],
                        infection=disease_dict['infection'], insurance=disease_dict['insurance'],
                        treatment=disease_dict['treatment'], checklist=disease_dict['checklist'],
                        period=disease_dict['period'], rate=disease_dict['rate'],
                        money=disease_dict['money'])
            self.graph.create(node)
            count += 1
            print(count)
        return

    def create_graphNodes(self):
        """
        Create all knowledge-graph entity nodes.
        :return:
        """
        disease, symptom, alias, part, department, complication, drug, rel_alias, rel_symptom, rel_part, \
        rel_department, rel_complication, rel_drug, rel_infos = self.read_file()
        self.create_diseases_nodes(rel_infos)
        self.create_node("Symptom", symptom)
        self.create_node("Alias", alias)
        self.create_node("Part", part)
        self.create_node("Department", department)
        self.create_node("Complication", complication)
        self.create_node("Drug", drug)
        return

    def create_graphRels(self):
        # Re-reads the CSV, then creates every relationship type.
        disease, symptom, alias, part, department, complication, drug, rel_alias, rel_symptom, rel_part, \
        rel_department, rel_complication, rel_drug, rel_infos = self.read_file()
        self.create_relationship("Disease", "Alias", rel_alias, "ALIAS_IS", "别名")
        self.create_relationship("Disease", "Symptom", rel_symptom, "HAS_SYMPTOM", "症状")
        self.create_relationship("Disease", "Part", rel_part, "PART_IS", "发病部位")
        self.create_relationship("Disease", "Department", rel_department, "DEPARTMENT_IS", "所属科室")
        self.create_relationship("Disease", "Complication", rel_complication, "HAS_COMPLICATION", "并发症")
        self.create_relationship("Disease", "Drug", rel_drug, "HAS_DRUG", "药品")

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """
        Create relationship edges between entities.
        :param start_node: start node label
        :param end_node: end node label
        :param edges: list of [start name, end name] pairs
        :param rel_type: relationship type
        :param rel_name: relationship display name
        :return:
        """
        count = 0
        # Deduplicate edges by joining the pair with '###'.
        set_edges = []
        for edge in edges:
            set_edges.append('###'.join(edge))
        all = len(set(set_edges))
        for edge in set(set_edges):
            edge = edge.split('###')
            p = edge[0]
            q = edge[1]
            # NOTE(review): query built by %-interpolation — names containing
            # quotes would break it (injection risk).
            query = "match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)" % (
                start_node, end_node, p, q, rel_type, rel_name)
            try:
                self.graph.run(query)
                count += 1
                print(rel_type, count, all)
            except Exception as e:
                print(e)
        return
MATCH (p:Person) where p.name="张柏芝" return p ''' #清空数据库 #data = graph.run('MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r') data = graph.run(cql) print(list(data)[0]['p']["biography"]) """ #导入节点 电影类型 == 注意类型转换 cql = ''' LOAD CSV WITH HEADERS FROM "file:///genre.csv" AS line MERGE (p:Genre{gid:toInteger(line.gid),name:line.gname}) ''' result = graph.run(cql) print(result, "电影类型 存储成功") #导入节点 演员信息 cql = ''' LOAD CSV WITH HEADERS FROM 'file:///person.csv' AS line MERGE (p:Person { pid:toInteger(line.pid),birth:line.birth, death:line.death,name:line.name, biography:line.biography, birthplace:line.birthplace}) ''' result = graph.run(cql) print(result, "演员信息 存储成功") #导入节点 电影信息 cql = '''
# # source_type: 13 selected # # allSourcesFlag: 1 # # searchType : 2 # # output format: TEXT # ============================================= ADD RXCUI to node Prescription ============================================ # save return text file in improt directory #====== in bash: extract rows with rxcui =======# # cd to db/import directory # $awk '/\|RXCUI\|/' 4d0f0dc897a98b673797bcddf13c1db3.text > rxcui_pc.txt pc_rxcui = '/Users/yaqi/Documents/Neo4j/load_pc_drug_df copy/import/rxcui_pc.txt' pc_rxcui_df = pd.read_csv(pc_rxcui, sep = '|', header=None) idx = '''CREATE INDEX ON :Prescription(rxcui) ''' g.run(idx) pc_rxcui_df.apply(add_rxcui_Prescription,args=(g,), axis=1) print("finish adding rxcui to :Prescription") # print(genericName.shape) (23584079,) # ============================================= Extract GenericName from GenericDrug============================================ # q1 = ''' # MATCH (gd:GenericDrug) RETURN gd.genericName''' # # names = g.run(q1) # with open('/Users/yaqi/Documents/Neo4j/load_pc_drug_df/import/drug_genericName.txt', 'w') as text_file: # for name in names: # # line = name['gd.genericName']+ '\n'
class CamaraDosDeputados:
    """Loads Brazilian Chamber of Deputies open data into Neo4j via APOC.

    Pulls deputies, expenses, committee memberships and parties from the
    dadosabertos.camara.leg.br REST API using apoc.load.json, and exposes
    simple read queries over the resulting graph.
    """

    def __init__(self):
        # 1: username 2: password 3: port
        # |1 | 2 | | 3 |
        self.graph = Graph("bolt://localhost:7687", auth=('neo4j', 'abc'))

    def init_db(self):
        """Wipe the database and rebuild it from the public API."""
        self.delete_all()
        self.create_constraints()
        self.depIds = self.get_dep_ids()
        self.partyIds = self.get_party_ids()
        self.init_deputados()
        self.init_despesas()
        self.init_orgaos()
        self.init_partidos()

    def delete_all(self):
        """Remove every node and relationship from the database."""
        self.graph.run("MATCH(n) DETACH DELETE n")

    def create_constraints(self):
        # NOTE(review): constraints are currently disabled — the string below
        # is a no-op expression kept as a record of the intended constraints.
        pass
        """
        self.graph.run("CREATE CONSTRAINT ON (d:Deputado) ASSERT d.nome is UNIQUE;")
        self.graph.run("CREATE CONSTRAINT ON (d:Deputado) ASSERT d.id is UNIQUE;")
        self.graph.run("CREATE CONSTRAINT ON (p:Partido) ASSERT p.sigla is UNIQUE;")
        self.graph.run("CREATE CONSTRAINT ON (m:Municipio) ASSERT m.nome is UNIQUE;")
        self.graph.run("CREATE CONSTRAINT ON (uf:UnidadeFederativa) ASSERT uf.sigla is UNIQUE;")
        self.graph.run("CREATE CONSTRAINT ON (o:Orgao) ASSERT o.idOrgao is UNIQUE;")
        """

    def get_dep_ids(self):
        """Return the list of all deputy ids from the public API."""
        get_depIds_query = """
        WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados?ordem=ASC&ordenarPor=nome' AS url
        CALL apoc.load.json(url) YIELD value
        UNWIND value.dados as dados
        RETURN dados.id
        """
        depIds = [r['dados.id'] for r in self.graph.run(get_depIds_query)]
        return depIds

    def get_party_ids(self):
        """Return the list of all party ids from the public API."""
        get_partyIds_query = """
        WITH 'https://dadosabertos.camara.leg.br/api/v2/partidos?itens=10000&ordem=ASC&ordenarPor=sigla' AS url
        CALL apoc.load.json(url) YIELD value
        UNWIND value.dados as dados
        RETURN dados.id
        """
        partyIds = [r['dados.id'] for r in self.graph.run(get_partyIds_query)]
        return partyIds

    def init_deputados(self):
        """Create one Deputado node per id, linked to party and birthplace.

        Issues one APOC-backed query per deputy id in self.depIds.
        """
        for id in self.depIds:
            # NOTE(review): `id` shadows the builtin inside this loop.
            init_deputado_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}'""".format(
                id=id) + """ AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as dados
            MERGE(d:Deputado {id : toInteger(dados.id), nomeCivil : dados.nomeCivil})
            ON CREATE SET d.nome = dados.ultimoStatus.nome,
                d.idLegislatura = dados.ultimoStatus.idLegislatura,
                d.uri = dados.uri,
                d.urlFoto = dados.ultimoStatus.urlFoto,
                d.sexo = dados.sexo,
                d.nascimento = DATE(dados.dataNascimento),
                d.cpf = dados.cpf,
                d.email = dados.ultimoStatus.gabinete.email,
                d.escolaridade = dados.escolaridade
            MERGE(p:Partido {sigla : dados.ultimoStatus.siglaPartido})
            ON CREATE SET p.uri = dados.ultimoStatus.uriPartido
            MERGE (d)-[:FILIADO]-(p)
            FOREACH(t IN CASE WHEN dados.ufNascimento IS NOT NULL THEN [1] else [] END |
                MERGE(m:Municipio {nome: dados.municipioNascimento})
                MERGE(uf: UnidadeFederativa {sigla: dados.ufNascimento})
                MERGE (d)-[:ORIGEM]->(m)
                MERGE (m)-[:SITUADO]-(uf)
            )
            """
            self.graph.run(init_deputado_query)

    def init_despesas(self):
        """Create Despesa (expense) nodes for each deputy, for 2019-2020."""
        for id in self.depIds:
            init_despesas_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}/despesas?ano=2019&ano=2020&itens=100000&ordem=ASC&ordenarPor=mes'""".format(
                id=id) + """ AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as despesas
            """ + "MATCH (dep:Deputado {id:" + str(id) + "})" + """
            FOREACH(dados in despesas |
                MERGE (t:TipoDespesa {tipo: dados.tipoDespesa})
                CREATE (des:Despesa {valorDocumento: dados.valorDocumento,
                    codDocumento: dados.codDocumento,
                    nomeFornecedor: dados.nomeFornecedor,
                    urlDocumento: dados.urlDocumento,
                    tipo: dados.tipoDocumento})
                CREATE (des)-[:TIPODESPESA]->(t)
                CREATE (dep)-[:GASTOU {data: DATE(dados.dataDocumento)}]->(des)
            )
            """
            self.graph.run(init_despesas_query)

    def init_orgaos(self):
        """Create Orgao (committee/body) nodes and PARTICIPA edges per deputy."""
        for id in self.depIds:
            init_orgaos_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}/orgaos?dataInicio=2019-01-01&itens=100000&ordem=ASC&ordenarPor=dataInicio'""".format(
                id=id) + """ AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as orgaos
            """ + "MATCH (dep:Deputado {id:" + str(id) + "})" + """
            FOREACH(orgao in orgaos |
                MERGE (o:Orgao {idOrgao: toInteger(orgao.idOrgao)})
                ON CREATE SET o.uriOrgao = orgao.uriOrgao,
                    o.siglaOrgao = orgao.siglaOrgao,
                    o.nomeOrgao = orgao.nomeOrgao
                CREATE (dep)-[:PARTICIPA {titulo: orgao.titulo,
                    dataInicio: DATE(left(orgao.dataInicio,10)),
                    dataFim: DATE(left(orgao.dataFim,10))}]->(o)
            )
            """
            self.graph.run(init_orgaos_query)

    def init_partidos(self):
        """Enrich Partido nodes with details and link each party leader."""
        for id in self.partyIds:
            init_party_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/partidos/{id}'""".format(
                id=id) + """ AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as dados
            WITH dados
            MATCH (p:Partido {sigla: dados.sigla})
            MATCH (d:Deputado {nome: dados.status.lider.nome})
            MERGE (d)-[:LIDER]-(p)
            SET p.id = toInteger(dados.id),
                p.nome = dados.nome,
                p.situacao = dados.status.situacao,
                p.totalMembros = toInteger(dados.status.totalMembros),
                p.urlLogo = dados.urlLogo
            """
            self.graph.run(init_party_query)

    # The get_*_query helpers return Cypher wrapped in literal double quotes —
    # presumably consumed by an external visualization component; verify caller.
    def get_all_query(self):
        return "\"MATCH a=(:Deputado)-[]-(:Partido) RETURN a\""

    def get_deputados_query(self):
        return "\"MATCH (d:Deputado) RETURN d\""

    def get_partidos_query(self):
        return "\"MATCH (p:Partido) RETURN p\""

    def get_orgaos_query(self):
        return "\"MATCH (o:Orgao) RETURN o\""

    def get_deputados(self):
        """Return all deputy names, sorted alphabetically."""
        query = """
        MATCH(dep:Deputado)
        RETURN dep.nome
        ORDER BY dep.nome
        """
        deputados = []
        for record in self.graph.run(query):
            deputados.append(record["dep.nome"])
        return deputados

    def get_partidos(self):
        """Return all party acronyms, largest membership first."""
        query = """
        MATCH(p:Partido)
        RETURN p.sigla
        ORDER BY p.totalMembros DESC
        """
        partidos = []
        for record in self.graph.run(query):
            partidos.append(record["p.sigla"])
        return partidos

    def get_orgaos(self):
        """Return all committee acronyms, sorted alphabetically."""
        query = """
        MATCH(o:Orgao)
        RETURN o.siglaOrgao
        ORDER BY o.siglaOrgao
        """
        orgaos = []
        for record in self.graph.run(query):
            orgaos.append(record["o.siglaOrgao"])
        return orgaos

    def get_deputado_info(self, deputado_name):
        """Return the first record of detail fields for the named deputy.

        NOTE(review): the name is concatenated into the query unescaped —
        a quote in deputado_name breaks the query (injection risk).
        """
        query = """
        MATCH (d:Deputado)
        WHERE d.nome = """
        query += '\"' + deputado_name + '\"'
        query += """
        RETURN d.nascimento, d.nomeCivil, d.urlFoto, d.cpf, d.escolaridade,
               d.sexo, d.nome, d.idLegislatura, d.id
        """
        # Returns on the first record; None implicitly if there is no match.
        for record in self.graph.run(query):
            return record

    def get_deputado_relations_query(self, deputado_name):
        """Return a quoted Cypher string matching all edges of one deputy."""
        query = "\"MATCH a=(d:Deputado)-[]-() WHERE d.nome = '" + \
            deputado_name + "' RETURN a\""
        return query

    def get_partido_info(self, partido_name):
        """Return the first record of detail fields for the named party."""
        query = """
        MATCH (p:Partido)
        WHERE p.sigla = """
        query += '\"' + partido_name + '\"'
        query += """
        RETURN p.totalMembros, p.sigla, p.situacao, p.nome, p.id, p.urlLogo
        """
        for record in self.graph.run(query):
            return record

    def get_orgao_info(self, orgao_name):
        """Return the first record of detail fields for the named committee."""
        query = """
        MATCH (o:Orgao)
        WHERE o.siglaOrgao = """
        query += '\"' + orgao_name + '\"'
        query += """
        RETURN o.nomeOrgao, o.siglaOrgao, o.idOrgao
        """
        for record in self.graph.run(query):
            return record
if __name__ == "__main__":
    # Password comes from the environment; never hard-code credentials.
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    ## readme need to document setting environment variable in pycharm
    # NOTE(review): `tx` is begun but never used or committed below.
    tx = g.begin()
    q1 = '''
    MATCH (cl: Client)
    RETURN id(cl), cl.clientName
    '''
    q2 = '''
    MATCH (df:DrugFirm)
    RETURN id(df), df.firmName'''
    client_obj = g.run(q1)
    df_obj = g.run(q2)
    # Materialize the Client cursor into a list of {'id', 'clientName'} dicts.
    client_lst = []
    for client in client_obj:
        client_dic = {}
        client_dic['id'] = client['id(cl)']
        client_dic['clientName'] = client['cl.clientName']
        client_lst.append(client_dic)
    # Same for DrugFirm rows.  NOTE(review): loop var `object` shadows builtin.
    df_lst = []
    for object in df_obj:
        df_dic = {}
        df_dic['id'] = object['id(df)']
        df_dic['firmName'] = object['df.firmName']
        df_lst.append(df_dic)
# NOTE(review): chunk starts mid-script — node_1..node_10 and r1..r11 are
# defined above this view.  Uses py2neo v3-era APIs (graph.data, find_one) —
# TODO confirm the installed py2neo version supports them.
r12 = Relationship(node_6, '助班', node_7)
r13 = Relationship(node_8, '班主任', node_1)
r14 = Relationship(node_8, '班主任', node_2)
r15 = Relationship(node_8, '班主任', node_3)
r16 = Relationship(node_8, '班主任', node_4)
r17 = Relationship(node_8, '班主任', node_7)
# Union nodes / relationships into subgraphs so each can be created in one call.
S = node_1 | node_2 | node_3 | node_4 | node_5 | node_6 | node_7 | node_8 | node_9 | node_10
# NOTE(review): r1 appears twice in this union (after r14); harmless for a
# set-union but likely a typo.
s = r1 | r2 | r3 | r4 | r5 | r6 | r7 | r8 | r9 | r10 | r11 | r12 | r13 | r14 | r1 | r15 | r16 | r17
graph.create(S)
graph.create(s)
# Query all female Person nodes and print them.
A = graph.data("Match(female:Person) where female.sex = '女' return female")
print("\t查询数据库中的女性信息:")
for a in A:
    print(a)
# Query students of the head teacher via the 班主任 relationship.
B = graph.data("MATCH(n:Person)-[:班主任]->(student:Person) return student")
print("\n\t输出覃老师的学生信息:")
for b in B:
    print(b)
# Update one node's age property and push the change back to the server.
node = graph.find_one(label='Person', property_key='name', property_value="覃晓")
node['age'] = 30
graph.push(node)
Data = graph.find_one(label='Person', property_key='name', property_value="覃晓")
print("\n\t输出覃老师修改后的信息:")
print(Data)
# Delete one person's node together with all of its relationships.
graph.run('MATCH (p:Person{name:"刘旭鹏"})-[r]-() detach delete r,p ')
class Neo4j():
    """Thin query wrapper over a Neo4j knowledge graph (KBQA helpers).

    All methods build Cypher by string concatenation and return
    py2neo result `.data()` lists (or `.evaluate()` scalars).
    NOTE(review): concatenated queries are injection-prone — a quote in any
    entity name breaks the query; parameterized queries would be safer.
    """
    # Shared class attribute holding the py2neo Graph handle; set by connectDB.
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Open the HTTP connection to the local Neo4j server."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return Item nodes whose title equals `value`."""
        sql = "MATCH (n:Item { title: '" + str(value) + "' }) return n;"
        answer = self.graph.run(sql).data()
        return answer

    # Return the HudongItem (Hudong Baike encyclopedia entry) with this title.
    def matchHudongItembyTitle(self, value):
        sql = "MATCH (n:HudongItem { title: '" + str(value) + "' }) return n;"
        try:
            answer = self.graph.run(sql).data()
        except:
            # NOTE(review): if run() raises, `answer` is unbound and the
            # return below raises UnboundLocalError; bare except hides the cause.
            print(sql)
        return answer

    # Return the relationships (and target entities) of the named entity.
    def getEntityRelationbyEntity(self, value):  # title
        answer = self.graph.run(
            "MATCH (entity1) - [rel] -> (entity2) WHERE entity1.name = \"" + str(value)
            + "\" RETURN rel,entity2").data()
        return answer

    ''' 关系查询 下面 '''

    # Find entity1 and its outgoing relations (differs from
    # getEntityRelationbyEntity only in what is returned).
    def findRelationByEntity(self, entity1):  # title
        answer = self.graph.run("MATCH (n1 {name:\"" + str(entity1) + "\"})- [rel] -> (n2) RETURN n1,rel,n2").data()
        # if(answer is None):
        #     answer = self.graph.run("MATCH (n1:NewNode {title:\""+entity1+"\"})- [rel] -> (n2) RETURN n1,rel,n2" ).data()
        return answer

    # Find entity2 and its incoming relations.
    def findRelationByEntity2(self, entity1):  # title
        answer = self.graph.run("MATCH (n1)- [rel] -> (n2 {name:\"" + str(entity1) + "\"}) RETURN n1,rel,n2").data()
        # if(answer is None):
        #     answer = self.graph.run("MATCH (n1)- [rel] -> (n2:NewNode {title:\""+entity1+"\"}) RETURN n1,rel,n2" ).data()
        return answer

    # Given entity1 and a relation name, find the matching entity2 nodes.
    def findOtherEntities(self, entity, relation):  # title #type
        answer = self.graph.run("MATCH (n1 {name:\"" + str(entity) + "\"})- [rel {name:\"" + str(relation)
                                + "\"}] -> (n2) RETURN n1,rel,n2").data()
        # if(answer is None):
        #     answer = self.graph.run("MATCH (n1:NewNode {title:\"" + entity + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2) RETURN n1,rel,n2" ).data()
        return answer

    # Given entity2 and a relation name, find the matching entity1 nodes.
    def findOtherEntities2(self, entity, relation):  # type #title
        answer = self.graph.run("MATCH (n1)- [rel {name:\"" + str(relation) + "\"}] -> (n2 {name:\"" + str(entity)
                                + "\"}) RETURN n1,rel,n2").data()
        # if(answer is None):
        #     answer = self.graph.run("MATCH (n1)- [rel:RELATION {type:\""+relation+"\"}] -> (n2:NewNode {title:\"" + entity + "\"}) RETURN n1,rel,n2" ).data()
        return answer

    # Find the shortest RELATION path between two Disease entities.
    def findRelationByEntities(self, entity1, entity2):  # HudongItem title acompany_with title
        answer = self.graph.run(
            "MATCH (p1:Disease {name:\"" + str(entity1) + "\"}),(p2:Disease{name:\"" + str(entity2)
            + "\"}),p=shortestpath((p1)-[rel:RELATION*]-(p2)) RETURN rel"
        ).evaluate()
        # answer = self.graph.run("MATCH (p1:HudongItem {name:\"" + entity1 + "\"})-[rel:RELATION]-(p2:HudongItem{title:\""+entity2+"\"}) RETURN p1,p2").data()
        # NOTE(review): the three fallbacks below run nearly identical queries
        # (RETURN p instead of rel); the 2nd and 3rd retries are exact
        # duplicates of the 1st and can never change the outcome.
        if (answer is None):
            answer = self.graph.run(
                "MATCH (p1:Disease {name:\"" + str(entity1) + "\"}),(p2:Disease {name:\"" + str(entity2)
                + "\"}),p=shortestpath((p1)-[rel:RELATION*]-(p2)) RETURN p"
            ).evaluate()
        if (answer is None):
            answer = self.graph.run(
                "MATCH (p1:Disease {name:\"" + str(entity1) + "\"}),(p2:Disease{name:\"" + str(entity2)
                + "\"}),p=shortestpath((p1)-[rel:RELATION*]-(p2)) RETURN p"
            ).evaluate()
        if (answer is None):
            answer = self.graph.run(
                "MATCH (p1:Disease {name:\"" + str(entity1) + "\"}),(p2:Disease {name:\"" + str(entity2)
                + "\"}),p=shortestpath((p1)-[rel:RELATION*]-(p2)) RETURN p"
            ).evaluate()
        # answer = self.graph.data("MATCH (n1:HudongItem {title:\"" + entity1 + "\"})- [rel] -> (n2:HudongItem{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        # if(answer is None):
        #     answer = self.graph.data("MATCH (n1:HudongItem {title:\"" + entity1 + "\"})- [rel] -> (n2:NewNode{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        # if(answer is None):
        #     answer = self.graph.data("MATCH (n1:NewNode {title:\"" + entity1 + "\"})- [rel] -> (n2:HudongItem{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        # if(answer is None):
        #     answer = self.graph.data("MATCH (n1:NewNode {title:\"" + entity1 + "\"})- [rel] -> (n2:NewNode{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        # Repackage each path element as {'n1': start, 'n2': end, 'rel': x}.
        # Presumably the evaluated result is iterable of relationship-like
        # objects exposing start_node/end_node — TODO confirm against py2neo.
        relationDict = []
        if (answer is not None):
            for x in answer:
                tmp = {}
                start_node = x.start_node
                end_node = x.end_node
                tmp['n1'] = start_node
                tmp['n2'] = end_node
                tmp['rel'] = x
                relationDict.append(tmp)
        return relationDict

    # Check whether a concrete entity1-[relation]->entity2 triple exists.
    def findEntityRelation(self, entity1, relation, entity2):  # HudongItem title type HudongItem title
        # NOTE(review): .data() returns a list (possibly empty), never None,
        # so the three identical retries below are dead code.  Also note
        # `entity2` is not wrapped in str() while the others are.
        answer = self.graph.run("MATCH (n1:Disease {name:\"" + str(entity1) + "\"})- [rel:RELATION {name:\""
                                + str(relation) + "\"}] -> (n2:Disease{name:\"" + entity2 + "\"}) RETURN n1,rel,n2").data()
        if (answer is None):
            answer = self.graph.run("MATCH (n1:Disease {name:\"" + str(entity1) + "\"})- [rel:RELATION {name:\""
                                    + str(relation) + "\"}] -> (n2:Disease{name:\"" + entity2 + "\"}) RETURN n1,rel,n2").data()
        if (answer is None):
            answer = self.graph.run("MATCH (n1:Disease {name:\"" + str(entity1) + "\"})- [rel:RELATION {name:\""
                                    + str(relation) + "\"}] -> (n2:Disease{name:\"" + entity2 + "\"}) RETURN n1,rel,n2").data()
        if (answer is None):
            answer = self.graph.run("MATCH (n1:Disease {name:\"" + str(entity1) + "\"})- [rel:RELATION {name:\""
                                    + str(relation) + "\"}] -> (n2:Disease{name:\"" + entity2 + "\"}) RETURN n1,rel,n2").data()
        return answer
from rest_framework import status
import threading
import json
import time

# Connect with neo4j db: block module import until the database is reachable,
# retrying every 3 seconds.  `settings` is presumably the Django settings
# module imported above this view — TODO confirm.
connected = False
while not connected:
    neo4j_uri = settings.NEO4J_DB['URI']
    if neo4j_uri != '' and neo4j_uri != None:
        neo4j_client = Graph(neo4j_uri, auth=(settings.NEO4J_DB['USER'],
                                              settings.NEO4J_DB['PASS']))
        try:
            # Cheap liveness probe: any successful round-trip ends the loop.
            neo4j_client.run("Match () Return 1 Limit 1")
            print('Neo4j connection established!')
            connected = True
        except Exception as e:
            print('Neo4j connection fail!', e)
            time.sleep(3)
    # NOTE(review): if the URI is empty/None this loop spins with no sleep.

# Pub/sub
# d = threading.Thread(name='daemon', target=topic_subscribe)
# d.setDaemon(True)
# d.start()
# d.join()

# Subscribe on topic
def topic_subscribe():