def createRelationships():
    """Create one Neo4j relationship per entry of the global `relationships`
    list by looking up both endpoint nodes and linking them."""
    global relationships
    # Legacy py2neo REST endpoint; assumes a local, unauthenticated server.
    graph = Graph('http://localhost:7474/db/data')
    for r in relationships:
        # Each entry carries start/end descriptors {"collection": <label>, "_id": <id>};
        # _id is stored as a string property on the node, hence str() here.
        NodeA = graph.find_one(r["start"]["collection"],property_key = "_id", property_value = str(r["start"]["_id"]))
        NodeB = graph.find_one(r["end"]["collection"],property_key = "_id", property_value = str(r["end"]["_id"]))
        # rel() is the legacy py2neo helper producing a (start, type, end) abstract.
        graph.create(rel(NodeA,r["name"],NodeB))
class Neo4j():
    """Thin wrapper around a py2neo Graph for Item / HudongItem lookups."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Open the connection to the local Neo4j server.
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self,value):
        # First Item node whose title equals `value`, or None.
        return self.graph.find_one(label="Item",property_key="title",property_value=value)

    # Return the HudongItem node with the given title.
    def matchHudongItembyTitle(self,value):
        return self.graph.find_one(label="HudongItem",property_key="title",property_value=value)

    # Return up to `limitnum` HudongItem wrappers.
    def getAllHudongItem(self, limitnum):
        wrapped = [HudongItem(g) for g in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return wrapped

#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
class Neo4j():
    """py2neo-backed accessor for Item / HudongItem nodes, including the
    labelled subset listed in an external labels file."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self,value):
        # First Item node titled `value`, or None.
        return self.graph.find_one(label="Item",property_key="title",property_value=value)

    # Return the HudongItem node with the given title.
    def matchHudongItembyTitle(self,value):
        return self.graph.find_one(label="HudongItem",property_key="title",property_value=value)

    # Return every labelled HudongItem listed in `filename` (e.g. labels.txt).
    def getLabeledHudongItem(self, filename):
        labels = readCSV2(filename)
        result = []
        i = 0
        for row in labels:
            ctx = self.graph.find_one(label="HudongItem",property_key="title",property_value=row[0])
            if ctx == None:
                continue
            cur = HudongItem(ctx)
            cur.label = row[1]
            result.append(cur)
        print('load LabeledHudongItem over ...')
        return result

    # Return up to `limitnum` HudongItem wrappers.
    def getAllHudongItem(self, limitnum):
        found = self.graph.find(label="HudongItem", limit=limitnum)
        result = [HudongItem(g) for g in found]
        print('load AllHudongItem over ...')
        return result

#test = Neo4j()
#test.connectDB()
#answer = test.graph.find_one(label="HudongItem",property_key="title",property_value='火龙果')
#print(answer)
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
class Graph(object):
    """Facade over a py2neo graph whose nodes are keyed by a `node_id` property."""

    def __init__(self, neo4j_uri):
        self.graph = NeoGraph(neo4j_uri)

    def find_node(self, label, node_id):
        # Nodes are addressed by their custom `node_id` property, not the internal id.
        return self.graph.find_one(label, property_key="node_id", property_value=node_id)

    def create_user(self, args):
        """Create a User node if absent; return (node, created_flag)."""
        existing = self.find_node("User", args["username"])
        if existing:
            return existing, False
        fresh = Node("User", node_id=args["username"], name=args["name"], city=args["city"])
        self.graph.create(fresh)
        return fresh, True

    def delete_user(self, user):
        """Delete the User node with this id; True when something was removed."""
        target = self.find_node("User", user)
        if not target:
            return False
        self.graph.delete(target)
        return True
class Achievement(object):
    """Achievement record backed by a node in the graph database."""

    def __init__(self, graph_db):
        # Identity and display fields; populated by callers before use.
        self.id = None
        self.name = None
        self.title = None
        self.description = None
        self.date = None
        self.is_visible = True
        # NOTE: the `graph_db` argument is ignored; a fresh connection is opened.
        self._graph_db = Graph(settings.DATABASE_URL)

    @property
    def achievement_node(self):
        # The backing graph node, matched on this object's id.
        return self._graph_db.find_one(GraphLabel.ACHIEVEMENT,
                                       property_key='id',
                                       property_value=self.id)

    @property
    def achievement_interests(self):
        """
        get list of interests linked to this achievement
        :return:
        """
        # ach_interests = self.graph_db.match(start_node=self.achievement_node,
        #                                     rel_type=Relationship.)
        return None
class Neo4j():
    """Minimal py2neo accessor for Item / HudongItem nodes."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Open the connection to the local server.
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self,value):
        # First Item node titled `value`, else None.
        return self.graph.find_one(label="Item",property_key="title",property_value=value)

    # Return the HudongItem node with the given title.
    def matchHudongItembyTitle(self,value):
        return self.graph.find_one(label="HudongItem",property_key="title",property_value=value)
class NeoProvider(object):
    """Serves Screen DTOs (with their navigations and assets) out of Neo4j."""

    def __init__(self):
        # TODO read this from a config file
        uri = "http://*****:*****@localhost:7474/db/data"
        self.graph = Graph(uri)
        self.store = Store(self.graph)

    def get_start_screen(self):
        """Return the Screen built from the node flagged with `start`."""
        # Fetch the start node
        start_node = self.graph.find_one("screen", "start", True)
        # Find all the navigations from the start node
        nav_rels = self.graph.match(start_node, "nav")
        # Find all the assets for the start node
        asset_rels = self.graph.match(start_node, "hasAsset")
        # Construct the DTOs
        assets = [Asset(asset_rel.end_node) for asset_rel in asset_rels]
        navs = [Navigation(nav_rel) for nav_rel in nav_rels]
        start_screen = Screen(start_node, navs, assets)
        return start_screen

    def get_next_screen(self, current_screen_key, option):
        """Follow the outgoing `nav` relationship whose `opt` equals `option`
        and return the Screen for the node it points at.

        NOTE(review): raises IndexError when no nav matches `option`, and
        ValueError when `option` is not int-parseable — confirm callers
        guarantee a valid option.
        """
        # Fetch the current node
        current_node = self.graph.find_one("screen", "id", current_screen_key)
        # Navigate to the next node via option
        current_rels = self.graph.match(current_node, "nav")
        selected_rel = [rel for rel in current_rels if rel.properties['opt'] == int(option)][0]
        next_node = selected_rel.end_node
        # Grab new navigations and assets for the next node
        next_nav_rels = self.graph.match(next_node, "nav")
        asset_rels = self.graph.match(next_node, "hasAsset")
        # Construct the DTOs
        assets = [Asset(asset_rel.end_node) for asset_rel in asset_rels]
        navs = [Navigation(nav_rel) for nav_rel in next_nav_rels]
        next_screen = Screen(next_node, navs, assets)
        return next_screen
class Neo4j():
    """py2neo accessor for Item / HudongItem nodes and their outgoing relations."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Connect to the local Neo4j server."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self,value):
        """Return the first Item node titled `value`, or None."""
        answer = self.graph.find_one(label="Item",property_key="title",property_value=value)
        return answer

    # Return the HudongItem node with the given title.
    def matchHudongItembyTitle(self,value):
        answer = self.graph.find_one(label="HudongItem",property_key="title",property_value=value)
        return answer

    # Return all outgoing (rel, entity2) records for the entity titled `value`.
    def getEntityRelationbyEntity(self,value):
        # Bug fix: the title used to be spliced into the Cypher text, so any
        # title containing a double quote broke the query and allowed Cypher
        # injection. Pass it as a query parameter instead.
        answer = self.graph.data("MATCH (entity1) - [rel] -> (entity2) WHERE entity1.title = {title} RETURN rel,entity2", title=value)
        return answer
def before_all(context):
    """behave hook: remove leftover test fixtures from Neo4j before the suite runs."""
    # import falcon_test
    # context.attachment_dir = os.path.join(os.path.dirname(falcon_test.__file__), 'tests/data')
    # context.sms_path = os.path.join(os.path.dirname(falcon_test.__file__), '../../var/sms/')
    # context.mail_path = os.path.join(os.path.dirname(falcon_test.__file__), '../../var/mail/')
    # clear database
    graph_db = Graph(settings.DATABASE_URL)
    # graph_db.delete_all()
    # Remove the user created by a previous run.
    # NOTE(review): find_one may return None and delete(None) may then raise —
    # confirm the node always exists when this hook runs.
    new_user_node = graph_db.find_one('USER', property_key='email', property_value='*****@*****.**')
    graph_db.delete(new_user_node)
    # Delete the persona interest node; its INTERESTED_IN relationships must be
    # deleted before the node itself.
    interest_node = graph_db.find_one('INTEREST', property_key='name', property_value=PERSONAS['interest']['name'])
    interest_relationships = graph_db.match(start_node=None, rel_type='INTERESTED_IN', end_node=interest_node)
    for relationship in interest_relationships:
        graph_db.delete(relationship)
    graph_db.delete(interest_node)
    context.base_url = "http://localhost:8000"
    benv.before_all(context)
class AgoraOrganization(object):
    """An organization persisted as an ORGANIZATION node in the graph."""

    def __init__(self):
        self.name = None
        self.unique_id = None
        self.mission_statement = None
        self.email = None
        self.website = None
        self.is_open = False
        self.is_invite_only = False
        self.graph_db = Graph()

    @property
    def org_node(self):
        # Backing node, matched by organization name.
        return self.graph_db.find_one(AgoraLabel.ORGANIZATION,
                                      property_key='name',
                                      property_value=self.name)

    @property
    def org_members(self):
        """
        list of the members of the organization
        :return: list of tuple of member name, email
        """
        member_rels = self.graph_db.match(start_node=self.org_node,
                                          rel_type=AgoraRelationship.MEMBER_OF,
                                          end_node=None)
        return [(rel.end_node["name"], rel.end_node["email"]) for rel in member_rels]

    def create_organization(self):
        """
        create a new organization
        :return: py2neo Node
        """
        self.unique_id = str(uuid.uuid4())
        new_org_node = Node.cast(AgoraLabel.ORGANIZATION, {
            "name": self.name,
            "mission_statement": self.mission_statement,
            "unique_id": self.unique_id,
            "email": self.email,
            "is_open": self.is_open,
            "is_invite_only": self.is_invite_only,
            "website": self.website})
        self.graph_db.create(new_org_node)
        return new_org_node
class Build_Configuration:
    """Builds a tree of configuration nodes in Neo4j, tracking the current
    namespace path and a stack of parent nodes while nodes are constructed.
    (Python 2 code: print statements, legacy py2neo `node.properties` API.)"""

    def __init__(self):
        self.graph = Graph()
        # WARNING: wipes the entire database on construction.
        self.graph.delete_all()
        self.namespace = ["Start"]   # path components of the current position
        self.parent_node = []        # stack of enclosing nodes

    def check_duplicates(self, label, name):
        # print "label",label,name
        # Reject a node whose (label, name) pair already exists in the graph.
        if self.graph.find_one(label, property_key="name", property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        # Build the full namespace string for `name`: current path joined by "/".
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        # Current innermost parent.
        return self.parent_node[-1]

    def pop_namespace(self):
        # Leave the current namespace level.
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name, properties):
        """Create a node (unique by its namespace string), link it to the
        current parent via `relationship`, and optionally descend into it
        (push_namespace=True makes it the new parent)."""
        namespace = self.get_namespace(name)
        self.check_duplicates(label, name=namespace)
        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        # Root nodes have no parent; everything else hangs off the stack top.
        if len(self.parent_node) != 0:
            relation_enity = Relationship(self.get_parent_node(), relationship, node)
            self.graph.create(relation_enity)
        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
class AgoraAchievement(object): def __init__(self, graph_db): self.name = None self.unique_id = None self.description = None self.title = None self.is_visible = True self.date = None self.graph_db = Graph() @property def achievement_node(self): return self.graph_db.find_one(AgoraLabel.ACHIEVEMENT, property_key='unique_id', property_value=self.unique_id) @property def achievement_interests(self): """
class Neo4JClient:
    """Client for Neo4J"""

    def __init__(self):
        authenticate("localhost:7474", secrets.NEO4J_USERNAME , secrets.NEO4J_PASSWORD)
        self.graph = Graph("http://localhost:7474/db/data/")

    def create_user_node(self, user_dict):
        """Create a Person node from a Twitter-style user dict.

        Only a whitelist of keys is persisted. NOTE(review): the user's name is
        also passed as a second node *label* (py2neo Node(*labels, **props)) —
        looks unintentional, but kept for compatibility.
        """
        mykeys = ['name', 'id_str', 'description', 'screen_name']
        user_dict = {k: v for (k, v) in user_dict.items() if k in mykeys}
        user_node = Node('Person', user_dict['name'], **user_dict)
        self.graph.create(user_node)
        return user_node

    def find_user_node(self, key, value):
        """Return the first Person node whose `key` property equals `value`."""
        return self.graph.find_one('Person', property_key=key, property_value=value)

    def create_rel(self, start_node, end_node, rel_type):
        """Create a relationship of `rel_type` from start_node to end_node."""
        return self.graph.create((start_node, rel_type, end_node))

    def update_user_node(self, id, user_dict):
        # Bug fix: this method was declared without `self`, so any call through
        # an instance raised TypeError. Still an unimplemented stub.
        pass
# 节点间关系的建立 node_1_call_node_2 = Relationship(test_node_1, 'CALL', test_node_2) node_1_call_node_2['count'] = 1 node_2_call_node_1 = Relationship(test_node_2, 'CALL', test_node_1) node_2_call_node_1['count'] = 2 test_graph.create(node_1_call_node_2) test_graph.create(node_2_call_node_1) # 节点/关系的属性赋值以及属性值的更新 node_1_call_node_2['count'] += 1 test_graph.push(node_1_call_node_2) # 通过属性值来查找节点和关系(find,find_one) find_code_1 = test_graph.find_one(label="Person", property_key="name", property_value="test_node_1") find_code_3 = test_graph.find_one(label="Person", property_key="name", property_value="test_node_2") print(find_code_1['name']) # 通过节点/关系查找相关联的节点/关系 find_relationship = test_graph.match_one(start_node=find_code_1, end_node=find_code_3, bidirectional=False) print(find_relationship) # match和match_one的参数包括start_node,Relationship,end_node中的至少一个。
# Python 2 tutorial snippet: mostly commented-out py2neo examples.
print graph

# find a node or set of nodes according to properties and labels
# graph.find_one()  # returns a single node
# graph.find()      # returns a generator

# Let's find Marnee
# marnee_node = graph.find_one("Person", property_key="name", property_value="Marnee")
# print "find_one Marnee %s" % marnee_node
#
# marnee_generator = graph.find("Person", property_key="name", property_value="Marnee")
# for marnee in marnee_generator:
#     print marnee

# Let's find Julian
julian_node = graph.find_one("Person", property_key="name", property_value="Julian")
# print "find_one Julian %s" % julian_node  #s
#
# Let's find all the Persons Julian knows
# show the Cypher -- MATCH
# show the code
# graph.match()
# graph.match_one()
#
# julian_knows = graph.match(start_node=julian_node,
#                            rel_type="KNOWS",
#                            end_node=None)
# for friend in julian_knows:
#     print "friend %s" % friend
#
class GraphDB():
    """Twitter-oriented access layer over a legacy py2neo graph."""

    def __init__(self, user=NEO4J_USER, pwd=NEO4J_PWD, host=NEO4J_HOST):
        # Credentials are embedded in the connection URI (legacy py2neo style).
        self.graph = Graph("http://%s:%s@%s/db/data/" % (user, pwd, host))

    def query(self, query_str, stream=False):
        """Run raw Cypher; stream=True returns a lazy result cursor."""
        if stream:
            return self.graph.cypher.stream(query_str)
        else:
            return self.graph.cypher.execute(query_str)

    def create_relation_user_to_topic(self, user, relation, topic_name):
        """Link `user` to a topic node, creating either endpoint on demand and
        counting repeats on the relationship's `count` property."""
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        topicNode = self.graph.find_one("topic_name", 'name', topic_name)
        if not topicNode:
            topicNode = Node("topic_name", name = topic_name)
            self.graph.create(topicNode)
        relationship = self.graph.match_one(userNode, relation, topicNode)
        if not relationship:
            relationship = Relationship(userNode, relation, topicNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    # Relations: follows eventuell favourites, retweets
    def create_relation_user_to_user(self, userA, relation, userB):
        """Same bookkeeping as create_relation_user_to_topic, user -> user."""
        userANode = self.graph.find_one("user", 'id', userA.id_str)
        userBNode = self.graph.find_one("user", 'id', userB.id_str)
        if not userANode:
            userANode = self.create_node_from_user(userA)
            self.graph.create(userANode)
        if not userBNode:
            userBNode = self.create_node_from_user(userB)
            self.graph.create(userBNode)
        relationship = self.graph.match_one(userANode, relation, userBNode)
        if not relationship:
            relationship = Relationship(userANode, relation, userBNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    def increment_user_counter(self, user, counter, n):
        """Add `n` to the numeric property named `counter` on the user's node."""
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        if counter in userNode.properties:
            userNode.properties[counter] += n
        else:
            userNode.properties[counter] = n
        userNode.push()

    def get_all_users(self):
        """Return [{'name': ..., 'id_str': ...}] for every user node."""
        users = []
        for u in self.graph.find('user'):
            users.append({'name': u.properties['screen_name'], 'id_str': u.properties['id']})
        return users

    def create_node_from_user(self, user):
        # Builds a detached node; the caller is responsible for graph.create().
        userNode = Node("user", name=user.screen_name,
                        id=user.id_str,
                        followers_count=user.followers_count,
                        friends_count=user.friends_count,
                        statuses_count=user.statuses_count,
                        favourites_count=user.favourites_count)
        return userNode

    def quicksearch(self, username, limit=10):
        """Prefix-match user names, up to `limit` results.

        NOTE(review): `username` is interpolated straight into the Cypher/regex —
        quotes or regex metacharacters break the query (injection risk);
        confirm input is sanitised upstream.
        """
        cql_query = "match(u:user) WHERE u.name =~ '%s.*' RETURN DISTINCT u.name LIMIT %s;"
        return self.query(cql_query % (username, limit))

    def get_user_count(self):
        """Total number of distinct user nodes (0 when the query yields nothing)."""
        cql_query = "match(u:user) RETURN count(DISTINCT u) AS c;"
        for row in self.query(cql_query):
            return row['c']
        return 0
# Continuation of a CSV-driven import: `reader`/`f` come from an earlier
# open() on the employee file (not visible in this excerpt).
employees = [tuple(line) for line in reader]
f.close()

with open(linkDataFile) as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    relations = [tuple(line) for line in reader]
f.close()  # redundant: the with-block already closed the file

employee_entries = []
for link in relations:
    employee_entries.append({"links": link})

graph = Graph("http://*****:*****@localhost:7474/db/data/")
graph.cypher.execute(
    "MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r")  # deleting existing data

#print (employees)
# One Employee node per CSV row.
for emp in employees:
    graph.create(Node("Employee", id=emp[0], email=str(emp[1]), group=1))
    #print(emp[0])

# One weighted EMAIL relationship per link row (columns: from-id, to-id, weight).
for link in relations:
    node1 = graph.find_one("Employee", "id", link[0])
    node2 = graph.find_one("Employee", "id", link[1])
    #print(node1)
    graph.create(Relationship(node1, "EMAIL", node2, weight=link[2]))
dislikeCount=arraystring['dislikeCount'], likeCount=int(arraystring['likeCount'])) graph.create(a) print("Current run number(While Node Creation): " + str(i)) # print(arrayjson[i]['videoInfo']['id']) ## uncomment to print the data ## For-loop for creating relation ships between the created nodes for i in range(len(arrayjson)): element = arrayjson[i] for j in range(i - 1, -1, -1): # For establishing 'SAME_CHANNEL' relation if arrayjson[j]['videoInfo']['snippet']['channelId'] == element[ 'videoInfo']['snippet']['channelId']: a = graph.find_one("Youtube", property_key='name', property_value=element['videoInfo']['id']) b = graph.find_one("Youtube", property_key='name', property_value=arrayjson[j]['videoInfo']['id']) channelRelation = Relationship(a, "SAME_CHANNEL", b) graph.create(channelRelation) # For establishing 'SIMILAR_DESC' relation Count = descriptionCompare( arrayjson[i]['videoInfo']['snippet']['description'], arrayjson[j]['videoInfo']['snippet']['description']) if Count > 3000: a = graph.find_one("Youtube", property_key='name', property_value=element['videoInfo']['id'])
# mobile_node_1 = Node("Mobile", name="18610558465") csv_file_path = "D:/github_program/myPython/docs/rst/test.csv" red = pd.read_csv(csv_file_path) print(red) sys.exit(0) reader = csv.reader(open(csv_file_path, "r")) for line in reader: # print(len(line)) for single in range(0, len(line)): print(line[single]) find_code_1 = test_graph.find_one(label="Mobile", property_key="name", property_value=line[single]) if find_code_1 is None: mobile_node_1 = Node("Mobile", name="18610558465") test_graph.create(mobile_node_1) else: print("Exist") sys.exit(0) find_code_1 = test_graph.find_one(label="Mobile", property_key="name", property_value="18610558465") print(find_code_1) if find_code_1 is None: mobile_node_1 = Node("Mobile", name="18610558465")
for result in results: print("\t\t\t\t" + result['full_name'] + " FOUND") else: # print("\t\t\t\tNOT FOUND! Creating Author...") author_to_be_added = graph.merge_one("Author", "link", author["link"]) author_str_split_list = author["name"].split() if (len(author_str_split_list) == 1): author_to_be_added['full_name'] = author["name"].title() author_to_be_added['fist_name'] = author_str_split_list[0] author_to_be_added['middle_name'] = " " author_to_be_added['last_name'] = " " elif (len(author_str_split_list) == 2): author_to_be_added['full_name'] = author["name"].title() author_to_be_added['fist_name'] = author_str_split_list[0] author_to_be_added['middle_name'] = " " author_to_be_added['last_name'] = author_str_split_list[1] elif(len(author_str_split_list) == 3): author_to_be_added['full_name'] = author["name"].title() author_to_be_added['fist_name'] = author_str_split_list[0] author_to_be_added['middle_name'] = author_str_split_list[1] author_to_be_added['last_name'] = author_str_split_list[2] author_to_be_added.push() if authors.index(author) == 0: author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find_one('Author', 'link', author["link"]), primary_author="YES")) # primary_author_bool = False else: # pass author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find_one('Author', 'link', author["link"]), primary_author="NO")) print(j_list)
# Connect to the local Neo4j instance (legacy py2neo auth).
authenticate("localhost:7474","t","t")
graph = Graph("http://localhost:7474/db/data/")

def tofrac(a):
    """Turn a number into the fraction 0.<digits>, e.g. 123 -> 0.123.

    NOTE(review): scales by len(str(a)), so negatives or floats divide by an
    extra power of ten for the '-'/'.' characters — confirm inputs are
    non-negative integers.
    """
    b=float(10**len(str(a)))
    c=float(a)
    d=c/b
    return float(d)

#Relationship(a, "similartags", b, weight = tcount)

# Root node that the author nodes hang off (id 0).
node=Node("Ariana",id=0,name="zero_node",type=0,date=0)
graph.create(node)
array=[]      # all tweet bundles loaded so far
rel_ind=1
rel_type=0
score={}
tmp={}
n4= graph.find_one("Ariana",property_key = 'id', property_value = 0)

# Load every tweet-bundle JSON file; create an author node on first sight.
for fname in iglob(os.path.expanduser('output1/*.json')):
    with open(fname) as fin:
        print (fname)
        hf=1
        mf=1
        tweets= json.load(fin)
        array.append(tweets)
        auth_id=tweets['meta']['author_id']
        auth_name=tweets['meta']['author_name']
        #print (auth_id)
        dt=tweets['meta']['date']['$date']
        if(graph.find_one("Ariana",property_key = 'name', property_value = auth_name)==None):
            node = Node("Ariana",id=auth_id, name=tweets['meta']['author_name'],bundle_id=tweets['meta']['retweetOf'])
            graph.create(node)
        #score.setdefault(auth_name,{})
# 创建 graph_1 = Graph() graph_2 = Graph(host='localhost') graph_3 = Graph('http://localhost:7474/db/data/') s = a | b | r graph = Graph(password='******') # 添加 graph.create(s) # 查找 data = graph.data('MATCH (p:Person) return p') print(data) #### DataFrame df = DataFrame(data) node = graph.find_one(label='Person') print(node) relationship = graph.match_one(rel_type='KNOWS') print(relationship) # 更新 node = graph.find_one(label='Person') node['age'] = 21 graph.push(node) # 删除 Node 时必须先删除其对应的 Relationship,否则无法删除 Node node = graph.find_one(label='Person') relationship = graph.match_one(rel_type='KNOWS') graph.delete(relationship) graph.delete(node)
BREAKPOINT_INV_COMPANY = 1 # 导入公司持股关系的断点 BREAKPOINT_INV_PERSON = 1 # 导入人持股关系的断点 BREAKPOINT_LEADER = 1 # 导入董监高关系的断点 if __name__ == '__main__': df1 = pd.read_csv('../Data/shareholders1.csv', encoding='utf-8', sep=',') for i in range(len(df1)): data = df1.iloc[i, :] entname = data['entname'] regcap_CNY = data['regcap_CNY'] holder_label, holder = (data['holder'].split('_'))[0], (data['holder'].split('_'))[1] money_CNY = float(data['money_CNY']) share_ratio = round(float((data['share_ratio'].split('%'))[0])/100, 6) FLAGS = str(data['flags']) com_node = graph.find_one(label='COMPANY', property_key='entname', property_value=entname) if not com_node: com_node = Node('COMPANY') com_node['entname'] = entname com_node['regcap_CNY'] = regcap_CNY com_node['FLAGS'] = FLAGS graph.create(com_node) else: com_node['regcap_CNY'] = regcap_CNY graph.push(com_node) if holder_label == 'COMPANY': holder_node = graph.find_one(label='COMPANY', property_key='entname', property_value=holder) if not holder_node: holder_node = Node('COMPANY') holder_node['entname'] = holder holder_node['FLAGS'] = FLAGS
from py2neo import Node, Graph
import json
import time
import pandas as pd

start_time = time.time()
print("Start create nodes")

# Load credentials/config; the with-block closes the file by itself — the old
# explicit f.close() after it was redundant and has been removed.
with open('config.json', 'r') as f:
    config = json.load(f)

graph_address = 'http://{}:{}@localhost:7474/db/data/'.format(config['neo4j_credentials']['username'],
                                                              config['neo4j_credentials']['password'])
graph = Graph(graph_address)

path = config['path_to_data']
# Bug fix: the CSV handle was opened but never closed; use a context manager.
with open(path + "asoiaf-{}-nodes.csv".format(config["book_to_analyse"]), "r") as f:
    nodes = pd.read_csv(f)
list_nodes = list(nodes["Id"])

# create nodes (deduplicated via set)
for x in set(list_nodes):
    if not graph.find_one('character', property_key='name', property_value=x):
        graph.create(Node('character', name=x))

print("--- %s seconds ---" % (time.time() - start_time))
# `f` was opened by earlier code (not visible in this excerpt).
content = f.read()
topics = find_topic(content)

#create topic, turn and user nodes
#user does not need id, and has only a name property
#remove author property from turn and add a relationship
#PUBLISHES between user and turn
#
for topic in topics:
    topic_node = Node("Topic",title=topic.title)
    for turn in topic.turns:
        turn_node = Node("Turn",tid=turn.tid, text=turn.text,date=turn.date)
        author = turn.author
        if len(author) > 0:
            # find author node from graph
            author_node=graph.find_one("User", property_key="name", property_value=author)
            if author_node is None:
                author_node=Node("User",name=author)
            author_turn = Relationship(author_node, "PUBLISHES", turn_node)
            graph.create(author_turn)
        if turn.parent_turn is None:
            # Top-level turn: attach it directly to its topic.
            topic_turn=Relationship(topic_node, "CONTAINS",turn_node)
            graph.create(topic_turn)
        else:
            # Reply: link this turn to its parent already in the graph.
            # NOTE(review): when the parent is not found yet, the reply is
            # silently dropped — confirm input ordering guarantees parents first.
            p_tid = turn.parent_turn.tid
            parent_node = graph.find_one("Turn", property_key="tid", property_value=p_tid)
            if parent_node is not None:
                turn_turn=Relationship(turn_node, "REPLIES", parent_node)
                graph.create(turn_turn)
            #find parent_node's author and create a relationship
class LoadDatatoNeo4J(object):
    """Splits entity-relation JSON into CSV files according to whether both
    endpoints already exist as NerItem nodes in Neo4j."""
    graph = None

    def __init__(self):
        print("start load data ...")

    def connectDB(self):
        """Connect to the local Neo4j server."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print("connect neo4j success!")

    def readData(self):
        """Read entityRelation.json and produce three CSVs:
        - new_node.csv: entities not yet in the graph (label newNode)
        - wikidata_relation.csv: relations between two known items
        - wikidata_relation2.csv: relations from a known item to a new node
        """
        count = 0
        # Write the CSV header lines first (truncating any previous output).
        with open("new_node.csv", 'w') as fw:
            fw.write("title,lable" + '\n')
        with open("wikidata_relation.csv", "w") as fw:
            fw.write("HudongItem1,relation,HudongItem2" + '\n')
        with open("wikidata_relation2.csv", "w") as fw:
            fw.write("HudongItem,relation,NewNode" + '\n')
        # Re-open the CSVs in append mode so rows land after the headers.
        with open("../wikidataRelation/entityRelation.json","r") as fr:
            with open("new_node.csv", 'a') as fwNewNode:
                with open("wikidata_relation.csv", 'a') as fwWikidataRelation:
                    with open("wikidata_relation2.csv", 'a') as fwWikidataRelation2:
                        newNodeList = []
                        for line in fr:
                            print(line)
                            entityRelationJson = json.loads(line)
                            entity1 = entityRelationJson['entity1']
                            entity2 = entityRelationJson['entity2']
                            # Look up entity1 in the graph
                            find_entity1_result = self.graph.find_one(
                                property_key = "title",
                                property_value = entity1,
                                label = "NerItem" # the label used here
                            )
                            # Look up entity2
                            find_entity2_result = self.graph.find_one(
                                property_key = "title",
                                property_value = entity2,
                                label = "NerItem"
                            )
                            count += 1
                            print(count)
                            # If entity1 is not a known entity, skip this record
                            if (find_entity1_result is None):
                                continue
                            # Strip commas and double quotes from the relation text
                            entityRelationList = re.split(",|\"",entityRelationJson['relation'])
                            entityRelation = ""
                            for item in entityRelationList:
                                entityRelation = entityRelation + item
                            # entity2 unknown: record it once as a newNode, then
                            # write the relation into the NewNode file
                            if (find_entity2_result is None):
                                if (entity2 not in newNodeList):
                                    fwNewNode.write(entity2 + "," + "newNode" + '\n')
                                    newNodeList.append(entity2)
                                fwWikidataRelation2.write(entity1 + "," + entityRelation + "," + entity2 + '\n')
                            # entity2 known: record the item-to-item relation
                            else:
                                fwWikidataRelation.write(entity1 + "," + entityRelation + "," + entity2 + '\n')
class Neo4j():
    """Query helper for Hudong nodes.

    NOTE(review): every method splices its arguments straight into the Cypher
    text — values containing double quotes break the query (Cypher injection
    risk). Several methods also re-run the *identical* query when the first
    run returns nothing; presumably a retry for flaky connections — confirm
    whether that is still needed.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Connect to the local Neo4j server.
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self,value):
        # NOTE: despite the name, this matches label Hudong (same as below).
        answer = self.graph.find_one(label="Hudong",property_key="title",property_value=value)
        return answer

    # Return the Hudong node with the given title.
    def matchHudongItembyTitle(self,value):
        answer = self.graph.find_one(label="Hudong",property_key="title",property_value=value)
        return answer

    # Return outgoing (rel, entity2) records for the entity titled `value`.
    def getEntityRelationbyEntity(self,value):
        answer = self.graph.data("MATCH (entity1) - [rel] -> (entity2) WHERE entity1.title = \"" +value +"\" RETURN rel,entity2")
        return answer

    # Return (n1, rel, n2) triples whose relation `type` equals `value`.
    def findRelationEntity(self,value):
        answer = self.graph.data("MATCH (n1:Hudong)- [rel {type:\""+value+"\"}] -> (n2) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            # identical retry (see class note)
            answer = self.graph.data("MATCH (n1:Hudong)- [rel {type:\""+value+"\"}] -> (n2) RETURN n1,rel,n2" )
        return answer

    # Find entity1 and its outgoing relations (differs from
    # getEntityRelationbyEntity only in also returning n1).
    def findRelationByEntity(self,entity1):
        answer = self.graph.data("MATCH (n1:Hudong {title:\""+entity1+"\"})- [rel] -> (n2) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\""+entity1+"\"})- [rel] -> (n2) RETURN n1,rel,n2" )
        return answer

    # Find entity2 and its incoming relations.
    def findRelationByEntity2(self,entity1):
        answer = self.graph.data("MATCH (n1)- [rel] -> (n2:Hudong {title:\""+entity1+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1)- [rel] -> (n2:Hudong {title:\""+entity1+"\"}) RETURN n1,rel,n2" )
        return answer

    # Given entity1 and a relation type, find matching entity2 nodes.
    def findOtherEntities(self,entity,relation):
        answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2) RETURN n1,rel,n2" )
        return answer

    # Given entity2 and a relation type, find matching entity1 nodes.
    def findOtherEntities2(self,entity,relation):
        answer = self.graph.data("MATCH (n1)- [rel:RELATION {type:\""+relation+"\"}] -> (n2:Hudong {title:\"" + entity + "\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1)- [rel:RELATION {type:\""+relation+"\"}] -> (n2:Hudong {title:\"" + entity + "\"}) RETURN n1,rel,n2" )
        return answer

    # Query the relations holding between two given entities
    # (the identical query is retried up to four times — see class note).
    def findRelationByEntities(self,entity1,entity2):
        answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel] -> (n2:Hudong{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel] -> (n2:Hudong{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel] -> (n2:Hudong{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel] -> (n2:Hudong{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        return answer

    # Check whether the database holds a matching entity-relation-entity
    # triple; the target node is tried alternately with the HudongItem and
    # NewNode labels.
    def findEntityRelation(self,entity1,relation,entity2):
        answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2:HudongItem{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2:NewNode{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2:HudongItem{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        if(len(answer) == 0):
            answer = self.graph.data("MATCH (n1:Hudong {title:\"" + entity1 + "\"})- [rel:RELATION {type:\""+relation+"\"}] -> (n2:NewNode{title:\""+entity2+"\"}) RETURN n1,rel,n2" )
        return answer
'''
a = Node('PersonTest', name='张三')
b = Node('PersonTest', name='李四')
r = Relationship(a, 'KNOWNS', b)
s = a | b | r
graph.create(s)
'''
2 —— Node查询
'''
# Query with CQL; the result is a list
data1 = graph.data('MATCH(p:PersonTest) return p')
print("data1 = ", data1, type(data1))
print()

# find_one() returns the first node that matches
data2 = graph.find_one(label='PersonTest', property_key='name', property_value="李四")
print("data2 = ", data2, type(data2))
print()

# find() returns a generator over matching nodes
data3 = graph.find(label='PersonTest')
for data in data3:
    print("data3 = ", data)
print()

'''
3 —— Relationship查询
'''
relationship = graph.match_one(rel_type='KNOWNS')
print(relationship, type(relationship))
print()

'''
'base_ram', 'base_san', 'base_license', 'price' ] #print (header) csvwriter.writerow(header) count += 1 csvwriter.writerow([ i['client'], i['platform'], i['env'], i['server'], i['cpu'], i['ram'], i['diskGB'], i['os_type'], i['cpu_addon'], i['ram_addon'], i['base_vm'], i['base_cpu'], i['base_ram'], i['base_san'], i['base_license'], i['price'] ]) testcsv.close() exit(0) neo_vm = graph.find_one('Server', 'name', "ADAM-CUS-NJS1") pprint.pprint(neo_vm) for vm in oldvmlist: #neo_dc = Node('Datacenter', name=oldvmlist[vm]['site']) #graph.merge(neo_dc) neo_env = Node('Env', name=oldvmlist[vm]['env']) graph.merge(neo_env) #link=Relationship(neo_env,'is_on',neo_dc) #graph.merge(link) neo_client = Node('Compte', name=oldvmlist[vm]['compte']) graph.merge(neo_client) neo_pf = Node('Plateform', name=oldvmlist[vm]['PF']) graph.merge(neo_pf) link = Relationship(neo_client, 'have', neo_pf) graph.merge(link)
class loadDatatoNeo4j(object):
    """Export wikidata entity relations to CSV, splitting edges by whether the
    target entity already exists in the Neo4j HudongItem set.

    NOTE(review): formatting reconstructed from a collapsed source; statement
    order is preserved byte-for-byte.
    """
    # Graph connection; populated by connectDB().
    graph = None

    def __init__(self):
        print("start load data ...")

    def connectDB(self):
        # Connect to the local Neo4j instance (credentials redacted upstream).
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print("connect neo4j success!")

    def readData(self):
        count = 0
        # Freshly (re)create the three output CSVs, writing headers first.
        # (The "lable" spelling is kept — downstream consumers read it as-is.)
        with open("new_nodef.csv", 'w') as fw:
            fw.write("title,lable" + '\n')
        with open("wikidata_relationf.csv", "w") as fw:
            fw.write("HudongItem1,relation,HudongItem2" + '\n')
        with open("wikidata_relation2f.csv", "w") as fw:
            fw.write("HudongItem,relation,NewNode" + '\n')
        # Read the entity-relation JSON (one JSON object per line).
        with open("../wikidataRelation/entityRelation1.json", "r") as fr:
            # Re-open the CSVs in append mode so rows land after the headers.
            with open("new_nodef.csv", 'a') as fwNewNode:
                with open("wikidata_relationf.csv", 'a') as fwWikidataRelation:
                    with open("wikidata_relation2f.csv", 'a') as fwWikidataRelation2:
                        # Tracks entity2 values already emitted as new nodes,
                        # so each new node is written only once.
                        newNodeList = list()
                        for line in fr:
                            # print(line)
                            entityRelationJson = json.loads(line)
                            entity1 = entityRelationJson['entity1']
                            entity2 = entityRelationJson['entity2']
                            # Look up entity1 among existing HudongItem nodes.
                            find_entity1_result = self.graph.find_one(
                                property_key="title",
                                property_value=entity1,
                                label="HudongItem")
                            # Look up entity2 the same way.
                            # NOTE(review): this lookup uses entity2 *before*
                            # the comma/quote stripping and zh-hans conversion
                            # below — the lookup key and the written value can
                            # therefore differ; confirm this is intended.
                            find_entity2_result = self.graph.find_one(
                                property_key="title",
                                property_value=entity2,
                                label="HudongItem")
                            count += 1
                            # Progress indicator (12358 is the expected total
                            # line count of the input file).
                            print(count / 12358)
                            # Skip relations whose source entity is unknown.
                            if (find_entity1_result is None):
                                continue;
                            # Strip commas and double quotes from the relation
                            # text (they would corrupt the CSV).
                            entityRelationList = re.split(
                                ",|\"", entityRelationJson['relation'])
                            entityRelation = ""
                            for item in entityRelationList:
                                entityRelation = entityRelation + item
                            # Strip commas/quotes from entity2 and convert
                            # traditional Chinese to simplified.
                            entity2List = re.split(",|\"", entity2)
                            entity2 = ""
                            for item in entity2List:
                                entity2 = entity2 + item
                            entity2 = Converter('zh-hans').convert(entity2)
                            # entity2 unknown: create a NewNode once, then link
                            # via the relation2 file; otherwise link two
                            # existing HudongItems directly.
                            if (find_entity2_result is None):
                                if (entity2 not in newNodeList):
                                    fwNewNode.write(entity2 + "," + "newNode" + '\n')
                                    newNodeList.append(entity2)
                                fwWikidataRelation2.write(
                                    entity1 + "," + entityRelation + "," + entity2 + '\n')
                            else:
                                fwWikidataRelation.write(
                                    entity1 + "," + entityRelation + "," + entity2 + '\n')
class NeoManager:
    """Convenience wrapper around a py2neo ``Graph`` connection: node/relation
    creation and simple lookups against the 'labelHolder' / 'Creditless' labels.
    """

    def __init__(self, host, port, username, password):
        self.username = username
        self.host = host
        self.port = port
        self.password = password

    def connect(self):
        """Open the graph connection and build a NodeSelector on it."""
        print("http://" + self.host + ":" + str(self.port), self.username, self.password)
        self.graph = Graph("http://" + self.host + ":" + str(self.port),
                           username=self.username, password=self.password)
        # FIX: identity comparison (`!= None`) replaced with `is not None`.
        if self.graph is not None:
            print("Neo4j Database Connected.")
        self.selector = NodeSelector(self.graph)

    def createNode(self, nodelabel, nodename):
        """Create and return a node with the given label and name property."""
        nodename = str(nodename)
        nodelabel = str(nodelabel)
        node = Node(nodelabel, name=nodename)
        self.graph.create(node)
        return node

    def createRelation(self, nodeSrc, nodeDst, relationName):
        """Create src-[relationName]->dst; returns None when either node is missing."""
        relationName = str(relationName)
        if nodeSrc is None or nodeDst is None:
            return
        relationship = Relationship(nodeSrc, relationName, nodeDst)
        print(relationship)
        # self.setRelationAttribute(relation, 'credential', 0.9)
        self.graph.create(relationship)
        return relationship

    def setRelationAttribute(self, relationship, attribute, val):
        """Set one attribute on a relationship and return the stored value."""
        relationship[attribute] = val
        return relationship[attribute]

    def getRelationAttribute(self, relationship, attribute):
        """Read one attribute from a relationship."""
        return relationship[attribute]

    def findByName(self, findName):
        """Look up a node by name.

        Returns:
            (True, node) when found under the trusted 'labelHolder' label,
            (False, node_or_None) otherwise (the 'Creditless' lookup result).
        """
        findName = str(findName)
        trustable = self.graph.find_one(property_key="name",
                                        property_value=findName,
                                        label='labelHolder')
        if trustable is None:
            untrustable = self.graph.find_one(property_key="name",
                                              property_value=findName,
                                              label='Creditless')
            return False, untrustable
        else:
            return True, trustable

    def findAllByLabel(self, findLabel):
        """Select every node with the given label.

        FIX: the selection was computed and printed but never returned,
        leaving callers with None; it is now returned (backward compatible —
        previous callers ignored the None result).
        """
        findLabel = str(findLabel)
        selected = self.selector.select(findLabel)
        print(selected)
        return selected

    def findNodeRelation(self, node):
        """First relationship touching `node`, in either direction."""
        return self.graph.match_one(start_node=node, bidirectional=True)

    def hasStartToRelation(self, node, relstr):
        """Relationships of type `relstr` starting at `node`."""
        return self.graph.match(start_node=node, rel_type=relstr)

    def hasEndWithRelation(self, node, relstr):
        """Relationships of type `relstr` ending at `node`."""
        return self.graph.match(end_node=node, rel_type=relstr)

    def getRelationBetween(self, nodeA, nodeB):
        """All relationships between two nodes (either direction), or None."""
        if nodeA is None or nodeB is None:
            return None
        else:
            return self.graph.match(start_node=nodeA, end_node=nodeB, bidirectional=True)

# NOTE(review): in the sample below, findByName returns a (bool, node) tuple,
# so `node0 == None` / passing node0 to createRelation as written would not
# behave as intended — unpack the tuple first.
# neo = NeoManager('localhost', 7474, 'neo4j', '123')
# neo.connect()
# with open('../Datasets/TrainSetUnique.csv', 'r', encoding = 'utf-8') as input:
#     reader = csv.reader(input)
#     # row: [0] entity1 [1] entity2 [2] relation [3] example
#     for row in reader:
#         node0 = neo.findByName(row[0])
#         node1 = neo.findByName(row[1])
#         print(node0)
#         if node0 == None:
#             node0 = neo.createNode("labelHolder", row[0])
#         if node1 == None:
#             node1 = neo.createNode("labelHolder", row[1])
#         relation = neo.getRelationBetween(node0, node1)
#         print(relation)
#         if relation == None or relation != row[2]:
#             print(row[2])
#             relation = neo.createRelation(node0, node1, row[2])
class DBO(object):
    """Data-access layer over a Neo4j CMDB graph (py2neo v3, Python 2).

    Models Project -> Department -> Application -> Host with HostVul
    vulnerability nodes attached to hosts.
    """

    # Initialise: connect to the backing database.
    def __init__(self):
        self.graph = Graph(user='******', password='******')

    def list_organization_structure(self, Application=None, HostIP=None):
        # Build the WHERE clause by concatenation.
        # NOTE(review): values are interpolated unescaped into Cypher —
        # injection risk if Application/HostIP come from untrusted input.
        condition = "where 1=1"
        if Application:
            condition += ' and a.Name="%s"' % Application
        if HostIP:
            condition += ' and n.IP="%s"' % HostIP
        cypher = 'MATCH (p:Project)-[]-(d:Department)-[]-(a:Application)-[]-(n:Host) %s RETURN p.name as Project,d.name as Department,a.name as Application' % condition
        return self.graph.data(cypher)

    def enum_vul(self, TaskID, Cypher_Conditions=None):
        # Generator over HostVul nodes belonging to one scan task.
        if Cypher_Conditions:
            # selector.select.where not good for use , not support zh_cn just pure cypher
            cypher = 'MATCH (n:HostVul) where n.TaskID="%s" %s RETURN n ' % (TaskID, Cypher_Conditions)
            for data in self.graph.data(cypher):
                yield data["n"]
        else:
            selector = NodeSelector(self.graph)
            selected = selector.select("HostVul", TaskID=TaskID)
            for data in list(selected):
                yield data

    def add_vul(self, Vul_Data):
        # Insert a vulnerability and link it to its host, skipping duplicates.
        if not self.HostVul_exists(Vul_Data):
            Host = self.graph.find_one("Host", "IP", Vul_Data[u"IP"])
            vul = Node("HostVul")
            vul.update(Vul_Data)
            rel = Relationship(Host, "have", vul)
            self.graph.create(rel)

    def HostVul_exists(self, Vul_Data):
        # Duplicate check keyed on (TaskID, Scanner, IP, Port, ID).
        cypher = "Match (n:HostVul) where n.TaskID='%s' and n.Scanner='%s' and n.IP='%s' and n.Port='%s' and n.ID='%s' return n.IP limit 1 " % (
            Vul_Data[u"TaskID"], Vul_Data[u"Scanner"], Vul_Data[u"IP"], Vul_Data[u"Port"], Vul_Data[u"ID"])
        result = self.graph.data(cypher)
        # NodeSelector was too slow here; raw Cypher is used instead.
        # selector = NodeSelector(self.graph)
        # selected = selector.select("HostVul",
        #                            IP=Vul_Data[u"IP"],
        #                            ID=Vul_Data[u"ID"]).limit(1)
        # .where("_.IP = '%s'" % Vul_Data[u"IP"],
        #        "_.Port='%s'" % Vul_Data[u"Port"],
        #        "_.ID='%s'" % Vul_Data[u"ID"])
        return result

    def add_host(self, Application, host):
        # Ensure the host node exists and hang it under its application.
        self.node_simple_add("Host", "IP", host)
        host = self.graph.find_one("Host", "IP", host)
        app = self.graph.find_one("Application", "name", Application)
        self.rel_simple_add(app, "own", host)

    def add_department(self, Project, Department):
        # Ensure project + department nodes exist, then Project-own->Department.
        self.node_simple_add("Project", "name", Project)
        self.node_simple_add("Department", "name", Department)
        pro = self.graph.find_one("Project", property_key="name", property_value=Project)
        dep = self.graph.find_one("Department", property_key="name", property_value=Department)
        self.rel_simple_add(pro, "own", dep)

    def add_app(self, Project, Department, Application):
        # Ensure the full Project -> Department -> Application chain exists.
        self.node_simple_add("Project", "name", Project)
        self.node_simple_add("Department", "name", Department)
        self.node_simple_add("Application", "name", Application)
        pro = self.graph.find_one("Project", property_key="name", property_value=Project)
        dep = self.graph.find_one("Department", property_key="name", property_value=Department)
        app = self.graph.find_one("Application", property_key="name", property_value=Application)
        self.rel_simple_add(pro, "own", dep)
        self.rel_simple_add(dep, "own", app)

    ### meta operate

    def node_exists(self, label, Key, Value):
        # Return 2 when a node with this label/property exists, else 0.
        Find = self.graph.find_one(label, property_key=Key, property_value=Value)
        if Find:
            print "Node already exists: [%s: %s]" % (label, Find[Key])
            return 2
        else:
            return 0

    def node_simple_add(self, label, Key, Value):
        # Create the node unless it exists; 2 = already present, 1 = created.
        Find = self.graph.find_one(label, property_key=Key, property_value=Value)
        if Find:
            print "Node already exists: [%s: %s]" % (label, Find[Key])
            return 2
        else:
            n = Node(label)
            n.update({Key: Value})
            self.graph.create(n)
            return 1

    def rel_exists(self, start_node, rel, end_node):
        # Return 2 when the relationship already exists, else 0.
        Find = self.graph.match_one(start_node=start_node, rel_type=rel, end_node=end_node)
        if type(Find) == Relationship:
            print "Relationship already exists"
            return 2
        else:
            return 0

    def rel_simple_add(self, start_node, rel_type, end_node):
        # Create the relationship unless present; 2 = exists, 1 = created.
        Find = self.graph.match_one(start_node=start_node, rel_type=rel_type, end_node=end_node)
        if type(Find) == Relationship:
            print "Relationship already exists"
            return 2
        else:
            rel = Relationship(start_node, rel_type, end_node)
            self.graph.create(rel)
            return 1
class Robot():
    """NLU Robot — natural-language-understanding chatbot backed by Neo4j.

    NOTE(review): formatting reconstructed from a collapsed source; statement
    order is preserved.

    Public attributes:
    - graph: The connection of graph database.
    - selector: The selector of graph database.
    - locations: Navigation Locations.
    - is_scene: online-scene flag, defaults to False.
    - user: robot configuration record.
    - usertopics: list of topics the user may access.
    - address: resolved via the Baidu-map IP-location API; falls back to the
      configured default address on network failure.
    - topic: current QA topic.
    - qa_id: current QA id.
    - qmemory: short-term memory — last 10 user questions.
    - amemory: short-term memory — last 10 answers given.
    - pmemory: short-term memory — last correct answers (scene back-stack).
    - cmd_end_scene: commands that exit a scene.
    - cmd_previous_step: previous-step commands (global inside a scene).
    - cmd_next_step: next-step commands (driven by UI buttons).
    - cmd_repeat: repeat commands.
    - do_not_know: random fallback answers when nothing matches.
    """

    def __init__(self, password="******", userid="A0001"):
        self.graph = Graph("http://localhost:7474/db/data/", password=password)
        self.selector = NodeSelector(self.graph)
        # self.locations = get_navigation_location()
        self.is_scene = False
        self.user = self.selector.select("User", userid=userid).first()
        self.usertopics = self.get_usertopics(userid=userid)
        self.address = get_location_by_ip(self.user['city'])
        self.topic = ""
        self.qa_id = get_current_time()
        self.qmemory = deque(maxlen=10)
        self.amemory = deque(maxlen=10)
        self.pmemory = deque(maxlen=10)
        self.cmd_end_scene = ["退出业务场景", "退出场景", "退出", "返回", "结束", "发挥"]
        self.cmd_previous_step = ["上一步", "上一部", "上一页", "上一个"]
        self.cmd_next_step = ["下一步", "下一部", "下一页", "下一个"]
        self.cmd_repeat = ['重复', '再来一个', '再来一遍', '你刚说什么', '再说一遍', '重来']
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗"
        ]

    def __str__(self):
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(
            **self.user)

    @time_me()
    def configure(self, info="", userid="A0001"):
        """Configure the knowledge base: enable the sub-databases named in
        `info` (space-separated) for `userid`, disable the rest, and return
        the resulting configuration.
        """
        # NOTE(review): `is not ""` is an identity comparison with a literal
        # (SyntaxWarning on Python 3.8+); `userid != ""` is what is meant.
        assert userid is not "", "The userid can not be empty!"
        # Sanity-check the incoming userid; fall back to the default. (2017-6-7)
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是默认值,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        config = {"databases": []}
        if info != '':
            selected_names = info.split()
            forbidden_names = list(
                set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # TODO: merge/simplify these two loops — CONTAINS could be used.
            for name in selected_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=1"
                self.graph.run(match_string)
            for name in forbidden_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=0"
                self.graph.run(match_string)
        match_string = "MATCH (user:User)-[r:has]->(config:Config)" + \
            "where user.userid='" + userid + \
            "' RETURN config.name as name, r.bselected as bselected, r.available as available"
        for item in self.graph.run(match_string):
            config["databases"].append(
                dict(name=item[0], bselected=item[1], available=item[2]))
        print("可配置信息:", config)
        return config

    # @time_me()
    def get_usertopics(self, userid="A0001"):
        """Get the list of topics the user is entitled to."""
        usertopics = []
        if not userid:
            userid = "A0001"
        # Fetch the sub-knowledge-bases this user has selected and available.
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualize a robot answer by filling user fields into the template."""
        return sentence.format(**self.user)

    # @time_me()
    def add_to_memory(self, question="question", userid="A0001"):
        """Add the current user question to the Memory chain in the graph.

        Args:
            question: user question. Defaults to "question".
            userid: unique user id. Defaults to "userid".
        """
        previous_node = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        node = Node("Memory", question=question, userid=userid, qa_id=self.qa_id)
        if previous_node:
            relation = Relationship(previous_node, "next", node)
            self.graph.create(relation)
        else:
            self.graph.create(node)

    # Disabled navigation extraction — kept for reference.
    # def extract_navigation(self, question):
    """Extract navigation from question。从问题中抽取导航地点。
    从导航地点列表选取与问题匹配度最高的地点。
    QA匹配模式:(模糊匹配/全匹配)

    Args:
        question: User question. 用户问题。
    """
    # result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
    #     context="", tid="", ftid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
    # 模式1:模糊匹配
    # temp_sim = 0
    # sv1 = synonym_cut(question, 'wf')
    # if not sv1:
    #     return result
    # for location in self.locations:
    #     sv2 = synonym_cut(location, 'wf')
    #     if sv2:
    #         temp_sim = similarity(sv1, sv2, 'j')
    #     # 匹配加速,不必选取最高相似度,只要达到阈值就终止匹配
    #     if temp_sim > 0.92:
    #         print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
    #         result["content"] = location
    #         result["context"] = "user_navigation"
    #         result["behavior"] = int("0x001B", 16)
    #         return result
    # 模式2:全匹配,判断“去”和地址关键词是就近的动词短语情况
    # for location in self.locations:
    #     keyword = "去" + location
    #     if keyword in question:
    #         print("Original navigation")
    #         result["name"] = keyword
    #         result["content"] = location
    #         result["context"] = "user_navigation"
    #         result["behavior"] = int("0x001B", 16)
    #         return result
    # return result

    def update_result(self, question='', node=None):
        """Build the answer dict for `question` from a matched NluCell node;
        with no node, return the random "don't know" fallback answer."""
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", ftid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        if not node:
            return result
        result['name'] = self.iformat(node["name"])
        # A node may carry several alternative answers separated by '|'.
        result["content"] = self.iformat(
            random_item(node["content"].split("|")))
        result["context"] = node["topic"]
        result["tid"] = node["tid"]
        result["ftid"] = node["ftid"]
        result["txt"] = node["txt"]
        result["img"] = node["img"]
        result["button"] = node["button"]
        if node["url"]:
            result["url"] = random_item(node["url"].split("|"))
        if node["behavior"]:
            # behavior is stored as a hex string, e.g. "0x001B".
            result["behavior"] = int(node["behavior"], 16)
        if node["parameter"]:
            result["parameter"] = node["parameter"]
        func = node["api"]
        if func:
            # NOTE(review): exec of a function name taken from the database —
            # code-injection risk if the graph content is not trusted.
            exec("result['content'] = " + func + "('" + result["content"] + "')")
        return result

    def extract_pinyin(self, question, subgraph, threshold=0.6, athreshold=0.8):
        """Extract synonymous QA in NLU database using pinyin similarity.

        QA matching mode: pick the best-scoring QA pair from the graph.

        Args:
            question: User question.
            subgraph: nodes of the current dialogue domain.
        """
        temp_sim = 0
        ss = []
        max_score = 0
        sv1 = pinyin_cut(question)
        print(sv1)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print(" ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Early exit: accept the first candidate above athreshold instead
            # of scanning for the global maximum.
            if temp_sim > athreshold:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                return self.update_result(question, node)
            # ===========================================================
            ss.append(temp_sim)
        # NOTE(review): max() raises ValueError when subgraph is empty —
        # callers appear to guard against that; confirm.
        max_score = max(ss)
        if max_score > threshold:
            node = subgraph[ss.index(max_score)]
            iquestion = self.iformat(node["name"])
            print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
            return self.update_result(question, node)
        # ===========================================================
        return self.update_result(question)

    def extract_synonym(self, question, subgraph, threshold=0.60, athreshold=0.92):
        """Extract synonymous QA in NLU database.

        QA matching mode: return the first QA pair exceeding athreshold,
        otherwise the best pair above threshold.

        Args:
            question: User question.
            subgraph: nodes of the current dialogue domain.
        """
        temp_sim = 0
        ss = []
        max_score = 0
        sv1 = synonym_cut(question, 'wf')  # based on semantic.jaccard
        # sv1 = segment(question)  # based on semantic.jaccard2
        if not sv1:
            return self.update_result(question)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                # Exact text match wins immediately.
                print("Similarity Score: Original sentence")
                return self.update_result(question, node)
            sv2 = synonym_cut(iquestion, 'wf')  # based on semantic.jaccard
            # sv2 = segment(iquestion)  # based on semantic.jaccard2
            if sv2:
                temp_sim = similarity(sv1, sv2, 'j')  # based on semantic.jaccard
                # temp_sim = similarity(sv1, sv2, 'j2')  # based on semantic.jaccard2
                # Early exit once the accept threshold is reached.
                if temp_sim > athreshold:
                    print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                    return self.update_result(question, node)
            # ===========================================================
            ss.append(temp_sim)
        max_score = max(ss)
        if max_score > threshold:
            node = subgraph[ss.index(max_score)]
            iquestion = self.iformat(node["name"])
            print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
            return self.update_result(question, node)
        # ===========================================================
        return self.update_result(question)

    def extract_synonym_first(self, question, subgraph, threshold=0.60):
        """Extract synonymous QA in NLU database.

        QA matching mode: pick the highest-scoring QA pair (no early exit).

        Args:
            question: User question.
            subgraph: nodes of the current dialogue domain.
        """
        temp_sim = 0
        ss = []
        max_score = 0
        sv1 = synonym_cut(question, 'wf')  # based on semantic.jaccard
        # sv1 = segment(question)  # based on semantic.jaccard2
        if not sv1:
            return self.update_result(question)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                print("Similarity Score: Original sentence")
                return self.update_result(question, node)
            sv2 = synonym_cut(iquestion, 'wf')  # based on semantic.jaccard
            # sv2 = segment(iquestion)  # based on semantic.jaccard2
            if sv2:
                temp_sim = similarity(sv1, sv2, 'j')  # based on semantic.jaccard
                # temp_sim = similarity(sv1, sv2, 'j2')  # based on semantic.jaccard2
            ss.append(temp_sim)
        max_score = max(ss)
        if max_score > threshold:
            node = subgraph[ss.index(max_score)]
            iquestion = self.iformat(node["name"])
            print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
            return self.update_result(question, node)
        return self.update_result(question)

    def extract_keysentence(self, question, data=None, threshold=0.40):
        """Extract key-sentence QA in NLU database.

        QA matching mode: take the first QA pair whose name is contained in
        the question.

        Args:
            question: User question.
        """
        if data:
            subgraph = [node for node in data if node["name"] in question]
        else:
            # Only match within the knowledge bases currently attached.
            usertopics = ' '.join(self.usertopics)
            match_string = "MATCH (n:NluCell) WHERE '" + question + \
                "' CONTAINS n.name and '" + usertopics + \
                "' CONTAINS n.topic RETURN n LIMIT 1"
            subgraph = [
                item['n'] for item in self.graph.run(match_string).data()
            ]
        if subgraph:
            # Take the first matching node.
            print("Similarity Score: Key sentence")
            # return self.extract_synonym(question, subgraph, threshold=threshold)
            node = subgraph[0]
            return self.update_result(question, node)
        return self.update_result(question)

    def extract_keysentence_first(self, question, data=None, threshold=0.40):
        """Extract key-sentence QA in NLU database.

        QA matching mode: among QA pairs contained in the question, pick the
        one with the highest similarity score.

        Args:
            question: User question.
        """
        if data:
            subgraph = [node for node in data if node["name"] in question]
        else:
            # Only match within the knowledge bases currently attached.
            usertopics = ' '.join(self.usertopics)
            match_string = "MATCH (n:NluCell) WHERE '" + question + \
                "' CONTAINS n.name and '" + usertopics + \
                "' CONTAINS n.topic RETURN n"
            subdata = self.graph.run(match_string).data()
            subgraph = [item['n'] for item in subdata]
        if subgraph:
            # Pick the highest-scoring candidate.
            print("Similarity Score: Key sentence")
            return self.extract_synonym_first(question, subgraph, threshold=threshold)
        return self.update_result(question)

    def remove_name(self, question):
        """Strip forms of address for the robot from the question."""
        # A bare two-character "小X" is treated as calling the robot's name.
        if question.startswith("小") and len(question) == 2:
            question = self.user['robotname']
        # Filter leading robot nicknames.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(
                    robotname) and len(question) >= 4 and "在线" not in question:
                # NOTE(review): str.lstrip treats its argument as a character
                # SET, not a prefix — this can strip more than the nickname;
                # removeprefix-style logic may be what was intended.
                question = question.lstrip(robotname)
        if not question:
            question = self.user['robotname']
        return question

    @time_me()
    def search(self, question="question", tid="", userid="A0001"):
        """Nlu search — semantic search entry point.

        Args:
            question: user question. Defaults to "question".
            userid: unique user id. Defaults to "userid".

        Returns:
            Dict contains: question, answer, topic, tid, url, behavior,
            parameter, txt, img, button.
        """
        # Add to question memory (disabled).
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)
        # Semantics: scene + whole graph + per-user configuration
        # (user record is re-fetched dynamically from userid).
        # ======================== initialise configuration ==========================
        self.user = self.selector.select("User", userid=userid).first()
        self.usertopics = self.get_usertopics(userid=userid)
        do_not_know = dict(
            question=question,
            name="",
            content=self.iformat(random_item(self.do_not_know)),
            # content="",
            context="",
            tid="",
            ftid="",
            url="",
            behavior=0,
            parameter="",
            txt="",
            img="",
            button="",
            valid=1)
        error_page = dict(
            question=question,
            name="",
            content=self.user['error_page'],
            context="",
            tid="",
            ftid="",
            url="",
            behavior=int("0x1500", 16),  # Modify: in-scene behavior unified to 0x1500. (2018-1-8)
            parameter="",
            txt="",
            img="",
            button="",
            valid=0)
        # ======================== 1. preprocessing =============================
        # Sensitive-word filter.
        if check_swords(question):
            print("问题包含敏感词!")
            return do_not_know
        # Remove forms of address.
        question = self.remove_name(question)
        # ======================== 2. navigation (disabled) =====================
        # result = self.extract_navigation(question)
        # if result["context"] == "user_navigation":
        #     self.amemory.append(result)  # 添加到普通记忆
        #     self.pmemory.append(result)
        #     return result
        # ======================== 3. semantic scene ============================
        result = copy.deepcopy(do_not_know)
        # Global context — repeat the previous answer.
        for item in self.cmd_repeat:
            # TODO: make sure we repeat a real answer, not e.g. a song's closing line.
            # TODO: pick the most recent *meaningful* action from memory.
            if item == question:
                if self.amemory:
                    return self.amemory[-1]
                else:
                    return do_not_know
        # Scene — exit (exact match only).
        for item in self.cmd_end_scene:
            if item == question:
                result['behavior'] = 0
                result['name'] = '退出'
                result['content'] = ""
                self.is_scene = False
                self.topic = ""
                self.amemory.clear()  # clear scene memory
                self.pmemory.clear()  # clear scene back-stack
                return result
        # Scene — previous step: go back to the parent node
        # (TODO: unify with the next-step mode).
        if self.is_scene:
            for item in self.cmd_previous_step:
                if item in question:
                    # Link-jump check added (scheme adopted 2017-12-22).
                    if len(self.pmemory) > 1:
                        self.amemory.pop()
                        return self.pmemory.pop()
                    elif len(self.pmemory) == 1:
                        return self.pmemory[-1]
                    else:
                        return error_page
            # Scene — next step: driven by the UI button of the last answer.
            for item in self.cmd_next_step:
                if item in question:
                    if len(self.amemory) >= 1:
                        parent = self.amemory[-1]
                        if parent['button']:
                            next_name = parent['button'].split('|')[-1]
                            if next_name != '0':  # '0' means no next step
                                # print(type(parent['tid']), parent['tid'])
                                match_string = "MATCH (n:NluCell {name:'" + \
                                    next_name + "', topic:'" + self.topic + \
                                    "', ftid:" + str(int(parent['tid'])) + "}) RETURN n"
                                match_data = list(
                                    self.graph.run(match_string).data())
                                if match_data:
                                    node = match_data[0]['n']
                                    result = self.update_result(question, node)
                                    # Push into scene memory.
                                    self.pmemory.append(self.amemory[-1])
                                    self.amemory.append(result)
                                    return result
                    return error_page
        # ========================== scene matching =========================
        if self.is_scene:
            # Inside a scene: semantic mode + key-sentence mode.
            # All nodes belonging to the current scene topic.
            match_scene = "MATCH (n:NluCell) WHERE n.topic='" + self.topic + "' RETURN n"
            scene_nodes = self.graph.run(match_scene).data()
            # Children of the last answered node: ftid == parent tid.
            subscene_nodes = [
                item['n'] for item in scene_nodes
                if item['n']['ftid'] == self.amemory[-1]['tid']
            ]
            if subscene_nodes:
                result = self.extract_synonym_first(question, subscene_nodes)
                if not result["context"]:
                    result = self.extract_keysentence_first(
                        question, subscene_nodes)
                if not result["context"]:
                    result = self.extract_pinyin(question, subscene_nodes)
                if result["context"]:
                    print("正确匹配到当前场景的子场景")
                    self.pmemory.append(self.amemory[-1])
                    self.amemory.append(result)  # push to scene memory
                    return result
            return error_page
        else:
            # Not in a scene: semantic mode + key-sentence mode.
            # All nodes whose semantic tag matches the question's tag.
            tag = get_tag(question, self.user)
            match_graph = "MATCH (n:NluCell) WHERE n.tag='" + tag + \
                "' and '" + ' '.join(self.usertopics) + "' CONTAINS n.topic RETURN n"
            usergraph_all = [
                item['n'] for item in self.graph.run(match_graph).data()
            ]
            if usergraph_all:
                # Synonym match. TODO: make the threshold configurable.
                result = self.extract_synonym(question, usergraph_all,
                                              threshold=0.90)
                # Key-sentence match. TODO: add an on/off switch.
                if not result["context"]:
                    result = self.extract_keysentence(question)
                # Pinyin match. TODO: add an on/off switch.
                if not result["context"]:
                    result = self.extract_pinyin(question, usergraph_all)
            # else:  # Global pinyin match. TODO: add an on/off switch.
            #     match_pinyin = "MATCH (n:NluCell) WHERE '" + \
            #         ' '.join(self.usertopics) + "' CONTAINS n.topic RETURN n"
            #     usergraph_pinyin = [item['n'] for item in self.graph.run(match_pinyin).data()]
            #     if usergraph_pinyin:
            #         result = self.extract_pinyin(question, usergraph_pinyin)
            if result["tid"] != '':  # matched a scene node
                if int(result["tid"]) == 0:
                    print("不在场景中,匹配到场景根节点")
                    self.is_scene = True  # enter the scene
                    self.topic = result["context"]
                    self.amemory.clear()  # clear plain memory before entering
                    self.pmemory.clear()
                    self.amemory.append(result)  # push to scene memory
                    self.pmemory.append(result)
                    return result
                else:
                    print("不在场景中,匹配到场景子节点")
                    return do_not_know
            elif result["context"]:  # matched an ordinary node
                self.topic = result["context"]
                self.amemory.append(result)  # push to plain memory
                self.pmemory.append(result)
                return result
        # ======== 5. online semantics (Modify: temporarily disabled 2018-1-23) ========
        # if not self.topic:
            # 1.音乐(唱一首xxx的xxx)
            # if "唱一首" in question or "唱首" in question or "我想听" in question:
                # result["behavior"] = int("0x0001", 16)
                # result["content"] = "好的,正在准备哦"
            # 2.附近有什么好吃的
            # elif "附近" in question or "好吃的" in question:
                # result["behavior"] = int("0x001C", 16)
                # result["content"] = self.address
            # 3.nlu_tuling(天气)
            # elif "天气" in question:
                # 图灵API变更之后 Add in 2017-8-4
                # location = get_location(question)
                # if not location:
                    # 问句中不包含地址
                    # weather = nlu_tuling(self.address + question)
                # else:
                    # 问句中包含地址
                    # weather = nlu_tuling(question)
                # 图灵API变更之前
                # weather = nlu_tuling(question, loc=self.address)
                # result["behavior"] = int("0x0000", 16)
                # try:
                    # 图灵API变更之前(目前可用)
                    # temp = weather.split(";")[0].split(",")[1].split()
                    # myweather = temp[0] + temp[2] + temp[3]
                    # 图灵API变更之后 Add in 2017-8-3
                    # temp = weather.split(",")
                    # myweather = temp[1] + temp[2]
                # except:
                    # myweather = weather
                # result["content"] = myweather
                # result["context"] = "nlu_tuling"
            # 4.追加记录回答不上的所有问题
            # else:
                # with open(log_do_not_know, "a", encoding="UTF-8") as file:
                    # file.write(question + "\n")
            # 5.nlu_tuling
        # else:
            # result["content"] = nlu_tuling(question, loc=self.address)
            # result["context"] = "nlu_tuling"
        # if result["context"]:  # 匹配到在线语义
            # self.amemory.append(result)  # 添加到普通记忆
        # ==============================================================
        # Append every unanswerable question to the log file.
        if not self.topic:
            with open(log_do_not_know, "a", encoding="UTF-8") as file:
                file.write(question + "\n")
        return result
print(ans) # 通过知识图谱查询 elif response[0] == '#': if response.__contains__("neo4j"): if len(input_message) < 4: ans = kgquery_entity(input_message) print(ans) else: a = input_message.find('和') b = input_message.find('的') name1 = input_message[:a] name2 = input_message[a + 1:b] #提取实体的类别名,在find_one中,类别名跟输入有关系 label1 = re.search(r".*[老师/学生/项目]", name1).group(0) label2 = re.search(r".*[老师/学生/项目]", name2).group(0) #find_one函数中,类别名由输入定,属性值也由输入值定 n1 = test_graph.find_one(label1, property_key="name", property_value=name1) n2 = test_graph.find_one(label2, property_key="name", property_value=name2) ans = kgquery_rel(n1, n2) ans = str(ans) print(name1 + '和' + name2 + '的关系是:' + ans) elif response.__contains__("NoMatchingTemplate"): print("NoMatchingTemplate") print("搜索引擎查询,此功能暂不支持") else: print('ver:' + response)
class Query_Configuration:
    """Query helpers over a py2neo (v2-style) graph: label / property /
    relationship lookups plus node merging within a namespace hierarchy.
    """

    def __init__(self, graph=None):
        # FIX: the original only assigned self.graph when no graph was passed,
        # silently dropping a caller-supplied graph and leaving the attribute
        # unset (AttributeError on first use).
        self.graph = Graph() if graph is None else graph
        # Namespace stack consumed by construct_merge_node (name/node pairs).
        # FIX: it was read and appended to but never initialised anywhere
        # visible in this class.
        self.namespace = []

    @staticmethod
    def _first_columns(results):
        """Collect the first column of every result row into a list."""
        return [row[0] for row in results]

    def match_labels(self, label):
        """All nodes carrying `label`."""
        return self._first_columns(
            self.graph.cypher.execute("MATCH (m:" + label + ") RETURN m"))

    def match_label_property(self, label, prop_index, prop_value):
        """Nodes with `label` whose property `prop_index` equals `prop_value`."""
        return self._first_columns(self.graph.cypher.execute(
            "MATCH (n:" + label + ") Where (n." + prop_index + "='" + prop_value + "') RETURN n"
        ))

    def match_relationship(self, relationship):
        """End nodes of every relationship of the given type."""
        query_string = "MATCH n-[:" + relationship + "]->m RETURN m"
        # print "---------query string ---------------->"+query_string
        return self._first_columns(self.graph.cypher.execute(query_string))

    # not tested yet
    def cypher_query(self, query_string, return_variable):
        """Run an arbitrary Cypher fragment, returning `return_variable`."""
        query_string = query_string + " RETURN " + return_variable
        # print "---------query string ---------------->"+query_string
        return self._first_columns(self.graph.cypher.execute(query_string))

    def modify_properties(self, graph_object, new_properties):
        """Overwrite the given properties on a node/relationship and push."""
        for key in new_properties.keys():
            graph_object.properties[key] = new_properties[key]
        graph_object.push()

    # Concept of namespace: `name` is a string that guarantees uniqueness —
    # essentially the directory path of the tree.
    def construct_merge_node(self, push_namespace, relationship, label, name, new_properties):
        """Find-or-create a node named `name`, update its properties, and link
        it under the current namespace node.

        NOTE(review): get_namespace / get_namespace_node are not defined in
        this class — presumably provided by a subclass; confirm.
        """
        namespace = self.get_namespace(name)
        node = self.graph.find_one(label, property_key="name", property_value=name)
        if node is not None:
            # FIX: the original ran find_one a second time for the None check
            # and then referenced the undefined name `properties` (NameError);
            # it now reuses `node` and reads `new_properties`.
            for key in new_properties.keys():
                node.properties[key] = new_properties[key]
            node.push()
            return node
        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for key in new_properties.keys():
            node.properties[key] = new_properties[key]
        self.graph.create(node)
        # Attach the new node beneath the current namespace node, if any.
        if len(self.namespace) != 0:
            relation_enity = Relationship(self.get_namespace_node(), relationship, node)
            self.graph.create(relation_enity)
        if push_namespace:
            self.namespace.append(name)
            self.namespace.append(node)
        return node

    def match_relation_property_specific(
        self, label_name, property_name, property_value, label, return_name, return_value
    ):
        """Descendants of the matched node carrying `label` whose
        `return_name` property equals `return_value`."""
        query_string = (
            "MATCH (n:"
            + label_name
            + " { "
            + property_name
            + ':"'
            + property_value
            + '"})-[*]->(o:'
            + label
            + ") Where o."
            + return_name
            + ' = "'
            + return_value
            + '" RETURN o'
        )
        # print "query string ",query_string
        return self._first_columns(self.graph.cypher.execute(query_string))

    def match_relation_property(self, label_name, property_name, property_value, label):
        """All descendants (any depth) of the matched node carrying `label`."""
        query_string = (
            "MATCH (n:"
            + label_name
            + " { "
            + property_name
            + ':"'
            + property_value
            + '"})-[*]->(o:'
            + label
            + ") RETURN o"
        )
        return self._first_columns(self.graph.cypher.execute(query_string))
class Cq(object):
    """A CQ (broadcast call) message stored in Neo4j, plus its responses."""

    def __init__(self):
        """Initialise an empty CQ and connect to the graph database."""
        self.id = ''
        self.subject = ''
        self.message = ''
        self.created_date = ''
        self._graph_db = Graph(settings.DATABASE_URL)

    @property
    def cq_properties(self):
        """Instance attributes minus the graph handle, as a plain dict."""
        properties_dict = dict(self.__dict__)
        del properties_dict['_graph_db']
        return properties_dict

    @property
    def cq_node(self):
        """This CQ's graph node, or None when no id has been set."""
        if self.id != '':
            return self._graph_db.find_one(GraphLabel.CQ,
                                           property_key='id',
                                           property_value=self.id)

    @property
    def response_list(self):
        """list of responses to this CQ
        :return: list of response property dicts, each tagged with 'by'
        """
        cq_response_relationship = self._graph_db.match(start_node=self.cq_node,
                                                        rel_type=GraphRelationship.TO,
                                                        end_node=None)
        response_list = []
        for rel in cq_response_relationship:
            response = rel.end_node.properties
            # NOTE(review): this matches a RESPONDED edge ending at the CQ node
            # itself, not at the individual response, so every entry gets the
            # same 'by' user — confirm the intended end_node.
            user_response_relationship = self._graph_db.match_one(
                start_node=None,
                rel_type=GraphRelationship.RESPONDED,
                end_node=self.cq_node)
            user_node = user_response_relationship.start_node
            response['by'] = '%s / %s' % (user_node.properties['name'],
                                          user_node.properties['call_sign'])
            response_list.append(response)
        return response_list

    @staticmethod
    def create_cq(user_node, cq_dict):
        """Persist a new CQ node from `cq_dict` and link it to the sender."""
        cq_dict['id'] = str(uuid.uuid4())
        cq_dict['created_date'] = datetime.date.today()
        cq_node = Node.cast(GraphLabel.CQ, cq_dict)
        cq_node, = Graph(settings.DATABASE_URL).create(cq_node)
        cq_relationship = Relationship(user_node, GraphRelationship.SENT, cq_node)
        Graph(settings.DATABASE_URL).create_unique(cq_relationship)

    @staticmethod
    def most_recent_cqs():
        """Return {'cqs': [...]} with one dict per matched CQ.

        TODO(review): cypher_str was left empty in the original — executing an
        empty statement cannot return rows; the query text still needs writing.
        """
        params = {}
        cypher_str = ""
        match_results = Graph(settings.DATABASE_URL).cypher.execute(statement=cypher_str,
                                                                    parameters=params)
        cq_list = []
        for item in match_results:
            # BUG FIX: the original built one dict before the loop and appended
            # it every iteration, so all list entries aliased the same (last)
            # row; build a fresh dict per row instead.
            cq = {'id': item.id,
                  'subject': item.subject,
                  'message': item.message,
                  'created_date': item.created_date}
            cq_list.append(cq)
        root = {}
        root['cqs'] = cq_list
        return root

    def response(self, response_id):
        """response dictionary details including user details
        :param response_id: id property of the RESPONSE node
        :return: dict with response details and a dict of the responding user
        """
        response_node = self._graph_db.find_one(GraphLabel.RESPONSE,
                                                property_key='id',
                                                property_value=response_id)
        response_user_relationship = self._graph_db.match_one(
            start_node=None,
            rel_type=GraphRelationship.RESPONDED,
            end_node=response_node)
        response_dict = {}
        response_dict['response'] = response_node.auto_sync_properties
        response_dict['user'] = response_user_relationship.start_node.properties
        return response_dict
class NeoPipeline(object): def __init__(self): self.graph_path = config.GRAPH_DB['graph_path'] self.graph = Graph(self.graph_path) self.sql_path = "data/network.sqlite" def nodes_from_sql(self, query, label, unique="id"): """ INPUT: str, str, str OUTPUT: None Imports node data from sql query into neo4j """ # Extract data from sql db. with sql.connect(self.sql_path) as con: nodes = pd.read_sql(sql=query, con=con, index_col=None) nodes_dict = nodes.to_dict(outtype="records") # Create nodes in graph. self.graph.schema.create_uniqueness_constraint(label, unique) for node in nodes_dict: n = Node.cast(label, node) self.graph.create(n) def relationships_from_sql(self, query, nodes, label, properties): """ INPUT: str, list(dict), str, dict OUTPUT: None Imports relationship data from sql query into neo4j """ with sql.connect(self.sql_path) as con: rels = pd.read_sql(sql=query, con=con, index_col=None) rels_dict = rels.to_dict(outtype="records") for rel in rels_dict: r = Relationship.cast(self.graph.find_one(nodes[0]["label"], nodes[0]["property"], rel[nodes[0]["sql_col"]]), label, self.graph.find_one(nodes[1]["label"], nodes[1]["property"], rel[nodes[1]["sql_col"]]), properties) self.graph.create(r) def build_network(self): query_players = ''' SELECT player_name AS name, player_id AS id, player_pos AS pos FROM individuals_subset GROUP BY player_id ''' self.nodes_from_sql(query_players, "Players", unique="id") query_coaches = ''' SELECT coach_name AS name, coach_id AS id FROM individuals_subset GROUP BY coach_id ''' self.nodes_from_sql(query_coaches, "Coaches", unique="id") query_play_coach = ''' SELECT * FROM individuals_subset ''' play_coach = [{'label': "Coach", 'property': "id", 'sql_col': "coach_id"}, {'label': "Player", 'property': "id", 'sql_col': "player_id"}] self.relationships_from_sql(query_play_coach, nodes=play_coach, label_rel="COACHED", properties={"league": "NBA"})
class PopItToNeo(object):
    """Mirror PopIt (Popolo) persons, organizations, posts and memberships
    into a Neo4j graph via py2neo, caching fetched nodes in memory."""

    def __init__(self):
        # BUG FIX: the original used yaml.load(open("config.yaml")) — no
        # Loader (deprecated/unsafe) and a leaked file handle.
        with open("config.yaml") as config_file:
            config = yaml.safe_load(config_file)
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"
        # you know so that you can override this. why? I am not sure
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:
            self.graph.delete_all()
        # Because I am still not familiar to query with cypher
        # So lets cache here. Hopefully the memory usage don't kill me
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    def process_membership(self):
        """Walk the paginated membership list, creating (person)-[role]->(post)
        and (person)-[role]->(organization) relationships."""
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")
            entries = data["result"]
            for entry in entries:
                # a membership has 3 important fields: person_id, organization_id, post_id
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue
                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue
                role = entry.get("role", "member")
                if not role:
                    role = "member"
                logging.warning("Role: %s" % role)
                # Relationship properties; popit_id always, dates when present.
                kwparams = {}
                kwparams["popit_id"] = entry["id"]
                start_date = get_timestamp(entry.get("start_date"))
                if start_date:
                    kwparams["start_date"] = start_date
                end_date = get_timestamp(entry.get("end_date"))
                if end_date:
                    kwparams["end_date"] = end_date
                post_exist = False
                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    if self.graph.match_one(person, role, post):
                        post_exist = True
                        logging.warning("Already exist, skipping")
                    if not post_exist:
                        relationship = Relationship(person, role, post, **kwparams)
                        self.graph.create(relationship)
                organization_exist = False
                if entry.get("organization_id"):
                    organization = self.fetch_organization(entry["organization_id"])
                    if not organization:
                        continue
                    if self.graph.match_one(person, role, organization):
                        logging.warning("Already exist, skipping")
                        organization_exist = True
                    if not organization_exist:
                        relationship = Relationship(person, role, organization, **kwparams)
                        self.graph.create(relationship)
            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        """Return the Persons node for `person_id`, fetching from the API and
        creating it on demand; None when the API has no such person."""
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]
        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node
        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Don't assume that this id won't be created the next time
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")
        entity = data["result"]
        # The API sometimes returns a list of names; take the first.
        if type(entity["name"]) == list:
            name = entity["name"][0]
        else:
            name = entity["name"]
        logging.warning("Name: %s" % name)
        kwparam = {}
        birth_date = get_timestamp(entity.get("birth_date"))
        if birth_date:
            kwparam["birth_date"] = birth_date
        death_date = get_timestamp(entity.get("death_date"))
        if death_date:
            kwparam["death_date"] = death_date
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]
        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        """Return the Organization node for `organization_id`, fetching from
        the API and creating it on demand; None when it does not exist."""
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]
        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node
        organization_url = "%s/%s/%s" % (self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")
        entity = data["result"]
        if type(entity["name"]) == list:
            name = entity["name"][0]
        else:
            name = entity["name"]
        kwparams = {}
        logging.warning("Name: %s" % name)
        kwparams["name"] = name
        kwparams["popit_id"] = entity["id"]
        founding_date = get_timestamp(entity.get("founding_date"))
        if founding_date:
            kwparams["founding_date"] = founding_date
        dissolution_date = get_timestamp(entity.get("dissolution_date"))
        if dissolution_date:
            kwparams["dissolution_date"] = dissolution_date
        if "classification" in entity:
            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]
        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        """Return the Posts node for `post_id`, creating it (and its 'of' edge
        to the owning organization) on demand; None when it does not exist."""
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]
        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node
        # BUG FIX: the format string was "%s/% s/%s" (stray space flag).
        post_url = "%s/%s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")
        entity = data["result"]
        # Fetch organization node, because post is linked to organization.
        # What is the implication of post without organization?
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            logging.warning(e.message)
            organization = None
        logging.warning("Label: %s" % entity["label"])
        kwparams = {}
        kwparams["name"] = entity["label"]
        kwparams["popit_id"] = entity["id"]
        start_date = get_timestamp(entity.get("start_date"))
        if start_date:
            kwparams["start_date"] = start_date
        end_date = get_timestamp(entity.get("end_date"))
        if end_date:
            kwparams["end_date"] = end_date
        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node
        if organization:
            temp_param = {}
            if start_date:
                temp_param["start_date"] = start_date
            if end_date:
                temp_param["end_date"] = end_date
            # BUG FIX: the original passed **kwparams (the post's own name and
            # popit_id) onto the 'of' edge even though temp_param was built
            # for exactly this purpose.
            relation = Relationship(node, "of", organization, **temp_param)
            self.graph.create(relation)
        return node

    def process_parent_company(self):
        """Link organizations to their parents with parent_of/child_of edges."""
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)
        while True:
            data = self.fetch_entity(organizations_url)
            entries = data["result"]
            for entry in entries:
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                else:
                    logging.warning(entry.get("parent_id"))
                # TODO: Dafuq this is not DRY.
                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                parent_relationship = Relationship(parent_node, "parent_of", child_node)
                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(parent_relationship)
                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                child_relationship = Relationship(child_node, "child_of", parent_node)
                self.graph.create(child_relationship)
            if "next_url" in data:
                organizations_url = data["next_url"]
                logging.warning(organizations_url)
            else:
                break

    def process_posts(self):
        """Walk the paginated post list, materialising each post node."""
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            entries = data["result"]
            for entry in entries:
                node = self.fetch_post(entry["id"])
                # NOTE(review): fetch_post already created/loaded this node;
                # creating it again here looks redundant — confirm intent.
                self.graph.create(node)
                # Since creating organization relationship is already part of getting post
                # our job is done here
            if "next_url" in data:
                post_url = data["next_url"]
                logging.warning(post_url)
            else:
                break

    def fetch_entity(self, url):
        """GET `url` and return its JSON body, or {} on any non-200 status."""
        r = requests.get(url)
        time.sleep(0.1)  # crude client-side rate limiting
        if r.status_code != 200:
            # Just to make output consistent, excception did not kill the script anyway
            return {}
        return r.json()
__author__ = 'Marnee Dearman' from py2neo import Graph, Node, Relationship from settings import graphene graph = Graph(graphene.DATABASE_URL) print graph # find a node or set of nodes according to properties and labels # graph.find_one() # returns a single node # graph.find() # returns a generator # Let's find Marnee marnee_node = graph.find_one("Person", property_key="name", property_value="Marnee") print "find_one Marnee %s" % marnee_node marnee_generator = graph.find("Person", property_key="name", property_value="Marnee") for marnee in marnee_generator: print marnee # Let's find Julian julian_node = graph.find_one("Person", property_key="name", property_value="Julian") print "find_one Julian %s" % julian_node # Let's find all the Persons Julian knows # show the Cypher -- MATCH
) # #Recommendation # ##Add User # In[4]: UserNode = graph_db.merge_one("User", "Name", "Ragnar") # ##Add User likes # In[5]: UserRef = graph_db.find_one("User", property_key="Name", property_value="Ragnar") #look for user Ragnar # In[6]: RecipeRef = graph_db.find_one( "Recipe", property_key="Name", property_value="Spaghetti Bolognese") #look for recipe Spaghetti Bolognese NodesRelationship = Relationship(UserRef, "Likes", RecipeRef) #Ragnar likes Spaghetti Bolognese graph_db.create_unique(NodesRelationship) #Commit his like to database # In[7]: graph_db.create_unique( Relationship(
class NeoRepo(): def __init__(self): self._host = "140.82.17.30" self.g = Graph("http://140.82.17.30", username="******", password="******") def add_user(self, user): n = Node("User", name=user) self.g.merge(n) def add_repo(self, repo): n = Node("Repo", name=repo) self.g.merge(n) def get_user(self, user): n = self.g.find_one("User", property_key='name', property_value=user) return n def get_repo(self, repo): n = self.g.find_one("Repo", property_key='name', property_value=repo) return n def add_rel(self, user, repo, rel_type): user = Node("User", name=user) repo = Node("Repo", name=repo) rel = Relationship(user, rel_type, repo) self.g.merge(rel) def match_user(self, user, rel_type='star'): if isinstance(user, str): user = self.get_user(user) if user is None: return [] match = self.g.match(start_node=user, bidirectional=False, rel_type=rel_type) return match def match_repo(self, repo, rel_type='star'): if isinstance(repo, str): repo = self.get_repo(repo) if repo is None: return [] match = self.g.match(end_node=repo, bidirectional=False, rel_type=rel_type) return match def match_one(self, user, repo, rel_type='star'): if isinstance(user, str): user = self.get_user(user) if isinstance(repo, str): repo = self.get_repo(repo) if user is None: return None if repo is None: return None match = self.g.match_one(start_node=user, end_node=repo, bidirectional=False, rel_type=rel_type) return match def suggest(self, repo): match_repo = self.match_repo(repo) count = {} for item_repo in match_repo: user = item_repo.start_node() # count[user['name']] = 0 match_user = self.match_user(user) for item_user in match_user: repo_suggest = item_user.end_node() if repo_suggest['name'] in count: count[repo_suggest['name']] += 1 else: count[repo_suggest['name']] = 1 if count == {}: return [] return (sorted(count.items(), key=lambda item: item[1], reverse=True))[1:51]
from py2neo import Graph, Node, Relationship,authenticate from talk import find_topic authenticate("localhost:7474", "neo4j", "M0ring15") graph = Graph() f = open('talk_archive') content = f.read() topics = find_topic(content) #create topic, turn nodes for topic in topics: topic_node = Node("Topic",title=topic.title) for turn in topic.turns: turn_node = Node("Turn",tid=turn.tid, text=turn.text, author=turn.author,date=turn.date) if turn.parent_turn is None: topic_turn=Relationship(topic_node, "CONTAINS",turn_node) graph.create(topic_turn) else: p_tid = turn.parent_turn.tid parent_node = graph.find_one("Turn", property_key="tid", property_value=p_tid) if parent_node is not None: turn_turn=Relationship(turn_node, "REPLIES", parent_node) graph.create(turn_turn)
class Interest(object):
    """Wrapper around an INTEREST node in Neo4j: lookup by id/name, creation,
    fuzzy name matching, and JSON-friendly serialisation."""

    def __init__(self, graph_db=None):
        self.name = None
        self.id = None
        self.description = None
        # BUG FIX: the original ignored the graph_db parameter entirely;
        # honour it and fall back to a fresh connection.
        self._graph_db = graph_db if graph_db is not None else Graph(settings.DATABASE_URL)

    @property
    def interest_properties(self):
        """Instance attributes minus the graph handle, as a plain dict."""
        properties_dict = dict(self.__dict__)
        del properties_dict['_graph_db']
        return properties_dict

    @property
    def interest_node_by_id(self):
        """The INTEREST node matching self.id, or None."""
        if not self.id is None:
            return self._graph_db.find_one(GraphLabel.INTEREST,
                                           property_key='id',
                                           property_value=self.id)
        else:
            return None

    @property
    def interest_node_by_name(self):
        """The INTEREST node matching self.name, or None."""
        if not self.name is None:
            return self._graph_db.find_one(GraphLabel.INTEREST,
                                           property_key='name',
                                           property_value=self.name)
        else:
            return None

    def set_interest_attributes(self, interest_properties):
        """Copy `interest_properties` (dict) onto this instance."""
        for key, value in interest_properties.iteritems():
            setattr(self, key, value)

    def create_interest(self):
        """ create an interest node based on the class attributes
        :return: py2neo Node
        """
        self.id = str(uuid.uuid4())
        new_interest_node = Node.cast(GraphLabel.INTEREST, self.interest_properties)
        try:
            self._graph_db.create(new_interest_node)
        except Exception:
            # Best-effort create kept from the original, but no longer a bare
            # except; the unbound node is still returned on failure.
            pass
        return new_interest_node

    def matched_interests(self, match_string, limit):
        """Case-insensitive prefix search over interest names.
        :return: {'count': int, 'interests': [{'id', 'name'}, ...]}
        """
        params = {
            'match': '(?i)%s.*' % match_string,
            'limit': limit
        }
        cypher_str = "MATCH (interest:INTEREST ) " \
                     "WHERE interest.name =~ {match} " \
                     "RETURN interest.name as name, interest.id as id " \
                     "LIMIT {limit}"
        match_results = self._graph_db.cypher.execute(statement=cypher_str,
                                                      parameters=params)
        root = {}
        root['count'] = 0
        interests_list = []
        for item in match_results:
            # Fresh dict per row so list entries don't alias each other.
            interests_list.append({'id': item.id, 'name': item.name})
            root['count'] += 1
        root['interests'] = interests_list
        return root

    def get_interest_by_name(self):
        """ get interest node by self.name and load its properties onto self
        :return: py2neo Node or None
        """
        interest_node = self.interest_node_by_name
        if not interest_node is None:
            # BUG FIX: the original iterated self.interest_properties and
            # copied self's own values back onto self (a no-op); mirror
            # get_interest_by_id and read the found node's properties.
            for key, value in interest_node.properties.iteritems():
                setattr(self, key, value)
        return interest_node

    def get_interest_by_id(self):
        """Load the node matching self.id and copy its properties onto self."""
        interest_node = self.interest_node_by_id
        if not interest_node is None:
            for key, value in interest_node.properties.iteritems():
                setattr(self, key, value)
        return interest_node

    def get_interest_for_json(self):
        """Minimal JSON-serialisable view of this interest."""
        return {
            '__class': self.__class__.__name__,
            'id': self.id,
            'name': self.name
        }
#a = df['head'].value_counts() a = df['head'] b = df['tail'] frames = [a, b] result = pd.concat(frames) result = result.drop_duplicates(keep='first', inplace=False) result = result.rename(columns={'0': 'node'}) result.to_csv(path + "node.csv", header=1) ###上传到neo4j n = open(path + "node.csv", encoding='utf-8') r = open(path + str(database) + ".csv", encoding='utf-8') data01 = pd.read_csv(n) data02 = pd.read_csv(r) ###上传节点 for i in range(len(data01)): temp = Node("Person", name=data01['0'][i]) g.create(temp) ###上传关系 for i in range(len(data02)): object = g.find_one(label="Person", property_key='name', property_value=data02["head"][i]) subject = g.find_one(label="Person", property_key='name', property_value=data02["tail"][i]) temp = Relationship(subject, data02['label'][i], object) g.create(temp)
"mission_statement": "Develop the Agora", "unique_id": unique_id, "email": '*****@*****.**'.lower(), "is_mentor": True, "is_tutor": True, "is_visible": True, "is_available_for_in_person": True, "is_admin": True} new_user_node = Node.cast(AgoraLabel.USER, new_user_properties) try: graph_db.create(new_user_node) except: print 'Node found' user_node = graph_db.find_one(AgoraLabel.USER, property_key='email', property_value="*****@*****.**".lower()) print user_node["email"] user = AgoraUser() user.email = "*****@*****.**" print user.user_interests interest = AgoraInterest() interest.name = 'SAMPLE' interest.description = 'SAMPLE DESCRIPTION' new_interest_node = interest.create_interest() user_interest_relationship_node = Relationship(start_node=user_node, rel=AgoraRelationship.INTERESTED_IN, end_node=new_interest_node)
class TwitterGraph():
    """ Run queries against TwitterGraph.
        Functions here are mainly read-only, i.e. we only want to get answers,
        we are not modifying the graph structure
    """
    PASSWORD = "******"
    USER = "******"
    HOST = "localhost:7474"

    def __init__(self, host=HOST, user=USER, password=PASSWORD):
        authenticate(host_port=host, user_name=user, password=password)
        self.graph = Graph()

    def get_users(self):
        """Return up to 25 User nodes as a list."""
        # TO-DO : make it lazy for large datasets
        result = self.graph.find("User", limit=25)
        list_ = [user for user in result]
        return list_

    def get_user(self, id_):
        """Return the User node with the given id, or None."""
        result = self.graph.find_one("User", property_key="id", property_value=id_)
        return result

    def get_level_followers(self, limit=50, level=1, uid=None, screen_name=None):
        """ Return the users who are the n-th level follower of user
            uid/screen_name.
            Level 1 follower is defined as:
            (1st_level_follower)-[follows]->(followee)
        """
        cypher = self.graph.cypher
        if uid is None and screen_name is None:
            raise InvalidArgumentException("Please specify either a valid user id or screen_name")
        # BUG FIX: the original always built the screen_name pattern, so
        # lookups by uid matched nothing; pick the pattern matching the
        # argument actually supplied.
        statement = self._construct_follower_path(level, uid=(uid is not None))
        if uid is not None:
            result = cypher.stream(statement, followee=uid, limit=limit)
        else:
            result = cypher.stream(statement, followee=screen_name, limit=limit)
        return [f for f in result]

    def is_n_level_follower(self, level, retweeter, screen_name):
        """ Given a retweeter screen_name and original tweeter's screen_name,
            determine if retweeter is an n-th level follower. """
        if level == 1:
            return search.is_follower(retweeter, screen_name)
        cypher = self.graph.cypher
        # Enumerate (n-1)-level followers, then check direct follower-ship.
        level -= 1
        statement = self._construct_follower_path(level)
        for follower in cypher.stream(statement, followee=screen_name, limit=5000):
            print(follower[0])
            if search.is_follower(retweeter, follower[0]):
                return True
        return False

    def get_retweet_level(self, retweeter, screen_name):
        """ Given a retweeter screen name and the original tweeter's
            screen_name, determine the follower level (0 when not found). """
        level = 0
        while level < 10:  # stop at 10 to prevent timeout
            level += 1
            if self.is_n_level_follower(level, retweeter, screen_name):
                return level
        return 0

    def _construct_follower_path(self, level, uid=False):
        """Build the Cypher follower-chain pattern of length `level`,
        anchored by id_str when `uid` is True, else by screen_name."""
        if uid:
            statement = "MATCH(:User {id_str : {followee} })"
        else:
            statement = "MATCH(:User {screen_name : {followee} })"
        while level > 1:
            statement += "<-[:follows]-(:User)"
            level -= 1
        statement += "<-[:follows]-(a:User) RETURN a.screen_name LIMIT {limit}"
        return statement
data_type = field_relationship["dataType"] id_field = field_relationship["idField"] log.debug( "Found relationship mapping for [%s]. (:%s)-[:%s]->(:%s)", key, object_type, relationship_type, node_type) if data_type == "array": log.debug("Processing %s as %s", key, data_type) for id_value in value: log.debug("(:%s)-[:%s]->(:%s{%s: %s})", object_type, relationship_type, node_type, id_field, id_value) related_node = g.find_one( node_type, id_field, id_value) if not related_node: log.debug( "Didn't find a %s with %s == %s so I'm creating it", node_type, id_field, id_value) r_data = {id_field: id_value} related_node = Node( node_type, **r_data) tx.create(related_node) tx.commit() tx = g.begin() node_relationships.append({ 'relationship': { 'type': relationship_type },
from py2neo import Node, Relationship, Graph import requests, json, sys print "Performing pathfinding search with", sys.argv if len(sys.argv) != 4: print "You failed to enter the correct arguments." print "article one, article two, depth" else: graph = Graph("http://*****:*****@localhost:7474/db/data/") a = graph.find_one("Article", "lowerTitle", sys.argv[1].lower()) b = graph.find_one("Article", "lowerTitle", sys.argv[2].lower()) #print a, b if a and b: ENDPOINT = "http://*****:*****@localhost:7474/db/data/" request = { "to":ENDPOINT+"node/"+str(b._id), "max_depth": int(sys.argv[3]), "relationships": { "type":"LINKS", "direction":"out" }, "algorithm":"allSimplePaths" } r = requests.post(ENDPOINT+"node/"+str(a._id)+"/paths", data=json.dumps(request)) # print r.json() if r.status_code == 200: for path in r.json(): print "Path:" for node in path['nodes']:
class Neo4j(object):
    """Director/actor cooperation graph stored in Neo4j, with results mirrored
    into MongoDB via MongoManager."""

    def __init__(self):
        self.graph = Graph(Config.NEO_URL, username=Config.NEO_USR, password=Config.NEO_PSW)
        self.mm = MongoManager.DBManager()

    def add_relation(self, node_name1, node_name2, movie_name='name', url='url'):
        """
        Add a new director->actor cooperation edge to the graph.
        Nodes that are not yet in the graph are created automatically by merge.
        :param node_name1: director name (start node)
        :param node_name2: actor name (end node)
        :param movie_name: unused in the active code path (see commented block)
        :param url: unused in the active code path (see commented block)
        :return:
        """
        node1 = Node(DIRECTOR_LABEL, name=node_name1)
        node1['type'] = 'director'
        node2 = Node(ACTOR_LABEL, name=node_name2)
        # node3 = Node(MOVIE_LABEL, name=movie_name)
        # node3['url'] = url
        #
        # actor_movie_relation = Relationship(node2, ACTIN_LABEL, node3)
        # director_movie_relation = Relationship(node1, DIRECT_LABEL, node3)
        # self.graph.merge(actor_movie_relation, DEFAULT_LABEL, 'name')
        # self.graph.merge(director_movie_relation, DEFAULT_LABEL, 'name')
        # print(actor_movie_relation)
        # print(director_movie_relation)
        # if self.find_relation(node_name1, node_name2):
        #     print('relation already existed, add count')
        # else:
        relation = Relationship(node1, COOPERATE_LABEL, node2)
        relation['count'] = 1
        # merge keyed on DEFAULT_LABEL/'name' keeps endpoint nodes unique.
        self.graph.merge(relation, DEFAULT_LABEL, 'name')
        # print("created relation", node_name1, ',', COOPERATE_LABEL, ',', node_name2)

    # NOTE(review): this method shadows the builtin `print`; kept for
    # interface compatibility.
    def print(self, name, relation):
        """
        Print info of every end node reachable from the node named `name`
        over edges of type `relation`.
        :param name:
        :param relation:
        :return:
        """
        print('##########')
        query = 'MATCH (n) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        print(node)
        for rel in self.graph.match((node, ), relation):
            print(rel.end_node['name'], rel.end_node.labels, rel['movie_name'], rel['release_time'])

    def find_director_node(self, name):
        """
        Return True if a Director node with this name exists, else False.
        :param name:
        :return:
        """
        query = 'MATCH (n:Director) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        if node is None:
            return False
        if self.graph.exists(node):
            return True
        else:
            return False

    def find_actor_node(self, name):
        """
        Return True if an Actor node with this name exists, else False.
        :param name:
        :return:
        """
        query = 'MATCH (n:Actor) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        if node is None:
            return False
        if self.graph.exists(node):
            return True
        else:
            return False

    def get_labeled_node(self, count=1):
        """
        Fetch all Director nodes, print them and the total count,
        and return the list.
        :return:
        """
        # CQL query; the result is a list of dicts keyed by 'p'.
        datas = self.graph.data('MATCH(p:Director) return p')
        # print(len(datas))
        # print(type(datas))
        _count = 1
        for data in datas:
            # print(type(data))
            # if _count > count:
            #     break
            print(data)
            _count += 1
        print('Total count of Director is', _count)
        return datas

    def find_relation_and_add_count(self, name1, name2):
        """
        Find the CooperateWith relation from Director name1 to Actor name2.
        If found, increment its count; otherwise create the relation.
        :param name1:
        :param name2:
        :return:
        """
        sn = self.graph.find_one(DIRECTOR_LABEL, property_key='name', property_value=name1)
        en = self.graph.find_one(ACTOR_LABEL, property_key='name', property_value=name2)
        rel = self.graph.match(start_node=sn, rel_type=COOPERATE_LABEL, end_node=en)
        # print(rel)
        # print('--------')
        query = 'MATCH(n:Director)-[r:CooperateWith]->(m:Actor) WHERE n.name={name1} and m.name={name2} RETURN r'
        params = dict(name1=name1, name2=name2)
        relation = self.graph.evaluate(query, params)
        if relation is None:
            print('relation is none')
            self.add_relation(name1, name2)
            return False
        if self.graph.exists(relation):
            print('relation exists, add count')
            relation['count'] += 1
            self.graph.push(relation)
            # NOTE(review): start_node()/end_node() calls are the py2neo v3
            # style, while other methods use v4 attribute access — confirm the
            # installed py2neo version.
            print(relation.start_node()['name'], '->', relation['count'], '->', relation.end_node()['name'])
            return True
        else:
            print('relation does not exist')
            return False

    def clear_graph(self):
        """
        Delete everything in the graph database.
        :return:
        """
        self.graph.delete_all()

    def show_end_node(self, name, relation_label):
        """
        Given a start node name and relation label, walk all matching edges
        and print each end node's info.
        :param name:
        :param relation_label:
        :return:
        """
        query = 'MATCH (n) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        if node is None:
            print('node is None!')
            return False
        if self.graph.exists(node):
            print(node)
            # Walk all edges of this start node and print each hop.
            for rel in self.graph.match((node, ), relation_label):
                print(name, '->', rel['count'], '->', rel.end_node['name'])
        else:
            print('node not exists!')
            return False

    def get_coop_count(self):
        """
        Print all director/actor cooperation relations with their counts.
        :return:
        """
        directors = self.get_labeled_node()
        # print(type(directors))
        count = 1
        for director in directors:
            # NOTE(review): breaks after the first director — looks like a
            # debugging limit; confirm it is intentional.
            if count > 1:
                break
            # print(director['p']['name'])
            self.show_end_node(director['p']['name'], COOPERATE_LABEL)
            count += 1

    def get_cooperations(self):
        """Collect every director->actor cooperation (with count) and save
        each record to MongoDB via self.mm."""
        directors = self.get_labeled_node()
        # datas = []
        for director in directors:
            query = 'MATCH (n) WHERE n.name={name} RETURN n'
            params = dict(name=director['p']['name'])
            node = self.graph.evaluate(query, params)
            if node is None:
                print('node is None!')
                return None
            if self.graph.exists(node):
                # Walk all cooperation edges of this director and persist each.
                for rel in self.graph.match(start_node=node, rel_type=COOPERATE_LABEL):
                    data = {
                        'director': director['p']['name'],
                        'actor': rel.end_node()['name'],
                        'count': rel['count']
                    }
                    # print("cooperation info,", data)
                    self.mm.save_data(Config.COOPERATION_TEMP, data)
                    # datas.append(data)
            else:
                print('node not exists!')
                return None
"mission_statement": "Use the Agora to learn all the things.", "id": id, "email": email.lower(), "is_mentor": True, "is_tutor": True, "is_visible": True, "is_available_for_in_person": True, "is_admin": False} new_user_node = Node.cast(AgoraLabel.USER, new_user_properties) try: graph_db.create(new_user_node) except: print 'Node found' user_node = graph_db.find_one(AgoraLabel.USER, property_key='email', property_value=email.lower()) print user_node["email"] user = AgoraUser() user.email = email user.get_user() print user.user_interests interest = AgoraInterest() interest.name = 'Music' interest.description = 'Learning how to communicate clearly through writing.' new_interest_node = interest.create_interest() print new_interest_node print user_node['name'] interest_node = Graph().find_one('INTEREST',
class GraphClass():
    """Thin wrapper around a local Neo4j instance for Student/Subject nodes,
    their relationships, and simple recommendation queries.

    Uses two clients against the same database: neo4jrestclient
    (``self.db``) for raw Cypher queries and py2neo (``self.graph``) for
    node/relationship CRUD.
    """

    dbb = ''
    graph = ''

    def __init__(self):
        self.db = GraphDatabase("http://localhost:7474/db/data/")
        self.graph = Graph("http://localhost:7474/db/data/")

    def InsertStudentNode(self, name):
        """Return the Student node named `name`, creating it if absent.

        Returns None if the lookup/insert fails.
        """
        tx = None  # bug fix: `tx` was referenced in `except` before assignment
        try:
            Student_node = self.graph.find_one(label="Student", property_key="name",
                                               property_value=name)
            if Student_node is None:
                tx = self.graph.begin()
                student_n = Node("Student", name=name)
                tx.create(student_n)
                tx.commit()
                return student_n
            else:
                return Student_node
        except Exception:
            print("problem with student node insert")
            # Roll back only if a transaction was actually opened; previously a
            # failure inside find_one raised NameError here instead.
            if tx is not None:
                tx.rollback()

    def InsertSubjectNode(self, name):
        """Return the Subject node named `name`, creating it if absent.

        Returns None if the lookup/insert fails.
        """
        tx = None  # bug fix: see InsertStudentNode
        try:
            Subject_node = self.graph.find_one(label="Subject", property_key="name",
                                               property_value=name)
            if Subject_node is None:
                tx = self.graph.begin()
                Subject_n = Node("Subject", name=name)
                tx.create(Subject_n)
                tx.commit()
                return Subject_n
            else:
                return Subject_node
        except Exception:
            print("problem with subject node insert")
            if tx is not None:
                tx.rollback()

    def relationship(self, startnode, endnode, review):
        """Create a `review`-typed relationship: (startnode)-[review]->(endnode)."""
        tx = None  # bug fix: see InsertStudentNode
        try:
            tx = self.graph.begin()
            ab = Relationship(startnode, review, endnode)
            tx.create(ab)
            tx.commit()
        except Exception:
            print("problem with relationship")
            if tx is not None:
                tx.rollback()

    def Toptrending(self):
        """Return the names of the 4 nodes with the most incoming relationships."""
        TopList = []
        RecordList = self.db.query(
            "MATCH (n)-[r]->(m) RETURN m, COUNT(r) ORDER BY COUNT(r) DESC LIMIT 4 ",
            returns=(dict, str))
        for record in RecordList:
            TopList.append(record[0]['data']['name'])
        return TopList

    def ColabFiltering(self, user):
        """Collaborative filtering for `user`.

        Recommends subjects that co-occur (via :like or :open edges) with the
        user's own subjects through other students, excluding subjects the
        user already has the same edge to. Returns a de-duplicated list of
        subject names.
        """
        SubjList = []
        RecordListLike = self.db.query(
            "MATCH (s:Student)-[:like]->(n:Subject)<-[:like]-()-[:like]->(m:Subject) WHERE s.name = {username} AND NOT (s)-[:like]->(m:Subject) RETURN m.name",
            params={"username": user}, returns=(str))
        RecordListOpen = self.db.query(
            "MATCH (s:Student)-[:open]->(n:Subject)<-[:open]-()-[:open]->(m:Subject) WHERE s.name = {username} AND NOT (s)-[:open]->(m:Subject) RETURN m.name",
            params={"username": user}, returns=(str))
        for record in RecordListLike:
            if record[0] not in SubjList:
                SubjList.append(record[0])
        for record in RecordListOpen:
            if record[0] not in SubjList:
                SubjList.append(record[0])
        return SubjList


#if __name__ == '__main__':
#    app.run(debug=True)
# graph = GraphClass()
# print(graph.ColabFiltering(user='******'))
# Example usage (was an unterminated docstring in the original):
# relationship(startnode=InsertStudentNode("raam"), endnode=InsertSubjectNode("subject-5"), review="like")
#!/usr/bin/env python """ create languages for app """ from py2neo import Graph, Node languages = [ "PHP", "Python", "Ruby", "Erlang", "Elixir", "Haskell", "Go", "Java", "Scala", "Groovy", "JavaScript", "C#", "C++", "Swift" ] graph = Graph('http://*****:*****@127.0.0.1:7474/db/data/') for i in languages: language = graph.find_one("Language", "name", i) if None == language: lang = Node("Language", name=i) graph.create(lang)
class TwitterGraph():
    """Neo4j-backed store of app users, Twitter accounts, and the FOLLOWS /
    RETWEETED relationships between them, plus popularity-based
    recommendations."""

    def __init__(self):
        self.graph = Graph("http://*****:*****@54.191.171.209:7474/db/data/")
        # Heap of (incoming-edge count, screen_name), rebuilt by reassess_popularity.
        self.popularity_heap = []
        self.reassess_popularity()

    def add_user(self, user):
        """Create a User node for an app user; returns py2neo's creation result."""
        new_user = Node("User", token=user.token.session_id, user_id=user.id)
        return self.graph.create(new_user)

    def is_cached(self, screen_name):
        """Return True if a TwitterUser node exists for `screen_name`.

        Bug fix: previously returned None (not False) on a cache miss.
        """
        return self.graph.find_one("TwitterUser", 'screen_name', screen_name) is not None

    def _get_or_create_user_node(self, user):
        # Shared find-or-create for User nodes; this logic was duplicated in
        # add_follow and remove_follow.
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        return user_node

    def _get_or_create_twitter_user(self, screen_name):
        # Shared find-or-create for TwitterUser nodes; this logic was
        # duplicated in add_follow, remove_follow and add_retweet.
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        if twitter_user is None:
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        return twitter_user

    def get_RT_recommendations(self, user):
        """Recommend up to 10 screen names most retweeted by the accounts this
        user follows (sampling 5 follows x 5 retweets)."""
        recommendations = Counter()
        user_node = self.graph.find_one("User", 'user_id', user.id)
        following = user_node.match_outgoing("FOLLOWS", limit=5)
        for rel in following:
            retweets = rel.end_node.match_outgoing("RETWEETED", limit=5)
            for r in retweets:
                recommendations[r.end_node.properties['screen_name']] += 1
        # renamed loop variable: the original shadowed the builtin `str`
        return [name for (name, _count) in recommendations.most_common(10)]

    def get_generic_recommendations(self):
        """Recommend the 10 most-popular screen names overall."""
        return [screen_name for (count, screen_name) in heapq.nlargest(10, self.popularity_heap)]

    def reassess_popularity(self):
        # NOTE: expensive calculation, to be run threaded at multiples of x
        # actions to graph or hourly/daily job.
        # Bug fix: rebuild the heap from scratch — previously each call pushed
        # every TwitterUser again, leaving stale duplicates in the heap.
        fresh_heap = []
        all_twitter_users = self.graph.find("TwitterUser")
        for tu in all_twitter_users:
            incoming_count = sum(1 for _ in tu.match_incoming())
            heapq.heappush(fresh_heap, (incoming_count, tu.properties['screen_name']))
        self.popularity_heap = fresh_heap

    def add_twitter_user(self, screen_name):
        """Create a TwitterUser node for `screen_name` if one does not exist."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        if twitter_user is None:
            new_twitter_user = Node("TwitterUser", screen_name=screen_name)
            self.graph.create(new_twitter_user)

    def add_follow(self, screen_name, user):
        """Record that app `user` follows Twitter account `screen_name`."""
        user_node = self._get_or_create_user_node(user)
        twitter_user = self._get_or_create_twitter_user(screen_name)
        follow_relationship = Relationship(user_node, "FOLLOWS", twitter_user)
        self.graph.create(follow_relationship)
        self.reassess_popularity()

    def remove_follow(self, screen_name, user):
        """Delete the FOLLOWS edge from app `user` to `screen_name`, if any."""
        user_node = self._get_or_create_user_node(user)
        twitter_user = self._get_or_create_twitter_user(screen_name)
        follow_relationship = self.graph.match_one(user_node, "FOLLOWS", twitter_user)
        if follow_relationship is not None:
            self.graph.delete(follow_relationship)

    def add_retweet(self, screen_name, retweeted_screen_name):
        """Record a retweet, incrementing the RETWEETED edge's count property."""
        twitter_user = self._get_or_create_twitter_user(screen_name)
        retweeted_twitter_user = self._get_or_create_twitter_user(retweeted_screen_name)
        retweet = self.graph.match_one(twitter_user, "RETWEETED", retweeted_twitter_user)
        if retweet is None:
            retweet_relationship = Relationship(twitter_user, "RETWEETED", retweeted_twitter_user)
            retweet_relationship.properties['count'] = 1
            self.graph.create(retweet_relationship)
        elif retweet.properties['count'] is None:
            # this shouldn't happen, just for testing while transitioning db
            retweet.properties['count'] = 1
            retweet.push()
        else:
            retweet.properties['count'] = retweet.properties['count'] + 1
            retweet.push()
class StuffNeo4j(): def __init__(self, nodelabel, reltype): self.graph_db = None self.nodelabel = nodelabel self.reltype = reltype def connect(self, uri, usr="******", pwd="neo4j"): if not uri.endswith('/'): uri += '/' authenticate(uri, usr, pwd) self.graph_db = Graph(uri + "db/data") def create_indexes(self): #If index is already created py2neo throws exception. try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(name)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(synset_id)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute( "CREATE INDEX ON :%s(pointer_symbol)" % self.reltype) except: pass def create_node(self, nodetype, **kwargs): return Node(nodetype, **kwargs) def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs): n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val) for k in kwargs: n.properties[k] = kwargs[k] n.push() return n def insert_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) self.graph_db.create(rel) else: print "Could not insert relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def merge_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) return self.graph_db.create_unique(rel) else: print "Could not merge relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def create_wordnet_rel(self, synset1, synset2, ptype): """ Pointer symbols http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html The pointer_symbol s for nouns are: ! 
Antonym @ Hypernym @i Instance Hypernym ~ Hyponym ~i Instance Hyponym #m Member holonym #s Substance holonym #p Part holonym %m Member meronym %s Substance meronym %p Part meronym = Attribute + Derivationally related form ;c Domain of synset - TOPIC -c Member of this domain - TOPIC ;r Domain of synset - REGION -r Member of this domain - REGION ;u Domain of synset - USAGE -u Member of this domain - USAGE The pointer_symbol s for verbs are: ! Antonym @ Hypernym ~ Hyponym * Entailment > Cause ^ Also see $ Verb Group + Derivationally related form ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adjectives are: ! Antonym & Similar to < Participle of verb \ Pertainym (pertains to noun) = Attribute ^ Also see ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adverbs are: ! Antonym \ Derived from adjective ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE """ node1 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset1) node2 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset2) if (node1 is not None) and (node2 is not None): rel = Relationship(node1, self.reltype, node2, pointer_symbol=ptype) return rel else: raise Exception( "Could not create Wordnet relation (%s) - [%s] -> (%s)" % (synset1, ptype, synset2)) def insert_bulk(self, objs): if len(objs) > 0: self.graph_db.create(*objs)
f.write(user); f.write('\n'); f.close(); ''' for i in Cursor(api.followers, id=user).items(): print "adding " + i.screen_name; f.write("\t"+i.screen_name+"\n"); to_be_networked.append(i.screen_name); ''' while True: try: temp_user = api.get_user(user); temp_ratio = float(temp_user.followers_count) / float(temp_user.friends_count); base_node = graph.find_one("regular", "screen_name", user); if not base_node: base_node = graph.find_one("verified", "screen_name", user); if not base_node: base_node = graph.find_one("cautious", "screen_name", user); if not base_node: if temp_user.verified: base_node = Node("verified", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location); else: if temp_ratio < 0.01: base_node = Node("cautious", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location); else: base_node = Node("regular", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location);
class UserGraph:
    """Build a Neo4j graph of Bitcoin-style accounts (Account nodes with a
    running `btc` balance) and Pay relationships derived from transactions
    stored via the DAO layer."""

    def __init__(self):
        # Graph handle is opened lazily by init_connect().
        self.graph = None
        self.label = "Account"
        self.user_dao = UserDAO()
        self.address_dao = AddressDAO()
        self.transaction_dao = TransactionDAO()

    def init_connect(self):
        """Open the connection to the local Neo4j server."""
        self.graph = Graph("http://127.0.0.1:7474", username="******", password="******")

    def clear_data(self):
        """Delete every node and relationship in the database."""
        self.graph.delete_all()

    def create_a_user_node(self, name_code, btc):
        """Create an Account node with the given name code and initial balance."""
        a_user = Node(self.label, name=name_code, btc=btc)
        return self.graph.create(a_user)

    def get_a_user_node_by_code(self, name_code):
        """Look up an Account node by its name code; returns None if absent."""
        user_node = self.graph.find_one(self.label, property_key='name',
                                        property_value=name_code)
        return user_node

    def add_a_transaction(self, transaction_dict):
        """Apply one transaction ({'source', 'destination', 'value'}) to the graph.

        A falsy source means newly-minted coins: only the destination account
        is credited. Otherwise the source is debited, the destination
        credited, and the Pay relationship between them accumulated.
        """
        if not transaction_dict['source']:
            # Coinbase-like transaction: ensure the destination account exists
            # in the relational tables, then credit (or create) its node.
            destination = transaction_dict['destination']
            temp_result = self.address_dao.get_address_by_address(destination)
            if not temp_result:
                # this destination address is not yet recorded in the address table
                new_user_for_dest = self.user_dao.create_user()
                temp_result = self.address_dao.create_address(
                    new_user_for_dest['id'], destination)
            # add/update the destination node
            account_dest = self.user_dao.get_user_by_id(temp_result['user_id'])
            account_node = self.get_a_user_node_by_code(account_dest['code'])
            if account_node:
                account_node['btc'] += transaction_dict['value']
                self.graph.push(account_node)
            else:
                self.create_a_user_node(account_dest['code'],
                                        transaction_dict['value'])
            return
        # With both source and destination we must update both nodes AND the
        # Pay relationship between them.
        source = transaction_dict['source']
        destination = transaction_dict['destination']
        # The source address must already exist, so its lookup is not guarded.
        temp_source_result = self.address_dao.get_address_by_address(source)
        temp_dest_result = self.address_dao.get_address_by_address(destination)
        if not temp_dest_result:
            new_user_for_dest = self.user_dao.create_user()
            temp_dest_result = self.address_dao.create_address(
                new_user_for_dest['id'], destination)
        # Debit (or create with a negative balance) the source account node.
        account_source = self.user_dao.get_user_by_id(
            temp_source_result['user_id'])
        account_source_node = self.get_a_user_node_by_code(
            account_source['code'])
        if account_source_node:
            account_source_node['btc'] -= transaction_dict['value']
            self.graph.push(account_source_node)
        else:
            self.create_a_user_node(account_source['code'],
                                    -transaction_dict['value'])
            account_source_node = self.get_a_user_node_by_code(
                account_source['code'])
        # Credit (or create) the destination account node.
        account_destination = self.user_dao.get_user_by_id(
            temp_dest_result['user_id'])
        account_destination_node = self.get_a_user_node_by_code(
            account_destination['code'])
        if account_destination_node:
            account_destination_node['btc'] += transaction_dict['value']
            self.graph.push(account_destination_node)
        else:
            self.create_a_user_node(account_destination['code'],
                                    transaction_dict['value'])
            account_destination_node = self.get_a_user_node_by_code(
                account_destination['code'])
        # Accumulate the Pay relationship between the two nodes.
        # NOTE(review): match_one has no rel_type filter here, so any existing
        # relationship between the pair would be matched — confirm only "Pay"
        # edges can exist between Account nodes.
        source_node_pay_destination_node = self.graph.match_one(
            start_node=account_source_node,
            end_node=account_destination_node,
            bidirectional=False)
        if source_node_pay_destination_node:
            source_node_pay_destination_node['btc'] += transaction_dict[
                'value']
            self.graph.push(source_node_pay_destination_node)
        else:
            source_node_pay_destination_node = Relationship(
                account_source_node, "Pay", account_destination_node)
            source_node_pay_destination_node['btc'] = transaction_dict['value']
            self.graph.create(source_node_pay_destination_node)

    def generate_user_graph(self):
        """Replay all stored transactions (100 pages, 100 per page) into the graph."""
        page_total = 100
        for page_num in range(page_total):
            print("====Generate User Graph 处理第 " + str(page_num) + " / " +
                  str(page_total) + " 页交易(100/page)")
            transaction_list = self.transaction_dao.paginate_list_resource(
                models.Transaction, page_num)
            count = 1
            for item_transaction in transaction_list:
                # print("==处理本页第 " + str(count) + " / 100" + " 个交易")
                count += 1
                # print(item_transaction)
                # apply each transaction to the graph
                transaction_dict = {
                    'source': item_transaction['source'],
                    'destination': item_transaction['destination'],
                    'value': item_transaction['value']
                }
                self.add_a_transaction(transaction_dict)


#user_graph = UserGraph()
#user_graph.init_connect()
#user_graph.generate_user_graph()
pw=open('neo4j_pw').readline().strip() authenticate("localhost:7474", "neo4j", pw) # connect to authenticated graph database graph = Graph() tx = graph.cypher.begin() conceptnodes={} tasknodes={} contrastnodes={} # Create concept nodes for i in range(len(concept_ids)): tx.append('CREATE (%s:concept {name: "%s", id:"%s"}) RETURN %s'%(concept_ids[i], concept_names[i],concept_ids[i],concept_ids[i])) if graph.find_one('concept',property_key='id', property_value=concept_ids[i]) == None: conceptnode= Node("concept",name=concept_names[i],id=concept_ids[i]) graph.create(conceptnode) # Create task nodes for i in range(len(task_ids)): tx.append('CREATE (%s:task {name: "%s", id:"%s"}) RETURN %s'%(task_ids[i], task_names[i],task_ids[i],task_ids[i])) if graph.find_one('task',property_key='id', property_value=task_ids[i]) == None: tasknode= Node("task", name=task_names[i],id=task_ids[i]) graph.create(tasknode) # Create contrast nodes, associate with task for i in range(len(contrast_tasks)): tasknode=graph.find_one('task',property_key='id', property_value=contrast_tasks[i]) path = Path(tasknode,Rel("HASCONTRAST"),Node("contrast", name=contrast_names[i],id=contrast_ids[i]))
property_all = aa + bb + cc + dd + ee + ff + gg + hh + ii + jj + HP_all + MP_all + HP_recover_all + MP_recover_all + R_cooling_all + R_cost_all + skill_R_all + attack_all + attack_range_all property_name = [] for i in property_all_2: property_name.append(i[1]) #节点列表 things_2 = [weapons, heros] things = weapons + heros #获取英雄tag m = [] tags_all = [] for i in range(0, 68): a = re.findall( '\S+', g.find_one('hero', property_key='name', property_value=heros[i])['tag']) m = m + a for p in m: if p not in tags_all: tags_all.append(p) #关系列表 relation = [u'相似', u'克制', u'搭配', u'推荐', u'适合用于对抗'] rel_2 = [[u'相似', u'像'], [u'克制'], [u'搭配', u'配合', u'组合'], [u'推荐', u'出装'], [u'用来对抗', u'适合用来对抗', u'适合用于对抗']] rel_3 = [ u'相似', u'像', u'克制', u'搭配', u'配合', u'组合', u'推荐', u'出装', u'用来对抗', u'适合用来对抗', u'适合用于对抗' ] #用来判断对不对的属性 if_true = [u'远程', u'近战' + u'近程'] + tags_all
class WriteToNeo4j:
    """Import JSON knowledge triples (a court judgment document) into Neo4j."""

    def __init__(self, triple_path):
        # Names already materialised as nodes — guards against duplicates.
        self.entity_set = set()
        self.nlp = NLP()
        # connect to the neo4j database
        self.graph = Graph(host='localhost',
                           http_port=7474,
                           user='******',
                           password='******')
        # NOTE(review): the file handle is never closed — consider a with-block.
        f_in = open(triple_path, 'r')
        triple_str = f_in.read()
        # parse the whole JSON document at once
        self.triple = json.loads(triple_str)

    def write_litigant(self, litigants, relation):
        """Write litigant records (plaintiffs or defendants) and link each to
        the root judgment node.

        Args:
            litigants: list of dicts; each has '名字' (name), '编号' (id), and
                optionally extra roles (e.g. representative/agent).
            relation: str, relationship type from the root node ('原告'/'被告').
        """
        for litigant in litigants:
            node_litigant = Node(self.get_label(litigant['名字']),
                                 name=litigant['名字'],
                                 id=litigant['编号'])
            self.graph.create(node_litigant)
            self.entity_set.add(litigant['名字'])
            # Link the litigant to the (already-created) root judgment node.
            node_root = self.graph.find_one('判决书',
                                            property_key='name',
                                            property_value='判决书001')
            entity_relation = Relationship(node_root, relation, node_litigant,
                                           label='relation')
            self.graph.create(entity_relation)
            # Every remaining key is an auxiliary role attached to the litigant.
            for item in litigant:
                if item != '名字' and item != '编号':
                    # e.g. person in charge, authorized agent
                    node_repr = Node(self.get_label(litigant[item]),
                                     name=litigant[item])
                    self.graph.create(node_repr)
                    self.entity_set.add(litigant[item])
                    entity_relation = Relationship(node_litigant, item,
                                                   node_repr, label='关系')
                    self.graph.create(entity_relation)

    def get_label(self, word):
        """Choose a node label for a word from its POS tag.

        Args:
            word: str, the word to classify.

        Returns:
            label: str — '人' (person) for 'nh', '组织' (organisation) for
            'ni', '地点' (place) for 'ns', otherwise '其他' (other).
        """
        label = ''
        postag = self.nlp.get_postag(word)
        if postag == 'nh':
            label = '人'
        elif postag == 'ni':
            label = '组织'
        elif postag == 'ns':
            label = '地点'
        else:
            label = '其他'
        return label

    def write(self):
        """Write the whole document into the graph database."""
        # Root node: a judgment carries document id, title, type and case
        # number as properties.
        node_root = Node('判决书',
                         name='判决书001',
                         id=self.triple['文书编号'],
                         title=self.triple['文书标题'],
                         type=self.triple['文书类型'],
                         case=self.triple['案件编号'])
        self.graph.create(node_root)
        self.entity_set.add('判决书001')
        # Accepting court node, linked from the root.
        node_court = Node('组织', name=self.triple['受理法院'])
        self.graph.create(node_court)
        self.entity_set.add(self.triple['受理法院'])
        entity_rerlation = Relationship(node_root, '受理法院', node_court,
                                        label='关系')
        self.graph.create(entity_rerlation)
        # Write plaintiffs, then defendants.
        plaintiffs = self.triple['原告']
        self.write_litigant(plaintiffs, '原告')
        defendants = self.triple['被告']
        self.write_litigant(defendants, '被告')
        # Case facts: each fact carries a (subject, relation, object) triple.
        facts = self.triple['案情事实']
        for fact in facts:
            tri = fact['知识']
            entity1 = tri[0]
            relation = tri[1]
            entity2 = tri[2]
            node_list = []
            # Create each entity only once; otherwise reuse the stored node.
            node1 = Node(self.get_label(entity1), name=entity1)
            if entity1 not in self.entity_set:
                self.graph.create(node1)
                node_list.append(node1)
                self.entity_set.add(entity1)
            else:
                node_list.append(
                    self.graph.find_one(self.get_label(entity1),
                                        property_key='name',
                                        property_value=entity1))
            node2 = Node(self.get_label(entity2), name=entity2)
            if entity2 not in self.entity_set:
                self.graph.create(node2)
                node_list.append(node2)
                self.entity_set.add(entity2)
            else:
                node_list.append(
                    self.graph.find_one(self.get_label(entity2),
                                        property_key='name',
                                        property_value=entity2))
            entity_relation = Relationship(node_list[0], relation,
                                           node_list[1], label='关系')
            self.graph.create(entity_relation)
host="154.8.214.203", # neo4j 搭载服务器的ip地址,ifconfig可获取到 http_port=7474, # neo4j 服务器监听的端口号 user="******", # 数据库user name,如果没有更改过,应该是neo4j password="******") with codecs.open('xywy_new2.json', 'r', encoding='utf-8') as f: contents = f.read() l = json.loads(contents) # print(len(l)) # print(l[18]) # print(l[22]) for b in l: # sql = "MATCH (n:`疾病`{名称:'%s'}) RETURN n.名称" % (b['名称']) # m = graph.run(sql).data() a = graph.find_one(label="疾病", property_key="名称", property_value=b['名称']) if a: # 如果不为none,则更新 #更新标签 sql = "MATCH (n:`疾病`{名称:'%s'}) set n:寻医问药" % (b['名称']) graph.run(sql) #更新别称 if b['别称'] != ['暂无数据']: if a['别称'] != ['暂无数据']: for bc in b['别称']: if bc not in a['别称']: a['别称'].append(bc) else: a['别称'] = b['别称'] sql = "MATCH (n:`疾病`{名称:'%s'}) set n.别称='%s'" % (b['名称'], a['别称']) graph.run(sql)