def get_track_comments():
    """Return every Comment node's properties, inflated, keyed by comment id."""
    graph = Graph()
    return {
        node.properties['id']: inflate(node.properties)
        for node in graph.find("Comment")
    }
class Neo4j():
    """Thin wrapper around a py2neo Graph for Item / HudongItem title lookups."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): credentials are hard-coded (masked in source) — consider config.
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Return the single Item node whose title equals *value*."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    # Return the HudongItem (Hudong Baike entry) matching the given title.
    def matchHudongItembyTitle(self, value):
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    # Return at most *limitnum* HudongItem wrappers.
    def getAllHudongItem(self, limitnum):
        items = [HudongItem(record)
                 for record in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return items
class Neo4j():
    """Small facade over a py2neo Graph for title-based node lookups."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Hard-coded local connection; credentials masked in source.
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Find one Item node whose title equals *value*."""
        found = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return found

    def matchHudongItembyTitle(self, value):
        """Find one HudongItem (Hudong Baike entry) by its title."""
        found = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return found

    def getAllHudongItem(self, limitnum):
        """Wrap at most *limitnum* HudongItem nodes and return them as a list."""
        results = []
        for record in self.graph.find(label="HudongItem", limit=limitnum):
            results.append(HudongItem(record))
        print('load AllHudongItem over ...')
        return results

#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
def lambda_handler(event, context):
    """Write a notification row to DynamoDB for every user in event['follow'].

    Looks the followed users up in Neo4j, then batch-writes one item per
    receiver carrying a trimmed copy of event['user'].
    """
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    receiver = graph.find('User', property_key='id',
                          property_value=tuple(event['follow']))
    table = boto3.resource('dynamodb').Table(os.environ["DYNAMODB"])
    wanted = ("username", "first_name", "last_name", "id", "photo")
    now = int(time.time())
    # Keep only the whitelisted user fields; the sender id seeds the dict.
    user = {'id': event['id']}
    user.update({k: v for k, v in event['user'].items() if k in wanted})
    with table.batch_writer() as batch:
        for target in receiver:
            batch.put_item(
                Item={
                    'id': str(target['id']),
                    'uid': str(event['id']) + "f",
                    'user': user,
                    'type': 1,
                    'timestamp': now,
                    'fcm': target['fcm'] if 'fcm' in target else False,
                    'lang': target['lang'] if 'lang' in target else "FR",
                })
def get_tracks():
    """Return a mapping of track id -> inflated properties for every Track node."""
    graph = Graph()
    metadata = {}
    for node in graph.find("Track"):
        props = node.properties
        metadata[props['id']] = inflate(props)
    return metadata
class Neo4j():
    """py2neo Graph wrapper for querying Item / HudongItem nodes.

    Call connectDB() before using any query method.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): hard-coded localhost credentials (masked); move to config.
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return the single Item node whose title equals *value* (or None)."""
        answer = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return answer

    def matchHudongItembyTitle(self, value):
        """Return the HudongItem node (Hudong Baike entry) with the given title (or None)."""
        answer = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return answer

    def getLabeledHudongItem(self, filename):
        """Load every labeled HudongItem listed in *filename* (rows of title, label).

        Titles with no matching node are silently skipped.
        """
        labels = readCSV2(filename)
        items = []  # fixed: was `List` (shadows typing convention / reads as builtin)
        for line in labels:
            ctx = self.graph.find_one(label="HudongItem", property_key="title", property_value=line[0])
            if ctx is None:  # fixed: was `ctx == None`
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            items.append(cur)
        print('load LabeledHudongItem over ...')
        return items

    def getAllHudongItem(self, limitnum):
        """Return up to *limitnum* HudongItem wrappers."""
        items = []
        for g in self.graph.find(label="HudongItem", limit=limitnum):
            items.append(HudongItem(g))
        print('load AllHudongItem over ...')
        return items
def resolveAttrNameUsingKB(query):
    """Return the 'phrase' of the ROOT node best connected to *query*.

    "Best" is the largest shortestPathWeight; returns '' when no node
    scores above zero.
    """
    g = Graph()
    best_weight = 0
    best_node = None
    for candidate in g.find('ROOT'):
        weight = shortestPathWeight(query, candidate)
        # print(candidate.properties['phrase'] + ' : ' + str(weight), file=sys.stderr)
        if weight > best_weight:
            best_weight, best_node = weight, candidate
    return '' if best_node is None else best_node.properties['phrase']
class Neo4j():
    """Query helper over a py2neo Graph holding Item / HudongItem nodes.

    connectDB() must be called before the lookup methods.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): localhost credentials hard-coded (masked in source).
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return the Item node titled *value*, or None."""
        answer = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return answer

    def matchHudongItembyTitle(self, value):
        """Return the HudongItem (Hudong Baike entry) titled *value*, or None."""
        answer = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return answer

    def getLabeledHudongItem(self, filename):
        """Read (title, label) rows from *filename* and return labeled HudongItem wrappers.

        Unmatched titles are skipped.
        """
        labels = readCSV2(filename)
        items = []  # fixed: was `List`; also dropped unused counter `i = 0`
        for line in labels:
            ctx = self.graph.find_one(label="HudongItem", property_key="title", property_value=line[0])
            if ctx is None:  # fixed: was `ctx == None` with a stray semicolon on continue
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            items.append(cur)
        print('load LabeledHudongItem over ...')
        return items

    def getAllHudongItem(self, limitnum):
        """Return up to *limitnum* HudongItem wrappers."""
        items = []
        for g in self.graph.find(label="HudongItem", limit=limitnum):
            items.append(HudongItem(g))
        print('load AllHudongItem over ...')
        return items
def run():
    """Build one tree root per pattern pair and feed every rebase_enzyme node to tree_making."""
    DB_graph = Graph(password="******")
    selector = NodeSelection(DB_graph)
    roots = []
    for pair in pairs:
        tree_node = Node("tree_node")
        tree_node["pattern"] = pair
        roots.append(tree_node)
    for root in roots:
        # Each root walks the full rebase_enzyme node set.
        for enzyme in DB_graph.find('rebase_enzyme'):
            print(enzyme['pattern'])
            tree_making(root, enzyme)
def city():
    """Return JSON mapping each meetup Group name to the times of its HAS EVENT targets."""
    location = request.args['meetup_group']
    graph = Graph(host=config['neo4j']['host'],
                  user=config['neo4j']['user'],
                  password=config['neo4j']['password'])
    logger.info('Finding upcoming meetup events in {}'.format(location))
    groups_data = defaultdict()
    for grp in graph.find('Group'):
        event_times = [rel.end_node().properties['time']
                       for rel in graph.match(start_node=grp, rel_type="HAS EVENT")]
        groups_data[grp.properties['name']] = event_times
    return json.dumps(groups_data)
def lambda_handler(event, context):
    """Create a FOLLOW relationship from the event['id'] user to each user in event['follow']."""
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    targets = graph.find('User', property_key='id',
                         property_value=tuple(event['follow']))
    follower = graph.find_one('User', property_key='id', property_value=event['id'])
    now = int(time.time())
    if not follower:
        # Unknown follower id: nothing to link.
        return
    for target in targets:
        graph.create(Relationship(follower, 'FOLLOW', target, timestamp=now))
class Neo4j():
    """Neo4j access layer reading its connection settings from demo/neo4jconfig."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Pull url/username/password from the [neo4jdb] section of the config file.
        conf = configparser.ConfigParser()
        conf.read('demo/neo4jconfig')
        url = conf.get("neo4jdb", "url")
        username = conf.get("neo4jdb", "username")
        password = conf.get("neo4jdb", "password")
        self.graph = Graph(url, username=username, password=password)
        # self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Return one Item node with the given title."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    def matchHudongItembyTitle(self, value):
        """Return one HudongItem (Hudong Baike entry) with the given title."""
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    def getAllHudongItem(self, limitnum):
        """Collect up to *limitnum* HudongItem wrappers."""
        collected = [HudongItem(rec)
                     for rec in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return collected

#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
for p in simili_matrix.get(sk).keys(): try: if active_periodics.index(p) != 0: score += simili_matrix.get(active_key).get(p) except ValueError as e: pass recommended[sk] = score sorted_x = sorted(recommended.items(), key=operator.itemgetter(1), reverse = True) neighbor1 = graph.cypher.execute("MATCH p=(a:Author {keylattes:'%s'})-[r:AUTHORING*2]-(b:Author) RETURN DISTINCT b.name" % active_key) neighbor_list = list() # cast RecordList to list [neighbor_list.append(x[0]) for x in neighbor1] for i in sorted_x[:10]: n = graph.find_one("Author", property_key='keylattes', property_value=i[0]) try: if neighbor_list.index(n['name']) != -1: print('[**] ' + n['name'] + ' - ' + str(i[1])) except ValueError: print(n['name'] + ' - ' + str(i[1])) if __name__ == '__main__': r = Recommender() graph = Graph() authors = graph.find("Author") publications = graph.find("Article") r.recommend(authors, publications)
import os
import numpy as np
import re
import jieba
from gensim.models import word2vec

# Build a text corpus from the 'intro' property of every gainian / people / book
# node in the local Neo4j graph. The commented-out code below previously also
# harvested node names into a custom-dictionary file for the segmenter.
graph = Graph('http://localhost:7474', username='******', password='******')
labels = ['gainian', 'people', 'book']
names = []
# NOTE(review): Windows-specific absolute path with non-ASCII segments — verify on target machine.
path = 'F:\大创\抑郁症智能公益平台\\0项目\智能问答\简单的问题分类器\data'
# f = open(os.path.join(path, 'dict.txt'), 'a', encoding='utf-8')
text = ''
for label in labels:
    nodes = graph.find(label=label)
    for node in nodes:
        # Skip nodes with an empty/missing intro.
        if not node['intro']:
            continue
        # name = node['name']
        intro = node['intro']
        text += intro + '\n'
        # if 'http' in name or len(name) > 100:
        #     continue
        # name = re.sub("[《》]+", "", name)
        # names.append(name)
        # if label == 'gainian':
        #     f.write(name + ' 2000 ng\n')
        # elif label == 'book':
        #     f.write(name + ' 2000 nb\n')
        # else:
# Demo (Python 2): looking up nodes by label + property with py2neo's
# find() / find_one() against a Graphene-hosted database.
graph = Graph(graphene.DATABASE_URL)
print graph

# find a node or set of nodes according to properties and labels
# graph.find_one() # returns a single node
# graph.find() # returns a generator

# Let's find Marnee
marnee_node = graph.find_one("Person", property_key="name", property_value="Marnee")
print "find_one Marnee %s" % marnee_node

# find() yields every matching Person node lazily.
marnee_generator = graph.find("Person", property_key="name", property_value="Marnee")
for marnee in marnee_generator:
    print marnee

# Let's find Julian
julian_node = graph.find_one("Person", property_key="name", property_value="Julian")
print "find_one Julian %s" % julian_node

# Let's find all the Persons Julian knows
# show the Cypher -- MATCH
# show the code
# graph.match()
# graph.match_one()
'DINING_KITCHEN.jpg', 'GUEST_ROOM.jpg', 'LIVING.jpg', 'MASTER_BEDROOM.jpg', 'TOILET.jpg' ] }, 'Banksy Quirk': { 'desc': 'You are the person who picked up that little curio in Venice or Vellore, that everyone talks about. Everything about you is YOU. Special, edgy, liberal and sexy. Who thinks graffiti can be the purest form of art. Been to a protest lately?', 'cover': 'banksy quirk.jpg', 'images': [ 'DINING.jpg', 'GUEST_BEDROOM.jpg', 'KITCHEN.jpg', 'LIVING.jpg', 'MASTER_BEDROOM.jpg' ] } } z = graph.find(label) for node in z: node.properties['price'] = (random.randint(20000, 100000)) node.properties['description'] = metadata[node['name']]['desc'] node.properties['cover_pic'] = metadata[node['name']]['cover'] # node.properties['images'] = [] # if len(metadata[node['name']]['images']) > 0: # for image in metadata[node['name']]['images']: # node.properties['images'].append(generateImageString(image,1)) # else: # for i in range(0,4): # node.properties['images'].append(generateImageString(i,0)) node.push()
# Persist the previously-built subgraph `s` (defined earlier in the script),
# then demonstrate the py2neo query APIs.
graph.create(s)

'''
2 —— Node查询
'''
# Query with CQL (Cypher); the result comes back as a list.
data1 = graph.data('MATCH(p:PersonTest) return p')
print("data1 = ", data1, type(data1))
print()

# find_one() returns the first node matching label + property.
data2 = graph.find_one(label='PersonTest', property_key='name', property_value="李四")
print("data2 = ", data2, type(data2))
print()

# find() returns a generator over all matching nodes.
data3 = graph.find(label='PersonTest')
for data in data3:
    print("data3 = ", data)
print()

'''
3 —— Relationship查询
'''
# match_one() returns a single relationship of the given type.
relationship = graph.match_one(rel_type='KNOWNS')
print(relationship, type(relationship))
print()

'''
4 —— 更新Node的某个属性值,若node没有该属性,则新增该属性
'''
# Fetch the node to update (property is added if it does not exist yet).
node1 = graph.find_one(label='PersonTest', property_key='name', property_value="张三")
class GraphDB():
    """py2neo-backed store of Twitter users, topics, and their weighted relations."""

    def __init__(self, user=NEO4J_USER, pwd=NEO4J_PWD, host=NEO4J_HOST):
        # Credentials are embedded in the connection URL (py2neo v2 style).
        self.graph = Graph("http://%s:%s@%s/db/data/" % (user, pwd, host))

    def query(self, query_str, stream=False):
        # Run a Cypher query; stream=True returns a lazy cursor instead of a RecordList.
        if stream:
            return self.graph.cypher.stream(query_str)
        else:
            return self.graph.cypher.execute(query_str)

    def create_relation_user_to_topic(self, user, relation, topic_name):
        # Upsert user and topic nodes, then create the relation or bump its 'count'.
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        topicNode = self.graph.find_one("topic_name", 'name', topic_name)
        if not topicNode:
            topicNode = Node("topic_name", name = topic_name)
            self.graph.create(topicNode)
        relationship = self.graph.match_one(userNode, relation, topicNode)
        if not relationship:
            relationship = Relationship(userNode, relation, topicNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    # Relations: follows eventuell favourites, retweets
    def create_relation_user_to_user(self, userA, relation, userB):
        # Same upsert-or-increment pattern as above, between two user nodes.
        userANode = self.graph.find_one("user", 'id', userA.id_str)
        userBNode = self.graph.find_one("user", 'id', userB.id_str)
        if not userANode:
            userANode = self.create_node_from_user(userA)
            self.graph.create(userANode)
        if not userBNode:
            userBNode = self.create_node_from_user(userB)
            self.graph.create(userBNode)
        relationship = self.graph.match_one(userANode, relation, userBNode)
        if not relationship:
            relationship = Relationship(userANode, relation, userBNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    def increment_user_counter(self, user, counter, n):
        # Add n to an arbitrary per-user counter property, creating it at n if missing.
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        if counter in userNode.properties:
            userNode.properties[counter] += n
        else:
            userNode.properties[counter] = n
        userNode.push()

    def get_all_users(self):
        # Return [{'name': screen_name, 'id_str': id}, ...] for every user node.
        users = []
        for u in self.graph.find('user'):
            users.append({'name': u.properties['screen_name'], 'id_str': u.properties['id']})
        return users

    def create_node_from_user(self, user):
        # Map a tweepy-style user object onto a 'user' node (not yet persisted).
        userNode = Node("user", name=user.screen_name, id=user.id_str, followers_count=user.followers_count, friends_count=user.friends_count, statuses_count=user.statuses_count, favourites_count=user.favourites_count)
        return userNode

    def quicksearch(self, username, limit=10):
        # NOTE(review): username/limit are %-interpolated into Cypher — injection risk;
        # consider parameterized queries.
        cql_query = "match(u:user) WHERE u.name =~ '%s.*' RETURN DISTINCT u.name LIMIT %s;"
        return self.query(cql_query % (username, limit))

    def get_user_count(self):
        # Single-row aggregate; returns 0 if the query yields nothing.
        cql_query = "match(u:user) RETURN count(DISTINCT u) AS c;"
        for row in self.query(cql_query):
            return row['c']
        return 0
#authorization auth = tweepy.OAuthHandler(ckey, csecret) #wait on rate limits api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) #Get specific hashtag until the date we want tweets = tweepy.Cursor(api.search, q="#SuperBowl", count=100, until='2016-02-09', include_entities=True).items() for tweet in tweets: #find if exists for exploration... mynode = list( graph.find('User', property_key='Screen_Name', property_value=tweet.user.screen_name.encode('utf8'))) x = graph.merge_one("User", "Screen_Name", tweet.user.screen_name.encode('utf8')) x.properties.update({ "Name": tweet.user.name, "Description": tweet.user.description.encode('utf8'), "Location": tweet.user.location, "Followers": tweet.user.followers_count, "Friends": tweet.user.friends_count, "Tweets": tweet.user.statuses_count, "Image": tweet.user.profile_image_url }) if len(mynode) == 0: x.properties.update({"Exploration": ''})
from __future__ import print_function
from py2neo import Graph, Node, Relationship

# Demo script: inspect existing Person nodes and link a child to her father.
graph_db = Graph()
person = Node("Person", name="JUHASZ Lilla Linda")  # NOTE(review): created but never bound/used
for record in graph_db.cypher.execute("Match (n) return n"):
    print(record)
new_person = Node("Person", name="JUHASZ Peter", born=1930)
# Listing matches before binding shows whether the node already exists remotely.
print("exists: " + str(list(graph_db.find("Person", property_key="name", property_value="JUHASZ Peter"))))
new_person.bind(graph_db.uri)
print("exists: " + str(new_person.exists))
father = graph_db.find_one("Person", property_key='name', property_value="JUHASZ Peter")
child = graph_db.find_one("Person", property_key='name', property_value="JUHASZ Lilla Linda")
child_of_rel = "CHILD_OF"
father_daughter_relationship = Relationship(child, child_of_rel, father)
graph_db.create(father_daughter_relationship)
class TestNeoDBHandler(unittest.TestCase):
    """Integration tests for NeoDBHandler run against a throwaway TEST-labelled subgraph."""

    def setUp(self):
        # Build 5 TEST/Twirp nodes whose numeric counters are arithmetic
        # progressions of the index, plus a small web of DIRECT/INDIRECT
        # relationships between them.
        self.graph = Graph(TEST_GRAPH_DB)
        self.node_list = [Node("TEST", test_id=i) for i in xrange(5)]

        # Nodes
        # -----
        for i, node in enumerate(self.node_list):
            node.labels.add("Twirp")
            node.properties.update({
                "user_id": i*100000,
                "username":"",
                "name":"",
                "handle":"",
                "followers_count":i*100,
                "friends_count":i*50,
                "tweet_count":i*10,
                "retweet_count":i*5,
                "been_retweeted_count":i*3,
                "favourite_hashtag":"",
                "hashtag_count":i*2,
                "archipelago_id":i*1,
                "subscribed": True,
                "constituency":"CB"+str(i),
                "offices":["office"+str(i), "sedge steward"],
            })

        self.node_list[0].properties.update({"username":"******", "name":"Michael Blue Eyes", "handle":"MBEyes", "favourite_hashtag":"#roth", "party":"DC" })
        self.node_list[1].properties.update({"username":"******", "name":"Little Richard", "handle":"LRichy", "favourite_hashtag":"#rawls", "party":"DC" })
        self.node_list[2].properties.update({"username":"******", "name":"The Boy Wonder", "handle":"tBW", "favourite_hashtag":"#richyfeynman", "party":"Marvel" })
        self.node_list[3].properties.update({"username":"******", "name":"Kendog Lamar", "handle":"Kdog", "favourite_hashtag":"#kanye", "party":"Marvel"})
        self.node_list[4].properties.update({"username":"******", "name":"Tiny Hands", "handle":"tinyhands", "favourite_hashtag":"#ihavetinyhands", "party":"Beano" })

        # Relationships
        # --------------
        # mbe   -[MENTION]> lrich
        # mbe   -[REPLIES]> ken
        # lrich -[REPLIES]> mbe
        # tbw   -[RETWEETS]> lrich
        # tbw   -[MENTIONS_BY_PROXY]> mbe
        # ken   -!->
        # th    -!->
        defaults = {
            "mentions":0, "mention_last":"", "mention_date":"",
            "replies":0, "reply_last":"", "reply_date":"",
            "retweets":0, "retweet_last":"", "retweet_date":""
        }
        mbe1 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[1], **defaults)
        mbe2 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[3], **defaults)
        lrich = Relationship(self.node_list[1], "DIRECT", self.node_list[0], **defaults)
        tbw = Relationship(self.node_list[2], "DIRECT", self.node_list[1], **defaults)
        tbw2 = Relationship(self.node_list[2], "INDIRECT", self.node_list[0], **defaults)

        mbe1.properties.update({ "mentions":5, "mention_last":"1000000", "mention_date":"today" })
        mbe2.properties.update({ "replies":10, "reply_last":"2000000", "reply_date":"tommorow" })
        lrich.properties.update({ "replies":15, "reply_last":"3000000", "reply_date":"yesterday" })
        tbw.properties.update({ "retweets":20, "retweet_last":"4000000", "retweet_date":"thismorning" })
        tbw2.properties.update({ "mentions":1, "mention_last":"3000000", "mention_date":"yesterday" })

        for node in self.node_list:
            self.graph.create(node)
        self.graph.create(mbe1)
        self.graph.create(mbe2)
        self.graph.create(lrich)
        self.graph.create(tbw)
        self.graph.create(tbw2)
        self.graph.push()

    def tearDown(self):
        # remove test items
        self.graph.cypher.execute("MATCH (n:TEST) DETACH DELETE n")
        empty_list = [ _ for _ in self.graph.find('TEST') ]
        self.assertEqual( empty_list, [])

    ########################################################################
    #                           CYPHER QUERIES                             #
    ########################################################################

    def test_get_party_nodes(self):
        # All 'Marvel' party members, no minimum-tweet filter.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"Kendog Lamar", "handle":"Kdog", "party":"Marvel",
                "constituency":"CB3", "offices":["office3", "sedge steward"],
                "tweets": 30, "friends": 150, "followers": 300,
                "archipelago_id": 3,
                "tweeted":[], "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0], "mention_last":['3000000', ""], "mention_date":['yesterday', ""],
                "replies":[0,0], "reply_last":["",""], "reply_date":["",""],
                "retweets":[0, 20], "retweet_last":["",'4000000'], "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 0) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_party_nodes_min_tweet(self):
        # Same query with a minimum of 5 tweets: the INDIRECT edge drops out.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"Kendog Lamar", "handle":"Kdog", "party":"Marvel",
                "constituency":"CB3", "offices":["office3", "sedge steward"],
                "tweets": 30, "friends": 150, "followers": 300,
                "archipelago_id": 3,
                "tweeted":[], "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0], "mention_last":[""], "mention_date":[""],
                "replies":[0], "reply_last":[""], "reply_date":[""],
                "retweets":[20], "retweet_last":['4000000'], "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 5) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_default(self):
        # Marvel -> DC edges only, no minimum-tweet filter.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0], "mention_last":['3000000', ""], "mention_date":['yesterday', ""],
                "replies":[0,0], "reply_last":["",""], "reply_date":["",""],
                "retweets":[0, 20], "retweet_last":["",'4000000'], "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 0 ) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_min_tweets(self):
        # Cross-party with minimum 5 tweets: only the DIRECT retweet edge survives.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0], "mention_last":[""], "mention_date":[""],
                "replies":[0], "reply_last":[""], "reply_date":[""],
                "retweets":[20], "retweet_last":['4000000'], "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 5) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    ########################################################################
    #              ADDING TO DB (TWIRPS CLASSES)->(PY2NEO OBJS)            #
    ########################################################################

    def test_add_Twirp_to_database(self):
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_twirp = Twirp(None, 'test')
        new_twirp.id = 314150000000
        new_twirp.username = '******'
        new_twirp.name = 'Bilbo Baggins'
        new_twirp.handle = 'bilbo'
        new_twirp.followers_count = 20
        new_twirp.friends_count = 30
        new_twirp.tweet_count = 40
        new_twirp.retweet_count = 50
        new_twirp.been_retweet_count = 60
        new_twirp.favourite_hashtag = '#onering'
        new_twirp.hashtag_count = 70
        new_twirp.archipelago_id = 80
        new_twirp.twirps_type = -1
        new_twirp.subscribed = False
        new_twirp.geo = False

        # Add to database (with 'TEST' label)
        neo_db_handler.add_Twirp_to_database(new_twirp, is_test_mode=True)

        # Check results
        results = [ _ for _ in self.graph.cypher.execute(
            "MATCH (n {handle:'bilbo'}) RETURN n")]
        self.assertEqual(len(results), 1)
        node = results[0][0]

        # Interrogate Node
        self.assertEqual(node.get_labels(), [u'TEST', u'Twirp', u'Other'])
        self.assertEqual(node["user_id"],314150000000)
        self.assertEqual(node["username"],'BilboBagginsMP')
        self.assertEqual(node["name"],'Bilbo Baggins')
        self.assertEqual(node["handle"],'bilbo')
        self.assertEqual(node["followers_count"],20)
        self.assertEqual(node["friends_count"],30)
        self.assertEqual(node["tweet_count"],40)
        self.assertEqual(node["retweet_count"],50)
        self.assertEqual(node["been_retweeted_count"],60 )
        self.assertEqual(node["favourite_hashtag"],'#onering')
        self.assertEqual(node["hashtag_count"],70)
        self.assertEqual(node["archipelago_id"],80 )
        self.assertEqual(node["subscribed"],False)

    def test_add_Tweet_to_database__mention(self):
        # TEST: (LRich)->(tinyhands) - mention: ("Hey @tinyhands")
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands')]
        new_tweet.content = 'Generic tweet @tinyhands'  # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3      # not stored here
        new_tweet.favourite_count = 4    # not stored here
        new_tweet.hashtags = ['clothes'] # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'}) RETURN r""")]
        self.assertEqual(len(results), 1)
        relationship = results[0][0]

        # In depth check
        self.assertEqual(relationship.type, u'DIRECT')
        self.assertEqual(relationship["mentions"], 1)
        self.assertEqual(relationship["mention_last"], '1')
        self.assertEqual(relationship["mention_date"], 'a date string')
        self.assertEqual(relationship["replies"], 0)
        self.assertEqual(relationship["reply_last"], '')
        self.assertEqual(relationship["reply_date"], '')
        self.assertEqual(relationship["retweets"], 0)
        self.assertEqual(relationship["retweet_last"], '')
        self.assertEqual(relationship["retweet_date"], '')

    def test_add_Tweet_to_database__reply(self):
        # TEST: (LRich) ->(tBW) - reply & mention;
        #       (LRich) ->(tinyhands) mention  EG: (reply->tBW):"Hey @tBW, @tinyhands"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands'), (200000, 'tBW')]
        new_tweet.content = 'Generic tweet @tinyhands @tBW' # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = True
        new_tweet.in_reply_to_user = (200000, 'tBW')
        new_tweet.in_reply_to_status_id = 2
        new_tweet.retweet_count = 3      # not stored here
        new_tweet.favourite_count = 4    # not stored here
        new_tweet.hashtags = ['clothes'] # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b) WHERE b.handle<>'MBEyes' RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'DIRECT')
        self.assertEqual(results[0][1], 'The Boy Wonder')
        self.assertEqual(results[0][0]["mentions"], 0)
        self.assertEqual(results[0][0]["mention_last"], '')
        self.assertEqual(results[0][0]["mention_date"], '')
        self.assertEqual(results[0][0]["replies"], 1)
        self.assertEqual(results[0][0]["reply_last"], '1')
        self.assertEqual(results[0][0]["reply_date"], 'a date string')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Tiny Hands')
        self.assertEqual(results[1][0]["mentions"], 1)
        self.assertEqual(results[1][0]["mention_last"], '1')
        self.assertEqual(results[1][0]["mention_date"], 'a date string')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 0)
        self.assertEqual(results[1][0]["retweet_last"], '')
        self.assertEqual(results[1][0]["retweet_date"], '')

    def test_add_Tweet_to_database__retweet(self):
        # TEST: (tiny) ->(MBEyes) - reply & mention;
        #       (tiny) ->(Kdog) mention_by_proxy  EG: (ret->MBE):"Hey @MBE, @Kdog"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 400000
        new_tweet.handle = 'tinyhands'
        new_tweet.mentions = [(300000, 'Kdog')]
        new_tweet.content = 'Generic tweet @Kdog' # not stored here
        new_tweet.is_retweet = True
        new_tweet.retweeted_user = (0, 'MBEyes')
        new_tweet.retweet_status_id = 2
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3    # not stored here
        new_tweet.favourite_count = 4  # not stored here
        new_tweet.hashtags = []        # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'tinyhands'})-[r]->(b) RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'INDIRECT')
        self.assertEqual(results[0][1], 'Kendog Lamar')
        self.assertEqual(results[0][0]["mentions"], 1)
        self.assertEqual(results[0][0]["mention_last"], '1')
        self.assertEqual(results[0][0]["mention_date"], 'a date string')
        self.assertEqual(results[0][0]["replies"], 0)
        self.assertEqual(results[0][0]["reply_last"], '')
        self.assertEqual(results[0][0]["reply_date"], '')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Michael Blue Eyes')
        self.assertEqual(results[1][0]["mentions"], 0)
        self.assertEqual(results[1][0]["mention_last"], '')
        self.assertEqual(results[1][0]["mention_date"], '')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 1)
        self.assertEqual(results[1][0]["retweet_last"], '1')
        self.assertEqual(results[1][0]["retweet_date"], 'a date string')
class EmailGraph:
    """Builds a Neo4j graph of emails, senders/receivers, emotions and key phrases.

    Pipeline per email: strip boilerplate headers (process_email), NER-tag the
    text with Stanford NER, collapse tags into entity phrases, filter/score the
    phrases (filter_by_contents), attach a WordNet hypernym as a category
    (filter_by_hypernym), then persist everything as nodes and relationships
    (add_to_graph).

    NOTE(review): reconstructed from a collapsed source dump — the nesting of a
    few statements (see add_to_graph) is the most plausible reading, not verified.
    """

    #http://py2neo.org/2.0/intro.html#nodes-relationships
    #Creates a New Graph (You will Need to Update this Function for your own install)
    def __init__(self, user, pwrd):
        # Connect to a local Neo4j instance (py2neo 2.x REST API style).
        authenticate("localhost:7474", user, pwrd)
        self.graph = Graph("http://localhost:7474/db/data/")
        # Stanford NER needs a JVM; JAVAHOME is set for nltk's tagger wrapper.
        # NOTE(review): Windows-specific hard-coded paths; also the backslashes
        # are not escaped (works only because \P, \O, \s, \c are not recognized
        # escapes) — left unchanged here.
        java_path = "C:\ProgramData\Oracle\Java\javapath\java.exe"
        os.environ['JAVAHOME'] = java_path
        self.st = StanfordNERTagger('C:\stanford-ner-2015-12-09\classifiers\english.conll.4class.distsim.crf.ser.gz',\
                                    'C:\stanford-ner-2015-12-09\stanford-ner.jar')
        self.stop_words = nltk.corpus.stopwords.words('english')
        # Domain-specific noise words (FOIA/State Dept boilerplate) excluded
        # from key-phrase extraction.
        self.legal_words = {"section","fw","re","ops","fyi","doc no","case no","subtitle","btw","usc","foia","chapter","u.s.c",\
                            "report","attachment","attachments","note","amended", "ebook","subject","unclassified department of state case","doc",\
                            "unclassified u.s. department of state","original message","project", "copyright", "pls", "you","u.s. department of state case no"}

    #process email: removes some of the headings before looking for keywords
    def process_email(self, email):
        """Return *email* with header/boilerplate lines dropped and the rest
        joined into one string, ensuring each kept line ends in a period so
        sentence tokenization works later."""
        processed = ""
        for line in email.split('\n'):
            s = line.lower()
            # Skip routing headers and classification banners.
            if s.startswith("unclassified u.s. department of state") or \
               s.startswith("release in") or \
               s.startswith("original message") or \
               s.startswith("to:") or \
               s.startswith("from:") or \
               s.startswith("sent:") or \
               s.startswith("cc:"):
                pass
            else:
                # Force a sentence boundary when the line lacks one.
                if len(line) > 0 and line[-1] == '.':
                    processed = processed + line + ' '
                else:
                    processed = processed + line + '. '
        return processed

    #filter_by_contents: receives a list of noun_phrases and filters out phrases contained in longer phrases elsewhere in the list
    def filter_by_contents(self, noun_phrases):
        """Filter and rank candidate phrases.

        *noun_phrases* is a list of (phrase, tag) tuples.  Phrases that are
        substrings of a longer phrase, legal/boilerplate words and stopwords
        are removed; the survivors are frequency-counted, the top 20 are
        merged by their phrase text (summing counts across tags) and the
        result is returned as ((phrase, tag), count) sorted by count desc.
        """
        in_others = []
        # O(n^2) scan: mark phrases contained in a different, longer phrase.
        for i, candidate in enumerate(noun_phrases):
            for j, other in enumerate(noun_phrases):
                if i != j:
                    if candidate[0].lower() in other[0].lower() and candidate[
                            0] != other[0]:  #compare each phrase with another
                        in_others.append(candidate)
        #filter out our identified 'duplicate' words and stopwords.
        filtered_words = [w for w in noun_phrases if w not in in_others and \
                          w[0].lower() not in self.legal_words and w[0].lower() not in self.stop_words]
        #create a Frequency Distribution
        unigram_fd = nltk.FreqDist(filtered_words)
        #get the most common phrases
        common_noun_phrases = unigram_fd.most_common(20)
        result = []
        # Merge entries that share the same phrase text but differ in NER tag:
        # keep the first-seen tag, sum the counts.
        words = set([w[0][0].lower() for w in common_noun_phrases])
        for w in words:
            best_match = None
            for phrase in common_noun_phrases:
                if phrase[0][0].lower() == w:
                    if best_match is None:
                        best_match = phrase
                    else:
                        best_match = (best_match[0], best_match[1] + phrase[1])
            result.append(best_match)
        return sorted([w for w in result], key=lambda w: w[1], reverse=True)

    #filter_by_hypernym: receives a list of candidates and finds the best hypernym for each.
    #I started with code by Anna Swigart, ANLP 2015, and her concept of using a dictionary to store
    #terms from WordNet, however this code drastically departs from her algorithm.
    def filter_by_hypernym(self, candidates):
        """Replace each candidate's tag with its first WordNet (noun) hypernym
        when one exists; candidates without synsets/hypernyms pass through
        unchanged.  Input/output shape: ((phrase, tag), count)."""
        #create a dictionary
        results = []
        for term in candidates:  #loop through list of candidates
            synsets = wn.synsets(term[0][0], 'n')  #obtain the synsets for the phrase
            if len(synsets) >= 1:
                # Direct hypernyms first, then instance hypernyms (e.g. for
                # proper nouns); take the head word of the first one.
                hypers = synsets[0].hypernyms(
                ) + synsets[0].instance_hypernyms()
                if len(hypers) >= 1:
                    results.append(((term[0][0],
                                     hypers[0].name().split('.')[0]), term[1]))
                else:
                    results.append(term)
            else:
                results.append(term)
        return results

    #algorithm for extracting key words from an email body
    def final_algorithm(self, email):
        """End-to-end key-phrase extraction for one email body; returns the
        filtered, categorized ((phrase, category), count) list."""
        #Create Sentences
        sentences = nltk.sent_tokenize((self.process_email(email)))
        tokenized_sentences = []
        for s in sentences:
            #get the tokens for each sentence that are filtered
            # Drop tokens containing digits, boilerplate words and short tokens.
            tokenized_sentences.append([word for word in nltk.word_tokenize(s) \
                                        if not re.search('[0-9]', word) and word.lower() not in self.legal_words and len(word) > 2])

        #separate the NER tagged entities from the rest
        def get_entities(tags):
            # Group consecutive same-label tokens into multi-word entities;
            # 'O' (outside) closes the current run.
            result = []
            curr = []
            for ent in tags:
                if ent[1] == 'O':
                    if len(curr) > 0:
                        result.append(curr)
                        curr = []
                else:
                    if len(curr) > 0:
                        if not curr[0][1] == ent[1].lower():
                            # Label changed: flush the run, start a new one.
                            result.append(curr)
                            curr = [(ent[0], ent[1].lower())]
                        else:
                            curr = curr + [(ent[0], ent[1].lower())]
                    else:
                        curr = [(ent[0], ent[1].lower())]
            return result

        #NER tag each of the sentences
        tagged_sents = self.st.tag_sents(tokenized_sentences)
        entity_names = []
        for s in tagged_sents:
            entity_names = entity_names + get_entities(s)

        #reorganize the entities for further processing
        def compress_entities(entities):
            # [(word, label), ...] runs -> single ("joined words", label) tuples.
            new_list = []
            for entity in entities:
                result = " ".join([w[0] for w in entity])
                new_list.append((result, entity[0][1]))
            return new_list

        entity_names = compress_entities(entity_names)
        #print(entity_names)  # Print unique entity names
        noun_phrases = entity_names
        #Candidates Filtered by Duplicate Nouns and Rescored by Length
        noun_phrases = self.filter_by_contents(noun_phrases)
        #print(noun_phrases)
        #Candidate with better categories/hypernyms!
        noun_phrases = self.filter_by_hypernym(noun_phrases)
        #print("Email:\n" + email)
        print("Key Phrases:\n" + str(noun_phrases))
        return noun_phrases

    #clears out a graph
    def delete(self):
        """Remove every node and relationship from the graph."""
        self.graph.delete_all()

    #checks to see if a node exists in a graph
    #http://stackoverflow.com/questions/22134649/how-to-check-if-a-node-exists-in-neo4j-with-py2neo
    def find_existing(self, label, key, value):
        """Return the first node with *label* and property key == value, or
        None when no such node exists."""
        mynode = list(
            self.graph.find(label, property_key=key, property_value=value))
        # node found
        if len(mynode) > 0:
            return mynode[0]
        # no node found
        else:
            return None

    #adds a new 'email' data element to the graph
    #code based on http://py2neo.org/2.0/intro.html#nodes-relationships
    def add_to_graph(self, data_element, terms):
        """Persist one email row plus its extracted *terms*.

        data_element: a pandas row with the columns listed below.
        terms: ((keyword, category), count) tuples from final_algorithm.

        Creates Email, User, Emotion, Keyword and Category nodes linked by
        SENT / RECEIVED / EMOTED / MENTIONS / IS_TYPE_OF relationships.
        Everything is skipped when an Email node with this DocNumber already
        exists (the whole body is guarded by `if email is None`).
        """
        #['Id', 'DocNumber', 'MetadataSubject', 'MetadataTo', 'Metadata From',
        #'MetadataDateSent', 'ExtractedSubject', 'ExtractedTo',
        #'ExtractedFrom', 'ExtractedBodyText','RawText', 'Label']]
        email_id = data_element['DocNumber']
        email_feeling = data_element['NewLabel']
        email = self.find_existing("Email", "docid", email_id)
        if email is None:
            # NewLabel '1' marks an emotional email; anything else is neutral.
            if str(email_feeling) == '1':
                email_feelstr = 'emotional'
                n = 'E'
            else:
                email_feelstr = 'neutral'
                n = 'N'
            email = Node("Email", name = n, docid = email_id, tone=email_feelstr,\
                         subject=data_element["ExtractedSubject"], date=data_element['MetadataDateSent'])
            # `s` accumulates nodes/relationships via `|` for one batched create.
            s = email
            #add From nodes
            from_id_all = data_element['ExtractedFrom']
            # Non-string (NaN) means the field was missing in the CSV.
            if type(from_id_all) is str:
                for from_id_i in from_id_all.split(';'):
                    from_id = from_id_i.strip().strip('\'')
                    sender = self.find_existing("User", "address", from_id)
                    if sender is None:
                        sender = Node("User", address=from_id)
                    s = s | Relationship(sender, "SENT", email)
            #add To nodes
            to_id_all = data_element['ExtractedTo']
            if type(to_id_all) is str:
                for to_id_i in to_id_all.split(';'):
                    to_id = to_id_i.strip().strip('\'')
                    receiver = self.find_existing("User", "address", to_id)
                    if receiver is None:
                        receiver = Node("User", address=to_id)
                    s = s | Relationship(receiver, "RECEIVED", email)
            #add Emotion nodes
            emote_all = data_element['Emotions']
            #print(emote_all)
            if type(emote_all) is str:
                print("Emotions: " + str(emote_all))
                for emote in emote_all.split(';'):
                    if len(emote) > 0:
                        emotion = self.find_existing("Emotion", "name", emote)
                        if emotion is None:
                            emotion = Node("Emotion", name=emote)
                        s = s | Relationship(email, "EMOTED", emotion)
            self.graph.create(s)
            #add keywords and categories
            for item in range(0, len(terms)):
                keyword = terms[item][0][0]
                category = terms[item][0][1]
                # NOTE(review): `n` is reused here (it held the Email name
                # above); harmless because the Email node is already created.
                n = self.find_existing("Keyword", "name", keyword)
                if n is None:
                    n = Node("Keyword", name=keyword)
                s = Relationship(email, "MENTIONS", n)
                c = self.find_existing("Category", "name", category)
                if c is None:
                    c = Node("Category", name=category)
                s = s | Relationship(n, "IS_TYPE_OF", c)
                self.graph.create(s)

    #get_random_emails - returns a number of random emails from a given data frame
    def get_random_emails(self, data_set, number):
        """Return *number* randomly chosen rows (relevant columns only) from
        *data_set*, reindexed 0..number-1."""
        random_index = np.random.permutation(data_set.index)
        # NOTE(review): DataFrame.ix is removed in modern pandas — this code
        # targets an old pandas release.
        full_data_shuffled = data_set.ix[random_index,\
            ['Id', 'DocNumber', 'MetadataSubject', 'MetadataTo', 'Metadata From', 'MetadataDateSent',\
             'ExtractedSubject', 'ExtractedTo', 'ExtractedFrom','ExtractedBodyText','RawText',\
             'NewLabel', 'Emotions']]
        full_data_shuffled.reset_index(drop=True, inplace=True)
        #separate the training data from the development data
        return full_data_shuffled.loc[0:number - 1]

    #adds a specified number of emails from a dataset
    def add_new_emails(self, num, total_df):
        """Pick *num* random emails from *total_df*, extract key phrases from
        body+subject (non-printable chars stripped) and add each to the graph."""
        selected_emails = self.get_random_emails(total_df, num)
        selected_emails["MetadataDateSent"].fillna(value='<blank>', inplace=True)
        selected_emails["ExtractedSubject"].fillna(value='<blank>', inplace=True)
        data_list = selected_emails["RawText"].values.tolist()
        subject_list = selected_emails["ExtractedSubject"].values.tolist()
        printable = set(string.printable)
        #for each email, extract the key words and then add to the graph
        for index in range(0, num):
            # Body + ' . ' + subject, keeping printable chars only and
            # dropping the '<blank>' placeholder from the subject.
            s = "".join(filter(lambda x: x in printable, data_list[index])) + ' . ' +\
                "".join(filter(lambda x: x != '<blank>' and x in printable, subject_list[index]))
            terms = self.final_algorithm(s)
            self.add_to_graph(selected_emails.loc[index], terms)
dates = pd.date_range(start='19850101', end='20180517') big_event = pd.DataFrame(index=dates) big_event['rrr'] = rrr_df['changed'] big_event['T007'] = big_volati big_event = big_event.dropna(how='all') follow_user = '******' for day in dates.tolist(): if day in big_event.index.tolist(): if pd.notnull(big_event.loc[day, 'rrr']): # get the leaf node data = test_graph.data( "match(n:event)-[r:Transmission*1..7]->(relateNode) where n.event_type='rrr' and size((relateNode)-[]->())=0 return relateNode") event = 'rrr' if pd.notnull(big_event.loc[day, 'T007']): event_node = test_graph.find('event', property_key='event_type', property_value='T007') data = test_graph.data( "match(n:event)-[r:Transmission*1..7]->(relateNode) where n.event_type='T007' and size((relateNode)-[]->())=0 return relateNode") event = 'T007' transmited_assets = [x['relateNode']['tdx_stock_code'] for x in data] transmited_follow_asset = set(transmited_assets) & set(follow_assets.keys()) for tra in transmited_follow_asset: if follow_user in follow_assets[tra]: print(day, follow_user, '关注的', tra, '因', event, '大波动造成影响!') else: # print(day, follow_user, '关注的资产今天没有影响') pass
from py2neo import authenticate, Graph, Node, Relationship from passlib.hash import bcrypt import os authenticate("localhost:7474", "neo4j", "shanghai") #graph = Graph(os.environ.get('GRAPHENEDB_URL', 'http://localhost:7474') + '/db/data/') graph = Graph("http://localhost:7474/db/data/") """ py2neo API graph.find() ; graph.match() > RETURNS generator > elem in generator: Node or Relationship graph.execute() > RETURNS RecordList > elem in RecordList: Record > elem[0]: Node graph.find(label, property_key=None, property_value=None, limit=None) graph.find_one(label, property_key=None, property_value=None) """ class Person: """ - id (UNIQUE CONSTRAINT)
# -*- coding: utf-8 -*- from py2neo import Graph, Node, Relationship, NodeSelector graph = Graph("http://139.224.129.150:7474/browser/", username="******", password="******") # 用CQL进行查询,返回的结果是list data1 = graph.data('MATCH(p:Tag) return p') print("data1 = ", data1, type(data1)) # 用find_one()方法进行node查找,返回的是查找node的第一个node data2 = graph.find_one(label='Form') print("data2 = ", data2, type(data2)) # 用find()方法进行node查找,需要遍历输出,类似于mongodb data3 = graph.find(label='Form') for data in data3: print("data3 = ", data) # Relationship查询 relationship = graph.match_one(rel_type='Sub') print(relationship, type(relationship))
inf = open('links.csv') hashes = {} for i, row in enumerate(csv.reader(inf)): first_hash = geohash.encode(float(row[1]), float(row[2]), 6) last_hash = geohash.encode(float(row[4]), float(row[5]), 6) if first_hash != last_hash: # it would be better to eliminate these by duration instead hashes.setdefault(first_hash, []).append(last_hash) for src_geohash, destinations in hashes.items(): source = Node(geohash_label, name=src_geohash) print("creating {} with {} destinations".format(src_geohash, len(destinations))) #graph.merge_one(geohash_label, source) matches = list( graph.find(geohash_label, property_key="name", property_value=src_geohash)) if matches: source = matches[0] else: geo = gaz_it_up(src_geohash) source.properties["admin1"] = geo["admin1"] source.properties["admin2"] = geo["admin2"] source.properties["asciiname"] = geo["asciiname"] source.properties["geoname"] = geo["name"] graph.create(source) for dest in destinations: print("dest=", dest) destination = Node(geohash_label, name=dest) #graph.merge_one(geohash_label, destination)
# graph.run(CREATE (Song2-[:Key]->Node2)) # graph.run(CREATE (Song3-[:Key]->Node1)) graph.create(Rel1) graph.create(Rel2) graph.create(Rel3) graph.create(Rel4) graph.create(Rel5) graph.create(Rel6) graph.create(Rel7) graph.create(Rel8) graph.create(Rel9) graph.create(Rel10) graph.create(Rel11) graph.create(Rel12) results = graph.find("Word","Name","baby") for result in results: print(result) # # MATCH (pee1)-[:Key]->(n:Word {Name:"baby"})<-[:Key]-(pee2) WHERE pee1<>pee2 RETURN pee1,pee2,n # FOREACH(p1 in pee1 | # FOREACH (p2 in pee2 | # MATCH (p1)-[:Key]->(n:Word)<-[:Key]-(p2) WHERE p1<>p2)) RETURN p1,p2,n # # FOREACH(country in cou | # FOREACH (c in ch | # FOREACH (a in addresses | # CREATE (s:Store {name:c.name+"_"+a, address:a}) # CREATE (s-[:BELONGS_TO]->c) # CREATE (s-[:IN]->country) )))
class DpNLU(object):
    """Dependency-parse based NLU: matches a parsed statement against XML
    templates, filters negated hits, then looks the resulting words up in a
    Neo4j knowledge graph.

    Expects `stmt` objects exposing get_words()/get_pos()/get_arcs(), where
    arcs[i] is (head_index, relation) with head_index 1-based (0 = root) —
    inferred from the `arcs[i - 1][0]` walk below; TODO confirm against the
    parser in use (looks like pyltp-style output).
    """

    def __init__(self, configs):
        # configs supplies: dp_template_path, neo_db_ip, neo_username,
        # neo_password (project-defined object).
        self.configs = configs
        self.root = None        # root element of the template XML tree
        self.graph = None       # py2neo Graph connection
        self.all_label = None   # node labels searched during lookup
        self.__load_template()
        self.__conn_knowledge_graph()

    def __load_template(self):
        """Parse the matching-template XML file and keep its root element."""
        tree = ET.ElementTree(file=self.configs.dp_template_path)
        self.root = tree.getroot()

    def __conn_knowledge_graph(self):
        """Open the Neo4j connection and fix the label set used for lookup."""
        self.graph = Graph(self.configs.neo_db_ip,
                           user=self.configs.neo_username,
                           password=self.configs.neo_password)
        self.all_label = ["Actor", "Album", "Honour", "Song"]

    def process(self, stmt):
        """Run every template over *stmt*, print the (possibly negated)
        matched words per template, then print the knowledge-graph nodes
        whose name equals any matched word."""
        # Debug dump: word, POS tag and dependency arc per token.
        for i in range(len(stmt.get_words())):
            print(stmt.get_words()[i] + " " + stmt.get_pos()[i] + " " \
                  + str(stmt.get_arcs()[i][0]) + ":" + stmt.get_arcs()[i][1])
        result_idx = set()
        for template in self.root:
            # Positions (1-based) of trigger words for this template.
            tw_pos = self.getpos_tmp(stmt.get_words(), stmt.get_pos(),
                                     template.find("tempword"))
            # Word indices (0-based) extracted via the template's entity chain.
            wordidx = self.entityidx(tw_pos, stmt.get_pos(), stmt.get_arcs(),
                                     template.iterfind("entity"))
            result_idx = result_idx | wordidx
            # Prefix words under negation with "-" in the debug output.
            nf = NegFilter(stmt.get_words(), stmt.get_pos(), stmt.get_arcs())
            negidx = nf.negfilter(wordidx)
            sim_result = []
            for i in wordidx:
                if i in negidx:
                    sim_result.append("-" + stmt.get_words()[i])
                else:
                    sim_result.append(stmt.get_words()[i])
            print(" ".join(sim_result))
            print("---------------------------")
        # Look each matched word up under every known label.
        node_list = list()
        for i in result_idx:
            for l in self.all_label:
                nodes = self.graph.find(label=l, property_key="name",
                                        property_value=stmt.get_words()[i])
                node_list.extend(nodes)
        for node in node_list:
            print("|".join(node.labels()) + ":" + node["name"])

    def getpos_tmp(self, words, postags, tempword):
        """Return 1-based positions of tokens matching the template's trigger
        word list AND its word-class list (both comma-separated in the XML)."""
        tw_pos = []
        for i in range(len(words)):
            if words[i] in tempword.find("word").text.split(",") and postags[
                    i] in tempword.find("wordclass").text.split(","):
                tw_pos.append(i + 1)
        return tw_pos

    def entityidx(self, des, postags, arcs, entities):
        """Follow the template's <entity> chain from the trigger positions.

        des: current 1-based destination positions (starts at trigger words).
        Returns the set of 0-based indices of words flagged isresult="true".
        """
        wordidx = set()
        for entity in entities:
            newdes = []
            for idx in range(len(postags)):
                hit = False
                # Token must carry one of the entity's relations and word classes.
                if arcs[idx][1] in entity.find("relation").text.split(
                        ",") and postags[idx] in entity.find(
                            "wordclass").text.split(","):
                    i = arcs[idx][0]
                    if entity.attrib.get("direct") == "false":
                        # Indirect link allowed: climb the head chain until a
                        # destination (or the root, index 0) is reached.
                        while i not in des and i != 0:
                            i = arcs[i - 1][0]
                    if i in des:
                        hit = True
                if hit:
                    newdes.append(idx + 1)
                    if entity.attrib.get("isresult") == "true":
                        wordidx.add(idx)
            # internode="true": matched tokens become the next hop's targets.
            if entity.attrib.get("internode") == "true":
                des = newdes
        return wordidx
class Robot():
    """NLU Robot (natural-language-understanding dialog robot).

    Public attributes:
    - graph: The connection of graph database (py2neo Graph).
    - pattern: The pattern for NLU tool: 'semantic' or 'vec'.
    - memory: The context memory of robot (qmemory/amemory deques).

    Matching pipeline in search(): preprocessing -> navigation -> online
    scene commands -> local semantic match (tag subgraph) -> key-sentence
    match -> online fallbacks (music / nearby / weather / log-unknown).
    """

    def __init__(self, password="******"):
        # Connect to the graph knowledge base.
        self.graph = Graph("http://localhost:7474/db/data/", password=password)
        # Semantic mode: 'semantic' or 'vec'.
        self.pattern = 'semantic'
        # Navigation location list from the location database.
        self.locations = get_navigation_location()
        # Online-scene flag, defaults to False.
        self.is_scene = False
        # Baidu Maps IP geolocation; falls back to a default city on network error.
        self.address = get_location_by_ip()
        # Robot configuration (User node), loaded per request in search().
        self.gconfig = None
        # Topics the current user may access.
        self.usertopics = []
        # Current QA topic (conversation context).
        self.topic = ""
        # Current QA id.
        self.qa_id = get_current_time()
        # Short-term memory: last 10 questions and last 10 answers.
        self.qmemory = deque(maxlen=10)
        self.amemory = deque(maxlen=10)
        # Random fallback answers when nothing matches.
        # TODO: log every unanswered question.
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗"
        ]

    def __str__(self):
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(
            **self.gconfig)

    @time_me()
    def configure(self, info="", userid="userid"):
        """Configure the knowledge base for a user.

        With empty *info*: return the user's current database configuration.
        With *info* as a space-separated list of names: select those
        sub-knowledge-bases, disable the rest, and return the user's topics.
        """
        # NOTE(review): identity comparison with a literal — should be
        # `userid != ""`; left unchanged here.
        assert userid is not "", "The userid can not be empty!"
        # TO UPGRADE: analyze the incoming userid and report when it is
        # not acceptable. 2017-6-7
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是标准A0001,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        if not info:
            # Query mode: report each database with its selected/available flags.
            config = {"databases": []}
            # NOTE(review): Cypher built by string concatenation — injectable
            # if userid were untrusted; left unchanged here.
            match_string = "MATCH (user:User)-[r:has]->(config:Config)" + \
                "where user.userid='" + userid + \
                "' RETURN config.name as name, r.bselected as bselected, r.available as available"
            for item in self.graph.run(match_string):
                config["databases"].append(
                    dict(name=item[0], bselected=item[1], available=item[2]))
            print("可配置信息:", config)
            return config
        else:
            selected_names = info.split()
            forbidden_names = list(set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # TODO: merge and simplify the two loops below.
            for name in selected_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=1"
                # print(match_string)
                self.graph.run(match_string)
            for name in forbidden_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=0"
                # print(match_string)
                self.graph.run(match_string)
            return self.get_usertopics(userid=userid)

    # @time_me()
    def get_usertopics(self, userid="userid"):
        """Get usertopics list.

        Returns every topic from Config nodes the user has selected and that
        are available (bselected:1, available:1).
        """
        usertopics = []
        if not userid:
            userid = "userid"
        # Fetch the sub-knowledge-bases this user is entitled to.
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualization of robot answer: substitute {robotname} etc.
        from the user's configuration into *sentence*."""
        return sentence.format(**self.gconfig)

    # @time_me()
    def add_to_memory(self, question="question", userid="userid"):
        """Add user question to memory (Memory node chained to the previous
        one via a 'next' relationship).

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".
        """
        previous_node = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        node = Node("Memory", question=question, userid=userid, qa_id=self.qa_id)
        if previous_node:
            relation = Relationship(previous_node, "next", node)
            self.graph.create(relation)
        else:
            self.graph.create(node)

    # Development requirements from Mr Tang in 2017-5-11.
    # Changed from fuzzy match to exact match, from Mr Tang in 2017-6-1.
    def extract_navigation(self, question):
        """Extract a navigation target.

        QA matching mode: pick the best-matching location from the
        navigation location list (exact substring match, requires "去").

        Args:
            question: User question.
        """
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        # temp_sim = 0
        # sv1 = synonym_cut(question, 'wf')
        # if not sv1:
            # return result
        for location in self.locations:
            # TODO: check whether "去" and the location form an adjacent
            # verb phrase rather than matching anywhere in the question.
            if "去" in question and location in question:
                print("Original navigation")
                result["content"] = location
                result["context"] = "user_navigation"
                result["behavior"] = int("0x001B", 16)
                return result
            # sv2 = synonym_cut(location, 'wf')
            # if sv2:
                # temp_sim = similarity(sv1, sv2, 'j')
            # Early exit: accept the first location above the threshold
            # instead of searching for the global best.
            # if temp_sim > 0.92:
                # print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
                # result["content"] = location
                # result["context"] = "user_navigation"
                # result["behavior"] = int("0x001B", 16)
                # return result
        return result

    def extract_pinyin(self, question, subgraph):
        """Extract synonymous QA in NLU database (pinyin variant).

        QA matching mode: pick the best-matching QA pair from the graph
        database by Jaccard similarity over pinyin tokens.

        Args:
            question: User question.
            subgraph: Sub graphs corresponding to the current dialogue.
        """
        temp_sim = 0
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        sv1 = pinyin_cut(question)
        print(sv1)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print(" ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Early exit: accept the first candidate above the threshold
            # instead of searching for the global best.
            if temp_sim > 0.75:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                # NOTE(review): exec on graph-stored api names — trusted data
                # assumed; left unchanged here.
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
        return result

    def extract_synonym(self, question, subgraph):
        """Extract synonymous QA in NLU database.

        QA matching mode: pick the best-matching QA pair from the graph
        database (exact match first, then synonym-vector similarity).

        Args:
            question: User question.
            subgraph: Sub graphs corresponding to the current dialogue.
        """
        temp_sim = 0
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        # semantic: cut into synonym-tag vectors; similarity matrix from tag
        #   similarity; sentence similarity from the matrix.
        # vec: cut into word vectors; similarity matrix from word2vec;
        #   sentence similarity from the matrix.
        if self.pattern == 'semantic':
        # elif self.pattern == 'vec':
            sv1 = synonym_cut(question, 'wf')
            if not sv1:
                return result
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                print("Similarity Score: Original sentence")
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                # The node's api hook extracts key info from the original
                # question for local lookup / third-party api / crawling.
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
            sv2 = synonym_cut(iquestion, 'wf')
            if sv2:
                temp_sim = similarity(sv1, sv2, 'j')
            # Early exit: accept the first candidate above the threshold
            # instead of searching for the global best.
            if temp_sim > 0.92:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
        return result

    def extract_keysentence(self, question):
        """Extract keysentence QA in NLU database.

        QA matching mode: pick a QA pair whose key sentence is contained in
        the question (Cypher CONTAINS, first hit only).

        Args:
            question: User question.
        """
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        match_string = "MATCH (n:NluCell) WHERE '" + question + "' CONTAINS n.name RETURN n LIMIT 1"
        subgraph = self.graph.run(match_string).data()
        if subgraph:
            node = list(subgraph)[0]['n']
            print("Similarity Score: Key sentence")
            result["content"] = self.iformat(
                random_item(node["content"].split("|")))
            result["context"] = node["topic"]
            if node["url"]:
                # result["url"] = json.loads(random_item(node["url"].split("|")))
                result["url"] = random_item(node["url"].split("|"))
            if node["behavior"]:
                result["behavior"] = int(node["behavior"], 16)
            if node["parameter"]:
                result["parameter"] = int(node["parameter"])
            # The node's api hook extracts key info from the original
            # question for local lookup / third-party api / crawling.
            func = node["api"]
            if func:
                exec("result['content'] = " + func +
                     "('" + result["content"] + "')")
            return result
        return result

    @time_me()
    def search(self, question="question", userid="userid"):
        """Nlu search (semantic search entry point).

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".

        Returns:
            Dict contains answer, current topic, url, behavior and parameter.
        """
        # Add to question memory.
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)

        # Local semantics: whole-graph mode.
        #tag = get_tag(question)
        #subgraph = self.graph.find("NluCell", "tag", tag)
        #result = self.extract_synonym(question, subgraph)

        # Local semantics: scene + whole graph + per-user configuration.
        # Multi-user: load this user's configuration dynamically.
        self.gconfig = self.graph.find_one("User", "userid", userid)
        self.usertopics = self.get_usertopics(userid=userid)

        # 1. Preprocessing =============================================
        # Question filter (sensitive-word filtering added 2017-5-25).
        if check_swords(question):
            print("问题包含敏感词!")
            return dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
                context="", url="", behavior=0, parameter=0)
        # Redefine two-character "小X" questions (surname false matches).
        if question.startswith("小") and len(question) == 2:
            question = self.gconfig['robotname']
        # Strip robot-name salutations. Add in 2017-7-5.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(
                    robotname) and len(question) >= 4 and "在线" not in question:
                question = question.lstrip(robotname)
        if not question:
            question = self.gconfig['robotname']

        # 2. Navigation ================================================
        result = self.extract_navigation(question)
        if result["context"] == "user_navigation":
            return result

        # 3. Cloud online scenes =======================================
        result = dict(question=question, content="", context="basic_cmd", url="", \
            behavior=int("0x0000", 16), parameter=0)
        # TODO: unify the scene triggers below into one pattern.
        # TODO {'behavior': 0, 'content': '理财产品取号', 'context': 'basic_cmd', 'parameter': 0, 'question': '理财产品取号', 'url': ''}
        if "理财产品" in question and "取号" not in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "理财产品"           # canonical question
            self.is_scene = True
            return result
        if "免费wifi" in question or "wifi" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "有没有免费的wifi"   # canonical question
            self.is_scene = True
            return result
        if "存款利率" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "存款利率"           # canonical question
            self.is_scene = True
            return result
        if "我要取钱" in question or "取钱" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "我要取钱"           # canonical question
            self.is_scene = True
            return result
        if "信用卡挂失" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "信用卡挂失"         # canonical question
            self.is_scene = True
            return result
        if "开通云闪付" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "开通云闪付"         # canonical question
            self.is_scene = True
            return result
        if "办理粤卡通" in question or "办理粤通卡" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "办理粤通卡"         # canonical question, fixed 2017-7-3
            self.is_scene = True
            return result
        # Enter online scene (table-driven draft, kept for reference):
        # start_scene = ["理财产品", "wifi", "存款利率", "取钱", "信用卡挂失", "开通云闪付", "办理粤卡通"]
        # for item in start_scene:
            # if item in question:
                # result["behavior"] = int("0x1002", 16)
                # result["question"] = "办理粤卡通"
                # self.is_scene = True
        # Exit the online scene.
        end_scene = ["退出业务场景", "退出场景", "退出", "返回", "结束", "发挥"]
        for item in end_scene:
            if item == question:
            # if item in question:  # exact match avoids clashes between exit phrases
                result["behavior"] = int("0x0020", 16)   # scene exit
                self.is_scene = False
                return result
        previous_step = ["上一步", "上一部", "上一页", "上一个"]
        next_step = ["下一步", "下一部", "下一页", "下一个"]
        if self.is_scene:
            # for item in previous_step:
                # if item in question:
                    # result["behavior"] = int("0x001D", 16)  # scene: previous step
            # for item in next_step:
                # if item in question:
                    # result["behavior"] = int("0x001E", 16)  # scene: next step
            if "上一步" in question or "上一部" in question or "上一页" in question or "上一个" in question:
                result["behavior"] = int("0x001D", 16)   # scene: previous step
                return result
            elif "下一步" in question or "下一部" in question or "下一页" in question or "下一个" in question:
                result["behavior"] = int("0x001E", 16)   # scene: next step
                return result
            # result["content"] = question
            # return result
        # Common commands / interaction / business.
        # Context: repeat command. TODO: make sure the repeated item is a real
        # command, not e.g. the closing phrase of a song.
        if "再来一个" in question:
            # TODO: pick the most recent *meaningful* action from memory.
            return self.amemory[-1]

        # 4. Local standard semantics ==================================
        # Mode 1: accept candidates whose semantic score beats the threshold.
        tag = get_tag(question, self.gconfig)
        # TODO: combine semantic-tag and keyword matching.
        subgraph_all = list(self.graph.find("NluCell", "tag", tag))
        # subgraph_scene = [node for node in subgraph_all if node["topic"]==self.topic]
        # TODO: usergraph_all holds both normal and user-defined QA; the
        # user-defined ones could be matched first.
        usergraph_all = [
            node for node in subgraph_all if node["topic"] in self.usertopics
        ]
        usergraph_scene = [
            node for node in usergraph_all if node["topic"] == self.topic
        ]
        # Inspect the tag-selected subgraph:
        # for node in usergraph_all:
            # print(node["name"])
        # if subgraph_scene:
        if usergraph_scene:
            # Prefer candidates from the current topic (scene) first.
            result = self.extract_synonym(question, usergraph_scene)
            # result = self.extract_pinyin(question, usergraph_scene)
            if result["context"]:
                self.topic = result["context"]
                self.amemory.append(result)   # add to answer memory
                return result
        result = self.extract_synonym(question, usergraph_all)
        # result = self.extract_pinyin(question, usergraph_all)
        # result = self.extract_synonym(question, subgraph_all)
        self.topic = result["context"]
        self.amemory.append(result)   # add to answer memory
        # Mode 2: match when the question contains a key sentence.
        if not self.topic:
            result = self.extract_keysentence(question)
            if result["context"]:
                self.topic = result["context"]
                self.amemory.append(result)   # add to answer memory
                return result

        # 5. Online semantics ==========================================
        if not self.topic:
            # 1. Music ("sing a song by X").
            if "唱一首" in question or "唱首" in question or "我想听" in question:
                result["behavior"] = int("0x0001", 16)
                result["content"] = "好的,正在准备哦"
            # 2. Nearby food/places.
            elif "附近" in question or "好吃的" in question:
                result["behavior"] = int("0x001C", 16)
                result["content"] = self.address
            # 3. nlu_tuling (weather).
            elif "天气" in question:
                # After the Tuling API change. Add in 2017-8-4.
                location = get_location(question)
                if not location:
                    # No place in the question: prepend the IP-derived address.
                    weather = nlu_tuling(self.address + question)
                else:
                    # The question already names a place.
                    weather = nlu_tuling(question)
                # Before the Tuling API change:
                # weather = nlu_tuling(question, loc=self.address)
                result["behavior"] = int("0x0000", 16)
                try:
                    # Before the Tuling API change:
                    # temp = weather.split(";")[0].split(",")[1].split()
                    # myweather = temp[0] + temp[2] + temp[3]
                    # After the Tuling API change. Add in 2017-8-3.
                    temp = weather.split(",")
                    myweather = temp[1] + temp[2]
                except:
                    myweather = weather
                result["content"] = myweather
                result["context"] = "nlu_tuling"
            # 4. Append every unanswered question to a log file.
            else:
                with open("C:/nlu/bin/do_not_know.txt", "a", encoding="UTF-8") as file:
                    file.write(question + "\n")
            # 5. nlu_tuling fallback (disabled):
            # else:
                # result["content"] = nlu_tuling(question, loc=self.address)
                # result["context"] = "nlu_tuling"
        return result
title = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"] abstract = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"] authors = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"] doi = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"] article_to_be_added = graph.merge_one("Article", "doi", doi) article_to_be_added['abstract'] = abstract # article_to_be_added['authors'] = authors article_to_be_added['title'] = title article_to_be_added.push() print("\t\t\t" + title) relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "printed_in", journal_to_be_added, volume=volume_key, issue=issue_key, issue_date=str(acm_structure[publisher_key][journal_key][volume_key][issue_key]["date"]["month"])+str(acm_structure[publisher_key][journal_key][volume_key][issue_key]["date"]["year"]), issn=acm_structure[publisher_key][journal_key][volume_key][issue_key]["issn"])) # primary_author_bool = True for author in authors: # print("Author detected is: " + author["name"]) # print("Author_link detected is: " + author["link"]) results = graph.find('Author', 'link', author["link"]) # print(type(results)) if len(list(results)) == 1: for result in results: print("\t\t\t\t" + result['full_name'] + " FOUND") else: # print("\t\t\t\tNOT FOUND! Creating Author...") author_to_be_added = graph.merge_one("Author", "link", author["link"]) author_str_split_list = author["name"].split() if (len(author_str_split_list) == 1): author_to_be_added['full_name'] = author["name"].title() author_to_be_added['fist_name'] = author_str_split_list[0] author_to_be_added['middle_name'] = " " author_to_be_added['last_name'] = " " elif (len(author_str_split_list) == 2): author_to_be_added['full_name'] = author["name"].title()
class CategoryTree(object):
    """Amazon browse-node category tree stored in Neo4j (py2neo legacy API).

    Nodes are labelled ``Category`` with a unique ``id`` of the form
    ``<country><BrowseNodeId>``; parent->child edges are ``HAS_CHILD``.
    An in-memory mirror of the tree is kept in ``self.categories``
    (id -> plain dict, see category_node_dict).
    """

    def __init__(self, country):
        # Read Neo4j connection settings from the project configuration.
        project_conf = get_project_conf()
        neo_host = project_conf.get("NEO4J", "host")
        user = project_conf.get("NEO4J", "username")
        password = project_conf.get("NEO4J", "password")
        # Silence chatty py2neo/httpstream loggers.
        logging.getLogger("py2neo.batch").setLevel(logging.WARNING)
        logging.getLogger("py2neo.cypher").setLevel(logging.WARNING)
        logging.getLogger("httpstream").setLevel(logging.WARNING)
        authenticate(neo_host, user, password)
        self.graph = Graph("http://%s/db/data/" % neo_host)
        try:
            # Idempotent: fails (and is ignored) if the constraint exists.
            self.graph.schema.create_uniqueness_constraint("Category", "id")
        except:
            pass
        # Cache every already-known category for this country.
        self.categories = self.get_categories(country)

    def merge_node(self, node, country, do_not_load=False):
        """Merge one browse node into the graph and the local cache.

        ``node`` is an Amazon browse-node dict (keys 'BrowseNodeId', 'Name',
        optionally 'IsCategoryRoot'). Returns the py2neo node.
        """
        category_id = "%s%s" % (country, str(node['BrowseNodeId']))
        category = self.graph.merge_one('Category', 'id', category_id)
        # Only set the name on first creation; never overwrite an existing one.
        if 'name' not in category.properties:
            category['name'] = node['Name']
        category['is_root'] = int(node.get('IsCategoryRoot', 0))
        category['do_not_load'] = bool(do_not_load)
        category['country'] = country
        category.push()
        if not category_id in self.categories:
            self.categories[category_id] = self.category_node_dict(category)
        return category

    def relationship(self, parent, child):
        # Build (but do not persist) a parent-HAS_CHILD->child relationship.
        return Relationship(parent, 'HAS_CHILD', child)

    def relationship_exists(self, parent, child):
        # True if a HAS_CHILD edge already links parent to child.
        if len(list(self.graph.match(start_node=parent, end_node=child,
                                     rel_type='HAS_CHILD'))) > 0:
            return True
        return False

    def create_relationship(self, relationship):
        # Persist the relationship only if it does not already exist.
        self.graph.create_unique(relationship)
        relationship.push()

    def create_relationships(self, parent, children):
        for child in children:
            self.create_relationship(parent, child)

    def add_new_category(self, browsenode, amazon_api, country):
        """Insert ``browsenode`` (and any missing ancestors) into the tree.

        ``browsenode`` is an Amazon API browse-node dict with optional
        'Ancestors' / 'Children' sub-dicts, e.g.::

            {u'Ancestors': {u'BrowseNode': {...}},
             u'BrowseNodeId': u'1340509031',
             u'Children': {u'BrowseNode': [...]},
             u'Name': u'Mobile Phones & Communication'}

        Returns the cached dict for the newly added category (or None).
        """
        added_categories = []
        do_not_load = True
        current_browsenode = browsenode
        # Determine do_not_load from the youngest ancestor already known.
        while 'Ancestors' in current_browsenode:
            current_id = "%s%s" % (country, current_browsenode['BrowseNodeId'])
            current_node = self.categories.get(current_id, None)
            if not current_node:
                if type(current_browsenode['Ancestors']) is dict:
                    current_browsenode = current_browsenode['Ancestors']
                elif type(current_browsenode['Ancestors']) is list:
                    # This shouldn't happen. But if it does, better to log and
                    # continue with the first one.
                    current_browsenode = current_browsenode['Ancestors'][0]
            else:
                do_not_load = bool(current_node['do_not_load'])
                break
        # Create the missing nodes and relationships, walking up to the root.
        child = self.merge_node(browsenode, country, do_not_load)
        added_categories.append(child)
        current_browsenode = browsenode
        while 'Ancestors' in current_browsenode and int(current_browsenode.get("IsCategoryRoot", 0)) != 1:
            if type(current_browsenode['Ancestors']) is dict:
                parent_browsenode_id = current_browsenode['Ancestors']['BrowseNode']['BrowseNodeId']
            elif type(current_browsenode['Ancestors']) is list:
                # This shouldn't happen. But if it does, better to log and
                # continue with the first one.
                parent_browsenode_id = current_browsenode['Ancestors'][0]['BrowseNode']['BrowseNodeId']
            parent_graph_id = "%s%s" % (country, parent_browsenode_id)
            parent_node = self.categories.get(parent_graph_id, None)
            if parent_node:
                # Known parent: link and stop climbing.
                # NOTE(review): get_category returns a plain dict, not a py2neo
                # node — verify Relationship() accepts it, this looks suspect.
                parent = self.get_category(parent_graph_id)
                relationship = self.relationship(parent, child)
                self.create_relationship(relationship)
                break
            else:
                parent_browsenode = amazon_api.get_node(parent_browsenode_id)
                if type(parent_browsenode) is dict:
                    parent = self.merge_node(parent_browsenode, country, do_not_load)
                    relationship = self.relationship(parent, child)
                    self.create_relationship(relationship)
                    added_categories.append(parent)
                    current_browsenode = parent_browsenode
                elif parent_browsenode == "AWS.InvalidParameterValue":
                    # Amazon no longer knows this node: purge it locally.
                    print "Deleting node %s and all its children" % str(parent_browsenode_id)
                    self.delete_category(parent_browsenode_id)
                    break
                else:
                    #self.logger.warning("Unknown error from amazon API.")
                    print 'Unknown error from amazon API. %s' % parent_browsenode
                    break
        # Recompute depth for everything that was added and refresh the cache.
        for category in added_categories:
            category_id = "%s%s" % (country, category['id'])
            length = self.get_shortest_length_to_root(category_id)
            category['shortest_length_root'] = length
            category.push()
            self.categories[category_id] = self.category_node_dict(category)
        new_category_id = "%s%s" % (country, browsenode['BrowseNodeId'])
        return self.categories.get(new_category_id)

    def category_node_dict(self, category_node):
        # Project a py2neo Category node onto a plain dict for the cache.
        result = {
            'is_root': category_node['is_root'],
            'id': category_node['id'],
            'name': category_node['name'],
            'do_not_load': category_node['do_not_load'],
            'shortest_length_root': category_node['shortest_length_root']
        }
        return result

    def get_categories(self, country):
        """Load every Category node for ``country`` into an id-keyed dict."""
        categories = {}
        records = self.graph.find('Category', property_key='country',
                                  property_value=country)
        for category in records:
            categories[category['id']] = self.category_node_dict(category)
        return categories

    def get_category(self, category_id):
        # Returns the cache-style dict for one category, or None if absent.
        category = self.graph.find_one('Category', property_key='id',
                                       property_value=category_id)
        if category:
            return self.category_node_dict(category)

    def is_orphan(self, category_id):
        """True if the category is missing or unreachable from any root."""
        category = self.get_category(category_id)
        if not category:
            return True
        if not bool(category['is_root']):
            query = """MATCH p=a-[:HAS_CHILD*]->n WHERE n.id = {id} AND a.is_root=1 RETURN p LIMIT 1"""
            cypher = self.graph.cypher
            path = cypher.execute_one(query, id=category_id)
            if not path:
                return True
        return False

    def get_children(self, category_id):
        # All descendants (transitive) of the given category.
        query = """MATCH (n)-[r:HAS_CHILD*]->(m) WHERE n.id = {id} RETURN m"""
        cypher = self.graph.cypher
        children = cypher.execute(query, id=category_id)
        return children

    def delete_category(self, category_id):
        """Delete a category and all of its descendants (plus their edges)."""
        cypher = self.graph.cypher
        children = self.get_children(category_id)
        delete_query = """ MATCH (n {id:'%s'}) OPTIONAL MATCH n-[r]-() DELETE n,r """
        if children:
            for record in children:
                child = record[0]
                cypher.execute_one(delete_query % child["id"])
        cypher.execute_one(delete_query % category_id)

    def get_shortest_length_to_root(self, category_id):
        # Path length from a root to this node (None when unreachable).
        # NOTE(review): ORDER BY ... DESC returns the LONGEST path despite the
        # method name — confirm which one callers actually rely on.
        query = """MATCH p=a-[:HAS_CHILD*]->n WHERE n.id={id} AND a.is_root=1 RETURN length(p) ORDER BY length(p) DESC LIMIT 1"""
        cypher = self.graph.cypher
        length = cypher.execute_one(query, id=category_id)
        return length
#-*-coding:utf-8-*- import pandas as pd from py2neo import Graph, Node, walk, Relationship import re import jieba import os os.chdir('E:/课程/知识图谱/第3周/数据库建立与查询') g = Graph("http://localhost:7474") # username="******", password="******" #加载字典 jieba.load_userdict("./dict.txt") #获取英雄列表 heros = [] hero = g.find(label='hero') for i in hero: heros.append(i.properties['name']) #获取装备列表 weapons = [] weapon = g.find(label='weapon') for i in weapon: weapons.append(i.properties['name']) #属性同义词:数组第一个元素为数据库里属性名,第二个为输出时官方名,后面的是可能的同义词名 HP_all = ['HP', u'生命值', u'血量', u'血上限'] MP_all = ['MP', u'法力值', u'蓝量', u'蓝'] HP_recover_all = [u'HP_recover', u'每5秒回血', '回血'] MP_recover_all = [u'MP_recover', u'每5秒回复法力值', u'每5秒回蓝', u'回蓝'] R_cooling_all = ['R_cooling', u'大招冷却时间']
from py2neo import Graph, authenticate, Node, Relationship import MySQLdb import threading authenticate("localhost:7474", "neo4j", "8760neo4j") graph = Graph() mynode = list(graph.find('fw', property_key='count')) ct = 1 fobj = open("textdump1.txt", "r").readlines() file_tacker = open("tarcker.txt", "a") #for i in fobj: def indexing(i): global ct print "*********" print i print ct print "**********" i = i.lower() file_tacker.write(str(i)) temp = i.split(" ", 3) b = "" for i in temp: b = b + "".join(" " + str(i).replace("'", "")) b = b.strip() s = b.split(" ", 3) dic = {} for i in range(len(s)):
con = sqlite3.connect('../db/search_title.db') cur = con.cursor() cur.execute("CREATE TABLE Title (title TEXT, item_id INT, poster_path TEXT, year INT)") cur.execute("CREATE INDEX title_index ON Title (title)") def get_new_id(): query = """MERGE (nid:ItemIncremental) ON CREATE SET nid.count = 1 ON MATCH SET nid.count = nid.count + 1 RETURN nid.count""" new_id = graph.cypher.execute(query)[0][0] return new_id # graph = Graph() genres = graph.find('Genre') genre_dict = {} for genre in genres: genre_dict[genre.properties['name']] = genre with con: with open (titles_file, 'r') as f_in: counter = 0 more_than_one = 0 no_genre_counter = 0 for line in f_in: try: obj = json.loads(line) except: obj = ast.literal_eval(line)
class GraphExporter:
    """Exports a Neo4j (py2neo) database into NetworkX graphs.

    Two graph flavours are supported: the full "Taiko Network" graph
    (groups, members, memberships, connections) and a members-only
    demographic graph. Either can be written out as GEXF.
    """

    def __init__(self):
        # Connection settings come from the project-local `neo4j` module.
        authenticate(neo4j.HOST_PORT, neo4j.USERNAME, neo4j.PASSWORD)
        self.neo4j_db = Graph(neo4j.REMOTE_URI)

    def create_taikonetwork_graph(self):
        """Build the full network graph into self.graph."""
        print("> Taiko Network Graph: querying all nodes and relationships...")
        self.graph = nx.Graph()
        self._add_group_nodes()
        self._add_memberships()
        self._add_member_nodes()
        self._add_unique_connections()
        print("> Taiko Network Graph: SUCCESSFULLY CREATED!\n"
              "> Export to graph file format to save.\n")

    def create_demographic_graph(self):
        """Build a members-only graph (with demographic attributes)."""
        print("> Demographic Graph: querying all Member nodes and Connection rels...")
        self.graph = nx.Graph()
        self._add_member_nodes(demo=True)
        self._add_unique_connections(demo=True)
        print("> Demographic Graph: SUCCESSFULLY CREATED!\n"
              "> Export to graph file format to save.\n")

    def export_gexf_graph(self, filepath='graph.gexf'):
        """Write the most recently built graph to ``filepath`` as GEXF."""
        nx.write_gexf(self.graph, filepath, encoding='utf-8',
                      prettyprint=True, version='1.2draft')

    def _add_group_nodes(self):
        # One NetworkX node per Neo4j 'Group' node; fixed pink viz color.
        groups = self.neo4j_db.find('Group')
        color = {'r': 255, 'g': 2, 'b': 97, 'a': 1}
        for g in groups:
            data = g.properties
            self.graph.add_node(
                g._id, label=data['name'], sf_id=data['sf_id'],
                viz={'color': color})

    def _add_member_nodes(self, demo=False):
        # One node per 'Member'; demo mode carries demographic attributes
        # instead of the Salesforce id.
        members = self.neo4j_db.find('Member')
        for m in members:
            data = m.properties
            color = self._random_color(m._id, 1)
            if demo:
                self.graph.add_node(
                    m._id,
                    label=data['firstname'] + ' ' + data['lastname'],
                    gender=data['gender'], dob=data['dob'],
                    race=data['race'], ethnicity=data['asian_ethnicity'],
                    viz={'color': color})
            else:
                self.graph.add_node(
                    m._id,
                    label=data['firstname'] + ' ' + data['lastname'],
                    sf_id=data['sf_id'],
                    viz={'color': color})

    def _add_unique_connections(self, demo=False):
        # CONNECTED_TO edges, de-duplicated regardless of direction.
        connections = self.neo4j_db.match(rel_type='CONNECTED_TO')
        unique_rels = []
        for c in connections:
            start = c.start_node._id
            end = c.end_node._id
            if (start, end) not in unique_rels and (end, start) not in unique_rels:
                if demo:
                    # Neutral grey for the demographic view.
                    color = {'r': 213, 'g': 213, 'b': 213, 'a': 0.3}
                else:
                    color = self._random_color(start, 0.3)
                self.graph.add_edge(start, end, viz={'color': color})
                unique_rels.append((start, end))

    def _add_memberships(self):
        # MEMBER_OF edges, colored by the member end of the relationship.
        memberships = self.neo4j_db.match(rel_type='MEMBER_OF')
        for ms in memberships:
            color = self._random_color(ms.start_node._id, 0.3)
            self.graph.add_edge(ms.start_node._id, ms.end_node._id,
                                viz={'color': color})

    def _random_color(self, obj_id, alpha):
        """Deterministic palette color for ``obj_id`` with the given alpha.

        NOTE(review): the chosen dict is mutated in place (``c['a']``), so the
        palette entries accumulate the last alpha used — harmless here since
        'a' is always set before use, but worth confirming if reused.
        """
        colors = [{'r': 164, 'g': 243, 'b': 121},
                  {'r': 243, 'g': 230, 'b': 121},
                  {'r': 243, 'g': 121, 'b': 184},
                  {'r': 154, 'g': 121, 'b': 243},
                  {'r': 202, 'g': 243, 'b': 121},
                  {'r': 243, 'g': 177, 'b': 121},
                  {'r': 243, 'g': 121, 'b': 238},
                  {'r': 121, 'g': 243, 'b': 212},
                  {'r': 243, 'g': 190, 'b': 121},
                  {'r': 121, 'g': 194, 'b': 243},
                  {'r': 157, 'g': 2, 'b': 253},
                  {'r': 2, 'g': 86, 'b': 253}]
        c = colors[obj_id % 12]
        c['a'] = alpha
        return c
for row in bsm.rows[1:]: from_type, from_name, edge_type, edge_name, to_type, to_name, netlog = [cell.value for cell in row] if netlog is None: from_type = "grey" to_type = "grey" print(from_type, from_name, edge_type, to_type, to_name) from_node = graph.merge_one(from_type.strip(), "name", from_name.strip()) to_node = graph.merge_one(to_type.strip(), "name", to_name.strip()) from_to = Relationship(from_node, edge_type, to_node) graph.create_unique(from_to) # get nodes with degree nodes = [] for label in graph.node_labels: for p in graph.find(label): node = {"id": p.ref.split("/")[-1], "label": p["name"], "title": p["name"], "value": p.degree, "group": label} nodes.append(node) with open("report/nodesnetlog.js", "w") as f: f.write("var nodesraw = " + dumps(nodes, indent=2) + ";") # get edges edges = [] for r in graph.match(): edge = {"to": r.end_node.ref.split("/")[-1], "from": r.start_node.ref.split("/")[-1] }
class test_pipeline(unittest.TestCase):
    """End-to-end tests: file parsing, aggregation, graph service and HTTP API.

    Requires a Neo4j instance on localhost:8484 and the API server on
    localhost:8000; test data files live under ``data/``.
    """

    LEN_DATETIME = 26   # length of a microsecond-precision ISO datetime string
    LEN_TEST_FILE = 632  # expected JSON length of the parsed test file

    def setUp(self):
        try:
            __location__ = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            self.src = open(
                os.path.join(__location__, "data/bit-test-data.txt"))
            self.badFreq = open(
                os.path.join(__location__, "data/bad-frequency.txt"))
            self.badStartTime = open(
                os.path.join(__location__, "data/bad-starttime.txt"))
            self.graph = Graph("http://localhost:8484/db/data")
            # Start every test from an empty database.
            self.graph.delete_all()
            self.service = WaferService(self.graph)
        except:
            print "Error during unittest setup"

    def tearDown(self):
        self.graph.delete_all()

    #
    # File tests
    #
    def test_open(self):
        self.assertEquals(len(self.src.read().split("\n")), 20)

    #
    # Parser tests
    #
    def test_parser(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(len(bitdo.toJson()), test_pipeline.LEN_TEST_FILE)
        self.assertEquals(len(bitdo.channels.keys()), 5)
        self.assertEquals(bitdo.header["SamplingFrequency"], "1000")
        self.assertEquals(len(bitdo.channels["EMG"]), 16)
        # Assure that datetime is to microsecond precision
        self.assertEquals(
            len(bitdo.header["StartDateTime"]), test_pipeline.LEN_DATETIME)

    def test_parser_errors(self):
        # Malformed headers must raise, not silently parse.
        self.assertRaises(AttributeError, parser.BITdo, (self.badFreq))
        self.assertRaises(AttributeError, parser.BITdo, (self.badStartTime))

    #
    # Aggregator tests
    #
    def test_aggregator_nums(self):
        a = [0, 0, 1, 1, 1]
        s = aggregator.streaksIn(a)
        self.assertEquals(s[0].getStreaks(), [2])
        self.assertEquals(s[0].getStreakExp(2), [4])
        self.assertEquals(s[1].getStreaks(), [3])
        self.assertEquals(s[1].getStreakExp(2), [9])

    def test_aggregator_bools(self):
        b = [True, False, False, True, False]
        s = aggregator.streaksIn(b)
        self.assertEquals(s[True].getStreaks(), [1, 1])
        self.assertEquals(s[False].getStreaks(), [2, 1])
        self.assertEquals(s[False].getStreakExp(2), [4, 1])

    def test_aggregator_strings(self):
        c = ["cat", "826", "826", "826", "~~", "~~", "cat", "cat", "~~"]
        s = aggregator.streaksIn(c)
        self.assertEquals(s["cat"].getStreaks(), [1, 2])
        self.assertEquals(s["cat"].getStreakExp(2), [1, 4])
        self.assertEquals(s["826"].getStreaks(), [3])
        self.assertEquals(s["826"].getStreakExp(3), [27])
        self.assertEquals(s["~~"].getStreaks(), [2, 1])
        self.assertEquals(s["~~"].getStreakExp(-1), [0.5, 1])

    def test_aggregator_average(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(aggregator.average(bitdo.channels['EMG']), 525.4375)
        self.assertEquals(aggregator.average([1, 2, 3]), 2)
        self.assertEquals(aggregator.average([x for x in range(1000)]), 499.5)

    #
    # Graph Service
    #
    def test_add_new_user(self):
        user = self.service.add_user("Duke")
        userid = user.properties["userid"]
        activity = self.service.add_activity(
            userid, "Free Throws", "no description")
        activityname = activity.properties["name"]
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.assertEquals(count(self.graph.find("User")), 1)
        self.assertEquals(count(self.graph.find("Activity")), 1)
        self.assertEquals(count(self.graph.find("Moment")), 2)
        self.assertEquals(count(self.graph.find("Annotation")), 2)

    #
    # Graph API
    #
    def test_post_user(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)

    def test_post_user_fails(self):
        # Missing payload must be rejected.
        r = requests.post('http://localhost:8000/users', {})
        self.assertEquals(r.status_code, 400)

    def test_post_activity(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

    def test_post_activity_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        r = requests.post('http://localhost:8000/activities', {
            'userid': 'Thaddeus'})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/users', {
            'name': 'Free-throw shooting'})
        self.assertEquals(r.status_code, 400)

    def test_post_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        r = newMoment('Thaddeus', 'Free-throw shooting', now(),
                      ["make:true", "swish:true"])
        self.assertEquals(r.status_code, 201)

    def test_post_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        annotations = ["make:true", "swish:true"]
        r = requests.post('http://localhost:8000/moments', {
            # missing userid
            'name': 'Free-throw shooting',
            'timestamp': now(),
            'annotations[]': annotations})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now()
            # missing annotations
        })
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now(),
            # it's `annotations[]`... sigh
            'annotations': annotations})
        self.assertEquals(r.status_code, 400)

    def test_get_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        r = getMoments('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        self.assertEquals(len(r.json()), 3)

    def test_get_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Basketball')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        # wrong activity name
        r = getMoments('Thaddeus', 'B_sketb_ll')
        self.assertEquals(r.status_code, 400)
class GraphDatabase():
    """Neo4j-backed word/document co-occurrence graph (py2neo legacy API).

    Document and Feature (word) nodes carry running 'in-weight'/'out-weight'
    counters; weighted relations between them track co-occurrence counts.
    """

    def __init__(self):
        try:
            # Credentials are embedded in the URL (masked in source).
            self.graph = Graph(
                'http://*****:*****@localhost:7474/db/data')
        except:
            print 'ERROR: Initialize Neo4j browser'
        # Start from a clean database on every run.
        self.graph.delete_all()

    def createDocumentNode(self, index, label):
        """Merge a 'Document' node named 'Doc <index>' and init its counters."""
        docNode = self.graph.merge_one('Document', 'name', 'Doc ' + str(index))
        self.updateNode(docNode, {
            'id': index,
            'label': label,
            'in-weight': 0,
            'out-weight': 0
        })
        return docNode

    def createFeatureNode(self, index, word):
        """Create a new 'Feature' node for ``word`` with zeroed counters."""
        wordNode = Node('Feature', word=word)
        self.graph.create(wordNode)
        self.updateNode(wordNode, {
            'in-weight': 0,
            'out-weight': 0,
            'id': index
        })
        return wordNode

    def getFeatureNode(self, word):
        # First Feature node whose 'word' property matches.
        # Raises IndexError when the word is absent.
        return list(
            self.graph.find('Feature', property_key='word',
                            property_value=word))[0]

    def createWeightedRelation(self, node1, node2, relation):
        """Create node1-[relation]->node2 or bump its weight if present.

        Either way the endpoints' 'out-weight'/'in-weight' counters are
        incremented as well.
        """
        match = self.graph.match(start_node=node1, rel_type=relation,
                                 end_node=node2)
        numberOfRelations = sum(1 for x in match)
        if numberOfRelations >= 1:
            # Re-run the query: the first match generator is already consumed.
            match = self.graph.match(start_node=node1, rel_type=relation,
                                     end_node=node2)
            # NOTE(review): with a unique relation this runs once; confirm the
            # intended behavior if multiple parallel relations can exist.
            for relationship in match:
                self.increaseWeight(relationship)
                self.increaseWeight(node1, 'out-weight')
                self.increaseWeight(node2, 'in-weight')
        else:
            newRelation = Relationship(node1, relation, node2, weight=1)
            self.graph.create(newRelation)
            self.increaseWeight(node1, 'out-weight')
            self.increaseWeight(node2, 'in-weight')

    def increaseWeight(self, entity, weight='weight'):
        # Increment a counter property on a node or relationship and persist.
        entity[weight] = entity[weight] + 1
        self.graph.push(entity)

    def updateNode(self, node, propertyDict):
        # Bulk-update node properties and persist.
        node.properties.update(propertyDict)
        self.graph.push(node)

    def normalizeRelationships(self, nodes, relation):
        """Store weight / in-weight of the target node as 'norm_weight'."""
        for node in nodes:
            for rel in node.match_incoming(relation):
                rel['norm_weight'] = rel['weight'] / node['in-weight']
                self.graph.push(rel)

    def getNodes(self, feature):
        # All nodes with the given label; `feature` is interpolated into the
        # Cypher string, so it must be a trusted label name.
        recordList = self.graph.cypher.execute(
            'MATCH (node:%s) RETURN node' % feature)
        return [record.node for record in recordList]

    def getMatrix(self, nodesX, nodesY=None, relation='followed_by',
                  propertyType='norm_weight'):
        """Dense adjacency matrix of ``relation`` weights, indexed by 'id'."""
        if nodesY == None:
            nodesY = nodesX
        matrix = np.zeros([len(nodesX), len(nodesY)])
        for node in nodesX:
            rowIndex = node['id']
            for outRelation in node.match_outgoing(relation):
                colIndex = outRelation.end_node['id']
                weight = outRelation[propertyType]
                matrix[rowIndex, colIndex] = weight
        return matrix

    def cypherContextSim(self):
        # Run the CONTEXT_SIM Cypher statement in a single transaction.
        tx = self.graph.cypher.begin()
        tx.append(CONTEXT_SIM)
        tx.commit()
class Network():
    """Neo4j-backed webpage graph for a crawler: stores pages, `links_to`
    edges, crawl-frequency metadata and PageRank scores.

    Each page node is labelled with its own link URL and carries
    `date_last_updated`, `frequency`, `calculated_frequency`, `link`,
    `last_crawled_time`, `time_remaining` and `page_rank` properties.
    """

    def __init__(self):
        self.graph_instance = Graph()
        self.time = self.update_time(str(datetime.datetime.now()))

    # Updates current instance of time by system
    def update_time(self, time):
        self.time = time

    # Checks if node exists, if it does not exist - creates new node; else, updates node
    def add_node(self, link, date_last_updated, frequency):
        calculated_frequency = convert_frequency_to_hours(frequency)
        if (not self.check_node_exist(link)):
            # Create a new node for webpage with an initial calculated frequency
            n = Node(link, date_last_updated=date_last_updated,
                     frequency=frequency,
                     calculated_frequency=calculated_frequency, link=link)
            self.graph_instance.create(n)
        else:
            # Update existing fields for webpage node
            n = self.graph_instance.find_one(link)
            if (n["date_last_updated"] != ""):
                calculated_frequency = self._update_calculated_frequency(
                    n["date_last_updated"], date_last_updated)
            n["date_last_updated"] = date_last_updated
            n["calculated_frequency"] = calculated_frequency
            n["frequency"] = frequency
            n.push()
        return n

    # Measures calculated frequency from subtracting previous date_last_updated
    # to current date_last_updated (returns time in hours, -1 on bad input)
    def _update_calculated_frequency(self, prev_date_updated, new_date_updated):
        try:
            prev_date = datetime.datetime.strptime(prev_date_updated, "%Y-%m-%d")
            new_date = datetime.datetime.strptime(new_date_updated, "%Y-%m-%d")
            td = new_date - prev_date
            return td.total_seconds() // 3600
        except (TypeError, ValueError):
            # Narrowed from a bare `except`: only parse failures mean -1.
            return -1

    # If the relationship doesn't exist, create a new edge; else, update the tag
    def add_edge(self, node_u, node_v_link, relationship):
        self.add_node(node_v_link, "", "")
        node_v = self.get_node(node_v_link)
        self.graph_instance.create(
            Relationship(node_u, "links_to", node_v, tag=relationship))

    def check_node_exist(self, link):
        return len(list(self.graph_instance.find(link))) != 0

    def check_relationship_exist(self, node_u, node_v):
        return len(
            list(
                self.graph_instance.match(start_node=node_u, end_node=node_v,
                                          rel_type="links_to"))) > 0

    def delete_failed_webpages(self, link):
        """Remove a page node and every edge touching it."""
        if (self.check_node_exist(link) == False):
            return
        node = self.get_node(link)
        self.delete_relationship(node)
        self.delete_incoming_relationship(node)
        self.graph_instance.delete(node)

    def delete_relationship(self, node_u):
        # Drop all outgoing links_to edges of node_u.
        rels = list(
            self.graph_instance.match(rel_type="links_to", start_node=node_u,
                                      end_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def delete_incoming_relationship(self, node_u):
        # Drop all incoming links_to edges of node_u.
        rels = list(
            self.graph_instance.match(rel_type="links_to", end_node=node_u,
                                      start_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def get_node(self, link):
        return self.graph_instance.find_one(link)

    def get_node_information(self, link):
        """Return a dict of crawl metadata plus in/out links, {} if absent.

        NOTE(review): `link` is concatenated into the Cypher string — an
        injection risk if links are untrusted; should use query parameters.
        """
        check_node = self.graph_instance.data("MATCH (n {link: '" + link +
                                              "'} ) RETURN n")
        if len(check_node) == 0:
            return {}
        n = self.get_node(link)
        node_data = {}
        node_data["date_last_updated"] = n["date_last_updated"]
        node_data["calculated_frequency"] = n["calculated_frequency"]
        node_data["frequency"] = n["frequency"]
        node_data["outlinks"] = self.get_outlinks(link)
        node_data["inlinks"] = self.get_inlinks(link)
        return node_data

    def get_outlinks(self, link):
        outlink_data = self.graph_instance.data("MATCH (n {link: '" + link +
                                                "'} )-->(node) RETURN node")
        outlinks = []
        for o in outlink_data:
            outlinks.append(o["node"]["link"])
        return outlinks

    def get_inlinks(self, link):
        inlink_data = self.graph_instance.data("MATCH (n {link: '" + link +
                                               "'} )<--(node) RETURN node")
        inlinks = []
        for o in inlink_data:
            inlinks.append(o["node"]["link"])
        return inlinks

    # Get adjacency matrix from Neo4j and nodes from py2neo
    def _to_matrix(self):
        nodes = list(self.graph_instance.node_selector.select())
        N = len(nodes)
        mat = np.zeros((N, N))
        # Populate the adjacency matrix
        for i, a in enumerate(nodes):
            for j, b in enumerate(nodes):
                # Use existing function to check for link
                mat[i, j] = self.check_relationship_exist(a, b)
        return mat

    # Iterate over nodes and add pagerank
    def update_pagerank(self):
        # Get all the nodes
        nodes = self.graph_instance.node_selector.select()
        # Iterate over the result of _pagerank and the nodes
        for pr, node in zip(self._pagerank(), nodes):
            # Update the node's pagerank and push back to neo4j
            node.update(page_rank=pr)
            self.graph_instance.push(node)

    # Simple show function to get nodes and display their pagerank
    def show_pagerank(self, selector=None, link=None):
        nodes = list(self.graph_instance.node_selector.select())
        for node in nodes:
            if isinstance(link, str):
                if not list(node.labels())[0] == link:
                    continue
            elif isinstance(link, (list, tuple)):
                if not list(node.labels())[0] in link:
                    continue
            # NOTE(review): loop filters nodes but displays nothing — the
            # original body ends here; kept as-is to preserve behavior.

    # Get the pageranks for any given list of links (or all)
    def get_pagerank_dict(self, links=[]):
        nodes = list(self.graph_instance.node_selector.select())
        dct = {}
        for node in nodes:
            if isinstance(links, str):
                if not list(node.labels())[0] == links:
                    continue
            elif isinstance(links, (list, tuple)):
                if not list(node.labels())[0] in links:
                    continue
            dct[list(node.labels())[0]] = node.get('page_rank')
        return dct

    # Creates dictionary object with information for ranking API (including page rank)
    def get_ranking_data(self, links):
        page_ranks = self.get_pagerank_dict(links)
        data = {}
        data["webpages"] = []
        for l in page_ranks.keys():
            webpage_data = {}
            # If the node exists
            if (page_ranks[l] != None):
                n = self.get_node(l)
                webpage_data["pageRankValue"] = page_ranks[l]
                webpage_data["dateLastUpdated"] = n["date_last_updated"]
                webpage_data["frequency"] = n["frequency"]
                webpage_data["webpage"] = l
            else:
                webpage_data["pageRankValue"] = "NULL"
                webpage_data["dateLastUpdated"] = ""
                webpage_data["frequency"] = ""
                webpage_data["webpage"] = ""
            data["webpages"].append(webpage_data)
        return data

    # Perform pagerank on the adjacency matrix, using the power method
    def _pagerank(
            self,
            alpha=0.85,
            max_iter=100,  # Increase this if we get the non-convergence error
            tol=1.0e-6,
    ):
        # Create a sparse matrix rep. of adjacency matrix.
        # FIX: the scipy.* numpy aliases (scipy.array, scipy.where, ...) are
        # removed from modern SciPy; use numpy (np) directly instead.
        mat = scipy.sparse.csr_matrix(self._to_matrix())
        n, m = mat.shape
        # Make a sum matrix
        S = np.array(mat.sum(axis=1)).flatten()
        # Get non-zero rows (FIX: `S <> 0` used the removed Python 2 operator)
        index = np.where(S != 0)[0]
        for i in index:
            # We need to normalize (divide by sum)
            mat[i, :] *= 1.0 / S[i]
        # FIX: the initial guess was commented out, so `prlast = pr` below
        # raised NameError on the first iteration.
        pr = np.ones(n) / n  # initial guess
        # Get dangling nodes
        dangling = np.array(np.where(mat.sum(axis=1) == 0, 1.0 / n,
                                     0)).flatten()
        for i in range(max_iter):
            prlast = pr
            pr = alpha * (pr * mat + np.dot(dangling, prlast)) + (
                1 - alpha) * prlast.sum() / n
            # check if we're done
            err = np.absolute(pr - prlast).sum()
            if err < n * tol:
                return pr
        raise Exception("pagerank failed to converge [%d iterations]" %
                        (i + 1))

    # Prioritizer
    def prioritizer(self, outlinks):
        # Get remaining time and number of inlinks.
        # FIX: iterate over a copy — removing items from the list being
        # iterated skips the element following each removal.
        for ol in list(outlinks):
            if (not self.check_node_exist(ol)):
                outlinks.remove(ol)
            else:
                self.remaining_time(ol)
        self.sort_node(outlinks)
        new_links = sorted(
            outlinks,
            key=lambda k: (self.get_node(k)["time_remaining"],
                           self.number_of_inlinks(k)))
        for ol in new_links:
            # Update last_crawled_time
            current = str(datetime.datetime.now())
            node = self.get_node(ol)
            node["last_crawled_time"] = current
            node.push()
        return new_links

    # Get number of inlinks (negated so that more inlinks sorts first)
    def number_of_inlinks(self, outlink):
        node = self.get_node(outlink)
        return -len(
            list(
                self.graph_instance.match(
                    rel_type="links_to", end_node=node, start_node=None)))

    # Updates remaining time left for a node to be crawled based on frequency
    def remaining_time(self, outlink):
        node = self.get_node(outlink)
        last_crawled_time = node["last_crawled_time"]
        if (last_crawled_time == None):
            # Never crawled: due immediately.
            node["time_remaining"] = 0
            node.push()
        else:
            fmt = '%Y-%m-%d %H:%M:%S'
            current = str(datetime.datetime.now())
            start = datetime.datetime.strptime(current[:19], fmt)
            end = datetime.datetime.strptime(last_crawled_time[:19], fmt)
            # Hours elapsed since the last crawl.
            diff = (start - end).total_seconds() / 60.000 / 60.000
            diff = float(node["calculated_frequency"]) - diff
            node["time_remaining"] = diff
            node.push()

    # Sort node and fill top 100
    def sort_node(self, outlinks):
        num = len(outlinks)
        count = 0
        nodes = self.graph_instance.data("MATCH (n) RETURN n")
        for n in nodes:
            if (not n["n"]["link"] in outlinks):
                self.remaining_time(n["n"]["link"])
        nodes = self.graph_instance.data(
            "MATCH (n) RETURN n ORDER BY (n.time_remaining) DESC")
        for n in nodes:
            link = n["n"]["link"]
            if (not link in outlinks):
                outlinks.append(link)
                count = count + 1
            if (count + num > 100):
                break

    # Return dictionary object of prioritized links and their priority value
    def prioritize_dic(self, outlinks):
        new_links = self.prioritizer(outlinks)
        data = {}
        data["prioritizedLinks"] = []
        p_value = 1
        for l in new_links:
            l_data = {}
            l_data["link"] = l
            l_data["priority_value"] = p_value * 10
            data["prioritizedLinks"].append(l_data)
            p_value = p_value + 1
        return data
class TwitterGraph():
    """Write-side Twitter graph wrapper (py2neo, pre-v3 API).

    Maintains User and TwitterUser nodes plus FOLLOWS/RETWEETED
    relationships, and keeps an in-memory popularity heap of TwitterUsers
    ranked by incoming-relationship count.
    """

    def __init__(self):
        # Credentials are embedded in the URL (masked here)
        self.graph = Graph("http://*****:*****@54.191.171.209:7474/db/data/")
        self.popularity_heap = []
        self.reassess_popularity()

    def add_user(self, user):
        """Create a User node from an app-level user object."""
        new_user = Node("User", token=user.token.session_id, user_id=user.id)
        return self.graph.create(new_user)

    def is_cached(self, screen_name):
        """Return True if a TwitterUser node exists; falls through to None otherwise."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is not None:
            return True

    def get_RT_recommendations(self, user):
        """Recommend up to 10 screen names retweeted by accounts the user follows.

        Samples 5 FOLLOWS edges and 5 RETWEETED edges each, counts targets,
        and returns the most common ones.
        """
        recommendations = Counter()
        user_node = self.graph.find_one("User", 'user_id', user.id)
        following = user_node.match_outgoing("FOLLOWS", limit=5)
        for rel in following:
            retweets = rel.end_node.match_outgoing("RETWEETED", limit=5)
            for r in retweets:
                recommendations[r.end_node.properties['screen_name']] += 1
        # NOTE(review): the tuple name `str` shadows the builtin inside the
        # comprehension
        return [str for (str, count) in recommendations.most_common(10)]

    def get_generic_recommendations(self):
        """Return the 10 most popular screen names from the popularity heap."""
        return [screen_name for (count, screen_name) in
                heapq.nlargest(10, self.popularity_heap)]

    def reassess_popularity(self):
        # NOTE: expensive calculation, to be run threaded at multiples of x
        # actions to graph or hourly/daily job
        # NOTE(review): pushes fresh entries without clearing the heap, so
        # repeated calls accumulate duplicates — confirm intended
        all_twitter_users = self.graph.find("TwitterUser")
        for tu in all_twitter_users:
            incoming_count = sum(1 for _ in tu.match_incoming())
            heapq.heappush(self.popularity_heap,
                           (incoming_count, tu.properties['screen_name']))

    def add_twitter_user(self, screen_name):
        """Create a TwitterUser node if one does not already exist."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            new_twitter_user = Node("TwitterUser", screen_name=screen_name)
            self.graph.create(new_twitter_user)

    def add_follow(self, screen_name, user):
        """Create a FOLLOWS relationship, lazily creating missing endpoints."""
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        follow_relationship = Relationship(user_node, "FOLLOWS", twitter_user)
        self.graph.create(follow_relationship)
        self.reassess_popularity()

    def remove_follow(self, screen_name, user):
        """Delete the FOLLOWS relationship between user and screen_name, if any."""
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        follow_relationship = self.graph.match_one(user_node, "FOLLOWS",
                                                   twitter_user)
        if follow_relationship is not None:
            self.graph.delete(follow_relationship)

    def add_retweet(self, screen_name, retweeted_screen_name):
        """Create or increment the RETWEETED relationship's `count` property."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        self.add_twitter_user(retweeted_screen_name)
        retweeted_twitter_user = self.graph.find_one("TwitterUser",
                                                     'screen_name',
                                                     retweeted_screen_name)
        retweet = self.graph.match_one(twitter_user, "RETWEETED",
                                       retweeted_twitter_user)
        if retweet is None:
            retweet_relationship = Relationship(twitter_user, "RETWEETED",
                                                retweeted_twitter_user)
            retweet_relationship.properties['count'] = 1
            self.graph.create(retweet_relationship)
        elif retweet.properties['count'] is None:
            # this shouldn't happen, just for testing while transitioning db
            retweet.properties['count'] = 1
            retweet.push()
        else:
            retweet.properties['count'] = retweet.properties['count'] + 1
            retweet.push()
class NFIBManager(object):
  """
  Manage the handling of Network Function Information Base.

  Use neo4j implementation for storing and querying NFs and NF
  decompositions.
  """

  def __init__ (self):
    """
    Init.
    """
    super(NFIBManager, self).__init__()
    log.debug("Init %s based on neo4j" % self.__class__.__name__)
    # Suppress low level logging
    self.__suppress_neo4j_logging()
    try:
      self.graph_db = Graph()
    except Unauthorized as e:
      quit_with_error(
        "Got Unauthorozed error on: %s from neo4j! Disable the authorization "
        "in /etc/neo4j/neoj4-server.properties!" % e)

  @staticmethod
  def __suppress_neo4j_logging (level=None):
    """
    Suppress annoying and detailed logging of `py2neo` and `httpstream`
    packages.

    :param level: level of logging (default: WARNING)
    :type level: str
    :return: None
    """
    import logging
    level = level if level is not None else logging.WARNING
    logging.getLogger("py2neo").setLevel(level)
    logging.getLogger("neo4j").setLevel(level)
    logging.getLogger("httpstream").setLevel(level)

  def addNode (self, node):
    """
    Add new node to the DB.

    :param node: node to be added to the DB
    :type node: dict
    :return: success of addition
    :rtype: Boolean
    """
    node_db = list(
      self.graph_db.find(node['label'], 'node_id', node['node_id']))
    if len(node_db) > 0:
      log.debug("node %s exists in the DB" % node['node_id'])
      return False
    node_new = py2neo.Node(node['label'], node_id=node['node_id'])
    # Copy every incoming key/value onto the neo4j node's properties
    for key, value in node.items():
      node_new.properties[key] = value
    self.graph_db.create(node_new)
    return True

  def addClickNF (self, nf):
    """
    Add new click-based NF to the DB

    :param nf: nf to be added to the DB
    :type nf: dict
    :return: success of addition
    :rtype: Boolean
    """
    dirname = "/home/mininet/escape-shared/mininet/mininet"
    # 1. First check if the source can be compiled
    if nf.get('clickSource', ''):
      if not self.clickCompile(nf):
        return False
    # 2. Check the existence of the required VNFs/Click elements
    dependency = []
    clickTempPath = nf.get('clickTempPath',
                           dirname + '/templates/' + nf['node_id'] + '.jinja2')
    if os.path.exists(clickTempPath):
      with open(clickTempPath) as template:
        for line in template:
          if '::' in line:
            # Click declarations look like "name :: Element(args)"
            element = line.split('::')[-1].split('(')[0].replace(' ', '')
            node = list(self.graph_db.find('NF', 'node_id', str(element)))
            if len(node) <= 0:
              log.debug(
                "The new NF is dependent on non-existing NF %s" % element)
              return False
            else:
              dependency.append(str(element))
      template = open(clickTempPath, 'r').read()
    else:
      template = ''
    # 3. Extract the click handlers form the source files
    read_handlers = {}
    read = []
    write_handlers = {}
    write = []
    for src in nf.get('clickSource', ''):
      if '.cc' in src:
        with open(nf.get('clickPath', '') + '/' + src) as source:
          for line in source:
            if 'add_read_handler' in line:
              hdlr = line.split('"')[1]
              if hdlr not in read:
                read.append(hdlr)
            if 'add_write_handler' in line:
              hdlr = line.split('"')[1]
              if hdlr not in write:
                write.append(hdlr)
    if read:
      read_handlers[nf['node_id']] = read
    if write:
      write_handlers[nf['node_id']] = write
    # Add the handlers of other elements used in click scripts of the new NF
    if dependency:
      for element in dependency:
        NF_info = self.getNF(element)
        # NOTE(review): eval() on handler strings stored in the DB — only
        # safe while the DB content is fully trusted
        read = eval(NF_info['read_handlers']).get(element, '')
        write = eval(NF_info['write_handlers']).get(element, '')
        if read:
          read_handlers[element] = read
        if write:
          write_handlers[element] = write
    # 4. Add the NF to the DB
    nf.update(
      {
        'dependency': repr(dependency),
        'read_handlers': repr(read_handlers),
        'write_handlers': repr(write_handlers),
        'command': str(template)
      })
    self.addNode(nf)

  def addVMNF (self, nf):
    # To be updated
    self.addNode(nf)

  @staticmethod
  def clickCompile (nf):
    """
    Compile source of the click-based NF

    :param nf: the click-based NF
    :type nf: dict
    :return: success of compilation
    :rtype: Boolean
    """
    for src in nf.get('clickSource', ''):
      if not os.path.exists(nf.get('clickPath', '') + '/' + src):
        log.debug("source file does not exist: %s" % src)
        return False
    # Shell out to the click build; success is judged by the binary existing
    os.system('cd ' + nf.get('clickPath', '') +
              '; make clean; ./configure; make elemlist; '
              'make')
    if not os.path.exists(nf.get('clickPath', '') + '/userlevel/click'):
      log.debug("The source code can not be compiled")
      return False
    else:
      return True

  def removeNF (self, nf_id):
    """
    Remove an NF and all its decompositions from the DB.

    :param nf_id: the id of the NF to be removed from the DB
    :type nf_id: string
    :return: success of removal
    :rtype: Boolean
    """
    node = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(node) > 0:
      rels_DECOMPOSE = list(
        self.graph_db.match(start_node=node[0], rel_type='DECOMPOSED'))
      for rel in rels_DECOMPOSE:
        self.removeDecomp(rel.end_node.properties['node_id'])
      node[0].delete_related()
      return True
    else:
      log.debug("node %s does not exist in the DB" % nf_id)
      return False

  def updateNF (self, nf):
    """
    Update the information of a NF.

    :param nf: the information for the NF to be updated
    :type nf: dict
    :return: success of the update
    :rtype: Boolean
    """
    node = list(self.graph_db.find(nf['label'], 'node_id', nf['node_id']))
    if len(node) > 0:
      node[0].set_properties(nf)
      return True
    else:
      log.debug("node %s does not exist in the DB" % nf['node_id'])
      return False

  def getNF (self, nf_id):
    """
    Get the information for the NF with id equal to nf_id.

    :param nf_id: the id of the NF to get the information for
    :type nf_id: string
    :return: the information of NF with id equal to nf_id
    :rtype: dict
    """
    node = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(node) > 0:
      return node[0].properties
    else:
      log.debug("node %s does not exist in the DB" % nf_id)
      return None

  def addRelationship (self, relationship):
    """
    Add relationship between two existing nodes

    :param relationship: relationship to be added between two nodes
    :type relationship: dict
    :return: success of the addition
    :rtype: Boolean
    """
    node1 = list(self.graph_db.find(relationship['src_label'], 'node_id',
                                    relationship['src_id']))
    node2 = list(self.graph_db.find(relationship['dst_label'], 'node_id',
                                    relationship['dst_id']))
    if len(node1) > 0 and len(node2) > 0:
      rel = Relationship(node1[0], relationship['rel_type'], node2[0])
      for key, value in relationship.items():
        rel.properties[key] = value
      self.graph_db.create(rel)
      return True
    else:
      log.debug("nodes do not exist in the DB")
      return False

  def removeRelationship (self, relationship):
    """
    Remove the relationship between two nodes in the DB.

    :param relationship: the relationship to be removed
    :type relationship: dict
    :return: the success of the removal
    :rtype: Boolean
    """
    node1 = list(self.graph_db.find(relationship['src_label'], 'node_id',
                                    relationship['src_id']))
    node2 = list(self.graph_db.find(relationship['dst_label'], 'node_id',
                                    relationship['dst_id']))
    if len(node1) > 0 and len(node2) > 0:
      rels = list(self.graph_db.match(start_node=node1[0], end_node=node2[0],
                                      rel_type=relationship['rel_type']))
      for r in rels:
        r.delete()
      return True
    else:
      log.debug("nodes do not exist in the DB")
      return False

  def addDecomp (self, nf_id, decomp_id, decomp):
    """
    Add new decomposition for a high-level NF.

    :param nf_id: the id of the NF for which a decomposition is added
    :type nf_id: string
    :param decomp_id: the id of the new decomposition
    :type decomp_id: string
    :param decomp: the decomposition to be added to the DB
    :type decomp: Networkx.Digraph
    :return: success of the addition
    :rtype: Boolean
    """
    nf = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(nf) <= 0:
      log.debug("NF %s does not exist in the DB" % nf_id)
      return False
    # SAP ids inside a decomposition must be fresh, i.e. not in the DB yet
    for n in decomp.nodes():
      node = list(self.graph_db.find('SAP', 'node_id', n))
      if len(node) > 0:
        log.debug("SAPs exist in the DB")
        return False
    if not self.addNode({'label': 'graph', 'node_id': decomp_id}):
      log.debug("decomposition %s exists in the DB" % decomp_id)
      return False
    for n in decomp.nodes():
      if decomp.node[n]['properties']['label'] == 'SAP':
        self.addNode(decomp.node[n]['properties'])
        dst_label = 'SAP'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'click':
        self.addClickNF(decomp.node[n]['properties'])
        dst_label = 'NF'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'VM':
        self.addVMNF(decomp.node[n]['properties'])
        dst_label = 'NF'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'NA':
        self.addNode(decomp.node[n]['properties'])
        dst_label = 'NF'
      else:
        # FIXME - czentye --> add default to dst_label variable always be
        # defined for addRelationship
        self.addNode({'label': 'NF', 'type': 'NA'})
        dst_label = 'NA'
      self.addRelationship(
        {
          'src_label': 'graph',
          'dst_label': dst_label,
          'src_id': decomp_id,
          'dst_id': n,
          'rel_type': 'CONTAINS'
        })
    for e in decomp.edges():
      temp = {
        'src_label': decomp.node[e[0]]['properties']['label'],
        'src_id': e[0],
        'dst_label': decomp.node[e[1]]['properties']['label'],
        'dst_id': e[1],
        'rel_type': 'CONNECTED'
      }
      # Edge attributes (ports, BW, ...) ride along on the relationship
      temp.update(decomp.edge[e[0]][e[1]]['properties'])
      self.addRelationship(temp)
    self.addRelationship(
      {
        'src_label': 'NF',
        'src_id': nf_id,
        'dst_label': 'graph',
        'dst_id': decomp_id,
        'rel_type': 'DECOMPOSED'
      })
    return True

  def removeDecomp (self, decomp_id):
    """
    Remove a decomposition from the DB.

    :param decomp_id: the id of the decomposition to be removed from the DB
    :type decomp_id: string
    :return: the success of the removal
    :rtype: Boolean
    """
    node = list(self.graph_db.find('graph', 'node_id', decomp_id))
    if len(node) > 0:
      # BFS over CONTAINS/DECOMPOSED edges; only delete nodes that are not
      # shared with another decomposition (<= 1 incoming CONTAINS)
      queue = deque([node[0]])
      while len(queue) > 0:
        node = queue.popleft()
        # we search for all the nodes with relationship CONTAINS or DECOMPOSED
        rels_CONTAINS = list(
          self.graph_db.match(start_node=node, rel_type='CONTAINS'))
        rels_DECOMPOSED = list(
          self.graph_db.match(start_node=node, rel_type='DECOMPOSED'))
        if len(rels_CONTAINS) > 0:
          rels = rels_CONTAINS
        else:
          rels = rels_DECOMPOSED
        for rel in rels:
          if len(list(self.graph_db.match(end_node=rel.end_node,
                                          rel_type='CONTAINS'))) <= 1:
            queue.append(rel.end_node)
        node.isolate()
        node.delete()
      return True
    else:
      log.debug("decomposition %s does not exist in the DB" % decomp_id)
      return False

  def getSingleDecomp (self, decomp_id):
    """
    Get a decomposition with id decomp_id.

    :param decomp_id: the id of the decomposition to be returned
    :type decomp_id: str
    :return: decomposition with id equal to decomp_id
    :rtype: tuple of networkx.DiGraph and Relationships
    """
    graph = networkx.DiGraph()
    node = list(self.graph_db.find('graph', 'node_id', decomp_id))
    if len(node) != 0:
      rels = list(self.graph_db.match(start_node=node[0],
                                      rel_type='CONTAINS'))
      for rel in rels:
        graph.add_node(rel.end_node.properties['node_id'])
        graph.node[rel.end_node.properties['node_id']][
          'properties'] = rel.end_node.properties
      for rel in rels:
        rel_CONNECTED = list(
          self.graph_db.match(start_node=rel.end_node, rel_type='CONNECTED'))
        for rel_c in rel_CONNECTED:
          # Keep only edges whose far end is inside this decomposition
          if rel_c.end_node.properties['node_id'] in graph.nodes():
            graph.add_edge(rel_c.start_node.properties['node_id'],
                           rel_c.end_node.properties['node_id'])
            graph.edge[rel_c.start_node.properties['node_id']][
              rel_c.end_node.properties['node_id']][
              'properties'] = rel_c.properties
      return graph, rels
    else:
      log.debug("decomposition %s does not exist in the DB" % decomp_id)
      return None

  def getDecomps (self, nffg):
    """
    Get all decompositions for a given nffg.

    :param nffg: the nffg for which the decompositions should be returned
    :type nffg: nffg
    :return: all the decompositions for the given nffg
    :rtype: dict
    """
    decompositions = {}
    nodes_list = []
    index = 0
    for n in nffg.nfs:
      node = list(self.graph_db.find('NF', 'node_id', n.id))
      if len(node) != 0:
        nodes_list.append(node[0])
      else:
        log.debug("NF %s does not exist in the DB" % n.id)
        return None
    # Breadth-first expansion: each queue entry is a candidate NF set with
    # its partially-rewritten NFFG
    queue = deque([nodes_list])
    queue_nffg = deque([nffg])
    while len(queue) > 0:
      nodes = queue.popleft()
      nffg_init = queue_nffg.popleft()
      indicator = 0
      for node in nodes:
        rels_DECOMPOSED = list(
          self.graph_db.match(start_node=node, rel_type='DECOMPOSED'))
        for rel in rels_DECOMPOSED:
          indicator = 1
          nffg_temp = NFFG()
          graph, rels = self.getSingleDecomp(
            rel.end_node.properties['node_id'])
          for n in graph.nodes():
            if graph.node[n]['properties']['label'] == 'NF':
              nffg_temp.add_nf(id=n,
                               dep_type=graph.node[n]['properties']['type'],
                               cpu=graph.node[n]['properties']['cpu'],
                               mem=graph.node[n]['properties']['mem'],
                               storage=graph.node[n]['properties'][
                                 'storage'])
            elif graph.node[n]['properties']['label'] == 'SAP':
              nffg_temp.add_sap(id=n)
          counter = 0
          for edge in graph.edges():
            for nf in nffg_temp.nfs:
              if nf.id == edge[0]:
                node0 = nf
              if nf.id == edge[1]:
                node1 = nf
            for sap in nffg_temp.saps:
              if sap.id == edge[0]:
                node0 = sap
              if sap.id == edge[1]:
                node1 = sap
            # FIXME - czentye --> There is a chance node0, node1 variables
            # not defined yet until here and add_port will be raise an
            # exception
            nffg_temp.add_sglink(node0.add_port(
              graph.edge[edge[0]][edge[1]]['properties']['src_port']),
              node1.add_port(
                graph.edge[edge[0]][edge[1]]['properties']['dst_port']),
              id='hop' + str(counter))
          # Copy the untouched parts of the original NFFG into the rewrite
          for n in nffg_init.nfs:
            nffg_temp.add_node(n)
          for n in nffg_init.saps:
            nffg_temp.add_node(n)
          for n in nffg_init.infras:
            nffg_temp.add_node(n)
          for l in nffg_init.links:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          for l in nffg_init.sg_hops:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          for l in nffg_init.reqs:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          # Re-route hops that touched the decomposed NF through the
          # decomposition's SAP ports, then drop the placeholder nodes
          extra_nodes = []
          for l in nffg_temp.sg_hops:
            if node.properties['node_id'] == l.src.node.id:
              src_port = l.src
              dst_port = l.dst
              for edge in graph.edges():
                if graph.node[edge[1]]['properties']['label'] == 'SAP':
                  if str(src_port.id) == str(
                     graph.edge[edge[0]][edge[1]]['properties']['dst_port']):
                    for e in nffg_temp.sg_hops:
                      if e.src.node.id == edge[0] and e.dst.node.id == edge[
                        1]:
                        nffg_temp.add_sglink(e.src, dst_port)
                        extra_nodes.append(edge[1])
            if node.properties['node_id'] == l.dst.node.id:
              dst_port = l.dst
              src_port = l.src
              for edge in graph.edges():
                if graph.node[edge[0]]['properties']['label'] == 'SAP':
                  if str(dst_port.id) == str(
                     graph.edge[edge[0]][edge[1]]['properties']['src_port']):
                    for e in nffg_temp.sg_hops:
                      if e.src.node.id == edge[0] and e.dst.node.id == edge[
                        1]:
                        nffg_temp.add_sglink(src_port, e.dst)
                        extra_nodes.append(edge[0])
          nffg_temp.del_node(node.properties['node_id'])
          for extra in extra_nodes:
            nffg_temp.del_node(extra)
          queue_nffg.append(nffg_temp)
          nodes_copy = list(nodes)
          # NOTE(review): Python 2 semantics — map() must return a list for
          # the `nodes_copy + new_nodes` concatenation below
          new_nodes = map(lambda x: x.end_node, rels)
          nodes_copy.remove(node)
          queue.append(nodes_copy + new_nodes)
        if indicator == 1:
          break
      if indicator == 0:
        decompositions['D' + str(index)] = nffg_init
        index += 1
    return decompositions

  def removeGraphDB (self):
    """
    Remove all nodes and relationships from the DB.

    :return: None
    """
    self.graph_db.delete_all()

  def __initialize (self):
    """
    Initialize NFIB with test data.
    """
    log.info("Initializing NF database with NFs and decompositions...")
    # start clean - all the existing info is removed from the DB
    self.removeGraphDB()
    # add new high-level NF to the DB, all the information related to the NF
    # should be given as a dict
    self.addNode({'label': 'NF', 'node_id': 'forwarder', 'type': 'NA'})
    self.addNode({'label': 'NF', 'node_id': 'compressor', 'type': 'NA'})
    self.addNode({'label': 'NF', 'node_id': 'decompressor', 'type': 'NA'})
    log.debug(
      "%s: high-level NFs were added to the DB" % self.__class__.__name__)
    # generate a decomposition for a high-level forwarder NF (in form of
    # networkx)
    G1 = networkx.DiGraph()
    G1.add_path(['SAP1', 'simpleForwarder', 'SAP2'])
    # create node properties
    for n in G1.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 10
        properties['mem'] = 100
        properties['storage'] = 100
      G1.node[n]['properties'] = properties
    # create edge properties
    properties = {'BW': 100, 'src_port': 1, 'dst_port': 1}
    G1.edge['SAP1']['simpleForwarder']['properties'] = properties
    properties1 = {'BW': 100, 'src_port': 2, 'dst_port': 2}
    G1.edge['simpleForwarder']['SAP2']['properties'] = properties1
    # generate a decomposition for a high-level compressor NF (in form of
    # networkx)
    G2 = networkx.DiGraph()
    G2.add_path(['SAP3', 'headerCompressor', 'SAP4'])
    # create node properties
    for n in G2.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 20
        properties['mem'] = 200
        properties['storage'] = 200
      G2.node[n]['properties'] = properties
    # create edge properties
    properties3 = {'BW': 200, 'src_port': 1, 'dst_port': 1}
    G2.edge['SAP3']['headerCompressor']['properties'] = properties3
    properties4 = {'BW': 200, 'src_port': 2, 'dst_port': 2}
    G2.edge['headerCompressor']['SAP4']['properties'] = properties4
    # generate a decomposition for a high-level decompressor NF (in form of
    # networkx)
    G3 = networkx.DiGraph()
    G3.add_path(['SAP5', 'headerDecompressor', 'SAP6'])
    # create node properties
    for n in G3.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 30
        properties['mem'] = 300
        properties['storage'] = 300
      G3.node[n]['properties'] = properties
    # create edge properties
    properties5 = {'BW': 300, 'src_port': 1, 'dst_port': 1}
    G3.edge['SAP5']['headerDecompressor']['properties'] = properties5
    properties6 = {'BW': 300, 'src_port': 2, 'dst_port': 2}
    G3.edge['headerDecompressor']['SAP6']['properties'] = properties6
    # required elementary NFs should be added first to the DB
    # NOTE(review): 'type:' key below (with colon) looks like a typo for
    # 'type' — confirm against upstream
    self.addClickNF({'label': 'NF', 'node_id': 'Queue', 'type:': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'Classifier', 'type': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'Counter', 'type': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'RFC2507Comp',
                     'type': 'click'})
    self.addClickNF(
      {'label': 'NF', 'node_id': 'RFC2507Decomp', 'type': 'click'})
    # the NF decompositions are added to the DB
    self.addDecomp('forwarder', 'G1', G1)
    self.addDecomp('compressor', 'G2', G2)
    self.addDecomp('decompressor', 'G3', G3)
    log.debug(
      "%s: NF decompositions were added to the DB" %
      self.__class__.__name__)

  def initialize (self):
    """
    Initialize NFIB with test data.
    """
    try:
      self.__initialize()
    except SocketError as e:
      log.error(
        "NFIB is not reachable due to failed neo4j service! Cause: " +
        str(e))
    except KeyboardInterrupt:
      log.warning("NFIB was interrupted by user!")
    except Unauthorized:
      log.error(
        "neo4j responded with Unauthorized error! Maybe you forgot disabling "
        "authentication in '/etc/neo4j/neo4j.conf' ?")
    except IOError as e:
      if ".neo4j/known_hosts" in str(e):
        # Skip Permission denied in case of accessing neo4j cache file
        # (v3.0.2)
        pass
      else:
        raise
    except:
      log.exception("Got unexpected error during NFIB initialization!")
# get only unique lists in result # print('res: {}'.format(result)) for prefix in result: result[prefix] = [list(x) for x in set(tuple(x) for x in result[prefix])] print('result: {}'.format(result)) for prefix in result: for path in result[prefix]: print('path: {}'.format(path)) cur_node = None prev_node = None counter_as_prepend = 0 for index, asn in enumerate(path): searched_node = graph.find('asn', property_key='label', property_value=asn) try: cur_node = searched_node.next() # see if the AS node is already in the db or not. If yes, cur_node == prev_node except StopIteration: cur_node = Node('asn', label=str(asn)) # if not exists, then create a new one if index > 0: if index == len(path) - 1: cur_node['path'] = path # attach AS path to the last ASN if cur_node != prev_node: if counter_as_prepend > 0: cur_node['prepended'] = counter_as_prepend counter_as_prepend = 0 # reset text = 'PEER_{}'.format(prefix) peering = Relationship(cur_node, text, prev_node) peering['time'] = timestamp graph.create(peering)
for article_key, article_value in issue_attributes_value.items(): title = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"] abstract = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"] authors = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"] doi = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"] references = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["references"] citations = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["citations"] article_to_be_added = graph.merge_one("Article", "doi", doi) article_to_be_added['abstract'] = abstract article_to_be_added['authors'] = authors[0]["name"] article_to_be_added['title'] = title article_to_be_added['citations'] = [] article_to_be_added['references'] = [] if ( len(references) > 0 ) and ( len(citations) > 0 ) : article_to_be_added['references'] = references article_to_be_added['citations'] = citations article_to_be_added.push() #print(title) relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "printed_in", journal_to_be_added, volume=volume_key, issue=issue_key, issn=journal_structure["ACM"][journal_key][volume_key][issue_key]["issn"])) primary_author_bool = True for author in authors: if primary_author_bool: author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find('Author', 'full_name', author), primary_author="YES")) primary_author_bool = False else: author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find('Author', 'full_name', author), primary_author="NO"))
class TwitterGraph():
    """
    Run queries against TwitterGraph. Functions here are mainly read-only,
    i.e. we only want to get answers, we are not modifying the graph
    structure
    """
    # Default connection settings (credentials masked)
    PASSWORD = "******"
    USER = "******"
    HOST = "localhost:7474"

    def __init__(self, host=HOST, user=USER, password=PASSWORD):
        authenticate(host_port=host, user_name=user, password=password)
        self.graph = Graph()

    def get_users(self):
        # TO-DO : make it lazy for large datasets
        result = self.graph.find("User", limit=25)
        list_ = [user for user in result]
        return list_

    def get_user(self, id_):
        """Return the single User node whose `id` property matches, or None."""
        result = self.graph.find_one("User", property_key="id",
                                     property_value=id_)
        return result

    def get_level_followers(self, limit=50, level=1, uid=None,
                            screen_name=None):
        """
        Return neo4j.cypher.RecordStream of users who are the n level
        follower of user uid/screen_name
        Level 1 follower is defined as :
        (1st_level_follower)-[follows]->(followee)
        """
        # NOTE(review): _construct_follower_path is called without uid=True,
        # so even when `uid` is given the pattern matches on screen_name —
        # looks like a latent bug, confirm before relying on the uid branch
        cypher = self.graph.cypher
        statement = self._construct_follower_path(level)
        if uid is None and screen_name is None:
            raise InvalidArgumentException(
                "Please specify either a valid user id or screen_name")
        if uid is not None:
            result = cypher.stream(statement, followee=uid, limit=limit)
        elif screen_name is not None:
            result = cypher.stream(statement, followee=screen_name,
                                   limit=limit)
        return [f for f in result]

    def is_n_level_follower(self, level, retweeter, screen_name):
        """
        Given a retweeter screen_name and original tweeter's screen_name,
        determine if retweeter is n level follower
        """
        if level == 1:
            return search.is_follower(retweeter, screen_name)
        cypher = self.graph.cypher
        # Walk to level-1 followers in the graph, then check the final hop
        # via the Twitter API helper
        level -= 1
        statement = self._construct_follower_path(level)
        for follower in cypher.stream(statement, followee=screen_name,
                                      limit=5000):
            print follower[0]
            if search.is_follower(retweeter, follower[0]):
                return True
        return False

    def get_retweet_level(self, retweeter, screen_name):
        """
        Given a retweeter screen name and the original user screen_name who
        tweeted the original tweet, determine the follower level
        """
        level = 0
        while level < 10:  # stop at 10 to prevent timeout
            level += 1
            # print len(followers)
            if self.is_n_level_follower(level, retweeter, screen_name):
                return level
        # 0 means "not a follower within 10 levels"
        return 0

    def _construct_follower_path(self, level, uid=False):
        # Construct pattern: one "<-[:follows]-" hop per level beyond the
        # first; {followee}/{limit} are cypher parameters
        if uid:
            statement = "MATCH(:User {id_str : {followee} })"
        else:
            statement = "MATCH(:User {screen_name : {followee} })"
        while level > 1:
            statement += "<-[:follows]-(:User)"
            level -= 1
        statement += "<-[:follows]-(a:User) RETURN a.screen_name LIMIT {limit}"
        return statement
from py2neo import Graph,authenticate,Node,Relationship import MySQLdb import threading authenticate("localhost:7474","neo4j","8760neo4j") graph = Graph() mynode = list(graph.find('fw', property_key='count')) ct=1 fobj = open("textdump1.txt","r").readlines() file_tacker=open("tarcker.txt","a") #for i in fobj: def indexing(i): global ct print "*********" print i print ct print "**********" i = i.lower() file_tacker.write(str(i)) temp = i.split(" ",3) b="" for i in temp: b=b+"".join(" "+str(i).replace("'","")) b=b.strip() s=b.split(" ",3) dic={} for i in range(len(s)): n2=graph.cypher.execute("""MATCH (a: `%s`) where a.auto_name = '%s' return a"""%(str(s[i][0]),str(s[i])))
def main():
    """Seed the beer-social graph with demo users, likes and follows.

    Reads up to ~300 rows of users.csv, creates User/City/Job nodes with
    uniqueness constraints, links each user to random Beer/Brewery nodes,
    then wires random FOLLOWS edges between users.
    """
    graph = Graph()
    # Uniqueness constraints for the merge_one calls below
    graph.cypher.execute("CREATE CONSTRAINT ON (user:User) ASSERT user.username IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (job:Job) ASSERT job.title IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE" )
    userFile = open("users.csv", "r")
    # Skip the CSV header row
    userFile.readline()
    lineNumber = 0
    for line in userFile.readlines():
        print("\r Processing line " + str(lineNumber), end="")
        lineNumber += 1
        # Columns: username, name, city, job, biography
        parsedLine = line.split(",")
        user = Node("User", username=parsedLine[0], name=parsedLine[1],
                    biography=parsedLine[4],
                    password=bcrypt.encrypt("password"))
        graph.create(user)
        city = graph.merge_one("City", "name", parsedLine[2])
        job = graph.merge_one("Job", "title", parsedLine[3])
        livesIn = Relationship(user, "IS_FROM", city)
        hasJob = Relationship(user, "HAS_JOB_TITLE", job)
        graph.create(livesIn)
        graph.create(hasJob)
        # Like a random sample of beers
        result = graph.cypher.execute("MATCH (beer:Beer) "
                                      " RETURN beer, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      range=random.randrange(100,600))
        for beer in result:
            beerNode = graph.find_one("Beer", "breweryDbId",
                                      beer.beer["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", beerNode)
            graph.create(likesBrewery)
        # Like a random sample of breweries
        result = graph.cypher.execute("MATCH (brewery:Brewery) "
                                      " RETURN brewery, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      range=random.randrange(0,10))
        for brewery in result:
            breweryNode = graph.find_one("Brewery", "breweryDbId",
                                         brewery.brewery["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", breweryNode)
            graph.create(likesBrewery)
        if lineNumber > 300:
            break
    # Second pass: every user follows a random set of other users
    for user in graph.find("User"):
        userNode = graph.find_one("User", "username", user["username"])
        result = graph.cypher.execute("MATCH (user:User) "
                                      "WHERE user.username <> {me}"
                                      " RETURN user, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      me=userNode["username"],
                                      range=random.randrange(5,40))
        for person in result:
            dude = graph.find_one("User", "username",
                                  person.user["username"])
            buddiesWith = Relationship(userNode, "FOLLOWS", dude)
            graph.create(buddiesWith)
issue=issue_key, issue_date=str( acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["date"]["month"]) + str(acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["date"]["year"]), issn=acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["issn"])) # primary_author_bool = True for author in authors: # print("Author detected is: " + author["name"]) # print("Author_link detected is: " + author["link"]) results = graph.find('Author', 'link', author["link"]) # print(type(results)) if len(list(results)) == 1: for result in results: print("\t\t\t\t" + result['full_name'] + " FOUND") else: # print("\t\t\t\tNOT FOUND! Creating Author...") author_to_be_added = graph.merge_one( "Author", "link", author["link"]) author_str_split_list = author[ "name"].split() if (len(author_str_split_list) == 1): author_to_be_added[ 'full_name'] = author[ "name"].title()
class Robot():
    """NLU Robot — natural-language-understanding chatbot.

    Public attributes:
    - graph: Connection to the Neo4j graph database (the knowledge base).
    - pattern: Matching mode of the NLU tool: 'semantic' (synonym tags) or
      'vec' (word vectors).
    - memory: Conversation context memory of the robot (the
      qmemory/amemory/pmemory deques created in __init__).
    """

    def __init__(self, password="******"):
        # Connect to the graph knowledge base.
        self.graph = Graph("http://localhost:7474/db/data/", password=password)
        # Semantic mode: 'semantic' or 'vec'.
        self.pattern = 'semantic'
        # Navigation-location database.
        self.locations = get_navigation_location()
        # Online-scene flag; False means "not currently inside a dialogue scene".
        self.is_scene = False
        # Resolve the current address via the Baidu Maps IP-location API; on
        # network failure a default address (Shanghai / from the user config
        # city) is returned by the helper.
        self.address = get_location_by_ip(self.graph.find_one("User", "userid", "A0001")['city'])
        # Robot configuration info (the matched User node); set in search().
        self.user = None
        # Topics this user is allowed to access.
        self.usertopics = []
        # Current QA topic.
        self.topic = ""
        # Current QA id.
        self.qa_id = get_current_time()
        # Short-term memory: the 10 most recent questions and answers.
        self.qmemory = deque(maxlen=10)  # questions
        self.amemory = deque(maxlen=10)  # answers
        self.pmemory = deque(maxlen=10)  # previous step (for scene "back")
        # Random fallback answers used when nothing matches.
        # TODO: log every question the robot could not answer.
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗"]

    def __str__(self):
        # Greeting built from the robot's configuration (robotname/robotage).
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(**self.user)

    @time_me()
    def configure(self, info="", userid="userid"):
        """Configure the knowledge base selection for a user.

        Args:
            info: Whitespace-separated names of sub-knowledge-bases to enable.
                When empty, the current configuration is returned instead.
            userid: User id; anything other than "A0001" is coerced to it.
        """
        # NOTE(review): `is not ""` compares identity, not equality — should
        # be `userid != ""` (CPython emits a SyntaxWarning for this).
        assert userid is not "", "The userid can not be empty!"
        # TO UPGRADE: analyse the incoming userid parameter and report a
        # proper message when it is not acceptable. 2017-6-7
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是标准A0001,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        if not info:
            # No selection given: just report the configurable databases.
            config = {"databases": []}
            match_string = "MATCH (user:User)-[r:has]->(config:Config)" + \
                "where user.userid='" + userid + \
                "' RETURN config.name as name, r.bselected as bselected, r.available as available"
            for item in self.graph.run(match_string):
                config["databases"].append(dict(name=item[0], bselected=item[1], available=item[2]))
            print("可配置信息:", config)
            return config
        else:
            selected_names = info.split()
            forbidden_names = list(set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # TODO: merge and simplify the two loops below.
            # NOTE(review): Cypher built by string concatenation — injection
            # risk if `info` can come from untrusted input; prefer parameters.
            for name in selected_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=1"
                # print(match_string)
                self.graph.run(match_string)
            for name in forbidden_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=0"
                # print(match_string)
                self.graph.run(match_string)
            return self.get_usertopics(userid=userid)

    # @time_me()
    def get_usertopics(self, userid="A0001"):
        """Return the topic list the user may access (topics of all
        sub-knowledge-bases that are both selected and available)."""
        usertopics = []
        if not userid:
            userid = "A0001"
        # Fetch the sub-knowledge-bases this user has permission to use.
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualize a robot answer: fill {robotname}-style placeholders
        with values from the user's configuration."""
        return sentence.format(**self.user)

    # @time_me()
    def add_to_memory(self, question="question", userid="userid"):
        """Add the current user question to the memory graph.

        Creates a Memory node and chains it to the previous one via a
        `next` relationship.

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".
        """
        previous_node = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        node = Node("Memory", question=question, userid=userid, qa_id=self.qa_id)
        if previous_node:
            relation = Relationship(previous_node, "next", node)
            self.graph.create(relation)
        else:
            self.graph.create(node)

    # Development requirements from Mr Tang in 2017-5-11.
    # Changed from fuzzy match to exact match, from Mr Tang in 2017-6-1.
    def extract_navigation(self, question):
        """Extract a navigation destination from the question.

        QA match mode: exact "去<location>" match against the
        navigation-location list.

        Args:
            question: User question.
        """
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # temp_sim = 0
        # sv1 = synonym_cut(question, 'wf')
        # if not sv1:
            # return result
        for location in self.locations:
            # Treat "去" + location as an adjacent verb phrase.
            keyword = "去" + location
            if keyword in question:
                print("Original navigation")
                result["name"] = keyword
                result["content"] = location
                result["context"] = "user_navigation"
                result["behavior"] = int("0x001B", 16)
                return result
            # Former fuzzy-similarity matching, kept for reference:
            # sv2 = synonym_cut(location, 'wf')
            # if sv2:
                # temp_sim = similarity(sv1, sv2, 'j')
                # Short-circuit: stop as soon as the threshold is reached.
                # if temp_sim > 0.92:
                    # print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
                    # result["content"] = location
                    # result["context"] = "user_navigation"
                    # result["behavior"] = int("0x001B", 16)
                    # return result
        return result

    def extract_pinyin(self, question, subgraph):
        """Extract the best-matching QA pair by pinyin similarity.

        QA match mode: Jaccard similarity over pinyin tokens against the
        candidate nodes from the graph database.

        Args:
            question: User question.
            subgraph: Candidate NluCell nodes for the current dialogue domain.
        """
        temp_sim = 0
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        sv1 = pinyin_cut(question)
        print(sv1)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print(" ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Short-circuit: accept the first candidate above the threshold
            # instead of searching for the global maximum.
            if temp_sim > 0.75:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result['name'] = iquestion
                result["content"] = self.iformat(random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                result["tid"] = node["tid"]
                result["txt"] = node["txt"]
                result["img"] = node["img"]
                result["button"] = node["button"]
                if node["url"]:
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = node["parameter"]
                func = node["api"]
                if func:
                    # NOTE(review): exec() on knowledge-base content — the
                    # database must be trusted.
                    exec("result['content'] = " + func + "('" + result["content"] + "')")
                return result
        return result

    def extract_synonym(self, question, subgraph):
        """Extract the best-matching QA pair by synonym-tag similarity.

        QA match mode: pick the highest-scoring QA pair from the knowledge
        base; an exact sentence match wins immediately.

        Args:
            question: User question.
            subgraph: Candidate NluCell nodes for the current dialogue domain.
        """
        temp_sim = 0
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # semantic: split into synonym-tag vectors, build a similarity matrix
        #   from tag similarity, then score the sentence pair from it.
        # vec: split into word vectors and score the sentence pair from the
        #   word-vector similarity matrix.
        if self.pattern == 'semantic':
        # elif self.pattern == 'vec':
            sv1 = synonym_cut(question, 'wf')
            if not sv1:
                return result
            for node in subgraph:
                iquestion = self.iformat(node["name"])
                if question == iquestion:
                    print("Similarity Score: Original sentence")
                    result['name'] = iquestion
                    result["content"] = self.iformat(random_item(node["content"].split("|")))
                    result["context"] = node["topic"]
                    result["tid"] = node["tid"]
                    result["txt"] = node["txt"]
                    result["img"] = node["img"]
                    result["button"] = node["button"]
                    if node["url"]:
                        result["url"] = random_item(node["url"].split("|"))
                    if node["behavior"]:
                        result["behavior"] = int(node["behavior"], 16)
                    if node["parameter"]:
                        result["parameter"] = node["parameter"]
                    # The node's api hook extracts key info from the original
                    # question for a local query / third-party API / crawl.
                    func = node["api"]
                    if func:
                        exec("result['content'] = " + func + "('" + result["content"] + "')")
                    return result
                sv2 = synonym_cut(iquestion, 'wf')
                if sv2:
                    temp_sim = similarity(sv1, sv2, 'j')
                    # Short-circuit: accept the first candidate above the
                    # threshold instead of seeking the global maximum.
                    if temp_sim > 0.92:
                        print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                        result['name'] = iquestion
                        result["content"] = self.iformat(random_item(node["content"].split("|")))
                        result["context"] = node["topic"]
                        result["tid"] = node["tid"]
                        result["txt"] = node["txt"]
                        result["img"] = node["img"]
                        result["button"] = node["button"]
                        if node["url"]:
                            result["url"] = random_item(node["url"].split("|"))
                        if node["behavior"]:
                            result["behavior"] = int(node["behavior"], 16)
                        if node["parameter"]:
                            result["parameter"] = node["parameter"]
                        func = node["api"]
                        if func:
                            exec("result['content'] = " + func + "('" + result["content"] + "')")
                        return result
        return result

    def extract_keysentence(self, question, data=None):
        """Extract a QA pair whose key sentence is contained in the question.

        QA match mode: select a QA pair from the knowledge base whose name
        is a substring of the question (restricted to attached topics).

        Args:
            question: User question.
            data: Optional pre-fetched candidate nodes (currently unused).
        """
        result = dict(question=question, name="", content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # if data:
            # subgraph = [node for node in data if node["name"] in question]
            # TODO: among the key-sentence matches, pick the one closest to
            # the current QA's jump links.
            # node = <closest to the current QA's jump links> in subgraph
        usertopics = ' '.join(self.usertopics)
        # Match only within the currently attached knowledge bases.
        # NOTE(review): Cypher built by string concatenation from `question`
        # — injection risk with untrusted input.
        match_string = "MATCH (n:NluCell) WHERE '" + question + \
            "' CONTAINS n.name and '" + usertopics + \
            "' CONTAINS n.topic RETURN n LIMIT 1"
        subgraph = self.graph.run(match_string).data()
        if subgraph:
            # TODO: check whether subgraph contains a scene root node.
            node = list(subgraph)[0]['n']
            print("Similarity Score: Key sentence")
            result['name'] = node['name']
            result["content"] = self.iformat(random_item(node["content"].split("|")))
            result["context"] = node["topic"]
            result["tid"] = node["tid"]
            result["txt"] = node["txt"]
            result["img"] = node["img"]
            result["button"] = node["button"]
            if node["url"]:
                result["url"] = random_item(node["url"].split("|"))
            if node["behavior"]:
                result["behavior"] = int(node["behavior"], 16)
            if node["parameter"]:
                result["parameter"] = node["parameter"]
            # The node's api hook extracts key info from the original
            # question for a local query / third-party API / crawl.
            func = node["api"]
            if func:
                exec("result['content'] = " + func + "('" + result["content"] + "')")
            return result
        return result

    def remove_name(self, question):
        """Strip the robot's nickname (form of address) from the question."""
        # Redefine surname mis-matches: a bare two-character "小X" is treated
        # as addressing the robot itself.
        if question.startswith("小") and len(question) == 2:
            question = self.user['robotname']
        # Filter forms of address.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(robotname) and len(question) >= 4 and "在线" not in question:
                question = question.lstrip(robotname)
        if not question:
            question = self.user['robotname']
        return question

    @time_me()
    def search(self, question="question", tid="", userid="userid"):
        """NLU search — the main question-answering entry point.

        Args:
            question: User question. Defaults to "question".
            tid: Scene node id. Defaults to "".
            userid: Unique user id. Defaults to "userid".

        Returns:
            Dict contains: question, answer, topic, tid, url, behavior,
            parameter, txt, img, button.
        """
        # Add to question memory:
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)
        # Semantics: scene + full graph + user-config mode (the config is
        # fetched dynamically from the userid).
        # ======================== Initialize configuration =============
        self.user = self.graph.find_one("User", "userid", userid)
        self.usertopics = self.get_usertopics(userid=userid)
        do_not_know = dict(
            question=question,
            name="",
            # content=self.iformat(random_item(self.do_not_know)),
            content="",
            context="", tid="", url="", behavior=0, parameter="",
            txt="", img="", button="", valid=1)
        error_page = dict(
            question=question,
            name="",
            content=self.user['error_page'],
            context="", tid="", url="",
            # Modify: within a scene, behavior is unified to 0x1500. (2018-1-8)
            behavior=int("0x1500", 16),
            parameter="", txt="", img="", button="", valid=0)
        # ======================== 1. Preprocessing =====================
        # Question filtering (sensitive-word filter added 2017-5-25).
        if check_swords(question):
            print("问题包含敏感词!")
            return do_not_know
        # Strip the form of address.
        question = self.remove_name(question)
        # ======================== 2. Navigation ========================
        result = self.extract_navigation(question)
        if result["context"] == "user_navigation":
            self.amemory.append(result)  # add to ordinary memory
            self.pmemory.append(result)
            return result
        # ======================== 3. Semantic scenes ===================
        result = copy.deepcopy(do_not_know)
        # Global context — "repeat" command.
        for item in cmd_repeat:
            # TODO: make sure the repeated item is a real command, not e.g.
            # the closing phrase ("可以了") after a song.
            # TODO: pick the most recent meaningful action from memory as
            # the content to repeat.
            if item == question:
                if self.amemory:
                    return self.amemory[-1]
                else:
                    return do_not_know
        # Scene — exit.
        for item in cmd_end_scene:
            if item == question:
                # Exact-match exit mode.
                # result['behavior'] = int("0x0020", 16)
                result['behavior'] = 0
                result['name'] = '退出'
                # result['content'] = "好的,退出"
                result['content'] = ""
                self.is_scene = False
                self.topic = ""
                self.amemory.clear()  # clear scene memory
                self.pmemory.clear()  # clear scene previous-step memory
                return result
        # Scene — previous step: implemented with deques.
        if self.is_scene:
            for item in cmd_previous_step:
                if item in question:
                    # With link-jump check (scheme adopted 2017-12-22).
                    if len(self.pmemory) > 1:
                        self.amemory.pop()
                        return self.pmemory.pop()
                    elif len(self.pmemory) == 1:
                        return self.pmemory[-1]
                    else:
                        # Modify: return error_page 2017-12-22
                        return error_page
                        # return do_not_know
                    # Without link-jump check (scheme rejected 2017-12-22):
                    # if len(self.pmemory) > 1:
                        # return self.amemory.pop()
                    # elif len(self.amemory) == 1:
                        # return self.amemory[-1]
                    # else:
                        # return do_not_know
            # Scene — next step: implemented with deques.
            for item in cmd_next_step:
                if item in question:
                    if len(self.amemory) >= 1:
                        cur_button = json.loads(self.amemory[-1]['button']) if self.amemory[-1]['button'] else {}
                        # NOTE(review): `next` shadows the builtin here.
                        next = cur_button.get('next', {})
                        if next:
                            next_tid = next['url']
                            next_question = next['content']
                            match_string = "MATCH (n:NluCell {name:'" + \
                                next_question + "', topic:'" + self.topic + \
                                "', tid:" + next_tid + "}) RETURN n"
                            match_data = list(self.graph.run(match_string).data())
                            if match_data:
                                node = match_data[0]['n']
                                result['name'] = self.iformat(node["name"])
                                result["content"] = self.iformat(random_item(node["content"].split("|")))
                                result["context"] = node["topic"]
                                result["tid"] = node["tid"]
                                result["txt"] = node["txt"]
                                result["img"] = node["img"]
                                result["button"] = node["button"]
                                if node["url"]:
                                    result["url"] = random_item(node["url"].split("|"))
                                if node["behavior"]:
                                    result["behavior"] = int(node["behavior"], 16)
                                if node["parameter"]:
                                    result["parameter"] = node["parameter"]
                                func = node["api"]
                                if func:
                                    exec("result['content'] = " + func + "('" + result["content"] + "')")
                                # Add to scene memory.
                                self.pmemory.append(self.amemory[-1])
                                self.amemory.append(result)
                                return result
                    return error_page
        # ========================== Scene matching =====================
        tag = get_tag(question, self.user)
        # subgraph_all = list(self.graph.find("NluCell", "tag", tag))  # list
        subgraph_all = self.graph.find("NluCell", "tag", tag)  # iterator
        usergraph_all = [node for node in subgraph_all if node["topic"] in self.usertopics]
        usergraph_scene = [node for node in usergraph_all if node["topic"] == self.topic]
        if self.is_scene:
            # Inside a scene: semantic mode + key-sentence mode.
            if usergraph_scene:
                result = self.extract_synonym(question, usergraph_scene)
                if not result["context"]:
                    result = self.extract_keysentence(question, usergraph_scene)
                # result = self.extract_pinyin(question, usergraph_scene)
                if result["context"]:
                    print("在场景中,匹配到场景问答对")
                    # Check whether the result's tid is a sub-scene jump link
                    # of the current scene: look the tid up in the jump-link
                    # set of self.amemory[-1].
                    # ===================================================
                    data_img = json.loads(self.amemory[-1]['img']) if self.amemory[-1]['img'] else {}
                    data_button = json.loads(self.amemory[-1]['button']) if self.amemory[-1]['button'] else {}

                    def get_tids(data):
                        # Collect all non-empty jump-link tids from `data`.
                        tids = set()
                        for key in data.keys():
                            tid = data[key]['url']
                            if tid:
                                tids.add(int(tid))
                        return tids

                    pre_tids = get_tids(data_img).union(get_tids(data_button.setdefault('area', {})))
                    if int(result["tid"]) in pre_tids:
                        print("正确匹配到当前场景的子场景")
                        self.pmemory.append(self.amemory[-1])
                        self.amemory.append(result)  # add to scene memory
                        return result
                    # ===================================================
            # When no subgraph exists or nothing matches inside the scene,
            # return the custom error page instead of repeating.
            # Modify: return error_page (2017-12-22)
            # if self.amemory:
                # return self.amemory[-1]
            # else:
                # return error_page
            return error_page
        else:
            # Not in a scene: semantic mode + key-sentence mode.
            result = self.extract_synonym(question, usergraph_all)
            if not result["context"]:
                result = self.extract_keysentence(question)
            # result = self.extract_pinyin(question, usergraph_all)
            if result["tid"] != '':
                # Matched a scene node.
                if int(result["tid"]) == 0:
                    print("不在场景中,匹配到场景根节点")
                    self.is_scene = True  # enter the scene
                    self.topic = result["context"]
                    self.amemory.clear()  # clear ordinary memory on entry
                    self.pmemory.clear()
                    self.amemory.append(result)  # add to scene memory
                    self.pmemory.append(result)
                    return result
                else:
                    print("不在场景中,匹配到场景子节点")
                    return do_not_know
            elif result["context"]:
                # Matched an ordinary node.
                self.topic = result["context"]
                self.amemory.append(result)  # add to ordinary memory
                self.pmemory.append(result)
                return result
        # ======================== 5. Online semantics ==================
        if not self.topic:
            # 1. Music ("sing a song ...").
            if "唱一首" in question or "唱首" in question or "我想听" in question:
                result["behavior"] = int("0x0001", 16)
                result["content"] = "好的,正在准备哦"
            # 2. "What's good to eat nearby?"
            elif "附近" in question or "好吃的" in question:
                result["behavior"] = int("0x001C", 16)
                result["content"] = self.address
            # 3. nlu_tuling (weather).
            elif "天气" in question:
                # After the Tuling API change. Add in 2017-8-4.
                location = get_location(question)
                if not location:
                    # The question contains no address.
                    weather = nlu_tuling(self.address + question)
                else:
                    # The question contains an address.
                    weather = nlu_tuling(question)
                # Before the Tuling API change:
                # weather = nlu_tuling(question, loc=self.address)
                result["behavior"] = int("0x0000", 16)
                try:
                    # Before the Tuling API change.
                    temp = weather.split(";")[0].split(",")[1].split()
                    myweather = temp[0] + temp[2] + temp[3]
                    # After the Tuling API change. Add in 2017-8-3.
                    # temp = weather.split(",")
                    # myweather = temp[1] + temp[2]
                except:
                    # NOTE(review): bare except silently falls back to the
                    # raw API reply — consider narrowing the exception type.
                    myweather = weather
                result["content"] = myweather
                result["context"] = "nlu_tuling"
            # 4. Append-log every question the robot could not answer.
            else:
                with open(log_do_not_know, "a", encoding="UTF-8") as file:
                    file.write(question + "\n")
            # 5. nlu_tuling fallback (disabled):
            # else:
                # result["content"] = nlu_tuling(question, loc=self.address)
                # result["context"] = "nlu_tuling"
            if result["context"]:
                # Matched online semantics.
                self.amemory.append(result)  # add to ordinary memory
        # ==============================================================
        return result
def get_data(timestamp):
    """Fetch K-root RIB snapshots at `timestamp` and store AS paths in Neo4j.

    Pulls routeviews RIB records for the K-root prefix via BGPStream,
    collects the unique (reversed, origin-first) AS paths per announced
    prefix, then creates `asn` nodes and `TO` relationships describing each
    path. Consecutive repeats of an ASN (AS prepending) are collapsed and the
    repeat count stored in the node's `prepended` property.

    Args:
        timestamp: Unix time of the RIB snapshot to query.

    Side effects only (Neo4j writes plus console output); returns None.
    """
    graph = Graph(password="******")
    stream = BGPStream()
    rec = BGPRecord()
    rec_time = None  # time of the last record seen; stays None with no records
    # Alternative root-server prefixes, kept for reference (IPv4):
    # stream.add_filter('prefix', '198.41.0.0/24')    # A-root
    # stream.add_filter('prefix', '192.228.79.0/24')  # B-root, only 1 site
    # stream.add_filter('prefix', '192.33.4.0/24')    # C-root
    # stream.add_filter('prefix', '199.7.91.0/24')    # D-root
    # stream.add_filter('prefix', '192.203.230.0/24') # E-root, IPv4 only
    # stream.add_filter('prefix', '192.5.5.0/24')     # F-root
    # stream.add_filter('prefix', '192.112.36.0/24')  # G-root, IPv4 only
    # stream.add_filter('prefix', '198.97.190.0/24')  # H-root
    # stream.add_filter('prefix', '192.36.148.0/24')  # I-root
    # stream.add_filter('prefix', '192.58.128.0/24')  # J-root
    stream.add_filter('prefix', '193.0.14.0/24')  # K-root
    # stream.add_filter('prefix', '199.7.83.0/24')    # L-root
    # stream.add_filter('prefix', '202.12.27.0/24')   # M-root
    # IPv6 equivalents:
    # stream.add_filter('prefix', '2001:503:ba3e::/48')  # A
    ## stream.add_filter('prefix', '2001:500:84::/48')   # B, only 1 site
    # stream.add_filter('prefix', '2001:500:2::/48')     # C
    # stream.add_filter('prefix', '2001:500:2d::/48')    # D
    # stream.add_filter('prefix', '2001:500:2f::/48')    # F
    # stream.add_filter('prefix', '2001:500:1::/48')     # H
    # stream.add_filter('prefix', '2001:7fe::/33')       # I
    # stream.add_filter('prefix', '2001:503:c27::/48')   # J
    # stream.add_filter('prefix', '2001:7fd::/48')       # K
    # stream.add_filter('prefix', '2001:500:9f::/48')    # L
    # stream.add_filter('prefix', '2001:dc3::/32')       # M
    stream.add_filter('record-type', 'ribs')
    # stream.add_filter('collector', 'rrc01')
    stream.add_filter('project', 'routeviews')
    stream.add_interval_filter(timestamp, timestamp)
    stream.start()

    result = {}
    while stream.get_next_record(rec):
        rec_time = rec.time
        if rec.status == "valid":
            elem = rec.get_next_elem()
            while elem:
                # FIX: was a Python 2 print statement; the rest of this file
                # is Python 3, where that is a SyntaxError.
                print(rec.collector, elem.type, elem.peer_address, elem.peer_asn, elem.fields)
                as_path = elem.fields['as-path'].split()
                as_path.reverse()  # origin AS first
                prefix = elem.fields['prefix']
                if prefix not in result:
                    result[prefix] = []
                result[prefix].append(as_path)
                elem = rec.get_next_elem()

    # Keep only the unique AS paths per prefix.
    for prefix in result:
        result[prefix] = [list(x) for x in set(tuple(x) for x in result[prefix])]
    print('timestamp {} ==> result: {}'.format(rec_time, result))

    for prefix in result:
        for path in result[prefix]:
            print('path: {}'.format(path))
            cur_node = None
            prev_node = None
            counter_as_prepend = 0
            for index, asn in enumerate(path):
                searched_node = graph.find('asn', property_key='label', property_value=asn)
                # FIX: Python 2 `iterator.next()` -> built-in next() with a
                # default, replacing the try/except StopIteration. See if the
                # AS node is already in the db (if yes, cur_node == prev_node).
                cur_node = next(searched_node, None)
                if cur_node is None:
                    # Not found: create a new node for this ASN.
                    cur_node = Node('asn', label=str(asn))
                if index > 0:
                    if index == len(path) - 1:
                        cur_node['path'] = path  # attach AS path to the last ASN
                    if cur_node != prev_node:
                        if counter_as_prepend > 0:
                            cur_node['prepended'] = counter_as_prepend
                            counter_as_prepend = 0  # reset
                        peering = Relationship(cur_node, 'TO', prev_node, time=rec_time, prefix=prefix)
                        # peering['time'] = rec_time
                        # peering['prefix'] = prefix
                        graph.create(peering)
                    else:
                        # Same ASN repeated consecutively: AS prepending.
                        counter_as_prepend += 1
                prev_node = cur_node