def get_track_comments():
    """Return every Comment node's properties, inflated, keyed by comment id."""
    graph = Graph()
    return {
        node.properties['id']: inflate(node.properties)
        for node in graph.find("Comment")
    }
class Neo4j():
    """Thin wrapper around a py2neo Graph for Item / HudongItem title lookups."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): credentials are hard-coded (masked in source) — consider config.
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Return the single Item node whose title equals *value*."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    # Return the HudongItem (Hudong Baike entry) matching the given title.
    def matchHudongItembyTitle(self, value):
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    # Return at most *limitnum* HudongItem wrappers.
    def getAllHudongItem(self, limitnum):
        items = [HudongItem(record)
                 for record in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return items
class Neo4j():
    """Small facade over a py2neo Graph for title-based node lookups."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Hard-coded local connection; credentials masked in source.
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Find one Item node whose title equals *value*."""
        found = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return found

    def matchHudongItembyTitle(self, value):
        """Find one HudongItem (Hudong Baike entry) by its title."""
        found = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return found

    def getAllHudongItem(self, limitnum):
        """Wrap at most *limitnum* HudongItem nodes and return them as a list."""
        results = []
        for record in self.graph.find(label="HudongItem", limit=limitnum):
            results.append(HudongItem(record))
        print('load AllHudongItem over ...')
        return results

#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
def lambda_handler(event, context):
    """Write a notification row to DynamoDB for every user in event['follow'].

    Looks the followed users up in Neo4j, then batch-writes one item per
    receiver carrying a trimmed copy of event['user'].
    """
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    receiver = graph.find('User', property_key='id',
                          property_value=tuple(event['follow']))
    table = boto3.resource('dynamodb').Table(os.environ["DYNAMODB"])
    wanted = ("username", "first_name", "last_name", "id", "photo")
    now = int(time.time())
    # Keep only the whitelisted user fields; the sender id seeds the dict.
    user = {'id': event['id']}
    user.update({k: v for k, v in event['user'].items() if k in wanted})
    with table.batch_writer() as batch:
        for target in receiver:
            batch.put_item(
                Item={
                    'id': str(target['id']),
                    'uid': str(event['id']) + "f",
                    'user': user,
                    'type': 1,
                    'timestamp': now,
                    'fcm': target['fcm'] if 'fcm' in target else False,
                    'lang': target['lang'] if 'lang' in target else "FR",
                })
def get_tracks():
    """Return a mapping of track id -> inflated properties for every Track node."""
    graph = Graph()
    metadata = {}
    for node in graph.find("Track"):
        props = node.properties
        metadata[props['id']] = inflate(props)
    return metadata
class Neo4j():
    """py2neo Graph wrapper for querying Item / HudongItem nodes.

    Call connectDB() before using any query method.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): hard-coded localhost credentials (masked); move to config.
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return the single Item node whose title equals *value* (or None)."""
        answer = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return answer

    def matchHudongItembyTitle(self, value):
        """Return the HudongItem node (Hudong Baike entry) with the given title (or None)."""
        answer = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return answer

    def getLabeledHudongItem(self, filename):
        """Load every labeled HudongItem listed in *filename* (rows of title, label).

        Titles with no matching node are silently skipped.
        """
        labels = readCSV2(filename)
        items = []  # fixed: was `List` (shadows typing convention / reads as builtin)
        for line in labels:
            ctx = self.graph.find_one(label="HudongItem", property_key="title", property_value=line[0])
            if ctx is None:  # fixed: was `ctx == None`
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            items.append(cur)
        print('load LabeledHudongItem over ...')
        return items

    def getAllHudongItem(self, limitnum):
        """Return up to *limitnum* HudongItem wrappers."""
        items = []
        for g in self.graph.find(label="HudongItem", limit=limitnum):
            items.append(HudongItem(g))
        print('load AllHudongItem over ...')
        return items
def resolveAttrNameUsingKB(query):
    """Return the 'phrase' of the ROOT node best connected to *query*.

    "Best" is the largest shortestPathWeight; returns '' when no node
    scores above zero.
    """
    g = Graph()
    best_weight = 0
    best_node = None
    for candidate in g.find('ROOT'):
        weight = shortestPathWeight(query, candidate)
        # print(candidate.properties['phrase'] + ' : ' + str(weight), file=sys.stderr)
        if weight > best_weight:
            best_weight, best_node = weight, candidate
    return '' if best_node is None else best_node.properties['phrase']
class Neo4j():
    """Query helper over a py2neo Graph holding Item / HudongItem nodes.

    connectDB() must be called before the lookup methods.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # NOTE(review): localhost credentials hard-coded (masked in source).
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Return the Item node titled *value*, or None."""
        answer = self.graph.find_one(label="Item", property_key="title", property_value=value)
        return answer

    def matchHudongItembyTitle(self, value):
        """Return the HudongItem (Hudong Baike entry) titled *value*, or None."""
        answer = self.graph.find_one(label="HudongItem", property_key="title", property_value=value)
        return answer

    def getLabeledHudongItem(self, filename):
        """Read (title, label) rows from *filename* and return labeled HudongItem wrappers.

        Unmatched titles are skipped.
        """
        labels = readCSV2(filename)
        items = []  # fixed: was `List`; also dropped unused counter `i = 0`
        for line in labels:
            ctx = self.graph.find_one(label="HudongItem", property_key="title", property_value=line[0])
            if ctx is None:  # fixed: was `ctx == None` with a stray semicolon on continue
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            items.append(cur)
        print('load LabeledHudongItem over ...')
        return items

    def getAllHudongItem(self, limitnum):
        """Return up to *limitnum* HudongItem wrappers."""
        items = []
        for g in self.graph.find(label="HudongItem", limit=limitnum):
            items.append(HudongItem(g))
        print('load AllHudongItem over ...')
        return items
def run():
    """Build one tree root per pattern pair and feed every rebase_enzyme node to tree_making."""
    DB_graph = Graph(password="******")
    selector = NodeSelection(DB_graph)
    roots = []
    for pair in pairs:
        tree_node = Node("tree_node")
        tree_node["pattern"] = pair
        roots.append(tree_node)
    for root in roots:
        # Each root walks the full rebase_enzyme node set.
        for enzyme in DB_graph.find('rebase_enzyme'):
            print(enzyme['pattern'])
            tree_making(root, enzyme)
def city():
    """Return JSON mapping each meetup Group name to the times of its HAS EVENT targets."""
    location = request.args['meetup_group']
    graph = Graph(host=config['neo4j']['host'],
                  user=config['neo4j']['user'],
                  password=config['neo4j']['password'])
    logger.info('Finding upcoming meetup events in {}'.format(location))
    groups_data = defaultdict()
    for grp in graph.find('Group'):
        event_times = [rel.end_node().properties['time']
                       for rel in graph.match(start_node=grp, rel_type="HAS EVENT")]
        groups_data[grp.properties['name']] = event_times
    return json.dumps(groups_data)
def lambda_handler(event, context):
    """Create a FOLLOW relationship from the event['id'] user to each user in event['follow']."""
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    targets = graph.find('User', property_key='id',
                         property_value=tuple(event['follow']))
    follower = graph.find_one('User', property_key='id', property_value=event['id'])
    now = int(time.time())
    if not follower:
        # Unknown follower id: nothing to link.
        return
    for target in targets:
        graph.create(Relationship(follower, 'FOLLOW', target, timestamp=now))
class Neo4j():
    """Neo4j access layer reading its connection settings from demo/neo4jconfig."""
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        # Pull url/username/password from the [neo4jdb] section of the config file.
        conf = configparser.ConfigParser()
        conf.read('demo/neo4jconfig')
        url = conf.get("neo4jdb", "url")
        username = conf.get("neo4jdb", "username")
        password = conf.get("neo4jdb", "password")
        self.graph = Graph(url, username=username, password=password)
        # self.graph = Graph("http://localhost:7474", username="******", password="******")
        print('connect successed')

    def matchItembyTitle(self, value):
        """Return one Item node with the given title."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    def matchHudongItembyTitle(self, value):
        """Return one HudongItem (Hudong Baike entry) with the given title."""
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    def getAllHudongItem(self, limitnum):
        """Collect up to *limitnum* HudongItem wrappers."""
        collected = [HudongItem(rec)
                     for rec in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return collected

#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
for p in simili_matrix.get(sk).keys(): try: if active_periodics.index(p) != 0: score += simili_matrix.get(active_key).get(p) except ValueError as e: pass recommended[sk] = score sorted_x = sorted(recommended.items(), key=operator.itemgetter(1), reverse = True) neighbor1 = graph.cypher.execute("MATCH p=(a:Author {keylattes:'%s'})-[r:AUTHORING*2]-(b:Author) RETURN DISTINCT b.name" % active_key) neighbor_list = list() # cast RecordList to list [neighbor_list.append(x[0]) for x in neighbor1] for i in sorted_x[:10]: n = graph.find_one("Author", property_key='keylattes', property_value=i[0]) try: if neighbor_list.index(n['name']) != -1: print('[**] ' + n['name'] + ' - ' + str(i[1])) except ValueError: print(n['name'] + ' - ' + str(i[1])) if __name__ == '__main__': r = Recommender() graph = Graph() authors = graph.find("Author") publications = graph.find("Article") r.recommend(authors, publications)
import os
import numpy as np
import re
import jieba
from gensim.models import word2vec

# Build a text corpus from the 'intro' property of every gainian / people / book
# node in the local Neo4j graph. The commented-out code below previously also
# harvested node names into a custom-dictionary file for the segmenter.
graph = Graph('http://localhost:7474', username='******', password='******')
labels = ['gainian', 'people', 'book']
names = []
# NOTE(review): Windows-specific absolute path with non-ASCII segments — verify on target machine.
path = 'F:\大创\抑郁症智能公益平台\\0项目\智能问答\简单的问题分类器\data'
# f = open(os.path.join(path, 'dict.txt'), 'a', encoding='utf-8')
text = ''
for label in labels:
    nodes = graph.find(label=label)
    for node in nodes:
        # Skip nodes with an empty/missing intro.
        if not node['intro']:
            continue
        # name = node['name']
        intro = node['intro']
        text += intro + '\n'
        # if 'http' in name or len(name) > 100:
        #     continue
        # name = re.sub("[《》]+", "", name)
        # names.append(name)
        # if label == 'gainian':
        #     f.write(name + ' 2000 ng\n')
        # elif label == 'book':
        #     f.write(name + ' 2000 nb\n')
        # else:
# Demo (Python 2): looking up nodes by label + property with py2neo's
# find() / find_one() against a Graphene-hosted database.
graph = Graph(graphene.DATABASE_URL)
print graph

# find a node or set of nodes according to properties and labels
# graph.find_one() # returns a single node
# graph.find() # returns a generator

# Let's find Marnee
marnee_node = graph.find_one("Person", property_key="name", property_value="Marnee")
print "find_one Marnee %s" % marnee_node

# find() yields every matching Person node lazily.
marnee_generator = graph.find("Person", property_key="name", property_value="Marnee")
for marnee in marnee_generator:
    print marnee

# Let's find Julian
julian_node = graph.find_one("Person", property_key="name", property_value="Julian")
print "find_one Julian %s" % julian_node

# Let's find all the Persons Julian knows
# show the Cypher -- MATCH
# show the code
# graph.match()
# graph.match_one()
'DINING_KITCHEN.jpg', 'GUEST_ROOM.jpg', 'LIVING.jpg', 'MASTER_BEDROOM.jpg', 'TOILET.jpg' ] }, 'Banksy Quirk': { 'desc': 'You are the person who picked up that little curio in Venice or Vellore, that everyone talks about. Everything about you is YOU. Special, edgy, liberal and sexy. Who thinks graffiti can be the purest form of art. Been to a protest lately?', 'cover': 'banksy quirk.jpg', 'images': [ 'DINING.jpg', 'GUEST_BEDROOM.jpg', 'KITCHEN.jpg', 'LIVING.jpg', 'MASTER_BEDROOM.jpg' ] } } z = graph.find(label) for node in z: node.properties['price'] = (random.randint(20000, 100000)) node.properties['description'] = metadata[node['name']]['desc'] node.properties['cover_pic'] = metadata[node['name']]['cover'] # node.properties['images'] = [] # if len(metadata[node['name']]['images']) > 0: # for image in metadata[node['name']]['images']: # node.properties['images'].append(generateImageString(image,1)) # else: # for i in range(0,4): # node.properties['images'].append(generateImageString(i,0)) node.push()
# Persist the previously-built subgraph `s` (defined earlier in the script),
# then demonstrate the py2neo query APIs.
graph.create(s)

'''
2 —— Node查询
'''
# Query with CQL (Cypher); the result comes back as a list.
data1 = graph.data('MATCH(p:PersonTest) return p')
print("data1 = ", data1, type(data1))
print()

# find_one() returns the first node matching label + property.
data2 = graph.find_one(label='PersonTest', property_key='name', property_value="李四")
print("data2 = ", data2, type(data2))
print()

# find() returns a generator over all matching nodes.
data3 = graph.find(label='PersonTest')
for data in data3:
    print("data3 = ", data)
print()

'''
3 —— Relationship查询
'''
# match_one() returns a single relationship of the given type.
relationship = graph.match_one(rel_type='KNOWNS')
print(relationship, type(relationship))
print()

'''
4 —— 更新Node的某个属性值,若node没有该属性,则新增该属性
'''
# Fetch the node to update (property is added if it does not exist yet).
node1 = graph.find_one(label='PersonTest', property_key='name', property_value="张三")
class GraphDB():
    """py2neo-backed store of Twitter users, topics, and their weighted relations."""

    def __init__(self, user=NEO4J_USER, pwd=NEO4J_PWD, host=NEO4J_HOST):
        # Credentials are embedded in the connection URL (py2neo v2 style).
        self.graph = Graph("http://%s:%s@%s/db/data/" % (user, pwd, host))

    def query(self, query_str, stream=False):
        # Run a Cypher query; stream=True returns a lazy cursor instead of a RecordList.
        if stream:
            return self.graph.cypher.stream(query_str)
        else:
            return self.graph.cypher.execute(query_str)

    def create_relation_user_to_topic(self, user, relation, topic_name):
        # Upsert user and topic nodes, then create the relation or bump its 'count'.
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        topicNode = self.graph.find_one("topic_name", 'name', topic_name)
        if not topicNode:
            topicNode = Node("topic_name", name = topic_name)
            self.graph.create(topicNode)
        relationship = self.graph.match_one(userNode, relation, topicNode)
        if not relationship:
            relationship = Relationship(userNode, relation, topicNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    # Relations: follows eventuell favourites, retweets
    def create_relation_user_to_user(self, userA, relation, userB):
        # Same upsert-or-increment pattern as above, between two user nodes.
        userANode = self.graph.find_one("user", 'id', userA.id_str)
        userBNode = self.graph.find_one("user", 'id', userB.id_str)
        if not userANode:
            userANode = self.create_node_from_user(userA)
            self.graph.create(userANode)
        if not userBNode:
            userBNode = self.create_node_from_user(userB)
            self.graph.create(userBNode)
        relationship = self.graph.match_one(userANode, relation, userBNode)
        if not relationship:
            relationship = Relationship(userANode, relation, userBNode, count = 1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    def increment_user_counter(self, user, counter, n):
        # Add n to an arbitrary per-user counter property, creating it at n if missing.
        userNode = self.graph.find_one("user", 'id', user.id_str)
        if not userNode:
            userNode = self.create_node_from_user(user)
            self.graph.create(userNode)
        if counter in userNode.properties:
            userNode.properties[counter] += n
        else:
            userNode.properties[counter] = n
        userNode.push()

    def get_all_users(self):
        # Return [{'name': screen_name, 'id_str': id}, ...] for every user node.
        users = []
        for u in self.graph.find('user'):
            users.append({'name': u.properties['screen_name'], 'id_str': u.properties['id']})
        return users

    def create_node_from_user(self, user):
        # Map a tweepy-style user object onto a 'user' node (not yet persisted).
        userNode = Node("user", name=user.screen_name, id=user.id_str, followers_count=user.followers_count, friends_count=user.friends_count, statuses_count=user.statuses_count, favourites_count=user.favourites_count)
        return userNode

    def quicksearch(self, username, limit=10):
        # NOTE(review): username/limit are %-interpolated into Cypher — injection risk;
        # consider parameterized queries.
        cql_query = "match(u:user) WHERE u.name =~ '%s.*' RETURN DISTINCT u.name LIMIT %s;"
        return self.query(cql_query % (username, limit))

    def get_user_count(self):
        # Single-row aggregate; returns 0 if the query yields nothing.
        cql_query = "match(u:user) RETURN count(DISTINCT u) AS c;"
        for row in self.query(cql_query):
            return row['c']
        return 0
#authorization auth = tweepy.OAuthHandler(ckey, csecret) #wait on rate limits api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) #Get specific hashtag until the date we want tweets = tweepy.Cursor(api.search, q="#SuperBowl", count=100, until='2016-02-09', include_entities=True).items() for tweet in tweets: #find if exists for exploration... mynode = list( graph.find('User', property_key='Screen_Name', property_value=tweet.user.screen_name.encode('utf8'))) x = graph.merge_one("User", "Screen_Name", tweet.user.screen_name.encode('utf8')) x.properties.update({ "Name": tweet.user.name, "Description": tweet.user.description.encode('utf8'), "Location": tweet.user.location, "Followers": tweet.user.followers_count, "Friends": tweet.user.friends_count, "Tweets": tweet.user.statuses_count, "Image": tweet.user.profile_image_url }) if len(mynode) == 0: x.properties.update({"Exploration": ''})
from __future__ import print_function
from py2neo import Graph, Node, Relationship

# Demo script: inspect existing Person nodes and link a child to her father.
graph_db = Graph()
person = Node("Person", name="JUHASZ Lilla Linda")  # NOTE(review): created but never bound/used
for record in graph_db.cypher.execute("Match (n) return n"):
    print(record)
new_person = Node("Person", name="JUHASZ Peter", born=1930)
# Listing matches before binding shows whether the node already exists remotely.
print("exists: " + str(list(graph_db.find("Person", property_key="name", property_value="JUHASZ Peter"))))
new_person.bind(graph_db.uri)
print("exists: " + str(new_person.exists))
father = graph_db.find_one("Person", property_key='name', property_value="JUHASZ Peter")
child = graph_db.find_one("Person", property_key='name', property_value="JUHASZ Lilla Linda")
child_of_rel = "CHILD_OF"
father_daughter_relationship = Relationship(child, child_of_rel, father)
graph_db.create(father_daughter_relationship)
class TestNeoDBHandler(unittest.TestCase):
    """Integration tests for NeoDBHandler run against a throwaway TEST-labelled subgraph."""

    def setUp(self):
        # Build 5 TEST/Twirp nodes whose numeric counters are arithmetic
        # progressions of the index, plus a small web of DIRECT/INDIRECT
        # relationships between them.
        self.graph = Graph(TEST_GRAPH_DB)
        self.node_list = [Node("TEST", test_id=i) for i in xrange(5)]

        # Nodes
        # -----
        for i, node in enumerate(self.node_list):
            node.labels.add("Twirp")
            node.properties.update({
                "user_id": i*100000,
                "username":"",
                "name":"",
                "handle":"",
                "followers_count":i*100,
                "friends_count":i*50,
                "tweet_count":i*10,
                "retweet_count":i*5,
                "been_retweeted_count":i*3,
                "favourite_hashtag":"",
                "hashtag_count":i*2,
                "archipelago_id":i*1,
                "subscribed": True,
                "constituency":"CB"+str(i),
                "offices":["office"+str(i), "sedge steward"],
            })

        self.node_list[0].properties.update({"username":"******", "name":"Michael Blue Eyes", "handle":"MBEyes", "favourite_hashtag":"#roth", "party":"DC" })
        self.node_list[1].properties.update({"username":"******", "name":"Little Richard", "handle":"LRichy", "favourite_hashtag":"#rawls", "party":"DC" })
        self.node_list[2].properties.update({"username":"******", "name":"The Boy Wonder", "handle":"tBW", "favourite_hashtag":"#richyfeynman", "party":"Marvel" })
        self.node_list[3].properties.update({"username":"******", "name":"Kendog Lamar", "handle":"Kdog", "favourite_hashtag":"#kanye", "party":"Marvel"})
        self.node_list[4].properties.update({"username":"******", "name":"Tiny Hands", "handle":"tinyhands", "favourite_hashtag":"#ihavetinyhands", "party":"Beano" })

        # Relationships
        # --------------
        # mbe   -[MENTION]> lrich
        # mbe   -[REPLIES]> ken
        # lrich -[REPLIES]> mbe
        # tbw   -[RETWEETS]> lrich
        # tbw   -[MENTIONS_BY_PROXY]> mbe
        # ken   -!->
        # th    -!->
        defaults = {
            "mentions":0, "mention_last":"", "mention_date":"",
            "replies":0, "reply_last":"", "reply_date":"",
            "retweets":0, "retweet_last":"", "retweet_date":""
        }
        mbe1 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[1], **defaults)
        mbe2 = Relationship(self.node_list[0], "DIRECT" ,self.node_list[3], **defaults)
        lrich = Relationship(self.node_list[1], "DIRECT", self.node_list[0], **defaults)
        tbw = Relationship(self.node_list[2], "DIRECT", self.node_list[1], **defaults)
        tbw2 = Relationship(self.node_list[2], "INDIRECT", self.node_list[0], **defaults)

        mbe1.properties.update({ "mentions":5, "mention_last":"1000000", "mention_date":"today" })
        mbe2.properties.update({ "replies":10, "reply_last":"2000000", "reply_date":"tommorow" })
        lrich.properties.update({ "replies":15, "reply_last":"3000000", "reply_date":"yesterday" })
        tbw.properties.update({ "retweets":20, "retweet_last":"4000000", "retweet_date":"thismorning" })
        tbw2.properties.update({ "mentions":1, "mention_last":"3000000", "mention_date":"yesterday" })

        for node in self.node_list:
            self.graph.create(node)
        self.graph.create(mbe1)
        self.graph.create(mbe2)
        self.graph.create(lrich)
        self.graph.create(tbw)
        self.graph.create(tbw2)
        self.graph.push()

    def tearDown(self):
        # remove test items
        self.graph.cypher.execute("MATCH (n:TEST) DETACH DELETE n")
        empty_list = [ _ for _ in self.graph.find('TEST') ]
        self.assertEqual( empty_list, [])

    ########################################################################
    #                           CYPHER QUERIES                             #
    ########################################################################

    def test_get_party_nodes(self):
        # All 'Marvel' party members, no minimum-tweet filter.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"Kendog Lamar", "handle":"Kdog", "party":"Marvel",
                "constituency":"CB3", "offices":["office3", "sedge steward"],
                "tweets": 30, "friends": 150, "followers": 300,
                "archipelago_id": 3,
                "tweeted":[], "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0], "mention_last":['3000000', ""], "mention_date":['yesterday', ""],
                "replies":[0,0], "reply_last":["",""], "reply_date":["",""],
                "retweets":[0, 20], "retweet_last":["",'4000000'], "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 0) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_party_nodes_min_tweet(self):
        # Same query with a minimum of 5 tweets: the INDIRECT edge drops out.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"Kendog Lamar", "handle":"Kdog", "party":"Marvel",
                "constituency":"CB3", "offices":["office3", "sedge steward"],
                "tweets": 30, "friends": 150, "followers": 300,
                "archipelago_id": 3,
                "tweeted":[], "mentions":[], "mention_last":[], "mention_date":[],
                "replies":[], "reply_last":[], "reply_date":[],
                "retweets":[], "retweet_last":[], "retweet_date":[],
                "tweet_type":[]
            },
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0], "mention_last":[""], "mention_date":[""],
                "replies":[0], "reply_last":[""], "reply_date":[""],
                "retweets":[20], "retweet_last":['4000000'], "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        # Make request
        results = [ _ for _ in neo_db_handler.get_party_nodes('Marvel', 5) ]
        # Test against reference
        self.assertEqual(len(results), 2)
        for i in range(2):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_default(self):
        # Marvel -> DC edges only, no minimum-tweet filter.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['MBEyes','LRichy'],
                "mentions":[1, 0], "mention_last":['3000000', ""], "mention_date":['yesterday', ""],
                "replies":[0,0], "reply_last":["",""], "reply_date":["",""],
                "retweets":[0, 20], "retweet_last":["",'4000000'], "retweet_date":["", 'thismorning'],
                "tweet_type":["INDIRECT", "DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 0 ) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    def test_get_cross_party_nodes_min_tweets(self):
        # Cross-party with minimum 5 tweets: only the DIRECT retweet edge survives.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        test_reference = [
            {
                "name":"The Boy Wonder", "handle":"tBW", "party":"Marvel",
                "constituency":"CB2", "offices":["office2", "sedge steward"],
                "tweets": 20, "friends": 100, "followers": 200,
                "archipelago_id": 2,
                "tweeted":['LRichy'],
                "mentions":[0], "mention_last":[""], "mention_date":[""],
                "replies":[0], "reply_last":[""], "reply_date":[""],
                "retweets":[20], "retweet_last":['4000000'], "retweet_date":['thismorning'],
                "tweet_type":["DIRECT"]
            }
        ]
        results = [ _ for _ in neo_db_handler.get_cross_party_nodes('Marvel', 'DC', 5) ]
        # Test against reference
        self.assertEqual(len(results), 1)
        for i in range(1):
            for key in test_reference[i].keys():
                self.assertEqual(results[i][key], test_reference[i][key] )

    ########################################################################
    #              ADDING TO DB (TWIRPS CLASSES)->(PY2NEO OBJS)            #
    ########################################################################

    def test_add_Twirp_to_database(self):
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_twirp = Twirp(None, 'test')
        new_twirp.id = 314150000000
        new_twirp.username = '******'
        new_twirp.name = 'Bilbo Baggins'
        new_twirp.handle = 'bilbo'
        new_twirp.followers_count = 20
        new_twirp.friends_count = 30
        new_twirp.tweet_count = 40
        new_twirp.retweet_count = 50
        new_twirp.been_retweet_count = 60
        new_twirp.favourite_hashtag = '#onering'
        new_twirp.hashtag_count = 70
        new_twirp.archipelago_id = 80
        new_twirp.twirps_type = -1
        new_twirp.subscribed = False
        new_twirp.geo = False

        # Add to database (with 'TEST' label)
        neo_db_handler.add_Twirp_to_database(new_twirp, is_test_mode=True)

        # Check results
        results = [ _ for _ in self.graph.cypher.execute(
            "MATCH (n {handle:'bilbo'}) RETURN n")]
        self.assertEqual(len(results), 1)
        node = results[0][0]

        # Interrogate Node
        self.assertEqual(node.get_labels(), [u'TEST', u'Twirp', u'Other'])
        self.assertEqual(node["user_id"],314150000000)
        self.assertEqual(node["username"],'BilboBagginsMP')
        self.assertEqual(node["name"],'Bilbo Baggins')
        self.assertEqual(node["handle"],'bilbo')
        self.assertEqual(node["followers_count"],20)
        self.assertEqual(node["friends_count"],30)
        self.assertEqual(node["tweet_count"],40)
        self.assertEqual(node["retweet_count"],50)
        self.assertEqual(node["been_retweeted_count"],60 )
        self.assertEqual(node["favourite_hashtag"],'#onering')
        self.assertEqual(node["hashtag_count"],70)
        self.assertEqual(node["archipelago_id"],80 )
        self.assertEqual(node["subscribed"],False)

    def test_add_Tweet_to_database__mention(self):
        # TEST: (LRich)->(tinyhands) - mention: ("Hey @tinyhands")
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands')]
        new_tweet.content = 'Generic tweet @tinyhands'  # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3      # not stored here
        new_tweet.favourite_count = 4    # not stored here
        new_tweet.hashtags = ['clothes'] # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'}) RETURN r""")]
        self.assertEqual(len(results), 1)
        relationship = results[0][0]

        # In depth check
        self.assertEqual(relationship.type, u'DIRECT')
        self.assertEqual(relationship["mentions"], 1)
        self.assertEqual(relationship["mention_last"], '1')
        self.assertEqual(relationship["mention_date"], 'a date string')
        self.assertEqual(relationship["replies"], 0)
        self.assertEqual(relationship["reply_last"], '')
        self.assertEqual(relationship["reply_date"], '')
        self.assertEqual(relationship["retweets"], 0)
        self.assertEqual(relationship["retweet_last"], '')
        self.assertEqual(relationship["retweet_date"], '')

    def test_add_Tweet_to_database__reply(self):
        # TEST: (LRich) ->(tBW) - reply & mention;
        #       (LRich) ->(tinyhands) mention  EG: (reply->tBW):"Hey @tBW, @tinyhands"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 100000
        new_tweet.handle = 'LRichy'
        new_tweet.mentions = [(400000, 'tinyhands'), (200000, 'tBW')]
        new_tweet.content = 'Generic tweet @tinyhands @tBW' # not stored here
        new_tweet.is_retweet = False
        new_tweet.retweeted_user = None
        new_tweet.retweet_status_id = 0
        new_tweet.is_reply = True
        new_tweet.in_reply_to_user = (200000, 'tBW')
        new_tweet.in_reply_to_status_id = 2
        new_tweet.retweet_count = 3      # not stored here
        new_tweet.favourite_count = 4    # not stored here
        new_tweet.hashtags = ['clothes'] # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'LRichy'})-[r]->(b) WHERE b.handle<>'MBEyes' RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'DIRECT')
        self.assertEqual(results[0][1], 'The Boy Wonder')
        self.assertEqual(results[0][0]["mentions"], 0)
        self.assertEqual(results[0][0]["mention_last"], '')
        self.assertEqual(results[0][0]["mention_date"], '')
        self.assertEqual(results[0][0]["replies"], 1)
        self.assertEqual(results[0][0]["reply_last"], '1')
        self.assertEqual(results[0][0]["reply_date"], 'a date string')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Tiny Hands')
        self.assertEqual(results[1][0]["mentions"], 1)
        self.assertEqual(results[1][0]["mention_last"], '1')
        self.assertEqual(results[1][0]["mention_date"], 'a date string')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 0)
        self.assertEqual(results[1][0]["retweet_last"], '')
        self.assertEqual(results[1][0]["retweet_date"], '')

    def test_add_Tweet_to_database__retweet(self):
        # TEST: (tiny) ->(MBEyes) - reply & mention;
        #       (tiny) ->(Kdog) mention_by_proxy  EG: (ret->MBE):"Hey @MBE, @Kdog"
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)
        # Test Data
        new_tweet = Tweet(None, 'test')
        new_tweet.tweet_id = 1
        new_tweet.user_id = 400000
        new_tweet.handle = 'tinyhands'
        new_tweet.mentions = [(300000, 'Kdog')]
        new_tweet.content = 'Generic tweet @Kdog' # not stored here
        new_tweet.is_retweet = True
        new_tweet.retweeted_user = (0, 'MBEyes')
        new_tweet.retweet_status_id = 2
        new_tweet.is_reply = False
        new_tweet.in_reply_to_user = None
        new_tweet.in_reply_to_status_id = None
        new_tweet.retweet_count = 3    # not stored here
        new_tweet.favourite_count = 4  # not stored here
        new_tweet.hashtags = []        # not stored here
        new_tweet.date = 'a date string'
        new_tweet.urls = ['https://url.com/'] # not stored here
        new_tweet.website_link = 'twitter.com/status/madeupstatus1'

        # Add to database
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Preliminary check
        results = [ _ for _ in self.graph.cypher.execute(
            """MATCH (a {handle:'tinyhands'})-[r]->(b) RETURN r, b.name ORDER BY b.name""")]
        self.assertEqual(len(results), 2)

        # In depth check
        self.assertEqual(results[0][0].type, u'INDIRECT')
        self.assertEqual(results[0][1], 'Kendog Lamar')
        self.assertEqual(results[0][0]["mentions"], 1)
        self.assertEqual(results[0][0]["mention_last"], '1')
        self.assertEqual(results[0][0]["mention_date"], 'a date string')
        self.assertEqual(results[0][0]["replies"], 0)
        self.assertEqual(results[0][0]["reply_last"], '')
        self.assertEqual(results[0][0]["reply_date"], '')
        self.assertEqual(results[0][0]["retweets"], 0)
        self.assertEqual(results[0][0]["retweet_last"], '')
        self.assertEqual(results[0][0]["retweet_date"], '')

        self.assertEqual(results[1][0].type, u'DIRECT')
        self.assertEqual(results[1][1], 'Michael Blue Eyes')
        self.assertEqual(results[1][0]["mentions"], 0)
        self.assertEqual(results[1][0]["mention_last"], '')
        self.assertEqual(results[1][0]["mention_date"], '')
        self.assertEqual(results[1][0]["replies"], 0)
        self.assertEqual(results[1][0]["reply_last"], '')
        self.assertEqual(results[1][0]["reply_date"], '')
        self.assertEqual(results[1][0]["retweets"], 1)
        self.assertEqual(results[1][0]["retweet_last"], '1')
        self.assertEqual(results[1][0]["retweet_date"], 'a date string')
class EmailGraph:
    """Builds a Neo4j graph of emails, senders/receivers, emotions and key phrases.

    Pipeline per email: strip boilerplate headers (process_email), NER-tag the
    text with Stanford NER, collapse tags into entity phrases, filter/score the
    phrases (filter_by_contents), attach a WordNet hypernym as a category
    (filter_by_hypernym), then persist everything as nodes and relationships
    (add_to_graph).

    NOTE(review): reconstructed from a collapsed source dump — the nesting of a
    few statements (see add_to_graph) is the most plausible reading, not verified.
    """

    #http://py2neo.org/2.0/intro.html#nodes-relationships
    #Creates a New Graph (You will Need to Update this Function for your own install)
    def __init__(self, user, pwrd):
        # Connect to a local Neo4j instance (py2neo 2.x REST API style).
        authenticate("localhost:7474", user, pwrd)
        self.graph = Graph("http://localhost:7474/db/data/")
        # Stanford NER needs a JVM; JAVAHOME is set for nltk's tagger wrapper.
        # NOTE(review): Windows-specific hard-coded paths; also the backslashes
        # are not escaped (works only because \P, \O, \s, \c are not recognized
        # escapes) — left unchanged here.
        java_path = "C:\ProgramData\Oracle\Java\javapath\java.exe"
        os.environ['JAVAHOME'] = java_path
        self.st = StanfordNERTagger('C:\stanford-ner-2015-12-09\classifiers\english.conll.4class.distsim.crf.ser.gz',\
                                    'C:\stanford-ner-2015-12-09\stanford-ner.jar')
        self.stop_words = nltk.corpus.stopwords.words('english')
        # Domain-specific noise words (FOIA/State Dept boilerplate) excluded
        # from key-phrase extraction.
        self.legal_words = {"section","fw","re","ops","fyi","doc no","case no","subtitle","btw","usc","foia","chapter","u.s.c",\
                            "report","attachment","attachments","note","amended", "ebook","subject","unclassified department of state case","doc",\
                            "unclassified u.s. department of state","original message","project", "copyright", "pls", "you","u.s. department of state case no"}

    #process email: removes some of the headings before looking for keywords
    def process_email(self, email):
        """Return *email* with header/boilerplate lines dropped and the rest
        joined into one string, ensuring each kept line ends in a period so
        sentence tokenization works later."""
        processed = ""
        for line in email.split('\n'):
            s = line.lower()
            # Skip routing headers and classification banners.
            if s.startswith("unclassified u.s. department of state") or \
               s.startswith("release in") or \
               s.startswith("original message") or \
               s.startswith("to:") or \
               s.startswith("from:") or \
               s.startswith("sent:") or \
               s.startswith("cc:"):
                pass
            else:
                # Force a sentence boundary when the line lacks one.
                if len(line) > 0 and line[-1] == '.':
                    processed = processed + line + ' '
                else:
                    processed = processed + line + '. '
        return processed

    #filter_by_contents: receives a list of noun_phrases and filters out phrases contained in longer phrases elsewhere in the list
    def filter_by_contents(self, noun_phrases):
        """Filter and rank candidate phrases.

        *noun_phrases* is a list of (phrase, tag) tuples.  Phrases that are
        substrings of a longer phrase, legal/boilerplate words and stopwords
        are removed; the survivors are frequency-counted, the top 20 are
        merged by their phrase text (summing counts across tags) and the
        result is returned as ((phrase, tag), count) sorted by count desc.
        """
        in_others = []
        # O(n^2) scan: mark phrases contained in a different, longer phrase.
        for i, candidate in enumerate(noun_phrases):
            for j, other in enumerate(noun_phrases):
                if i != j:
                    if candidate[0].lower() in other[0].lower() and candidate[
                            0] != other[0]:  #compare each phrase with another
                        in_others.append(candidate)
        #filter out our identified 'duplicate' words and stopwords.
        filtered_words = [w for w in noun_phrases if w not in in_others and \
                          w[0].lower() not in self.legal_words and w[0].lower() not in self.stop_words]
        #create a Frequency Distribution
        unigram_fd = nltk.FreqDist(filtered_words)
        #get the most common phrases
        common_noun_phrases = unigram_fd.most_common(20)
        result = []
        # Merge entries that share the same phrase text but differ in NER tag:
        # keep the first-seen tag, sum the counts.
        words = set([w[0][0].lower() for w in common_noun_phrases])
        for w in words:
            best_match = None
            for phrase in common_noun_phrases:
                if phrase[0][0].lower() == w:
                    if best_match is None:
                        best_match = phrase
                    else:
                        best_match = (best_match[0], best_match[1] + phrase[1])
            result.append(best_match)
        return sorted([w for w in result], key=lambda w: w[1], reverse=True)

    #filter_by_hypernym: receives a list of candidates and finds the best hypernym for each.
    #I started with code by Anna Swigart, ANLP 2015, and her concept of using a dictionary to store
    #terms from WordNet, however this code drastically departs from her algorithm.
    def filter_by_hypernym(self, candidates):
        """Replace each candidate's tag with its first WordNet (noun) hypernym
        when one exists; candidates without synsets/hypernyms pass through
        unchanged.  Input/output shape: ((phrase, tag), count)."""
        #create a dictionary
        results = []
        for term in candidates:  #loop through list of candidates
            synsets = wn.synsets(term[0][0], 'n')  #obtain the synsets for the phrase
            if len(synsets) >= 1:
                # Direct hypernyms first, then instance hypernyms (e.g. for
                # proper nouns); take the head word of the first one.
                hypers = synsets[0].hypernyms(
                ) + synsets[0].instance_hypernyms()
                if len(hypers) >= 1:
                    results.append(((term[0][0],
                                     hypers[0].name().split('.')[0]), term[1]))
                else:
                    results.append(term)
            else:
                results.append(term)
        return results

    #algorithm for extracting key words from an email body
    def final_algorithm(self, email):
        """End-to-end key-phrase extraction for one email body; returns the
        filtered, categorized ((phrase, category), count) list."""
        #Create Sentences
        sentences = nltk.sent_tokenize((self.process_email(email)))
        tokenized_sentences = []
        for s in sentences:
            #get the tokens for each sentence that are filtered
            # Drop tokens containing digits, boilerplate words and short tokens.
            tokenized_sentences.append([word for word in nltk.word_tokenize(s) \
                                        if not re.search('[0-9]', word) and word.lower() not in self.legal_words and len(word) > 2])

        #separate the NER tagged entities from the rest
        def get_entities(tags):
            # Group consecutive same-label tokens into multi-word entities;
            # 'O' (outside) closes the current run.
            result = []
            curr = []
            for ent in tags:
                if ent[1] == 'O':
                    if len(curr) > 0:
                        result.append(curr)
                        curr = []
                else:
                    if len(curr) > 0:
                        if not curr[0][1] == ent[1].lower():
                            # Label changed: flush the run, start a new one.
                            result.append(curr)
                            curr = [(ent[0], ent[1].lower())]
                        else:
                            curr = curr + [(ent[0], ent[1].lower())]
                    else:
                        curr = [(ent[0], ent[1].lower())]
            return result

        #NER tag each of the sentences
        tagged_sents = self.st.tag_sents(tokenized_sentences)
        entity_names = []
        for s in tagged_sents:
            entity_names = entity_names + get_entities(s)

        #reorganize the entities for further processing
        def compress_entities(entities):
            # [(word, label), ...] runs -> single ("joined words", label) tuples.
            new_list = []
            for entity in entities:
                result = " ".join([w[0] for w in entity])
                new_list.append((result, entity[0][1]))
            return new_list

        entity_names = compress_entities(entity_names)
        #print(entity_names)  # Print unique entity names
        noun_phrases = entity_names
        #Candidates Filtered by Duplicate Nouns and Rescored by Length
        noun_phrases = self.filter_by_contents(noun_phrases)
        #print(noun_phrases)
        #Candidate with better categories/hypernyms!
        noun_phrases = self.filter_by_hypernym(noun_phrases)
        #print("Email:\n" + email)
        print("Key Phrases:\n" + str(noun_phrases))
        return noun_phrases

    #clears out a graph
    def delete(self):
        """Remove every node and relationship from the graph."""
        self.graph.delete_all()

    #checks to see if a node exists in a graph
    #http://stackoverflow.com/questions/22134649/how-to-check-if-a-node-exists-in-neo4j-with-py2neo
    def find_existing(self, label, key, value):
        """Return the first node with *label* and property key == value, or
        None when no such node exists."""
        mynode = list(
            self.graph.find(label, property_key=key, property_value=value))
        # node found
        if len(mynode) > 0:
            return mynode[0]
        # no node found
        else:
            return None

    #adds a new 'email' data element to the graph
    #code based on http://py2neo.org/2.0/intro.html#nodes-relationships
    def add_to_graph(self, data_element, terms):
        """Persist one email row plus its extracted *terms*.

        data_element: a pandas row with the columns listed below.
        terms: ((keyword, category), count) tuples from final_algorithm.

        Creates Email, User, Emotion, Keyword and Category nodes linked by
        SENT / RECEIVED / EMOTED / MENTIONS / IS_TYPE_OF relationships.
        Everything is skipped when an Email node with this DocNumber already
        exists (the whole body is guarded by `if email is None`).
        """
        #['Id', 'DocNumber', 'MetadataSubject', 'MetadataTo', 'Metadata From',
        #'MetadataDateSent', 'ExtractedSubject', 'ExtractedTo',
        #'ExtractedFrom', 'ExtractedBodyText','RawText', 'Label']]
        email_id = data_element['DocNumber']
        email_feeling = data_element['NewLabel']
        email = self.find_existing("Email", "docid", email_id)
        if email is None:
            # NewLabel '1' marks an emotional email; anything else is neutral.
            if str(email_feeling) == '1':
                email_feelstr = 'emotional'
                n = 'E'
            else:
                email_feelstr = 'neutral'
                n = 'N'
            email = Node("Email", name = n, docid = email_id, tone=email_feelstr,\
                         subject=data_element["ExtractedSubject"], date=data_element['MetadataDateSent'])
            # `s` accumulates nodes/relationships via `|` for one batched create.
            s = email
            #add From nodes
            from_id_all = data_element['ExtractedFrom']
            # Non-string (NaN) means the field was missing in the CSV.
            if type(from_id_all) is str:
                for from_id_i in from_id_all.split(';'):
                    from_id = from_id_i.strip().strip('\'')
                    sender = self.find_existing("User", "address", from_id)
                    if sender is None:
                        sender = Node("User", address=from_id)
                    s = s | Relationship(sender, "SENT", email)
            #add To nodes
            to_id_all = data_element['ExtractedTo']
            if type(to_id_all) is str:
                for to_id_i in to_id_all.split(';'):
                    to_id = to_id_i.strip().strip('\'')
                    receiver = self.find_existing("User", "address", to_id)
                    if receiver is None:
                        receiver = Node("User", address=to_id)
                    s = s | Relationship(receiver, "RECEIVED", email)
            #add Emotion nodes
            emote_all = data_element['Emotions']
            #print(emote_all)
            if type(emote_all) is str:
                print("Emotions: " + str(emote_all))
                for emote in emote_all.split(';'):
                    if len(emote) > 0:
                        emotion = self.find_existing("Emotion", "name", emote)
                        if emotion is None:
                            emotion = Node("Emotion", name=emote)
                        s = s | Relationship(email, "EMOTED", emotion)
            self.graph.create(s)
            #add keywords and categories
            for item in range(0, len(terms)):
                keyword = terms[item][0][0]
                category = terms[item][0][1]
                # NOTE(review): `n` is reused here (it held the Email name
                # above); harmless because the Email node is already created.
                n = self.find_existing("Keyword", "name", keyword)
                if n is None:
                    n = Node("Keyword", name=keyword)
                s = Relationship(email, "MENTIONS", n)
                c = self.find_existing("Category", "name", category)
                if c is None:
                    c = Node("Category", name=category)
                s = s | Relationship(n, "IS_TYPE_OF", c)
                self.graph.create(s)

    #get_random_emails - returns a number of random emails from a given data frame
    def get_random_emails(self, data_set, number):
        """Return *number* randomly chosen rows (relevant columns only) from
        *data_set*, reindexed 0..number-1."""
        random_index = np.random.permutation(data_set.index)
        # NOTE(review): DataFrame.ix is removed in modern pandas — this code
        # targets an old pandas release.
        full_data_shuffled = data_set.ix[random_index,\
            ['Id', 'DocNumber', 'MetadataSubject', 'MetadataTo', 'Metadata From', 'MetadataDateSent',\
             'ExtractedSubject', 'ExtractedTo', 'ExtractedFrom','ExtractedBodyText','RawText',\
             'NewLabel', 'Emotions']]
        full_data_shuffled.reset_index(drop=True, inplace=True)
        #separate the training data from the development data
        return full_data_shuffled.loc[0:number - 1]

    #adds a specified number of emails from a dataset
    def add_new_emails(self, num, total_df):
        """Pick *num* random emails from *total_df*, extract key phrases from
        body+subject (non-printable chars stripped) and add each to the graph."""
        selected_emails = self.get_random_emails(total_df, num)
        selected_emails["MetadataDateSent"].fillna(value='<blank>', inplace=True)
        selected_emails["ExtractedSubject"].fillna(value='<blank>', inplace=True)
        data_list = selected_emails["RawText"].values.tolist()
        subject_list = selected_emails["ExtractedSubject"].values.tolist()
        printable = set(string.printable)
        #for each email, extract the key words and then add to the graph
        for index in range(0, num):
            # Body + ' . ' + subject, keeping printable chars only and
            # dropping the '<blank>' placeholder from the subject.
            s = "".join(filter(lambda x: x in printable, data_list[index])) + ' . ' +\
                "".join(filter(lambda x: x != '<blank>' and x in printable, subject_list[index]))
            terms = self.final_algorithm(s)
            self.add_to_graph(selected_emails.loc[index], terms)
dates = pd.date_range(start='19850101', end='20180517') big_event = pd.DataFrame(index=dates) big_event['rrr'] = rrr_df['changed'] big_event['T007'] = big_volati big_event = big_event.dropna(how='all') follow_user = '******' for day in dates.tolist(): if day in big_event.index.tolist(): if pd.notnull(big_event.loc[day, 'rrr']): # get the leaf node data = test_graph.data( "match(n:event)-[r:Transmission*1..7]->(relateNode) where n.event_type='rrr' and size((relateNode)-[]->())=0 return relateNode") event = 'rrr' if pd.notnull(big_event.loc[day, 'T007']): event_node = test_graph.find('event', property_key='event_type', property_value='T007') data = test_graph.data( "match(n:event)-[r:Transmission*1..7]->(relateNode) where n.event_type='T007' and size((relateNode)-[]->())=0 return relateNode") event = 'T007' transmited_assets = [x['relateNode']['tdx_stock_code'] for x in data] transmited_follow_asset = set(transmited_assets) & set(follow_assets.keys()) for tra in transmited_follow_asset: if follow_user in follow_assets[tra]: print(day, follow_user, '关注的', tra, '因', event, '大波动造成影响!') else: # print(day, follow_user, '关注的资产今天没有影响') pass
from py2neo import authenticate, Graph, Node, Relationship from passlib.hash import bcrypt import os authenticate("localhost:7474", "neo4j", "shanghai") #graph = Graph(os.environ.get('GRAPHENEDB_URL', 'http://localhost:7474') + '/db/data/') graph = Graph("http://localhost:7474/db/data/") """ py2neo API graph.find() ; graph.match() > RETURNS generator > elem in generator: Node or Relationship graph.execute() > RETURNS RecordList > elem in RecordList: Record > elem[0]: Node graph.find(label, property_key=None, property_value=None, limit=None) graph.find_one(label, property_key=None, property_value=None) """ class Person: """ - id (UNIQUE CONSTRAINT)
# -*- coding: utf-8 -*- from py2neo import Graph, Node, Relationship, NodeSelector graph = Graph("http://139.224.129.150:7474/browser/", username="******", password="******") # 用CQL进行查询,返回的结果是list data1 = graph.data('MATCH(p:Tag) return p') print("data1 = ", data1, type(data1)) # 用find_one()方法进行node查找,返回的是查找node的第一个node data2 = graph.find_one(label='Form') print("data2 = ", data2, type(data2)) # 用find()方法进行node查找,需要遍历输出,类似于mongodb data3 = graph.find(label='Form') for data in data3: print("data3 = ", data) # Relationship查询 relationship = graph.match_one(rel_type='Sub') print(relationship, type(relationship))
inf = open('links.csv') hashes = {} for i, row in enumerate(csv.reader(inf)): first_hash = geohash.encode(float(row[1]), float(row[2]), 6) last_hash = geohash.encode(float(row[4]), float(row[5]), 6) if first_hash != last_hash: # it would be better to eliminate these by duration instead hashes.setdefault(first_hash, []).append(last_hash) for src_geohash, destinations in hashes.items(): source = Node(geohash_label, name=src_geohash) print("creating {} with {} destinations".format(src_geohash, len(destinations))) #graph.merge_one(geohash_label, source) matches = list( graph.find(geohash_label, property_key="name", property_value=src_geohash)) if matches: source = matches[0] else: geo = gaz_it_up(src_geohash) source.properties["admin1"] = geo["admin1"] source.properties["admin2"] = geo["admin2"] source.properties["asciiname"] = geo["asciiname"] source.properties["geoname"] = geo["name"] graph.create(source) for dest in destinations: print("dest=", dest) destination = Node(geohash_label, name=dest) #graph.merge_one(geohash_label, destination)
# graph.run(CREATE (Song2-[:Key]->Node2)) # graph.run(CREATE (Song3-[:Key]->Node1)) graph.create(Rel1) graph.create(Rel2) graph.create(Rel3) graph.create(Rel4) graph.create(Rel5) graph.create(Rel6) graph.create(Rel7) graph.create(Rel8) graph.create(Rel9) graph.create(Rel10) graph.create(Rel11) graph.create(Rel12) results = graph.find("Word","Name","baby") for result in results: print(result) # # MATCH (pee1)-[:Key]->(n:Word {Name:"baby"})<-[:Key]-(pee2) WHERE pee1<>pee2 RETURN pee1,pee2,n # FOREACH(p1 in pee1 | # FOREACH (p2 in pee2 | # MATCH (p1)-[:Key]->(n:Word)<-[:Key]-(p2) WHERE p1<>p2)) RETURN p1,p2,n # # FOREACH(country in cou | # FOREACH (c in ch | # FOREACH (a in addresses | # CREATE (s:Store {name:c.name+"_"+a, address:a}) # CREATE (s-[:BELONGS_TO]->c) # CREATE (s-[:IN]->country) )))
class DpNLU(object):
    """Dependency-parse based NLU: matches a parsed statement against XML
    templates, filters negated hits, then looks the resulting words up in a
    Neo4j knowledge graph.

    Expects `stmt` objects exposing get_words()/get_pos()/get_arcs(), where
    arcs[i] is (head_index, relation) with head_index 1-based (0 = root) —
    inferred from the `arcs[i - 1][0]` walk below; TODO confirm against the
    parser in use (looks like pyltp-style output).
    """

    def __init__(self, configs):
        # configs supplies: dp_template_path, neo_db_ip, neo_username,
        # neo_password (project-defined object).
        self.configs = configs
        self.root = None        # root element of the template XML tree
        self.graph = None       # py2neo Graph connection
        self.all_label = None   # node labels searched during lookup
        self.__load_template()
        self.__conn_knowledge_graph()

    def __load_template(self):
        """Parse the matching-template XML file and keep its root element."""
        tree = ET.ElementTree(file=self.configs.dp_template_path)
        self.root = tree.getroot()

    def __conn_knowledge_graph(self):
        """Open the Neo4j connection and fix the label set used for lookup."""
        self.graph = Graph(self.configs.neo_db_ip,
                           user=self.configs.neo_username,
                           password=self.configs.neo_password)
        self.all_label = ["Actor", "Album", "Honour", "Song"]

    def process(self, stmt):
        """Run every template over *stmt*, print the (possibly negated)
        matched words per template, then print the knowledge-graph nodes
        whose name equals any matched word."""
        # Debug dump: word, POS tag and dependency arc per token.
        for i in range(len(stmt.get_words())):
            print(stmt.get_words()[i] + " " + stmt.get_pos()[i] + " " \
                  + str(stmt.get_arcs()[i][0]) + ":" + stmt.get_arcs()[i][1])
        result_idx = set()
        for template in self.root:
            # Positions (1-based) of trigger words for this template.
            tw_pos = self.getpos_tmp(stmt.get_words(), stmt.get_pos(),
                                     template.find("tempword"))
            # Word indices (0-based) extracted via the template's entity chain.
            wordidx = self.entityidx(tw_pos, stmt.get_pos(), stmt.get_arcs(),
                                     template.iterfind("entity"))
            result_idx = result_idx | wordidx
            # Prefix words under negation with "-" in the debug output.
            nf = NegFilter(stmt.get_words(), stmt.get_pos(), stmt.get_arcs())
            negidx = nf.negfilter(wordidx)
            sim_result = []
            for i in wordidx:
                if i in negidx:
                    sim_result.append("-" + stmt.get_words()[i])
                else:
                    sim_result.append(stmt.get_words()[i])
            print(" ".join(sim_result))
            print("---------------------------")
        # Look each matched word up under every known label.
        node_list = list()
        for i in result_idx:
            for l in self.all_label:
                nodes = self.graph.find(label=l, property_key="name",
                                        property_value=stmt.get_words()[i])
                node_list.extend(nodes)
        for node in node_list:
            print("|".join(node.labels()) + ":" + node["name"])

    def getpos_tmp(self, words, postags, tempword):
        """Return 1-based positions of tokens matching the template's trigger
        word list AND its word-class list (both comma-separated in the XML)."""
        tw_pos = []
        for i in range(len(words)):
            if words[i] in tempword.find("word").text.split(",") and postags[
                    i] in tempword.find("wordclass").text.split(","):
                tw_pos.append(i + 1)
        return tw_pos

    def entityidx(self, des, postags, arcs, entities):
        """Follow the template's <entity> chain from the trigger positions.

        des: current 1-based destination positions (starts at trigger words).
        Returns the set of 0-based indices of words flagged isresult="true".
        """
        wordidx = set()
        for entity in entities:
            newdes = []
            for idx in range(len(postags)):
                hit = False
                # Token must carry one of the entity's relations and word classes.
                if arcs[idx][1] in entity.find("relation").text.split(
                        ",") and postags[idx] in entity.find(
                            "wordclass").text.split(","):
                    i = arcs[idx][0]
                    if entity.attrib.get("direct") == "false":
                        # Indirect link allowed: climb the head chain until a
                        # destination (or the root, index 0) is reached.
                        while i not in des and i != 0:
                            i = arcs[i - 1][0]
                    if i in des:
                        hit = True
                if hit:
                    newdes.append(idx + 1)
                    if entity.attrib.get("isresult") == "true":
                        wordidx.add(idx)
            # internode="true": matched tokens become the next hop's targets.
            if entity.attrib.get("internode") == "true":
                des = newdes
        return wordidx
class Robot():
    """NLU Robot (natural-language-understanding dialog robot).

    Public attributes:
    - graph: The connection of graph database (py2neo Graph).
    - pattern: The pattern for NLU tool: 'semantic' or 'vec'.
    - memory: The context memory of robot (qmemory/amemory deques).

    Matching pipeline in search(): preprocessing -> navigation -> online
    scene commands -> local semantic match (tag subgraph) -> key-sentence
    match -> online fallbacks (music / nearby / weather / log-unknown).
    """

    def __init__(self, password="******"):
        # Connect to the graph knowledge base.
        self.graph = Graph("http://localhost:7474/db/data/", password=password)
        # Semantic mode: 'semantic' or 'vec'.
        self.pattern = 'semantic'
        # Navigation location list from the location database.
        self.locations = get_navigation_location()
        # Online-scene flag, defaults to False.
        self.is_scene = False
        # Baidu Maps IP geolocation; falls back to a default city on network error.
        self.address = get_location_by_ip()
        # Robot configuration (User node), loaded per request in search().
        self.gconfig = None
        # Topics the current user may access.
        self.usertopics = []
        # Current QA topic (conversation context).
        self.topic = ""
        # Current QA id.
        self.qa_id = get_current_time()
        # Short-term memory: last 10 questions and last 10 answers.
        self.qmemory = deque(maxlen=10)
        self.amemory = deque(maxlen=10)
        # Random fallback answers when nothing matches.
        # TODO: log every unanswered question.
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗"
        ]

    def __str__(self):
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(
            **self.gconfig)

    @time_me()
    def configure(self, info="", userid="userid"):
        """Configure the knowledge base for a user.

        With empty *info*: return the user's current database configuration.
        With *info* as a space-separated list of names: select those
        sub-knowledge-bases, disable the rest, and return the user's topics.
        """
        # NOTE(review): identity comparison with a literal — should be
        # `userid != ""`; left unchanged here.
        assert userid is not "", "The userid can not be empty!"
        # TO UPGRADE: analyze the incoming userid and report when it is
        # not acceptable. 2017-6-7
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是标准A0001,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        if not info:
            # Query mode: report each database with its selected/available flags.
            config = {"databases": []}
            # NOTE(review): Cypher built by string concatenation — injectable
            # if userid were untrusted; left unchanged here.
            match_string = "MATCH (user:User)-[r:has]->(config:Config)" + \
                "where user.userid='" + userid + \
                "' RETURN config.name as name, r.bselected as bselected, r.available as available"
            for item in self.graph.run(match_string):
                config["databases"].append(
                    dict(name=item[0], bselected=item[1], available=item[2]))
            print("可配置信息:", config)
            return config
        else:
            selected_names = info.split()
            forbidden_names = list(set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # TODO: merge and simplify the two loops below.
            for name in selected_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=1"
                # print(match_string)
                self.graph.run(match_string)
            for name in forbidden_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=0"
                # print(match_string)
                self.graph.run(match_string)
            return self.get_usertopics(userid=userid)

    # @time_me()
    def get_usertopics(self, userid="userid"):
        """Get usertopics list.

        Returns every topic from Config nodes the user has selected and that
        are available (bselected:1, available:1).
        """
        usertopics = []
        if not userid:
            userid = "userid"
        # Fetch the sub-knowledge-bases this user is entitled to.
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualization of robot answer: substitute {robotname} etc.
        from the user's configuration into *sentence*."""
        return sentence.format(**self.gconfig)

    # @time_me()
    def add_to_memory(self, question="question", userid="userid"):
        """Add user question to memory (Memory node chained to the previous
        one via a 'next' relationship).

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".
        """
        previous_node = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        node = Node("Memory", question=question, userid=userid, qa_id=self.qa_id)
        if previous_node:
            relation = Relationship(previous_node, "next", node)
            self.graph.create(relation)
        else:
            self.graph.create(node)

    # Development requirements from Mr Tang in 2017-5-11.
    # Changed from fuzzy match to exact match, from Mr Tang in 2017-6-1.
    def extract_navigation(self, question):
        """Extract a navigation target.

        QA matching mode: pick the best-matching location from the
        navigation location list (exact substring match, requires "去").

        Args:
            question: User question.
        """
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        # temp_sim = 0
        # sv1 = synonym_cut(question, 'wf')
        # if not sv1:
            # return result
        for location in self.locations:
            # TODO: check whether "去" and the location form an adjacent
            # verb phrase rather than matching anywhere in the question.
            if "去" in question and location in question:
                print("Original navigation")
                result["content"] = location
                result["context"] = "user_navigation"
                result["behavior"] = int("0x001B", 16)
                return result
            # sv2 = synonym_cut(location, 'wf')
            # if sv2:
                # temp_sim = similarity(sv1, sv2, 'j')
            # Early exit: accept the first location above the threshold
            # instead of searching for the global best.
            # if temp_sim > 0.92:
                # print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
                # result["content"] = location
                # result["context"] = "user_navigation"
                # result["behavior"] = int("0x001B", 16)
                # return result
        return result

    def extract_pinyin(self, question, subgraph):
        """Extract synonymous QA in NLU database (pinyin variant).

        QA matching mode: pick the best-matching QA pair from the graph
        database by Jaccard similarity over pinyin tokens.

        Args:
            question: User question.
            subgraph: Sub graphs corresponding to the current dialogue.
        """
        temp_sim = 0
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        sv1 = pinyin_cut(question)
        print(sv1)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print(" ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Early exit: accept the first candidate above the threshold
            # instead of searching for the global best.
            if temp_sim > 0.75:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                # NOTE(review): exec on graph-stored api names — trusted data
                # assumed; left unchanged here.
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
        return result

    def extract_synonym(self, question, subgraph):
        """Extract synonymous QA in NLU database.

        QA matching mode: pick the best-matching QA pair from the graph
        database (exact match first, then synonym-vector similarity).

        Args:
            question: User question.
            subgraph: Sub graphs corresponding to the current dialogue.
        """
        temp_sim = 0
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        # semantic: cut into synonym-tag vectors; similarity matrix from tag
        #   similarity; sentence similarity from the matrix.
        # vec: cut into word vectors; similarity matrix from word2vec;
        #   sentence similarity from the matrix.
        if self.pattern == 'semantic':
        # elif self.pattern == 'vec':
            sv1 = synonym_cut(question, 'wf')
            if not sv1:
                return result
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                print("Similarity Score: Original sentence")
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                # The node's api hook extracts key info from the original
                # question for local lookup / third-party api / crawling.
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
            sv2 = synonym_cut(iquestion, 'wf')
            if sv2:
                temp_sim = similarity(sv1, sv2, 'j')
            # Early exit: accept the first candidate above the threshold
            # instead of searching for the global best.
            if temp_sim > 0.92:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result["content"] = self.iformat(
                    random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                if node["url"]:
                    # result["url"] = json.loads(random_item(node["url"].split("|")))
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = int(node["parameter"])
                func = node["api"]
                if func:
                    exec("result['content'] = " + func +
                         "('" + result["content"] + "')")
                return result
        return result

    def extract_keysentence(self, question):
        """Extract keysentence QA in NLU database.

        QA matching mode: pick a QA pair whose key sentence is contained in
        the question (Cypher CONTAINS, first hit only).

        Args:
            question: User question.
        """
        result = dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
            context="", url="", behavior=0, parameter=0)
        match_string = "MATCH (n:NluCell) WHERE '" + question + "' CONTAINS n.name RETURN n LIMIT 1"
        subgraph = self.graph.run(match_string).data()
        if subgraph:
            node = list(subgraph)[0]['n']
            print("Similarity Score: Key sentence")
            result["content"] = self.iformat(
                random_item(node["content"].split("|")))
            result["context"] = node["topic"]
            if node["url"]:
                # result["url"] = json.loads(random_item(node["url"].split("|")))
                result["url"] = random_item(node["url"].split("|"))
            if node["behavior"]:
                result["behavior"] = int(node["behavior"], 16)
            if node["parameter"]:
                result["parameter"] = int(node["parameter"])
            # The node's api hook extracts key info from the original
            # question for local lookup / third-party api / crawling.
            func = node["api"]
            if func:
                exec("result['content'] = " + func +
                     "('" + result["content"] + "')")
            return result
        return result

    @time_me()
    def search(self, question="question", userid="userid"):
        """Nlu search (semantic search entry point).

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".

        Returns:
            Dict contains answer, current topic, url, behavior and parameter.
        """
        # Add to question memory.
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)

        # Local semantics: whole-graph mode.
        #tag = get_tag(question)
        #subgraph = self.graph.find("NluCell", "tag", tag)
        #result = self.extract_synonym(question, subgraph)

        # Local semantics: scene + whole graph + per-user configuration.
        # Multi-user: load this user's configuration dynamically.
        self.gconfig = self.graph.find_one("User", "userid", userid)
        self.usertopics = self.get_usertopics(userid=userid)

        # 1. Preprocessing =============================================
        # Question filter (sensitive-word filtering added 2017-5-25).
        if check_swords(question):
            print("问题包含敏感词!")
            return dict(question=question, content=self.iformat(random_item(self.do_not_know)), \
                context="", url="", behavior=0, parameter=0)
        # Redefine two-character "小X" questions (surname false matches).
        if question.startswith("小") and len(question) == 2:
            question = self.gconfig['robotname']
        # Strip robot-name salutations. Add in 2017-7-5.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(
                    robotname) and len(question) >= 4 and "在线" not in question:
                question = question.lstrip(robotname)
        if not question:
            question = self.gconfig['robotname']

        # 2. Navigation ================================================
        result = self.extract_navigation(question)
        if result["context"] == "user_navigation":
            return result

        # 3. Cloud online scenes =======================================
        result = dict(question=question, content="", context="basic_cmd", url="", \
            behavior=int("0x0000", 16), parameter=0)
        # TODO: unify the scene triggers below into one pattern.
        # TODO {'behavior': 0, 'content': '理财产品取号', 'context': 'basic_cmd', 'parameter': 0, 'question': '理财产品取号', 'url': ''}
        if "理财产品" in question and "取号" not in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "理财产品"           # canonical question
            self.is_scene = True
            return result
        if "免费wifi" in question or "wifi" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "有没有免费的wifi"   # canonical question
            self.is_scene = True
            return result
        if "存款利率" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "存款利率"           # canonical question
            self.is_scene = True
            return result
        if "我要取钱" in question or "取钱" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "我要取钱"           # canonical question
            self.is_scene = True
            return result
        if "信用卡挂失" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "信用卡挂失"         # canonical question
            self.is_scene = True
            return result
        if "开通云闪付" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "开通云闪付"         # canonical question
            self.is_scene = True
            return result
        if "办理粤卡通" in question or "办理粤通卡" in question:
            result["behavior"] = int("0x1002", 16)   # enter online scene
            result["question"] = "办理粤通卡"         # canonical question, fixed 2017-7-3
            self.is_scene = True
            return result
        # Enter online scene (table-driven draft, kept for reference):
        # start_scene = ["理财产品", "wifi", "存款利率", "取钱", "信用卡挂失", "开通云闪付", "办理粤卡通"]
        # for item in start_scene:
            # if item in question:
                # result["behavior"] = int("0x1002", 16)
                # result["question"] = "办理粤卡通"
                # self.is_scene = True
        # Exit the online scene.
        end_scene = ["退出业务场景", "退出场景", "退出", "返回", "结束", "发挥"]
        for item in end_scene:
            if item == question:
            # if item in question:  # exact match avoids clashes between exit phrases
                result["behavior"] = int("0x0020", 16)   # scene exit
                self.is_scene = False
                return result
        previous_step = ["上一步", "上一部", "上一页", "上一个"]
        next_step = ["下一步", "下一部", "下一页", "下一个"]
        if self.is_scene:
            # for item in previous_step:
                # if item in question:
                    # result["behavior"] = int("0x001D", 16)  # scene: previous step
            # for item in next_step:
                # if item in question:
                    # result["behavior"] = int("0x001E", 16)  # scene: next step
            if "上一步" in question or "上一部" in question or "上一页" in question or "上一个" in question:
                result["behavior"] = int("0x001D", 16)   # scene: previous step
                return result
            elif "下一步" in question or "下一部" in question or "下一页" in question or "下一个" in question:
                result["behavior"] = int("0x001E", 16)   # scene: next step
                return result
            # result["content"] = question
            # return result
        # Common commands / interaction / business.
        # Context: repeat command. TODO: make sure the repeated item is a real
        # command, not e.g. the closing phrase of a song.
        if "再来一个" in question:
            # TODO: pick the most recent *meaningful* action from memory.
            return self.amemory[-1]

        # 4. Local standard semantics ==================================
        # Mode 1: accept candidates whose semantic score beats the threshold.
        tag = get_tag(question, self.gconfig)
        # TODO: combine semantic-tag and keyword matching.
        subgraph_all = list(self.graph.find("NluCell", "tag", tag))
        # subgraph_scene = [node for node in subgraph_all if node["topic"]==self.topic]
        # TODO: usergraph_all holds both normal and user-defined QA; the
        # user-defined ones could be matched first.
        usergraph_all = [
            node for node in subgraph_all if node["topic"] in self.usertopics
        ]
        usergraph_scene = [
            node for node in usergraph_all if node["topic"] == self.topic
        ]
        # Inspect the tag-selected subgraph:
        # for node in usergraph_all:
            # print(node["name"])
        # if subgraph_scene:
        if usergraph_scene:
            # Prefer candidates from the current topic (scene) first.
            result = self.extract_synonym(question, usergraph_scene)
            # result = self.extract_pinyin(question, usergraph_scene)
            if result["context"]:
                self.topic = result["context"]
                self.amemory.append(result)   # add to answer memory
                return result
        result = self.extract_synonym(question, usergraph_all)
        # result = self.extract_pinyin(question, usergraph_all)
        # result = self.extract_synonym(question, subgraph_all)
        self.topic = result["context"]
        self.amemory.append(result)   # add to answer memory
        # Mode 2: match when the question contains a key sentence.
        if not self.topic:
            result = self.extract_keysentence(question)
            if result["context"]:
                self.topic = result["context"]
                self.amemory.append(result)   # add to answer memory
                return result

        # 5. Online semantics ==========================================
        if not self.topic:
            # 1. Music ("sing a song by X").
            if "唱一首" in question or "唱首" in question or "我想听" in question:
                result["behavior"] = int("0x0001", 16)
                result["content"] = "好的,正在准备哦"
            # 2. Nearby food/places.
            elif "附近" in question or "好吃的" in question:
                result["behavior"] = int("0x001C", 16)
                result["content"] = self.address
            # 3. nlu_tuling (weather).
            elif "天气" in question:
                # After the Tuling API change. Add in 2017-8-4.
                location = get_location(question)
                if not location:
                    # No place in the question: prepend the IP-derived address.
                    weather = nlu_tuling(self.address + question)
                else:
                    # The question already names a place.
                    weather = nlu_tuling(question)
                # Before the Tuling API change:
                # weather = nlu_tuling(question, loc=self.address)
                result["behavior"] = int("0x0000", 16)
                try:
                    # Before the Tuling API change:
                    # temp = weather.split(";")[0].split(",")[1].split()
                    # myweather = temp[0] + temp[2] + temp[3]
                    # After the Tuling API change. Add in 2017-8-3.
                    temp = weather.split(",")
                    myweather = temp[1] + temp[2]
                except:
                    myweather = weather
                result["content"] = myweather
                result["context"] = "nlu_tuling"
            # 4. Append every unanswered question to a log file.
            else:
                with open("C:/nlu/bin/do_not_know.txt", "a", encoding="UTF-8") as file:
                    file.write(question + "\n")
            # 5. nlu_tuling fallback (disabled):
            # else:
                # result["content"] = nlu_tuling(question, loc=self.address)
                # result["context"] = "nlu_tuling"
        return result
title = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"] abstract = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"] authors = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"] doi = acm_structure[publisher_key][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"] article_to_be_added = graph.merge_one("Article", "doi", doi) article_to_be_added['abstract'] = abstract # article_to_be_added['authors'] = authors article_to_be_added['title'] = title article_to_be_added.push() print("\t\t\t" + title) relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "printed_in", journal_to_be_added, volume=volume_key, issue=issue_key, issue_date=str(acm_structure[publisher_key][journal_key][volume_key][issue_key]["date"]["month"])+str(acm_structure[publisher_key][journal_key][volume_key][issue_key]["date"]["year"]), issn=acm_structure[publisher_key][journal_key][volume_key][issue_key]["issn"])) # primary_author_bool = True for author in authors: # print("Author detected is: " + author["name"]) # print("Author_link detected is: " + author["link"]) results = graph.find('Author', 'link', author["link"]) # print(type(results)) if len(list(results)) == 1: for result in results: print("\t\t\t\t" + result['full_name'] + " FOUND") else: # print("\t\t\t\tNOT FOUND! Creating Author...") author_to_be_added = graph.merge_one("Author", "link", author["link"]) author_str_split_list = author["name"].split() if (len(author_str_split_list) == 1): author_to_be_added['full_name'] = author["name"].title() author_to_be_added['fist_name'] = author_str_split_list[0] author_to_be_added['middle_name'] = " " author_to_be_added['last_name'] = " " elif (len(author_str_split_list) == 2): author_to_be_added['full_name'] = author["name"].title()
class CategoryTree(object):
    """Amazon browse-node category tree stored in Neo4j (py2neo legacy API).

    Nodes are labelled ``Category`` with a unique ``id`` of the form
    ``<country><BrowseNodeId>``; parent->child edges are ``HAS_CHILD``.
    An in-memory mirror of the tree is kept in ``self.categories``
    (id -> plain dict, see category_node_dict).
    """

    def __init__(self, country):
        # Read Neo4j connection settings from the project configuration.
        project_conf = get_project_conf()
        neo_host = project_conf.get("NEO4J", "host")
        user = project_conf.get("NEO4J", "username")
        password = project_conf.get("NEO4J", "password")
        # Silence chatty py2neo/httpstream loggers.
        logging.getLogger("py2neo.batch").setLevel(logging.WARNING)
        logging.getLogger("py2neo.cypher").setLevel(logging.WARNING)
        logging.getLogger("httpstream").setLevel(logging.WARNING)
        authenticate(neo_host, user, password)
        self.graph = Graph("http://%s/db/data/" % neo_host)
        try:
            # Idempotent: fails (and is ignored) if the constraint exists.
            self.graph.schema.create_uniqueness_constraint("Category", "id")
        except:
            pass
        # Cache every already-known category for this country.
        self.categories = self.get_categories(country)

    def merge_node(self, node, country, do_not_load=False):
        """Merge one browse node into the graph and the local cache.

        ``node`` is an Amazon browse-node dict (keys 'BrowseNodeId', 'Name',
        optionally 'IsCategoryRoot'). Returns the py2neo node.
        """
        category_id = "%s%s" % (country, str(node['BrowseNodeId']))
        category = self.graph.merge_one('Category', 'id', category_id)
        # Only set the name on first creation; never overwrite an existing one.
        if 'name' not in category.properties:
            category['name'] = node['Name']
        category['is_root'] = int(node.get('IsCategoryRoot', 0))
        category['do_not_load'] = bool(do_not_load)
        category['country'] = country
        category.push()
        if not category_id in self.categories:
            self.categories[category_id] = self.category_node_dict(category)
        return category

    def relationship(self, parent, child):
        # Build (but do not persist) a parent-HAS_CHILD->child relationship.
        return Relationship(parent, 'HAS_CHILD', child)

    def relationship_exists(self, parent, child):
        # True if a HAS_CHILD edge already links parent to child.
        if len(list(self.graph.match(start_node=parent, end_node=child,
                                     rel_type='HAS_CHILD'))) > 0:
            return True
        return False

    def create_relationship(self, relationship):
        # Persist the relationship only if it does not already exist.
        self.graph.create_unique(relationship)
        relationship.push()

    def create_relationships(self, parent, children):
        for child in children:
            self.create_relationship(parent, child)

    def add_new_category(self, browsenode, amazon_api, country):
        """Insert ``browsenode`` (and any missing ancestors) into the tree.

        ``browsenode`` is an Amazon API browse-node dict with optional
        'Ancestors' / 'Children' sub-dicts, e.g.::

            {u'Ancestors': {u'BrowseNode': {...}},
             u'BrowseNodeId': u'1340509031',
             u'Children': {u'BrowseNode': [...]},
             u'Name': u'Mobile Phones & Communication'}

        Returns the cached dict for the newly added category (or None).
        """
        added_categories = []
        do_not_load = True
        current_browsenode = browsenode
        # Determine do_not_load from the youngest ancestor already known.
        while 'Ancestors' in current_browsenode:
            current_id = "%s%s" % (country, current_browsenode['BrowseNodeId'])
            current_node = self.categories.get(current_id, None)
            if not current_node:
                if type(current_browsenode['Ancestors']) is dict:
                    current_browsenode = current_browsenode['Ancestors']
                elif type(current_browsenode['Ancestors']) is list:
                    # This shouldn't happen. But if it does, better to log and
                    # continue with the first one.
                    current_browsenode = current_browsenode['Ancestors'][0]
            else:
                do_not_load = bool(current_node['do_not_load'])
                break
        # Create the missing nodes and relationships, walking up to the root.
        child = self.merge_node(browsenode, country, do_not_load)
        added_categories.append(child)
        current_browsenode = browsenode
        while 'Ancestors' in current_browsenode and int(current_browsenode.get("IsCategoryRoot", 0)) != 1:
            if type(current_browsenode['Ancestors']) is dict:
                parent_browsenode_id = current_browsenode['Ancestors']['BrowseNode']['BrowseNodeId']
            elif type(current_browsenode['Ancestors']) is list:
                # This shouldn't happen. But if it does, better to log and
                # continue with the first one.
                parent_browsenode_id = current_browsenode['Ancestors'][0]['BrowseNode']['BrowseNodeId']
            parent_graph_id = "%s%s" % (country, parent_browsenode_id)
            parent_node = self.categories.get(parent_graph_id, None)
            if parent_node:
                # Known parent: link and stop climbing.
                # NOTE(review): get_category returns a plain dict, not a py2neo
                # node — verify Relationship() accepts it, this looks suspect.
                parent = self.get_category(parent_graph_id)
                relationship = self.relationship(parent, child)
                self.create_relationship(relationship)
                break
            else:
                parent_browsenode = amazon_api.get_node(parent_browsenode_id)
                if type(parent_browsenode) is dict:
                    parent = self.merge_node(parent_browsenode, country, do_not_load)
                    relationship = self.relationship(parent, child)
                    self.create_relationship(relationship)
                    added_categories.append(parent)
                    current_browsenode = parent_browsenode
                elif parent_browsenode == "AWS.InvalidParameterValue":
                    # Amazon no longer knows this node: purge it locally.
                    print "Deleting node %s and all its children" % str(parent_browsenode_id)
                    self.delete_category(parent_browsenode_id)
                    break
                else:
                    #self.logger.warning("Unknown error from amazon API.")
                    print 'Unknown error from amazon API. %s' % parent_browsenode
                    break
        # Recompute depth for everything that was added and refresh the cache.
        for category in added_categories:
            category_id = "%s%s" % (country, category['id'])
            length = self.get_shortest_length_to_root(category_id)
            category['shortest_length_root'] = length
            category.push()
            self.categories[category_id] = self.category_node_dict(category)
        new_category_id = "%s%s" % (country, browsenode['BrowseNodeId'])
        return self.categories.get(new_category_id)

    def category_node_dict(self, category_node):
        # Project a py2neo Category node onto a plain dict for the cache.
        result = {
            'is_root': category_node['is_root'],
            'id': category_node['id'],
            'name': category_node['name'],
            'do_not_load': category_node['do_not_load'],
            'shortest_length_root': category_node['shortest_length_root']
        }
        return result

    def get_categories(self, country):
        """Load every Category node for ``country`` into an id-keyed dict."""
        categories = {}
        records = self.graph.find('Category', property_key='country',
                                  property_value=country)
        for category in records:
            categories[category['id']] = self.category_node_dict(category)
        return categories

    def get_category(self, category_id):
        # Returns the cache-style dict for one category, or None if absent.
        category = self.graph.find_one('Category', property_key='id',
                                       property_value=category_id)
        if category:
            return self.category_node_dict(category)

    def is_orphan(self, category_id):
        """True if the category is missing or unreachable from any root."""
        category = self.get_category(category_id)
        if not category:
            return True
        if not bool(category['is_root']):
            query = """MATCH p=a-[:HAS_CHILD*]->n WHERE n.id = {id} AND a.is_root=1 RETURN p LIMIT 1"""
            cypher = self.graph.cypher
            path = cypher.execute_one(query, id=category_id)
            if not path:
                return True
        return False

    def get_children(self, category_id):
        # All descendants (transitive) of the given category.
        query = """MATCH (n)-[r:HAS_CHILD*]->(m) WHERE n.id = {id} RETURN m"""
        cypher = self.graph.cypher
        children = cypher.execute(query, id=category_id)
        return children

    def delete_category(self, category_id):
        """Delete a category and all of its descendants (plus their edges)."""
        cypher = self.graph.cypher
        children = self.get_children(category_id)
        delete_query = """ MATCH (n {id:'%s'}) OPTIONAL MATCH n-[r]-() DELETE n,r """
        if children:
            for record in children:
                child = record[0]
                cypher.execute_one(delete_query % child["id"])
        cypher.execute_one(delete_query % category_id)

    def get_shortest_length_to_root(self, category_id):
        # Path length from a root to this node (None when unreachable).
        # NOTE(review): ORDER BY ... DESC returns the LONGEST path despite the
        # method name — confirm which one callers actually rely on.
        query = """MATCH p=a-[:HAS_CHILD*]->n WHERE n.id={id} AND a.is_root=1 RETURN length(p) ORDER BY length(p) DESC LIMIT 1"""
        cypher = self.graph.cypher
        length = cypher.execute_one(query, id=category_id)
        return length
#-*-coding:utf-8-*- import pandas as pd from py2neo import Graph, Node, walk, Relationship import re import jieba import os os.chdir('E:/课程/知识图谱/第3周/数据库建立与查询') g = Graph("http://localhost:7474") # username="******", password="******" #加载字典 jieba.load_userdict("./dict.txt") #获取英雄列表 heros = [] hero = g.find(label='hero') for i in hero: heros.append(i.properties['name']) #获取装备列表 weapons = [] weapon = g.find(label='weapon') for i in weapon: weapons.append(i.properties['name']) #属性同义词:数组第一个元素为数据库里属性名,第二个为输出时官方名,后面的是可能的同义词名 HP_all = ['HP', u'生命值', u'血量', u'血上限'] MP_all = ['MP', u'法力值', u'蓝量', u'蓝'] HP_recover_all = [u'HP_recover', u'每5秒回血', '回血'] MP_recover_all = [u'MP_recover', u'每5秒回复法力值', u'每5秒回蓝', u'回蓝'] R_cooling_all = ['R_cooling', u'大招冷却时间']
from py2neo import Graph, authenticate, Node, Relationship import MySQLdb import threading authenticate("localhost:7474", "neo4j", "8760neo4j") graph = Graph() mynode = list(graph.find('fw', property_key='count')) ct = 1 fobj = open("textdump1.txt", "r").readlines() file_tacker = open("tarcker.txt", "a") #for i in fobj: def indexing(i): global ct print "*********" print i print ct print "**********" i = i.lower() file_tacker.write(str(i)) temp = i.split(" ", 3) b = "" for i in temp: b = b + "".join(" " + str(i).replace("'", "")) b = b.strip() s = b.split(" ", 3) dic = {} for i in range(len(s)):
con = sqlite3.connect('../db/search_title.db') cur = con.cursor() cur.execute("CREATE TABLE Title (title TEXT, item_id INT, poster_path TEXT, year INT)") cur.execute("CREATE INDEX title_index ON Title (title)") def get_new_id(): query = """MERGE (nid:ItemIncremental) ON CREATE SET nid.count = 1 ON MATCH SET nid.count = nid.count + 1 RETURN nid.count""" new_id = graph.cypher.execute(query)[0][0] return new_id # graph = Graph() genres = graph.find('Genre') genre_dict = {} for genre in genres: genre_dict[genre.properties['name']] = genre with con: with open (titles_file, 'r') as f_in: counter = 0 more_than_one = 0 no_genre_counter = 0 for line in f_in: try: obj = json.loads(line) except: obj = ast.literal_eval(line)
class GraphExporter:
    """Exports a Neo4j (py2neo) database into NetworkX graphs.

    Two graph flavours are supported: the full "Taiko Network" graph
    (groups, members, memberships, connections) and a members-only
    demographic graph. Either can be written out as GEXF.
    """

    def __init__(self):
        # Connection settings come from the project-local `neo4j` module.
        authenticate(neo4j.HOST_PORT, neo4j.USERNAME, neo4j.PASSWORD)
        self.neo4j_db = Graph(neo4j.REMOTE_URI)

    def create_taikonetwork_graph(self):
        """Build the full network graph into self.graph."""
        print("> Taiko Network Graph: querying all nodes and relationships...")
        self.graph = nx.Graph()
        self._add_group_nodes()
        self._add_memberships()
        self._add_member_nodes()
        self._add_unique_connections()
        print("> Taiko Network Graph: SUCCESSFULLY CREATED!\n"
              "> Export to graph file format to save.\n")

    def create_demographic_graph(self):
        """Build a members-only graph (with demographic attributes)."""
        print("> Demographic Graph: querying all Member nodes and Connection rels...")
        self.graph = nx.Graph()
        self._add_member_nodes(demo=True)
        self._add_unique_connections(demo=True)
        print("> Demographic Graph: SUCCESSFULLY CREATED!\n"
              "> Export to graph file format to save.\n")

    def export_gexf_graph(self, filepath='graph.gexf'):
        """Write the most recently built graph to ``filepath`` as GEXF."""
        nx.write_gexf(self.graph, filepath, encoding='utf-8',
                      prettyprint=True, version='1.2draft')

    def _add_group_nodes(self):
        # One NetworkX node per Neo4j 'Group' node; fixed pink viz color.
        groups = self.neo4j_db.find('Group')
        color = {'r': 255, 'g': 2, 'b': 97, 'a': 1}
        for g in groups:
            data = g.properties
            self.graph.add_node(
                g._id, label=data['name'], sf_id=data['sf_id'],
                viz={'color': color})

    def _add_member_nodes(self, demo=False):
        # One node per 'Member'; demo mode carries demographic attributes
        # instead of the Salesforce id.
        members = self.neo4j_db.find('Member')
        for m in members:
            data = m.properties
            color = self._random_color(m._id, 1)
            if demo:
                self.graph.add_node(
                    m._id,
                    label=data['firstname'] + ' ' + data['lastname'],
                    gender=data['gender'], dob=data['dob'],
                    race=data['race'], ethnicity=data['asian_ethnicity'],
                    viz={'color': color})
            else:
                self.graph.add_node(
                    m._id,
                    label=data['firstname'] + ' ' + data['lastname'],
                    sf_id=data['sf_id'],
                    viz={'color': color})

    def _add_unique_connections(self, demo=False):
        # CONNECTED_TO edges, de-duplicated regardless of direction.
        connections = self.neo4j_db.match(rel_type='CONNECTED_TO')
        unique_rels = []
        for c in connections:
            start = c.start_node._id
            end = c.end_node._id
            if (start, end) not in unique_rels and (end, start) not in unique_rels:
                if demo:
                    # Neutral grey for the demographic view.
                    color = {'r': 213, 'g': 213, 'b': 213, 'a': 0.3}
                else:
                    color = self._random_color(start, 0.3)
                self.graph.add_edge(start, end, viz={'color': color})
                unique_rels.append((start, end))

    def _add_memberships(self):
        # MEMBER_OF edges, colored by the member end of the relationship.
        memberships = self.neo4j_db.match(rel_type='MEMBER_OF')
        for ms in memberships:
            color = self._random_color(ms.start_node._id, 0.3)
            self.graph.add_edge(ms.start_node._id, ms.end_node._id,
                                viz={'color': color})

    def _random_color(self, obj_id, alpha):
        """Deterministic palette color for ``obj_id`` with the given alpha.

        NOTE(review): the chosen dict is mutated in place (``c['a']``), so the
        palette entries accumulate the last alpha used — harmless here since
        'a' is always set before use, but worth confirming if reused.
        """
        colors = [{'r': 164, 'g': 243, 'b': 121},
                  {'r': 243, 'g': 230, 'b': 121},
                  {'r': 243, 'g': 121, 'b': 184},
                  {'r': 154, 'g': 121, 'b': 243},
                  {'r': 202, 'g': 243, 'b': 121},
                  {'r': 243, 'g': 177, 'b': 121},
                  {'r': 243, 'g': 121, 'b': 238},
                  {'r': 121, 'g': 243, 'b': 212},
                  {'r': 243, 'g': 190, 'b': 121},
                  {'r': 121, 'g': 194, 'b': 243},
                  {'r': 157, 'g': 2, 'b': 253},
                  {'r': 2, 'g': 86, 'b': 253}]
        c = colors[obj_id % 12]
        c['a'] = alpha
        return c
for row in bsm.rows[1:]: from_type, from_name, edge_type, edge_name, to_type, to_name, netlog = [cell.value for cell in row] if netlog is None: from_type = "grey" to_type = "grey" print(from_type, from_name, edge_type, to_type, to_name) from_node = graph.merge_one(from_type.strip(), "name", from_name.strip()) to_node = graph.merge_one(to_type.strip(), "name", to_name.strip()) from_to = Relationship(from_node, edge_type, to_node) graph.create_unique(from_to) # get nodes with degree nodes = [] for label in graph.node_labels: for p in graph.find(label): node = {"id": p.ref.split("/")[-1], "label": p["name"], "title": p["name"], "value": p.degree, "group": label} nodes.append(node) with open("report/nodesnetlog.js", "w") as f: f.write("var nodesraw = " + dumps(nodes, indent=2) + ";") # get edges edges = [] for r in graph.match(): edge = {"to": r.end_node.ref.split("/")[-1], "from": r.start_node.ref.split("/")[-1] }
class test_pipeline(unittest.TestCase):
    """End-to-end tests: file parsing, aggregation, graph service and HTTP API.

    Requires a Neo4j instance on localhost:8484 and the API server on
    localhost:8000; test data files live under ``data/``.
    """

    LEN_DATETIME = 26   # length of a microsecond-precision ISO datetime string
    LEN_TEST_FILE = 632  # expected JSON length of the parsed test file

    def setUp(self):
        try:
            __location__ = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            self.src = open(
                os.path.join(__location__, "data/bit-test-data.txt"))
            self.badFreq = open(
                os.path.join(__location__, "data/bad-frequency.txt"))
            self.badStartTime = open(
                os.path.join(__location__, "data/bad-starttime.txt"))
            self.graph = Graph("http://localhost:8484/db/data")
            # Start every test from an empty database.
            self.graph.delete_all()
            self.service = WaferService(self.graph)
        except:
            print "Error during unittest setup"

    def tearDown(self):
        self.graph.delete_all()

    #
    # File tests
    #
    def test_open(self):
        self.assertEquals(len(self.src.read().split("\n")), 20)

    #
    # Parser tests
    #
    def test_parser(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(len(bitdo.toJson()), test_pipeline.LEN_TEST_FILE)
        self.assertEquals(len(bitdo.channels.keys()), 5)
        self.assertEquals(bitdo.header["SamplingFrequency"], "1000")
        self.assertEquals(len(bitdo.channels["EMG"]), 16)
        # Assure that datetime is to microsecond precision
        self.assertEquals(
            len(bitdo.header["StartDateTime"]), test_pipeline.LEN_DATETIME)

    def test_parser_errors(self):
        # Malformed headers must raise, not silently parse.
        self.assertRaises(AttributeError, parser.BITdo, (self.badFreq))
        self.assertRaises(AttributeError, parser.BITdo, (self.badStartTime))

    #
    # Aggregator tests
    #
    def test_aggregator_nums(self):
        a = [0, 0, 1, 1, 1]
        s = aggregator.streaksIn(a)
        self.assertEquals(s[0].getStreaks(), [2])
        self.assertEquals(s[0].getStreakExp(2), [4])
        self.assertEquals(s[1].getStreaks(), [3])
        self.assertEquals(s[1].getStreakExp(2), [9])

    def test_aggregator_bools(self):
        b = [True, False, False, True, False]
        s = aggregator.streaksIn(b)
        self.assertEquals(s[True].getStreaks(), [1, 1])
        self.assertEquals(s[False].getStreaks(), [2, 1])
        self.assertEquals(s[False].getStreakExp(2), [4, 1])

    def test_aggregator_strings(self):
        c = ["cat", "826", "826", "826", "~~", "~~", "cat", "cat", "~~"]
        s = aggregator.streaksIn(c)
        self.assertEquals(s["cat"].getStreaks(), [1, 2])
        self.assertEquals(s["cat"].getStreakExp(2), [1, 4])
        self.assertEquals(s["826"].getStreaks(), [3])
        self.assertEquals(s["826"].getStreakExp(3), [27])
        self.assertEquals(s["~~"].getStreaks(), [2, 1])
        self.assertEquals(s["~~"].getStreakExp(-1), [0.5, 1])

    def test_aggregator_average(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(aggregator.average(bitdo.channels['EMG']), 525.4375)
        self.assertEquals(aggregator.average([1, 2, 3]), 2)
        self.assertEquals(aggregator.average([x for x in range(1000)]), 499.5)

    #
    # Graph Service
    #
    def test_add_new_user(self):
        user = self.service.add_user("Duke")
        userid = user.properties["userid"]
        activity = self.service.add_activity(
            userid, "Free Throws", "no description")
        activityname = activity.properties["name"]
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.assertEquals(count(self.graph.find("User")), 1)
        self.assertEquals(count(self.graph.find("Activity")), 1)
        self.assertEquals(count(self.graph.find("Moment")), 2)
        self.assertEquals(count(self.graph.find("Annotation")), 2)

    #
    # Graph API
    #
    def test_post_user(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)

    def test_post_user_fails(self):
        # Missing payload must be rejected.
        r = requests.post('http://localhost:8000/users', {})
        self.assertEquals(r.status_code, 400)

    def test_post_activity(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

    def test_post_activity_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        r = requests.post('http://localhost:8000/activities', {
            'userid': 'Thaddeus'})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/users', {
            'name': 'Free-throw shooting'})
        self.assertEquals(r.status_code, 400)

    def test_post_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        r = newMoment('Thaddeus', 'Free-throw shooting', now(),
                      ["make:true", "swish:true"])
        self.assertEquals(r.status_code, 201)

    def test_post_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        annotations = ["make:true", "swish:true"]
        r = requests.post('http://localhost:8000/moments', {
            # missing userid
            'name': 'Free-throw shooting',
            'timestamp': now(),
            'annotations[]': annotations})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now()
            # missing annotations
        })
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now(),
            # it's `annotations[]`... sigh
            'annotations': annotations})
        self.assertEquals(r.status_code, 400)

    def test_get_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        r = getMoments('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        self.assertEquals(len(r.json()), 3)

    def test_get_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Basketball')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        # wrong activity name
        r = getMoments('Thaddeus', 'B_sketb_ll')
        self.assertEquals(r.status_code, 400)
class GraphDatabase():
    """Neo4j-backed word/document co-occurrence graph (py2neo legacy API).

    Document and Feature (word) nodes carry running 'in-weight'/'out-weight'
    counters; weighted relations between them track co-occurrence counts.
    """

    def __init__(self):
        try:
            # Credentials are embedded in the URL (masked in source).
            self.graph = Graph(
                'http://*****:*****@localhost:7474/db/data')
        except:
            print 'ERROR: Initialize Neo4j browser'
        # Start from a clean database on every run.
        self.graph.delete_all()

    def createDocumentNode(self, index, label):
        """Merge a 'Document' node named 'Doc <index>' and init its counters."""
        docNode = self.graph.merge_one('Document', 'name', 'Doc ' + str(index))
        self.updateNode(docNode, {
            'id': index,
            'label': label,
            'in-weight': 0,
            'out-weight': 0
        })
        return docNode

    def createFeatureNode(self, index, word):
        """Create a new 'Feature' node for ``word`` with zeroed counters."""
        wordNode = Node('Feature', word=word)
        self.graph.create(wordNode)
        self.updateNode(wordNode, {
            'in-weight': 0,
            'out-weight': 0,
            'id': index
        })
        return wordNode

    def getFeatureNode(self, word):
        # First Feature node whose 'word' property matches.
        # Raises IndexError when the word is absent.
        return list(
            self.graph.find('Feature', property_key='word',
                            property_value=word))[0]

    def createWeightedRelation(self, node1, node2, relation):
        """Create node1-[relation]->node2 or bump its weight if present.

        Either way the endpoints' 'out-weight'/'in-weight' counters are
        incremented as well.
        """
        match = self.graph.match(start_node=node1, rel_type=relation,
                                 end_node=node2)
        numberOfRelations = sum(1 for x in match)
        if numberOfRelations >= 1:
            # Re-run the query: the first match generator is already consumed.
            match = self.graph.match(start_node=node1, rel_type=relation,
                                     end_node=node2)
            # NOTE(review): with a unique relation this runs once; confirm the
            # intended behavior if multiple parallel relations can exist.
            for relationship in match:
                self.increaseWeight(relationship)
                self.increaseWeight(node1, 'out-weight')
                self.increaseWeight(node2, 'in-weight')
        else:
            newRelation = Relationship(node1, relation, node2, weight=1)
            self.graph.create(newRelation)
            self.increaseWeight(node1, 'out-weight')
            self.increaseWeight(node2, 'in-weight')

    def increaseWeight(self, entity, weight='weight'):
        # Increment a counter property on a node or relationship and persist.
        entity[weight] = entity[weight] + 1
        self.graph.push(entity)

    def updateNode(self, node, propertyDict):
        # Bulk-update node properties and persist.
        node.properties.update(propertyDict)
        self.graph.push(node)

    def normalizeRelationships(self, nodes, relation):
        """Store weight / in-weight of the target node as 'norm_weight'."""
        for node in nodes:
            for rel in node.match_incoming(relation):
                rel['norm_weight'] = rel['weight'] / node['in-weight']
                self.graph.push(rel)

    def getNodes(self, feature):
        # All nodes with the given label; `feature` is interpolated into the
        # Cypher string, so it must be a trusted label name.
        recordList = self.graph.cypher.execute(
            'MATCH (node:%s) RETURN node' % feature)
        return [record.node for record in recordList]

    def getMatrix(self, nodesX, nodesY=None, relation='followed_by',
                  propertyType='norm_weight'):
        """Dense adjacency matrix of ``relation`` weights, indexed by 'id'."""
        if nodesY == None:
            nodesY = nodesX
        matrix = np.zeros([len(nodesX), len(nodesY)])
        for node in nodesX:
            rowIndex = node['id']
            for outRelation in node.match_outgoing(relation):
                colIndex = outRelation.end_node['id']
                weight = outRelation[propertyType]
                matrix[rowIndex, colIndex] = weight
        return matrix

    def cypherContextSim(self):
        # Run the CONTEXT_SIM Cypher statement in a single transaction.
        tx = self.graph.cypher.begin()
        tx.append(CONTEXT_SIM)
        tx.commit()
class Network():
    """Neo4j-backed webpage graph for a crawler: stores pages, `links_to`
    edges, crawl-frequency metadata and PageRank scores.

    Each page node is labelled with its own link URL and carries
    `date_last_updated`, `frequency`, `calculated_frequency`, `link`,
    `last_crawled_time`, `time_remaining` and `page_rank` properties.
    """

    def __init__(self):
        self.graph_instance = Graph()
        self.time = self.update_time(str(datetime.datetime.now()))

    # Updates current instance of time by system
    def update_time(self, time):
        self.time = time

    # Checks if node exists, if it does not exist - creates new node; else, updates node
    def add_node(self, link, date_last_updated, frequency):
        calculated_frequency = convert_frequency_to_hours(frequency)
        if (not self.check_node_exist(link)):
            # Create a new node for webpage with an initial calculated frequency
            n = Node(link, date_last_updated=date_last_updated,
                     frequency=frequency,
                     calculated_frequency=calculated_frequency, link=link)
            self.graph_instance.create(n)
        else:
            # Update existing fields for webpage node
            n = self.graph_instance.find_one(link)
            if (n["date_last_updated"] != ""):
                calculated_frequency = self._update_calculated_frequency(
                    n["date_last_updated"], date_last_updated)
            n["date_last_updated"] = date_last_updated
            n["calculated_frequency"] = calculated_frequency
            n["frequency"] = frequency
            n.push()
        return n

    # Measures calculated frequency from subtracting previous date_last_updated
    # to current date_last_updated (returns time in hours, -1 on bad input)
    def _update_calculated_frequency(self, prev_date_updated, new_date_updated):
        try:
            prev_date = datetime.datetime.strptime(prev_date_updated, "%Y-%m-%d")
            new_date = datetime.datetime.strptime(new_date_updated, "%Y-%m-%d")
            td = new_date - prev_date
            return td.total_seconds() // 3600
        except (TypeError, ValueError):
            # Narrowed from a bare `except`: only parse failures mean -1.
            return -1

    # If the relationship doesn't exist, create a new edge; else, update the tag
    def add_edge(self, node_u, node_v_link, relationship):
        self.add_node(node_v_link, "", "")
        node_v = self.get_node(node_v_link)
        self.graph_instance.create(
            Relationship(node_u, "links_to", node_v, tag=relationship))

    def check_node_exist(self, link):
        return len(list(self.graph_instance.find(link))) != 0

    def check_relationship_exist(self, node_u, node_v):
        return len(
            list(
                self.graph_instance.match(start_node=node_u, end_node=node_v,
                                          rel_type="links_to"))) > 0

    def delete_failed_webpages(self, link):
        """Remove a page node and every edge touching it."""
        if (self.check_node_exist(link) == False):
            return
        node = self.get_node(link)
        self.delete_relationship(node)
        self.delete_incoming_relationship(node)
        self.graph_instance.delete(node)

    def delete_relationship(self, node_u):
        # Drop all outgoing links_to edges of node_u.
        rels = list(
            self.graph_instance.match(rel_type="links_to", start_node=node_u,
                                      end_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def delete_incoming_relationship(self, node_u):
        # Drop all incoming links_to edges of node_u.
        rels = list(
            self.graph_instance.match(rel_type="links_to", end_node=node_u,
                                      start_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def get_node(self, link):
        return self.graph_instance.find_one(link)

    def get_node_information(self, link):
        """Return a dict of crawl metadata plus in/out links, {} if absent.

        NOTE(review): `link` is concatenated into the Cypher string — an
        injection risk if links are untrusted; should use query parameters.
        """
        check_node = self.graph_instance.data("MATCH (n {link: '" + link +
                                              "'} ) RETURN n")
        if len(check_node) == 0:
            return {}
        n = self.get_node(link)
        node_data = {}
        node_data["date_last_updated"] = n["date_last_updated"]
        node_data["calculated_frequency"] = n["calculated_frequency"]
        node_data["frequency"] = n["frequency"]
        node_data["outlinks"] = self.get_outlinks(link)
        node_data["inlinks"] = self.get_inlinks(link)
        return node_data

    def get_outlinks(self, link):
        outlink_data = self.graph_instance.data("MATCH (n {link: '" + link +
                                                "'} )-->(node) RETURN node")
        outlinks = []
        for o in outlink_data:
            outlinks.append(o["node"]["link"])
        return outlinks

    def get_inlinks(self, link):
        inlink_data = self.graph_instance.data("MATCH (n {link: '" + link +
                                               "'} )<--(node) RETURN node")
        inlinks = []
        for o in inlink_data:
            inlinks.append(o["node"]["link"])
        return inlinks

    # Get adjacency matrix from Neo4j and nodes from py2neo
    def _to_matrix(self):
        nodes = list(self.graph_instance.node_selector.select())
        N = len(nodes)
        mat = np.zeros((N, N))
        # Populate the adjacency matrix
        for i, a in enumerate(nodes):
            for j, b in enumerate(nodes):
                # Use existing function to check for link
                mat[i, j] = self.check_relationship_exist(a, b)
        return mat

    # Iterate over nodes and add pagerank
    def update_pagerank(self):
        # Get all the nodes
        nodes = self.graph_instance.node_selector.select()
        # Iterate over the result of _pagerank and the nodes
        for pr, node in zip(self._pagerank(), nodes):
            # Update the node's pagerank and push back to neo4j
            node.update(page_rank=pr)
            self.graph_instance.push(node)

    # Simple show function to get nodes and display their pagerank
    def show_pagerank(self, selector=None, link=None):
        nodes = list(self.graph_instance.node_selector.select())
        for node in nodes:
            if isinstance(link, str):
                if not list(node.labels())[0] == link:
                    continue
            elif isinstance(link, (list, tuple)):
                if not list(node.labels())[0] in link:
                    continue
            # NOTE(review): loop filters nodes but displays nothing — the
            # original body ends here; kept as-is to preserve behavior.

    # Get the pageranks for any given list of links (or all)
    def get_pagerank_dict(self, links=[]):
        nodes = list(self.graph_instance.node_selector.select())
        dct = {}
        for node in nodes:
            if isinstance(links, str):
                if not list(node.labels())[0] == links:
                    continue
            elif isinstance(links, (list, tuple)):
                if not list(node.labels())[0] in links:
                    continue
            dct[list(node.labels())[0]] = node.get('page_rank')
        return dct

    # Creates dictionary object with information for ranking API (including page rank)
    def get_ranking_data(self, links):
        page_ranks = self.get_pagerank_dict(links)
        data = {}
        data["webpages"] = []
        for l in page_ranks.keys():
            webpage_data = {}
            # If the node exists
            if (page_ranks[l] != None):
                n = self.get_node(l)
                webpage_data["pageRankValue"] = page_ranks[l]
                webpage_data["dateLastUpdated"] = n["date_last_updated"]
                webpage_data["frequency"] = n["frequency"]
                webpage_data["webpage"] = l
            else:
                webpage_data["pageRankValue"] = "NULL"
                webpage_data["dateLastUpdated"] = ""
                webpage_data["frequency"] = ""
                webpage_data["webpage"] = ""
            data["webpages"].append(webpage_data)
        return data

    # Perform pagerank on the adjacency matrix, using the power method
    def _pagerank(
            self,
            alpha=0.85,
            max_iter=100,  # Increase this if we get the non-convergence error
            tol=1.0e-6,
    ):
        # Create a sparse matrix rep. of adjacency matrix.
        # FIX: the scipy.* numpy aliases (scipy.array, scipy.where, ...) are
        # removed from modern SciPy; use numpy (np) directly instead.
        mat = scipy.sparse.csr_matrix(self._to_matrix())
        n, m = mat.shape
        # Make a sum matrix
        S = np.array(mat.sum(axis=1)).flatten()
        # Get non-zero rows (FIX: `S <> 0` used the removed Python 2 operator)
        index = np.where(S != 0)[0]
        for i in index:
            # We need to normalize (divide by sum)
            mat[i, :] *= 1.0 / S[i]
        # FIX: the initial guess was commented out, so `prlast = pr` below
        # raised NameError on the first iteration.
        pr = np.ones(n) / n  # initial guess
        # Get dangling nodes
        dangling = np.array(np.where(mat.sum(axis=1) == 0, 1.0 / n,
                                     0)).flatten()
        for i in range(max_iter):
            prlast = pr
            pr = alpha * (pr * mat + np.dot(dangling, prlast)) + (
                1 - alpha) * prlast.sum() / n
            # check if we're done
            err = np.absolute(pr - prlast).sum()
            if err < n * tol:
                return pr
        raise Exception("pagerank failed to converge [%d iterations]" %
                        (i + 1))

    # Prioritizer
    def prioritizer(self, outlinks):
        # Get remaining time and number of inlinks.
        # FIX: iterate over a copy — removing items from the list being
        # iterated skips the element following each removal.
        for ol in list(outlinks):
            if (not self.check_node_exist(ol)):
                outlinks.remove(ol)
            else:
                self.remaining_time(ol)
        self.sort_node(outlinks)
        new_links = sorted(
            outlinks,
            key=lambda k: (self.get_node(k)["time_remaining"],
                           self.number_of_inlinks(k)))
        for ol in new_links:
            # Update last_crawled_time
            current = str(datetime.datetime.now())
            node = self.get_node(ol)
            node["last_crawled_time"] = current
            node.push()
        return new_links

    # Get number of inlinks (negated so that more inlinks sorts first)
    def number_of_inlinks(self, outlink):
        node = self.get_node(outlink)
        return -len(
            list(
                self.graph_instance.match(
                    rel_type="links_to", end_node=node, start_node=None)))

    # Updates remaining time left for a node to be crawled based on frequency
    def remaining_time(self, outlink):
        node = self.get_node(outlink)
        last_crawled_time = node["last_crawled_time"]
        if (last_crawled_time == None):
            # Never crawled: due immediately.
            node["time_remaining"] = 0
            node.push()
        else:
            fmt = '%Y-%m-%d %H:%M:%S'
            current = str(datetime.datetime.now())
            start = datetime.datetime.strptime(current[:19], fmt)
            end = datetime.datetime.strptime(last_crawled_time[:19], fmt)
            # Hours elapsed since the last crawl.
            diff = (start - end).total_seconds() / 60.000 / 60.000
            diff = float(node["calculated_frequency"]) - diff
            node["time_remaining"] = diff
            node.push()

    # Sort node and fill top 100
    def sort_node(self, outlinks):
        num = len(outlinks)
        count = 0
        nodes = self.graph_instance.data("MATCH (n) RETURN n")
        for n in nodes:
            if (not n["n"]["link"] in outlinks):
                self.remaining_time(n["n"]["link"])
        nodes = self.graph_instance.data(
            "MATCH (n) RETURN n ORDER BY (n.time_remaining) DESC")
        for n in nodes:
            link = n["n"]["link"]
            if (not link in outlinks):
                outlinks.append(link)
                count = count + 1
            if (count + num > 100):
                break

    # Return dictionary object of prioritized links and their priority value
    def prioritize_dic(self, outlinks):
        new_links = self.prioritizer(outlinks)
        data = {}
        data["prioritizedLinks"] = []
        p_value = 1
        for l in new_links:
            l_data = {}
            l_data["link"] = l
            l_data["priority_value"] = p_value * 10
            data["prioritizedLinks"].append(l_data)
            p_value = p_value + 1
        return data
class TwitterGraph():
    """Write-side Twitter graph wrapper (py2neo, pre-v3 API).

    Maintains User and TwitterUser nodes plus FOLLOWS/RETWEETED
    relationships, and keeps an in-memory popularity heap of TwitterUsers
    ranked by incoming-relationship count.
    """

    def __init__(self):
        # Credentials are embedded in the URL (masked here)
        self.graph = Graph("http://*****:*****@54.191.171.209:7474/db/data/")
        self.popularity_heap = []
        self.reassess_popularity()

    def add_user(self, user):
        """Create a User node from an app-level user object."""
        new_user = Node("User", token=user.token.session_id, user_id=user.id)
        return self.graph.create(new_user)

    def is_cached(self, screen_name):
        """Return True if a TwitterUser node exists; falls through to None otherwise."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is not None:
            return True

    def get_RT_recommendations(self, user):
        """Recommend up to 10 screen names retweeted by accounts the user follows.

        Samples 5 FOLLOWS edges and 5 RETWEETED edges each, counts targets,
        and returns the most common ones.
        """
        recommendations = Counter()
        user_node = self.graph.find_one("User", 'user_id', user.id)
        following = user_node.match_outgoing("FOLLOWS", limit=5)
        for rel in following:
            retweets = rel.end_node.match_outgoing("RETWEETED", limit=5)
            for r in retweets:
                recommendations[r.end_node.properties['screen_name']] += 1
        # NOTE(review): the tuple name `str` shadows the builtin inside the
        # comprehension
        return [str for (str, count) in recommendations.most_common(10)]

    def get_generic_recommendations(self):
        """Return the 10 most popular screen names from the popularity heap."""
        return [screen_name for (count, screen_name) in
                heapq.nlargest(10, self.popularity_heap)]

    def reassess_popularity(self):
        # NOTE: expensive calculation, to be run threaded at multiples of x
        # actions to graph or hourly/daily job
        # NOTE(review): pushes fresh entries without clearing the heap, so
        # repeated calls accumulate duplicates — confirm intended
        all_twitter_users = self.graph.find("TwitterUser")
        for tu in all_twitter_users:
            incoming_count = sum(1 for _ in tu.match_incoming())
            heapq.heappush(self.popularity_heap,
                           (incoming_count, tu.properties['screen_name']))

    def add_twitter_user(self, screen_name):
        """Create a TwitterUser node if one does not already exist."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            new_twitter_user = Node("TwitterUser", screen_name=screen_name)
            self.graph.create(new_twitter_user)

    def add_follow(self, screen_name, user):
        """Create a FOLLOWS relationship, lazily creating missing endpoints."""
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        follow_relationship = Relationship(user_node, "FOLLOWS", twitter_user)
        self.graph.create(follow_relationship)
        self.reassess_popularity()

    def remove_follow(self, screen_name, user):
        """Delete the FOLLOWS relationship between user and screen_name, if any."""
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        follow_relationship = self.graph.match_one(user_node, "FOLLOWS",
                                                   twitter_user)
        if follow_relationship is not None:
            self.graph.delete(follow_relationship)

    def add_retweet(self, screen_name, retweeted_screen_name):
        """Create or increment the RETWEETED relationship's `count` property."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                           screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name',
                                               screen_name)
        self.add_twitter_user(retweeted_screen_name)
        retweeted_twitter_user = self.graph.find_one("TwitterUser",
                                                     'screen_name',
                                                     retweeted_screen_name)
        retweet = self.graph.match_one(twitter_user, "RETWEETED",
                                       retweeted_twitter_user)
        if retweet is None:
            retweet_relationship = Relationship(twitter_user, "RETWEETED",
                                                retweeted_twitter_user)
            retweet_relationship.properties['count'] = 1
            self.graph.create(retweet_relationship)
        elif retweet.properties['count'] is None:
            # this shouldn't happen, just for testing while transitioning db
            retweet.properties['count'] = 1
            retweet.push()
        else:
            retweet.properties['count'] = retweet.properties['count'] + 1
            retweet.push()
class NFIBManager(object):
  """
  Manage the handling of Network Function Information Base.

  Use neo4j implementation for storing and querying NFs and NF
  decompositions.
  """

  def __init__ (self):
    """
    Init.
    """
    super(NFIBManager, self).__init__()
    log.debug("Init %s based on neo4j" % self.__class__.__name__)
    # Suppress low level logging
    self.__suppress_neo4j_logging()
    try:
      self.graph_db = Graph()
    except Unauthorized as e:
      quit_with_error(
        "Got Unauthorozed error on: %s from neo4j! Disable the authorization "
        "in /etc/neo4j/neoj4-server.properties!" % e)

  @staticmethod
  def __suppress_neo4j_logging (level=None):
    """
    Suppress annoying and detailed logging of `py2neo` and `httpstream`
    packages.

    :param level: level of logging (default: WARNING)
    :type level: str
    :return: None
    """
    import logging
    level = level if level is not None else logging.WARNING
    logging.getLogger("py2neo").setLevel(level)
    logging.getLogger("neo4j").setLevel(level)
    logging.getLogger("httpstream").setLevel(level)

  def addNode (self, node):
    """
    Add new node to the DB.

    :param node: node to be added to the DB
    :type node: dict
    :return: success of addition
    :rtype: Boolean
    """
    node_db = list(
      self.graph_db.find(node['label'], 'node_id', node['node_id']))
    if len(node_db) > 0:
      log.debug("node %s exists in the DB" % node['node_id'])
      return False
    node_new = py2neo.Node(node['label'], node_id=node['node_id'])
    # Copy every incoming key/value onto the neo4j node's properties
    for key, value in node.items():
      node_new.properties[key] = value
    self.graph_db.create(node_new)
    return True

  def addClickNF (self, nf):
    """
    Add new click-based NF to the DB

    :param nf: nf to be added to the DB
    :type nf: dict
    :return: success of addition
    :rtype: Boolean
    """
    dirname = "/home/mininet/escape-shared/mininet/mininet"
    # 1. First check if the source can be compiled
    if nf.get('clickSource', ''):
      if not self.clickCompile(nf):
        return False
    # 2. Check the existence of the required VNFs/Click elements
    dependency = []
    clickTempPath = nf.get('clickTempPath',
                           dirname + '/templates/' + nf['node_id'] + '.jinja2')
    if os.path.exists(clickTempPath):
      with open(clickTempPath) as template:
        for line in template:
          if '::' in line:
            # Click declarations look like "name :: Element(args)"
            element = line.split('::')[-1].split('(')[0].replace(' ', '')
            node = list(self.graph_db.find('NF', 'node_id', str(element)))
            if len(node) <= 0:
              log.debug(
                "The new NF is dependent on non-existing NF %s" % element)
              return False
            else:
              dependency.append(str(element))
      template = open(clickTempPath, 'r').read()
    else:
      template = ''
    # 3. Extract the click handlers form the source files
    read_handlers = {}
    read = []
    write_handlers = {}
    write = []
    for src in nf.get('clickSource', ''):
      if '.cc' in src:
        with open(nf.get('clickPath', '') + '/' + src) as source:
          for line in source:
            if 'add_read_handler' in line:
              hdlr = line.split('"')[1]
              if hdlr not in read:
                read.append(hdlr)
            if 'add_write_handler' in line:
              hdlr = line.split('"')[1]
              if hdlr not in write:
                write.append(hdlr)
    if read:
      read_handlers[nf['node_id']] = read
    if write:
      write_handlers[nf['node_id']] = write
    # Add the handlers of other elements used in click scripts of the new NF
    if dependency:
      for element in dependency:
        NF_info = self.getNF(element)
        # NOTE(review): eval() on handler strings stored in the DB — only
        # safe while the DB content is fully trusted
        read = eval(NF_info['read_handlers']).get(element, '')
        write = eval(NF_info['write_handlers']).get(element, '')
        if read:
          read_handlers[element] = read
        if write:
          write_handlers[element] = write
    # 4. Add the NF to the DB
    nf.update(
      {
        'dependency': repr(dependency),
        'read_handlers': repr(read_handlers),
        'write_handlers': repr(write_handlers),
        'command': str(template)
      })
    self.addNode(nf)

  def addVMNF (self, nf):
    # To be updated
    self.addNode(nf)

  @staticmethod
  def clickCompile (nf):
    """
    Compile source of the click-based NF

    :param nf: the click-based NF
    :type nf: dict
    :return: success of compilation
    :rtype: Boolean
    """
    for src in nf.get('clickSource', ''):
      if not os.path.exists(nf.get('clickPath', '') + '/' + src):
        log.debug("source file does not exist: %s" % src)
        return False
    # Shell out to the click build; success is judged by the binary existing
    os.system('cd ' + nf.get('clickPath', '') +
              '; make clean; ./configure; make elemlist; '
              'make')
    if not os.path.exists(nf.get('clickPath', '') + '/userlevel/click'):
      log.debug("The source code can not be compiled")
      return False
    else:
      return True

  def removeNF (self, nf_id):
    """
    Remove an NF and all its decompositions from the DB.

    :param nf_id: the id of the NF to be removed from the DB
    :type nf_id: string
    :return: success of removal
    :rtype: Boolean
    """
    node = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(node) > 0:
      rels_DECOMPOSE = list(
        self.graph_db.match(start_node=node[0], rel_type='DECOMPOSED'))
      for rel in rels_DECOMPOSE:
        self.removeDecomp(rel.end_node.properties['node_id'])
      node[0].delete_related()
      return True
    else:
      log.debug("node %s does not exist in the DB" % nf_id)
      return False

  def updateNF (self, nf):
    """
    Update the information of a NF.

    :param nf: the information for the NF to be updated
    :type nf: dict
    :return: success of the update
    :rtype: Boolean
    """
    node = list(self.graph_db.find(nf['label'], 'node_id', nf['node_id']))
    if len(node) > 0:
      node[0].set_properties(nf)
      return True
    else:
      log.debug("node %s does not exist in the DB" % nf['node_id'])
      return False

  def getNF (self, nf_id):
    """
    Get the information for the NF with id equal to nf_id.

    :param nf_id: the id of the NF to get the information for
    :type nf_id: string
    :return: the information of NF with id equal to nf_id
    :rtype: dict
    """
    node = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(node) > 0:
      return node[0].properties
    else:
      log.debug("node %s does not exist in the DB" % nf_id)
      return None

  def addRelationship (self, relationship):
    """
    Add relationship between two existing nodes

    :param relationship: relationship to be added between two nodes
    :type relationship: dict
    :return: success of the addition
    :rtype: Boolean
    """
    node1 = list(self.graph_db.find(relationship['src_label'], 'node_id',
                                    relationship['src_id']))
    node2 = list(self.graph_db.find(relationship['dst_label'], 'node_id',
                                    relationship['dst_id']))
    if len(node1) > 0 and len(node2) > 0:
      rel = Relationship(node1[0], relationship['rel_type'], node2[0])
      for key, value in relationship.items():
        rel.properties[key] = value
      self.graph_db.create(rel)
      return True
    else:
      log.debug("nodes do not exist in the DB")
      return False

  def removeRelationship (self, relationship):
    """
    Remove the relationship between two nodes in the DB.

    :param relationship: the relationship to be removed
    :type relationship: dict
    :return: the success of the removal
    :rtype: Boolean
    """
    node1 = list(self.graph_db.find(relationship['src_label'], 'node_id',
                                    relationship['src_id']))
    node2 = list(self.graph_db.find(relationship['dst_label'], 'node_id',
                                    relationship['dst_id']))
    if len(node1) > 0 and len(node2) > 0:
      rels = list(self.graph_db.match(start_node=node1[0], end_node=node2[0],
                                      rel_type=relationship['rel_type']))
      for r in rels:
        r.delete()
      return True
    else:
      log.debug("nodes do not exist in the DB")
      return False

  def addDecomp (self, nf_id, decomp_id, decomp):
    """
    Add new decomposition for a high-level NF.

    :param nf_id: the id of the NF for which a decomposition is added
    :type nf_id: string
    :param decomp_id: the id of the new decomposition
    :type decomp_id: string
    :param decomp: the decomposition to be added to the DB
    :type decomp: Networkx.Digraph
    :return: success of the addition
    :rtype: Boolean
    """
    nf = list(self.graph_db.find('NF', 'node_id', nf_id))
    if len(nf) <= 0:
      log.debug("NF %s does not exist in the DB" % nf_id)
      return False
    # SAP ids inside a decomposition must be fresh, i.e. not in the DB yet
    for n in decomp.nodes():
      node = list(self.graph_db.find('SAP', 'node_id', n))
      if len(node) > 0:
        log.debug("SAPs exist in the DB")
        return False
    if not self.addNode({'label': 'graph', 'node_id': decomp_id}):
      log.debug("decomposition %s exists in the DB" % decomp_id)
      return False
    for n in decomp.nodes():
      if decomp.node[n]['properties']['label'] == 'SAP':
        self.addNode(decomp.node[n]['properties'])
        dst_label = 'SAP'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'click':
        self.addClickNF(decomp.node[n]['properties'])
        dst_label = 'NF'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'VM':
        self.addVMNF(decomp.node[n]['properties'])
        dst_label = 'NF'
      elif decomp.node[n]['properties']['label'] == 'NF' and \
         decomp.node[n]['properties']['type'] == 'NA':
        self.addNode(decomp.node[n]['properties'])
        dst_label = 'NF'
      else:
        # FIXME - czentye --> add default to dst_label variable always be
        # defined for addRelationship
        self.addNode({'label': 'NF', 'type': 'NA'})
        dst_label = 'NA'
      self.addRelationship(
        {
          'src_label': 'graph',
          'dst_label': dst_label,
          'src_id': decomp_id,
          'dst_id': n,
          'rel_type': 'CONTAINS'
        })
    for e in decomp.edges():
      temp = {
        'src_label': decomp.node[e[0]]['properties']['label'],
        'src_id': e[0],
        'dst_label': decomp.node[e[1]]['properties']['label'],
        'dst_id': e[1],
        'rel_type': 'CONNECTED'
      }
      # Edge attributes (ports, BW, ...) ride along on the relationship
      temp.update(decomp.edge[e[0]][e[1]]['properties'])
      self.addRelationship(temp)
    self.addRelationship(
      {
        'src_label': 'NF',
        'src_id': nf_id,
        'dst_label': 'graph',
        'dst_id': decomp_id,
        'rel_type': 'DECOMPOSED'
      })
    return True

  def removeDecomp (self, decomp_id):
    """
    Remove a decomposition from the DB.

    :param decomp_id: the id of the decomposition to be removed from the DB
    :type decomp_id: string
    :return: the success of the removal
    :rtype: Boolean
    """
    node = list(self.graph_db.find('graph', 'node_id', decomp_id))
    if len(node) > 0:
      # BFS over CONTAINS/DECOMPOSED edges; only delete nodes that are not
      # shared with another decomposition (<= 1 incoming CONTAINS)
      queue = deque([node[0]])
      while len(queue) > 0:
        node = queue.popleft()
        # we search for all the nodes with relationship CONTAINS or DECOMPOSED
        rels_CONTAINS = list(
          self.graph_db.match(start_node=node, rel_type='CONTAINS'))
        rels_DECOMPOSED = list(
          self.graph_db.match(start_node=node, rel_type='DECOMPOSED'))
        if len(rels_CONTAINS) > 0:
          rels = rels_CONTAINS
        else:
          rels = rels_DECOMPOSED
        for rel in rels:
          if len(list(self.graph_db.match(end_node=rel.end_node,
                                          rel_type='CONTAINS'))) <= 1:
            queue.append(rel.end_node)
        node.isolate()
        node.delete()
      return True
    else:
      log.debug("decomposition %s does not exist in the DB" % decomp_id)
      return False

  def getSingleDecomp (self, decomp_id):
    """
    Get a decomposition with id decomp_id.

    :param decomp_id: the id of the decomposition to be returned
    :type decomp_id: str
    :return: decomposition with id equal to decomp_id
    :rtype: tuple of networkx.DiGraph and Relationships
    """
    graph = networkx.DiGraph()
    node = list(self.graph_db.find('graph', 'node_id', decomp_id))
    if len(node) != 0:
      rels = list(self.graph_db.match(start_node=node[0],
                                      rel_type='CONTAINS'))
      for rel in rels:
        graph.add_node(rel.end_node.properties['node_id'])
        graph.node[rel.end_node.properties['node_id']][
          'properties'] = rel.end_node.properties
      for rel in rels:
        rel_CONNECTED = list(
          self.graph_db.match(start_node=rel.end_node, rel_type='CONNECTED'))
        for rel_c in rel_CONNECTED:
          # Keep only edges whose far end is inside this decomposition
          if rel_c.end_node.properties['node_id'] in graph.nodes():
            graph.add_edge(rel_c.start_node.properties['node_id'],
                           rel_c.end_node.properties['node_id'])
            graph.edge[rel_c.start_node.properties['node_id']][
              rel_c.end_node.properties['node_id']][
              'properties'] = rel_c.properties
      return graph, rels
    else:
      log.debug("decomposition %s does not exist in the DB" % decomp_id)
      return None

  def getDecomps (self, nffg):
    """
    Get all decompositions for a given nffg.

    :param nffg: the nffg for which the decompositions should be returned
    :type nffg: nffg
    :return: all the decompositions for the given nffg
    :rtype: dict
    """
    decompositions = {}
    nodes_list = []
    index = 0
    for n in nffg.nfs:
      node = list(self.graph_db.find('NF', 'node_id', n.id))
      if len(node) != 0:
        nodes_list.append(node[0])
      else:
        log.debug("NF %s does not exist in the DB" % n.id)
        return None
    # Breadth-first expansion: each queue entry is a candidate NF set with
    # its partially-rewritten NFFG
    queue = deque([nodes_list])
    queue_nffg = deque([nffg])
    while len(queue) > 0:
      nodes = queue.popleft()
      nffg_init = queue_nffg.popleft()
      indicator = 0
      for node in nodes:
        rels_DECOMPOSED = list(
          self.graph_db.match(start_node=node, rel_type='DECOMPOSED'))
        for rel in rels_DECOMPOSED:
          indicator = 1
          nffg_temp = NFFG()
          graph, rels = self.getSingleDecomp(
            rel.end_node.properties['node_id'])
          for n in graph.nodes():
            if graph.node[n]['properties']['label'] == 'NF':
              nffg_temp.add_nf(id=n,
                               dep_type=graph.node[n]['properties']['type'],
                               cpu=graph.node[n]['properties']['cpu'],
                               mem=graph.node[n]['properties']['mem'],
                               storage=graph.node[n]['properties'][
                                 'storage'])
            elif graph.node[n]['properties']['label'] == 'SAP':
              nffg_temp.add_sap(id=n)
          counter = 0
          for edge in graph.edges():
            for nf in nffg_temp.nfs:
              if nf.id == edge[0]:
                node0 = nf
              if nf.id == edge[1]:
                node1 = nf
            for sap in nffg_temp.saps:
              if sap.id == edge[0]:
                node0 = sap
              if sap.id == edge[1]:
                node1 = sap
            # FIXME - czentye --> There is a chance node0, node1 variables
            # not defined yet until here and add_port will be raise an
            # exception
            nffg_temp.add_sglink(node0.add_port(
              graph.edge[edge[0]][edge[1]]['properties']['src_port']),
              node1.add_port(
                graph.edge[edge[0]][edge[1]]['properties']['dst_port']),
              id='hop' + str(counter))
          # Copy the untouched parts of the original NFFG into the rewrite
          for n in nffg_init.nfs:
            nffg_temp.add_node(n)
          for n in nffg_init.saps:
            nffg_temp.add_node(n)
          for n in nffg_init.infras:
            nffg_temp.add_node(n)
          for l in nffg_init.links:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          for l in nffg_init.sg_hops:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          for l in nffg_init.reqs:
            nffg_temp.add_edge(l.src.node, l.dst.node, l)
          # Re-route hops that touched the decomposed NF through the
          # decomposition's SAP ports, then drop the placeholder nodes
          extra_nodes = []
          for l in nffg_temp.sg_hops:
            if node.properties['node_id'] == l.src.node.id:
              src_port = l.src
              dst_port = l.dst
              for edge in graph.edges():
                if graph.node[edge[1]]['properties']['label'] == 'SAP':
                  if str(src_port.id) == str(
                     graph.edge[edge[0]][edge[1]]['properties']['dst_port']):
                    for e in nffg_temp.sg_hops:
                      if e.src.node.id == edge[0] and e.dst.node.id == edge[
                        1]:
                        nffg_temp.add_sglink(e.src, dst_port)
                        extra_nodes.append(edge[1])
            if node.properties['node_id'] == l.dst.node.id:
              dst_port = l.dst
              src_port = l.src
              for edge in graph.edges():
                if graph.node[edge[0]]['properties']['label'] == 'SAP':
                  if str(dst_port.id) == str(
                     graph.edge[edge[0]][edge[1]]['properties']['src_port']):
                    for e in nffg_temp.sg_hops:
                      if e.src.node.id == edge[0] and e.dst.node.id == edge[
                        1]:
                        nffg_temp.add_sglink(src_port, e.dst)
                        extra_nodes.append(edge[0])
          nffg_temp.del_node(node.properties['node_id'])
          for extra in extra_nodes:
            nffg_temp.del_node(extra)
          queue_nffg.append(nffg_temp)
          nodes_copy = list(nodes)
          # NOTE(review): Python 2 semantics — map() must return a list for
          # the `nodes_copy + new_nodes` concatenation below
          new_nodes = map(lambda x: x.end_node, rels)
          nodes_copy.remove(node)
          queue.append(nodes_copy + new_nodes)
        if indicator == 1:
          break
      if indicator == 0:
        decompositions['D' + str(index)] = nffg_init
        index += 1
    return decompositions

  def removeGraphDB (self):
    """
    Remove all nodes and relationships from the DB.

    :return: None
    """
    self.graph_db.delete_all()

  def __initialize (self):
    """
    Initialize NFIB with test data.
    """
    log.info("Initializing NF database with NFs and decompositions...")
    # start clean - all the existing info is removed from the DB
    self.removeGraphDB()
    # add new high-level NF to the DB, all the information related to the NF
    # should be given as a dict
    self.addNode({'label': 'NF', 'node_id': 'forwarder', 'type': 'NA'})
    self.addNode({'label': 'NF', 'node_id': 'compressor', 'type': 'NA'})
    self.addNode({'label': 'NF', 'node_id': 'decompressor', 'type': 'NA'})
    log.debug(
      "%s: high-level NFs were added to the DB" % self.__class__.__name__)
    # generate a decomposition for a high-level forwarder NF (in form of
    # networkx)
    G1 = networkx.DiGraph()
    G1.add_path(['SAP1', 'simpleForwarder', 'SAP2'])
    # create node properties
    for n in G1.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 10
        properties['mem'] = 100
        properties['storage'] = 100
      G1.node[n]['properties'] = properties
    # create edge properties
    properties = {'BW': 100, 'src_port': 1, 'dst_port': 1}
    G1.edge['SAP1']['simpleForwarder']['properties'] = properties
    properties1 = {'BW': 100, 'src_port': 2, 'dst_port': 2}
    G1.edge['simpleForwarder']['SAP2']['properties'] = properties1
    # generate a decomposition for a high-level compressor NF (in form of
    # networkx)
    G2 = networkx.DiGraph()
    G2.add_path(['SAP3', 'headerCompressor', 'SAP4'])
    # create node properties
    for n in G2.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 20
        properties['mem'] = 200
        properties['storage'] = 200
      G2.node[n]['properties'] = properties
    # create edge properties
    properties3 = {'BW': 200, 'src_port': 1, 'dst_port': 1}
    G2.edge['SAP3']['headerCompressor']['properties'] = properties3
    properties4 = {'BW': 200, 'src_port': 2, 'dst_port': 2}
    G2.edge['headerCompressor']['SAP4']['properties'] = properties4
    # generate a decomposition for a high-level decompressor NF (in form of
    # networkx)
    G3 = networkx.DiGraph()
    G3.add_path(['SAP5', 'headerDecompressor', 'SAP6'])
    # create node properties
    for n in G3.nodes():
      properties = {'node_id': n}
      if 'SAP' in n:
        properties['label'] = 'SAP'
        properties['type'] = 'NA'
      else:
        properties['label'] = 'NF'
        properties['type'] = 'click'
        properties['cpu'] = 30
        properties['mem'] = 300
        properties['storage'] = 300
      G3.node[n]['properties'] = properties
    # create edge properties
    properties5 = {'BW': 300, 'src_port': 1, 'dst_port': 1}
    G3.edge['SAP5']['headerDecompressor']['properties'] = properties5
    properties6 = {'BW': 300, 'src_port': 2, 'dst_port': 2}
    G3.edge['headerDecompressor']['SAP6']['properties'] = properties6
    # required elementary NFs should be added first to the DB
    # NOTE(review): 'type:' key below (with colon) looks like a typo for
    # 'type' — confirm against upstream
    self.addClickNF({'label': 'NF', 'node_id': 'Queue', 'type:': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'Classifier', 'type': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'Counter', 'type': 'click'})
    self.addClickNF({'label': 'NF', 'node_id': 'RFC2507Comp',
                     'type': 'click'})
    self.addClickNF(
      {'label': 'NF', 'node_id': 'RFC2507Decomp', 'type': 'click'})
    # the NF decompositions are added to the DB
    self.addDecomp('forwarder', 'G1', G1)
    self.addDecomp('compressor', 'G2', G2)
    self.addDecomp('decompressor', 'G3', G3)
    log.debug(
      "%s: NF decompositions were added to the DB" %
      self.__class__.__name__)

  def initialize (self):
    """
    Initialize NFIB with test data.
    """
    try:
      self.__initialize()
    except SocketError as e:
      log.error(
        "NFIB is not reachable due to failed neo4j service! Cause: " +
        str(e))
    except KeyboardInterrupt:
      log.warning("NFIB was interrupted by user!")
    except Unauthorized:
      log.error(
        "neo4j responded with Unauthorized error! Maybe you forgot disabling "
        "authentication in '/etc/neo4j/neo4j.conf' ?")
    except IOError as e:
      if ".neo4j/known_hosts" in str(e):
        # Skip Permission denied in case of accessing neo4j cache file
        # (v3.0.2)
        pass
      else:
        raise
    except:
      log.exception("Got unexpected error during NFIB initialization!")
# get only unique lists in result # print('res: {}'.format(result)) for prefix in result: result[prefix] = [list(x) for x in set(tuple(x) for x in result[prefix])] print('result: {}'.format(result)) for prefix in result: for path in result[prefix]: print('path: {}'.format(path)) cur_node = None prev_node = None counter_as_prepend = 0 for index, asn in enumerate(path): searched_node = graph.find('asn', property_key='label', property_value=asn) try: cur_node = searched_node.next() # see if the AS node is already in the db or not. If yes, cur_node == prev_node except StopIteration: cur_node = Node('asn', label=str(asn)) # if not exists, then create a new one if index > 0: if index == len(path) - 1: cur_node['path'] = path # attach AS path to the last ASN if cur_node != prev_node: if counter_as_prepend > 0: cur_node['prepended'] = counter_as_prepend counter_as_prepend = 0 # reset text = 'PEER_{}'.format(prefix) peering = Relationship(cur_node, text, prev_node) peering['time'] = timestamp graph.create(peering)
for article_key, article_value in issue_attributes_value.items(): title = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"] abstract = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"] authors = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"] doi = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"] references = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["references"] citations = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["citations"] article_to_be_added = graph.merge_one("Article", "doi", doi) article_to_be_added['abstract'] = abstract article_to_be_added['authors'] = authors[0]["name"] article_to_be_added['title'] = title article_to_be_added['citations'] = [] article_to_be_added['references'] = [] if ( len(references) > 0 ) and ( len(citations) > 0 ) : article_to_be_added['references'] = references article_to_be_added['citations'] = citations article_to_be_added.push() #print(title) relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "printed_in", journal_to_be_added, volume=volume_key, issue=issue_key, issn=journal_structure["ACM"][journal_key][volume_key][issue_key]["issn"])) primary_author_bool = True for author in authors: if primary_author_bool: author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find('Author', 'full_name', author), primary_author="YES")) primary_author_bool = False else: author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find('Author', 'full_name', author), primary_author="NO"))
class TwitterGraph():
    """
    Run queries against TwitterGraph. Functions here are mainly read-only,
    i.e. we only want to get answers, we are not modifying the graph
    structure
    """
    # Default connection settings (credentials masked)
    PASSWORD = "******"
    USER = "******"
    HOST = "localhost:7474"

    def __init__(self, host=HOST, user=USER, password=PASSWORD):
        authenticate(host_port=host, user_name=user, password=password)
        self.graph = Graph()

    def get_users(self):
        # TO-DO : make it lazy for large datasets
        result = self.graph.find("User", limit=25)
        list_ = [user for user in result]
        return list_

    def get_user(self, id_):
        """Return the single User node whose `id` property matches, or None."""
        result = self.graph.find_one("User", property_key="id",
                                     property_value=id_)
        return result

    def get_level_followers(self, limit=50, level=1, uid=None,
                            screen_name=None):
        """
        Return neo4j.cypher.RecordStream of users who are the n level
        follower of user uid/screen_name
        Level 1 follower is defined as :
        (1st_level_follower)-[follows]->(followee)
        """
        # NOTE(review): _construct_follower_path is called without uid=True,
        # so even when `uid` is given the pattern matches on screen_name —
        # looks like a latent bug, confirm before relying on the uid branch
        cypher = self.graph.cypher
        statement = self._construct_follower_path(level)
        if uid is None and screen_name is None:
            raise InvalidArgumentException(
                "Please specify either a valid user id or screen_name")
        if uid is not None:
            result = cypher.stream(statement, followee=uid, limit=limit)
        elif screen_name is not None:
            result = cypher.stream(statement, followee=screen_name,
                                   limit=limit)
        return [f for f in result]

    def is_n_level_follower(self, level, retweeter, screen_name):
        """
        Given a retweeter screen_name and original tweeter's screen_name,
        determine if retweeter is n level follower
        """
        if level == 1:
            return search.is_follower(retweeter, screen_name)
        cypher = self.graph.cypher
        # Walk to level-1 followers in the graph, then check the final hop
        # via the Twitter API helper
        level -= 1
        statement = self._construct_follower_path(level)
        for follower in cypher.stream(statement, followee=screen_name,
                                      limit=5000):
            print follower[0]
            if search.is_follower(retweeter, follower[0]):
                return True
        return False

    def get_retweet_level(self, retweeter, screen_name):
        """
        Given a retweeter screen name and the original user screen_name who
        tweeted the original tweet, determine the follower level
        """
        level = 0
        while level < 10:  # stop at 10 to prevent timeout
            level += 1
            # print len(followers)
            if self.is_n_level_follower(level, retweeter, screen_name):
                return level
        # 0 means "not a follower within 10 levels"
        return 0

    def _construct_follower_path(self, level, uid=False):
        # Construct pattern: one "<-[:follows]-" hop per level beyond the
        # first; {followee}/{limit} are cypher parameters
        if uid:
            statement = "MATCH(:User {id_str : {followee} })"
        else:
            statement = "MATCH(:User {screen_name : {followee} })"
        while level > 1:
            statement += "<-[:follows]-(:User)"
            level -= 1
        statement += "<-[:follows]-(a:User) RETURN a.screen_name LIMIT {limit}"
        return statement
from py2neo import Graph,authenticate,Node,Relationship import MySQLdb import threading authenticate("localhost:7474","neo4j","8760neo4j") graph = Graph() mynode = list(graph.find('fw', property_key='count')) ct=1 fobj = open("textdump1.txt","r").readlines() file_tacker=open("tarcker.txt","a") #for i in fobj: def indexing(i): global ct print "*********" print i print ct print "**********" i = i.lower() file_tacker.write(str(i)) temp = i.split(" ",3) b="" for i in temp: b=b+"".join(" "+str(i).replace("'","")) b=b.strip() s=b.split(" ",3) dic={} for i in range(len(s)): n2=graph.cypher.execute("""MATCH (a: `%s`) where a.auto_name = '%s' return a"""%(str(s[i][0]),str(s[i])))
def main():
    """Seed the beer-social graph with demo users, likes and follows.

    Reads up to ~300 rows of users.csv, creates User/City/Job nodes with
    uniqueness constraints, links each user to random Beer/Brewery nodes,
    then wires random FOLLOWS edges between users.
    """
    graph = Graph()
    # Uniqueness constraints for the merge_one calls below
    graph.cypher.execute("CREATE CONSTRAINT ON (user:User) ASSERT user.username IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (job:Job) ASSERT job.title IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE" )
    userFile = open("users.csv", "r")
    # Skip the CSV header row
    userFile.readline()
    lineNumber = 0
    for line in userFile.readlines():
        print("\r Processing line " + str(lineNumber), end="")
        lineNumber += 1
        # Columns: username, name, city, job, biography
        parsedLine = line.split(",")
        user = Node("User", username=parsedLine[0], name=parsedLine[1],
                    biography=parsedLine[4],
                    password=bcrypt.encrypt("password"))
        graph.create(user)
        city = graph.merge_one("City", "name", parsedLine[2])
        job = graph.merge_one("Job", "title", parsedLine[3])
        livesIn = Relationship(user, "IS_FROM", city)
        hasJob = Relationship(user, "HAS_JOB_TITLE", job)
        graph.create(livesIn)
        graph.create(hasJob)
        # Like a random sample of beers
        result = graph.cypher.execute("MATCH (beer:Beer) "
                                      " RETURN beer, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      range=random.randrange(100,600))
        for beer in result:
            beerNode = graph.find_one("Beer", "breweryDbId",
                                      beer.beer["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", beerNode)
            graph.create(likesBrewery)
        # Like a random sample of breweries
        result = graph.cypher.execute("MATCH (brewery:Brewery) "
                                      " RETURN brewery, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      range=random.randrange(0,10))
        for brewery in result:
            breweryNode = graph.find_one("Brewery", "breweryDbId",
                                         brewery.brewery["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", breweryNode)
            graph.create(likesBrewery)
        if lineNumber > 300:
            break
    # Second pass: every user follows a random set of other users
    for user in graph.find("User"):
        userNode = graph.find_one("User", "username", user["username"])
        result = graph.cypher.execute("MATCH (user:User) "
                                      "WHERE user.username <> {me}"
                                      " RETURN user, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}",
                                      me=userNode["username"],
                                      range=random.randrange(5,40))
        for person in result:
            dude = graph.find_one("User", "username",
                                  person.user["username"])
            buddiesWith = Relationship(userNode, "FOLLOWS", dude)
            graph.create(buddiesWith)
issue=issue_key, issue_date=str( acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["date"]["month"]) + str(acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["date"]["year"]), issn=acm_structure[publisher_key] [journal_key][volume_key][issue_key] ["issn"])) # primary_author_bool = True for author in authors: # print("Author detected is: " + author["name"]) # print("Author_link detected is: " + author["link"]) results = graph.find('Author', 'link', author["link"]) # print(type(results)) if len(list(results)) == 1: for result in results: print("\t\t\t\t" + result['full_name'] + " FOUND") else: # print("\t\t\t\tNOT FOUND! Creating Author...") author_to_be_added = graph.merge_one( "Author", "link", author["link"]) author_str_split_list = author[ "name"].split() if (len(author_str_split_list) == 1): author_to_be_added[ 'full_name'] = author[ "name"].title()
class Robot():
    """NLU Robot — natural-language-understanding chatbot.

    Public attributes:
    - graph: Connection to the Neo4j graph database (the knowledge base).
    - pattern: Matching mode of the NLU tool: 'semantic' (synonym tags) or
      'vec' (word vectors).
    - memory: Conversation context memory of the robot (the
      qmemory/amemory/pmemory deques created in __init__).
    """

    def __init__(self, password="******"):
        # Connect to the graph knowledge base.
        self.graph = Graph("http://localhost:7474/db/data/", password=password)
        # Semantic mode: 'semantic' or 'vec'.
        self.pattern = 'semantic'
        # Navigation-location database.
        self.locations = get_navigation_location()
        # Online-scene flag; False means "not currently inside a dialogue scene".
        self.is_scene = False
        # Resolve the current address via the Baidu Maps IP-location API; on
        # network failure a default address (Shanghai / from the user config
        # city) is returned by the helper.
        self.address = get_location_by_ip(self.graph.find_one("User", "userid", "A0001")['city'])
        # Robot configuration info (the matched User node); set in search().
        self.user = None
        # Topics this user is allowed to access.
        self.usertopics = []
        # Current QA topic.
        self.topic = ""
        # Current QA id.
        self.qa_id = get_current_time()
        # Short-term memory: the 10 most recent questions and answers.
        self.qmemory = deque(maxlen=10)  # questions
        self.amemory = deque(maxlen=10)  # answers
        self.pmemory = deque(maxlen=10)  # previous step (for scene "back")
        # Random fallback answers used when nothing matches.
        # TODO: log every question the robot could not answer.
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗"]

    def __str__(self):
        # Greeting built from the robot's configuration (robotname/robotage).
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(**self.user)

    @time_me()
    def configure(self, info="", userid="userid"):
        """Configure the knowledge base selection for a user.

        Args:
            info: Whitespace-separated names of sub-knowledge-bases to enable.
                When empty, the current configuration is returned instead.
            userid: User id; anything other than "A0001" is coerced to it.
        """
        # NOTE(review): `is not ""` compares identity, not equality — should
        # be `userid != ""` (CPython emits a SyntaxWarning for this).
        assert userid is not "", "The userid can not be empty!"
        # TO UPGRADE: analyse the incoming userid parameter and report a
        # proper message when it is not acceptable. 2017-6-7
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是标准A0001,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        if not info:
            # No selection given: just report the configurable databases.
            config = {"databases": []}
            match_string = "MATCH (user:User)-[r:has]->(config:Config)" + \
                "where user.userid='" + userid + \
                "' RETURN config.name as name, r.bselected as bselected, r.available as available"
            for item in self.graph.run(match_string):
                config["databases"].append(dict(name=item[0], bselected=item[1], available=item[2]))
            print("可配置信息:", config)
            return config
        else:
            selected_names = info.split()
            forbidden_names = list(set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # TODO: merge and simplify the two loops below.
            # NOTE(review): Cypher built by string concatenation — injection
            # risk if `info` can come from untrusted input; prefer parameters.
            for name in selected_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=1"
                # print(match_string)
                self.graph.run(match_string)
            for name in forbidden_names:
                match_string = "MATCH (user:User)-[r:has]->(config:Config) where user.userid='" \
                    + userid + "' AND config.name='" + name + "' SET r.bselected=0"
                # print(match_string)
                self.graph.run(match_string)
            return self.get_usertopics(userid=userid)

    # @time_me()
    def get_usertopics(self, userid="A0001"):
        """Return the topic list the user may access (topics of all
        sub-knowledge-bases that are both selected and available)."""
        usertopics = []
        if not userid:
            userid = "A0001"
        # Fetch the sub-knowledge-bases this user has permission to use.
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualize a robot answer: fill {robotname}-style placeholders
        with values from the user's configuration."""
        return sentence.format(**self.user)

    # @time_me()
    def add_to_memory(self, question="question", userid="userid"):
        """Add the current user question to the memory graph.

        Creates a Memory node and chains it to the previous one via a
        `next` relationship.

        Args:
            question: User question. Defaults to "question".
            userid: Unique user id. Defaults to "userid".
        """
        previous_node = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        node = Node("Memory", question=question, userid=userid, qa_id=self.qa_id)
        if previous_node:
            relation = Relationship(previous_node, "next", node)
            self.graph.create(relation)
        else:
            self.graph.create(node)

    # Development requirements from Mr Tang in 2017-5-11.
    # Changed from fuzzy match to exact match, from Mr Tang in 2017-6-1.
    def extract_navigation(self, question):
        """Extract a navigation destination from the question.

        QA match mode: exact "去<location>" match against the
        navigation-location list.

        Args:
            question: User question.
        """
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # temp_sim = 0
        # sv1 = synonym_cut(question, 'wf')
        # if not sv1:
            # return result
        for location in self.locations:
            # Treat "去" + location as an adjacent verb phrase.
            keyword = "去" + location
            if keyword in question:
                print("Original navigation")
                result["name"] = keyword
                result["content"] = location
                result["context"] = "user_navigation"
                result["behavior"] = int("0x001B", 16)
                return result
            # Former fuzzy-similarity matching, kept for reference:
            # sv2 = synonym_cut(location, 'wf')
            # if sv2:
                # temp_sim = similarity(sv1, sv2, 'j')
                # Short-circuit: stop as soon as the threshold is reached.
                # if temp_sim > 0.92:
                    # print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
                    # result["content"] = location
                    # result["context"] = "user_navigation"
                    # result["behavior"] = int("0x001B", 16)
                    # return result
        return result

    def extract_pinyin(self, question, subgraph):
        """Extract the best-matching QA pair by pinyin similarity.

        QA match mode: Jaccard similarity over pinyin tokens against the
        candidate nodes from the graph database.

        Args:
            question: User question.
            subgraph: Candidate NluCell nodes for the current dialogue domain.
        """
        temp_sim = 0
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        sv1 = pinyin_cut(question)
        print(sv1)
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print(" ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Short-circuit: accept the first candidate above the threshold
            # instead of searching for the global maximum.
            if temp_sim > 0.75:
                print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                result['name'] = iquestion
                result["content"] = self.iformat(random_item(node["content"].split("|")))
                result["context"] = node["topic"]
                result["tid"] = node["tid"]
                result["txt"] = node["txt"]
                result["img"] = node["img"]
                result["button"] = node["button"]
                if node["url"]:
                    result["url"] = random_item(node["url"].split("|"))
                if node["behavior"]:
                    result["behavior"] = int(node["behavior"], 16)
                if node["parameter"]:
                    result["parameter"] = node["parameter"]
                func = node["api"]
                if func:
                    # NOTE(review): exec() on knowledge-base content — the
                    # database must be trusted.
                    exec("result['content'] = " + func + "('" + result["content"] + "')")
                return result
        return result

    def extract_synonym(self, question, subgraph):
        """Extract the best-matching QA pair by synonym-tag similarity.

        QA match mode: pick the highest-scoring QA pair from the knowledge
        base; an exact sentence match wins immediately.

        Args:
            question: User question.
            subgraph: Candidate NluCell nodes for the current dialogue domain.
        """
        temp_sim = 0
        result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # semantic: split into synonym-tag vectors, build a similarity matrix
        #   from tag similarity, then score the sentence pair from it.
        # vec: split into word vectors and score the sentence pair from the
        #   word-vector similarity matrix.
        if self.pattern == 'semantic':
        # elif self.pattern == 'vec':
            sv1 = synonym_cut(question, 'wf')
            if not sv1:
                return result
            for node in subgraph:
                iquestion = self.iformat(node["name"])
                if question == iquestion:
                    print("Similarity Score: Original sentence")
                    result['name'] = iquestion
                    result["content"] = self.iformat(random_item(node["content"].split("|")))
                    result["context"] = node["topic"]
                    result["tid"] = node["tid"]
                    result["txt"] = node["txt"]
                    result["img"] = node["img"]
                    result["button"] = node["button"]
                    if node["url"]:
                        result["url"] = random_item(node["url"].split("|"))
                    if node["behavior"]:
                        result["behavior"] = int(node["behavior"], 16)
                    if node["parameter"]:
                        result["parameter"] = node["parameter"]
                    # The node's api hook extracts key info from the original
                    # question for a local query / third-party API / crawl.
                    func = node["api"]
                    if func:
                        exec("result['content'] = " + func + "('" + result["content"] + "')")
                    return result
                sv2 = synonym_cut(iquestion, 'wf')
                if sv2:
                    temp_sim = similarity(sv1, sv2, 'j')
                    # Short-circuit: accept the first candidate above the
                    # threshold instead of seeking the global maximum.
                    if temp_sim > 0.92:
                        print("Q: " + iquestion + " Similarity Score: " + str(temp_sim))
                        result['name'] = iquestion
                        result["content"] = self.iformat(random_item(node["content"].split("|")))
                        result["context"] = node["topic"]
                        result["tid"] = node["tid"]
                        result["txt"] = node["txt"]
                        result["img"] = node["img"]
                        result["button"] = node["button"]
                        if node["url"]:
                            result["url"] = random_item(node["url"].split("|"))
                        if node["behavior"]:
                            result["behavior"] = int(node["behavior"], 16)
                        if node["parameter"]:
                            result["parameter"] = node["parameter"]
                        func = node["api"]
                        if func:
                            exec("result['content'] = " + func + "('" + result["content"] + "')")
                        return result
        return result

    def extract_keysentence(self, question, data=None):
        """Extract a QA pair whose key sentence is contained in the question.

        QA match mode: select a QA pair from the knowledge base whose name
        is a substring of the question (restricted to attached topics).

        Args:
            question: User question.
            data: Optional pre-fetched candidate nodes (currently unused).
        """
        result = dict(question=question, name="", content=self.iformat(random_item(self.do_not_know)), \
            context="", tid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)
        # if data:
            # subgraph = [node for node in data if node["name"] in question]
            # TODO: among the key-sentence matches, pick the one closest to
            # the current QA's jump links.
            # node = <closest to the current QA's jump links> in subgraph
        usertopics = ' '.join(self.usertopics)
        # Match only within the currently attached knowledge bases.
        # NOTE(review): Cypher built by string concatenation from `question`
        # — injection risk with untrusted input.
        match_string = "MATCH (n:NluCell) WHERE '" + question + \
            "' CONTAINS n.name and '" + usertopics + \
            "' CONTAINS n.topic RETURN n LIMIT 1"
        subgraph = self.graph.run(match_string).data()
        if subgraph:
            # TODO: check whether subgraph contains a scene root node.
            node = list(subgraph)[0]['n']
            print("Similarity Score: Key sentence")
            result['name'] = node['name']
            result["content"] = self.iformat(random_item(node["content"].split("|")))
            result["context"] = node["topic"]
            result["tid"] = node["tid"]
            result["txt"] = node["txt"]
            result["img"] = node["img"]
            result["button"] = node["button"]
            if node["url"]:
                result["url"] = random_item(node["url"].split("|"))
            if node["behavior"]:
                result["behavior"] = int(node["behavior"], 16)
            if node["parameter"]:
                result["parameter"] = node["parameter"]
            # The node's api hook extracts key info from the original
            # question for a local query / third-party API / crawl.
            func = node["api"]
            if func:
                exec("result['content'] = " + func + "('" + result["content"] + "')")
            return result
        return result

    def remove_name(self, question):
        """Strip the robot's nickname (form of address) from the question."""
        # Redefine surname mis-matches: a bare two-character "小X" is treated
        # as addressing the robot itself.
        if question.startswith("小") and len(question) == 2:
            question = self.user['robotname']
        # Filter forms of address.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(robotname) and len(question) >= 4 and "在线" not in question:
                question = question.lstrip(robotname)
        if not question:
            question = self.user['robotname']
        return question

    @time_me()
    def search(self, question="question", tid="", userid="userid"):
        """NLU search — the main question-answering entry point.

        Args:
            question: User question. Defaults to "question".
            tid: Scene node id. Defaults to "".
            userid: Unique user id. Defaults to "userid".

        Returns:
            Dict contains: question, answer, topic, tid, url, behavior,
            parameter, txt, img, button.
        """
        # Add to question memory:
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)
        # Semantics: scene + full graph + user-config mode (the config is
        # fetched dynamically from the userid).
        # ======================== Initialize configuration =============
        self.user = self.graph.find_one("User", "userid", userid)
        self.usertopics = self.get_usertopics(userid=userid)
        do_not_know = dict(
            question=question,
            name="",
            # content=self.iformat(random_item(self.do_not_know)),
            content="",
            context="", tid="", url="", behavior=0, parameter="",
            txt="", img="", button="", valid=1)
        error_page = dict(
            question=question,
            name="",
            content=self.user['error_page'],
            context="", tid="", url="",
            # Modify: within a scene, behavior is unified to 0x1500. (2018-1-8)
            behavior=int("0x1500", 16),
            parameter="", txt="", img="", button="", valid=0)
        # ======================== 1. Preprocessing =====================
        # Question filtering (sensitive-word filter added 2017-5-25).
        if check_swords(question):
            print("问题包含敏感词!")
            return do_not_know
        # Strip the form of address.
        question = self.remove_name(question)
        # ======================== 2. Navigation ========================
        result = self.extract_navigation(question)
        if result["context"] == "user_navigation":
            self.amemory.append(result)  # add to ordinary memory
            self.pmemory.append(result)
            return result
        # ======================== 3. Semantic scenes ===================
        result = copy.deepcopy(do_not_know)
        # Global context — "repeat" command.
        for item in cmd_repeat:
            # TODO: make sure the repeated item is a real command, not e.g.
            # the closing phrase ("可以了") after a song.
            # TODO: pick the most recent meaningful action from memory as
            # the content to repeat.
            if item == question:
                if self.amemory:
                    return self.amemory[-1]
                else:
                    return do_not_know
        # Scene — exit.
        for item in cmd_end_scene:
            if item == question:
                # Exact-match exit mode.
                # result['behavior'] = int("0x0020", 16)
                result['behavior'] = 0
                result['name'] = '退出'
                # result['content'] = "好的,退出"
                result['content'] = ""
                self.is_scene = False
                self.topic = ""
                self.amemory.clear()  # clear scene memory
                self.pmemory.clear()  # clear scene previous-step memory
                return result
        # Scene — previous step: implemented with deques.
        if self.is_scene:
            for item in cmd_previous_step:
                if item in question:
                    # With link-jump check (scheme adopted 2017-12-22).
                    if len(self.pmemory) > 1:
                        self.amemory.pop()
                        return self.pmemory.pop()
                    elif len(self.pmemory) == 1:
                        return self.pmemory[-1]
                    else:
                        # Modify: return error_page 2017-12-22
                        return error_page
                        # return do_not_know
                    # Without link-jump check (scheme rejected 2017-12-22):
                    # if len(self.pmemory) > 1:
                        # return self.amemory.pop()
                    # elif len(self.amemory) == 1:
                        # return self.amemory[-1]
                    # else:
                        # return do_not_know
            # Scene — next step: implemented with deques.
            for item in cmd_next_step:
                if item in question:
                    if len(self.amemory) >= 1:
                        cur_button = json.loads(self.amemory[-1]['button']) if self.amemory[-1]['button'] else {}
                        # NOTE(review): `next` shadows the builtin here.
                        next = cur_button.get('next', {})
                        if next:
                            next_tid = next['url']
                            next_question = next['content']
                            match_string = "MATCH (n:NluCell {name:'" + \
                                next_question + "', topic:'" + self.topic + \
                                "', tid:" + next_tid + "}) RETURN n"
                            match_data = list(self.graph.run(match_string).data())
                            if match_data:
                                node = match_data[0]['n']
                                result['name'] = self.iformat(node["name"])
                                result["content"] = self.iformat(random_item(node["content"].split("|")))
                                result["context"] = node["topic"]
                                result["tid"] = node["tid"]
                                result["txt"] = node["txt"]
                                result["img"] = node["img"]
                                result["button"] = node["button"]
                                if node["url"]:
                                    result["url"] = random_item(node["url"].split("|"))
                                if node["behavior"]:
                                    result["behavior"] = int(node["behavior"], 16)
                                if node["parameter"]:
                                    result["parameter"] = node["parameter"]
                                func = node["api"]
                                if func:
                                    exec("result['content'] = " + func + "('" + result["content"] + "')")
                                # Add to scene memory.
                                self.pmemory.append(self.amemory[-1])
                                self.amemory.append(result)
                                return result
                    return error_page
        # ========================== Scene matching =====================
        tag = get_tag(question, self.user)
        # subgraph_all = list(self.graph.find("NluCell", "tag", tag))  # list
        subgraph_all = self.graph.find("NluCell", "tag", tag)  # iterator
        usergraph_all = [node for node in subgraph_all if node["topic"] in self.usertopics]
        usergraph_scene = [node for node in usergraph_all if node["topic"] == self.topic]
        if self.is_scene:
            # Inside a scene: semantic mode + key-sentence mode.
            if usergraph_scene:
                result = self.extract_synonym(question, usergraph_scene)
                if not result["context"]:
                    result = self.extract_keysentence(question, usergraph_scene)
                # result = self.extract_pinyin(question, usergraph_scene)
                if result["context"]:
                    print("在场景中,匹配到场景问答对")
                    # Check whether the result's tid is a sub-scene jump link
                    # of the current scene: look the tid up in the jump-link
                    # set of self.amemory[-1].
                    # ===================================================
                    data_img = json.loads(self.amemory[-1]['img']) if self.amemory[-1]['img'] else {}
                    data_button = json.loads(self.amemory[-1]['button']) if self.amemory[-1]['button'] else {}

                    def get_tids(data):
                        # Collect all non-empty jump-link tids from `data`.
                        tids = set()
                        for key in data.keys():
                            tid = data[key]['url']
                            if tid:
                                tids.add(int(tid))
                        return tids

                    pre_tids = get_tids(data_img).union(get_tids(data_button.setdefault('area', {})))
                    if int(result["tid"]) in pre_tids:
                        print("正确匹配到当前场景的子场景")
                        self.pmemory.append(self.amemory[-1])
                        self.amemory.append(result)  # add to scene memory
                        return result
                    # ===================================================
            # When no subgraph exists or nothing matches inside the scene,
            # return the custom error page instead of repeating.
            # Modify: return error_page (2017-12-22)
            # if self.amemory:
                # return self.amemory[-1]
            # else:
                # return error_page
            return error_page
        else:
            # Not in a scene: semantic mode + key-sentence mode.
            result = self.extract_synonym(question, usergraph_all)
            if not result["context"]:
                result = self.extract_keysentence(question)
            # result = self.extract_pinyin(question, usergraph_all)
            if result["tid"] != '':
                # Matched a scene node.
                if int(result["tid"]) == 0:
                    print("不在场景中,匹配到场景根节点")
                    self.is_scene = True  # enter the scene
                    self.topic = result["context"]
                    self.amemory.clear()  # clear ordinary memory on entry
                    self.pmemory.clear()
                    self.amemory.append(result)  # add to scene memory
                    self.pmemory.append(result)
                    return result
                else:
                    print("不在场景中,匹配到场景子节点")
                    return do_not_know
            elif result["context"]:
                # Matched an ordinary node.
                self.topic = result["context"]
                self.amemory.append(result)  # add to ordinary memory
                self.pmemory.append(result)
                return result
        # ======================== 5. Online semantics ==================
        if not self.topic:
            # 1. Music ("sing a song ...").
            if "唱一首" in question or "唱首" in question or "我想听" in question:
                result["behavior"] = int("0x0001", 16)
                result["content"] = "好的,正在准备哦"
            # 2. "What's good to eat nearby?"
            elif "附近" in question or "好吃的" in question:
                result["behavior"] = int("0x001C", 16)
                result["content"] = self.address
            # 3. nlu_tuling (weather).
            elif "天气" in question:
                # After the Tuling API change. Add in 2017-8-4.
                location = get_location(question)
                if not location:
                    # The question contains no address.
                    weather = nlu_tuling(self.address + question)
                else:
                    # The question contains an address.
                    weather = nlu_tuling(question)
                # Before the Tuling API change:
                # weather = nlu_tuling(question, loc=self.address)
                result["behavior"] = int("0x0000", 16)
                try:
                    # Before the Tuling API change.
                    temp = weather.split(";")[0].split(",")[1].split()
                    myweather = temp[0] + temp[2] + temp[3]
                    # After the Tuling API change. Add in 2017-8-3.
                    # temp = weather.split(",")
                    # myweather = temp[1] + temp[2]
                except:
                    # NOTE(review): bare except silently falls back to the
                    # raw API reply — consider narrowing the exception type.
                    myweather = weather
                result["content"] = myweather
                result["context"] = "nlu_tuling"
            # 4. Append-log every question the robot could not answer.
            else:
                with open(log_do_not_know, "a", encoding="UTF-8") as file:
                    file.write(question + "\n")
            # 5. nlu_tuling fallback (disabled):
            # else:
                # result["content"] = nlu_tuling(question, loc=self.address)
                # result["context"] = "nlu_tuling"
            if result["context"]:
                # Matched online semantics.
                self.amemory.append(result)  # add to ordinary memory
        # ==============================================================
        return result
def get_data(timestamp):
    """Fetch K-root RIB snapshots at `timestamp` and store AS paths in Neo4j.

    Pulls routeviews RIB records for the K-root prefix via BGPStream,
    collects the unique (reversed, origin-first) AS paths per announced
    prefix, then creates `asn` nodes and `TO` relationships describing each
    path. Consecutive repeats of an ASN (AS prepending) are collapsed and the
    repeat count stored in the node's `prepended` property.

    Args:
        timestamp: Unix time of the RIB snapshot to query.

    Side effects only (Neo4j writes plus console output); returns None.
    """
    graph = Graph(password="******")
    stream = BGPStream()
    rec = BGPRecord()
    rec_time = None  # time of the last record seen; stays None with no records
    # Alternative root-server prefixes, kept for reference (IPv4):
    # stream.add_filter('prefix', '198.41.0.0/24')    # A-root
    # stream.add_filter('prefix', '192.228.79.0/24')  # B-root, only 1 site
    # stream.add_filter('prefix', '192.33.4.0/24')    # C-root
    # stream.add_filter('prefix', '199.7.91.0/24')    # D-root
    # stream.add_filter('prefix', '192.203.230.0/24') # E-root, IPv4 only
    # stream.add_filter('prefix', '192.5.5.0/24')     # F-root
    # stream.add_filter('prefix', '192.112.36.0/24')  # G-root, IPv4 only
    # stream.add_filter('prefix', '198.97.190.0/24')  # H-root
    # stream.add_filter('prefix', '192.36.148.0/24')  # I-root
    # stream.add_filter('prefix', '192.58.128.0/24')  # J-root
    stream.add_filter('prefix', '193.0.14.0/24')  # K-root
    # stream.add_filter('prefix', '199.7.83.0/24')    # L-root
    # stream.add_filter('prefix', '202.12.27.0/24')   # M-root
    # IPv6 equivalents:
    # stream.add_filter('prefix', '2001:503:ba3e::/48')  # A
    ## stream.add_filter('prefix', '2001:500:84::/48')   # B, only 1 site
    # stream.add_filter('prefix', '2001:500:2::/48')     # C
    # stream.add_filter('prefix', '2001:500:2d::/48')    # D
    # stream.add_filter('prefix', '2001:500:2f::/48')    # F
    # stream.add_filter('prefix', '2001:500:1::/48')     # H
    # stream.add_filter('prefix', '2001:7fe::/33')       # I
    # stream.add_filter('prefix', '2001:503:c27::/48')   # J
    # stream.add_filter('prefix', '2001:7fd::/48')       # K
    # stream.add_filter('prefix', '2001:500:9f::/48')    # L
    # stream.add_filter('prefix', '2001:dc3::/32')       # M
    stream.add_filter('record-type', 'ribs')
    # stream.add_filter('collector', 'rrc01')
    stream.add_filter('project', 'routeviews')
    stream.add_interval_filter(timestamp, timestamp)
    stream.start()

    result = {}
    while stream.get_next_record(rec):
        rec_time = rec.time
        if rec.status == "valid":
            elem = rec.get_next_elem()
            while elem:
                # FIX: was a Python 2 print statement; the rest of this file
                # is Python 3, where that is a SyntaxError.
                print(rec.collector, elem.type, elem.peer_address, elem.peer_asn, elem.fields)
                as_path = elem.fields['as-path'].split()
                as_path.reverse()  # origin AS first
                prefix = elem.fields['prefix']
                if prefix not in result:
                    result[prefix] = []
                result[prefix].append(as_path)
                elem = rec.get_next_elem()

    # Keep only the unique AS paths per prefix.
    for prefix in result:
        result[prefix] = [list(x) for x in set(tuple(x) for x in result[prefix])]
    print('timestamp {} ==> result: {}'.format(rec_time, result))

    for prefix in result:
        for path in result[prefix]:
            print('path: {}'.format(path))
            cur_node = None
            prev_node = None
            counter_as_prepend = 0
            for index, asn in enumerate(path):
                searched_node = graph.find('asn', property_key='label', property_value=asn)
                # FIX: Python 2 `iterator.next()` -> built-in next() with a
                # default, replacing the try/except StopIteration. See if the
                # AS node is already in the db (if yes, cur_node == prev_node).
                cur_node = next(searched_node, None)
                if cur_node is None:
                    # Not found: create a new node for this ASN.
                    cur_node = Node('asn', label=str(asn))
                if index > 0:
                    if index == len(path) - 1:
                        cur_node['path'] = path  # attach AS path to the last ASN
                    if cur_node != prev_node:
                        if counter_as_prepend > 0:
                            cur_node['prepended'] = counter_as_prepend
                            counter_as_prepend = 0  # reset
                        peering = Relationship(cur_node, 'TO', prev_node, time=rec_time, prefix=prefix)
                        # peering['time'] = rec_time
                        # peering['prefix'] = prefix
                        graph.create(peering)
                    else:
                        # Same ASN repeated consecutively: AS prepending.
                        counter_as_prepend += 1
                prev_node = cur_node