Ejemplo n.º 1
0
def createRelationships():
    """Create one graph relationship per entry of the module-level ``relationships``.

    Every entry is read as a mapping with ``start`` and ``end`` sub-mappings
    (each carrying a ``collection`` label and an ``_id``) and a ``name`` used
    as the relationship type.  Both endpoints are looked up by label and
    stringified ``_id`` before the relationship is created.
    """
    db = Graph('http://localhost:7474/db/data')
    for spec in relationships:
        source = spec["start"]
        source_node = db.find_one(
            source["collection"],
            property_key="_id",
            property_value=str(source["_id"]),
        )
        target = spec["end"]
        target_node = db.find_one(
            target["collection"],
            property_key="_id",
            property_value=str(target["_id"]),
        )
        # NOTE(review): neither lookup result is checked before use -- confirm
        # what find_one yields when no node matches.
        db.create(rel(source_node, spec["name"], target_node))
class Neo4j():
    """Helper around a py2neo ``Graph`` for looking up encyclopedia items.

    ``graph`` stays ``None`` until :meth:`connectDB` has been called.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Create the ``Graph`` connection and store it on ``self.graph``."""
        self.graph = Graph(
            "http://localhost:7474",
            username="******",
            password="******",
        )
        print('connect successed')

    def matchItembyTitle(self, value):
        """Look up one ``Item`` node whose ``title`` property equals *value*."""
        return self.graph.find_one(
            label="Item",
            property_key="title",
            property_value=value,
        )

    def matchHudongItembyTitle(self, value):
        """Look up one Hudong Baike item (``HudongItem`` node) by its ``title``."""
        return self.graph.find_one(
            label="HudongItem",
            property_key="title",
            property_value=value,
        )

    def getAllHudongItem(self, limitnum):
        """Return Hudong Baike items, passing *limitnum* as the query limit.

        Each node produced by the query is wrapped in ``HudongItem``.
        """
        nodes = self.graph.find(label="HudongItem", limit=limitnum)
        items = [HudongItem(node) for node in nodes]
        print('load AllHudongItem over ...')
        return items
		
		
#test = Neo4j()
#test.connectDB()
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
class Neo4j():
    """Helper around a py2neo ``Graph`` for looking up encyclopedia items.

    ``graph`` stays ``None`` until :meth:`connectDB` has been called.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Create the ``Graph`` connection and store it on ``self.graph``."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Look up one ``Item`` node whose ``title`` property equals *value*."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    def matchHudongItembyTitle(self, value):
        """Look up one Hudong Baike item (``HudongItem`` node) by its ``title``."""
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    def getLabeledHudongItem(self, filename):
        """Return every already-labelled Hudong Baike item listed in *filename*.

        *filename* (e.g. ``labels.txt``) is read with ``readCSV2``; column 0 of
        each row is the item title and column 1 its label.  Rows whose title
        has no ``HudongItem`` node in the graph are skipped.
        """
        labeled = []
        for line in readCSV2(filename):
            ctx = self.graph.find_one(label="HudongItem", property_key="title", property_value=line[0])
            if ctx is None:
                continue
            cur = HudongItem(ctx)
            cur.label = line[1]
            labeled.append(cur)

        print('load LabeledHudongItem over ...')
        return labeled

    def getAllHudongItem(self, limitnum):
        """Return Hudong Baike items, passing *limitnum* as the query limit."""
        items = [HudongItem(g) for g in self.graph.find(label="HudongItem", limit=limitnum)]
        print('load AllHudongItem over ...')
        return items
		
		
#test = Neo4j()
#test.connectDB()
#answer = test.graph.find_one(label="HudongItem",property_key="title",property_value='火龙果')
#print(answer)
#a = test.getLabeledHudongItem('labels.txt')
#print(a[10].openTypeList)
Ejemplo n.º 4
0
class Graph(object):
    """Wrapper around a ``NeoGraph`` connection for managing ``User`` nodes."""

    def __init__(self, neo4j_uri):
        self.graph = NeoGraph(neo4j_uri)

    def find_node(self, label, node_id):
        """Look up one node with *label* whose ``node_id`` property is *node_id*."""
        return self.graph.find_one(
            label,
            property_key="node_id",
            property_value=node_id,
        )

    def create_user(self, args):
        """Return ``(node, created)`` for the user described by *args*.

        *args* must provide ``username``; ``name`` and ``city`` are read only
        when no ``User`` node with that username exists yet and one has to be
        created.
        """
        existing = self.find_node("User", args["username"])
        if existing:
            return existing, False

        created = Node(
            "User",
            node_id=args["username"],
            name=args["name"],
            city=args["city"],
        )
        self.graph.create(created)
        return created, True

    def delete_user(self, user):
        """Delete the ``User`` node identified by *user*; return whether one was found."""
        found = self.find_node("User", user)
        if not found:
            return False
        self.graph.delete(found)
        return True
Ejemplo n.º 5
0
class Achievement(object):
    """Plain attribute holder for an achievement plus a handle to the graph."""

    def __init__(self, graph_db):
        # NOTE(review): the ``graph_db`` argument is accepted but never used;
        # a connection is always built from settings.DATABASE_URL below.
        self.id = None
        self.name = None
        self.title = None
        self.description = None
        self.date = None
        self.is_visible = True
        self._graph_db = Graph(settings.DATABASE_URL)

    @property
    def achievement_node(self):
        """Look up the ``GraphLabel.ACHIEVEMENT`` node whose ``id`` equals ``self.id``."""
        lookup = dict(property_key='id', property_value=self.id)
        return self._graph_db.find_one(GraphLabel.ACHIEVEMENT, **lookup)

    @property
    def achievement_interests(self):
        """
        get list of interests linked to this achievement

        Not implemented yet: always evaluates to ``None``.
        :return:
        """
        # ach_interests = self.graph_db.match(start_node=self.achievement_node,
        #                                     rel_type=Relationship.)
        return None
class Neo4j():
    """Helper around a py2neo ``Graph`` for title-based item lookups.

    ``graph`` stays ``None`` until :meth:`connectDB` has been called.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Create the ``Graph`` connection and store it on ``self.graph``."""
        self.graph = Graph(
            "http://localhost:7474",
            username="******",
            password="******",
        )

    def matchItembyTitle(self, value):
        """Look up one ``Item`` node whose ``title`` property equals *value*."""
        return self.graph.find_one(
            label="Item",
            property_key="title",
            property_value=value,
        )

    def matchHudongItembyTitle(self, value):
        """Look up one Hudong Baike item (``HudongItem`` node) by its ``title``."""
        return self.graph.find_one(
            label="HudongItem",
            property_key="title",
            property_value=value,
        )
Ejemplo n.º 7
0
class NeoProvider(object):
    """Builds ``Screen`` DTOs from ``screen`` nodes stored in the graph."""

    def __init__(self):
        # TODO read this from a config file
        uri = "http://*****:*****@localhost:7474/db/data"
        self.graph = Graph(uri)
        self.store = Store(self.graph)

    def _build_screen(self, node):
        """Assemble the ``Screen`` DTO for *node* from its outgoing relationships."""
        # Both relationship queries are issued before any DTO is constructed.
        nav_rels = self.graph.match(node, "nav")
        asset_rels = self.graph.match(node, "hasAsset")

        assets = [Asset(asset_rel.end_node) for asset_rel in asset_rels]
        navs = [Navigation(nav_rel) for nav_rel in nav_rels]
        return Screen(node, navs, assets)

    def get_start_screen(self):
        """Return the ``Screen`` for the node whose ``start`` property is ``True``."""
        start_node = self.graph.find_one("screen", "start", True)
        return self._build_screen(start_node)

    def get_next_screen(self, current_screen_key, option):
        """Return the ``Screen`` reached from *current_screen_key* via *option*.

        The current node is looked up by its ``id`` property; the ``nav``
        relationship whose ``opt`` property equals ``int(option)`` is followed.

        Raises:
            IndexError: if no ``nav`` relationship carries that option.
        """
        current_node = self.graph.find_one("screen", "id", current_screen_key)

        current_rels = self.graph.match(current_node, "nav")
        selected_rel = next(
            (rel for rel in current_rels if rel.properties['opt'] == int(option)),
            None,
        )
        if selected_rel is None:
            # Same exception type as indexing an empty result list, but with
            # enough context to diagnose a bad option.
            raise IndexError(
                "no 'nav' relationship with opt=%r from screen %r"
                % (option, current_screen_key)
            )

        return self._build_screen(selected_rel.end_node)
Ejemplo n.º 8
0
class Neo4j():
    """Helper around a py2neo ``Graph`` for item and relation lookups.

    ``graph`` stays ``None`` until :meth:`connectDB` has been called.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Create the ``Graph`` connection and store it on ``self.graph``."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def matchItembyTitle(self, value):
        """Look up one ``Item`` node whose ``title`` property equals *value*."""
        return self.graph.find_one(label="Item", property_key="title", property_value=value)

    def matchHudongItembyTitle(self, value):
        """Look up one Hudong Baike item (``HudongItem`` node) by its ``title``."""
        return self.graph.find_one(label="HudongItem", property_key="title", property_value=value)

    def getEntityRelationbyEntity(self, value):
        """Return the outgoing relations of the entity whose ``title`` is *value*.

        The title is sent as a query parameter instead of being spliced into
        the Cypher text, so quotes or Cypher fragments in *value* cannot alter
        the statement.  The ``{name}`` placeholder form is used because it is
        the one understood by the Neo4j servers that expose the legacy
        ``find_one`` API used elsewhere in this class.
        """
        statement = (
            "MATCH (entity1) - [rel] -> (entity2)  "
            "WHERE entity1.title = {title} RETURN rel,entity2"
        )
        return self.graph.data(statement, title=value)
Ejemplo n.º 9
0
def before_all(context):
    """Remove leftover test data from the graph, then run the shared setup.

    Deletes the test user node and the persona interest node (together with
    the ``INTERESTED_IN`` relationships pointing at it), sets
    ``context.base_url`` and delegates to ``benv.before_all``.
    """
    # import falcon_test
    # context.attachment_dir = os.path.join(os.path.dirname(falcon_test.__file__), 'tests/data')
    # context.sms_path = os.path.join(os.path.dirname(falcon_test.__file__), '../../var/sms/')
    # context.mail_path = os.path.join(os.path.dirname(falcon_test.__file__), '../../var/mail/')
    # clear database
    graph_db = Graph(settings.DATABASE_URL)
    # graph_db.delete_all()
    new_user_node = graph_db.find_one('USER',
                                      property_key='email',
                                      property_value='*****@*****.**')
    if new_user_node is not None:
        graph_db.delete(new_user_node)

    interest_node = graph_db.find_one('INTEREST', property_key='name',
                                      property_value=PERSONAS['interest']['name'])
    # Only clean up when the node exists: matching with end_node=None would
    # select the INTERESTED_IN relationships of every node, not just this one.
    if interest_node is not None:
        interest_relationships = graph_db.match(start_node=None,
                                                rel_type='INTERESTED_IN',
                                                end_node=interest_node)
        for relationship in interest_relationships:
            graph_db.delete(relationship)
        graph_db.delete(interest_node)

    context.base_url = "http://localhost:8000"
    benv.before_all(context)
class AgoraOrganization(object):
    """Attribute holder for an organization plus graph lookups for it."""

    def __init__(self):
        self.unique_id = None
        self.name = None
        self.email = None
        self.website = None
        self.mission_statement = None
        self.is_open = False
        self.is_invite_only = False
        self.graph_db = Graph()

    @property
    def org_node(self):
        """Look up the organization node whose ``name`` equals ``self.name``."""
        lookup = dict(property_key='name', property_value=self.name)
        return self.graph_db.find_one(AgoraLabel.ORGANIZATION, **lookup)

    @property
    def org_members(self):
        """
        list of the members of the organization
        :return: list of tuple of member name, email
        """
        member_rels = self.graph_db.match(
            start_node=self.org_node,
            rel_type=AgoraRelationship.MEMBER_OF,
            end_node=None,
        )
        return [
            (member_rel.end_node["name"], member_rel.end_node["email"])
            for member_rel in member_rels
        ]

    def create_organization(self):
        """
        create a new organization

        A fresh UUID4 string is stored in ``self.unique_id`` before the node
        is built from the instance attributes.
        :return: py2neo Node
        """
        self.unique_id = str(uuid.uuid4())
        properties = dict(
            name=self.name,
            mission_statement=self.mission_statement,
            unique_id=self.unique_id,
            email=self.email,
            is_open=self.is_open,
            is_invite_only=self.is_invite_only,
            website=self.website,
        )

        organization = Node.cast(AgoraLabel.ORGANIZATION, properties)
        self.graph_db.create(organization)
        return organization
Ejemplo n.º 11
0
class Build_Configuration:
    """Builds a tree of graph nodes, tracking the current path as a namespace.

    ``namespace`` is a stack of path components (starting with ``"Start"``)
    and ``parent_node`` a parallel stack of the nodes created for them.
    This snippet is Python 2 code (it uses the ``print`` statement).
    """

    def __init__(self):
        self.graph = Graph()
        # Start from an empty database every time a configuration is built.
        self.graph.delete_all()
        self.namespace = ["Start"]
        self.parent_node = []

    def check_duplicates(self, label, name):
        """Raise ``ValueError`` if a node with *label* has ``name`` equal to *name*."""
        # print "label",label,name
        if self.graph.find_one(label, property_key="name", property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        """Return the current namespace path with *name* appended, joined by ``/``.

        The namespace stack itself is not modified.
        """
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        """Return the node on top of the parent stack (IndexError if it is empty)."""
        return self.parent_node[-1]

    def pop_namespace(self):
        """Drop the innermost namespace component and its parent node."""
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name, properties):
        """Create a node under the current parent and optionally descend into it.

        The node gets ``namespace`` (full path) and ``name`` properties plus
        everything in *properties*.  When a parent exists, a *relationship*
        from the parent to the new node is created.  When *push_namespace* is
        ``True`` the node becomes the new current parent.
        """
        namespace = self.get_namespace(name)

        # NOTE(review): this searches the ``name`` property for the full
        # namespace path, while the path is stored under ``namespace`` below --
        # confirm the duplicate check can actually match.
        self.check_duplicates(label, name=namespace)

        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        if len(self.parent_node) != 0:
            relation_enity = Relationship(self.get_parent_node(), relationship, node)

            self.graph.create(relation_enity)

        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
class AgoraAchievement(object):
    def __init__(self, graph_db):
        self.name = None
        self.unique_id = None
        self.description = None
        self.title = None
        self.is_visible = True
        self.date = None
        self.graph_db = Graph()

    @property
    def achievement_node(self):
        return self.graph_db.find_one(AgoraLabel.ACHIEVEMENT,
                                      property_key='unique_id',
                                      property_value=self.unique_id)

    @property
    def achievement_interests(self):
        """
Ejemplo n.º 13
0
class Neo4JClient:
    """Client for Neo4J"""

    def __init__(self):
        authenticate("localhost:7474", secrets.NEO4J_USERNAME, secrets.NEO4J_PASSWORD)
        self.graph = Graph("http://localhost:7474/db/data/")

    def create_user_node(self, user_dict):
        """Create and return a ``Person`` node from the whitelisted keys of *user_dict*.

        Only ``name``, ``id_str``, ``description`` and ``screen_name`` are
        kept as properties.  Raises ``KeyError`` when ``name`` is missing.
        """
        mykeys = ('name', 'id_str', 'description', 'screen_name')
        user_dict = {k: v for (k, v) in user_dict.items() if k in mykeys}
        # NOTE(review): the second positional argument is the user's name --
        # confirm it is meant to be passed positionally in addition to being
        # a property.
        user_node = Node('Person', user_dict['name'], **user_dict)
        self.graph.create(user_node)
        return user_node

    def find_user_node(self, key, value):
        """Look up one ``Person`` node whose property *key* equals *value*."""
        return self.graph.find_one('Person', property_key=key, property_value=value)

    def create_rel(self, start_node, end_node, rel_type):
        """Create a *rel_type* relationship from *start_node* to *end_node*."""
        return self.graph.create((start_node, rel_type, end_node))

    def update_user_node(self, id, user_dict):
        """Placeholder for updating a user node; not implemented yet.

        ``self`` was missing from the signature, so calling this on an
        instance with both arguments raised ``TypeError``.
        """
        pass
Ejemplo n.º 14
0
# Create relationships between nodes.
# (test_node_1, test_node_2 and test_graph are not defined in this snippet.)
node_1_call_node_2 = Relationship(test_node_1, 'CALL', test_node_2)
node_1_call_node_2['count'] = 1
node_2_call_node_1 = Relationship(test_node_2, 'CALL', test_node_1)
node_2_call_node_1['count'] = 2
test_graph.create(node_1_call_node_2)
test_graph.create(node_2_call_node_1)

# Assign and update property values on nodes/relationships:
# change the value locally, then push it to the graph.
node_1_call_node_2['count'] += 1
test_graph.push(node_1_call_node_2)

# Look up nodes and relationships by property value (find, find_one)
find_code_1 = test_graph.find_one(label="Person",
                                  property_key="name",
                                  property_value="test_node_1")

find_code_3 = test_graph.find_one(label="Person",
                                  property_key="name",
                                  property_value="test_node_2")

print(find_code_1['name'])

# Look up related nodes/relationships starting from a node/relationship
find_relationship = test_graph.match_one(start_node=find_code_1,
                                         end_node=find_code_3,
                                         bidirectional=False)
print(find_relationship)

# The arguments of match and match_one include at least one of
# start_node, Relationship, end_node.
Ejemplo n.º 15
0
# Python 2 print statement; ``graph`` is not defined in this snippet.
print graph

# find a node or set of nodes according to properties and labels
# graph.find_one() # returns a single node
# graph.find() # returns a generator

# Let's find Marnee
# marnee_node = graph.find_one("Person", property_key="name", property_value="Marnee")
# print "find_one Marnee %s" % marnee_node
#
# marnee_generator = graph.find("Person", property_key="name", property_value="Marnee")
# for marnee in marnee_generator:
#     print marnee

# Let's find Julian
# Look up one Person node whose ``name`` property is "Julian".
julian_node = graph.find_one("Person", property_key="name", property_value="Julian")
# print "find_one Julian %s" % julian_node
#
# # Let's find all the Persons Julian knows
# # show the Cypher -- MATCH
# # show the code
# # graph.match()
# # graph.match_one()
#
#
# julian_knows = graph.match(start_node=julian_node,
#                            rel_type="KNOWS",
#                            end_node=None)
# for friend in julian_knows:
#     print "friend %s" % friend
#
Ejemplo n.º 16
0
class GraphDB():
    """Access layer for the ``user`` / ``topic_name`` graph.

    Nodes labelled ``user`` are created by :meth:`create_node_from_user` and
    carry ``name`` (the screen name), ``id`` and several counters.
    """

    def __init__(self, user=NEO4J_USER, pwd=NEO4J_PWD, host=NEO4J_HOST):
        self.graph = Graph("http://%s:%s@%s/db/data/" % (user, pwd, host))

    def query(self, query_str, stream=False, params=None):
        """Run a Cypher statement and return its result.

        Args:
            query_str: the Cypher text.
            stream: use ``cypher.stream`` instead of ``cypher.execute``.
            params: optional mapping of Cypher parameters referenced by
                *query_str*; omitted from the call when ``None``.
        """
        run = self.graph.cypher.stream if stream else self.graph.cypher.execute
        if params is None:
            return run(query_str)
        return run(query_str, params)

    def _get_or_create_user_node(self, user):
        """Return the ``user`` node for *user*, creating it when it is missing."""
        user_node = self.graph.find_one("user", 'id', user.id_str)
        if not user_node:
            user_node = self.create_node_from_user(user)
            self.graph.create(user_node)
        return user_node

    def _increment_relationship(self, start_node, relation, end_node):
        """Create the relationship with ``count`` 1, or bump ``count`` if it exists."""
        relationship = self.graph.match_one(start_node, relation, end_node)
        if not relationship:
            relationship = Relationship(start_node, relation, end_node, count=1)
            self.graph.create(relationship)
        else:
            relationship.properties['count'] += 1
            relationship.push()

    def create_relation_user_to_topic(self, user, relation, topic_name):
        """Count one more *relation* from *user* to the topic named *topic_name*."""
        userNode = self._get_or_create_user_node(user)

        topicNode = self.graph.find_one("topic_name", 'name', topic_name)
        if not topicNode:
            topicNode = Node("topic_name", name=topic_name)
            self.graph.create(topicNode)

        self._increment_relationship(userNode, relation, topicNode)

    # Relations: follows, possibly favourites, retweets

    def create_relation_user_to_user(self, userA, relation, userB):
        """Count one more *relation* from *userA* to *userB*."""
        # Resolving A before B means a self-relation reuses the node that was
        # just created instead of creating a second one for the same id.
        userANode = self._get_or_create_user_node(userA)
        userBNode = self._get_or_create_user_node(userB)
        self._increment_relationship(userANode, relation, userBNode)

    def increment_user_counter(self, user, counter, n):
        """Add *n* to property *counter* of *user*'s node (starting from *n*)."""
        userNode = self._get_or_create_user_node(user)

        if counter in userNode.properties:
            userNode.properties[counter] += n
        else:
            userNode.properties[counter] = n
        userNode.push()

    def get_all_users(self):
        """Return ``{'name', 'id_str'}`` dicts for every ``user`` node."""
        users = []
        for u in self.graph.find('user'):
            props = u.properties
            # create_node_from_user stores the screen name under ``name``;
            # ``screen_name`` is still honoured in case some nodes carry it.
            screen_name = props.get('screen_name')
            if screen_name is None:
                screen_name = props.get('name')
            users.append({'name': screen_name, 'id_str': props.get('id')})
        return users

    def create_node_from_user(self, user):
        """Build (but do not store) a ``user`` node from *user*'s attributes."""
        userNode = Node("user", name=user.screen_name, id=user.id_str, followers_count=user.followers_count,
            friends_count=user.friends_count, statuses_count=user.statuses_count, favourites_count=user.favourites_count)
        return userNode

    def quicksearch(self, username, limit=10):
        """Return up to *limit* distinct user names starting with *username*.

        *username* is still interpreted as a regular-expression prefix, but it
        and *limit* are sent as Cypher parameters rather than formatted into
        the statement, so they cannot change the query structure.
        """
        cql_query = "match(u:user) WHERE u.name =~ {pattern} RETURN DISTINCT u.name LIMIT {limit};"
        return self.query(cql_query, params={"pattern": "%s.*" % username, "limit": int(limit)})

    def get_user_count(self):
        """Return the number of distinct ``user`` nodes (0 when the query is empty)."""
        cql_query = "match(u:user) RETURN count(DISTINCT u) AS c;"
        for row in self.query(cql_query):
            return row['c']
        return 0
Ejemplo n.º 17
0
    employees = [tuple(line) for line in reader]

f.close()

# Read the link CSV, skipping its header row; each remaining row becomes a tuple.
with open(linkDataFile) as f:
    reader = csv.reader(f)
    next(reader, None)
    relations = [tuple(line) for line in reader]

# The ``with`` block above has already closed the file.
f.close()

employee_entries = []

# Wrap every link row in a {"links": row} dict.
# NOTE(review): employee_entries is not read again in the visible code.
for link in relations:
    employee_entries.append({"links": link})

graph = Graph("http://*****:*****@localhost:7474/db/data/")
graph.cypher.execute(
    "MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r")  # deleting existing data

#print (employees)
# One Employee node per row of ``employees`` (built before this snippet):
# column 0 is the id, column 1 the e-mail address.
for emp in employees:
    graph.create(Node("Employee", id=emp[0], email=str(emp[1]), group=1))
    #print(emp[0])

# One EMAIL relationship per link row: columns 0 and 1 are the employee ids
# of the two endpoints, column 2 the weight.
for link in relations:
    node1 = graph.find_one("Employee", "id", link[0])
    node2 = graph.find_one("Employee", "id", link[1])
    #print(node1)
    graph.create(Relationship(node1, "EMAIL", node2, weight=link[2]))
             dislikeCount=arraystring['dislikeCount'],
             likeCount=int(arraystring['likeCount']))
    graph.create(a)
    print("Current run number(While Node Creation): " + str(i))
    # print(arrayjson[i]['videoInfo']['id'])                          ## uncomment to print the data

    ## For-loop for creating relation ships between the created nodes
for i in range(len(arrayjson)):
    element = arrayjson[i]
    for j in range(i - 1, -1, -1):

        # For establishing 'SAME_CHANNEL' relation
        if arrayjson[j]['videoInfo']['snippet']['channelId'] == element[
                'videoInfo']['snippet']['channelId']:
            a = graph.find_one("Youtube",
                               property_key='name',
                               property_value=element['videoInfo']['id'])
            b = graph.find_one("Youtube",
                               property_key='name',
                               property_value=arrayjson[j]['videoInfo']['id'])
            channelRelation = Relationship(a, "SAME_CHANNEL", b)
            graph.create(channelRelation)

        # For establishing 'SIMILAR_DESC' relation
        Count = descriptionCompare(
            arrayjson[i]['videoInfo']['snippet']['description'],
            arrayjson[j]['videoInfo']['snippet']['description'])
        if Count > 3000:
            a = graph.find_one("Youtube",
                               property_key='name',
                               property_value=element['videoInfo']['id'])
Ejemplo n.º 19
0
# mobile_node_1 = Node("Mobile", name="18610558465")

csv_file_path = "D:/github_program/myPython/docs/rst/test.csv"
red = pd.read_csv(csv_file_path)
print(red)

# NOTE(review): this unconditional exit makes everything below unreachable --
# confirm it is a deliberate debugging stop.
sys.exit(0)

# Create a Mobile node for every CSV value that is not in the graph yet.
# The file is opened in a ``with`` block so the handle is always closed.
with open(csv_file_path, "r") as csv_file:
    reader = csv.reader(csv_file)
    for line in reader:
        # print(len(line))
        for value in line:
            print(value)

            find_code_1 = test_graph.find_one(label="Mobile",
                                              property_key="name",
                                              property_value=value)
            if find_code_1 is None:
                # Create the node for the value that was looked up; a fixed
                # number here would add the same node for every missing value.
                mobile_node_1 = Node("Mobile", name=value)
                test_graph.create(mobile_node_1)
            else:
                print("Exist")

sys.exit(0)

find_code_1 = test_graph.find_one(label="Mobile",
                                  property_key="name",
                                  property_value="18610558465")
print(find_code_1)
if find_code_1 is None:
    mobile_node_1 = Node("Mobile", name="18610558465")
									for result in results:
										print("\t\t\t\t" + result['full_name'] + " FOUND")
								else:
									# print("\t\t\t\tNOT FOUND! Creating Author...")
									author_to_be_added = graph.merge_one("Author", "link", author["link"])
									author_str_split_list = author["name"].split()
									if (len(author_str_split_list) == 1):
										author_to_be_added['full_name'] = author["name"].title()
										author_to_be_added['fist_name'] = author_str_split_list[0]
										author_to_be_added['middle_name'] = " "
										author_to_be_added['last_name'] = " "
									elif (len(author_str_split_list) == 2):
										author_to_be_added['full_name'] = author["name"].title()
										author_to_be_added['fist_name'] = author_str_split_list[0]
										author_to_be_added['middle_name'] = " "
										author_to_be_added['last_name'] = author_str_split_list[1]
									elif(len(author_str_split_list) == 3):
										author_to_be_added['full_name'] = author["name"].title()
										author_to_be_added['fist_name'] = author_str_split_list[0]
										author_to_be_added['middle_name'] = author_str_split_list[1]
										author_to_be_added['last_name'] = author_str_split_list[2]
									author_to_be_added.push()

								if authors.index(author) == 0:
									author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find_one('Author', 'link', author["link"]), primary_author="YES"))
									# primary_author_bool = False
								else:
									# pass
									author_relationship_to_be_added = graph.create_unique(Relationship(article_to_be_added, "authored_by", graph.find_one('Author', 'link', author["link"]), primary_author="NO"))

print(j_list)
Ejemplo n.º 21
0
# Register credentials for the local server, then open the graph connection.
authenticate("localhost:7474","t","t")
graph = Graph("http://localhost:7474/db/data/")
def tofrac(a):
    """Return *a* divided by 10 raised to the length of ``str(a)``.

    For a non-negative integer this moves all digits behind the decimal
    point, e.g. ``tofrac(25) == 0.25``.
    """
    scale = float(10 ** len(str(a)))
    return float(float(a) / scale)
#Relationship(a, "similartags", b, weight = tcount)
# Create a placeholder node with id 0 and immediately read it back as ``n4``.
node=Node("Ariana",id=0,name="zero_node",type=0,date=0)
graph.create(node)
array=[]
rel_ind=1
rel_type=0
score={}
tmp={}
n4= graph.find_one("Ariana",property_key = 'id', property_value = 0)
# Process every JSON file in output1/; each file is loaded as one document
# with a ``meta`` mapping.
# NOTE(review): rel_ind, rel_type, score, tmp, n4, hf, mf and dt are not read
# in the visible part of this loop -- the snippet looks cut off.
for fname in iglob(os.path.expanduser('output1/*.json')):
    with open(fname) as fin:
        print (fname)
        hf=1
        mf=1
        tweets= json.load(fin)
        array.append(tweets)
        auth_id=tweets['meta']['author_id']
        auth_name=tweets['meta']['author_name']
        #print (auth_id)
        dt=tweets['meta']['date']['$date']
        # Create a node for the author only if none with that name exists yet.
        if(graph.find_one("Ariana",property_key = 'name', property_value = auth_name)==None):
            node = Node("Ariana",id=auth_id, name=tweets['meta']['author_name'],bundle_id=tweets['meta']['retweetOf'])
            graph.create(node)
            #score.setdefault(auth_name,{})
Ejemplo n.º 22
0
# Create: several ways of constructing a Graph connection
graph_1 = Graph()
graph_2 = Graph(host='localhost')
graph_3 = Graph('http://localhost:7474/db/data/')
# Combine a, b and r with ``|`` (none of them is defined in this snippet).
s = a | b | r
graph = Graph(password='******')
# Add
graph.create(s)

# Query
data = graph.data('MATCH (p:Person) return p')
print(data)
#### DataFrame
df = DataFrame(data)

node = graph.find_one(label='Person')
print(node)
relationship = graph.match_one(rel_type='KNOWS')
print(relationship)

# Update: change the property locally, then push it to the graph
node = graph.find_one(label='Person')
node['age'] = 21
graph.push(node)

# Delete: a Node's Relationships must be deleted first, otherwise the Node
# cannot be deleted
node = graph.find_one(label='Person')
relationship = graph.match_one(rel_type='KNOWS')
graph.delete(relationship)
graph.delete(node)
Ejemplo n.º 23
0
# NOTE(review): none of the three breakpoint constants is read in the visible
# part of this script.
BREAKPOINT_INV_COMPANY = 1  # breakpoint for importing company shareholding relations
BREAKPOINT_INV_PERSON = 1  # breakpoint for importing personal shareholding relations
BREAKPOINT_LEADER = 1  # breakpoint for importing director/supervisor/executive relations


if __name__ == '__main__':
    df1 = pd.read_csv('../Data/shareholders1.csv', encoding='utf-8', sep=',')
    for i in range(len(df1)):
        data = df1.iloc[i, :]
        entname = data['entname']
        regcap_CNY = data['regcap_CNY']
        # ``holder`` is split on '_' into a label part and a name part.
        holder_label, holder = (data['holder'].split('_'))[0], (data['holder'].split('_'))[1]
        money_CNY = float(data['money_CNY'])
        # Text before the '%' sign, converted to a fraction rounded to 6 places.
        share_ratio = round(float((data['share_ratio'].split('%'))[0])/100, 6)
        FLAGS = str(data['flags'])
        # Find the company node, creating it when missing; an existing node
        # only has its registered capital refreshed.
        com_node = graph.find_one(label='COMPANY', property_key='entname', property_value=entname)
        if not com_node:
            com_node = Node('COMPANY')
            com_node['entname'] = entname
            com_node['regcap_CNY'] = regcap_CNY
            com_node['FLAGS'] = FLAGS
            graph.create(com_node)
        else:
            com_node['regcap_CNY'] = regcap_CNY
            graph.push(com_node)
        # The holder is itself a company: find or start building its node.
        # NOTE(review): the snippet ends inside this branch.
        if holder_label == 'COMPANY':
            holder_node = graph.find_one(label='COMPANY', property_key='entname', property_value=holder)
            if not holder_node:
                holder_node = Node('COMPANY')
                holder_node['entname'] = holder
                holder_node['FLAGS'] = FLAGS
Ejemplo n.º 24
0
from py2neo import Node, Graph
import json
import time
import pandas as pd

start_time = time.time()
print("Start create nodes")

# The context manager closes the file, so no explicit close() is needed.
with open('config.json', 'r') as f:
    config = json.load(f)

graph_address = 'http://{}:{}@localhost:7474/db/data/'.format(config['neo4j_credentials']['username'],
                                                              config['neo4j_credentials']['password'])

graph = Graph(graph_address)

path = config['path_to_data']

# Open the node list in a ``with`` block so the handle is released once the
# CSV has been parsed.
with open(path + "asoiaf-{}-nodes.csv".format(config["book_to_analyse"]), "r") as f:
    nodes = pd.read_csv(f)

list_nodes = list(nodes["Id"])

# create nodes: one ``character`` node per distinct Id that is not in the
# graph yet
for x in set(list_nodes):
    if not graph.find_one('character', property_key='name', property_value=x):
        graph.create(Node('character', name=x))

print("--- %s seconds ---" % (time.time() - start_time))
Ejemplo n.º 25
0
# ``f``, ``find_topic`` and ``graph`` are not defined in this snippet.
content = f.read()
topics = find_topic(content)
#create topic, turn and user nodes
#user does not need id, and has only a name property
#remove author property from turn and add a relationship 
#PUBLISHES between user and turn
#
for topic in topics:
    topic_node = Node("Topic",title=topic.title)
    for turn in topic.turns:
        turn_node = Node("Turn",tid=turn.tid, 
            text=turn.text,date=turn.date)
        author = turn.author
        # Link the turn to its author, reusing the User node when one exists.
        if len(author) > 0:
           # find author node from graph
            author_node=graph.find_one("User", property_key="name",
                property_value=author)
            if author_node is None:
                author_node=Node("User",name=author)
            author_turn = Relationship(author_node, "PUBLISHES", turn_node)
            graph.create(author_turn)
        # A turn without a parent hangs directly off the topic; otherwise it
        # replies to its parent turn, if that turn is found in the graph.
        if turn.parent_turn is None:
            topic_turn=Relationship(topic_node, "CONTAINS",turn_node)
            graph.create(topic_turn) 
        else:
            p_tid = turn.parent_turn.tid
            parent_node = graph.find_one("Turn",
                property_key="tid", property_value=p_tid)
            # NOTE(review): when the parent is not found no REPLIES
            # relationship is created for this turn in the visible code.
            if parent_node is not None:
                turn_turn=Relationship(turn_node, "REPLIES", parent_node)
                graph.create(turn_turn)
                #find parent_node's author and create a relationship
Ejemplo n.º 26
0
class LoadDatatoNeo4J(object):
    """Splits wikidata entity relations into CSV files using the graph as a filter.

    ``graph`` stays ``None`` until :meth:`connectDB` has been called.
    """
    graph = None

    def __init__(self):
        print("start load data ...")

    def connectDB(self):
        """Create the ``Graph`` connection and store it on ``self.graph``."""
        self.graph = Graph("http://localhost:7474", username="******", password="******")
        print("connect neo4j success!")

    def _find_ner_item(self, title):
        """Look up one ``NerItem`` node whose ``title`` equals *title*."""
        return self.graph.find_one(
            property_key="title",
            property_value=title,
            label="NerItem",
        )

    def readData(self):
        """Read ``entityRelation.json`` line by line and write three CSV files.

        Each input line is a JSON object with ``entity1``, ``entity2`` and
        ``relation``.  Lines whose ``entity1`` is not a ``NerItem`` in the
        graph are dropped.  For the rest:

        * ``entity2`` found in the graph -> ``wikidata_relation.csv``
        * otherwise -> ``wikidata_relation2.csv``, and the first time such an
          ``entity2`` is seen it is also listed in ``new_node.csv``.
        """
        count = 0
        # Set membership keeps the "already written?" check O(1) per line.
        seen_new_nodes = set()

        # Every file is opened exactly once.  The input comes first so that a
        # missing input file does not truncate existing outputs.
        with open("../wikidataRelation/entityRelation.json", "r") as fr, \
                open("new_node.csv", "w") as fwNewNode, \
                open("wikidata_relation.csv", "w") as fwWikidataRelation, \
                open("wikidata_relation2.csv", "w") as fwWikidataRelation2:
            fwNewNode.write("title,lable" + '\n')
            fwWikidataRelation.write("HudongItem1,relation,HudongItem2" + '\n')
            fwWikidataRelation2.write("HudongItem,relation,NewNode" + '\n')

            for line in fr:
                print(line)
                entityRelationJson = json.loads(line)
                entity1 = entityRelationJson['entity1']
                entity2 = entityRelationJson['entity2']

                find_entity1_result = self._find_ner_item(entity1)
                count += 1
                print(count)

                # entity1 is not a known entity: nothing to record, and no
                # need to query the graph for entity2 either.
                if find_entity1_result is None:
                    continue

                # Strip commas and double quotes from the relation so it
                # cannot break the CSV row.
                # NOTE(review): entity1/entity2 are written unescaped --
                # confirm they never contain commas.
                entityRelation = re.sub('[,"]', "", entityRelationJson['relation'])
                row = entity1 + "," + entityRelation + "," + entity2 + '\n'

                if self._find_ner_item(entity2) is None:
                    # Unknown entity2: register it once as a new node, then
                    # record the relation to that new node.
                    if entity2 not in seen_new_nodes:
                        fwNewNode.write(entity2 + "," + "newNode" + '\n')
                        seen_new_nodes.add(entity2)
                    fwWikidataRelation2.write(row)
                else:
                    # Both entities are known: record the relation directly.
                    fwWikidataRelation.write(row)
class Neo4j():
	"""Query helper around a py2neo ``Graph`` that stores ``Hudong`` nodes.

	``connectDB()`` must be called before any query method; until then
	``graph`` is None.

	Every Cypher statement is sent with query parameters instead of splicing
	caller-supplied text into the statement, so titles and relation names
	that contain quotes or backslashes are matched literally instead of
	breaking (or rewriting) the query.
	"""
	graph = None

	def __init__(self):
		print("create neo4j class ...")

	def connectDB(self):
		"""Open the database connection and keep it on ``self.graph``."""
		self.graph = Graph("http://localhost:7474", username="******", password="******")

	def _query(self, statement, **params):
		"""Run a parameterised Cypher statement and return the list of rows."""
		return self.graph.data(statement, params)

	def matchItembyTitle(self,value):
		"""Return the first ``Hudong`` node whose title equals ``value``, or None."""
		answer = self.graph.find_one(label="Hudong",property_key="title",property_value=value)
		return answer

	# Return the Hudong encyclopedia item with the given title.
	def matchHudongItembyTitle(self,value):
		"""Return the first ``Hudong`` node whose title equals ``value``, or None."""
		answer = self.graph.find_one(label="Hudong",property_key="title",property_value=value)
		return answer

	# Return the outgoing relations of the entity with the given title.
	def getEntityRelationbyEntity(self,value):
		"""Return ``rel, entity2`` rows for every relation leaving the node titled ``value``."""
		return self._query(
			"MATCH (entity1) - [rel] -> (entity2)  WHERE entity1.title = $title RETURN rel,entity2",
			title=value)

	# Return every (n1, rel, n2) triple whose relation has the given type.
	def findRelationEntity(self,value):
		"""Return ``n1, rel, n2`` rows where ``rel.type`` equals ``value`` and n1 is a ``Hudong`` node."""
		return self._query(
			"MATCH (n1:Hudong)- [rel {type: $type}] -> (n2) RETURN n1,rel,n2",
			type=value)

	# Find entity1 and its relations (differs from getEntityRelationbyEntity
	# only in the shape of the returned rows).
	def findRelationByEntity(self,entity1):
		"""Return ``n1, rel, n2`` rows for relations leaving the ``Hudong`` node titled ``entity1``."""
		return self._query(
			"MATCH (n1:Hudong {title: $title})- [rel] -> (n2) RETURN n1,rel,n2",
			title=entity1)

	# Find relations that point at the given entity.
	def findRelationByEntity2(self,entity1):
		"""Return ``n1, rel, n2`` rows for relations arriving at the ``Hudong`` node titled ``entity1``."""
		return self._query(
			"MATCH (n1)- [rel] -> (n2:Hudong {title: $title}) RETURN n1,rel,n2",
			title=entity1)

	# Given entity1 and a relation, find entity2.
	def findOtherEntities(self,entity,relation):
		"""Return rows for ``(entity)-[relation]->(n2)`` with ``entity`` as the start node."""
		return self._query(
			"MATCH (n1:Hudong {title: $title})- [rel:RELATION {type: $relation}] -> (n2) RETURN n1,rel,n2",
			title=entity, relation=relation)

	# Given entity2 and a relation, find entity1.
	def findOtherEntities2(self,entity,relation):
		"""Return rows for ``(n1)-[relation]->(entity)`` with ``entity`` as the end node."""
		return self._query(
			"MATCH (n1)- [rel:RELATION {type: $relation}] -> (n2:Hudong {title: $title}) RETURN n1,rel,n2",
			title=entity, relation=relation)

	# Find the relations between two entities.
	def findRelationByEntities(self,entity1,entity2):
		"""Return rows for every relation going from ``entity1`` to ``entity2``."""
		return self._query(
			"MATCH (n1:Hudong {title: $entity1})- [rel] -> (n2:Hudong{title: $entity2}) RETURN n1,rel,n2",
			entity1=entity1, entity2=entity2)

	# Check whether the database holds the given entity-relation-entity triple.
	def findEntityRelation(self,entity1,relation,entity2):
		"""Return rows matching ``(entity1)-[relation]->(entity2)``.

		The end node is looked up under the ``HudongItem`` label first and,
		only if that yields nothing, under ``NewNode``.
		"""
		answer = []
		# Labels cannot be passed as Cypher parameters; they come from this
		# fixed tuple, never from the caller.
		for end_label in ("HudongItem", "NewNode"):
			answer = self._query(
				"MATCH (n1:Hudong {title: $entity1})- [rel:RELATION {type: $relation}] -> (n2:"
				+ end_label + "{title: $entity2}) RETURN n1,rel,n2",
				entity1=entity1, relation=relation, entity2=entity2)
			if answer:
				break
		return answer
Ejemplo n.º 28
0
'''
# Build two PersonTest nodes and a KNOWNS relationship between them, union
# the three into a single subgraph with `|`, and write it in one create() call.
a = Node('PersonTest', name='张三')
b = Node('PersonTest', name='李四')
r = Relationship(a, 'KNOWNS', b)
s = a | b | r
graph.create(s)
'''
    2 —— Node查询
'''
# Query with Cypher (CQL); the result comes back as a list.
data1 = graph.data('MATCH(p:PersonTest) return p')
print("data1 = ", data1, type(data1))
print()
# Look up a node with find_one(); it returns the first node that matches.
data2 = graph.find_one(label='PersonTest',
                       property_key='name',
                       property_value="李四")
print("data2 = ", data2, type(data2))
print()
# Look up nodes with find() and print each one.
data3 = graph.find(label='PersonTest')
for data in data3:
    print("data3 = ", data)
print()
'''
    3 —— Relationship查询
'''
# Fetch a single relationship of type KNOWNS and print it with its Python type.
relationship = graph.match_one(rel_type='KNOWNS')
print(relationship, type(relationship))
print()
'''
Ejemplo n.º 29
0
            'base_ram', 'base_san', 'base_license', 'price'
        ]
        #print (header)
        csvwriter.writerow(header)
        count += 1
    csvwriter.writerow([
        i['client'], i['platform'], i['env'], i['server'], i['cpu'], i['ram'],
        i['diskGB'], i['os_type'], i['cpu_addon'], i['ram_addon'],
        i['base_vm'], i['base_cpu'], i['base_ram'], i['base_san'],
        i['base_license'], i['price']
    ])
testcsv.close()

# NOTE(review): this exit(0) ends the script here, so the graph-loading code
# below does not run as written — confirm whether that is intentional.
exit(0)

# Look up one Server node by name and dump it for inspection.
neo_vm = graph.find_one('Server', 'name', "ADAM-CUS-NJS1")
pprint.pprint(neo_vm)

# For every entry of oldvmlist, merge an Env, a Compte and a Plateform node
# keyed by name, then merge a Compte -have-> Plateform relationship.
for vm in oldvmlist:
    #neo_dc = Node('Datacenter', name=oldvmlist[vm]['site'])
    #graph.merge(neo_dc)
    neo_env = Node('Env', name=oldvmlist[vm]['env'])
    graph.merge(neo_env)
    #link=Relationship(neo_env,'is_on',neo_dc)
    #graph.merge(link)
    neo_client = Node('Compte', name=oldvmlist[vm]['compte'])
    graph.merge(neo_client)
    neo_pf = Node('Plateform', name=oldvmlist[vm]['PF'])
    graph.merge(neo_pf)
    link = Relationship(neo_client, 'have', neo_pf)
    graph.merge(link)
Ejemplo n.º 30
0
class loadDatatoNeo4j(object):
    """Turn a JSON-lines file of entity relations into CSV files.

    Each input line is a JSON object with ``entity1``, ``relation`` and
    ``entity2``. Lines whose ``entity1`` is not a ``HudongItem`` node in the
    graph are dropped; the rest are written to one of two relation CSVs
    depending on whether ``entity2`` is a known ``HudongItem``.
    """
    graph = None

    # Divisor of the progress figure printed by readData().
    # NOTE(review): presumably the number of lines in entityRelation1.json
    # — confirm and update if the input file changes.
    TOTAL_LINES = 12358

    def __init__(self):
        print("start load data ...")

    def connectDB(self):
        """Open the database connection and keep it on ``self.graph``."""
        self.graph = Graph("http://localhost:7474",
                           username="******",
                           password="******")
        print("connect neo4j success!")

    def readData(self):
        """Read ../wikidataRelation/entityRelation1.json and write three CSVs.

        Output files (created or truncated in the working directory):
          * new_nodef.csv           - one row per entity2 unknown to the graph
          * wikidata_relationf.csv  - relations whose entity2 is a HudongItem
          * wikidata_relation2f.csv - relations whose entity2 is a new node

        ``connectDB()`` must have been called first.
        """
        count = 0
        # Commas and double quotes would corrupt the hand-written CSV rows.
        unwanted = re.compile(r'[,"]')
        # A set keeps the "already emitted?" test O(1) per line.
        newNodes = set()

        with open("new_nodef.csv", 'w') as fwNewNode, \
                open("wikidata_relationf.csv", 'w') as fwWikidataRelation, \
                open("wikidata_relation2f.csv", 'w') as fwWikidataRelation2:
            # Header rows. The "lable" spelling is kept as-is because it is
            # part of the file format read by whatever imports these CSVs.
            fwNewNode.write("title,lable" + '\n')
            fwWikidataRelation.write("HudongItem1,relation,HudongItem2" + '\n')
            fwWikidataRelation2.write("HudongItem,relation,NewNode" + '\n')

            with open("../wikidataRelation/entityRelation1.json", "r") as fr:
                for line in fr:
                    entityRelationJson = json.loads(line)
                    entity1 = entityRelationJson['entity1']
                    entity2 = entityRelationJson['entity2']

                    find_entity1_result = self.graph.find_one(
                        property_key="title",
                        property_value=entity1,
                        label="HudongItem")
                    count += 1
                    print(count / self.TOTAL_LINES)
                    # entity1 is not a known item: nothing to write, and no
                    # point in querying the database for entity2 either.
                    if find_entity1_result is None:
                        continue

                    # The lookup uses entity2 exactly as it appears in the
                    # input, i.e. before the clean-up below.
                    # NOTE(review): confirm that looking up the raw value
                    # (not the simplified one) is intended.
                    find_entity2_result = self.graph.find_one(
                        property_key="title",
                        property_value=entity2,
                        label="HudongItem")

                    entityRelation = unwanted.sub("", entityRelationJson['relation'])
                    # Strip CSV-breaking characters, then convert traditional
                    # Chinese to simplified.
                    entity2 = Converter('zh-hans').convert(unwanted.sub("", entity2))

                    row = entity1 + "," + entityRelation + "," + entity2 + '\n'
                    if find_entity2_result is None:
                        # Unknown entity2: emit it once as a new node, then
                        # record the relation to that new node.
                        if entity2 not in newNodes:
                            fwNewNode.write(entity2 + "," + "newNode" + '\n')
                            newNodes.add(entity2)
                        fwWikidataRelation2.write(row)
                    else:
                        # Known entity2: record the relation directly.
                        fwWikidataRelation.write(row)
Ejemplo n.º 31
0
class NeoManager:
    """Small convenience wrapper around a py2neo ``Graph``.

    The constructor only stores the connection settings; call ``connect()``
    before using any method that touches ``self.graph`` or ``self.selector``.
    """

    def __init__(self, host, port, username, password):
        self.username = username
        self.host = host
        self.port = port
        self.password = password

    def connect(self):
        """Open the connection and create the node selector."""
        url = "http://" + self.host + ":" + str(self.port)
        # The password is deliberately kept out of the console output.
        print(url, self.username)
        self.graph = Graph(url, username = self.username, password = self.password)
        if self.graph is not None:
            print("Neo4j Database Connected.")
            self.selector = NodeSelector(self.graph)

    def createNode(self, nodelabel, nodename):
        """Create and return a node with label ``nodelabel`` and ``name=nodename``.

        Both arguments are converted with ``str()`` first.
        """
        nodename = str(nodename)
        nodelabel = str(nodelabel)
        node = Node(nodelabel, name = nodename)
        self.graph.create(node)
        return node

    def createRelation(self, nodeSrc, nodeDst, relationName):
        """Create ``nodeSrc -[relationName]-> nodeDst`` and return the relationship.

        Returns None without touching the database when either node is None.
        """
        relationName = str(relationName)
        # Identity test: `== None` would go through the node's own equality.
        if nodeSrc is None or nodeDst is None:
            return
        relationship = Relationship(nodeSrc, relationName, nodeDst)
        print(relationship)
        # self.setRelationAttribute(relation, 'credential', 0.9)
        self.graph.create(relationship)
        return relationship

    def setRelationAttribute(self, relationship, attribute, val):
        """Set ``relationship[attribute] = val`` and return the value read back.

        Only the in-memory object is modified here; nothing is pushed to the
        database by this method.
        """
        relationship[attribute] = val
        return relationship[attribute]

    def getRelationAttribute(self, relationship, attribute):
        """Return ``relationship[attribute]``."""
        return relationship[attribute]

    def findByName(self, findName):
        """Look a node up by name and report which label it was found under.

        Returns ``(True, node)`` when a ``labelHolder`` node has that name,
        otherwise ``(False, node_or_None)`` with the result of the
        ``Creditless`` lookup.
        """
        findName = str(findName)
        trustable = self.graph.find_one(property_key = "name", property_value = findName, label = 'labelHolder')
        if trustable is None:
            untrustable = self.graph.find_one(property_key = "name", property_value = findName, label = 'Creditless')
            return False, untrustable
        return True, trustable

    def findAllByLabel(self, findLabel):
        """Print and return the selection of all nodes labelled ``findLabel``."""
        findLabel = str(findLabel)
        selected = self.selector.select(findLabel)
        print(selected)
        return selected

    def findNodeRelation(self, node):
        """Return one relationship attached to ``node`` in either direction."""
        return self.graph.match_one(start_node = node, bidirectional = True)

    def hasStartToRelation(self, node, relstr):
        """Return the ``graph.match`` result for relationships of type ``relstr`` starting at ``node``.

        NOTE(review): despite the name this returns the match result, not a
        bool — callers should iterate it or test for a first element.
        """
        return self.graph.match(start_node=node, rel_type=relstr)

    def hasEndWithRelation(self, node, relstr):
        """Return the ``graph.match`` result for relationships of type ``relstr`` ending at ``node``."""
        return self.graph.match(end_node=node, rel_type=relstr)

    def getRelationBetween(self, nodeA, nodeB):
        """Return the relationships between two nodes in either direction.

        Returns None when either node is None.
        """
        if nodeA is None or nodeB is None:
            return None
        return self.graph.match(start_node = nodeA, end_node = nodeB, bidirectional = True)



# neo = NeoManager('localhost', 7474, 'neo4j', '123')
# neo.connect()
# with open('../Datasets/TrainSetUnique.csv', 'r', encoding = 'utf-8') as input:
#     reader = csv.reader(input)
#     # row: [0] entity1 [1] entity2 [2] relation [3] example
#     for row in reader:
#         node0 = neo.findByName(row[0])
#         node1 = neo.findByName(row[1])
#         print(node0)
#         if node0 == None:
#             node0 = neo.createNode("labelHolder", row[0])
#         if node1 == None:
#             node1 = neo.createNode("labelHolder", row[1])
#         relation = neo.getRelationBetween(node0, node1)
#         print(relation)
#         if relation == None or relation != row[2]:
#             print(row[2])
#             relation = neo.createRelation(node0, node1, row[2])
Ejemplo n.º 32
0
class DBO(object):
	"""Data-access object for the Project / Department / Application / Host graph.

	Values supplied by callers are sent as Cypher query parameters rather
	than interpolated into the statement text.
	"""

	# Initialise: connect to the backing database.
	def __init__(self):
		self.graph = Graph(user='******', password='******')

	def list_organization_structure(self, Application=None, HostIP=None):
		"""Return Project/Department/Application rows, optionally filtered.

		Args:
			Application: keep only rows whose Application node has this name.
			HostIP: keep only rows reaching a Host node with this IP.
		"""
		filters = []
		params = {}
		if Application:
			# Application nodes are created and returned with the key "name"
			# (see add_app / the RETURN clause); property keys are
			# case-sensitive, so the filter must use the same spelling.
			filters.append('a.name=$Application')
			params["Application"] = Application
		if HostIP:
			filters.append('n.IP=$HostIP')
			params["HostIP"] = HostIP
		cypher = 'MATCH (p:Project)-[]-(d:Department)-[]-(a:Application)-[]-(n:Host)'
		if filters:
			cypher += ' WHERE ' + ' AND '.join(filters)
		cypher += ' RETURN p.name as Project,d.name as Department,a.name as Application'
		return self.graph.data(cypher, params)

	def enum_vul(self, TaskID, Cypher_Conditions=None):
		"""Yield the HostVul nodes of a task.

		Args:
			TaskID: value the nodes' ``TaskID`` property must equal.
			Cypher_Conditions: optional raw Cypher appended after the TaskID
				test (e.g. ``"and n.Level='high'"``). It is inserted verbatim,
				so it must come from trusted code, never from user input.
		"""
		if Cypher_Conditions:
			# selector.select.where not good for use , not support zh_cn just pure cypher
			cypher = 'MATCH (n:HostVul) where n.TaskID=$TaskID %s RETURN n ' % (Cypher_Conditions,)
			for data in self.graph.data(cypher, {"TaskID": TaskID}):
				yield data["n"]
		else:
			selector = NodeSelector(self.graph)
			selected = selector.select("HostVul", TaskID=TaskID)
			for data in selected:
				yield data

	def add_vul(self, Vul_Data):
		"""Store ``Vul_Data`` as a HostVul node linked from its Host, unless it already exists.

		The Host is looked up by ``Vul_Data["IP"]``.
		"""
		if not self.HostVul_exists(Vul_Data):
			Host = self.graph.find_one("Host", "IP", Vul_Data[u"IP"])
			vul = Node("HostVul")
			vul.update(Vul_Data)
			rel = Relationship(Host, "have", vul)
			self.graph.create(rel)

	def HostVul_exists(self, Vul_Data):
		"""Return a non-empty list when a HostVul with the same identity exists.

		Identity is the combination of TaskID, Scanner, IP, Port and ID. The
		values are passed as parameters with their original types, matching
		what ``add_vul`` stores via ``vul.update(Vul_Data)``.
		"""
		cypher = ("Match (n:HostVul) where n.TaskID=$TaskID and n.Scanner=$Scanner "
			"and n.IP=$IP and n.Port=$Port and n.ID=$ID return n.IP limit 1 ")
		params = dict((key, Vul_Data[key]) for key in (u"TaskID", u"Scanner", u"IP", u"Port", u"ID"))
		result = self.graph.data(cypher, params)
		# The NodeSelector variant performed too poorly, so plain Cypher is
		# used instead:
		# selector = NodeSelector(self.graph)
		# selected = selector.select("HostVul",
		#                            IP=Vul_Data[u"IP"],
		#                            ID=Vul_Data[u"ID"]).limit(1)
		# .where("_.IP = '%s'" % Vul_Data[u"IP"],
		#                                        "_.Port='%s'" % Vul_Data[u"Port"],
		#                                        "_.ID='%s'" % Vul_Data[u"ID"])
		return result

	def add_host(self, Application, host):
		"""Ensure a Host node with IP ``host`` exists and link it from the Application."""
		self.node_simple_add("Host", "IP", host)
		host = self.graph.find_one("Host", "IP", host)
		app = self.graph.find_one("Application", "name", Application)
		self.rel_simple_add(app, "own", host)

	def add_department(self, Project, Department):
		"""Ensure Project and Department nodes exist and that Project owns Department."""
		self.node_simple_add("Project", "name", Project)
		self.node_simple_add("Department", "name", Department)

		pro = self.graph.find_one("Project", property_key="name", property_value=Project)
		dep = self.graph.find_one("Department", property_key="name", property_value=Department)

		self.rel_simple_add(pro, "own", dep)

	def add_app(self, Project, Department, Application):
		"""Ensure the Project -> Department -> Application chain exists."""
		self.node_simple_add("Project", "name", Project)
		self.node_simple_add("Department", "name", Department)
		self.node_simple_add("Application", "name", Application)

		pro = self.graph.find_one("Project", property_key="name", property_value=Project)
		dep = self.graph.find_one("Department", property_key="name", property_value=Department)
		app = self.graph.find_one("Application", property_key="name", property_value=Application)

		self.rel_simple_add(pro, "own", dep)
		self.rel_simple_add(dep, "own", app)

	### meta operate
	def node_exists(self, label, Key, Value):
		"""Return 2 (and print a notice) if a ``label`` node with ``Key == Value`` exists, else 0."""
		Find = self.graph.find_one(label, property_key=Key, property_value=Value)
		if Find:
			# Single parenthesised argument: prints the same text under
			# Python 2 and Python 3.
			print("Node already exists: [%s: %s]" % (label, Find[Key]))
			return 2
		else:
			return 0

	def node_simple_add(self, label, Key, Value):
		"""Create a ``label`` node with ``Key: Value`` unless one exists.

		Returns 2 when the node was already there, 1 when it was created.
		"""
		if self.node_exists(label, Key, Value):
			return 2
		n = Node(label)
		n.update({Key: Value})
		self.graph.create(n)
		return 1

	def rel_exists(self, start_node, rel, end_node):
		"""Return 2 (and print a notice) if the relationship exists, else 0."""
		Find = self.graph.match_one(start_node=start_node, rel_type=rel, end_node=end_node)
		if isinstance(Find, Relationship):
			print("Relationship already exists")
			return 2
		else:
			return 0

	def rel_simple_add(self, start_node, rel_type, end_node):
		"""Create ``start_node -[rel_type]-> end_node`` unless it exists.

		Returns 2 when the relationship was already there, 1 when it was created.
		"""
		if self.rel_exists(start_node, rel_type, end_node):
			return 2
		rel = Relationship(start_node, rel_type, end_node)
		self.graph.create(rel)
		return 1
Ejemplo n.º 33
0
class Robot():
    """NLU Robot.
    自然语言理解机器人。

    Public attributes:
    - graph: The connection of graph database. 图数据库连接
    - selector: The selector of graph database. 图数据库选择器
    - locations: Navigation Locations. 导航地点列表
    - is_scene: 在线场景标志,默认为 False
    - user: 机器人配置信息
    - usertopics: 可用话题列表
    - address: 在线调用百度地图 IP 定位 API,网络异常时从配置信息获取默认地址
    - topic: 当前QA话题
    - qa_id: 当前QA id
    - qmemory: 短期记忆-最近用户问过的10个问题
    - amemory: 短期记忆-最近回答用户的10个答案
    - pmemory: 短期记忆-最近一次回答用户的正确答案
    - cmd_end_scene: 退出场景命令集
    - cmd_previous_step: 上一步命令集,场景内全局模式
    - cmd_next_step: 下一步命令集,通过界面按钮实现
    - cmd_repeat: 重复命令集
    - do_not_know: 匹配不到时随机回答
    """
    def __init__(self, password="******", userid="A0001"):
        """Connect to the graph database and set up the dialogue state.

        Args:
            password: database password.
            userid: id of the ``User`` node whose configuration is loaded.
        """
        # --- database handles -------------------------------------------
        graph = Graph("http://localhost:7474/db/data/", password=password)
        self.graph = graph
        self.selector = NodeSelector(graph)
        # self.locations = get_navigation_location()

        # --- per-user configuration -------------------------------------
        self.is_scene = False
        self.user = self.selector.select("User", userid=userid).first()
        self.usertopics = self.get_usertopics(userid=userid)
        self.address = get_location_by_ip(self.user['city'])

        # --- conversation state -----------------------------------------
        self.topic = ""
        self.qa_id = get_current_time()
        # Three independent bounded histories of ten entries each.
        self.qmemory, self.amemory, self.pmemory = (
            deque(maxlen=10) for _ in range(3))

        # --- command vocabularies ---------------------------------------
        self.cmd_end_scene = [
            "退出业务场景",
            "退出场景",
            "退出",
            "返回",
            "结束",
            "发挥",
        ]
        self.cmd_previous_step = [
            "上一步",
            "上一部",
            "上一页",
            "上一个",
        ]
        self.cmd_next_step = [
            "下一步",
            "下一部",
            "下一页",
            "下一个",
        ]
        self.cmd_repeat = [
            '重复',
            '再来一个',
            '再来一遍',
            '你刚说什么',
            '再说一遍',
            '重来',
        ]

        # Fallback answers; update_result picks one at random and fills in
        # the {robotname} placeholder.
        self.do_not_know = [
            "这个问题太难了,{robotname}还在学习中",
            "这个问题{robotname}不会,要么我去问下",
            "您刚才说的是什么,可以再重复一遍吗",
            "{robotname}刚才走神了,一不小心没听清",
            "{robotname}理解的不是很清楚啦,你就换种方式表达呗",
            "不如我们换个话题吧",
            "咱们聊点别的吧",
            "{robotname}正在学习中",
            "{robotname}正在学习哦",
            "不好意思请问您可以再说一次吗",
            "额,这个问题嘛。。。",
            "{robotname}得好好想一想呢",
            "请问您说什么",
            "您问的问题好有深度呀",
            "{robotname}没有听明白,您能再说一遍吗",
        ]

    def __str__(self):
        return "Hello! I'm {robotname} and I'm {robotage} years old.".format(
            **self.user)

    @time_me()
    def configure(self, info="", userid="A0001"):
        """Select the active knowledge bases and report the configuration.

        Args:
            info: whitespace-separated names of the knowledge bases to
                enable. Every other known knowledge base is disabled. When
                empty, nothing is changed.
            userid: user whose configuration is edited. Any value other than
                "A0001" is currently replaced by "A0001".

        Returns:
            ``{"databases": [{"name": ..., "bselected": ..., "available": ...}, ...]}``
        """
        # Compare by value: `is not ""` tested object identity, which is not
        # a reliable emptiness check.
        assert userid != "", "The userid can not be empty!"
        # Only the default user id is accepted for now (2017-6-7).
        if userid != "A0001":
            userid = "A0001"
            print("userid 不是默认值,已经更改为A0001")
        match_string = "MATCH (config:Config) RETURN config.name as name"
        subgraphs = [item[0] for item in self.graph.run(match_string)]
        print("所有知识库:", subgraphs)
        config = {"databases": []}

        if info != '':
            selected_names = info.split()
            forbidden_names = list(
                set(subgraphs).difference(set(selected_names)))
            print("选中知识库:", selected_names)
            print("禁用知识库:", forbidden_names)
            # Names come from caller input, so they travel as query
            # parameters instead of being pasted into the statement.
            update_string = (
                "MATCH (user:User)-[r:has]->(config:Config) "
                "where user.userid=$userid AND config.name=$name "
                "SET r.bselected=$bselected")
            for bselected, names in ((1, selected_names), (0, forbidden_names)):
                for name in names:
                    self.graph.run(update_string, {
                        "userid": userid,
                        "name": name,
                        "bselected": bselected,
                    })

        match_string = (
            "MATCH (user:User)-[r:has]->(config:Config) "
            "where user.userid=$userid "
            "RETURN config.name as name, r.bselected as bselected, r.available as available")
        for item in self.graph.run(match_string, {"userid": userid}):
            config["databases"].append(
                dict(name=item[0], bselected=item[1], available=item[2]))
        print("可配置信息:", config)

        return config

    # @time_me()
    def get_usertopics(self, userid="A0001"):
        """Get available topics list.
        """
        usertopics = []
        if not userid:
            userid = "A0001"
        # 从知识库获取用户拥有权限的子知识库列表
        match_string = "MATCH (user:User)-[r:has {bselected:1, available:1}]->(config:Config)" + \
            "where user.userid='" + userid + "' RETURN config"
        data = self.graph.run(match_string).data()
        for item in data:
            usertopics.extend(item["config"]["topic"].split(","))
        print("用户:", userid, "\n已有知识库列表:", usertopics)
        return usertopics

    def iformat(self, sentence):
        """Individualization of robot answer.
        个性化机器人回答。
        """
        return sentence.format(**self.user)

    # @time_me()
    def add_to_memory(self, question="question", userid="A0001"):
        """Append the current user question to the conversation memory chain.

        A new ``Memory`` node is created with a fresh ``qa_id``. If a Memory
        node with the previous ``qa_id`` exists, the two are linked by a
        ``next`` relationship; otherwise the new node is created on its own.

        Args:
            question: user question.
                Defaults to "question".
            userid: unique user id.
                Defaults to "A0001".
        """
        # Look the predecessor up before qa_id is replaced.
        predecessor = self.graph.find_one("Memory", "qa_id", self.qa_id)
        self.qa_id = get_current_time()
        current = Node("Memory",
                       question=question,
                       userid=userid,
                       qa_id=self.qa_id)
        # Either the linking relationship or the bare node is created.
        entity = Relationship(predecessor, "next", current) if predecessor else current
        self.graph.create(entity)

    # def extract_navigation(self, question):
        """Extract navigation from question。从问题中抽取导航地点。
        从导航地点列表选取与问题匹配度最高的地点。
        QA匹配模式:(模糊匹配/全匹配)

        Args:
            question: User question. 用户问题。
        """
        # result = dict(question=question, name='', content=self.iformat(random_item(self.do_not_know)), \
        # context="", tid="", ftid="", url="", behavior=0, parameter="", txt="", img="", button="", valid=1)

        # 模式1:模糊匹配
        # temp_sim = 0
        # sv1 = synonym_cut(question, 'wf')
        # if not sv1:
        # return result
        # for location in self.locations:
        # sv2 = synonym_cut(location, 'wf')
        # if sv2:
        # temp_sim = similarity(sv1, sv2, 'j')
        # 匹配加速,不必选取最高相似度,只要达到阈值就终止匹配
        # if temp_sim > 0.92:
        # print("Navigation location: " + location + " Similarity Score: " + str(temp_sim))
        # result["content"] = location
        # result["context"] = "user_navigation"
        # result["behavior"] = int("0x001B", 16)
        # return result

        # 模式2:全匹配,判断“去”和地址关键词是就近的动词短语情况
        # for location in self.locations:

        # keyword = "去" + location
        # if keyword in question:
        # print("Original navigation")
        # result["name"] = keyword
        # result["content"] = location
        # result["context"] = "user_navigation"
        # result["behavior"] = int("0x001B", 16)
        # return result
        # return result

    def update_result(self, question='', node=None):
        """Build the answer dict for ``question`` from a matched node.

        Args:
            question: the user question, echoed back in the result.
            node: matched QA node, or None/falsy when nothing matched.

        Returns:
            Dict with keys question, name, content, context, tid, ftid, url,
            behavior, parameter, txt, img, button, valid. Without a node the
            content is a random "do not know" sentence and every other field
            keeps its default.
        """
        result = dict(
            question=question,
            name='',
            content=self.iformat(random_item(self.do_not_know)),
            context="",
            tid="",
            ftid="",
            url="",
            behavior=0,
            parameter="",
            txt="",
            img="",
            button="",
            valid=1,
        )
        if not node:
            return result
        result['name'] = self.iformat(node["name"])
        # "content" and "url" may hold several "|"-separated alternatives;
        # one is picked at random.
        result["content"] = self.iformat(
            random_item(node["content"].split("|")))
        result["context"] = node["topic"]
        result["tid"] = node["tid"]
        result["ftid"] = node["ftid"]
        result["txt"] = node["txt"]
        result["img"] = node["img"]
        result["button"] = node["button"]
        if node["url"]:
            result["url"] = random_item(node["url"].split("|"))
        if node["behavior"]:
            # Stored as a hexadecimal string.
            result["behavior"] = int(node["behavior"], 16)
        if node["parameter"]:
            result["parameter"] = node["parameter"]
        func = node["api"]
        if func:
            # SECURITY NOTE(review): exec evaluates whatever expression is
            # stored in node["api"]; that property must only ever be written
            # by trusted tooling. Consider a whitelist of callables instead.
            #
            # The argument is embedded with repr() so that answers containing
            # quotes, backslashes or newlines reach the function unchanged
            # instead of producing a SyntaxError.
            exec("result['content'] = " + func + "(" +
                 repr(result["content"]) + ")")
        return result

    def extract_pinyin(self,
                       question,
                       subgraph,
                       threshold=0.6,
                       athreshold=0.8):
        """Pick the QA node whose question sounds most like ``question``.

        Candidates are compared on their pinyin form. The first candidate
        scoring above ``athreshold`` is returned immediately; otherwise the
        best-scoring candidate is returned if it scores above ``threshold``.

        Args:
            question: user question.
            subgraph: candidate nodes for the current dialogue domain; may
                be empty.
            threshold: minimum score for the best candidate to be accepted.
            athreshold: score above which a candidate is accepted at once.

        Returns:
            The result dict from ``update_result`` (the "do not know" form
            when nothing matches).
        """
        sv1 = pinyin_cut(question)
        print(sv1)
        best_score = 0
        best_node = None
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            sv2 = pinyin_cut(iquestion)
            print("  ", sv2)
            temp_sim = jaccard_pinyin(sv1, sv2)
            print(temp_sim)
            # Shortcut: a score this high is good enough, stop searching.
            if temp_sim > athreshold:
                print("Q: " + iquestion + " Similarity Score: " +
                      str(temp_sim))
                return self.update_result(question, node)
            # Strict ">" keeps the earliest candidate among equal scores.
            if best_node is None or temp_sim > best_score:
                best_score, best_node = temp_sim, node
        # best_node stays None for an empty subgraph, which simply means
        # "no match" rather than an error.
        if best_node is not None and best_score > threshold:
            iquestion = self.iformat(best_node["name"])
            # Report the winning score, not the score of the last candidate.
            print("Q: " + iquestion + " Similarity Score: " + str(best_score))
            return self.update_result(question, best_node)
        return self.update_result(question)

    def extract_synonym(self,
                        question,
                        subgraph,
                        threshold=0.60,
                        athreshold=0.92):
        """Pick a QA node whose question is synonymous with ``question``.

        An exact text match wins immediately, as does the first candidate
        scoring above ``athreshold``. Otherwise the best-scoring candidate is
        returned if it scores above ``threshold``.

        Args:
            question: user question.
            subgraph: candidate nodes for the current dialogue domain; may
                be empty.
            threshold: minimum score for the best candidate to be accepted.
            athreshold: score above which a candidate is accepted at once.

        Returns:
            The result dict from ``update_result`` (the "do not know" form
            when nothing matches).
        """
        # Tokenisation and scoring use the semantic.jaccard variant
        # ('wf' / 'j'); segment() with 'j2' is the jaccard2 alternative.
        sv1 = synonym_cut(question, 'wf')
        if not sv1:
            return self.update_result(question)
        best_score = 0
        best_node = None
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                print("Similarity Score: Original sentence")
                return self.update_result(question, node)
            sv2 = synonym_cut(iquestion, 'wf')
            # A candidate without tokens scores 0 instead of inheriting the
            # previous candidate's score.
            temp_sim = similarity(sv1, sv2, 'j') if sv2 else 0
            # Shortcut: a score this high is good enough, stop searching.
            if temp_sim > athreshold:
                print("Q: " + iquestion + " Similarity Score: " +
                      str(temp_sim))
                return self.update_result(question, node)
            # Strict ">" keeps the earliest candidate among equal scores.
            if best_node is None or temp_sim > best_score:
                best_score, best_node = temp_sim, node
        # best_node stays None for an empty subgraph, which simply means
        # "no match" rather than an error.
        if best_node is not None and best_score > threshold:
            iquestion = self.iformat(best_node["name"])
            # Report the winning score, not the score of the last candidate.
            print("Q: " + iquestion + " Similarity Score: " + str(best_score))
            return self.update_result(question, best_node)
        return self.update_result(question)

    def extract_synonym_first(self, question, subgraph, threshold=0.60):
        """Pick the QA node whose question best matches ``question``.

        An exact text match wins immediately. Otherwise every candidate is
        scored and the highest-scoring one is returned if it scores above
        ``threshold``.

        Args:
            question: user question.
            subgraph: candidate nodes for the current dialogue domain; may
                be empty.
            threshold: minimum score for the best candidate to be accepted.

        Returns:
            The result dict from ``update_result`` (the "do not know" form
            when nothing matches).
        """
        # Tokenisation and scoring use the semantic.jaccard variant
        # ('wf' / 'j'); segment() with 'j2' is the jaccard2 alternative.
        sv1 = synonym_cut(question, 'wf')
        if not sv1:
            return self.update_result(question)
        best_score = 0
        best_node = None
        for node in subgraph:
            iquestion = self.iformat(node["name"])
            if question == iquestion:
                print("Similarity Score: Original sentence")
                return self.update_result(question, node)
            sv2 = synonym_cut(iquestion, 'wf')
            # A candidate without tokens scores 0 instead of inheriting the
            # previous candidate's score.
            temp_sim = similarity(sv1, sv2, 'j') if sv2 else 0
            # Strict ">" keeps the earliest candidate among equal scores.
            if best_node is None or temp_sim > best_score:
                best_score, best_node = temp_sim, node
        # best_node stays None for an empty subgraph, which simply means
        # "no match" rather than an error.
        if best_node is not None and best_score > threshold:
            iquestion = self.iformat(best_node["name"])
            # Report the winning score, not the score of the last candidate.
            print("Q: " + iquestion + " Similarity Score: " + str(best_score))
            return self.update_result(question, best_node)
        return self.update_result(question)

    def extract_keysentence(self, question, data=None, threshold=0.40):
        """Extract keysentence QA in NLU database。
        QA匹配模式:从知识库选取包含关键句的问答对。

        Args:
            question: User question. 用户问题。
        """
        if data:
            subgraph = [node for node in data if node["name"] in question]
        else:
            usertopics = ' '.join(self.usertopics)
            # 只从目前挂接的知识库中匹配
            match_string = "MATCH (n:NluCell) WHERE '" + question + \
                "' CONTAINS n.name and '" + usertopics +  \
                "' CONTAINS n.topic RETURN n LIMIT 1"
            subgraph = [
                item['n'] for item in self.graph.run(match_string).data()
            ]
        if subgraph:
            # 选取第一个匹配节点
            print("Similarity Score: Key sentence")
            # return self.extract_synonym(question, subgraph, threshold=threshold)
            node = subgraph[0]
            return self.update_result(question, node)
        return self.update_result(question)

    def extract_keysentence_first(self, question, data=None, threshold=0.40):
        """Extract the best key-sentence QA pair from the NLU database.

        QA matching mode: collect every knowledge-base node whose ``name`` is
        contained in the question, then let ``extract_synonym_first`` choose
        among them.

        Args:
            question: User question.
            data: Optional iterable of candidate nodes. When truthy, only
                these nodes are considered and the database is not queried.
            threshold: Forwarded to ``extract_synonym_first``.

        Returns:
            The result of ``self.extract_synonym_first`` over the matching
            nodes, or ``self.update_result(question)`` when nothing matches.
        """
        if data:
            subgraph = [node for node in data if node["name"] in question]
        else:
            # Only match against the knowledge bases currently attached to
            # the user. The question is free-form user input, so it is passed
            # as a query parameter instead of being spliced into the Cypher
            # text (a single quote in the question used to break the query).
            match_string = (
                "MATCH (n:NluCell) WHERE $question CONTAINS n.name "
                "and $usertopics CONTAINS n.topic RETURN n")
            parameters = {
                "question": question,
                "usertopics": ' '.join(self.usertopics),
            }
            subdata = self.graph.run(match_string, parameters).data()
            subgraph = [item['n'] for item in subdata]
        if subgraph:
            # Delegate the choice among the candidates.
            print("Similarity Score: Key sentence")
            return self.extract_synonym_first(question,
                                              subgraph,
                                              threshold=threshold)
        return self.update_result(question)

    def remove_name(self, question):
        """Strip a leading form of address (the robot's name) from a question.

        Args:
            question: User question.

        Returns:
            The question without a leading robot-name variant. A two-character
            input starting with "小", or an empty input, is replaced by
            ``self.user['robotname']``.
        """
        # A two-character string starting with "小" is treated as the robot
        # being called by a (possibly mis-recognised) name.
        if question.startswith("小") and len(question) == 2:
            question = self.user['robotname']
        # Filter out the form of address.
        for robotname in ["小民", "小明", "小名", "晓明"]:
            if question.startswith(
                    robotname) and len(question) >= 4 and "在线" not in question:
                # Slice off exactly the prefix. str.lstrip() treats its
                # argument as a set of characters and would also eat leading
                # characters of the real question (e.g. "小民民政局").
                question = question[len(robotname):]
        if not question:
            question = self.user['robotname']
        return question

    @time_me()
    def search(self, question="question", tid="", userid="A0001"):
        """Nlu search (semantic search).

        Loads the user's configuration, filters sensitive words, strips the
        form of address, handles the global/scene commands (repeat, exit
        scene, previous step, next step) and then matches the question
        against the knowledge graph: against the children of the current
        scene node when ``self.is_scene`` is set, otherwise against the nodes
        of the user's topics that share the question's tag.

        Side effects: reassigns ``self.user`` and ``self.usertopics``; may
        change ``self.is_scene``, ``self.topic``, ``self.amemory`` and
        ``self.pmemory``; appends the question to the ``log_do_not_know``
        file when the method falls through with an empty ``self.topic``.

        Args:
            question: User question.
                Defaults to "question".
            tid: Not read anywhere in this method body.
            userid: Unique user identifier.
                Defaults to "A0001".

        Returns:
            Dict contains:
            question, answer, topic, tid, url, behavior, parameter, txt, img, button.
            The fallback dicts built here (``do_not_know``, ``error_page``)
            carry the keys question, name, content, context, tid, ftid, url,
            behavior, parameter, txt, img, button and valid; matched results
            come from ``self.update_result`` or from the memories.
        """
        # Add to question memory
        # self.qmemory.append(question)
        # self.add_to_memory(question, userid)

        # Semantics: scene + whole graph + per-user configuration (the
        # configuration is looked up dynamically by userid)
        # ======================== Initialise configuration ==========================
        self.user = self.selector.select("User", userid=userid).first()
        self.usertopics = self.get_usertopics(userid=userid)
        do_not_know = dict(
            question=question,
            name="",
            content=self.iformat(random_item(self.do_not_know)),
            # content="",
            context="",
            tid="",
            ftid="",
            url="",
            behavior=0,
            parameter="",
            txt="",
            img="",
            button="",
            valid=1)
        error_page = dict(
            question=question,
            name="",
            content=self.user['error_page'],
            context="",
            tid="",
            ftid="",
            url="",
            behavior=int("0x1500",
                         16),  # Modified: behavior inside a scene is unified to 0x1500 (2018-1-8)
            parameter="",
            txt="",
            img="",
            button="",
            valid=0)

        # ======================== 1. Pre-processing =============================
        # Sensitive-word filter
        if check_swords(question):
            print("问题包含敏感词!")
            return do_not_know
        # Remove the form of address
        question = self.remove_name(question)

        # ======================== 2. Navigation (disabled) ===============================
        # result = self.extract_navigation(question)
        # if result["context"] == "user_navigation":
        # self.amemory.append(result) # add to ordinary memory
        # self.pmemory.append(result)
        # return result

        # ======================== 3. Semantic scenes ===========================
        # Work on a copy so the fallback dict itself is never mutated.
        result = copy.deepcopy(do_not_know)

        # Global context -- repeat
        for item in self.cmd_repeat:
            # TODO: make sure the returned entry is a real instruction and not,
            # for example, the closing phrase "可以了" after singing
            # TODO: pick the most recent meaningful behavior from memory as the
            # content to repeat
            if item == question:
                if self.amemory:
                    return self.amemory[-1]
                else:
                    return do_not_know

        # Scene -- exit
        for item in self.cmd_end_scene:
            if item == question:  # exit requires an exact match
                result['behavior'] = 0
                result['name'] = '退出'
                result['content'] = ""
                self.is_scene = False
                self.topic = ""
                self.amemory.clear()  # clear the scene memory
                self.pmemory.clear()  # clear the scene previous-step memory
                return result

        # Scene -- previous step: return the parent node
        # (TODO: unify with the next-step mode)
        if self.is_scene:
            for item in self.cmd_previous_step:
                if item in question:
                    # Link-jump check added (this scheme adopted 2017-12-22)
                    if len(self.pmemory) > 1:
                        self.amemory.pop()
                        return self.pmemory.pop()
                    elif len(self.pmemory) == 1:
                        return self.pmemory[-1]
                    else:
                        return error_page
            # Scene -- next step: driven by the parent's "button" field
            for item in self.cmd_next_step:
                if item in question:
                    if len(self.amemory) >= 1:
                        parent = self.amemory[-1]
                        if parent['button']:
                            # The last '|'-separated field names the next node.
                            next_name = parent['button'].split('|')[-1]
                            if next_name != '0':  # there really is a next step
                                # print(type(parent['tid']), parent['tid'])
                                match_string = "MATCH (n:NluCell {name:'" + \
                                    next_name + "', topic:'" + self.topic + \
                                    "', ftid:" + str(int(parent['tid'])) + "}) RETURN n"
                                match_data = list(
                                    self.graph.run(match_string).data())
                                if match_data:
                                    node = match_data[0]['n']
                                    result = self.update_result(question, node)
                                    # Add to scene memory
                                    self.pmemory.append(self.amemory[-1])
                                    self.amemory.append(result)
                                    return result
                    # A next-step command that could not be resolved.
                    return error_page

        # ========================== Scene matching =========================
        if self.is_scene:  # inside a scene: synonym + key-sentence + pinyin matching
            # All nodes of the current scene
            match_scene = "MATCH (n:NluCell) WHERE n.topic='" + self.topic + "' RETURN n"
            scene_nodes = self.graph.run(match_scene).data()
            # Keep the scene nodes whose ftid equals the tid of the parent
            # node (the most recent entry of the scene memory)
            subscene_nodes = [
                item['n'] for item in scene_nodes
                if item['n']['ftid'] == self.amemory[-1]['tid']
            ]
            if subscene_nodes:
                # Each matcher runs only if the previous one produced no context.
                result = self.extract_synonym_first(question, subscene_nodes)
                if not result["context"]:
                    result = self.extract_keysentence_first(
                        question, subscene_nodes)
                if not result["context"]:
                    result = self.extract_pinyin(question, subscene_nodes)
                if result["context"]:
                    print("正确匹配到当前场景的子场景")
                    self.pmemory.append(self.amemory[-1])
                    self.amemory.append(result)  # add to scene memory
                    return result
            return error_page
        else:  # not inside a scene: synonym + key-sentence + pinyin matching
            # All nodes of the user's topics whose tag equals the question's tag
            tag = get_tag(question, self.user)
            match_graph = "MATCH (n:NluCell) WHERE n.tag='" + tag + \
                "' and '" + ' '.join(self.usertopics) + "' CONTAINS n.topic RETURN n"
            usergraph_all = [
                item['n'] for item in self.graph.run(match_graph).data()
            ]
            if usergraph_all:
                # Synonym-sentence matching. TODO: make the threshold configurable
                result = self.extract_synonym(question,
                                              usergraph_all,
                                              threshold=0.90)
                # Keyword matching. TODO: configuration switch
                if not result["context"]:
                    result = self.extract_keysentence(question)
                # Pinyin matching. TODO: configuration switch
                if not result["context"]:
                    result = self.extract_pinyin(question, usergraph_all)
            # else: # global pinyin matching. TODO: configuration switch
            # match_pinyin = "MATCH (n:NluCell) WHERE '" + \
            # ' '.join(self.usertopics) + "' CONTAINS n.topic RETURN n"
            # usergraph_pinyin = [item['n'] for item in self.graph.run(match_pinyin).data()]
            # if usergraph_pinyin:
            # result = self.extract_pinyin(question, usergraph_pinyin)
            if result["tid"] != '':  # matched a scene node
                if int(result["tid"]) == 0:
                    print("不在场景中,匹配到场景根节点")
                    self.is_scene = True  # enter the scene
                    self.topic = result["context"]
                    self.amemory.clear()  # clear ordinary memory before entering the scene
                    self.pmemory.clear()
                    self.amemory.append(result)  # add to scene memory
                    self.pmemory.append(result)
                    return result
                else:
                    # A scene child node cannot be entered from outside the scene.
                    print("不在场景中,匹配到场景子节点")
                    return do_not_know
            elif result["context"]:  # matched an ordinary node
                self.topic = result["context"]
                self.amemory.append(result)  # add to ordinary memory
                self.pmemory.append(result)
                return result

        # ======== 5. Online semantics (Modified: temporarily disabled 2018-1-23) ===============
        # if not self.topic:
        # 1. Music ("sing a song xxx by xxx")
        # if "唱一首" in question or "唱首" in question or "我想听" in question:
        # result["behavior"] = int("0x0001", 16)
        # result["content"] = "好的,正在准备哦"
        # 2. "What is good to eat nearby"
        # elif "附近" in question or "好吃的" in question:
        # result["behavior"] = int("0x001C", 16)
        # result["content"] = self.address
        # 3. nlu_tuling (weather)
        # elif "天气" in question:
        # After the Tuling API change. Added 2017-8-4
        # location = get_location(question)
        # if not location:
        # The question contains no address
        # weather = nlu_tuling(self.address + question)
        # else:
        # The question contains an address
        # weather = nlu_tuling(question)
        # Before the Tuling API change
        # weather = nlu_tuling(question, loc=self.address)
        # result["behavior"] = int("0x0000", 16)
        # try:
        # Before the Tuling API change (currently usable)
        # temp = weather.split(";")[0].split(",")[1].split()
        # myweather = temp[0] + temp[2] + temp[3]

        # After the Tuling API change. Added 2017-8-3
        # temp = weather.split(",")
        # myweather = temp[1] + temp[2]
        # except:
        # myweather = weather
        # result["content"] = myweather
        # result["context"] = "nlu_tuling"
        # 4. Append every question that could not be answered to the log
        # else:
        # with open(log_do_not_know, "a", encoding="UTF-8") as file:
        # file.write(question + "\n")
        # 5. nlu_tuling
        # else:
        # result["content"] = nlu_tuling(question, loc=self.address)
        # result["context"] = "nlu_tuling"
        # if result["context"]: # matched online semantics
        # self.amemory.append(result) # add to ordinary memory
        # ==============================================================

        # Append every question that could not be answered to the log
        if not self.topic:
            with open(log_do_not_know, "a", encoding="UTF-8") as file:
                file.write(question + "\n")
        return result
Ejemplo n.º 34
0
                print(ans)
            # 通过知识图谱查询
            elif response[0] == '#':
                if response.__contains__("neo4j"):
                    if len(input_message) < 4:
                        ans = kgquery_entity(input_message)
                        print(ans)
                    else:
                        a = input_message.find('和')
                        b = input_message.find('的')
                        name1 = input_message[:a]
                        name2 = input_message[a + 1:b]
                        #提取实体的类别名,在find_one中,类别名跟输入有关系
                        label1 = re.search(r".*[老师/学生/项目]", name1).group(0)
                        label2 = re.search(r".*[老师/学生/项目]", name2).group(0)
                        #find_one函数中,类别名由输入定,属性值也由输入值定
                        n1 = test_graph.find_one(label1,
                                                 property_key="name",
                                                 property_value=name1)
                        n2 = test_graph.find_one(label2,
                                                 property_key="name",
                                                 property_value=name2)
                        ans = kgquery_rel(n1, n2)
                        ans = str(ans)
                        print(name1 + '和' + name2 + '的关系是:' + ans)
                elif response.__contains__("NoMatchingTemplate"):
                    print("NoMatchingTemplate")
                    print("搜索引擎查询,此功能暂不支持")
            else:
                print('ver:' + response)
Ejemplo n.º 35
0
class Query_Configuration:
    """Convenience queries and node construction on top of a py2neo graph."""

    def __init__(self, graph=None):
        """Store *graph*, or open a default ``Graph()`` when none is given.

        Args:
            graph: An existing graph connection. Previously a supplied graph
                was silently dropped, leaving ``self.graph`` undefined.
        """
        self.graph = Graph() if graph is None else graph
        # construct_merge_node() reads and appends to this list, so it must
        # exist before the first call.
        self.namespace = []

    @staticmethod
    def _first_column(results):
        """Return the first column of every row in *results* as a list."""
        return [row[0] for row in results]

    def match_labels(self, label):
        """Return every node carrying *label*."""
        results = self.graph.cypher.execute("MATCH (m:" + label + ")  RETURN m")
        return self._first_column(results)

    def match_label_property(self, label, prop_index, prop_value):
        """Return the nodes with *label* whose *prop_index* equals *prop_value*."""
        results = self.graph.cypher.execute(
            "MATCH (n:" + label + ") Where (n." + prop_index + "='" + prop_value + "')  RETURN n"
        )
        return self._first_column(results)

    def match_relationship(self, relationship):
        """Return the end nodes of every relationship of type *relationship*."""
        query_string = "MATCH n-[:" + relationship + "]->m   RETURN m"
        results = self.graph.cypher.execute(query_string)
        return self._first_column(results)

    # not tested yet
    def cypher_query(self, query_string, return_variable):
        """Run *query_string* with ``RETURN return_variable`` appended.

        Returns:
            The first column of every result row.
        """
        query_string = query_string + "   RETURN " + return_variable
        results = self.graph.cypher.execute(query_string)
        return self._first_column(results)

    def modify_properties(self, graph_object, new_properties):
        """Copy *new_properties* onto *graph_object* and push it to the database."""
        for key, value in new_properties.items():
            graph_object.properties[key] = value
        graph_object.push()

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_merge_node(self, push_namespace, relationship, label, name, new_properties):
        """Update the node ``label``/``name`` if it exists, otherwise create it.

        Args:
            push_namespace: When true, the new node's name and the node itself
                are appended to ``self.namespace`` after creation.
            relationship: Relationship type linking the current namespace node
                to a newly created node.
            label: Node label.
            name: Value of the node's ``name`` property.
            new_properties: Mapping of properties to set on the node.

        Returns:
            The existing or newly created node.
        """
        # NOTE(review): get_namespace() and get_namespace_node() are not
        # defined in this class as shown; they must be provided elsewhere.
        namespace = self.get_namespace(name)
        node = self.graph.find_one(label, property_key="name", property_value=name)
        if node is not None:
            for key, value in new_properties.items():
                node.properties[key] = value
            if new_properties:
                # One push after all properties are set is enough.
                node.push()
            return node

        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for key, value in new_properties.items():
            node.properties[key] = value
        self.graph.create(node)
        if len(self.namespace) != 0:
            # Only link to a parent when there is one; the relationship
            # variable used to be referenced even when it was never bound.
            relation_enity = Relationship(self.get_namespace_node(), relationship, node)
            self.graph.create(relation_enity)
        if push_namespace == True:
            # NOTE(review): both the name and the node go into the same list,
            # as in the original code -- confirm this is intended.
            self.namespace.append(name)
            self.namespace.append(node)
        return node

    def match_relation_property_specific(
        self, label_name, property_name, property_value, label, return_name, return_value
    ):
        """Return ``label`` nodes reachable from a start node that match a property.

        The start node has *label_name* and ``property_name == property_value``;
        the returned nodes have *label* and ``return_name == return_value``.
        """
        query_string = (
            "MATCH (n:"
            + label_name
            + "   { "
            + property_name
            + ':"'
            + property_value
            + '"})-[*]->(o:'
            + label
            + ") Where o."
            + return_name
            + ' = "'
            + return_value
            + '" RETURN o'
        )
        results = self.graph.cypher.execute(query_string)
        return self._first_column(results)

    def match_relation_property(self, label_name, property_name, property_value, label):
        """Return every ``label`` node reachable from the matching start node.

        The start node has *label_name* and ``property_name == property_value``.
        """
        query_string = (
            "MATCH (n:"
            + label_name
            + "   { "
            + property_name
            + ':"'
            + property_value
            + '"})-[*]->(o:'
            + label
            + ")   RETURN o"
        )
        results = self.graph.cypher.execute(query_string)
        return self._first_column(results)
Ejemplo n.º 36
0
class Cq(object):
    """A CQ message stored as a node in the graph database."""

    def __init__(self):
        """Create an empty CQ bound to the configured graph database."""
        self.id = ''
        self.subject = ''
        self.message = ''
        self.created_date = ''
        self._graph_db = Graph(settings.DATABASE_URL)

    @property
    def cq_properties(self):
        """Return the CQ's attributes as a dict, without the graph handle.

        :return: dict of the instance attributes except ``_graph_db``
        """
        properties_dict = dict(self.__dict__)
        del properties_dict['_graph_db']
        return properties_dict

    @property
    def cq_node(self):
        """Look up the graph node for this CQ by its ``id``.

        :return: the result of ``find_one``, or None when ``id`` is empty
        """
        if self.id != '':
            return self._graph_db.find_one(GraphLabel.CQ,
                                          property_key='id',
                                          property_value=self.id)
        return None

    @property
    def response_list(self):
        """
        list of responses to this CQ
        :return: list of response property dicts; each gets a ``by`` entry
            ("name / call_sign") when the responding user is found
        """
        # Resolve the node once; the property queries the database each time.
        cq_node = self.cq_node
        if cq_node is None:
            # Without a start node the match below would not be restricted
            # to this CQ at all.
            return []
        cq_response_relationship = self._graph_db.match(start_node=cq_node,
                                                       rel_type=GraphRelationship.TO,
                                                       end_node=None)
        response_list = []
        for rel in cq_response_relationship:
            response_node = rel.end_node
            # Copy so the display-only 'by' key is not written into the
            # node's own property set.
            response = dict(response_node.properties)
            # The RESPONDED relationship ends at the response node (see
            # response()), not at the CQ node.
            user_response_relationship = self._graph_db.match_one(start_node=None,
                                                                 rel_type=GraphRelationship.RESPONDED,
                                                                 end_node=response_node)
            if user_response_relationship is not None:
                user_node = user_response_relationship.start_node
                response['by'] = '%s / %s' % (user_node.properties['name'],
                                              user_node.properties['call_sign'])
            response_list.append(response)

        return response_list

    @staticmethod
    def create_cq(user_node, cq_dict):
        """Create a CQ node from *cq_dict* and link it to *user_node* via SENT.

        ``cq_dict`` is updated in place with a fresh ``id`` and today's
        ``created_date``.

        :param user_node: node of the user sending the CQ
        :param cq_dict: properties of the new CQ
        """
        cq_dict['id'] = str(uuid.uuid4())
        cq_dict['created_date'] = datetime.date.today()
        # One connection serves both writes.
        graph_db = Graph(settings.DATABASE_URL)
        cq_node, = graph_db.create(Node.cast(GraphLabel.CQ, cq_dict))
        cq_relationship = Relationship(user_node,
                                       GraphRelationship.SENT,
                                       cq_node)
        graph_db.create_unique(cq_relationship)

    @staticmethod
    def most_recent_cqs():
        """Return the most recent CQs as ``{'cqs': [ {...}, ... ]}``.

        :return: dict with a ``cqs`` list of id/subject/message/created_date dicts
        """
        params = {

        }
        # NOTE(review): the Cypher statement is still empty; this method
        # cannot return real data until the query is written.
        cypher_str = ""
        match_results = Graph(settings.DATABASE_URL).cypher.execute(statement=cypher_str,
                                                                    parameters=params)
        cq_list = []
        for item in match_results:
            # A fresh dict per row; reusing a single dict made every list
            # entry alias the last row.
            cq_list.append({
                'id': item.id,
                'subject': item.subject,
                'message': item.message,
                'created_date': item.created_date,
            })
        root = {}
        root['cqs'] = cq_list
        return root


    def response(self, response_id):
        """
        response dictionary details including user details
        :param response_id: ``id`` property of the response node
        :return:  dict with response details and a dict of the user who made the response
        """
        response_node = self._graph_db.find_one(GraphLabel.RESPONSE,
                                               property_key='id',
                                               property_value=response_id)
        response_user_relationship = self._graph_db.match_one(start_node=None,
                                                             rel_type=GraphRelationship.RESPONDED,
                                                             end_node=response_node)
        response_dict = {}
        response_dict['response'] = response_node.auto_sync_properties
        response_dict['user'] = response_user_relationship.start_node.properties
        return response_dict
Ejemplo n.º 37
0
class NeoPipeline(object):
    """Load node and relationship data from a SQL database into Neo4j."""

    def __init__(self):
        """Connect to the graph named in ``config.GRAPH_DB['graph_path']``."""
        self.graph_path = config.GRAPH_DB['graph_path']
        self.graph = Graph(self.graph_path)
        self.sql_path = "data/network.sqlite"

    def _read_records(self, query):
        """Run *query* against ``self.sql_path`` and return one dict per row."""
        with sql.connect(self.sql_path) as con:
            frame = pd.read_sql(sql=query, con=con, index_col=None)
        # `outtype` was renamed to `orient` and later removed from pandas.
        return frame.to_dict(orient="records")

    def nodes_from_sql(self, query, label, unique="id"):
        """
        INPUT: str, str, str
        OUTPUT: None
        Imports node data from sql query into neo4j.
        A uniqueness constraint on (label, unique) is created first; then one
        node with *label* is created per result row.
        """
        records = self._read_records(query)

        # Create nodes in graph.
        self.graph.schema.create_uniqueness_constraint(label, unique)
        for record in records:
            self.graph.create(Node.cast(label, record))

    def relationships_from_sql(self, query, nodes, label, properties):
        """
        INPUT: str, list(dict), str, dict
        OUTPUT: None
        Imports relationship data from sql query into neo4j.
        ``nodes`` holds two dicts (start, end), each with the keys ``label``,
        ``property`` and ``sql_col``: the node is looked up by
        ``property == row[sql_col]`` among nodes carrying ``label``.
        """
        start_spec, end_spec = nodes[0], nodes[1]
        for row in self._read_records(query):
            start_node = self.graph.find_one(start_spec["label"],
                                             start_spec["property"],
                                             row[start_spec["sql_col"]])
            end_node = self.graph.find_one(end_spec["label"],
                                           end_spec["property"],
                                           row[end_spec["sql_col"]])
            self.graph.create(
                Relationship.cast(start_node, label, end_node, properties))

    def build_network(self):
        """Import players, coaches and the COACHED relationships between them."""
        query_players = '''
                        SELECT player_name AS name, player_id AS id, player_pos AS pos
                        FROM individuals_subset
                        GROUP BY player_id
                        '''
        self.nodes_from_sql(query_players, "Players", unique="id")
        query_coaches = '''
                        SELECT coach_name AS name, coach_id AS id
                        FROM individuals_subset
                        GROUP BY coach_id
                        '''
        self.nodes_from_sql(query_coaches, "Coaches", unique="id")

        query_play_coach = '''
                            SELECT *
                            FROM individuals_subset
                            '''
        # The lookup labels must be the ones the nodes were created with
        # above ("Coaches"/"Players"), otherwise find_one finds nothing.
        play_coach = [
            {'label': "Coaches", 'property': "id", 'sql_col': "coach_id"},
            {'label': "Players", 'property': "id", 'sql_col': "player_id"},
        ]
        # The parameter is named `label`; `label_rel` raised a TypeError.
        self.relationships_from_sql(query_play_coach, nodes=play_coach, label="COACHED", properties={"league": "NBA"})
Ejemplo n.º 38
0
class PopItToNeo(object):
    """Import PopIt persons, organizations, posts and memberships into Neo4j.

    Nodes are fetched lazily from the PopIt REST API and cached per id so
    that each entity is created at most once per run.
    """

    def __init__(self):
        """Read ``config.yaml``, connect to the graph and reset the caches."""
        # safe_load: the config is plain data, and the file handle is closed
        # as soon as it has been parsed.
        with open("config.yaml") as config_file:
            config = yaml.safe_load(config_file)
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"

        # you know so that you can override this. why? I am not sure
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:
            self.graph.delete_all()

        # Because I am still not familiar to query with cypher
        # So lets cache here. Hopefully the memory usage don't kill me
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    @staticmethod
    def _entity_name(entity):
        """Return the entity's name; the first element when it is a list."""
        name = entity["name"]
        return name[0] if isinstance(name, list) else name

    @staticmethod
    def _date_params(entity, *fields):
        """Return ``{field: timestamp}`` for every date field that converts."""
        params = {}
        for field in fields:
            stamp = get_timestamp(entity.get(field))
            if stamp:
                params[field] = stamp
        return params

    def _link_once(self, start, rel_type, end, properties):
        """Create the relationship unless an equivalent one already exists."""
        if self.graph.match_one(start, rel_type, end):
            logging.warning("Already exist, skipping")
            return
        self.graph.create(Relationship(start, rel_type, end, **properties))

    def process_membership(self):
        """Walk every membership page and link persons to posts/organizations."""
        # So lets start from membership
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")

            # fetch_entity() returns {} on a failed request; treat that as an
            # empty page instead of raising KeyError.
            for entry in data.get("result", []):

                # a membership have 3 important field, person_id, organization_id, posts_id
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue

                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue
                role = entry.get("role") or "member"
                logging.warning("Role: %s" % role)

                # This happens only once anyway
                kwparams = {"popit_id": entry["id"]}
                kwparams.update(
                    self._date_params(entry, "start_date", "end_date"))

                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    self._link_once(person, role, post, kwparams)

                organization = self.fetch_organization(entry["organization_id"])
                if not organization:
                    continue
                self._link_once(person, role, organization, kwparams)

            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        """Return the ``Persons`` node for *person_id*, creating it if needed.

        Looks in the in-memory cache, then the graph, then the PopIt API.
        Returns None when the API has no such person.
        """
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]

        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node

        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Don't assume that this id won't be created the next time
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")

        entity = data["result"]
        name = self._entity_name(entity)
        logging.warning("Name: %s" % name)

        kwparam = self._date_params(entity, "birth_date", "death_date")
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]
        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        """Return the ``Organization`` node for *organization_id*, creating it if needed.

        Looks in the in-memory cache, then the graph, then the PopIt API.
        Returns None when the API has no such organization.
        """
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]

        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node

        organization_url = "%s/%s/%s" % (self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")

        entity = data["result"]
        name = self._entity_name(entity)
        logging.warning("Name: %s" % name)

        kwparams = {"name": name, "popit_id": entity["id"]}
        kwparams.update(
            self._date_params(entity, "founding_date", "dissolution_date"))

        if "classification" in entity:
            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]

        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        """Return the ``Posts`` node for *post_id*, creating it if needed.

        A newly created post is also linked to its organization with an
        ``of`` relationship carrying the post's start/end dates. Returns None
        when the API has no such post.
        """
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]

        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node

        post_url = "%s/%s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")

        entity = data["result"]
        # Fetch organization node, because post is link to organization
        # What is the implication of post without organization?
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            # Best effort: a post is still imported when its organization
            # cannot be resolved. (Exceptions have no `.message` on Python 3.)
            logging.warning(e)
            organization = None
        logging.warning("Label: %s" % entity["label"])

        date_params = self._date_params(entity, "start_date", "end_date")
        kwparams = {"name": entity["label"], "popit_id": entity["id"]}
        kwparams.update(date_params)

        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node
        if organization:
            # Only the dates belong on the relationship; the node's name and
            # popit_id used to be copied onto it by mistake.
            relation = Relationship(node, "of", organization, **date_params)
            self.graph.create(relation)

        return node

    def process_parent_company(self):
        """Walk every organization page and create parent_of/child_of links."""
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)

        while True:
            data = self.fetch_entity(organizations_url)

            for entry in data.get("result", []):
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                logging.warning(entry.get("parent_id"))

                # TODO: Dafuq this is not DRY.
                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                if not child_node:
                    continue
                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(
                    Relationship(parent_node, "parent_of", child_node))
                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(
                    Relationship(child_node, "child_of", parent_node))

            next_url = data.get("next_url")
            if not next_url:
                break
            organizations_url = next_url
            logging.warning(organizations_url)

    def process_posts(self):
        """Walk every post page and make sure each post exists in the graph."""
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            for entry in data.get("result", []):
                # fetch_post() already persists the node and its organization
                # relationship, so nothing else has to be created here.
                self.fetch_post(entry["id"])
            next_url = data.get("next_url")
            if not next_url:
                break
            post_url = next_url
            logging.warning(post_url)

    def fetch_entity(self, url):
        """GET *url* and return the decoded JSON, or ``{}`` on a non-200 reply."""
        r = requests.get(url)
        # Small pause between requests.
        time.sleep(0.1)
        if r.status_code != 200:
            # Just to make output consistent, excception did not kill the script anyway
            return {}
        return r.json()
Ejemplo n.º 39
0
__author__ = 'Marnee Dearman'
from py2neo import Graph, Node, Relationship
from settings import graphene

# Demo script: look up Person nodes by name with find_one() and find().
# print() is called with a single pre-formatted argument throughout so the
# script produces the same output on Python 2 and Python 3.
graph = Graph(graphene.DATABASE_URL)
print(graph)

# find a node or set of nodes according to properties and labels
# graph.find_one() # returns a single node
# graph.find() # returns a generator

# Let's find Marnee
marnee_node = graph.find_one("Person",
                             property_key="name",
                             property_value="Marnee")
print("find_one Marnee %s" % marnee_node)

# Same lookup through find(): iterate over every matching node.
marnee_generator = graph.find("Person",
                              property_key="name",
                              property_value="Marnee")
for marnee in marnee_generator:
    print(marnee)

# Let's find Julian
julian_node = graph.find_one("Person",
                             property_key="name",
                             property_value="Julian")
print("find_one Julian %s" % julian_node)

# Let's find all the Persons Julian knows
# show the Cypher -- MATCH
)

# #Recommendation

# ##Add User

# In[4]:

# Ensure a User node with Name "Ragnar" is available via merge_one.
UserNode = graph_db.merge_one("User", "Name", "Ragnar")

# ##Add User likes

# In[5]:

# Look the same user up again by its Name property.
UserRef = graph_db.find_one("User",
                            property_key="Name",
                            property_value="Ragnar")  # look for user Ragnar

# In[6]:

# NOTE(review): RecipeRef is used without a None check; presumably the
# recipe is expected to exist already - confirm against the data loader.
RecipeRef = graph_db.find_one(
    "Recipe", property_key="Name",
    property_value="Spaghetti Bolognese")  # look for recipe Spaghetti Bolognese
NodesRelationship = Relationship(UserRef, "Likes",
                                 RecipeRef)  # Ragnar likes Spaghetti Bolognese
graph_db.create_unique(NodesRelationship)  # commit his like to the database

# In[7]:

graph_db.create_unique(
    Relationship(
Ejemplo n.º 41
0
class NeoRepo():
    """Small repository layer over a py2neo ``Graph`` of users and repos.

    Nodes carry the label ``User`` or ``Repo`` and are identified by their
    ``name`` property. Relationships run from a user to a repo and default
    to the type ``'star'`` in the query helpers.
    """

    def __init__(self):
        """Open the connection to the graph server."""
        self._host = "140.82.17.30"
        self.g = Graph("http://" + self._host,
                       username="******",
                       password="******")

    def add_user(self, user):
        """Merge a ``User`` node named *user* into the graph."""
        self.g.merge(Node("User", name=user))

    def add_repo(self, repo):
        """Merge a ``Repo`` node named *repo* into the graph."""
        self.g.merge(Node("Repo", name=repo))

    def get_user(self, user):
        """Return the ``User`` node named *user*, as given by ``find_one``."""
        return self.g.find_one("User", property_key='name',
                               property_value=user)

    def get_repo(self, repo):
        """Return the ``Repo`` node named *repo*, as given by ``find_one``."""
        return self.g.find_one("Repo", property_key='name',
                               property_value=repo)

    def add_rel(self, user, repo, rel_type):
        """Merge a *rel_type* relationship from user *user* to repo *repo*.

        Both arguments are names; fresh ``Node`` objects are built for them
        and the relationship between those nodes is merged.
        """
        user_node = Node("User", name=user)
        repo_node = Node("Repo", name=repo)
        self.g.merge(Relationship(user_node, rel_type, repo_node))

    def match_user(self, user, rel_type='star'):
        """Return the outgoing *rel_type* relationships of *user*.

        *user* may be a name (looked up first) or a node. An unknown user
        yields an empty list.
        """
        if isinstance(user, str):
            user = self.get_user(user)
        if user is None:
            return []
        return self.g.match(start_node=user,
                            bidirectional=False,
                            rel_type=rel_type)

    def match_repo(self, repo, rel_type='star'):
        """Return the incoming *rel_type* relationships of *repo*.

        *repo* may be a name (looked up first) or a node. An unknown repo
        yields an empty list.
        """
        if isinstance(repo, str):
            repo = self.get_repo(repo)
        if repo is None:
            return []
        return self.g.match(end_node=repo,
                            bidirectional=False,
                            rel_type=rel_type)

    def match_one(self, user, repo, rel_type='star'):
        """Return one *rel_type* relationship from *user* to *repo*.

        Either argument may be a name or a node. ``None`` is returned when
        the user or the repo cannot be found.
        """
        if isinstance(user, str):
            user = self.get_user(user)
        if isinstance(repo, str):
            repo = self.get_repo(repo)
        if user is None or repo is None:
            return None
        return self.g.match_one(start_node=user,
                                end_node=repo,
                                bidirectional=False,
                                rel_type=rel_type)

    def suggest(self, repo):
        """Suggest repos starred by the users who starred *repo*.

        Returns up to 50 ``(repo_name, count)`` tuples sorted by descending
        count, where count is the number of those users that also starred
        the suggested repo. An unknown repo yields an empty list.

        The queried repo is excluded by name. The previous implementation
        dropped whichever entry happened to sort first, which discarded a
        genuine suggestion (and returned the queried repo itself) whenever
        another repo tied for the highest count.
        """
        if isinstance(repo, str):
            repo = self.get_repo(repo)
        if repo is None:
            return []
        own_name = repo['name']

        count = {}
        for item_repo in self.match_repo(repo):
            user = item_repo.start_node()
            for item_user in self.match_user(user):
                name = item_user.end_node()['name']
                if name == own_name:
                    continue
                count[name] = count.get(name, 0) + 1

        ranked = sorted(count.items(), key=lambda item: item[1],
                        reverse=True)
        return ranked[:50]
Ejemplo n.º 42
0
from py2neo import Graph, Node, Relationship,authenticate
from talk import find_topic

# SECURITY NOTE(review): the database credentials are hard-coded below;
# consider loading them from the environment or a config file.
authenticate("localhost:7474", "neo4j", "M0ring15")
graph = Graph()

# Read the whole archive up front; the context manager guarantees the file
# handle is closed even if reading fails.
with open('talk_archive') as f:
    content = f.read()
topics = find_topic(content)

# Create topic and turn nodes.
for topic in topics:
    topic_node = Node("Topic", title=topic.title)
    for turn in topic.turns:
        turn_node = Node("Turn", tid=turn.tid,
                         text=turn.text, author=turn.author, date=turn.date)
        if turn.parent_turn is None:
            # Top-level turn: attach it directly to its topic.
            topic_turn = Relationship(topic_node, "CONTAINS", turn_node)
            graph.create(topic_turn)
        else:
            # Reply: link it to the parent turn already stored in the graph.
            p_tid = turn.parent_turn.tid
            parent_node = graph.find_one("Turn",
                                         property_key="tid",
                                         property_value=p_tid)
            # When the parent is not found, nothing is created for this turn.
            if parent_node is not None:
                turn_turn = Relationship(turn_node, "REPLIES", parent_node)
                graph.create(turn_turn)

Ejemplo n.º 43
0
class Interest(object):
    """An interest (name, id, description) stored as a graph node.

    The public attributes ``name``, ``id`` and ``description`` mirror the
    node's properties. ``_graph_db`` holds the graph connection and is
    excluded from the property dict.
    """

    def __init__(self, graph_db=None):
        """Create an empty interest.

        :param graph_db: graph connection to use. When omitted, a new
            ``Graph`` is opened on ``settings.DATABASE_URL``. (Previously
            this argument was accepted but ignored.)
        """
        self.name = None
        self.id = None
        self.description = None
        if graph_db is None:
            graph_db = Graph(settings.DATABASE_URL)
        self._graph_db = graph_db

    @property
    def interest_properties(self):
        """Return a dict copy of the instance attributes without ``_graph_db``."""
        return dict((key, value) for key, value in self.__dict__.items()
                    if key != '_graph_db')

    @property
    def interest_node_by_id(self):
        """Return the INTEREST node whose ``id`` equals ``self.id``.

        ``None`` is returned without querying when ``self.id`` is ``None``.
        """
        if self.id is None:
            return None
        return self._graph_db.find_one(GraphLabel.INTEREST,
                                       property_key='id',
                                       property_value=self.id)

    @property
    def interest_node_by_name(self):
        """Return the INTEREST node whose ``name`` equals ``self.name``.

        ``None`` is returned without querying when ``self.name`` is ``None``.
        """
        if self.name is None:
            return None
        return self._graph_db.find_one(GraphLabel.INTEREST,
                                       property_key='name',
                                       property_value=self.name)

    def set_interest_attributes(self, interest_properties):
        """Set one instance attribute per key/value pair of the given dict."""
        # items() instead of iteritems() so this works on Python 2 and 3.
        for key, value in interest_properties.items():
            setattr(self, key, value)

    def create_interest(self):
        """
        create an interest node based on the class attributes

        A fresh UUID string is assigned to ``self.id`` first. Creation in
        the graph is best-effort: a failure is ignored and the node object
        is returned regardless.

        :return: py2neo Node
        """
        # TODO error handling
        self.id = str(uuid.uuid4())
        new_interest_node = Node.cast(GraphLabel.INTEREST,
                                      self.interest_properties)
        try:
            self._graph_db.create(new_interest_node)
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt / SystemExit like the bare except did.
            pass

        return new_interest_node

    def matched_interests(self, match_string, limit):
        """Find interests whose name starts with *match_string*.

        The match is case-insensitive (regex ``(?i)<match_string>.*``) and
        both values are passed to Cypher as query parameters.

        :param match_string: name prefix to look for
        :param limit: maximum number of rows to return
        :return: ``{'count': <n>, 'interests': [{'id': ..., 'name': ...}]}``
        """
        params = {
            'match': '(?i)%s.*' % match_string,
            'limit': limit
        }
        cypher_str = "MATCH (interest:INTEREST ) " \
            "WHERE interest.name =~ {match} " \
            "RETURN interest.name as name, interest.id as id " \
            "LIMIT {limit}"
        match_results = self._graph_db.cypher.execute(statement=cypher_str,
                                                      parameters=params)
        interests_list = [{'id': item.id, 'name': item.name}
                          for item in match_results]
        return {'count': len(interests_list), 'interests': interests_list}

    def get_interest_by_name(self):
        """
        get interest node by ``self.name`` and load its properties

        When a node is found, every property of the node is copied onto
        this instance. (Previously the instance's own attributes were
        copied onto itself, so nothing was ever loaded.)

        :return: the node, or ``None`` when no node matches
        """
        interest_node = self.interest_node_by_name
        if interest_node is not None:
            self.set_interest_attributes(interest_node.properties)
        return interest_node

    def get_interest_by_id(self):
        """Fetch the node by ``self.id`` and load its properties.

        :return: the node, or ``None`` when no node matches
        """
        interest_node = self.interest_node_by_id
        if interest_node is not None:
            self.set_interest_attributes(interest_node.properties)
        return interest_node

    def get_interest_for_json(self):
        """Return a JSON-friendly dict with the class name, id and name."""
        return {
            '__class': self.__class__.__name__,
            'id': self.id,
            'name': self.name
        }
Ejemplo n.º 44
0
# Collect every distinct value of the 'head' and 'tail' columns as the node
# list and write it to node.csv.
#a = df['head'].value_counts()
a = df['head']
b = df['tail']
frames = [a, b]
result = pd.concat(frames)
result = result.drop_duplicates(keep='first', inplace=False)
# NOTE(review): the node file is read back below through column '0', so
# this rename is not expected to change the written header - confirm.
result = result.rename(columns={'0': 'node'})
result.to_csv(path + "node.csv", header=1)

### Upload to neo4j

# Load both CSV files; the context managers close the handles right after
# parsing instead of leaking them.
with open(path + "node.csv", encoding='utf-8') as n:
    data01 = pd.read_csv(n)
with open(path + str(database) + ".csv", encoding='utf-8') as r:
    data02 = pd.read_csv(r)

### Upload nodes
for i in range(len(data01)):
    temp = Node("Person", name=data01['0'][i])
    g.create(temp)

### Upload relationships
for i in range(len(data02)):
    # Look up both endpoints by name. The variables are named after the
    # columns they come from, which also avoids shadowing the builtin
    # ``object``.
    head_node = g.find_one(label="Person",
                           property_key='name',
                           property_value=data02["head"][i])
    tail_node = g.find_one(label="Person",
                           property_key='name',
                           property_value=data02["tail"][i])
    # The relationship runs from the 'tail' node to the 'head' node.
    temp = Relationship(tail_node, data02['label'][i], head_node)
    g.create(temp)
Ejemplo n.º 45
0
    "mission_statement": "Develop the Agora",
    "unique_id": unique_id,
    "email": '*****@*****.**'.lower(),
    "is_mentor": True,
    "is_tutor": True,
    "is_visible": True,
    "is_available_for_in_person": True,
    "is_admin": True}
# Build a USER node from the property dict above and try to create it.
new_user_node = Node.cast(AgoraLabel.USER, new_user_properties)
try:
    graph_db.create(new_user_node)
except:
    # Any failure of create() is reported as "Node found".
    print 'Node found'

# Read the user back by its (lower-cased) email address.
user_node = graph_db.find_one(AgoraLabel.USER,
                                      property_key='email',
                                      property_value="*****@*****.**".lower())
print user_node["email"]

# Print the interests exposed by an AgoraUser with this email.
user = AgoraUser()
user.email = "*****@*****.**"
print user.user_interests

# Create a sample interest node.
interest = AgoraInterest()
interest.name = 'SAMPLE'
interest.description = 'SAMPLE DESCRIPTION'
new_interest_node = interest.create_interest()

# Build (but do not persist here) the INTERESTED_IN relationship from the
# user node to the new interest node.
user_interest_relationship_node = Relationship(start_node=user_node,
                                               rel=AgoraRelationship.INTERESTED_IN,
                                               end_node=new_interest_node)
Ejemplo n.º 46
0
class TwitterGraph():
    """
        Run queries against TwitterGraph. Functions here are mainly read-only, i.e. we only want to get answers,
        we are not modifying the graph structure
    """
    PASSWORD = "******"
    USER     = "******"
    HOST     = "localhost:7474"

    def __init__(self, host=HOST, user=USER, password=PASSWORD):
        """Register the credentials for *host* and open the default graph."""
        authenticate(host_port=host, user_name=user, password=password)
        self.graph = Graph()

    def get_users(self): # TO-DO : make it lazy for large datasets
        """Return up to 25 ``User`` nodes as a list."""
        return list(self.graph.find("User", limit=25))

    def get_user(self, id_):
        """Return the ``User`` node whose ``id`` property equals *id_*."""
        return self.graph.find_one("User", property_key="id", property_value=id_)

    def get_level_followers(self, limit=50, level=1, uid=None, screen_name=None):
        """
            Return a list of the records of users who are the n level follower of user uid/screen_name
            Level 1 follower is defined as : (1st_level_follower)-[follows]->(followee)

            When *uid* is given it takes precedence and the followee is
            matched on ``id_str``; otherwise it is matched on
            ``screen_name``. (Previously the query always matched on
            ``screen_name``, even when a user id was supplied.)

            Raises InvalidArgumentException when neither is given.
        """
        if uid is None and screen_name is None:
            raise InvalidArgumentException("Please specify either a valid user id or screen_name")

        by_uid = uid is not None
        statement = self._construct_follower_path(level, uid=by_uid)
        followee = uid if by_uid else screen_name
        result = self.graph.cypher.stream(statement, followee=followee, limit=limit)
        return list(result)

    def is_n_level_follower(self, level, retweeter, screen_name):
        """
            Given a retweeter screen_name and original tweeter's screen_name, determine if retweeter is n level follower

            Level 1 is delegated to ``search.is_follower``. For deeper
            levels, the followers at ``level - 1`` (at most 5000) are
            streamed from the graph and each is checked with
            ``search.is_follower``.
        """
        if level == 1:
            return search.is_follower(retweeter, screen_name)
        statement = self._construct_follower_path(level - 1)
        for follower in self.graph.cypher.stream(statement, followee=screen_name, limit=5000):
            # Single-argument print: same output on Python 2 and Python 3.
            print(follower[0])
            if search.is_follower(retweeter, follower[0]):
                return True
        return False

    def get_retweet_level(self, retweeter, screen_name):
        """
            Given a retweeter screen name and the original user screen_name who tweeted
            the original tweet, determine the follower level

            Returns the first level in 1..10 for which the retweeter is a
            follower, or 0 when none matches.
        """
        # stop at 10 to prevent timeout
        for level in range(1, 11):
            if self.is_n_level_follower(level, retweeter, screen_name):
                return level
        return 0

    def _construct_follower_path(self, level, uid=False):
        """Build the Cypher statement that returns *level*-th level followers.

        The followee is matched on ``id_str`` when *uid* is true, else on
        ``screen_name``. One anonymous ``(:User)`` hop is added per level
        above 1, followed by the final hop bound to ``a``. The statement
        expects the parameters ``followee`` and ``limit``.
        """
        key = "id_str" if uid else "screen_name"
        statement = "MATCH(:User {%s : {followee} })" % key
        # A non-positive repeat count yields "", matching the old loop,
        # which did not run for level <= 1.
        statement += "<-[:follows]-(:User)" * (level - 1)
        statement += "<-[:follows]-(a:User) RETURN a.screen_name LIMIT {limit}"
        return statement
Ejemplo n.º 47
0
                                data_type = field_relationship["dataType"]
                                id_field = field_relationship["idField"]
                                log.debug(
                                    "Found relationship mapping for [%s]. (:%s)-[:%s]->(:%s)",
                                    key, object_type, relationship_type,
                                    node_type)

                                if data_type == "array":
                                    log.debug("Processing %s as %s", key,
                                              data_type)
                                    for id_value in value:
                                        log.debug("(:%s)-[:%s]->(:%s{%s: %s})",
                                                  object_type,
                                                  relationship_type, node_type,
                                                  id_field, id_value)
                                        related_node = g.find_one(
                                            node_type, id_field, id_value)
                                        if not related_node:
                                            log.debug(
                                                "Didn't find a %s with %s == %s so I'm creating it",
                                                node_type, id_field, id_value)
                                            r_data = {id_field: id_value}
                                            related_node = Node(
                                                node_type, **r_data)
                                            tx.create(related_node)
                                            tx.commit()
                                            tx = g.begin()

                                        node_relationships.append({
                                            'relationship': {
                                                'type': relationship_type
                                            },
Ejemplo n.º 48
0
from py2neo import Node, Relationship, Graph
import requests, json, sys

print "Performing pathfinding search with", sys.argv

if len(sys.argv) != 4:
    print "You failed to enter the correct arguments."
    print "article one, article two, depth"
else:
    graph = Graph("http://*****:*****@localhost:7474/db/data/")

    a = graph.find_one("Article", "lowerTitle", sys.argv[1].lower())
    b = graph.find_one("Article", "lowerTitle", sys.argv[2].lower())
    #print a, b
    if a and b:
        ENDPOINT = "http://*****:*****@localhost:7474/db/data/"
        request = {
            "to":ENDPOINT+"node/"+str(b._id),
            "max_depth": int(sys.argv[3]),
            "relationships": {
                "type":"LINKS",
                "direction":"out"
            },
            "algorithm":"allSimplePaths"
        }
        r = requests.post(ENDPOINT+"node/"+str(a._id)+"/paths", data=json.dumps(request))
        # print r.json()
        if r.status_code == 200:
            for path in r.json():
                print "Path:"
                for node in path['nodes']:
Ejemplo n.º 49
0
class Neo4j(object):
    """Graph access layer for director/actor cooperation data.

    ``self.graph`` is the py2neo graph connection and ``self.mm`` the
    Mongo manager used by ``get_cooperations`` to store results.
    """

    def __init__(self):
        """Connect to Neo4j using ``Config`` and create the Mongo manager."""
        self.graph = Graph(Config.NEO_URL,
                           username=Config.NEO_USR,
                           password=Config.NEO_PSW)
        self.mm = MongoManager.DBManager()

    def add_relation(self,
                     node_name1,
                     node_name2,
                     movie_name='name',
                     url='url'):
        """
        Merge a cooperation relationship director -> actor into the graph.

        Node objects for both ends are built here and merged together with
        the relationship; the relationship starts with ``count`` = 1.

        :param node_name1: director name
        :param node_name2: actor name
        :param movie_name: currently unused
        :param url: currently unused
        :return:
        """
        director = Node(DIRECTOR_LABEL, name=node_name1)
        director['type'] = 'director'
        actor = Node(ACTOR_LABEL, name=node_name2)

        relation = Relationship(director, COOPERATE_LABEL, actor)
        relation['count'] = 1
        self.graph.merge(relation, DEFAULT_LABEL, 'name')

    def print(self, name, relation):
        """
        Print the end node of every *relation* edge that starts at the
        node named *name*.

        :param name: value of the start node's ``name`` property
        :param relation: relationship type to follow
        :return:
        """
        print('##########')
        query = 'MATCH (n) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        print(node)
        # NOTE(review): this uses ``rel.end_node[...]`` and a node-tuple
        # match call, while other methods of this class use
        # ``rel.end_node()`` and keyword arguments - confirm which py2neo
        # version is targeted.
        for rel in self.graph.match((node, ), relation):
            print(rel.end_node['name'], rel.end_node.labels, rel['movie_name'],
                  rel['release_time'])

    def _node_exists(self, query, name):
        """Run *query* with parameter ``name`` and report whether the
        returned node exists in the graph."""
        node = self.graph.evaluate(query, dict(name=name))
        if node is None:
            return False
        return bool(self.graph.exists(node))

    def find_director_node(self, name):
        """
        Return True when a Director node named *name* exists in the graph,
        False otherwise.

        :param name:
        :return:
        """
        return self._node_exists(
            'MATCH (n:Director) WHERE n.name={name} RETURN n', name)

    def find_actor_node(self, name):
        """
        Return True when an Actor node named *name* exists in the graph,
        False otherwise.

        :param name:
        :return:
        """
        return self._node_exists(
            'MATCH (n:Actor) WHERE n.name={name} RETURN n', name)

    def get_labeled_node(self, count=1):
        """
        Fetch all Director nodes, print each record and the total number
        of records, and return the result.

        :param count: currently unused
        :return: the query result (one dict per record, key ``'p'``)
        """
        datas = self.graph.data('MATCH(p:Director) return p')
        # Count while printing; the previous counter started at 1 and
        # therefore reported one director too many.
        total = 0
        for data in datas:
            print(data)
            total += 1
        print('Total count of Director is', total)
        return datas

    def find_relation_and_add_count(self, name1, name2):
        """
        Find the CooperateWith relationship from director *name1* to actor
        *name2* and increment its ``count``.

        When no such relationship is found, it is created through
        ``add_relation`` instead.

        :param name1: director name
        :param name2: actor name
        :return: True when an existing relationship was incremented,
                 False otherwise
        """
        # The relationship is fetched with a single Cypher query; the
        # earlier separate node/relationship lookups were never used.
        query = 'MATCH(n:Director)-[r:CooperateWith]->(m:Actor) WHERE n.name={name1} and m.name={name2} RETURN r'
        params = dict(name1=name1, name2=name2)
        relation = self.graph.evaluate(query, params)
        if relation is None:
            print('relation is none')
            self.add_relation(name1, name2)
            return False
        if not self.graph.exists(relation):
            print('relation does not exist')
            return False
        print('relation exists, add count')
        relation['count'] += 1
        self.graph.push(relation)
        print(relation.start_node()['name'], '->', relation['count'], '->',
              relation.end_node()['name'])
        return True

    def clear_graph(self):
        """
        Delete everything in the graph database.

        :return:
        """
        self.graph.delete_all()

    def show_end_node(self, name, relation_label):
        """
        For the start node named *name*, walk every *relation_label*
        relationship and print its count and end node name.

        :param name:
        :param relation_label:
        :return: False when the node is missing; otherwise None
        """
        query = 'MATCH (n) WHERE n.name={name} RETURN n'
        params = dict(name=name)
        node = self.graph.evaluate(query, params)
        if node is None:
            print('node is None!')
            return False
        if not self.graph.exists(node):
            print('node not exists!')
            return False
        print(node)
        # NOTE(review): ``rel.end_node[...]`` vs ``rel.end_node()`` used
        # elsewhere in this class - confirm the targeted py2neo version.
        for rel in self.graph.match((node, ), relation_label):
            print(name, '->', rel['count'], '->', rel.end_node['name'])

    def get_coop_count(self):
        """
        Print the cooperation relationships and counts of the first
        director returned by ``get_labeled_node``.

        :return:
        """
        directors = self.get_labeled_node()
        for director in directors:
            self.show_end_node(director['p']['name'], COOPERATE_LABEL)
            # Only the first director is processed.
            break

    def get_cooperations(self):
        """Store every director/actor cooperation through the Mongo manager.

        For each director, every outgoing cooperation relationship is
        saved as ``{'director', 'actor', 'count'}`` into
        ``Config.COOPERATION_TEMP``. Processing stops (returning None) at
        the first director whose node is missing.
        """
        directors = self.get_labeled_node()
        for director in directors:
            director_name = director['p']['name']
            query = 'MATCH (n) WHERE n.name={name} RETURN n'
            params = dict(name=director_name)
            node = self.graph.evaluate(query, params)
            if node is None:
                print('node is None!')
                return None
            if not self.graph.exists(node):
                print('node not exists!')
                return None
            # Save every relationship that starts at this director.
            for rel in self.graph.match(start_node=node,
                                        rel_type=COOPERATE_LABEL):
                data = {
                    'director': director_name,
                    'actor': rel.end_node()['name'],
                    'count': rel['count']
                }
                self.mm.save_data(Config.COOPERATION_TEMP, data)
Ejemplo n.º 50
0
    "mission_statement": "Use the Agora to learn all the things.",
    "id": id,
    "email": email.lower(),
    "is_mentor": True,
    "is_tutor": True,
    "is_visible": True,
    "is_available_for_in_person": True,
    "is_admin": False}
# Build a USER node from the property dict above and try to create it.
new_user_node = Node.cast(AgoraLabel.USER, new_user_properties)
try:
    graph_db.create(new_user_node)
except:
    # Any failure of create() is reported as "Node found".
    print 'Node found'

# Read the user back by its lower-cased email address.
user_node = graph_db.find_one(AgoraLabel.USER,
                                      property_key='email',
                                      property_value=email.lower())
print user_node["email"]

# Load the same user through AgoraUser and print its interests.
user = AgoraUser()
user.email = email
user.get_user()
print user.user_interests

# Create a "Music" interest node and print it.
interest = AgoraInterest()
interest.name = 'Music'
interest.description = 'Learning how to communicate clearly through writing.'
new_interest_node = interest.create_interest()
print new_interest_node
print user_node['name']
interest_node = Graph().find_one('INTEREST',
Ejemplo n.º 51
0
class GraphClass():
    """Student/Subject graph: node and relationship inserts plus two
    recommendation queries.

    ``self.graph`` (py2neo) is used for writes, ``self.db`` for the raw
    Cypher queries.
    """
    dbb = ''
    graph = ''

    def __init__(self):
        """Open both client connections to the local Neo4j server."""
        self.db = GraphDatabase("http://localhost:7474/db/data/")
        self.graph = Graph("http://localhost:7474/db/data/")

    def _get_or_create_node(self, label, name, error_message):
        """Return the *label* node named *name*, creating it when missing.

        On any error *error_message* is printed, the transaction (if one
        was started) is rolled back, and None is returned.
        """
        tx = None
        try:
            existing = self.graph.find_one(label=label,
                                           property_key="name",
                                           property_value=name)
            if existing is not None:
                return existing
            tx = self.graph.begin()
            created = Node(label, name=name)
            tx.create(created)
            tx.commit()
            return created
        except Exception:
            print(error_message)
            # Only roll back a transaction that was actually started; the
            # old code raised UnboundLocalError when the lookup itself
            # failed.
            if tx is not None:
                tx.rollback()
            return None

    def InsertStudentNode(self, name):
        """Return the Student node named *name*, creating it if needed.

        Returns None (after printing a message) when the operation fails.
        """
        return self._get_or_create_node(
            "Student", name, "problem with student node insert")

    def InsertSubjectNode(self, name):
        """Return the Subject node named *name*, creating it if needed.

        Returns None (after printing a message) when the operation fails.
        """
        return self._get_or_create_node(
            "Subject", name, "problem with subject node insert")

    def relationship(self, startnode, endnode, review):
        """Create a relationship of type *review* from *startnode* to
        *endnode* inside its own transaction.

        On failure a message is printed and the transaction, if it was
        started, is rolled back.
        """
        tx = None
        try:
            tx = self.graph.begin()
            tx.create(Relationship(startnode, review, endnode))
            tx.commit()
        except Exception:
            print("problem with relationship")
            if tx is not None:
                tx.rollback()

    def Toptrending(self):
        """Return the names of the 4 nodes with the most incoming
        relationships, most connected first."""
        RecordList = self.db.query(
            "MATCH (n)-[r]->(m) RETURN m, COUNT(r) ORDER BY COUNT(r) DESC LIMIT 4 ",
            returns=(dict, str))
        return [record[0]['data']['name'] for record in RecordList]

    def ColabFiltering(self, user):
        """Recommend subjects for the student named *user*.

        Collects subjects that were liked (and, separately, opened) by
        students who share a liked (opened) subject with *user* and that
        *user* has not liked (opened) yet. Returns the subject names
        without duplicates, 'like' results first.
        """
        RecordListLike = self.db.query(
            "MATCH (s:Student)-[:like]->(n:Subject)<-[:like]-()-[:like]->(m:Subject) WHERE s.name = {username} AND  NOT  (s)-[:like]->(m:Subject) RETURN m.name",
            params={"username": user},
            returns=str)
        RecordListOpen = self.db.query(
            "MATCH (s:Student)-[:open]->(n:Subject)<-[:open]-()-[:open]->(m:Subject) WHERE s.name = {username} AND  NOT  (s)-[:open]->(m:Subject) RETURN m.name",
            params={"username": user},
            returns=str)

        SubjList = []
        for records in (RecordListLike, RecordListOpen):
            for record in records:
                if record[0] not in SubjList:
                    SubjList.append(record[0])
        return SubjList

#if __name__ == '__main__':
#app.run(debug=True)
# graph = GraphClass()
# print(graph.ColabFiltering(user='******'))

    """relationship(startnode=InsertStudentNode("raam"), endnode=InsertSubjectNode("subject-5"), review="like")
Ejemplo n.º 52
0
#!/usr/bin/env python
""" create languages for app """

from py2neo import Graph, Node

# Names of the Language nodes the app expects to exist.
languages = [
    "PHP", "Python", "Ruby", "Erlang", "Elixir", "Haskell", "Go", "Java",
    "Scala", "Groovy", "JavaScript", "C#", "C++", "Swift"
]

graph = Graph('http://*****:*****@127.0.0.1:7474/db/data/')

for name in languages:
    # Create the node only when no Language with this name is found, so
    # the script can be re-run without adding duplicates.
    if graph.find_one("Language", "name", name) is None:
        graph.create(Node("Language", name=name))
Ejemplo n.º 53
0
class TwitterGraph():
    """Follow/retweet graph of app users (``User``) and Twitter accounts
    (``TwitterUser``) with simple recommendation queries.

    ``popularity_heap`` holds ``(incoming_relationship_count, screen_name)``
    tuples for every TwitterUser and is rebuilt by ``reassess_popularity``.
    """

    def __init__(self):
        """Connect to the graph and compute the initial popularity ranking."""
        self.graph = Graph("http://*****:*****@54.191.171.209:7474/db/data/")
        self.popularity_heap = []
        self.reassess_popularity()

    def add_user(self, user):
        """Create a ``User`` node for *user* and return the create() result."""
        new_user = Node("User", token=user.token.session_id, user_id=user.id)
        return self.graph.create(new_user)

    def is_cached(self, screen_name):
        """Return True when a TwitterUser named *screen_name* exists,
        False otherwise."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        return twitter_user is not None

    def get_RT_recommendations(self, user):
        """Recommend accounts retweeted by accounts that *user* follows.

        Looks at up to 5 followed accounts and up to 5 retweets of each,
        and returns the (at most 10) most frequently retweeted screen
        names. An unknown user yields an empty list.
        """
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            return []

        recommendations = Counter()
        for rel in user_node.match_outgoing("FOLLOWS", limit=5):
            for r in rel.end_node.match_outgoing("RETWEETED", limit=5):
                recommendations[r.end_node.properties['screen_name']] += 1

        return [screen_name
                for (screen_name, count) in recommendations.most_common(10)]

    def get_generic_recommendations(self):
        """Return the screen names of the 10 most popular TwitterUsers."""
        return [screen_name
                for (count, screen_name) in heapq.nlargest(10, self.popularity_heap)]

    def reassess_popularity(self):
        """Rebuild ``popularity_heap`` from the current graph.

        The ranking is computed into a fresh list and then swapped in.
        Previously entries were pushed onto the existing heap, so every
        call duplicated all accounts and the generic recommendations
        contained repeated names.
        """
        # NOTE: expensive calculation, to be run threaded at multiples of x actions to graph or hourly/daily job
        ranking = [
            (sum(1 for _ in tu.match_incoming()), tu.properties['screen_name'])
            for tu in self.graph.find("TwitterUser")
        ]
        heapq.heapify(ranking)
        self.popularity_heap = ranking

    def add_twitter_user(self, screen_name):
        """Create a TwitterUser node for *screen_name* unless one exists."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        if twitter_user is None:
            self.graph.create(Node("TwitterUser", screen_name=screen_name))

    def _get_or_create_user_node(self, user):
        """Return the User node for *user*, creating it when missing."""
        user_node = self.graph.find_one("User", 'user_id', user.id)
        if user_node is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_user(user)
            user_node = self.graph.find_one("User", 'user_id', user.id)
        return user_node

    def _get_or_create_twitter_user(self, screen_name):
        """Return the TwitterUser node for *screen_name*, creating it when
        missing."""
        twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        if twitter_user is None:
            # this shouldn't happen, just for testing while transitioning db
            self.add_twitter_user(screen_name)
            twitter_user = self.graph.find_one("TwitterUser", 'screen_name', screen_name)
        return twitter_user

    def add_follow(self, screen_name, user):
        """Record that *user* follows *screen_name* and refresh popularity."""
        user_node = self._get_or_create_user_node(user)
        twitter_user = self._get_or_create_twitter_user(screen_name)

        self.graph.create(Relationship(user_node, "FOLLOWS", twitter_user))
        self.reassess_popularity()

    def remove_follow(self, screen_name, user):
        """Delete the FOLLOWS relationship from *user* to *screen_name*,
        if there is one.

        Missing nodes are created first (same transitional behaviour as
        ``add_follow``).
        """
        user_node = self._get_or_create_user_node(user)
        twitter_user = self._get_or_create_twitter_user(screen_name)

        follow_relationship = self.graph.match_one(user_node, "FOLLOWS", twitter_user)
        if follow_relationship is not None:
            self.graph.delete(follow_relationship)

    def add_retweet(self, screen_name, retweeted_screen_name):
        """Record that *screen_name* retweeted *retweeted_screen_name*.

        Creates the RETWEETED relationship with ``count`` = 1 when it does
        not exist yet; otherwise increments ``count`` (a missing count is
        treated as 0) and pushes the change.
        """
        twitter_user = self._get_or_create_twitter_user(screen_name)

        self.add_twitter_user(retweeted_screen_name)
        retweeted_twitter_user = self.graph.find_one("TwitterUser", 'screen_name', retweeted_screen_name)

        retweet = self.graph.match_one(twitter_user, "RETWEETED", retweeted_twitter_user)
        if retweet is None:
            retweet_relationship = Relationship(twitter_user, "RETWEETED", retweeted_twitter_user)
            retweet_relationship.properties['count'] = 1
            self.graph.create(retweet_relationship)
        else:
            current = retweet.properties['count']
            # A missing count shouldn't happen; it is only handled for
            # relationships created before counts were stored.
            retweet.properties['count'] = 1 if current is None else current + 1
            retweet.push()
Ejemplo n.º 54
0
class StuffNeo4j():
    """Small helper around a py2neo ``Graph`` used to load WordNet-style data.

    The instance remembers one node label and one relationship type
    (``nodelabel`` / ``reltype``); ``create_indexes`` and
    ``create_wordnet_rel`` use them. ``connect`` must be called before any
    method that touches ``graph_db``.
    """

    def __init__(self, nodelabel, reltype):
        """Store the node label and relationship type; no connection is made."""
        self.graph_db = None
        self.nodelabel = nodelabel
        self.reltype = reltype

    def connect(self, uri, usr="******", pwd="neo4j"):
        """Authenticate against the server at ``uri`` and open its graph.

        Args:
            uri: base URI of the Neo4j server, with or without a trailing
                slash (``"db/data"`` is appended to it).
            usr: user name for HTTP basic authentication.
            pwd: password for HTTP basic authentication.
        """
        try:
            from urllib.parse import urlparse
        except ImportError:  # Python 2
            from urlparse import urlparse

        if not uri.endswith('/'):
            uri += '/'
        # py2neo registers credentials per "host:port", not per full URI, so
        # hand it the network location. Fall back to the raw value when the
        # URI carries no scheme and therefore no parseable netloc.
        host_port = urlparse(uri).netloc or uri
        authenticate(host_port, usr, pwd)
        self.graph_db = Graph(uri + "db/data")

    def _create_index(self, label, prop):
        """Create an index on ``label(prop)``, ignoring a failure to do so."""
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(%s)" %
                                         (label, prop))
        except Exception:
            # Deliberately best-effort: py2neo raises when the index already
            # exists. Only Exception is swallowed so that KeyboardInterrupt
            # and SystemExit still propagate.
            pass

    def create_indexes(self):
        """Create the indexes on ``name``, ``synset_id`` and ``pointer_symbol``.

        Each index is attempted independently; a failure on one does not
        prevent the others from being attempted.
        """
        self._create_index(self.nodelabel, "name")
        self._create_index(self.nodelabel, "synset_id")
        self._create_index(self.reltype, "pointer_symbol")

    def create_node(self, nodetype, **kwargs):
        """Return a new, unsaved ``Node`` labelled ``nodetype`` with ``kwargs`` as properties."""
        return Node(nodetype, **kwargs)

    def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs):
        """Merge a node on ``uniq_key == uniq_val``, set ``kwargs`` on it and push it.

        Returns:
            The merged node.
        """
        n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val)
        for key, value in kwargs.items():
            n.properties[key] = value
        n.push()
        return n

    def insert_rel(self, reltype, node1, node2, **kwargs):
        """Create a ``reltype`` relationship from ``node1`` to ``node2``.

        When either node is ``None`` nothing is written and a message is
        printed instead.
        """
        if node1 is None or node2 is None:
            print("Could not insert relation (%s) - [%s] -> (%s)" % (
                node1, reltype, node2))
            return
        rel = Relationship(node1, reltype, node2, **kwargs)
        self.graph_db.create(rel)

    def merge_rel(self, reltype, node1, node2, **kwargs):
        """Create a ``reltype`` relationship only if it does not exist yet.

        Returns:
            The result of ``graph_db.create_unique``, or ``None`` (after
            printing a message) when either node is ``None``.
        """
        if node1 is None or node2 is None:
            print("Could not merge relation (%s) - [%s] -> (%s)" % (
                node1, reltype, node2))
            return None
        rel = Relationship(node1, reltype, node2, **kwargs)
        return self.graph_db.create_unique(rel)

    def create_wordnet_rel(self, synset1, synset2, ptype):
        r"""Build (but do not save) a relationship between two synsets.

        Both synsets are looked up by ``synset_id`` under ``self.nodelabel``;
        the returned relationship has type ``self.reltype`` and carries
        ``ptype`` in its ``pointer_symbol`` property.

        Args:
            synset1: ``synset_id`` of the start node.
            synset2: ``synset_id`` of the end node.
            ptype: WordNet pointer symbol (see the table below).

        Returns:
            An unsaved ``Relationship``.

        Raises:
            Exception: if either synset node cannot be found.

        Pointer symbols
        http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html

        The pointer_symbol s for nouns are:

            !    Antonym
            @    Hypernym
            @i    Instance Hypernym
             ~    Hyponym
             ~i    Instance Hyponym
            #m    Member holonym
            #s    Substance holonym
            #p    Part holonym
            %m    Member meronym
            %s    Substance meronym
            %p    Part meronym
            =    Attribute
            +    Derivationally related form
            ;c    Domain of synset - TOPIC
            -c    Member of this domain - TOPIC
            ;r    Domain of synset - REGION
            -r    Member of this domain - REGION
            ;u    Domain of synset - USAGE
            -u    Member of this domain - USAGE

        The pointer_symbol s for verbs are:

            !    Antonym
            @    Hypernym
             ~    Hyponym
            *    Entailment
            >    Cause
            ^    Also see
            $    Verb Group
            +    Derivationally related form
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE

        The pointer_symbol s for adjectives are:

            !    Antonym
            &    Similar to
            <    Participle of verb
            \    Pertainym (pertains to noun)
            =    Attribute
            ^    Also see
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE

        The pointer_symbol s for adverbs are:

            !    Antonym
            \    Derived from adjective
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE
        """
        node1 = self.graph_db.find_one(self.nodelabel,
                                       property_key="synset_id",
                                       property_value=synset1)
        node2 = self.graph_db.find_one(self.nodelabel,
                                       property_key="synset_id",
                                       property_value=synset2)
        if node1 is None or node2 is None:
            raise Exception(
                "Could not create Wordnet relation (%s) - [%s] -> (%s)" %
                (synset1, ptype, synset2))
        return Relationship(node1,
                            self.reltype,
                            node2,
                            pointer_symbol=ptype)

    def insert_bulk(self, objs):
        """Create all entities in ``objs`` with one call; do nothing when it is empty."""
        if objs:
            self.graph_db.create(*objs)
Ejemplo n.º 55
0
		f.write(user);
		f.write('\n');
		f.close();
		'''
		for i in Cursor(api.followers, id=user).items():
			print "adding " + i.screen_name;
			f.write("\t"+i.screen_name+"\n");
			to_be_networked.append(i.screen_name);
		'''
		while True:
		
			try:
				temp_user = api.get_user(user);
				temp_ratio = float(temp_user.followers_count) / float(temp_user.friends_count);
				
				base_node = graph.find_one("regular", "screen_name", user);
				
				if not base_node:
					base_node = graph.find_one("verified", "screen_name", user);
					
				if not base_node:
					base_node = graph.find_one("cautious", "screen_name", user);
				
				if not base_node:				
					if temp_user.verified:
						base_node = Node("verified", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location);
					else:
						if temp_ratio < 0.01:
							base_node = Node("cautious", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location);
						else:
							base_node = Node("regular", screen_name=user, ratio=temp_ratio, tweets=temp_user.statuses_count, created=temp_user.created_at, followers=temp_user.followers_count, following=temp_user.friends_count, location=temp_user.location);
Ejemplo n.º 56
0
class UserGraph:
    """Build a graph of accounts and the payments between them.

    Each account is a node labelled ``self.label`` with a ``name`` (the
    user's code) and a running ``btc`` balance. Payments between two
    accounts are accumulated on a single ``Pay`` relationship whose ``btc``
    property is the total amount transferred.
    """

    def __init__(self):
        """Create the DAOs; the graph connection is opened by ``init_connect``."""
        self.graph = None
        self.label = "Account"
        self.user_dao = UserDAO()
        self.address_dao = AddressDAO()
        self.transaction_dao = TransactionDAO()

    def init_connect(self):
        """Open the connection to the Neo4j server."""
        self.graph = Graph("http://127.0.0.1:7474",
                           username="******",
                           password="******")

    def clear_data(self):
        """Delete everything stored in the graph."""
        self.graph.delete_all()

    def create_a_user_node(self, name_code, btc):
        """Create an account node for ``name_code`` with balance ``btc``.

        Returns:
            Whatever ``graph.create`` returns for the new node.
        """
        a_user = Node(self.label, name=name_code, btc=btc)
        return self.graph.create(a_user)

    def get_a_user_node_by_code(self, name_code):
        """Return the account node whose ``name`` is ``name_code`` (or ``None``)."""
        user_node = self.graph.find_one(self.label,
                                        property_key='name',
                                        property_value=name_code)
        return user_node

    def _get_or_create_address(self, address):
        """Return the address record for ``address``, creating a user and record if unknown."""
        record = self.address_dao.get_address_by_address(address)
        if not record:
            new_user = self.user_dao.create_user()
            record = self.address_dao.create_address(new_user['id'], address)
        return record

    def _adjust_balance(self, user_id, delta, refetch=True):
        """Add ``delta`` to the balance of the account owned by ``user_id``.

        An existing node is updated and pushed. A missing node is created
        with ``delta`` as its balance and, when ``refetch`` is true, read
        back from the graph so that the caller gets a usable node.

        Returns:
            The account node, or ``None`` when it was just created and
            ``refetch`` is false.
        """
        account = self.user_dao.get_user_by_id(user_id)
        node = self.get_a_user_node_by_code(account['code'])
        if node:
            node['btc'] += delta
            self.graph.push(node)
            return node
        self.create_a_user_node(account['code'], delta)
        if refetch:
            return self.get_a_user_node_by_code(account['code'])
        return None

    def add_a_transaction(self, transaction_dict):
        """Apply one transaction to the graph.

        Args:
            transaction_dict: mapping with the keys ``'source'``,
                ``'destination'`` and ``'value'``. A falsy ``'source'``
                means there is no paying account: only the destination
                balance is credited and no relationship is written.
        """
        value = transaction_dict['value']
        destination = transaction_dict['destination']

        if not transaction_dict['source']:
            # No source address: just credit the destination account.
            dest_record = self._get_or_create_address(destination)
            self._adjust_balance(dest_record['user_id'], value, refetch=False)
            return

        # Both ends are known: update both balances, then the relationship.
        source = transaction_dict['source']
        # The source address is expected to exist already, so it is not
        # created on demand the way the destination is.
        source_record = self.address_dao.get_address_by_address(source)
        dest_record = self._get_or_create_address(destination)

        account_source_node = self._adjust_balance(source_record['user_id'],
                                                   -value)
        account_destination_node = self._adjust_balance(
            dest_record['user_id'], value)

        # NOTE(review): this matches a relationship of any type between the
        # two nodes; only "Pay" relationships are created here.
        payment = self.graph.match_one(start_node=account_source_node,
                                       end_node=account_destination_node,
                                       bidirectional=False)
        if payment:
            payment['btc'] += value
            self.graph.push(payment)
        else:
            payment = Relationship(account_source_node, "Pay",
                                   account_destination_node)
            payment['btc'] = value
            self.graph.create(payment)

    def generate_user_graph(self, page_total=100):
        """Replay stored transactions, page by page, into the graph.

        Args:
            page_total: number of pages to process, starting at page 0.
        """
        for page_num in range(page_total):
            print("====Generate User Graph 处理第 " + str(page_num) + " / " +
                  str(page_total) + " 页交易(100/page)")
            transaction_list = self.transaction_dao.paginate_list_resource(
                models.Transaction, page_num)
            for item_transaction in transaction_list:
                # Pass on only the three fields add_a_transaction reads.
                self.add_a_transaction({
                    'source': item_transaction['source'],
                    'destination': item_transaction['destination'],
                    'value': item_transaction['value'],
                })


#user_graph = UserGraph()
#user_graph.init_connect()
#user_graph.generate_user_graph()
Ejemplo n.º 57
0
# Read the database password from a local file. The context manager closes
# the handle instead of leaking it for the lifetime of the script.
with open('neo4j_pw') as pw_file:
    pw = pw_file.readline().strip()
authenticate("localhost:7474", "neo4j", pw)

# connect to authenticated graph database
graph = Graph()

# NOTE(review): statements are appended to this transaction below, but no
# commit is visible in this section of the script.
tx = graph.cypher.begin()
conceptnodes = {}
tasknodes = {}
contrastnodes = {}

# Create concept nodes (ids and names are parallel lists)
for concept_id, concept_name in zip(concept_ids, concept_names):
    tx.append('CREATE (%s:concept {name: "%s", id:"%s"}) RETURN %s' % (
        concept_id, concept_name, concept_id, concept_id))
    # Only create the node directly when no concept with this id exists yet.
    if graph.find_one('concept', property_key='id',
                      property_value=concept_id) is None:
        conceptnode = Node("concept", name=concept_name, id=concept_id)
        graph.create(conceptnode)

# Create task nodes (ids and names are parallel lists)
for task_id, task_name in zip(task_ids, task_names):
    tx.append('CREATE (%s:task {name: "%s", id:"%s"}) RETURN %s' % (
        task_id, task_name, task_id, task_id))
    # Only create the node directly when no task with this id exists yet.
    if graph.find_one('task', property_key='id',
                      property_value=task_id) is None:
        tasknode = Node("task", name=task_name, id=task_id)
        graph.create(tasknode)

# Create contrast nodes, associate with task
for contrast_task, contrast_name, contrast_id in zip(
        contrast_tasks, contrast_names, contrast_ids):
    tasknode = graph.find_one('task', property_key='id',
                              property_value=contrast_task)
    path = Path(tasknode, Rel("HASCONTRAST"),
                Node("contrast", name=contrast_name, id=contrast_id))
Ejemplo n.º 58
0
property_all = aa + bb + cc + dd + ee + ff + gg + hh + ii + jj + HP_all + MP_all + HP_recover_all + MP_recover_all + R_cooling_all + R_cost_all + skill_R_all + attack_all + attack_range_all
# The second field of every entry in property_all_2 is used as its name.
property_name = [entry[1] for entry in property_all_2]

# Node lists
things_2 = [weapons, heros]
things = weapons + heros

# Collect the tags of the heroes: each hero node's 'tag' property is split
# on whitespace. Only the first 68 heroes are read, as before.
m = []
for hero_name in heros[:68]:
    hero_node = g.find_one('hero', property_key='name',
                           property_value=hero_name)
    m.extend(re.findall(r'\S+', hero_node['tag']))

# De-duplicate while keeping first-seen order; the set makes each
# membership test constant-time.
tags_all = []
seen_tags = set()
for p in m:
    if p not in seen_tags:
        seen_tags.add(p)
        tags_all.append(p)

# Relation lists
relation = [u'相似', u'克制', u'搭配', u'推荐', u'适合用于对抗']
rel_2 = [[u'相似', u'像'], [u'克制'], [u'搭配', u'配合', u'组合'], [u'推荐', u'出装'],
         [u'用来对抗', u'适合用来对抗', u'适合用于对抗']]
rel_3 = [
    u'相似', u'像', u'克制', u'搭配', u'配合', u'组合', u'推荐', u'出装', u'用来对抗', u'适合用来对抗',
    u'适合用于对抗'
]
# Attributes that can be checked as true/false. The two close-range words
# are separate entries; they were previously joined into one string by a
# stray '+'.
if_true = [u'远程', u'近战', u'近程'] + tags_all
Ejemplo n.º 59
0
class WriteToNeo4j:
    """Import knowledge triples stored as JSON into a Neo4j database."""

    # Labels assigned by get_label for the POS tags it recognises.
    _POSTAG_LABELS = {'nh': '人', 'ni': '组织', 'ns': '地点'}

    def __init__(self, triple_path):
        """Connect to Neo4j and load the triple file.

        Args:
            triple_path: path of the JSON file holding the judgment data.
        """
        self.entity_set = set()  # names of the entity nodes written so far
        self.nlp = NLP()
        # Connect to the neo4j database
        self.graph = Graph(host='localhost',
                           http_port=7474,
                           user='******',
                           password='******')
        # Read the whole JSON document; the context manager closes the file.
        with open(triple_path, 'r') as f_in:
            self.triple = json.loads(f_in.read())

    def write_litigant(self, litigants, relation):
        """Write the litigants (plaintiffs or defendants) and their attributes.

        Every litigant becomes a node linked from the judgment root node by
        ``relation``. Every further field of the litigant (anything other
        than name and id) becomes its own node, linked from the litigant by
        a relationship named after the field.

        Args:
            litigants: list of dicts, one per litigant.
            relation: relationship type used between the root node and each
                litigant.
        """
        # The root node is the same for every litigant, so look it up once.
        node_root = self.graph.find_one('判决书',
                                        property_key='name',
                                        property_value='判决书001')
        for litigant in litigants:
            node_litigant = Node(self.get_label(litigant['名字']),
                                 name=litigant['名字'],
                                 id=litigant['编号'])
            self.graph.create(node_litigant)
            self.entity_set.add(litigant['名字'])
            # NOTE(review): this relationship is tagged label='relation'
            # while every other one uses label='关系'; left unchanged.
            entity_relation = Relationship(node_root,
                                           relation,
                                           node_litigant,
                                           label='relation')
            self.graph.create(entity_relation)

            for item, value in litigant.items():
                if item in ('名字', '编号'):
                    continue
                # Remaining fields, e.g. person in charge or attorney.
                node_repr = Node(self.get_label(value), name=value)
                self.graph.create(node_repr)
                self.entity_set.add(value)
                entity_relation = Relationship(node_litigant,
                                               item,
                                               node_repr,
                                               label='关系')
                self.graph.create(entity_relation)

    def get_label(self, word):
        """Return the node label for a word, based on its POS tag.

        Args:
            word: str, the word to classify.
        Returns:
            label: str, '人' for tag 'nh', '组织' for 'ni', '地点' for 'ns'
            and '其他' for anything else.
        """
        postag = self.nlp.get_postag(word)
        return self._POSTAG_LABELS.get(postag, '其他')

    def _get_or_create_node(self, name):
        """Return the node for entity ``name``, creating it on first use.

        NOTE(review): a known entity is looked up under ``get_label(name)``;
        nodes that were written with a fixed label (the judgment root, the
        court) are only found if the tagger maps their name to that label.
        """
        label = self.get_label(name)
        if name in self.entity_set:
            return self.graph.find_one(label,
                                       property_key='name',
                                       property_value=name)
        node = Node(label, name=name)
        self.graph.create(node)
        self.entity_set.add(name)
        return node

    def write(self):
        """Write the loaded judgment into the graph database."""
        # Root node: carries the document id, title, type and case number.
        node_root = Node('判决书',
                         name='判决书001',
                         id=self.triple['文书编号'],
                         title=self.triple['文书标题'],
                         type=self.triple['文书类型'],
                         case=self.triple['案件编号'])
        self.graph.create(node_root)
        self.entity_set.add('判决书001')
        node_court = Node('组织', name=self.triple['受理法院'])
        self.graph.create(node_court)
        self.entity_set.add(self.triple['受理法院'])

        entity_relation = Relationship(node_root,
                                       '受理法院',
                                       node_court,
                                       label='关系')
        self.graph.create(entity_relation)

        # Plaintiffs and defendants
        self.write_litigant(self.triple['原告'], '原告')
        self.write_litigant(self.triple['被告'], '被告')

        # Case facts: each one holds an (entity, relation, entity) triple.
        for fact in self.triple['案情事实']:
            tri = fact['知识']
            entity1 = tri[0]
            relation = tri[1]
            entity2 = tri[2]

            node1 = self._get_or_create_node(entity1)
            node2 = self._get_or_create_node(entity2)

            entity_relation = Relationship(node1,
                                           relation,
                                           node2,
                                           label='关系')
            self.graph.create(entity_relation)
Ejemplo n.º 60
0
    host="154.8.214.203",  # neo4j 搭载服务器的ip地址,ifconfig可获取到
    http_port=7474,  # neo4j 服务器监听的端口号
    user="******",  # 数据库user name,如果没有更改过,应该是neo4j
    password="******")

# Load the scraped records: the file holds one JSON list, and every element
# is read below as a dict with at least the keys '名称' and '别称'.
with codecs.open('xywy_new2.json', 'r', encoding='utf-8') as f:
    contents = f.read()
    l = json.loads(contents)
    # print(len(l))
    # print(l[18])
    # print(l[22])

# Merge every record into the node with label '疾病' that has the same '名称'.
for b in l:
    # sql = "MATCH (n:`疾病`{名称:'%s'}) RETURN n.名称" % (b['名称'])
    # m = graph.run(sql).data()
    a = graph.find_one(label="疾病", property_key="名称", property_value=b['名称'])
    if a:  # a matching node exists, so update it
        # Update the labels: add the extra label '寻医问药' to the node.
        # NOTE(review): the name is interpolated into the Cypher text, so a
        # name containing a quote character breaks the query; a query
        # parameter would avoid that.
        sql = "MATCH (n:`疾病`{名称:'%s'}) set n:寻医问药" % (b['名称'])
        graph.run(sql)
        # Update the aliases; ['暂无数据'] is the placeholder for "no data".
        if b['别称'] != ['暂无数据']:
            if a['别称'] != ['暂无数据']:
                # Both sides have aliases: append the ones not present yet.
                for bc in b['别称']:
                    if bc not in a['别称']:
                        a['别称'].append(bc)
            else:
                # The stored value is only the placeholder: replace it.
                a['别称'] = b['别称']
            # NOTE(review): a['别称'] is a list here, so '%s' inserts its
            # Python repr (which itself contains quote characters) between
            # the quotes of the Cypher string; consider passing the list as
            # a query parameter instead.
            sql = "MATCH (n:`疾病`{名称:'%s'}) set n.别称='%s'" % (b['名称'], a['别称'])
            graph.run(sql)