def link(self, uid, endnode_id, relation_type, endnode_type=None, properties=None, label=None):
    '''Create a relation between two nodes if it is allowed and not yet present.

    Both nodes are looked up by their ``id`` property. A relation is only
    created when ``relation_type`` is listed in ``self.relations`` and no
    relation of that type already links the two nodes.

    :param uid: the unique identifier for the source node
    :param endnode_id: the unique identifier for the end node
    :param relation_type: the relation type
    :param endnode_type: the type (label) of the end node. If not specified,
        ``self.name`` is used
    :param properties: optional mapping of properties to add to the relation
    :param label: the label of the source node. If not specified,
        ``self.name`` is used
    :returns: the new relation when one was created, ``True`` when the
        relation already existed, ``False`` when either node was not found
    :raises InvalidNodeOperation: when both nodes exist but ``relation_type``
        is not in ``self.relations``
    '''
    if label is None:
        label = self.name
    if endnode_type is None:
        endnode_type = self.name

    startnode = self.graph.find_one(label, property_key='id', property_value=uid)
    endnode = self.graph.find_one(endnode_type, property_key='id', property_value=endnode_id)
    if startnode is None or endnode is None:
        return False

    # The relation type is only validated once both endpoints are known to exist.
    if relation_type not in self.relations:
        raise InvalidNodeOperation("Relationship type not used by this node type")

    existing = self.graph.match_one(start_node=startnode,
                                    rel_type=relation_type,
                                    end_node=endnode)
    if existing is not None:
        return True

    relation = Relationship(startnode, relation_type, endnode)
    self.graph.create(relation)
    if properties is not None:
        for property_name, value in properties.items():
            relation.properties[property_name] = value
        relation.push()
    return relation
def link(self, uid, endnode_id, relation_type, endnode_type=None, properties=None):
    '''Create a relation between two nodes if it is allowed and not yet present.

    Both nodes are looked up by their ``id`` property in the module-level
    ``graph``; the source node is looked up under the label ``self.name``.

    :param uid: the unique identifier for the source node
    :param endnode_id: the unique identifier for the end node
    :param relation_type: the relation type
    :param endnode_type: the type of the second node. If not specified,
        assumed to be same as startnode
    :param properties: properties to add to the relation
    :returns: the newly created relation, or ``None`` when either node is
        missing, ``relation_type`` is not in ``self.relations``, or the
        relation already exists
    '''
    if endnode_type is None:
        endnode_type = self.name

    startnode = graph.find_one(self.name, property_key='id', property_value=uid)
    endnode = graph.find_one(endnode_type, property_key='id', property_value=endnode_id)
    if startnode is None or endnode is None:
        return None

    # Only relation types declared for this node type may be created.
    if relation_type not in self.relations:
        return None

    existing = graph.match_one(start_node=startnode,
                               rel_type=relation_type,
                               end_node=endnode)
    if existing is not None:
        return None

    relation = Relationship(startnode, relation_type, endnode)
    graph.create(relation)
    if properties is not None:
        for property_name, value in properties.items():
            relation.properties[property_name] = value
        relation.push()
    return relation
def fill_similarities_graph(self):
    """Populate the Neo4j graph with Video nodes and "Jaccard" similarity edges.

    For every video row whose ``hashtags`` value is not None and has a length
    greater than 3, the hashtags are used for a text-index search. Every row
    returned by that search is merged as a ``Video`` node (with ``rating`` and
    ``title``). A "Jaccard" relationship carrying ``jaccard_similarity`` is
    merged from the source video to each *other* video whose similarity
    exceeds 0.5 and whose own hashtags have a length greater than 3.

    Returns None.
    """
    authenticate(settings.NeoHost, settings.NeoLog, settings.NeoPass)
    graph = Graph("{0}/db/data/".format(settings.NeoHost))
    try:
        graph.schema.create_uniqueness_constraint('Video', 'id')
    except Exception:
        # Best effort: presumably this fails when the constraint already
        # exists -- TODO confirm and narrow the exception type.
        pass

    data = pd.DataFrame(self.db_game.read_videodata_from_db())
    if data.empty:
        return
    data = data[pd.notnull(data['title'])]
    data = data[pd.notnull(data['rating'])]
    # Rows were just dropped, so the index has gaps; renumber it so that the
    # positional lookups below (data[col][vid]) cannot raise KeyError.
    data = data.reset_index(drop=True)

    mes = smilarities.SimilarityMeasures()
    for vid in range(len(data)):
        hashtags = data['hashtags'][vid]
        if hashtags is None or len(hashtags) <= 3:
            continue

        hashes = self.hashtag_list_to_str(hashtags)
        matches = pd.DataFrame(
            self.db_game.read_text_index_videodata_from_db('hashtags', hashes))
        if not matches.empty:
            matches = matches[pd.notnull(matches['title'])]
            matches = matches[pd.notnull(matches['rating'])]
            matches = matches.reset_index(drop=True)

        start = Node("Video", id=str(data['id'][vid]))
        graph.merge(start)
        start.properties['rating'] = data['rating'][vid]
        start.properties['title'] = data['title'][vid]
        start.push()

        for vid1 in range(len(matches)):
            stop = Node("Video", id=str(matches['id'][vid1]))
            graph.merge(stop)
            stop.properties['rating'] = matches['rating'][vid1]
            stop.properties['title'] = matches['title'][vid1]
            stop.push()

            num = mes.jaccard_similarity(hashtags, matches['hashtags'][vid1])
            is_similar = num > 0.5 and len(matches['hashtags'][vid1]) > 3
            if is_similar and matches['id'][vid1] != data['id'][vid]:
                following = Relationship(start, "Jaccard", stop)
                graph.merge(following)
                following.properties['jaccard_similarity'] = num
                following.push()
    return
def make_relation(startnode, rel_type, endnode, properties=None):
    """Create a ``rel_type`` relation from ``startnode`` to ``endnode`` if none exists.

    :param startnode: node the relation starts from (its ``name`` property is
        printed)
    :param rel_type: the relation type
    :param endnode: node the relation points to (its ``name`` property is
        printed)
    :param properties: optional mapping of properties to store on the relation
    :returns: the newly created relation, or ``None`` when a relation of this
        type already links the two nodes
    """
    existing = graph.match_one(start_node=startnode, rel_type=rel_type, end_node=endnode)
    if existing is not None:
        return None

    relation = Relationship(startnode, rel_type, endnode)
    print("Creating relation %s [%s] %s" % (startnode.properties["name"],
                                            rel_type,
                                            endnode.properties["name"]))
    graph.create(relation)
    if properties is not None:
        for property_name, value in properties.items():
            relation.properties[property_name] = value
        relation.push()
    return relation
def add_relation(mid, fid, rname, rtype, role):
    """Create an ``rname`` relationship between two nodes and return to the edit page.

    ``rtype`` selects the direction and extras (per the original note:
    genre/keyword = 1, other = 2, actor = 3). For ``rtype == 1`` the
    relationship runs from node ``mid`` to node ``fid``; for every other value
    it runs from ``fid`` to ``mid``. For ``rtype == 3`` the ``role`` value is
    stored on the relationship and pushed.
    """
    first = connection.g.node(mid)
    second = connection.g.node(fid)

    # Only rtype 1 points from the first node to the second one.
    source, target = (first, second) if rtype == 1 else (second, first)
    edge = Relationship(source, rname, target)
    connection.g.create(edge)

    if rtype == 3:
        edge['role'] = role
        edge.push()

    EditMovie.controller.show_page("EditMovie")
def make_relation(startnode, rel_type, endnode, properties=None):
    """Link ``startnode`` to ``endnode`` with ``rel_type`` unless already linked.

    Prints a UTF-8 encoded "Creating relation ..." message built from both
    nodes' ``name`` properties, creates the relation in the module-level
    ``graph`` and, when ``properties`` is given, copies them onto the relation
    and pushes it.

    Returns the new relation, or ``None`` if a matching relation already exists.
    """
    already_linked = graph.match_one(
        start_node=startnode, rel_type=rel_type, end_node=endnode)
    if already_linked is not None:
        return None

    created = Relationship(startnode, rel_type, endnode)
    message = "Creating relation {} [{}] {}".format(
        startnode.properties["name"], rel_type, endnode.properties["name"])
    print(message.encode('utf-8'))
    graph.create(created)

    if properties is None:
        return created
    for key in properties:
        created.properties[key] = properties[key]
    created.push()
    return created
def add_relation(vote, comment, likes, rating):
    """Store the current user's LIKES relationship to the current movie.

    The relationship between ``connection.uid`` and ``connection.movie`` is
    created with ``rating`` set to ``vote`` and ``comment`` set to ``comment``.
    The movie node's ``likes`` becomes ``likes + 1`` and its ``rating`` becomes
    ``(rating + int(vote)) / (likes + 1)``. Both are pushed, then the media
    page is shown.
    """
    user_node = connection.g.node(connection.uid)
    movie_node = connection.g.node(connection.movie)

    liked = Relationship(user_node, "LIKES", movie_node)
    connection.g.create(liked)
    liked['rating'] = vote
    liked['comment'] = comment

    total_likes = likes + 1
    movie_node['likes'] = total_likes
    movie_node['rating'] = (rating + int(vote)) / total_likes

    liked.push()
    movie_node.push()
    MediaPage.controller.show_page("MediaPage")
def createLink(self, projectId, link_obj):
    """Create (or replace) the CONNECT relationship described by ``link_obj``.

    :param projectId: project identifier, passed through to ``deleteLink``
    :param link_obj: mapping with ``source`` and ``target`` entries (each
        holding an ``id``) and a ``relation`` entry; every key not listed in
        ``hideKeys`` is copied onto the relationship
    :returns: ``None`` when ``link_obj`` has no ``relation`` key, otherwise
        an empty string
    """
    # ``dict.has_key`` no longer exists in Python 3; ``in`` works everywhere.
    if 'relation' not in link_obj:
        return
    print(link_obj)

    srcNode = graph.node(int(link_obj['source']['id']))  # source node
    tarNode = graph.node(int(link_obj['target']['id']))
    newLink = Relationship(srcNode, 'CONNECT', tarNode)

    # Delete the relationship if it already exists; per the original note
    # this is a no-op when there is none.
    self.deleteLink(projectId, link_obj)
    graph.merge(newLink)

    for key, value in link_obj.items():
        if key not in hideKeys:
            newLink[key] = value
    newLink.push()
    return ''
def make_relation(startnode, rel_type, endnode, properties=None):
    """Create a relation between two nodes unless one of that type exists.

    :param startnode: start node; its ``name`` property appears in the log line
    :param rel_type: the relation type
    :param endnode: end node; its ``name`` property appears in the log line
    :param properties: optional mapping of properties to set on the relation
    :returns: the created relation, or ``None`` if the relation already existed
    """
    relation = None
    if graph.match_one(start_node=startnode, rel_type=rel_type, end_node=endnode) is None:
        relation = Relationship(startnode, rel_type, endnode)
        print("Creating relation %s [%s] %s" %
              (startnode.properties["name"], rel_type, endnode.properties["name"]))
        graph.create(relation)
        if properties is not None:
            for property_name, value in properties.items():
                relation.properties[property_name] = value
            # A single push persists all the properties set above.
            relation.push()
    return relation
def process_follower(follower, followee):
    """
    Link two twitter-user nodes with a "follows" relationship and sync it
    with Neo4j:

        User (follower) - (follows) -> User (followee)

    Both users are merged by their "id" value, the relationship is created
    uniquely, and it is stamped with the current time in milliseconds
    ("timestamp_ms").

    Follower and followee must be created in the graph database before
    calling this method.

    Returns Relationship ((:User)-[r:"follows"]->(:User))
    """
    target_user = graph.merge_one("User", "id", followee["id"])
    source_user = graph.merge_one("User", "id", follower["id"])

    relationship = Relationship(source_user, "follows", target_user)
    graph.create_unique(relationship)

    # To-Do : figure out when A follows B; the processing time is used for now.
    now_ms = int(1000 * time.time())
    relationship.properties["timestamp_ms"] = now_ms
    relationship.push()
    return relationship
def test_can_push_relationship(self):
    """A property set locally on a relationship reaches the graph after push()."""
    query = ("MATCH ()-[ab:KNOWS]->() WHERE id(ab)={i} "
             "RETURN ab.since")
    start, end = Node(), Node()
    knows = Relationship(start, "KNOWS", end)
    self.graph.create(knows)

    def stored_since():
        # Read the value straight from the database, bypassing the local copy.
        return self.graph.evaluate(query, i=remote(knows)._id)

    found = stored_since()
    assert found is None

    knows["since"] = 1999
    knows.push()

    found = stored_since()
    assert found == 1999
def create_relation_user_to_topic(self, user, relation, topic_name):
    """Count one more ``relation`` from ``user`` to the topic ``topic_name``.

    The "user" node (looked up by ``user.id_str``) and the "topic_name" node
    (looked up by name) are created when they cannot be found. An existing
    relationship gets its ``count`` incremented and pushed; otherwise a new
    relationship is created with ``count = 1``.
    """
    graph = self.graph

    user_node = graph.find_one("user", 'id', user.id_str)
    if not user_node:
        user_node = self.create_node_from_user(user)
        graph.create(user_node)

    topic_node = graph.find_one("topic_name", 'name', topic_name)
    if not topic_node:
        topic_node = Node("topic_name", name=topic_name)
        graph.create(topic_node)

    edge = graph.match_one(user_node, relation, topic_node)
    if edge:
        edge.properties['count'] += 1
        edge.push()
    else:
        edge = Relationship(user_node, relation, topic_node, count=1)
        graph.create(edge)
def create_relation_user_to_user(self, userA, relation, userB):
    """Count one more ``relation`` from ``userA`` to ``userB``.

    Both "user" nodes are looked up by ``id_str`` first; whichever is missing
    is then built with ``create_node_from_user`` and created. An existing
    relationship gets its ``count`` incremented and pushed; otherwise a new
    relationship is created with ``count = 1``.
    """
    graph = self.graph
    node_a = graph.find_one("user", 'id', userA.id_str)
    node_b = graph.find_one("user", 'id', userB.id_str)

    if not node_a:
        node_a = self.create_node_from_user(userA)
        graph.create(node_a)
    if not node_b:
        node_b = self.create_node_from_user(userB)
        graph.create(node_b)

    edge = graph.match_one(node_a, relation, node_b)
    if edge:
        edge.properties['count'] += 1
        edge.push()
    else:
        edge = Relationship(node_a, relation, node_b, count=1)
        graph.create(edge)
def process_retweet(user, tweet, level=1, date_created=None, timestamp_ms=None):
    """
    Given a Node(user) and Node(Tweet) already created in the graph db,
    create the User - (retweeted) -> Tweet relationship. Note that tweet is
    the original tweet.

    The relationship type is "retweeted_" followed by ``level``. When
    ``timestamp_ms`` or ``date_created`` is not supplied, the current time is
    used instead, since the exact information is not available.

    Returns Relationship(retweeted_n) where n represents level of follower
    """
    rel_type = "retweeted_" + str(level)
    relationship = Relationship(user, rel_type, tweet)
    graph.create_unique(relationship)

    if timestamp_ms is None:
        stamp = int(1000 * time.time())
    else:
        stamp = timestamp_ms
    relationship.properties["timestamp_ms"] = stamp

    if date_created is None:
        now = datetime.datetime.fromtimestamp(int(time.time()))
        created = now.strftime("%Y-%m-%d %H:%M:%S")
    else:
        created = date_created
    relationship.properties["created_at"] = created

    relationship.push()
    return relationship
from py2neo import Graph, Node, Relationship

graph = Graph()

# An artist and her birthplace, joined by BORN_IN.
magdalena = Node(
    "Artist",
    name="Abakanowicz, Magdalena",
    id=10093,
    gender="Female",
    birthYear=1930,
)
polska = Node("Place", name="Polska")
magdalena_born_in_polska = Relationship(magdalena, "BORN_IN", polska)
graph.create(magdalena_born_in_polska)

# Properties added after creation have to be pushed to reach the database.
polska.properties["id"] = "Polska"
polska.push()
magdalena_born_in_polska.properties["date"] = 1930
magdalena_born_in_polska.push()

# An artwork the artist contributed to.
t12979 = Node(
    "Artwork",
    title="Abakan Red",
    date=1969,
    acno="T12979",
    id=102938,
)
magdalena_contributed_to_t12979 = Relationship(
    magdalena, "CONTRIBUTED_TO", t12979)
graph.create(magdalena_contributed_to_t12979)

# The media the artwork is made of; both relationships go in one create call.
metal = Node("Medium", name="metal")
sisal = Node("Medium", name="sisal")
abakan_made_of_metal = Relationship(t12979, "MADE_OF", metal)
abakan_made_of_sisal = Relationship(t12979, "MADE_OF", sisal)
graph.create(abakan_made_of_metal, abakan_made_of_sisal)

# Built locally only; this node is not created in the graph in this span.
cities = Node("Place Types", name="cities, towns, villages (non-UK)")
def process_tweet(d):
    """
    Given a tweet from the streaming API, recursively unravel embedded quotes
    and retweets and users from the tweet.

    Creates the following nodes
        Tweet (quotes and original tweets; retweets are not created, they are
               instead represented by a User - (retweeted) -> original_tweet
               relationship)
        User
        Hashtag

    Creates the following relationships
        User  - (tweeted)           -> Tweet
        User  - (retweeted)         -> Tweet
        Tweet - (quote_of)          -> Tweet
        Tweet - (mentioned)         -> User
        Tweet - (tagged)            -> Hashtag
        Tweet - (in_reply_to_tweet) -> Tweet
        Tweet - (in_reply_to_user)  -> User

    Returns the Tweet node, or None when ``d`` carries a "retweeted_status"
    key (a retweet gets no node of its own).
    """
    timestamp_ms = tryGet(d, "timestamp_ms")
    filter_level = tryGet(d, "filter_level")
    is_quote_status = tryGet(d, "is_quote_status")
    created_at = tryGet(d, "created_at")
    favorite_count = tryGet(d, "favorite_count")
    tid = tryGet(d, "id")
    in_reply_to_screen_name = tryGet(d, "in_reply_to_screen_name")
    in_reply_to_status_id = tryGet(d, "in_reply_to_status_id")
    in_reply_to_user_id = tryGet(d, "in_reply_to_user_id")
    lang = tryGet(d, "lang")
    retweet_count = tryGet(d, "retweet_count")
    source = tryGet(d, "source")
    text = tryGet(d, "text")
    truncated = tryGet(d, "truncated")

    u = tryGet(d, "user")
    user = process_user(u) if u is not None else None

    # Bound up front so a "retweeted_status" key holding None cannot leave
    # ``tweet`` undefined further down.
    tweet = None
    try:
        if d["retweeted_status"] is not None:
            original_tweet = process_tweet(d["retweeted_status"])
            if original_tweet is not None and user is not None:
                original_tweet = graph.merge_one("Tweet", "id", original_tweet["id"])
                retweeted = Relationship(user, "retweeted", original_tweet)
                retweeted.properties["timestamp_ms"] = timestamp_ms
                graph.create_unique(retweeted)
    except KeyError:
        # No "retweeted_status" key: this is an ordinary tweet, so store it.
        # NOTE(review): a KeyError raised anywhere in the retweet handling
        # above also lands here, as it did before.
        tweet = graph.merge_one("Tweet", "id", tid)
        tweet.properties["text"] = text
        tweet.properties["created_at"] = created_at
        tweet.properties["favorite_count"] = favorite_count
        tweet.properties["filter_level"] = filter_level
        tweet.properties["in_reply_to_screen_name"] = in_reply_to_screen_name
        tweet.properties["in_reply_to_user_id"] = in_reply_to_user_id
        tweet.properties["in_reply_to_status_id"] = in_reply_to_status_id
        tweet.properties["is_quote_status"] = is_quote_status
        tweet.properties["lang"] = lang
        tweet.properties["retweet_count"] = retweet_count
        tweet.properties["source"] = source
        tweet.properties["timestamp_ms"] = timestamp_ms
        tweet.properties["truncated"] = truncated
        tweet.push()

        # Without an author there is nothing to attach the tweet to.
        if user is not None:
            tweeted = Relationship(user, "tweeted", tweet)
            tweeted.properties["created_at"] = created_at
            tweeted.properties["timestamp_ms"] = timestamp_ms
            graph.create_unique(tweeted)

        try:
            if d["quoted_status"] is not None:
                original_quote = process_tweet(d["quoted_status"])
                if original_quote is not None:
                    original_quote = graph.merge_one("Tweet", "id", original_quote["id"])
                    quoted = Relationship(tweet, "quote_of", original_quote)
                    quoted.properties["timestamp_ms"] = timestamp_ms
                    graph.create_unique(quoted)
        except KeyError:
            # The tweet does not quote another tweet.
            pass

    if tweet is None:
        return tweet

    try:
        entities = d["entities"]

        hashtags = entities["hashtags"]
        if hashtags is not None:
            for tag in hashtags:
                hashtag = graph.merge_one("Hashtag", "text", tag["text"])
                hashtag.properties["timestamp_ms"] = int(1000 * time.time())
                hashtag.push()
                tagged = Relationship(tweet, "tagged", hashtag)
                tagged.properties["timestamp_ms"] = timestamp_ms
                graph.create_unique(tagged)
        else:
            print("Error : entity, hashtags")

        # Check the mentions themselves; this used to re-test the hashtags.
        mentions = entities["user_mentions"]
        if mentions is not None:
            for mention in mentions:
                mentioned_user = graph.merge_one("User", "id", mention["id"])
                mentioned_user.properties["screen_name"] = mention["screen_name"]
                mentioned_user.push()
                mentioned = Relationship(tweet, "mentioned", mentioned_user)
                graph.create_unique(mentioned)
        else:
            print("Error : entity, user_mentions")

        if in_reply_to_status_id is not None:
            # Query for the tweet replied to and create the in_reply_to
            # relationship.
            original_reply = get_status(in_reply_to_status_id)
            if original_reply is not None:
                original_reply_node = process_tweet(original_reply)
                # process_tweet returns None for retweets; nothing to link then.
                if original_reply_node is not None:
                    in_reply_to_tweet = Relationship(tweet, "in_reply_to_tweet",
                                                     original_reply_node)
                    graph.create_unique(in_reply_to_tweet)
                    in_reply_to_tweet.properties["timestamp_ms"] = timestamp_ms
                    in_reply_to_tweet.push()

        if in_reply_to_user_id is not None:
            receiver = get_user(in_reply_to_user_id)
            if receiver is not None:
                receiver_node = process_user(receiver)
                in_reply_to_user = Relationship(tweet, "in_reply_to_user", receiver_node)
                graph.create_unique(in_reply_to_user)
                in_reply_to_user.properties["timestamp_ms"] = timestamp_ms
                in_reply_to_user.push()
    except KeyError as e:
        print("Error while parsing entities : " + str(e))

    return tweet
Script to create some sample nodes and edges, purely for illustration purposes. Does not align with the nodes, edges and properties in other scripts. ''' from py2neo import Graph graph = Graph() from py2neo import Node, Relationship magdalena = Node("Artist", name="Abakanowicz, Magdalena", id=10093, gender="Female", birthYear=1930) polska=Node("Place", name="Polska") magdalena_born_in_polska = Relationship(magdalena, "BORN_IN", polska) graph.create(magdalena_born_in_polska) polska.properties["id"]="Polska" polska.push() magdalena_born_in_polska.properties["date"]=1930 magdalena_born_in_polska.push() t12979 = Node("Artwork", title="Abakan Red", date=1969, acno="T12979", id=102938) magdalena_contributed_to_t12979 = Relationship(magdalena, "CONTRIBUTED_TO", t12979) graph.create(magdalena_contributed_to_t12979) metal = Node("Medium", name="metal") sisal = Node("Medium", name="sisal") abakan_made_of_metal = Relationship(t12979, "MADE_OF", metal) abakan_made_of_sisal = Relationship(t12979, "MADE_OF", sisal) graph.create(abakan_made_of_metal, abakan_made_of_sisal) cities = Node("Place Types", name="cities, towns, villages (non-UK)") countries = Node("Place Types", name="countries and continents") abakan = Node("Place", name="Abakan") abakan = Node("Place", name="Russia, Khakassia") abakan = Node("Place", name="Abakan") russia = Node("Place", name="Russia, Khakassia")
def _bump_count(start, rel_type, end):
    """Increment ``count`` on the start-[rel_type]->end relationship, creating it at 1."""
    rel = graph.match_one(start, rel_type, end)
    if rel is not None:
        # A relationship stored without a count is treated as zero.
        rel.properties["count"] = (rel.properties.get("count") or 0) + 1
        rel.push()
    else:
        rel = Relationship(start, rel_type, end)
        rel.properties["count"] = 1
        graph.create_unique(rel)


def push_tweet(data, timelineable, parse_terms):
    """Merge a tweet and everything hanging off it into the graph.

    Creates or updates the Tweet node for ``data["id"]`` together with its
    POSTS, QUOTES, RETWEETS, REPLY_TO, CONTAINS, TAGS and MENTIONS
    relationships. Quoted and retweeted tweets are pushed recursively with
    both flags off.

    :param data: tweet dictionary; must contain "id", "text",
        "favorite_count", "retweet_count" and "entities"
    :param timelineable: when true, the tweet id updates the module-level
        ``max_id`` / ``min_id`` window and sets ``USE_MAX_ID`` in
        ``id_policy_bits``
    :param parse_terms: when true (and the tweet has a "user"), every token
        of the text bumps the author's ``term_count`` and the matching
        DISCUSSES relationship
    :returns: the Tweet node
    """
    global max_id
    global min_id
    global id_policy_bits

    tweet_id = data["id"]
    # merge tweet by id
    tweet = graph.merge_one("Tweet", "id", tweet_id)

    # timelining stuff
    if timelineable:
        if tweet_id > max_id:
            max_id = tweet_id
        if tweet_id < min_id:
            min_id = tweet_id
        id_policy_bits = id_policy_bits | USE_MAX_ID

    # authorship
    user = None
    if "user" in data:
        user = push_user(data["user"])
        graph.create_unique(Relationship(user, "POSTS", tweet))

    # quotes
    if "quoted_status" in data:
        quoted = push_tweet(data["quoted_status"], False, False)
        graph.create_unique(Relationship(tweet, "QUOTES", quoted))

    # is a retweet
    if "retweeted_status" in data:
        retweeted = push_tweet(data["retweeted_status"], False, False)
        graph.create_unique(Relationship(tweet, "RETWEETS", retweeted))

    # reply
    reply = data.get("in_reply_to_status_id")
    if reply:
        reply_tweet = graph.merge_one("Tweet", "id", reply)
        graph.create_unique(Relationship(tweet, "REPLY_TO", reply_tweet))

    # geolocation exact/estimated
    point = data.get("coordinates")
    place = data.get("place")
    if point is not None:
        tweet.properties["lon"] = point["coordinates"][0]
        tweet.properties["lat"] = point["coordinates"][1]
    elif place is not None:
        # Estimate the position as the mean of the bounding box's four corners.
        corners = place["bounding_box"]["coordinates"][0]
        lon = (corners[0][0] + corners[1][0] + corners[2][0] + corners[3][0]) / 4
        lat = (corners[0][1] + corners[1][1] + corners[2][1] + corners[3][1]) / 4
        tweet.properties["lon"] = lon
        tweet.properties["lat"] = lat

    # fav count
    tweet.properties["favorite_count"] = data["favorite_count"]
    # rt count
    tweet.properties["retweet_count"] = data["retweet_count"]
    # text
    tweet.properties["text"] = data["text"]

    if user is not None and parse_terms:
        for tok in process_text(data["text"]):
            word = push_word(tok)
            # The counter used to be guarded by the "terms" property, which
            # this function never writes, so it was reset to 1 on every token.
            previous = user.properties.get("term_count") or 0
            user.properties["term_count"] = previous + 1
            user.push()
            _bump_count(user, "DISCUSSES", word)

    if "text" in data:
        sent = sentiment.get_sentiment(data["text"])
        tweet["polarity"] = sent[0]
        tweet["subjectivity"] = sent[1]
        for tok in process_text(data["text"]):
            word = push_word(tok)
            _bump_count(tweet, "CONTAINS", word)

    # hashtags
    for h in data["entities"].get("hashtags", []):
        hashtag = push_hashtag(h)
        graph.create_unique(Relationship(hashtag, "TAGS", tweet))

    # mentions
    for m in data["entities"].get("user_mentions", []):
        mention = push_user(m)
        graph.create_unique(Relationship(tweet, "MENTIONS", mention))

    tweet.push()
    return tweet