class TwitterGraph(object):
    '''A class for interfacing with the Neo4j Twitter network database'''

    # Initial setup and linking into the database
    def __init__(self, host_port, user, password):
        '''Makes connection to Neo4j database'''
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except:  # ConstraintViolationException
            print 'Unique id on Node User already exists'

    # Functions to add data to the database
    def add_following(self, user_id, following_ids, rec_count):
        '''Given a unique user id, adds the relationship for who they follow.
        Adds a User Node with the id if it doesn't exist.'''
        user = Node('User', id=user_id)
        self.graph.merge(user)  # important to merge before doing anything
        rec = 1 + rec_count  # preserving the order of the following; 1 = most recent
        for fid in following_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user, 'FOLLOWS', user2, rec=rec))
            rec += 1
        user['following_added'] = True
        self.graph.push(user)

    def add_followers(self, user_id, follower_ids, rec_count):
        '''Given a unique user id, adds the relationship for who follows them.
        Adds a User Node with the id if it doesn't exist.'''
        user = Node('User', id=user_id)
        self.graph.merge(user)
        rec = 1 + rec_count
        for fid in follower_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user2, 'FOLLOWS', user, rec=rec))
            rec += 1
        user['followers_added'] = True
        self.graph.push(user)

    def add_user_properties(self, user):
        '''Given a user object, adds properties to the existing user Node'''
        try:
            user_id = user.id
            existing_user = Node('User', id=user_id)
            clean_prop_dict = self.__clean_user_dict(user.__dict__)
            self.graph.merge(existing_user)
            for k, v in clean_prop_dict.iteritems():
                existing_user[k] = v
            # add additional label to verified accounts
            if clean_prop_dict['verified']:
                print True
                existing_user.add_label('Verified')
        except:  # bad user id
            user_id = user['user_id']
            error = user['error']
            existing_user = Node('User', id=user_id)
            self.graph.merge(existing_user)
            existing_user['screen_name'] = 'INVALID'
            existing_user['error'] = error
            print 'Found invalid user id'
        self.graph.push(existing_user)

    def __clean_user_dict(self, user_prop_dict):
        '''Given the raw user property dict, keeps only the whitelisted keys
        and normalizes the profile image url and created_at fields.'''
        keep = ['contributors_enabled', 'created_at', 'default_profile',
                'default_profile_image', 'description', 'favourites_count',
                'followers_count', 'friends_count', 'geo_enabled', 'id',
                'id_str', 'is_translator', 'lang', 'listed_count', 'location',
                'name', 'profile_image_url_https', 'protected', 'screen_name',
                'statuses_count', 'time_zone', 'utc_offset', 'verified',
                'withheld_in_countries', 'withheld_scope']
        # only keep the above keys for inserting
        clean = {k: v for k, v in user_prop_dict.iteritems() if k in keep}
        image = os.path.splitext(clean['profile_image_url_https'])[0]
        ext = os.path.splitext(clean['profile_image_url_https'])[1]
        clean['profile_image_url_https'] = image.rstrip('_normal') + ext
        # convert date time to string
        clean['created_at_ord'] = clean['created_at'].toordinal()
        clean['created_at'] = clean['created_at'].strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # Functions to query database
    def get_nodes_missing_props(self, limit=100):
        '''Returns the first 100 ids of nodes without user properties'''
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where("_.screen_name IS NULL").limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.screen_name)
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        '''Returns ids missing the follower or following relationships.
        Valid inputs for rel are FOLLOWING or FOLLOWERS.'''
        selector = NodeSelector(self.graph)
        if rel == 'FOLLOWING':
            selected = selector.select('User').where("_.following_added IS NULL").limit(limit)
        elif rel == 'FOLLOWERS':
            selected = selector.select('User').where("_.followers_added IS NULL").limit(limit)
        else:
            # TO DO: flesh out the exception calling
            raise Exception
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User)
                     WHERE n.followers_count >= 1000
                     AND NOT EXISTS(n.following_added)
                     AND m.screen_name = 'BernieSanders'
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.following_added)
                     RETURN n.id LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.following_added)
                     RETURN n.id LIMIT 500;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()[-100:]]

def add_property(node, property_key, property_value):
    print("{}, {}, {}.".format(node, property_key, property_value))
    graph = Graph('http://*****:*****@localhost:7474/db/data/')
    graph_node = Node("Person", name="" + node + "")
    graph.merge(graph_node)
    graph_node["{}".format(property_key)] = "{}".format(property_value)
    # push the local property change back to the database
    graph.push(graph_node)

class Neo4jGraph:
    def __init__(self, uri: str, auth: Tuple[str, str]):
        self._graph = Graph(uri=uri, auth=auth)

    def commit_relation(self,
                        src: Dict[str, str],
                        rel: Dict[str, str],
                        dst: Dict[str, str],
                        ) -> None:
        srckind = src['kind']
        srcnode = Node(srckind,
                       **{k: v for k, v in src.items() if k != 'kind'})
        dstkind = dst['kind']
        dstnode = Node(dstkind,
                       **{k: v for k, v in dst.items() if k != 'kind'})
        relkind = rel['kind']
        relationship = Relationship(srcnode, relkind, dstnode,
                                    **{k: v for k, v in rel.items() if k != 'kind'})
        self._graph.merge(srcnode, "Author", "name")
        self._graph.create(dstnode)
        self._graph.create(relationship)

    def run(self, query: str):
        return self._graph.run(query)

def moveProficiencyTable():
    # get a list of all unique learners
    # neo4j graph connector
    graph = Graph()
    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerproficiency")
    for lid in lids:
        # get the knowledge state for this learner
        # <concept-id>,<score> in schema
        uid = lid['learner_id']
        # create a learner node
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")
        print("** learner:", uid)
        profDict = session.execute(
            "SELECT proficiency from learnerproficiency WHERE learner_id='" +
            uid + "'")[0]['proficiency']
        for cid, score in profDict.items():
            print("concept:", cid, "score", score)
            # create/find concept node
            node2 = Node("Concept", id=cid)
            graph.merge(node2, "Concept", "id")
            # add a relationship with property score
            graph.create(Relationship(node, "ASSESSED_IN", node2, score=score))

def save_node(self, label, properties_dict, unique=True):
    '''create a neo4j node with a label and properties'''
    if unique == True:
        length, lst = self.exists_node(label, properties_dict['name'])
        if length > 0:
            # node already exists: update its properties
            g = Graph(password=self.password)
            b = lst[0]
            g.merge(b)
            for k, v in properties_dict.items():
                b[k] = v
            g.push(b)
        else:
            # node does not exist: insert a new one
            g = Graph(password=self.password)
            tx = g.begin()
            a = Node(label, **properties_dict)
            tx.create(a)
            tx.commit()
    else:
        # duplicate named nodes are not supported
        raise Exception("do not allow duplicate named nodes")

def moveContentSummaryTable():
    graph = Graph()
    lids = session.execute(
        "SELECT DISTINCT learner_id from learnercontentsummary")
    for lid in lids:
        uid = lid['learner_id']
        print("** learner:", uid)
        # content_id text, interactions_per_min double,
        # num_of_sessions_played int, time_spent double
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")
        contentDict = session.execute(
            "SELECT * from learnercontentsummary WHERE learner_id='" + uid +
            "'")[0]
        cid = contentDict['content_id']
        tsp = contentDict['time_spent']
        ipm = contentDict['interactions_per_min']
        node2 = Node("Content", id=cid)
        graph.merge(node2, "Content", "id")
        # add a relationship with properties for time spent and interactions
        graph.create(
            Relationship(node, "INTERACTED_WITH", node2, timeSpent=tsp,
                         ipm=ipm))
        print('content: ', cid, 'tsp: ', tsp, 'ipm', ipm)

def moveRelevancyTableAll():
    graph = Graph()
    # get a list of all unique learners
    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerconceptrelevance")
    uids = [lid['learner_id'] for lid in lids]
    for uid in uids:
        # get the knowledge state for this learner
        # <concept-id>,<score> in schema
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")
        print("** learner:", uid)
        relDict = session.execute(
            "SELECT relevance from learnerconceptrelevance WHERE learner_id='"
            + uid + "'")[0]['relevance']
        for cid, score in relDict.items():
            # print("concept:", cid, "score", score)
            # create the concept node if it does not exist, else merge with it
            node2 = Node("Concept", id=cid)
            graph.merge(node2, "Concept", "id")
            # add a relationship with property score
            graph.create(Relationship(node2, "RELEVANT_TO", node, score=score))

def main(path: 'Set path to file'):
    graph = build_graph(path)
    graph_db = Graph("http://neo4j:7474/db/data")
    for url in graph:
        if url is None:
            continue
        tx = graph_db.begin()
        try:
            url_node = Node("Url", name=url)
            graph_db.merge(url_node)
            for link in graph[url]:
                if link is None:
                    continue
                try:
                    link_node = Node("Url", name=link)
                    graph_db.merge(link_node)
                    node_relation = Relationship(url_node, "LINKS_TO", link_node)
                    tx.create(node_relation)
                except Exception as e:
                    print("Error in building relationship: " + str(e))
            tx.commit()
        except Exception as e:
            print("got error: " + str(e))
            tx.rollback()
            continue
        print("Added to neo4j")

def add_triples_to_neo4j_db(triples):
    graph = Graph("bolt://localhost:7687", auth=("neo4j", "eragold"))
    for triple in triples:
        subj = Node("Person", name=triple[0])
        obj = Node("Entity", name=triple[1])
        re = Relationship.type(triple[2])(subj, obj)
        graph.merge(re, 'Person', 'name')

def add_property2(node, *labels, **params):
    node_value = node
    graph = Graph('http://*****:*****@localhost:7474/db/data/')
    # labels is a tuple of label strings, so unpack it into Node()
    graph_node = Node(*labels, name="" + node_value + "")
    graph.merge(graph_node)
    for k, v in params.items():
        graph_node["{}".format(k)] = "{}".format(v)
    # push the local property changes back to the database
    graph.push(graph_node)

def put_data_frame_in_db(df):
    graph = Graph(password="******")
    for row in df.itertuples():
        user = Node('User', id=row.user.item())
        deal = Node('Deal', id=row.deal.item())
        graph.merge(user)
        graph.merge(deal)
        graph.push(user)
        graph.create(Relationship(user, "rates", deal, rating=row.rating))

def lambda_handler(event, context):
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])
    user = Node("User", id=event['id'])
    graph.merge(user)
    for key, value in event['datas'].items():
        user[key] = value
    graph.push(user)

class DiseasePipeline(object):
    def __init__(self):
        self.graph = Graph(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
        self.graph.delete_all()
        # self.file = open('test.txt', "a+")

    def process_item(self, item, spider):
        # self.file.write(str(item) + '\n\n')
        # self.file.flush()
        item['name'] = item['name'].strip()
        node = self.graph.nodes.match('disease', name=item['name']).first()
        if node is None:
            # if this disease does not exist yet, create it
            node = Node('disease', **item)
            self.graph.create(node)
            node = self.graph.nodes.match('disease', name=item['name']).first()
        else:
            # if the disease already exists, update it
            node.update(item)
            self.graph.merge(node, 'disease', 'name')

        # link related diseases
        relatedDiseases = item['relatedDisease']
        for disease in relatedDiseases:
            disease = disease.strip()
            newNode = self.graph.nodes.match('disease', name=disease).first()
            if newNode is None:
                # create the related disease first so the link can be built
                newNode = Node('disease', name=disease)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('disease', name=disease).first()
            # if no 'relate' relationship exists between the two diseases yet, create it
            r = Relationship(node, 'relate', newNode)
            if self.graph.match_one((node, newNode), r_type='relate') is None:
                self.graph.create(r)

        # link the disease to its symptoms
        symptoms = item['typicalSymptom'].split('、')
        for symptom in symptoms:
            symptom = symptom.strip()  # remove extra whitespace
            newNode = self.graph.nodes.match('symptom', name=symptom).first()
            if newNode is None:
                # create the symptom node if it does not exist
                newNode = Node('symptom', name=symptom)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('symptom', name=symptom).first()
            # if no 'have' relationship exists between disease and symptom yet, create it
            r = Relationship(node, 'have', newNode)
            if self.graph.match_one((node, newNode), r_type='have') is None:
                self.graph.create(r)

def moveRelevancyTable(n=10):
    # get a list of all unique learners
    # filepath = "batch-models/src/test/resources/concept-similarity/ConceptSimilarity.json"
    # neo4j graph connector
    graph = Graph()
    # only compute bottom "n" and top "n" relevant concepts
    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerconceptrelevance")
    for lid in lids:
        # get the knowledge state for this learner
        # <concept-id>,<rel score> in schema
        uid = lid['learner_id']
        # create a learner node
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")
        print("** learner:", uid)
        relDict = session.execute(
            "SELECT relevance from learnerconceptrelevance WHERE learner_id='"
            + uid + "'")[0]['relevance']
        rawScores = relDict.values()
        qU = round(sorted(rawScores, reverse=True)[n - 1] * 1e4) / 1e4
        qL = round(sorted(rawScores)[n - 1] * 1e4) / 1e4
        for cid, rawscore in relDict.items():
            score = round(rawscore * 1e4) / 1e4
            if score >= qU:
                print("concept:", cid, "score", score)
                # create/find concept node
                node2 = Node("Concept", id=cid)
                graph.merge(node2, "Concept", "id")
                # add a relationship with property score
                graph.create(
                    Relationship(node2, "RELEVENT_FOR", node, score=score))
            elif score <= qL:
                print("concept:", cid, "score", score)
                # create/find concept node
                # node2 = graph.merge_one("Concept", "id", cid)
                # add a relationship with property score
                # graph.create(Relationship(node2, "NOT_RELEVENT_FOR", node, score=score))
                pass
            else:
                pass

def upload_entity(entry):
    """ Upload entry into Graph database """
    # NOTE: the connection string was masked in the source; host/path assumed here
    graph = Graph('http://*****:*****@localhost:7474/db/data/')
    id_num = entry['result']['@id']
    name = entry['result']['name']
    description = entry['result']['description']
    entity = Node('Entity', id=id_num, description=description, name=name)
    graph.merge(entity, 'Entity', 'id')
    graph.merge(entity, 'Entity', 'description')
    graph.merge(entity, 'Entity', 'name')
    for n in entry['result']['@type']:
        item = Node('Item', type=n)
        graph.merge(item, 'Item', 'type')
        graph.merge(Relationship(entity, "IS", item))

def mockConceptCoverage():
    # neo4j graph connector
    authenticate("localhost:7474", "neo4j", "1sTep123")
    graph = Graph()
    cypher = graph.cypher
    # get a list of all concepts and content
    conceptDict = cypher.execute("MATCH (x:Concept) RETURN x.id as concept")
    contentDict = cypher.execute("MATCH (x:Content) RETURN x.id as content")
    n = len(contentDict)
    for concept in conceptDict:
        id = concept.concept
        node = Node("Concept", id=id)
        graph.merge(node, "Concept", "id")
        i = random.randint(0, n - 1)
        id = contentDict[i].content
        node2 = Node("Content", id=id)
        graph.merge(node2, "Content", "id")
        graph.create(Relationship(node, "COVERED_IN", node2))

def mockMisConcepts():
    # neo4j graph connector
    authenticate("localhost:7474", "neo4j", "1sTep123")
    graph = Graph()
    cypher = graph.cypher
    # get a list of all learners and concepts
    learnerDict = cypher.execute("MATCH (x:Learner) RETURN x.id as learner")
    conceptDict = cypher.execute("MATCH (x:Concept) RETURN x.id as concept")
    n = len(conceptDict)
    for learner in learnerDict:
        id = learner.learner
        node = Node("Learner", id=id)
        graph.merge(node, "Learner", "id")
        i = random.randint(0, n - 1)
        id = conceptDict[i].concept
        node2 = Node("Concept", id=id)
        graph.merge(node2, "Concept", "id")
        graph.create(Relationship(node, "HAS_MISCONCEPTION_IN", node2))

class Neo4jDBPipleline(object):
    def __init__(self):
        # self.db = Graph(host="localhost", user="******", password="******")
        db_info = settings.DB_INFO
        self.db = Graph(host=db_info["host"],
                        http_port=db_info["http_port"],
                        user=db_info["user"],
                        password=db_info["password"])

    def process_item(self, item, spider):
        """Check the item's type, handle it accordingly, then write it to the database."""
        if isinstance(item, InformationItem):
            usr = Node("WeiboUser", **dict(item))
            self.db.merge(usr, "WeiboUser", "wb_usr_id")
        elif isinstance(item, TweetsItem):
            weibo = Node("WeiboTweets", **dict(item))
            self.db.merge(weibo, "WeiboTweets", "wb_tt_id")
            usr = Node("WeiboUser", wb_usr_id=weibo["wb_usr_id"])
            fan_follow_sb = Relationship(usr, "TWEETS", weibo)
            self.db.merge(fan_follow_sb)
        elif isinstance(item, FollowsItem):
            followsItems = dict(item)
            follows = followsItems.pop("follows")
            fan = Node("WeiboUser", wb_usr_id=followsItems["wb_usr_id"])
            for sb_id in follows:
                sb = Node("WeiboUser", wb_usr_id=sb_id)
                fan_follow_sb = Relationship(fan, "FOLLOWS", sb)
                self.db.merge(fan_follow_sb)
        elif isinstance(item, FansItem):
            fansItems = dict(item)
            fans = fansItems.pop("fans")
            sb = Node("WeiboUser", wb_usr_id=fansItems["wb_usr_id"])
            for fan_id in fans:
                fan = Node("WeiboUser", wb_usr_id=fan_id)
                fan_follow_sb = Relationship(fan, "FOLLOWS", sb)
                self.db.merge(fan_follow_sb)
        return item

def fill_similarities_graph(self):
    authenticate(settings.NeoHost, settings.NeoLog, settings.NeoPass)
    graph = Graph("{0}/db/data/".format(settings.NeoHost))
    # graph.delete_all()
    try:
        graph.schema.create_uniqueness_constraint('Video', 'id')
    except:
        pass
    data = pd.DataFrame(self.db_game.read_videodata_from_db())
    if not isinstance(data, str) and not data.empty:
        data = data[pd.notnull(data['title'])]
        data = data[pd.notnull(data['rating'])]
        k = len(data)
        mes = smilarities.SimilarityMeasures()
        vid = 0
        while vid < k:
            if data['hashtags'][vid] is not None:
                # print(data['hashtags'][vid], data['id'][vid])
                if len(data['hashtags'][vid]) > 3:
                    hashes = self.hashtag_list_to_str(data['hashtags'][vid])
                    # print(hashes, vid)
                    data1 = pd.DataFrame(
                        self.db_game.read_text_index_videodata_from_db(
                            'hashtags', hashes))
                    data1 = data1[pd.notnull(data1['title'])]
                    data1 = data1[pd.notnull(data1['rating'])]
                    data1 = data1.reset_index()
                    start = Node("Video", id=str(data['id'][vid]))
                    graph.merge(start)
                    start.properties['rating'] = data['rating'][vid]
                    start.properties['title'] = data['title'][vid]
                    start.push()
                    vid1 = 0
                    while vid1 < len(data1):
                        stop = Node("Video", id=str(data1['id'][vid1]))
                        graph.merge(stop)
                        stop.properties['rating'] = data1['rating'][vid1]
                        stop.properties['title'] = data1['title'][vid1]
                        stop.push()
                        num = mes.jaccard_similarity(
                            data['hashtags'][vid], data1['hashtags'][vid1])
                        # print(len(data['hashtags'][vid]))
                        if (num > 0.5 and len(data1['hashtags'][vid1]) > 3
                                ) and data1['id'][vid1] != data['id'][vid]:
                            # print(num, vid, vid1)
                            following = Relationship(start, "Jaccard", stop)
                            graph.merge(following)
                            following.properties['jaccard_similarity'] = num
                            following.push()
                        vid1 += 1
            vid += 1
    # print(pd.DataFrame(graph.run("MATCH (a:Video) RETURN a.id, a.title, a.rating LIMIT 10").data()))
    return

class Neo4jCustomer(object):
    def __init__(self):
        self.redis_conn = redis.StrictRedis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PARAMS["password"],
        )
        self.graph = Graph(NEO4J_URI, password=NEO4J_PWD)
        self.pending_queue = NEO4J_PENDING_QUEUE
        self.doing_queue = NEO4J_DOING_QUEUE

    def listen_task(self):
        todo_task = self.redis_conn.lpop(self.doing_queue)
        if todo_task:
            self.save_relationships(todo_task)
        while True:
            task = self.redis_conn.brpoplpush(self.pending_queue,
                                              self.doing_queue, 0)
            self.save_relationships(task)

    def save_relationships(self, task):
        self.graph.merge(pickle.loads(task))
        self.redis_conn.lpop(self.doing_queue)

def moveConceptMap():
    # neo4j graph connector
    graph = Graph()
    # fetch the domain concept map from the taxonomy service
    url = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/domain/map"
    resp = requests.get(url).json()
    # move all concepts
    conceptList = resp["result"]["concepts"]
    for conceptDict in conceptList:
        identifier = None
        if not conceptDict.has_key('identifier'):
            continue
        identifier = conceptDict['identifier']
        # create/find node
        node = Node("Concept", id=identifier)
        graph.merge(node, "Concept", "id")
        if conceptDict.has_key('subject'):
            subject = conceptDict['subject']
            node.properties["subject"] = subject
            node.push()
        if conceptDict.has_key('gradeLevel'):
            gradeLevel = conceptDict['gradeLevel']
            node.properties["gradeLevel"] = gradeLevel
            node.push()
        if conceptDict.has_key('objectType'):
            objectType = conceptDict['objectType']
            node.properties["objectType"] = objectType
            node.push()
    # move all relations
    relationList = resp["result"]["relations"]
    for relationDict in relationList:
        if not relationDict.has_key('startNodeId'):
            continue
        if not relationDict.has_key('endNodeId'):
            continue
        if not relationDict.has_key('relationType'):
            continue
        startNodeId = relationDict['startNodeId']
        endNodeId = relationDict['endNodeId']
        relationType = relationDict['relationType']
        print('A:', startNodeId, 'relationType', relationType, 'B:', endNodeId)
        node1 = Node("Concept", id=startNodeId)
        graph.merge(node1, "Concept", "id")
        node2 = Node("Concept", id=endNodeId)
        graph.merge(node2, "Concept", "id")
        graph.create(Relationship(node1, relationType, node2))

def db():
    with open('input.csv', 'r') as csv_file:
        csv_reader = csv.DictReader(
            csv_file)  # add , delimiter=',' to specify the delimiter
        # next(csv_reader)  # skips over both header rows
        graph = Graph("bolt://localhost:7687", auth=("neo4j", "ubdprototype"))
        try:
            graph.run("Match () Return 1 Limit 1")
        except Exception:
            print(
                'Invalid connection. Is Neo4j running? Check username and password.'
            )
            raise Exception
        graph.delete_all()
        for line in csv_reader:
            topic = Node("Topic", name=line['topic'])
            application = Node("Application",
                               name=line['name'],
                               website=line['website'],
                               publication=line['publication'])
            dataset = Node("Dataset",
                           identifier=line['identifier'])  # may include an identifier TYPE property
            graph.merge(topic, "Topic", "name")
            graph.merge(application, "Application", "name")
            graph.merge(dataset, "Dataset", "identifier")
            graph.create(Relationship(application, "relates to", topic))
            graph.create(
                Relationship(application, "uses", dataset,
                             conf_level=line['conf-level']))
    return graph

class MadpyHabitsSurvey:
    def __init__(self):
        self.responses = google_survey.get('madpy-habits-survey.yaml')
        self.responses.set_index('question_id', inplace=True)

    def graph_survey(self):
        screen_names = self.responses.ix['q0', ['person_id', 'response']]
        self.pythonistas = {person_id: Node('Pythonista', screen_name=name)
                            for _, (person_id, name) in screen_names.iterrows()}
        self.graph = Graph(password=environ['NEO4J_PASSWORD'])
        for node in self.pythonistas.values():
            self.graph.merge(node, label='Pythonista')
        self.graph_question('q1', 'Editor', 'TYPES_IN')
        self.graph_question('q2', 'Package', 'LIKES')
        self.graph_question('q3', 'VersionControl', 'USES')
        self.graph_question('q4', 'Language', 'KNOWS')

    def graph_question(self, question_id, node_label, relationship_label):
        def Response(node_value):
            return Node(node_label, name=node_value)

        responses = self.responses.ix[question_id, ['person_id', 'response']]
        response_nodes = {}  # nodes for unique responses
        relationships = []   # relationships between people and responses
        for _, (person_id, node_value) in responses.iterrows():
            pythonista = self.pythonistas[person_id]
            node = response_nodes.setdefault(node_value, Response(node_value))
            response = Relationship(pythonista, relationship_label, node)
            relationships.append(response)
        for node in response_nodes.values():
            self.graph.merge(node, label=node_label)
        for relationship in relationships:
            self.graph.merge(relationship, label=node_label)

def _neo4j(self, article):
    uri = os.environ['NEO4J']
    u = os.environ['NEO4JUSER']
    p = os.environ['NEO4JPASSWD']
    # generate nodes
    news = Node('NEWSITEMS', url=article['url'])
    orgs = [Node('ENTS', name=e, type='org') for e in article['ner']['org']]
    people = [Node('ENTS', name=e, type='people')
              for e in article['ner']['people']]
    # generate relationships
    relations_orgs = [Relationship(n, 'CONTAINED_IN', news) for n in orgs]
    relations_people = [Relationship(n, 'CONTAINED_IN', news) for n in people]
    # join
    relations = relations_orgs + relations_people
    # store
    neo4j = Graph(uri, username=u, password=p)
    neo4j.merge(news, 'NEWSITEMS', 'url')
    for n in orgs:
        neo4j.merge(n, 'ENTS', 'name')
    for n in people:
        neo4j.merge(n, 'ENTS', 'name')
    for r in relations:
        neo4j.create(r)

df = pd.read_csv(fl + "/Disease_Details.csv")
dfl = df[['Disease', 'Diagnosis_treatment']]
dfl = dfl.drop_duplicates()
dfl.sort_values(['Disease'], ascending=True)
records_ = dfl.to_dict(orient='records')
results = db.Disease.insert_many(records_)
df2 = pd.DataFrame(list(results.inserted_ids))
df2.columns = ['Object_id']
df = df.join(df2)
for index, row in df.iterrows():
    # print(row['Disease'], row['Object_id'])
    a = Node("Disease", name=row['Disease'], id=str(row['Object_id']))
    graph.merge(a, "Disease", "name")

df_symptoms = pd.read_csv(fl + "/Disease_Symptoms.csv")
df_symptoms = df_symptoms[['Disease', 'Symptoms']]
df_symptoms = df_symptoms.drop_duplicates()
df_symptoms = df_symptoms.dropna()
df_symptoms = df_symptoms.groupby('Disease')['Symptoms'].apply(
    list).reset_index(name='Symptoms_arr')
for index, row in df_symptoms.iterrows():
    query = {'Disease': {'$eq': row['Disease']}}
    update = {'$set': {'Symptoms.Name': row['Symptoms_arr']}}
    results_symptoms = db.Disease.update_one(query, update, upsert=True)
    print(f"Symptoms data inserted into MongoDB {results_symptoms}")
    for i in range(len(row['Symptoms_arr'])):

for index, row in wikiedit_df.iterrows():
    if pd.isnull(row["manual"]):
        page_name = row["wikisearch"]
    else:
        if row["manual"] == "None":
            continue
        else:
            page_name = row["manual"]
    entity_n = graph.nodes.match("Entity", entity_name=row["entity_name"]).first()
    wiki_n = Node("Wikipedia", page_name=page_name)
    wiki_n.__primarylabel__ = 'Wikipedia'
    wiki_n.__primarykey__ = 'page_name'
    wiki_n["url"] = "https://fr.wikipedia.org/wiki/" + page_name
    OWNED_BY = Relationship.type("OWNED_BY")
    graph.merge(OWNED_BY(wiki_n, entity_n))
    print(entity_n["entity_name"], wiki_n["url"])

# %% Download wiki pages (with infobox) as files from wiki nodes
results = graph.nodes.match("Wikipedia")
for wiki_n in results:
    # check whether the file already exists; if not, fetch it
    if os.path.isfile(wikipath + wiki_n['page_name'] + ".wikipage"):
        print(wiki_n['page_name'] + ".wikipage already fetched.")
        continue
    try:
        page = wptools.page(wiki_n['page_name'], lang='fr')
        page.get_parse()
    except:
        print("error 1 with name:", wiki_n['page_name'])

class Command(BaseCommand):
    help = 'port group data from sql server to neo4j.'

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self._sql_server_conn = pymssql.connect(server='SX-DEV')
        self._init_graph()

    def handle(self, *args, **options):
        self._start_import()

    def _init_graph(self):
        self._graph = Graph(host=settings.NEO4J['HOST'],
                            http_port=settings.NEO4J['PORT'],
                            user=settings.NEO4J['USER'],
                            password=settings.NEO4J['PWD'])

    def _start_import(self):
        self.stdout.write('Start to migrate data from sql server to neo4j')
        group_db, person_db = self._get_databases()
        # create all the group nodes
        for db in group_db:
            for table in self._get_db_tables(db):
                self._create_group(db, table)
        # create all group users nodes and build relations
        for db in person_db:
            for table in self._get_db_tables(db):
                self._create_person(db, table)
        self._close_mssql_conn()
        self.stdout.write(
            self.style.SUCCESS(
                'Successfully imported all data to neo4j server'))

    def _close_mssql_conn(self):
        self._sql_server_conn.close()

    def _get_databases(self):
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT name FROM sys.databases;')
        dbs = cursor.fetchall()
        group_db = []
        person_db = []
        for db in dbs:
            db_name = db[0]
            if 'GroupData' in db_name:
                person_db.append(db_name)
            elif 'QunInfo' in db_name:
                group_db.append(db_name)
        return group_db, person_db

    def _get_db_tables(self, db_name):
        cursor = self._sql_server_conn.cursor()
        cursor.execute(
            "SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';"
            % db_name)
        return [
            tb[0] for tb in cursor.fetchall()
            if 'QunList' in tb[0] or 'Group' in tb[0]
        ]

    def _create_group(self, db_name, table_name, start_id=0):
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))
        total = cursor.fetchall()[0][0]
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))
        pbar = tqdm(desc='Creating Group Nodes from [%s.%s]' %
                    (db_name, table_name), total=total)
        try:
            g = cursor.fetchone()
            while g:
                curr_id = g[0]
                group = Group()
                group.number = g[1]
                group.mastqq = g[2]
                group.date = g[3]
                group.title = g[4]
                group.groupclass = g[5]
                group.intro = g[6]
                self._graph.merge(group)
                pbar.update(1)
                g = cursor.fetchone()
        except:
            print('Catch an Exception, resume group creating from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            self._create_group(db_name, table_name, curr_id - 1)
        pbar.close()

    def _create_person(self, db_name, table_name, start_id=0):
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))
        total = cursor.fetchall()[0][0]
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))
        pbar = tqdm(desc='Creating Person Nodes and Relations from [%s.%s]' %
                    (db_name, table_name), total=total)
        try:
            p = cursor.fetchone()
            while p:
                curr_id = p[0]
                person = Person()
                person.qq = p[1]
                person.nick = p[2]
                person.age = p[3]
                person.gender = p[4]
                person.auth = p[5]
                group_number = p[6]
                # get group node
                group = Group.select(self._graph, group_number).first()
                if group:
                    # build relations
                    person.groups.add(group)
                    group.members.add(person)
                    # update group node
                    self._graph.push(group)
                self._graph.merge(person)
                pbar.update(1)
                p = cursor.fetchone()
        except:
            print('Catch an Exception, resume person creating from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            self._create_person(db_name, table_name, curr_id - 1)
        pbar.close()

from py2neo import Graph, Node, Relationship, authenticate

authenticate("localhost:7474", "neo4j", "cloudchaser")
graph = Graph("http://localhost:7474/db/data/")
graph.delete_all()

alice = graph.merge("Person", "name", "Alice")
bob = graph.merge("Person", "name", "Bob")
chelsea = graph.merge("Person", "name", "Chelsea")

prof = {'name': 'Dennis'}
fav = {'name': 'Emma'}

query = (
    'MERGE (profile:soundcloud {name: {profile}.name}) '
    'ON CREATE SET profile={profile} '
    'MERGE (favorite:soundcloud {name: {favorite}.name}) '
    'ON CREATE SET favorite={favorite} '
)

graph.cypher.execute(query, {'profile': prof, 'favorite': fav})

        D0_outdegree=node['outdegree'],
        D0_creation_date=node['creation_date'],
        D0_indexing_status=node['indexing_status'],
        D0_crawled=node['crawled'],
        D0_last_modification_date=node['last_modification_date'],
        D0_pages_crawled=node['pages_crawled'],
        D0_name=node['name'],
        D0_label=node['label'],
        D0_id=node['id']
    )
    if len(graph.nodes.match(site_name=node['label'])) != 0:
        print(node)
        a["site_name"] = node['label'] + "X" + str(random.randint(1000, 9999))
    a.__primarylabel__ = 'Website'
    a.__primarykey__ = 'site_name'
    graph.merge(a)

for link in data_D0['links']:
    try:
        tx = graph.begin()
        source_n = graph.nodes.match(D0_id=link['source']).first()
        target_n = graph.nodes.match(D0_id=link['target']).first()
        rel = Relationship(source_n, "LINKS_TO", target_n)
        rel["count_D0"] = link['count']
        tx.merge(rel)
        tx.commit()
    except:
        pass

# look at "label"
# if the label is already in the database

for tweet in results['statuses']:
    tweetText = tweet['text'].encode('utf-8')
    tweetAuthor = tweet['user']['screen_name'].encode('utf-8')
    language = tweet['user']['lang'].encode('utf-8')
    if language != "en":
        continue
    if 'http' in tweetText:
        continue
    if 'RT @' in tweetText:
        continue
    register(tweetAuthor, tweetAuthor + tweetAuthor)
    user = graph.find_one('User', 'username', tweetAuthor)
    post = Node('Post', id=tweet['id'], title="Tweet", author=tweetAuthor,
                text=tweetText, timestamp=timestamp(), date=date())
    rel = Relationship(user, 'PUBLISHED', post)
    try:
        graph.create(rel)
    except Exception, e:
        continue
    print "New Tweet (" + language + "): " + tweetAuthor + ":" + tweetText
    for hashtag in tweet['entities']['hashtags']:
        tag = Node('Tag', name=hashtag['text'])
        graph.merge(tag)
        rel = Relationship(tag, 'TAGGED', post)
        graph.create(rel)

class DataBase:
    def __init__(self):
        py2neo.authenticate("localhost:7474", "neo4j", "st1215")
        self.graph = Graph("http://localhost:7474/db/data/")

    def get_all_news_from(self, site):
        # news = set()
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News)-[:E]-(t:Tipo) WHERE s.name="'
            + site + '" RETURN n,t').data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']),
                 n['t']['description']))
        return dataSet

    def get_all_news_from_no_class(self, site):
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) WHERE s.name="' + site +
            '" RETURN n').data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']), ''))
        return dataSet

    def get_news_by_title(self, title):
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) WHERE n.title="' + title +
            '" RETURN n').data()
        news = News()
        for n in all_news:
            news.title = n['n']['title']
            news.url = n['n']['url']
        return news

    def get_all_data_set(self, sites):
        dataSet = list()
        for s in sites:
            dataSet.extend(self.get_all_news_from(s))
        return dataSet

    # def get_queue(self, site_url):
    #     queue = set()
    #     for s in SiteQueue.select(self.graph).where(site=site_url):
    #         queue.add(s.page)
    #     return queue
    #
    # def save_queue(self, site, page):
    #     queue = SiteQueue()
    #     queue.site = site
    #     queue.page = page
    #     self.graph.push(queue)

    def get_site(self, name):
        sites = Site.select(self.graph).where(name=name)
        for site in sites:
            return site

    def get_clazz(self, name):
        tipos = Tipo.select(self.graph).where(description=name)
        for tipo in tipos:
            return tipo

    def save_site(self, site_name, url):
        site = Site()
        site.name = site_name
        site.url = url
        self.graph.push(site)

    def save_news(self, site, url, title, sub_title, content, tipo):
        s = self.get_site(site)
        t = self.get_clazz(tipo)
        news = News()
        news.site.add(s)
        news.tipo.add(t)
        news.title = title
        news.sub_title = sub_title
        news.content = content
        news.url = url
        self.graph.merge(news)

    def create_rel(self, node1, node2):
        self.graph.create("(s:Site)-[:PUBLICOU]->(n:News)")

    def install(self):
        self.graph.run("MATCH (n) DETACH DELETE n")
        self.graph.run("MATCH (n) DETACH DELETE n")

    def delete(self):
        self.graph.delete_all()
        tipo = Tipo()
        tipo.description = 'False'
        self.graph.merge(tipo)
        tipo = Tipo()
        tipo.description = 'True'
        self.graph.merge(tipo)

    return titlecase(''.join(random.choice(chars) for _ in range(size))) + \
        " " + titlecase(''.join(random.choice(chars) for _ in range(size)))


# for i in range(0, 5):
#     Author_Generator()
# a = Author_Generator()
# a1 = a
# a = graph.merge_one("Author", "Name", a1)
# r1 = random.randint(0, 10)

for i in range(0, 40):
    Author_name.append(Author_Generator())
    Author_id.append(i)
    a = Node("Author", ID=Author_id[i], Name=Author_name[i])
    # a.properties["Name"] = Author_name[i]
    graph.merge(a)

for i in range(0, 5):
    r1 = random.randint(0, 40)
    Author_id.append(i + 40)
    Author_name.append(Author_name[r1])
    a = Node("Author", ID=Author_id[i + 40], Name=Author_name[i + 40])
    graph.merge(a)

for i in range(0, 45):
    for j in range(0, 45):
        r1 = random.randint(0, 50)
        r2 = random.randint(5, 25)
        if i == j:
            Matrix[i][j] = r2

for a in allTrades:
    if a.name in southeastList:
        a.division = "Southeast"
    elif a.name in atlanticList:
        a.division = "Atlantic"
    elif a.name in centralList:
        a.division = "Central"
    elif a.name in southwestList:
        a.division = "Southwest"
    elif a.name in northwestList:
        a.division = "Northwest"
    elif a.name in pacificList:
        a.division = "Pacific"

for j in range(0, len(matches)):
    # allTrades[j * 2].gave.update(allTrades[j * 2 + 1], properties={"month": tradeMonth[j], "year": 2016, "draft": allDraft[j][1]})
    # allTrades[j * 2 + 1].received.update(allTrades[j * 2], properties={"month": tradeMonth[j], "year": 2016, "draft": allDraft[j][0]})
    sgraph.merge(
        Relationship(allTrades[j * 2].__ogm__.node, "TRADED",
                     allTrades[j * 2 + 1].__ogm__.node,
                     month=tradeMonth[j], year=2016, draft=allDraft[j][1]))
    sgraph.merge(
        Relationship(allTrades[j * 2 + 1].__ogm__.node, "TRADED",
                     allTrades[j * 2].__ogm__.node,
                     month=tradeMonth[j], year=2016, draft=allDraft[j][0]))

# for t in teams:
#     sgraph.push(t)

options = {"Team": "name"}

def process(self, parameters={}, data={}):
    if 'verbose' in parameters:
        self.config['verbose'] = parameters['verbose']

    # for these facets, do not add an additional entity to connect with,
    # but write them to properties of the entity
    properties = ['content_type_ss', 'content_type_group_ss', 'language_ss',
                  'language_s']

    host = 'localhost'
    if 'neo4j_host' in parameters:
        host = parameters['neo4j_host']

    user = '******'
    if 'neo4j_user' in parameters:
        user = parameters['neo4j_user']

    password = '******'
    if 'neo4j_password' in parameters:
        password = parameters['neo4j_password']

    graph = Graph(host=host, user=user, password=password)

    document_node = Node('Document', name=parameters['id'])
    if 'title' in data:
        document_node['title'] = data['title']

    # add properties from facets
    for entity_class in parameters['facets']:
        if entity_class in data:
            entity_class_label = parameters['facets'][entity_class]['label']
            if entity_class in properties:
                document_node[entity_class_label] = data[entity_class]

    graph.merge(document_node)

    # add / connect linked entities from facets
    for entity_class in parameters['facets']:
        if entity_class in data:
            entity_class_label = entity_class
            if parameters['facets'][entity_class]['label']:
                entity_class_label = parameters['facets'][entity_class]['label']

            if not entity_class in properties:
                relationship_label = entity_class_label
                if entity_class in ['person_ss', 'organization_ss', 'location_ss']:
                    relationship_label = "Named Entity Recognition"

                # convert to array, if single entity / not a multivalued field
                if isinstance(data[entity_class], list):
                    entities = data[entity_class]
                else:
                    entities = [data[entity_class]]

                for entity in entities:
                    if self.config['verbose']:
                        print("Export to Neo4j: Merging entity {} of class {}".format(
                            entity, entity_class_label))

                    # if not yet there, add the entity to the graph
                    entity_node = Node(entity_class_label, name=entity)
                    graph.merge(entity_node)

                    # if not yet there, add the relationship to the graph
                    relationship = Relationship(document_node, relationship_label,
                                                entity_node)
                    graph.merge(relationship)

    return parameters, data