def import_api_data2():
    """Load 'obtaj' records from the art register into the graph DB.

    For every obtaj record, merges an Obtaj node together with its
    Borjnuk (debtor) and Property nodes, wiring them up through
    'obtajuetsa' and 'zakladena' relationships.
    """
    authenticate("localhost:7474", "neo4j", "1111")
    graph = Graph()
    # graph.delete_all()  # Uncomment on the first run!
    # graph.schema.create_uniqueness_constraint("Borjnuk", "id")
    # graph.schema.create_uniqueness_constraint("Obtaj", "id")
    # graph.schema.create_uniqueness_constraint("Property", "id")
    for obtaj_data in get_objects_art('obtaj'):
        obtaj_node = graph.merge_one("Obtaj", "id", obtaj_data["id"])
        obtaj_node["reason_doc"] = obtaj_data["reason_doc"]
        obtaj_node["cost_size"] = obtaj_data["cost_size"]
        # Debtors attached to this obtaj record.
        for author_data in obtaj_data["borjnuku"]:
            borjnuk_node = graph.merge_one("Borjnuk", "id", author_data["id"])
            borjnuk_node["name"] = author_data["name"]
            borjnuk_node["tel_number"] = author_data["tel_number"]
            borjnuk_node.push()
            graph.create_unique(
                Relationship(borjnuk_node, "obtajuetsa", obtaj_node))
        # Pledged properties attached to this obtaj record.
        for prop_data in obtaj_data["properties"]:
            prop_node = graph.merge_one("Property", "id", prop_data["id"])
            prop_node["name"] = prop_data["name_property"]
            prop_node["ser_number"] = prop_data["ser_number"]
            prop_node.push()
            graph.create_unique(
                Relationship(prop_node, "zakladena", obtaj_node))
        obtaj_node.push()
def import_api_data():
    """Import data from my register (methods and all adjacent objects)
    into the graph DB.

    Each method is merged as a Method node linked to its Author nodes
    (WROTE) and its single Category node (CONTAINS).
    """
    graph = Graph()
    # Uncomment on the first run!
    # graph.schema.create_uniqueness_constraint("Method", "id")
    # graph.schema.create_uniqueness_constraint("Author", "id")
    # graph.schema.create_uniqueness_constraint("Category", "id")
    for method_data in get_objects('method'):
        method_node = graph.merge_one("Method", "id", method_data["id"])
        method_node["name"] = method_data["name"]
        method_node["creation_date"] = method_data["creation_date"]
        method_node["approval_date"] = method_data["approval_date"]
        for author_data in method_data["authors"]:
            author_node = graph.merge_one("Author", "id", author_data["id"])
            author_node["name"] = author_data["name"]
            author_node.push()
            graph.create_unique(
                Relationship(author_node, "WROTE", method_node))
        category_data = method_data["category"]
        category_node = graph.merge_one("Category", "id", category_data["id"])
        category_node["name"] = category_data["name"]
        category_node.push()
        graph.create_unique(
            Relationship(category_node, "CONTAINS", method_node))
        method_node.push()
def import_api2_data():
    """Import data from the second register (experts and all adjacent).

    For each expert, merges the Expert node plus its signed Documents,
    the Comission_orders appointing it (with their Expertises), and the
    Legal_issues the expert worked on.
    """
    graph = Graph()
    # Uncomment on first run!
    # graph.schema.create_uniqueness_constraint("Expert", "id")
    # graph.schema.create_uniqueness_constraint("Document", "id")
    # graph.schema.create_uniqueness_constraint("Comission_order", "id")
    # graph.schema.create_uniqueness_constraint("Legal_issue", "id")
    # graph.schema.create_uniqueness_constraint("Expertise", "id")
    experts = get_objects2("experts")
    for api_expert in experts:
        node_expert = graph.merge_one("Expert", "id", api_expert["id"])
        node_expert["name"] = api_expert["name"]
        node_expert["workplace"] = api_expert["workplace"]
        node_expert["address"] = api_expert["address"]
        node_expert["phone"] = api_expert["phone"]
        for api_document in api_expert["documents"]:
            node_document = graph.merge_one("Document", "id", api_document["id"])
            node_document["id_doc"] = api_document["id_doc"]
            node_document["release_date"] = api_document["release_date"]
            node_document["expiry_date"] = api_document["expiry_date"]
            node_document["document_type"] = api_document["document_type"]
            node_document.push()
            graph.create_unique(
                Relationship(node_expert, "SIGNED", node_document))
        for api_order in api_expert["commission_orders"]:
            node_order = graph.merge_one("Comission_order", "id", api_order["id"])
            node_order["commission_name"] = api_order["commission_name"]
            node_order["order_number"] = api_order["order_number"]
            node_order["order_date"] = api_order["order_date"]
            node_order.push()
            graph.create_unique(
                Relationship(node_order, "APPOINTED", node_expert))
            for api_expertise in api_order["expertises"]:
                # FIX: these nodes were merged under the "Category" label
                # (copy-paste from the first register importer) although the
                # constraint above declares "Expertise"; the name was also
                # assigned from itself, so it was never populated from the
                # API payload.
                node_expertise = graph.merge_one("Expertise", "id",
                                                 api_expertise["id"])
                node_expertise["name"] = api_expertise["name"]
                node_expertise.push()
                graph.create_unique(
                    Relationship(node_order, "INCLUDES", node_expertise))
        for api_issue in api_expert["legal_issues"]:
            node_issue = graph.merge_one("Legal_issue", "id", api_issue["id"])
            node_issue["description"] = api_issue["description"]
            node_issue["date"] = api_issue["date"]
            node_issue.push()
            graph.create_unique(
                Relationship(node_expert, "WORKED_ON", node_issue))
        node_expert.push()
def upload_tweets(users):
    """Store tweets and the users that posted them in the graph DB.

    ``users`` is an iterable of tweet dicts, each carrying at least
    'id', 'text' and a nested 'user' dict with 'screen_name'.
    """
    graph = Graph()
    for tweet in users:
        user_data = tweet['user']
        # FIX: the original rebound the ``users`` parameter inside its own
        # loop and never pushed the modified properties to the server, so
        # the tweet text was silently discarded. (The unused
        # ``e = t['password']`` local was dropped as well.)
        tweet_node = graph.merge_one("user", "id", tweet['id'])
        tweet_node.properties['text'] = tweet['text']
        tweet_node.push()
        user_node = graph.merge_one("User", "username", user_data["screen_name"])
def moveConceptMap():
    """Copy the numeracy concept map from the EkStep production API into Neo4j.

    Fetches the concept list, merges one Concept node per identifier and
    creates one relationship per child edge, using the edge's own
    relation name as the relationship type.
    """
    # neo4j graph connector
    graph = Graph()
    # load concept map from production
    import requests
    url = "https://api.ekstep.in/learning/v2/domains/numeracy/concepts"
    payload = "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"\r\n\r\n\r\n-----011000010111000001101001--"
    headers = {
        'content-type': "multipart/form-data; boundary=---011000010111000001101001",
        'user-id': "rayuluv",
        'cache-control': "no-cache",
        'postman-token': "96bc4304-3f9b-6de5-6143-4c14507fe0a5"
    }
    resp = requests.request("GET", url, data=payload, headers=headers).json()
    # move all concepts
    conceptList = resp["result"]["concepts"]
    for conceptDict in conceptList:
        # FIX: replaced the deprecated dict.has_key() calls (removed in
        # Python 3) with the ``in`` operator and dropped the dead
        # ``identifier = None`` pre-assignment.
        if 'identifier' not in conceptDict:
            continue
        identifier = conceptDict['identifier']
        # create/find node
        node = graph.merge_one("Concept", "id", identifier)
        if 'subject' in conceptDict:
            node.properties["subject"] = conceptDict['subject']
            node.push()
        if 'objectType' in conceptDict:
            node.properties["objectType"] = conceptDict['objectType']
            node.push()
        if 'children' in conceptDict:
            for relationDict in conceptDict['children']:
                if 'identifier' not in relationDict:
                    continue
                if 'relation' not in relationDict:
                    continue
                node1 = graph.merge_one("Concept", "id",
                                        relationDict['identifier'])
                graph.create(Relationship(node, relationDict['relation'], node1))
class VbplPipeline(object):
    """Scrapy pipeline that persists crawled legal documents to Neo4j.

    Each item carries a document plus its histories and related
    documents; all three are merged by id and linked with HAS edges.
    """

    # Scalar fields copied verbatim from the scraped dicts (missing keys
    # default to the empty string).
    _DOCUMENT_FIELDS = ('content', 'title', 'official_number',
                        'legislation_type', 'source', 'department',
                        'issuing_office', 'effective_area', 'effective_date',
                        'gazette_date', 'field', 'signer_title', 'signer_name')
    _HISTORY_FIELDS = ('title', 'date', 'status', 'original_document',
                       'ineffective_part')
    _RELATED_FIELDS = ('title', 'relating_type')

    def __init__(self):
        authenticate("localhost:7474", "neo4j", "123456")
        self.graph = Graph()

    def process_item(self, item, spider):
        """Merge the document, its histories and related documents."""
        document = item['document']
        # Create document node
        document_node = self.graph.merge_one("LegalNormativeDocument", "id",
                                             document['document_id'])
        for field_name in self._DOCUMENT_FIELDS:
            document_node.properties[field_name] = document.get(field_name, '')
        document_node.push()

        for history in item['histories']:
            history_node = self.graph.merge_one("History", "id",
                                                history['history_id'])
            for field_name in self._HISTORY_FIELDS:
                history_node.properties[field_name] = history.get(field_name, '')
            history_node.push()
            # Add 'HAS' relationship
            self.graph.create_unique(
                Relationship(document_node, "HAS", history_node))

        for related_document in item['related_documents']:
            related_document_node = self.graph.merge_one(
                "RelatedDocument", "id",
                related_document['related_document_id'])
            for field_name in self._RELATED_FIELDS:
                related_document_node.properties[field_name] = \
                    related_document.get(field_name, '')
            related_document_node.push()
            # Add "HAS" relationship
            self.graph.create_unique(
                Relationship(document_node, "HAS", related_document_node))
        return item
def moveContentSummaryTable():
    """Mirror learner/content interaction summaries into the graph.

    For every distinct learner in ``learnercontentsummary``, merges a
    Learner node, a Content node, and an INTERACTED_WITH edge carrying
    the time-spent and interactions-per-minute figures.
    """
    graph = Graph()
    learner_rows = session.execute(
        "SELECT DISTINCT learner_id from learnercontentsummary")
    for learner_row in learner_rows:
        learner_id = learner_row['learner_id']
        learner_node = graph.merge_one("Learner", "id", learner_id)
        # NOTE(review): only the first row per learner is consumed here —
        # if a learner has several content summaries the rest are dropped;
        # confirm whether that is intended. The query is also built by
        # string concatenation — fine while ids come from the DB itself,
        # but worth parameterising.
        summary = session.execute(
            "SELECT * from learnercontentsummary WHERE learner_id='"
            + learner_id + "'")[0]
        content_node = graph.merge_one("Content", "id", summary['content_id'])
        # add a relationship with property score
        graph.create(Relationship(learner_node, "INTERACTED_WITH",
                                  content_node,
                                  timeSpent=summary['time_spent'],
                                  ipm=summary['interactions_per_min']))
def main1():
    """Reset the graph, create a banana node and merge-update an apple."""
    authenticate("localhost:7474", "neo4j", "1234")
    # NOTE: the connection-string constant is misspelled at its
    # definition site elsewhere in the file; the name must match it.
    graph = Graph(GRAPH_CONNECTION_STRNIG)
    graph.delete_all()
    graph.create(Node("Fruit", name="banana", colour="yellow", tasty=True))
    # merge_one guarantees a single apple node regardless of reruns
    apple = graph.merge_one("Fruit", 'name', 'apple')
    apple['colour'] = 'green'
    apple['tasty'] = True
    apple.push()
def moveProficiencyTable():
    """Mirror learner proficiency scores into the graph.

    For each learner, creates an ASSESSED_IN edge per concept carrying
    the proficiency score and the alpha/beta model parameters.
    """
    # neo4j graph connector
    graph = Graph()
    # get a list of all unique learners
    learner_rows = session.execute(
        "SELECT DISTINCT learner_id from learnerproficiency")
    for learner_row in learner_rows:
        learner_id = learner_row['learner_id']
        # create a learner node
        learner_node = graph.merge_one("Learner", "id", learner_id)
        # knowledge state: <concept-id> -> <score>
        proficiencies = session.execute(
            "SELECT proficiency from learnerproficiency WHERE learner_id='"
            + learner_id + "'")[0]['proficiency']
        model_params = session.execute(
            "SELECT model_params from learnerproficiency WHERE learner_id='"
            + learner_id + "'")[0]['model_params']
        for concept_id, score in proficiencies.items():
            # create/find concept node
            concept_node = graph.merge_one("Concept", "id", concept_id)
            # NOTE(review): alpha/beta are pulled out of the params string
            # at fixed offsets — this silently breaks if the stored format
            # ever changes; confirm against the writer of model_params.
            alpha = float(model_params[concept_id][9:12])
            beta = model_params[concept_id][20]
            # add a relationship with property score
            graph.create(Relationship(learner_node, "ASSESSED_IN",
                                      concept_node, score=score,
                                      alpha=alpha, beta=beta))
def main2():
    """Reset the graph, merge fruit nodes and create a small social demo."""
    authenticate("localhost:7474", "neo4j", "1234")
    # NOTE: the connection-string constant is misspelled at its
    # definition site elsewhere in the file; the name must match it.
    graph = Graph(GRAPH_CONNECTION_STRNIG)
    graph.delete_all()
    graph.create(Node("Fruit", name="banana", colour="yellow", tasty=True))
    apple = graph.merge_one("Fruit", 'name', 'apple')
    apple['colour'] = 'green'
    apple['tasty'] = True
    apple.push()
    # two people and a KNOWS edge between them
    alice = Node("Person", name="Alice")
    bob = Node("Person", name="Bob")
    alice_knows_bob = Relationship(alice, "KNOWS", bob, since=1999)
    graph.create(alice)
    graph.create(bob)
    graph.create(alice_knows_bob)
# ##Spaghetti Bolognese # In[15]: graph_db.cypher.execute( "MATCH (REC1:Recipe{Name:'Spaghetti Bolognese'})-[r:Contains]->(ING:Ingredient) RETURN REC1.Name, ING.Name;" ) # #Recommendation # ##Add User # In[4]: UserNode = graph_db.merge_one("User", "Name", "Ragnar") # ##Add User likes # In[5]: UserRef = graph_db.find_one("User", property_key="Name", property_value="Ragnar") #look for user Ragnar # In[6]: RecipeRef = graph_db.find_one( "Recipe", property_key="Name", property_value="Spaghetti Bolognese") #look for recipe Spaghetti Bolognese NodesRelationship = Relationship(UserRef, "Likes",
class StuffNeo4j(): def __init__(self, nodelabel, reltype): self.graph_db = None self.nodelabel = nodelabel self.reltype = reltype def connect(self, uri, usr="******", pwd="neo4j"): if not uri.endswith('/'): uri += '/' authenticate(uri, usr, pwd) self.graph_db = Graph(uri + "db/data") def create_indexes(self): #If index is already created py2neo throws exception. try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(name)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(synset_id)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute( "CREATE INDEX ON :%s(pointer_symbol)" % self.reltype) except: pass def create_node(self, nodetype, **kwargs): return Node(nodetype, **kwargs) def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs): n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val) for k in kwargs: n.properties[k] = kwargs[k] n.push() return n def insert_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) self.graph_db.create(rel) else: print "Could not insert relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def merge_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) return self.graph_db.create_unique(rel) else: print "Could not merge relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def create_wordnet_rel(self, synset1, synset2, ptype): """ Pointer symbols http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html The pointer_symbol s for nouns are: ! 
Antonym @ Hypernym @i Instance Hypernym ~ Hyponym ~i Instance Hyponym #m Member holonym #s Substance holonym #p Part holonym %m Member meronym %s Substance meronym %p Part meronym = Attribute + Derivationally related form ;c Domain of synset - TOPIC -c Member of this domain - TOPIC ;r Domain of synset - REGION -r Member of this domain - REGION ;u Domain of synset - USAGE -u Member of this domain - USAGE The pointer_symbol s for verbs are: ! Antonym @ Hypernym ~ Hyponym * Entailment > Cause ^ Also see $ Verb Group + Derivationally related form ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adjectives are: ! Antonym & Similar to < Participle of verb \ Pertainym (pertains to noun) = Attribute ^ Also see ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adverbs are: ! Antonym \ Derived from adjective ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE """ node1 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset1) node2 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset2) if (node1 is not None) and (node2 is not None): rel = Relationship(node1, self.reltype, node2, pointer_symbol=ptype) return rel else: raise Exception( "Could not create Wordnet relation (%s) - [%s] -> (%s)" % (synset1, ptype, synset2)) def insert_bulk(self, objs): if len(objs) > 0: self.graph_db.create(*objs)
print(record['FULL Name']) ''' with open('../data/tmp.json') as journal_article_file: journal_structure = json.load(journal_article_file) j_list = [] a_list = [] #print type(journal_structure["ACM"]) print(journal_structure["ACM"]["JDIQ"]["Volume5"]["Issue4"]["articles"]["Article No.: 13"]["references"][1]) #print(journal_structure["ACM"]["JDIQ"]) for journal_key, journal_value in journal_structure["ACM"].items(): j_list.append(journal_key) print(journal_key) journal_to_be_added = graph.merge_one("Journal", "name", journal_key) for volume_key, volume_value in journal_value.items(): print('\t' + volume_key) for issue_key, issue_value in volume_value.items(): #print('\t\t' + issue_key), #print("key") for issue_attributes_key, issue_attributes_value in issue_value.items(): #print(issue_attributes_key), #print("attributes") if issue_attributes_key in "articles": for article_key, article_value in issue_attributes_value.items(): title = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"] abstract = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"] authors = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"] doi = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"]
#/usr/bin/python # a simple neo4j script to create two nodes and an edge # setup user name & password import myconfig print myconfig.getUser() from py2neo import Graph, Path, Node, Relationship graph = Graph("http://"+myconfig.getUser()+":"+myconfig.getPass()+"@localhost:7474/db/data") # graph.merge_one only creates this node if it has to subject = graph.merge_one("instance","uniqueid", "E58") object = graph.merge_one("instance","uniqueid", "billy2") # only create unique relationships, don't need to keep re-expressing the same thing # this fails for some reason #rel = graph.create_unique(Relationship(subject,"bredBy",object)) rel = Relationship(subject,"bredBy",object) graph.create(rel)
# Rebuild the uniqueness constraints from scratch.
# NOTE(review): "Company" is created below but never dropped here —
# presumably handled before this chunk; verify upstream.
graph.schema.drop_uniqueness_constraint("Fund", "name")
graph.schema.drop_uniqueness_constraint("Institute", "name")
graph.schema.drop_uniqueness_constraint("Person", "name")
graph.schema.create_uniqueness_constraint("Company", "name")
graph.schema.create_uniqueness_constraint("Fund", "name")
graph.schema.create_uniqueness_constraint("Institute", "name")
graph.schema.create_uniqueness_constraint("Person", "name")

# One graph edge per spreadsheet row: (from)-[edge_type]->(to).
for row in bsm.rows[1:]:
    from_type, from_name, edge_type, edge_name, to_type, to_name, netlog = [cell.value for cell in row]
    # rows without a netlog value are demoted to "grey" node types
    if netlog is None:
        from_type = "grey"
        to_type = "grey"
    print(from_type, from_name, edge_type, to_type, to_name)
    from_node = graph.merge_one(from_type.strip(), "name", from_name.strip())
    to_node = graph.merge_one(to_type.strip(), "name", to_name.strip())
    from_to = Relationship(from_node, edge_type, to_node)
    graph.create_unique(from_to)

# get nodes with degree
nodes = []
for label in graph.node_labels:
    for p in graph.find(label):
        # the node id is the tail of the REST resource ref, e.g. ".../node/42"
        node = {"id": p.ref.split("/")[-1],
                "label": p["name"],
                "title": p["name"],
                "value": p.degree,
                "group": label}
        nodes.append(node)

# NOTE(review): this chunk is truncated here — the body of the ``with``
# block below is not visible in this view.
with open("report/nodesnetlog.js", "w") as f:
class GraphDatabase(): def __init__(self): try: self.graph = Graph( 'http://*****:*****@localhost:7474/db/data') except: print 'ERROR: Initialize Neo4j browser' self.graph.delete_all() def createDocumentNode(self, index, label): docNode = self.graph.merge_one('Document', 'name', 'Doc ' + str(index)) self.updateNode(docNode, { 'id': index, 'label': label, 'in-weight': 0, 'out-weight': 0 }) return docNode def createFeatureNode(self, index, word): wordNode = Node('Feature', word=word) self.graph.create(wordNode) self.updateNode(wordNode, { 'in-weight': 0, 'out-weight': 0, 'id': index }) return wordNode def getFeatureNode(self, word): return list( self.graph.find('Feature', property_key='word', property_value=word))[0] def createWeightedRelation(self, node1, node2, relation): match = self.graph.match(start_node=node1, rel_type=relation, end_node=node2) numberOfRelations = sum(1 for x in match) if numberOfRelations >= 1: match = self.graph.match(start_node=node1, rel_type=relation, end_node=node2) for relationship in match: self.increaseWeight(relationship) self.increaseWeight(node1, 'out-weight') self.increaseWeight(node2, 'in-weight') else: newRelation = Relationship(node1, relation, node2, weight=1) self.graph.create(newRelation) self.increaseWeight(node1, 'out-weight') self.increaseWeight(node2, 'in-weight') def increaseWeight(self, entity, weight='weight'): entity[weight] = entity[weight] + 1 self.graph.push(entity) def updateNode(self, node, propertyDict): node.properties.update(propertyDict) self.graph.push(node) def normalizeRelationships(self, nodes, relation): for node in nodes: for rel in node.match_incoming(relation): rel['norm_weight'] = rel['weight'] / node['in-weight'] self.graph.push(rel) def getNodes(self, feature): recordList = self.graph.cypher.execute( 'MATCH (node:%s) RETURN node' % feature) return [record.node for record in recordList] def getMatrix(self, nodesX, nodesY=None, relation='followed_by', propertyType='norm_weight'): if nodesY == None: 
nodesY = nodesX matrix = np.zeros([len(nodesX), len(nodesY)]) for node in nodesX: rowIndex = node['id'] for outRelation in node.match_outgoing(relation): colIndex = outRelation.end_node['id'] weight = outRelation[propertyType] matrix[rowIndex, colIndex] = weight return matrix def cypherContextSim(self): tx = self.graph.cypher.begin() tx.append(CONTEXT_SIM) tx.commit()
class Neo4jModel:
    """Persistence facade for court cases and their lookup dimensions."""

    def __init__(self):
        self.graph = Graph()

    def create(self):
        """Install the uniqueness constraints used by this model."""
        schema = self.graph.schema
        schema.create_uniqueness_constraint("Region", "name")
        schema.create_uniqueness_constraint("Court", "name")
        schema.create_uniqueness_constraint("Court_Decision_Type", "name")
        schema.create_uniqueness_constraint("Court_Judgement_Type", "name")
        schema.create_uniqueness_constraint("Case", "id")
        schema.create_uniqueness_constraint("Chairman", "name")

    def region(self, region_name):
        """Merge and return the Region node with the given name."""
        region_node = self.graph.merge_one("Region", "name", region_name)
        region_node.push()
        return region_node

    def court(self, court_name, region_name):
        """Merge the Court node and tie it to its Region."""
        court_node = self.graph.merge_one("Court", "name", court_name)
        court_node.push()
        self.graph.create_unique(
            Relationship(court_node, "SITUATED_IN", self.region(region_name)))
        return court_node

    def chairman(self, chairman_name):
        """Merge and return the Chairman node with the given name."""
        chairman_node = self.graph.merge_one("Chairman", "name", chairman_name)
        chairman_node.push()
        return chairman_node

    def decision_type(self, decision_type_name):
        """Merge and return the Court_Decision_Type node."""
        decision_node = self.graph.merge_one("Court_Decision_Type", "name",
                                             decision_type_name)
        decision_node.push()
        return decision_node

    def judgement_type(self, judgement_type_name):
        """Merge and return the Court_Judgement_Type node."""
        judgement_node = self.graph.merge_one("Court_Judgement_Type", "name",
                                              judgement_type_name)
        judgement_node.push()
        return judgement_node

    def case(self, court_case, region_name):
        """Merge a Case node and link it to court, chairman and both types."""
        case_node = self.graph.merge_one("Case", "id",
                                         court_case.decision_number)
        case_node["reg_date"] = __timestamp__(court_case.reg_date)
        case_node["law_date"] = __timestamp__(court_case.law_date)
        case_node["link"] = court_case.link
        case_node["text"] = court_case.text
        case_node["case_number"] = court_case.case_number
        self.graph.create_unique(
            Relationship(case_node, "RULED_BY",
                         self.court(court_case.court_name, region_name)))
        self.graph.create_unique(
            Relationship(case_node, "CARRIED_BY",
                         self.chairman(court_case.chairman)))
        self.graph.create_unique(
            Relationship(case_node, "OF_JUDGEMENT_TYPE",
                         self.judgement_type(court_case.vr_type)))
        self.graph.create_unique(
            Relationship(case_node, "OF_DECISION_TYPE",
                         self.decision_type(court_case.cs_type)))
        case_node.push()
        return case_node

    def change_date(self):
        """Re-encode law_date as a timestamp on a sample of Case nodes."""
        query = "MATCH (n:Case) WHERE NOT (n.law_date='') RETURN n LIMIT 5"
        # extract the numeric node id from each record's string form
        id_list = [row[0].__str__()[2:].split(':')[0]
                   for row in self.graph.cypher.execute(query)]
        for _id in id_list:
            case_node = self.graph.node(str(_id))
            case_node['law_date'] = __timestamp__(case_node['law_date'])
            case_node.push()
            print(case_node)
def moveContentModel():
    """Copy the EkStep content list into Neo4j.

    Pulls up to 2000 Game/Worksheet/Story content items, merges one
    Content node per identifier, copies selected metadata properties,
    and links each covered concept via a COVERED_IN relationship.
    """
    listURL = "https://api.ekstep.in/learning/v2/content/list"
    payload = "{\n \"request\": { \n \"search\": {\n \"fields\": [\"name\", \"contentType\"],\n \"status\":[\"Live\", \"Draft\", \"Retired\"],\n \"contentType\":[\"Game\", \"Worksheet\", \"Story\"],\n \"limit\":2000\n \n }\n }\n}"
    headers = {
        'content-type': "application/json",
        'user-id': "mahesh",
        'cache-control': "no-cache",
        'postman-token': "cec63279-346d-a452-4b13-e3cc0a0c2e4d"
    }
    resp = requests.request("POST", listURL, data=payload, headers=headers).json()
    # neo4j graph connector
    graph = Graph()
    # FIX: replaced the deprecated dict.has_key() calls (removed in
    # Python 3) with the ``in`` operator and removed the block of dead
    # ``= None`` initialisations — one of which cleared ``identifier``
    # right after it had been used.
    for contentDict in resp["result"]["content"]:
        # check if there is an identifier for this content
        if 'identifier' not in contentDict:
            continue
        identifier = contentDict['identifier']
        # create a node for this Content
        node = graph.merge_one("Content", "id", identifier)
        if 'languageCode' in contentDict:
            node.properties['languageCode'] = contentDict['languageCode']
            node.push()
        if 'createdOn' in contentDict:
            node.properties['createdOn'] = contentDict['createdOn']
            node.push()
        if 'ageGroup' in contentDict:
            node.properties['ageGroup'] = contentDict['ageGroup'][0]
            node.push()
        if 'gradeLevel' in contentDict:
            node.properties['gradeLevel'] = contentDict['gradeLevel'][0]
            node.push()
        if 'owner' in contentDict:
            node.properties['owner'] = contentDict['owner']
            node.push()
        if 'concepts' in contentDict:
            # each covered concept forms a "relationship" in the graph
            for concept in contentDict['concepts']:
                if 'identifier' in concept:
                    node2 = graph.merge_one("Concept", "id",
                                            concept['identifier'])
                    graph.create(Relationship(node2, "COVERED_IN", node))
# NOTE(review): ``marnee_properties`` and ``graph`` are defined earlier
# in the file, outside this chunk.
marnee_properties["age"] = 100
# Node.cast builds a Node from a label plus a plain property dict.
marnee_with_dict_node = Node.cast("Person", marnee_properties)
print marnee_with_dict_node
graph.create(marnee_with_dict_node)
# look at the graph
# BUT I END UP WITH GHOLA, I mean DUPLICATES
# How can we do this without duplication????

# Show merge in the browser

# PY2NEO
# graph.merge() -- returns a generator (generators are cool)
# graph.merge_one() -- returns one node
# show documentation
marnee_merge_node = graph.merge_one(label="Person", property_key="name", property_value="Marnee")
print marnee_merge_node

# but I have more than one property on this node. How do I get them in there
# merge returns a node, or set of nodes, and we can do things to a Node like Node.Push
# Node.properties
# Node.push()
for key, value in marnee_properties.iteritems():  # so pythonic
    marnee_merge_node[key] = value
marnee_merge_node.push()
#look at the graph. did we create a third Marnee? No we only have two.

#How many Marnees do you know?

# comment out marnee dict and clear db to start over
import unicodedata from py2neo import Graph, Node, Relationship g = Graph() g.delete_all() starts = 'Stockholm', 'Edinburgh' for start in starts: print "\nStarting " + start start_node = g.merge_one('Airport', property_key='name', property_value=start) for l in open(start + '.csv'): items = l.split(',') airline = unicodedata.normalize('NFKD', unicode(items[0], encoding='utf-8')).encode('ascii', 'ignore') print "Airline is " + airline for airport in items[1:]: airport = unicodedata.normalize('NFKD', unicode(airport, encoding='utf-8')).encode('ascii', 'ignore').strip().split('-')[0] print "Endpoint is " + airport end_node = g.merge_one("Airport", property_key='name', property_value=airport) g.create(Relationship(start_node, "FLIES_TO", end_node, airline=airline))
marnee_properties["age"] = 100 # marnee_with_dict_node = Node.cast("Person", marnee_properties) # print marnee_with_dict_node # graph.create(marnee_with_dict_node) # # look at the graph # # BUT I END UP WITH GHOLA, I mean DUPLICATES # # How can we do this without duplication???? # # # Show merge in the browser # # # PY2NEO # # graph.merge() -- returns a generator (generators are cool) # # graph.merge_one() -- returns one node # # show documentation # marnee_merge_node = graph.merge_one(label="Person", property_key="name", property_value="Marnee") print marnee_merge_node # # # but I have more than one property on this node. How do I get them in there # # merge returns a node, or set of nodes, and we can do things to a Node like Node.Push # # Node.properties # # Node.push() # for key, value in marnee_properties.iteritems(): # so pythonic marnee_merge_node[key] = value marnee_merge_node.push() # #look at the graph. did we create a third Marnee? No we only have two. # # #How many Marnees do you know? # # comment out marnee dict and clear db to start over #
''' Import the second level in the subjects hierarchy. By default, this will be from the level1list.json file. ''' import json import sys from py2neo import Graph graph = Graph() from py2neo import Node, Relationship level1_f = sys.argv[1] subjects = json.loads(open(level1_f).read()) for d in subjects: parent = graph.merge_one("Subject", "id", str(d["parent0"])) print str(d["id"]) + ":" + d["name"] + "--" + parent["name"] n = Node("Subject", id=d["id"], name=d["name"], level=1) r = Relationship(n, "TYPE_OF", parent) graph.create(r)
def update_show_info_old():
    """Enrich existing Show nodes with TVRage and OMDb metadata.

    Streams every Show node, fetches its full show info from TVRage and
    OMDb (with a crude retry loop), copies the returned fields onto the
    node, then creates Country/Genre/Season/Episode nodes and edges.

    NOTE(review): ``success = True`` here means "still needs to be
    fetched" — the flag name is inverted relative to its meaning. A
    persistent ValueError also retries forever (the ``continue`` inside
    ``except`` re-enters the while loop); confirm that is intended.
    """
    print 'updating show info'
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)
    results = graph.cypher.stream("match (s:Show) return id(s) as eid,s.id")
    # skip shows below this register id (resume point)
    start_id = 764
    for record in results:
        if int(record['s.id']) < start_id:
            continue
        node_show = graph.node(record['eid'])
        result_dict = {}
        success = True
        while success:
            try:
                # TVRage: full show info as XML -> dict
                show_info_e_list = requests.get(
                    'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
                result_dict = xmltodict.parse(show_info_e_list.text)
                # OMDb: title lookup, copied verbatim onto the node
                omdb_show_info = requests.get(
                    'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
                dict_omdb_show_info = json.loads(omdb_show_info.text)
                if dict_omdb_show_info['Response'] == 'True':
                    for key, value in dict_omdb_show_info.iteritems():
                        node_show[key] = value
                success = False
            except ValueError as e:
                logger.exception("Value Error")
                continue
            except Exception as e:
                logger.exception("Some network issue, will try again")
                success = True
        print str(node_show['name'])
        # info
        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()
        #Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')
        node_country = graph.merge_one("Country", 'country', from_country)
        node_country.push()
        show_from_country = Relationship(node_show, "from", node_country)
        graph.create(show_from_country)
        #Genres
        if result_dict['Show'].get('genres', None) is not None:
            # TVRage returns either a single genre or a list of them
            genre_list = []
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])
            for genre in genre_list:
                node_genre = graph.merge_one("Genre", 'name', genre)
                node_genre.push()
                show_of_genre = Relationship(node_show, "of genre", node_genre)
                graph.create(show_of_genre)
        """try:
            print node_show['started']
            a = node_show['started'].split("/")
            if int(a[len(a)-1]) < 2000:
                continue
        except Exception:
            continue
        """
        #Seasons
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            continue
        # same single-or-list shape as genres above
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])
        for season in season_list:
            node_season = Node.cast('Season', {'no': season['@no']})
            graph.create(node_season)
            show_season = Relationship(node_show, "has", node_season)
            graph.create(show_season)
            #Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])
            count = 1
            for episode in episode_list:
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                graph.create(node_episode)
                success = True
                while success:
                    try:
                        omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                         .format(node_show['name'], node_season['no'], node_episode['epnum']))
                        dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                        if dict_omdb_episode_info['Response'] == 'True':
                            for key, value in dict_omdb_episode_info.iteritems():
                                node_episode[key] = value
                            node_episode.push()
                        success = False
                    except ValueError as e:
                        logger.exception("Value error")
                        continue
                    except Exception as e:
                        logger.exception("network issue: wil try again")
                        success = True
                # NOTE(review): ``show_season`` is itself a Relationship,
                # not the Season node — this looks like it was meant to be
                # ``node_season``; confirm against the resulting graph shape.
                show_episode = Relationship(show_season, "has", node_episode)
                graph.create(show_episode)
                count = count + 1
    print 'end updating show info'
from py2neo import Graph, Node, Relationship import json f = open('tt2.json', 'r') jj = json.loads(f.read()) f.close() graph = Graph('http://*****:*****@localhost:7474/db/data') for post in jj: poster = graph.merge_one("User", "id", post['poster']) neoPost = graph.merge_one("Post", "id", post['id']) posted = graph.create_unique(Relationship(poster, "POSTED", neoPost)) print "(%s)-[:POSTED]->(%s)" % (post['poster'], post['id']) if post.get('reblogged_from'): reblogger = graph.merge_one("User", "id", post['reblogged_from']) reblogged_post = graph.merge_one("Post", "id", post['reblog_post_id']) graph.create_unique(Relationship(reblogger, "POSTED", reblogged_post)) graph.create_unique(Relationship(neoPost, "REBLOG_OF", reblogged_post)) print "(%s)-[:POSTED]->(%s)" % (post['reblogged_from'], post['reblog_post_id']) if post.get('original_poster'): original_poster = graph.merge_one("User", "id", post['original_poster']) original_post = graph.merge_one("Post", "id", post['original_post_id']) graph.create_unique(Relationship(original_poster, "POSTED", original_post)) graph.create_unique(Relationship(neoPost, "ORIGINATES_FROM", original_post)) print "(%s)-[:POSTED]->(%s)" % (post['original_poster'], post['original_post_id'])
# Authenticate and create graph authenticate("localhost:7474", "neo4j", "somak"); graph = Graph(); idx = graph.legacy.get_or_create_index(neo4j.Node, "Entities") # TODO: everytime database is reset, create the constraint #graph.schema.create_uniqueness_constraint("Entity", "name") words_dict={}; if len(sys.argv) < 2: print "python conceptnetneo4j.py <seedsfile>"; sys.exit(); with open(sys.argv[1], "r") as f: for line in f: word=line.strip(); word = str('/c/en/')+word; n = graph.merge_one("Entity", "name", word); #n['completed'] = 0; idx.add("name",encode(n["name"]),n); with open(sys.argv[1], "r") as f: i=0; threads = []; for line in f: word=line.strip(); word = str('/c/en/')+word; FINDER = AssertionFinder(); t = threading.Thread(target=recursivelyAddNodesAndEdges, name=str(i), args=(word,idx,graph,FINDER,0)); t.start(); threads.append(t); i = i+1; if i%10 == 0:
# Read the author data structure file for ACM Scraping. with open('data/acm_author.json') as author_file: author_structure = json.load(author_file) # Create a node for every author of type "Author" storing the first, middle, last and full name. Currently we use the unique ACM Profile link for the author as the unique constraint while creating the node. for key, value in author_structure.items(): for record in value: link = str(record['link']) # print(link) first_name = record['FName'] mid_name = record['MName'] last_name = record['LName'] full_name = record['FULL Name'] author_to_be_added = graph.merge_one("Author", "link", link) author_to_be_added['full_name'] = full_name author_to_be_added['fist_name'] = first_name author_to_be_added['middle_name'] = mid_name author_to_be_added['last_name'] = last_name author_to_be_added.push() print(record['FULL Name'] + "\t") print("\n") # Read the journal and article data structure file for ACM Scraping with open('data/tmp.json') as journal_article_file: acm_structure = json.load(journal_article_file) j_list = [] a_list = []
class StuffNeo4j(): def __init__(self, nodelabel, reltype): self.graph_db = None self.nodelabel = nodelabel self.reltype = reltype def connect(self, uri, usr="******", pwd="neo4j"): if not uri.endswith('/'): uri += '/' authenticate(uri, usr, pwd) self.graph_db = Graph(uri + "db/data") def create_indexes(self): #If index is already created py2neo throws exception. try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(name)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(synset_id)" % self.nodelabel) except: pass try: self.graph_db.cypher.execute("CREATE INDEX ON :%s(pointer_symbol)" % self.reltype) except: pass def create_node(self, nodetype, **kwargs): return Node(nodetype, **kwargs) def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs): n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val) for k in kwargs: n.properties[k] = kwargs[k] n.push() return n def insert_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) self.graph_db.create(rel) else: print "Could not insert relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def merge_rel(self, reltype, node1, node2, **kwargs): if node1 is not None and node2 is not None: rel = Relationship(node1, reltype, node2, **kwargs) return self.graph_db.create_unique(rel) else: print "Could not merge relation (%s) - [%s] -> (%s)" % ( node1, reltype, node2) def create_wordnet_rel(self, synset1, synset2, ptype): """ Pointer symbols http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html The pointer_symbol s for nouns are: ! 
Antonym @ Hypernym @i Instance Hypernym ~ Hyponym ~i Instance Hyponym #m Member holonym #s Substance holonym #p Part holonym %m Member meronym %s Substance meronym %p Part meronym = Attribute + Derivationally related form ;c Domain of synset - TOPIC -c Member of this domain - TOPIC ;r Domain of synset - REGION -r Member of this domain - REGION ;u Domain of synset - USAGE -u Member of this domain - USAGE The pointer_symbol s for verbs are: ! Antonym @ Hypernym ~ Hyponym * Entailment > Cause ^ Also see $ Verb Group + Derivationally related form ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adjectives are: ! Antonym & Similar to < Participle of verb \ Pertainym (pertains to noun) = Attribute ^ Also see ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE The pointer_symbol s for adverbs are: ! Antonym \ Derived from adjective ;c Domain of synset - TOPIC ;r Domain of synset - REGION ;u Domain of synset - USAGE """ node1 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset1) node2 = self.graph_db.find_one(self.nodelabel, property_key="synset_id", property_value=synset2) if (node1 is not None) and (node2 is not None): rel = Relationship(node1, self.reltype, node2, pointer_symbol=ptype) return rel else: raise Exception("Could not create Wordnet relation (%s) - [%s] -> (%s)" % ( synset1, ptype, synset2)) def insert_bulk(self, objs): if len(objs) > 0: self.graph_db.create(*objs)
class Neo4jModel:
    """Persistence layer mapping court-case records onto a Neo4j graph
    (py2neo 2.x). Each helper merges one node kind and returns it; `case`
    wires a Case to its Court/Chairman/Judgement-/Decision-type nodes.

    Note: the double-underscore locals (__region, __case, ...) trigger
    Python name mangling inside the class body; harmless here since they
    are plain locals, but unconventional.
    """
    def __init__(self):
        self.graph = Graph()  # default local Neo4j endpoint

    def create(self):
        # One-time schema setup; raises if a constraint already exists.
        self.graph.schema.create_uniqueness_constraint("Region", "name")
        self.graph.schema.create_uniqueness_constraint("Court", "name")
        self.graph.schema.create_uniqueness_constraint("Court_Decision_Type", "name")
        self.graph.schema.create_uniqueness_constraint("Court_Judgement_Type", "name")
        self.graph.schema.create_uniqueness_constraint("Case", "id")
        self.graph.schema.create_uniqueness_constraint("Chairman", "name")

    def region(self, region_name):
        # Merge-by-name: returns the single Region node for region_name.
        __region = self.graph.merge_one("Region", "name", region_name)
        __region.push()
        return __region

    def court(self, court_name, region_name):
        # Court node plus (Court)-[:SITUATED_IN]->(Region).
        __court = self.graph.merge_one("Court", "name", court_name)
        __court.push()
        self.graph.create_unique(Relationship(__court, "SITUATED_IN",
                                              self.region(region_name)))
        return __court

    def chairman(self, chairman_name):
        __chairman = self.graph.merge_one("Chairman", "name", chairman_name)
        __chairman.push()
        return __chairman

    def decision_type(self, decision_type_name):
        __decision_type = self.graph.merge_one("Court_Decision_Type", "name", decision_type_name)
        __decision_type.push()
        return __decision_type

    def judgement_type(self, judgement_type_name):
        __judgement_type = self.graph.merge_one("Court_Judgement_Type", "name", judgement_type_name)
        __judgement_type.push()
        return __judgement_type

    def case(self, court_case, region_name):
        """Merge a Case node keyed by decision_number and link it to its
        court, chairman and both type nodes. `court_case` is a project
        object exposing decision_number/reg_date/law_date/link/text/
        case_number/court_name/chairman/vr_type/cs_type attributes.
        `__timestamp__` is an external date-conversion helper (defined
        elsewhere in this module — not mangled, since it ends in '__').
        """
        __case = self.graph.merge_one("Case", "id", court_case.decision_number)
        __case["reg_date"] = __timestamp__(court_case.reg_date)
        __case["law_date"] = __timestamp__(court_case.law_date)
        __case["link"] = court_case.link
        __case["text"] = court_case.text
        __case["case_number"] = court_case.case_number
        self.graph.create_unique(Relationship(__case, "RULED_BY",
                                              self.court(court_case.court_name, region_name)))
        self.graph.create_unique(Relationship(__case, "CARRIED_BY",
                                              self.chairman(court_case.chairman)))
        self.graph.create_unique(Relationship(__case, "OF_JUDGEMENT_TYPE",
                                              self.judgement_type(court_case.vr_type)))
        self.graph.create_unique(Relationship(__case, "OF_DECISION_TYPE",
                                              self.decision_type(court_case.cs_type)))
        __case.push()
        return __case

    def change_date(self):
        """One-off migration: re-encode law_date on (up to 5) Case nodes.

        NOTE(review): node ids are recovered by string-slicing the record's
        repr ([2:] then split on ':') — extremely fragile against py2neo
        formatting changes; prefer RETURN id(n) in the query.
        """
        query = "MATCH (n:Case) WHERE NOT (n.law_date='') RETURN n LIMIT 5"
        id_list = []
        for n in self.graph.cypher.execute(query):
            id_list.append(n[0].__str__()[2:].split(':')[0])  # getting an id
        for _id in id_list:
            n = self.graph.node(str(_id))
            n['law_date'] = __timestamp__(n['law_date'])
            n.push()
            print(n)
from py2neo import Graph, Path, Node, Relationship

# Connection strings have credentials/host placeholders scrubbed.
# (MongoClient is imported elsewhere in this file.)
db = MongoClient('mongodb://<user>:<pass>@ds<id>.mongolab.com:<port>/<db>')
collection = db["muziekcentrum"]["muziekcentrum"]
graph = Graph("http://<user>:<apikey>@<db>.sb02.stations.graphenedb.com:<port>/db/data/")

# Wipe the graph, then (re)install the uniqueness constraints.
# NOTE: drop_uniqueness_constraint raises if the constraint is absent —
# assumes a previously-initialized database.
graph.cypher.execute("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE r,n")
graph.schema.drop_uniqueness_constraint("Album", "name")
graph.schema.drop_uniqueness_constraint("Uitvoerder", "name")
graph.schema.drop_uniqueness_constraint("Label", "name")
graph.schema.create_uniqueness_constraint("Album", "name")
graph.schema.create_uniqueness_constraint("Uitvoerder", "name")
graph.schema.create_uniqueness_constraint("Label", "name")

for doc in collection.find({"Type": "album"}):
    # The album node is loop-invariant per document: merge it once here
    # instead of re-merging inside both inner loops (merge_one is
    # idempotent, so behavior is unchanged — just fewer round trips).
    album_node = graph.merge_one("Album", "name", doc["Titel"])
    # (Uitvoerder)-[:MADE]->(Album) for every performer on the album.
    for uitvoerder in doc["Uitvoerder(s)"]:
        uitvoerder_node = graph.merge_one("Uitvoerder", "name", uitvoerder)
        uitvoerder_makes_album = Relationship(uitvoerder_node, "MADE", album_node)
        graph.create_unique(uitvoerder_makes_album)
    # (Label)-[:RELEASED]->(Album) for every label.
    for label in doc["Label(s)"]:
        label_node = graph.merge_one("Label", "name", label)
        label_releases_album = Relationship(label_node, "RELEASED", album_node)
        graph.create_unique(label_releases_album)
#/usr/bin/python # a simple neo4j script to create two nodes and an edge # setup user name & password import myconfig print myconfig.getUser() from py2neo import Graph, Path, Node, Relationship graph = Graph("http://" + myconfig.getUser() + ":" + myconfig.getPass() + "@localhost:7474/db/data") # graph.merge_one only creates this node if it has to subject = graph.merge_one("instance", "uniqueid", "E58") object = graph.merge_one("instance", "uniqueid", "billy2") # only create unique relationships, don't need to keep re-expressing the same thing # this fails for some reason #rel = graph.create_unique(Relationship(subject,"bredBy",object)) rel = Relationship(subject, "bredBy", object) graph.create(rel)
# Fragment of a per-artwork loop: `artwork`, `subjects`, `mediums` and
# `graph` are defined in the enclosing (not visible) scope.
# Walk three fixed levels of the subject hierarchy and collect only the
# leaf (level-3) subject ids.
if "children" in artwork["subjects"]:
    for sl1 in artwork["subjects"]["children"]:
        #print sl1["name"]
        if "children" in sl1:
            for sl2 in sl1["children"]:
                #print "-" + sl2["name"]
                if "children" in sl2:
                    for sl3 in sl2["children"]:
                        #print "--" + sl3["name"]
                        subjects.append(sl3["id"])
# One Artwork node per record, then (Artwork)-[:FEATURES]->(Subject) edges.
node = Node("Artwork", id=artwork["id"], title=artwork["title"], acno=artwork["acno"])
graph.create(node)
for s in subjects:
    subject = graph.merge_one("Subject", "id", s)
    r = Relationship(node, "FEATURES", subject)
    graph.create(r)
# Split the free-text medium field on ",", " and ", " on "; strip digits
# (e.g. "2 panels") and de-duplicate the lowercased remainder.
if artwork["medium"]:
    for m in artwork["medium"].split(","):
        for n in m.split(" and "):
            for o in n.split(" on "):
                s = ''.join([i for i in o if not i.isdigit()])
                if s.strip().lower() not in mediums:
                    mediums.append(s.strip().lower())
# NOTE(review): `mediums` accumulates across artworks (defined outside this
# fragment), so every artwork is linked to all mediums seen so far — confirm
# whether it should be reset per artwork.
for m in mediums:
    medium = graph.merge_one("Medium", "id", m)
    r = Relationship(node, "MADE_OF", medium)
    graph.create(r)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) #Get specific hashtag until the date we want tweets = tweepy.Cursor(api.search, q="#SuperBowl", count=100, until='2016-02-09', include_entities=True).items() for tweet in tweets: #find if exists for exploration... mynode = list( graph.find('User', property_key='Screen_Name', property_value=tweet.user.screen_name.encode('utf8'))) x = graph.merge_one("User", "Screen_Name", tweet.user.screen_name.encode('utf8')) x.properties.update({ "Name": tweet.user.name, "Description": tweet.user.description.encode('utf8'), "Location": tweet.user.location, "Followers": tweet.user.followers_count, "Friends": tweet.user.friends_count, "Tweets": tweet.user.statuses_count, "Image": tweet.user.profile_image_url }) if len(mynode) == 0: x.properties.update({"Exploration": ''}) x.push() t = graph.merge_one("Tweet", "ID", tweet.id)
class SyntaxGraph():
    """
    The aim of this class is to find associated words to database syntax. A
    user will input a sentence, and these associations will be used to find
    the correct SQL statement to execute in the database.

    The relations between words are modelled as a graph. The nodes of the
    graph are the words, and the edges (relationships) between nodes
    represent when a word means another word (e.g. is a synonym).

    The graph is "seeded" using a set of database syntax words, finding
    synonyms/related words to these initial words using a call to a
    thesaurus API. The graph is then "grown" from the resulting synonyms
    using subsequent API calls, in a recursive fashion.

    When a user enters a sentence, this graph will be used to find database
    syntax words which are within a certain "degree of separation" from each
    word in the sentence, in an attempt to start building a SQL query from
    this sentence.
    """
    def __init__(self, seed_words=None, seed_mappings=None):
        # SQLTerms and DB_URI are project-level (defined elsewhere).
        self.sql_terms = SQLTerms().sql_terms
        self.graph = Graph(DB_URI)
        # NOTE(review): this transaction is opened but never committed in
        # the visible code — all writes below go through merge_one/
        # create_unique directly.
        self.tx = self.graph.cypher.begin()
        # Hand-curated mappings: each value word should resolve to its key.
        self.seed_mappings = seed_mappings or {'where': ['filter', 'for', 'during'],
                                               'from': ['source', 'in'],
                                               'into': ['toward', 'within', 'inside'],
                                               'group':['by'],
                                               'and': ['with']}
        # Seeds = SQL terms not covered by the manual mappings, plus the
        # mapping keys themselves.
        self.seed_words = seed_words or [x for x in self.sql_terms if x not in self.seed_mappings]
        self.seed_words.extend([x for x in self.seed_mappings.iterkeys()])
        # Words never replaced by replace_word().
        self.exclude_words = ['display']

    def seed(self, reset=False):
        """Populate the graph from seed_words; reset=True wipes it first."""
        print 'Seeding graph'
        if reset:
            self.graph.delete_all()
        for word in self.seed_words:
            # Skip words whose synonyms were fetched on a previous run
            # (marked with the 'called' property).
            if not self.already_called(word):
                self.add_synonyms(word)
            if word in self.seed_mappings:
                print 'Mapping %s to %s' % (
                    ','.join(self.seed_mappings[word]), word
                )
                base = self.graph.merge_one('Word', 'name', word)
                synonyms = [self.graph.merge_one('Word', 'name', x)
                            for x in self.seed_mappings[word]]
                # Bidirectional MEANS edges between the seed and each mapping.
                [self.graph.create_unique(Relationship(base, 'MEANS', synonym))
                 for synonym in synonyms]
                [self.graph.create_unique(Relationship(synonym, 'MEANS', base))
                 for synonym in synonyms]

    def grow(self, levels=1):
        """Expand every not-yet-called Word node, recursing `levels` deep."""
        print 'Levels left: %d' % levels
        query = '''
                MATCH (w:Word)
                WHERE NOT HAS (w.called)
                RETURN w.name
                '''
        results = self.graph.cypher.execute(query)
        for word in results:
            self.add_synonyms(word['w.name'])
        if levels > 1:
            self.grow(levels-1)

    def already_called(self, word):
        """True if this word's thesaurus lookup already ran (has 'called').

        NOTE(review): `word` is %-interpolated into the Cypher string —
        fine for trusted seeds, unsafe for arbitrary user input.
        """
        if len(self.graph.cypher.execute('''MATCH (w:Word)
                                            WHERE w.name = '%s'
                                            AND HAS (w.called)
                                            RETURN w.name
                                         ''' % word)) > 0:
            return True

    def update_set_called(self, word):
        """Mark a word as fetched so seed()/grow() will not re-query it."""
        word_node = self.graph.merge_one('Word', 'name', word)
        word_node.properties['called'] = 1
        word_node.push()

    def add_synonyms(self, word):
        """Fetch thesaurus data for `word` and wire MEANS relationships.

        Only the 'verb' part-of-speech section of the API response is used.
        Multi-word entries (containing a space) are skipped throughout.
        """
        url = 'http://words.bighugelabs.com/api/2/%s/%s/json' % (API_KEY, word)
        print url
        response = requests.get(url)
        try:
            data = response.json()
        except JSONDecodeError:
            # Unparseable body (e.g. word not found): mark done and bail.
            self.update_set_called(word)
            return
        if 'verb' in data:
            for key in data['verb']:
                # Synonyms: words are all interrelated (connected graph)
                if key == 'syn':
                    synonyms = [word]
                    synonyms.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in synonyms]
                    [self.graph.create_unique(Relationship(i, 'MEANS', j))
                     for j in nodes for i in nodes if i!=j]
                # Similar / user defined words: words are related both ways
                # between root and related words (both direction)
                elif key in ('sim', 'usr'):
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    # (i+j>0 and i*j==0) selects exactly the pairs where one
                    # index is the root (0) and the other is not: root<->related.
                    [self.graph.create_unique(Relationship(nodes[i], 'MEANS', nodes[j]))
                     for j in range(len(nodes)) for i in range(len(nodes))
                     if (i+j>0 and i*j==0)]
                # Related words: words are related only from root to related
                # word (one direction)
                elif key == 'rel':
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[0], 'MEANS', nodes[i]))
                     for i in range(1, len(nodes))]
        self.update_set_called(word)

    def replace_word(self, word, max_degree_separation=2):
        """Map `word` to its closest seed word within the separation limit.

        Returns the word itself if it is already a seed/excluded word, the
        best seed replacement if one is close enough, else None (implicit).
        """
        if word in self.seed_words or word in self.exclude_words:
            return word
        replacement_candidates = []
        for seed_word in self.seed_words:
            # Shortest path between the input word and this seed word.
            query = '''MATCH p=shortestPath((w:Word{name:"%s"})-[*]-(n:Word{name:"%s"}))
                       RETURN length(p), n.name
                    ''' % (word, seed_word)
            results = self.graph.cypher.execute(query)
            try:
                replacement_candidates.append(
                    min([(row['length(p)'], row['n.name']) for row in results]))
            except ValueError:
                # min() on an empty list: no path to this seed word.
                pass
        if len(replacement_candidates) > 0:
            replacement = min(replacement_candidates)  # (distance, seed) tuple
            if replacement[0] <= max_degree_separation:
                return replacement[1]

    def replace_text(self, text):
        """Rewrite a sentence, substituting each word's seed replacement.

        Words that produce no replacement are kept as-is.
        NOTE(review): replace_word is called twice per new word — the
        `cleaned` cache is filled but never read back for lookups.
        """
        pattern = re.compile('[\W_]+')  # strip all non-word characters
        cleaned = []
        replacements = []
        for word in text.split():
            cleaned_word = pattern.sub('', word)
            if cleaned_word not in [x[0] for x in cleaned]:
                cleaned.append([cleaned_word, self.replace_word(cleaned_word)])
            replacements.append(self.replace_word(cleaned_word) or cleaned_word)
        return ' '.join(replacements)
# Fragment: tail of a dict literal whose head (and the `questions`,
# `q_order`, `opt_images`, `op_maps`, `graph` definitions) precede this chunk.
    ],
    'Elegant': ['Arctic Zen', 'Soho Sophistication', 'Istanbul Mosaic'],
    'Cozy': [
        'Ubud Terraces', 'Mekong Meander', 'Lunuganga Estate', 'Dorchester Comfort'
    ],
    'Stylish': ['Santorini Calm', 'Arctic Zen', 'Jodhpur Blues', 'Soho Sophistication'],
    'Quirky': ['Warhol Burst', 'Banksy Quirk'],
    'Vibrant': [
        'Sindhoor Colonial', 'Istanbul Mosaic', 'Eiffel Chic', 'Red Earth', 'Malnad Pure'
    ]
}
# Build the quiz graph: one Questions node per key, HAS_OPTION edges to its
# Options, and LEADS_TO_PROFILE edges from each option to its Profiles.
keys = questions.keys()
for k in keys:
    q = Node("Questions", name=k)
    q.properties['order'] = q_order[k]  # display order for the question
    for e in questions[k]:
        o = Node("Options", name=e)
        o.properties['image'] = opt_images[e]
        rel = Relationship(q, "HAS_OPTION", o)
        graph.create(rel)  # also persists q and o the first time
        for z in op_maps[e]:
            # Profiles are merged (shared across options), not re-created.
            z_node = graph.merge_one('Profiles', 'name', z)
            rel_op = Relationship(o, "LEADS_TO_PROFILE", z_node)
            graph.create(rel_op)
# Enable py2neo HTTP traffic logging.
watch("httpstream")
def import_api_data():
    """Import demand data from the registry API into the graph DB.

    Two sources are loaded:
      * 'obtaj' records (art registry): Demand nodes plus their Borjnuk
        (person) and Property nodes.
      * 'demand' records: Demand nodes linked to Debtor, Creditor and
        Arbitration nodes.

    NOTE(review): an earlier ``def import_api_data`` exists in this file;
    whichever definition runs last wins at import time — consider renaming
    one of them. The dead commented-out author/category block from the
    older version has been removed.
    """
    graph = Graph()
    # graph.delete_all()
    # Uncomment on the first run!
    # graph.schema.create_uniqueness_constraint("Method", "id")
    # graph.schema.create_uniqueness_constraint("Author", "id")
    # graph.schema.create_uniqueness_constraint("Category", "id")
    _import_obtajenna(graph)
    _import_demands(graph)


def _import_obtajenna(graph):
    # Helper: load 'obtaj' records into Demand/Borjnuk/Property nodes.
    for api_obtaj in get_objects_art('obtaj'):
        node_demand = graph.merge_one("Demand", "id", api_obtaj["id"])
        node_demand["reason_doc"] = api_obtaj["reason_doc"]
        node_demand["cost_size"] = api_obtaj["cost_size"]
        # Each person ("borjnuk") gets an "obtajuetsa" edge to the demand.
        for api_author in api_obtaj["borjnuku"]:
            node_borjnuk = graph.merge_one("Borjnuk", "id", api_author["id"])
            node_borjnuk["name"] = api_author["name"]
            node_borjnuk["tel_number"] = api_author["tel_number"]
            node_borjnuk.push()
            graph.create_unique(Relationship(node_borjnuk, "obtajuetsa", node_demand))
        # Each pledged property gets a "zakladena" edge to the demand.
        for api_property in api_obtaj["properties"]:
            node_property = graph.merge_one("Property", "id", api_property["id"])
            node_property["name"] = api_property["name_property"]
            node_property["ser_number"] = api_property["ser_number"]
            node_property.push()
            graph.create_unique(Relationship(node_property, "zakladena", node_demand))
        node_demand.push()


def _import_demands(graph):
    # Helper: load 'demand' records with Debtor/Creditor/Arbitration links.
    for api_demand in get_objects('demand'):
        node_demand = graph.merge_one("Demand", "id", api_demand["id"])
        node_demand["sum"] = api_demand["sum"]
        api_debtor = api_demand["Debtor"]
        node_debtor = graph.merge_one("Debtor", "id", api_debtor["id"])
        node_debtor["name"] = api_debtor["name"]
        # The debtor's arbitration record is fetched individually by id.
        api_arbitration = get_object('arbitration/' + str(api_debtor["arbitration_id"]))
        node_arbitration = graph.merge_one("Arbitration", "id", api_arbitration["id"])
        node_arbitration["name"] = api_arbitration["name"]
        node_arbitration.push()
        graph.create_unique(Relationship(node_arbitration, "CONTAINS", node_debtor))
        node_debtor.push()
        graph.create_unique(Relationship(node_debtor, "CONTAINS", node_demand))
        api_creditor = api_demand["Creditor"]
        node_creditor = graph.merge_one("Creditor", "id", api_creditor["id"])
        node_creditor["name"] = api_creditor["name"]
        node_creditor.push()
        graph.create_unique(Relationship(node_creditor, "CONTAINS", node_demand))
        node_demand.push()
''' Import the third level in the subjects hierarchy. By default, this will be from the level2list.json file. CLI parameters: argv[1] - full path to json file, including filename argv[2] - optional flag to print or not ''' import json import sys from py2neo import Graph graph = Graph() from py2neo import Node, Relationship level2_f = sys.argv[1] verbose = sys.argv[2] subjects = json.loads(open(level2_f).read()) for d in subjects: parent = graph.merge_one("Subject", "id", d["parent1"]) if verbose: print str(d["id"]) + ":" + d["name"] + "--" + parent["name"] n = Node("Subject", id=d["id"], name=d["name"], level=2) r = Relationship(n, "TYPE_OF", parent) graph.create(r)
        if (colorFound == False):
            prod_name = node.properties['product_name']
            color = utilities.searchPrefix(prod_name)
            #Create color node and relationship
            color_node = graph.merge_one('Color', 'Color', color)
            node_rel_dest = Relationship(node, "HAS_COLOR", color_node)
            graph.create_unique(node_rel_dest)
"""
# NOTE(review): the lines above are the tail of a triple-quoted
# (commented-out) block whose opening quotes precede this chunk; they are
# string content, not executable code, and are preserved verbatim.

# main
# Create nodes and relationship between category and sub-category
# WARNING: wipes the whole database before re-importing.
graph.delete_all()
parent_cat_node = graph.merge_one('Category', 'product_category', 'Mobiles & Tablets')
sub_cat_node = graph.merge_one('Category', 'product_sub_category', 'Mobile Phones')
node_rel_dest = Relationship(sub_cat_node, "SUB_CAT_OF", parent_cat_node)
graph.create_unique(node_rel_dest)
# `data`, createProductNode and addNodeProperties are defined elsewhere.
for d in data:
    rec = d['record']
    # Records missing a name or unique id cannot be keyed: skip them.
    if not rec['product_name'] or not rec['uniq_id']:
        logging.info ("Incomplete product ... skipping")
        logging.debug(rec)
        continue
    else:
        node = createProductNode(rec)
        addNodeProperties(node, rec)
        node.push()
# Authentication authenticate("localhost:7474", args.user, args.password) db = Graph() # Start with empty database db.delete_all() # Index your data db.cypher.execute("CREATE INDEX ON :Beer(name)") db.cypher.execute("CREATE INDEX ON :Brewery(name)") db.cypher.execute("CREATE INDEX ON :Alcohol(percentage)") db.cypher.execute("CREATE INDEX ON :Type(type)") # Add nodes + relations # `merge_one` will try to match an existing node for row in read_rows(args.file, args.delimiter): beer = db.merge_one("Beer", "name", row["Merk"]) brewer = db.merge_one("Brewery", "name", row["Brouwerij"]) alc = db.merge_one("Alcohol", "percentage", row["Percentage alcohol"]) # Add node to db (this is kinda slow, you might want to do it in batches) db.create(Relationship(beer, "has_alcohol", alc)) db.create(Relationship(brewer, "brews", beer)) # Comma seperated for t in row["Soort"].split(","): btype = db.merge_one("Type", "type", t) db.create(Relationship(beer, "is_a", btype)) print "Done."
# Fragment: closes a try-block opened before this chunk (retweet handling)
# and begins the reply handling; the properties dict is cut off at the end.
        posts = Relationship(initu, "POSTS", initt)
        graph.create_unique(posts)
    except Exception, e:
        #error handler
        # Best-effort: retweet-source lookups that fail are just skipped.
        print 'Exception Retweet'
        pass
    # find REPLY source
    # Check if we have a Reply
    if tweet.in_reply_to_status_id != None:
        # Get Tweet attributes on trpl based on TWEET ID
        try:
            trpl = api.get_status(id=tweet.in_reply_to_status_id)
            # rpl is the Tweet node in which we Reply to
            rpl = graph.merge_one('Tweet', 'ID', trpl.id)
            # Retweets of retweets report 0; only originals keep the count.
            if hasattr(trpl, 'retweeted_status'):
                rtcount = 0
            else:
                rtcount = trpl.retweet_count
            rpl.properties.update({
                "Date": trpl.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                "Text": trpl.text.encode('utf8'),
                "Favourites": trpl.favorite_count,
                "Retweets": rtcount
neo_instance = "192.168.1.4:7474" neo_username = "******" neo_password = "******" authenticate(neo_instance,neo_username,neo_password) neo = Graph("http://192.168.1.4:7474/db/data") try: neo.schema.create_uniqueness_constraint("Function", "name") except: pass target = idaapi.get_root_filename() for f in Functions(): callee_name = GetFunctionName(f) callee = neo.merge_one("Function","name",callee_name) if target not in callee.labels: callee.labels.add(target) callee.push() for xref in XrefsTo(f): caller_name = GetFunctionName(xref.frm) if caller_name == '': print "Indirect call to " + callee_name + " ignored." continue caller = neo.merge_one("Function","name",caller_name) if target not in callee.labels: callee.labels.add(target) callee.push() caller_callee = Relationship(caller, "CALLS", callee) neo.get_or_create(caller_callee) print "Export finished"
class CategoryTree(object):
    """Mirror of the Amazon browse-node hierarchy for one country, stored in
    Neo4j as :Category nodes linked by HAS_CHILD, with an in-memory dict
    cache (self.categories) keyed by "<country><BrowseNodeId>".
    """
    def __init__(self, country):
        # Connection settings come from the project config file.
        project_conf = get_project_conf()
        neo_host = project_conf.get("NEO4J", "host")
        user = project_conf.get("NEO4J", "username")
        password = project_conf.get("NEO4J", "password")
        # Silence py2neo's chatty HTTP/cypher logging.
        logging.getLogger("py2neo.batch").setLevel(logging.WARNING)
        logging.getLogger("py2neo.cypher").setLevel(logging.WARNING)
        logging.getLogger("httpstream").setLevel(logging.WARNING)
        authenticate(neo_host, user, password)
        self.graph = Graph("http://%s/db/data/" % neo_host)
        # Constraint creation fails if it already exists; ignore.
        try:
            self.graph.schema.create_uniqueness_constraint("Category", "id")
        except:
            pass
        # Warm the cache with every Category already stored for this country.
        self.categories = self.get_categories(country)

    def merge_node(self, node, country, do_not_load=False):
        """Merge one browse node (Amazon API dict) into the graph and cache.

        The graph id is country-prefixed so ids are unique across countries.
        """
        category_id = "%s%s" % (country, str(node['BrowseNodeId']))
        category = self.graph.merge_one('Category', 'id', category_id)
        # Only set the name the first time; existing names are kept.
        if 'name' not in category.properties:
            category['name'] = node['Name']
        category['is_root'] = int(node.get('IsCategoryRoot', 0))
        category['do_not_load'] = bool(do_not_load)
        category['country'] = country
        category.push()
        if not category_id in self.categories:
            self.categories[category_id] = self.category_node_dict(category)
        return category

    def relationship(self, parent, child):
        # Unsaved parent-[:HAS_CHILD]->child relationship.
        return Relationship(parent, 'HAS_CHILD', child)

    def relationship_exists(self, parent, child):
        if len(list(self.graph.match(start_node=parent,
                                     end_node=child,
                                     rel_type='HAS_CHILD'))) > 0:
            return True
        return False

    def create_relationship(self, relationship):
        self.graph.create_unique(relationship)
        relationship.push()

    def create_relationships(self, parent, children):
        for child in children:
            self.create_relationship(parent, child)

    def add_new_category(self, browsenode, amazon_api, country):
        """Insert a browse node (and any missing ancestors) into the tree.

        Walks up the Ancestors chain until a cached node is found (to
        inherit its do_not_load flag), then merges nodes and HAS_CHILD
        relationships upward until a known parent or the category root is
        reached, and finally recomputes shortest_length_root for everything
        that was added. Returns the cached dict for the new category.
        """
        # browse_node expected format
        #{u'Ancestors': {u'BrowseNode': {u'Ancestors': {u'BrowseNode': {u'BrowseNodeId': u'560798',
        #                                                               u'Name': u'Electronics & Photo'}},
        #                                u'BrowseNodeId': u'560800',
        #                                u'IsCategoryRoot': u'1',
        #                                u'Name': u'Categories'}},
        # u'BrowseNodeId': u'1340509031',
        # u'Children': {u'BrowseNode': [{u'BrowseNodeId': u'560826',
        #                                u'Name': u'Accessories'},
        #                               {u'BrowseNodeId': u'2829144031',
        #                                u'Name': u'Big Button Mobile Phones'},
        #                               {u'BrowseNodeId': u'430574031',
        #                                u'Name': u'Mobile Broadband'},
        #                               {u'BrowseNodeId': u'5362060031',
        #                                u'Name': u'Mobile Phones & Smartphones'},
        #                               {u'BrowseNodeId': u'213005031',
        #                                u'Name': u'SIM Cards'},
        #                               {u'BrowseNodeId': u'3457450031',
        #                                u'Name': u'Smartwatches'}]},
        # u'Name': u'Mobile Phones & Communication'}
        added_categories = []
        do_not_load = True
        current_browsenode = browsenode
        # Determine the value of do not load according to the youngest ancestor's do_not_load
        while 'Ancestors' in current_browsenode:
            current_id = "%s%s" % (country, current_browsenode['BrowseNodeId'])
            current_node = self.categories.get(current_id, None)
            if not current_node:
                if type(current_browsenode['Ancestors']) is dict:
                    current_browsenode = current_browsenode['Ancestors']
                elif type(current_browsenode['Ancestors']) is list:
                    # This shouldn't happen. But if it does better to log and
                    # continue with the first one
                    current_browsenode = current_browsenode['Ancestors'][0]
            else:
                do_not_load = bool(current_node['do_not_load'])
                break
        # Create the missing nodes and relationships
        child = self.merge_node(browsenode, country, do_not_load)
        added_categories.append(child)
        current_browsenode = browsenode
        while 'Ancestors' in current_browsenode and int(current_browsenode.get("IsCategoryRoot", 0))!=1:
            if type(current_browsenode['Ancestors']) is dict:
                parent_browsenode_id = current_browsenode['Ancestors']['BrowseNode']['BrowseNodeId']
            elif type(current_browsenode['Ancestors']) is list:
                # This shouldn't happen. But if it does better to log and
                # continue with the first one
                parent_browsenode_id = current_browsenode['Ancestors'][0]['BrowseNode']['BrowseNodeId']
            parent_graph_id="%s%s" % (country,parent_browsenode_id)
            parent_node = self.categories.get(parent_graph_id, None)
            if parent_node:
                # NOTE(review): get_category returns a plain dict (see
                # category_node_dict), not a py2neo Node, yet it is passed
                # to relationship() here — verify create_unique accepts it.
                parent = self.get_category(parent_graph_id)
                relationship = self.relationship(parent, child)
                self.create_relationship(relationship)
                break
            else:
                # Parent unknown: fetch it from the Amazon API and recurse up.
                parent_browsenode = amazon_api.get_node(parent_browsenode_id)
                if type(parent_browsenode) is dict:
                    parent = self.merge_node(parent_browsenode, country, do_not_load)
                    relationship = self.relationship(parent, child)
                    self.create_relationship(relationship)
                    added_categories.append(parent)
                    current_browsenode = parent_browsenode
                elif parent_browsenode == "AWS.InvalidParameterValue":
                    # The node no longer exists upstream: prune it locally.
                    print "Deleting node %s and all its children" % str(parent_browsenode_id)
                    self.delete_category(parent_browsenode_id)
                    break
                else:
                    #self.logger.warning("Unknown error from amazon API.")
                    print 'Unknown error from amazon API. %s' % parent_browsenode
                    break
        # Recompute depth for everything added and refresh the cache.
        for category in added_categories:
            category_id = "%s%s" % (country, category['id'])
            length = self.get_shortest_length_to_root(category_id)
            category['shortest_length_root'] = length
            category.push()
            self.categories[category_id] = self.category_node_dict(category)
        new_category_id = "%s%s" % (country, browsenode['BrowseNodeId'])
        return self.categories.get(new_category_id)

    def category_node_dict(self, category_node):
        # Plain-dict projection of a Category node used by the cache.
        result = {
            'is_root': category_node['is_root'],
            'id': category_node['id'],
            'name': category_node['name'],
            'do_not_load': category_node['do_not_load'],
            'shortest_length_root': category_node['shortest_length_root']
        }
        return result

    def get_categories(self, country):
        """Load every Category node for `country` into an id-keyed dict."""
        categories = {}
        records = self.graph.find('Category', property_key='country',
                                  property_value=country)
        for category in records:
            categories[category['id']] = self.category_node_dict(category)
        return categories

    def get_category(self, category_id):
        # Single-node lookup; returns the dict projection or None.
        category = self.graph.find_one('Category', property_key='id',
                                       property_value=category_id)
        if category:
            return self.category_node_dict(category)

    def is_orphan(self, category_id):
        """True when the category is missing or unreachable from any root."""
        category = self.get_category(category_id)
        if not category:
            return True
        if not bool(category['is_root']):
            query = """MATCH p=a-[:HAS_CHILD*]->n
                       WHERE n.id = {id} AND a.is_root=1
                       RETURN p LIMIT 1"""
            cypher = self.graph.cypher
            path = cypher.execute_one(query, id=category_id)
            if not path:
                return True
        return False

    def get_children(self, category_id):
        # All transitive descendants (HAS_CHILD*) of the given category.
        query = """MATCH (n)-[r:HAS_CHILD*]->(m)
                   WHERE n.id = {id}
                   RETURN m"""
        cypher = self.graph.cypher
        children = cypher.execute(query, id=category_id)
        return children

    def delete_category(self, category_id):
        """Delete the category and its whole subtree (nodes + edges)."""
        cypher = self.graph.cypher
        children = self.get_children(category_id)
        delete_query = """
            MATCH (n {id:'%s'})
            OPTIONAL MATCH n-[r]-()
            DELETE n,r
        """
        if children:
            for record in children:
                child = record[0]
                cypher.execute_one(delete_query % child["id"])
        cypher.execute_one(delete_query % category_id)

    def get_shortest_length_to_root(self, category_id):
        # NOTE(review): the query ORDERs BY length DESC, so this returns the
        # LONGEST path length to a root despite the method's name — confirm
        # which is intended before relying on it.
        query = """MATCH p=a-[:HAS_CHILD*]->n
                   WHERE n.id={id} AND a.is_root=1
                   RETURN length(p)
                   ORDER BY length(p) DESC
                   LIMIT 1"""
        cypher = self.graph.cypher
        length = cypher.execute_one(query, id=category_id)
        return length
def update_info_and_links():
    """Refresh every Show node with TVRage/OMDb metadata and attach
    Country, Genre, Season, Episode and (Google-searched) Link nodes.

    For each Show: pull full show info from TVRage (XML) and OMDb (JSON),
    copy the OMDb fields onto the node, then create related nodes and
    relationships. API calls are retried until they succeed.

    NOTE(review): indentation below is reconstructed from a collapsed
    source line -- statement nesting should be verified against history.
    """
    print 'updating show info'
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)
    # Stream internal node id (eid) plus application-level show id.
    results = graph.cypher.stream("match (s:Show) return id(s) as eid,s.id")
    # start_id lets a previous partial run be resumed; 0 processes all shows.
    start_id = 0
    for record in results:
        if int(record['s.id']) < start_id:
            continue
        node_show = graph.node(record['eid'])
        result_dict = {}
        success = True
        # Retry loop: repeat until both API calls parse successfully.
        while success:
            try:
                show_info_e_list = requests.get(
                    'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
                result_dict = xmltodict.parse(show_info_e_list.text)
                omdb_show_info = requests.get(
                    'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
                dict_omdb_show_info = json.loads(omdb_show_info.text)
                if dict_omdb_show_info['Response'] == 'True':
                    for key, value in dict_omdb_show_info.iteritems():
                        node_show[key] = value
                success = False
            except ValueError as e:
                # NOTE(review): 'continue' with success still True retries
                # forever on persistently malformed JSON/XML.
                logger.exception("Value error")
                continue
            except Exception as e:
                logger.exception("Some network issue: will try again")
                success = True
        print str(node_show['name'])
        # info
        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()
        #Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')
        node_country = graph.merge_one("Country", 'country', from_country)
        node_country.push()
        show_from_country = Relationship(node_show, "from", node_country)
        graph.create(show_from_country)
        #Genres
        if result_dict['Show'].get('genres', None) is not None:
            genre_list = []
            # xmltodict yields a single dict (not a list) for one genre.
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])
            for genre in genre_list:
                node_genre = graph.merge_one("Genre", 'name', genre)
                node_genre.push()
                show_of_genre = Relationship(node_show, "of genre", node_genre)
                graph.create(show_of_genre)
        # Disabled filter that skipped shows which started before 2000.
        """
        try:
            print node_show['started']
            a = node_show['started'].split("/")
            if int(a[len(a)-1]) < 2000:
                continue
        except Exception:
            continue
        """
        #Seasons
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            continue
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])
        for season in season_list:
            node_season = Node.cast('Season', {'no': season['@no']})
            graph.create(node_season)
            show_season = Relationship(node_show, "has", node_season)
            graph.create(show_season)
            #Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])
            # count is the 1-based episode number within the season.
            count = 1
            for episode in episode_list:
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                graph.create(node_episode)
                success = True
                # Retry loop for the per-episode OMDb lookup.
                while success:
                    try:
                        omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                         .format(node_show['name'], node_season['no'], node_episode['epnum']))
                        dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                        if dict_omdb_episode_info['Response'] == 'True':
                            for key, value in dict_omdb_episode_info.iteritems():
                                node_episode[key] = value
                        node_episode.push()
                        success = False
                    except ValueError as e:
                        logger.exception("Value error")
                        continue
                    except Exception as e:
                        logger.exception("Some network issue: will try again")
                        success = True
                try:
                    # e.g. "Some Show s01e05" -- the query for link search.
                    search = node_show['name'] + ' s' + str(node_season['no']).zfill(2) + 'e' + str(node_episode['epnum']).zfill(2)
                    print search
                    #links
                    # Partner ids rotated across requests, presumably to
                    # spread Google CSE quota -- TODO confirm.
                    search_numbers = [3552639851, 8556419051, 2649486255, 7079685853, 8416818254, 1870757059, 1731156253, 4545021852, 6021755051, 8975221455]
                    for n in search_numbers:
                        links_from_google = requests.get(
                            'https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=10&hl=en&prettyPrint=false&source=gcsc&gss=.com&sig=cb6ef4de1f03dde8c26c6d526f8a1f35&cx=partner-pub-2526982841387487:{1}'
                            '&q={0}&googlehost=www.google.com&oq={0}'.format(search, n))
                        dict_from_google = json.loads(links_from_google.text)
                        for result in dict_from_google['results']:
                            node_link = Node.cast('Link', {
                                'host': result.get('visibleUrl', None),
                                'url': result['url']
                            })
                            graph.create(node_link)
                            link_episode = Relationship(node_episode, "has", node_link)
                            graph.create(link_episode)
                except Exception, err:
                    logger.exception("error grom google part")
                # NOTE(review): start node here is show_season (a
                # Relationship object), not node_season -- looks like it
                # was meant to be node_season; verify intent.
                show_episode = Relationship(show_season, "has", node_episode)
                graph.create(show_episode)
                count = count + 1
class ApiProvider():
    """Answers API requests backed by the Neo4j graph and lazily refreshes
    a Show node's TVRage/OMDb metadata on demand."""

    def __init__(self, request_data):
        # Raw request payload this provider will answer from.
        self._request_data = request_data
        authenticate("localhost:7474", "neo4j", "1234")
        # authenticate("52.27.227.159:7474", "neo4j", "1234")
        self.graph = Graph(GRAPH_CONNECTION_STRNIG)

    def _update_show(self, show_id):
        """One-shot (non-retrying) refresh of a single Show node: pulls
        TVRage/OMDb info, creates Country/Genre/Season/Episode/Link nodes
        and relationships, then marks the show 'updated'.

        Bails out early (without marking updated) on any API/parse error
        or when the show has no episode list.

        NOTE(review): indentation below is reconstructed from a collapsed
        source line -- verify statement nesting against history.

        :param show_id: internal Neo4j node id of the Show node.
        """
        # get the node from the graph
        node_show = self.graph.node(show_id)
        if node_show['updated'] == True:
            return
        result_dict = {}
        try:
            show_info_e_list = requests.get(
                'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
            result_dict = xmltodict.parse(show_info_e_list.text)
            omdb_show_info = requests.get(
                'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
            dict_omdb_show_info = json.loads(omdb_show_info.text)
            if dict_omdb_show_info['Response'] == 'True':
                for key, value in dict_omdb_show_info.iteritems():
                    node_show[key] = value
            # NOTE(review): 'success' is never read in this method --
            # apparent leftover from the retry loop in
            # update_info_and_links().
            success = False
        except ValueError as e:
            logger.exception("Value Error")
            return
        except Exception as e:
            logger.exception("Some network issue, will try again")
            return
        # add the new extracted data to the show
        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()
        # Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')
        node_country = self.graph.merge_one("Country", 'country', from_country)
        node_country.push()
        # add the relation to the graph
        show_from_country = Relationship(node_show, "from", node_country)
        self.graph.create(show_from_country)
        # Genres
        if result_dict['Show'].get('genres', None) is not None:
            genre_list = []
            # xmltodict yields a single dict (not a list) for one genre.
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])
            for genre in genre_list:
                # create the genre node
                node_genre = self.graph.merge_one("Genre", 'name', genre)
                node_genre.push()
                # add the Genre relation to the graph
                show_of_genre = Relationship(node_show, "of genre", node_genre)
                self.graph.create(show_of_genre)
        # Seasons
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            return
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])
        for season in season_list:
            # create node for season
            node_season = Node.cast('Season', {'no': season['@no']})
            self.graph.create(node_season)
            # create the relation n the graph
            show_season = Relationship(node_show, "has", node_season)
            self.graph.create(show_season)
            # Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])
            # count is the 1-based episode number within the season.
            count = 1
            for episode in episode_list:
                # create a node for episode
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                self.graph.create(node_episode)
                # add relation to the graph
                # NOTE(review): start node here is show_season (a
                # Relationship object), not node_season -- likely meant
                # node_season; verify intent.
                show_episode = Relationship(show_season, "has", node_episode)
                self.graph.create(show_episode)
                # Episode info
                try:
                    omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                     .format(node_show['name'], node_season['no'], node_episode['epnum']))
                    dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                    if dict_omdb_episode_info['Response'] == 'True':
                        for key, value in dict_omdb_episode_info.iteritems():
                            node_episode[key] = value
                    node_episode.push()
                except ValueError as e:
                    logger.exception("Value error")
                except Exception as e:
                    logger.exception("network issue: wil try again")
                # links
                try:
                    # e.g. "Some Show s01e05" -- query for the link search.
                    search = node_show['name'] + ' s' + str(node_season['no']).zfill(2) + 'e' + str(
                        node_episode['epnum']).zfill(2)
                    # links
                    # Partner ids rotated across requests, presumably to
                    # spread Google CSE quota -- TODO confirm.
                    search_numbers = [3552639851, 8556419051, 2649486255, 7079685853, 8416818254, 1870757059, 1731156253,
                                      4545021852, 6021755051, 8975221455]
                    for n in search_numbers:
                        links_from_google = requests.get(
                            'https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=10&hl=en&prettyPrint=false&source=gcsc&gss=.com&sig=cb6ef4de1f03dde8c26c6d526f8a1f35&cx=partner-pub-2526982841387487:{1}'
                            '&q={0}&googlehost=www.google.com&oq={0}'.format(search, n))
                        dict_from_google = json.loads(links_from_google.text)
                        for result in dict_from_google['results']:
                            # create node for link
                            node_link = Node.cast('Link', {
                                'host': result.get('visibleUrl', None),
                                'url': result['url']
                            })
                            self.graph.create(node_link)
                            # create the relation in the graph
                            link_episode = Relationship(node_episode, "has", node_link)
                            self.graph.create(link_episode)
                except Exception, err:
                    logger.exception("error grom google part")
                count = count + 1
        # notify that all went OK and finish
        node_show['updated'] = True
        node_show.push()
def handle(self, *args, **kwargs):
    """Management-command entry point: (re)create uniqueness constraints,
    then import Companies, their Users, Campaigns and per-Impression
    geo/visitor data from the relational DB into the Neo4j graph.

    NOTE(review): indentation below is reconstructed from a collapsed
    source line -- verify statement nesting against history.
    """
    graph = Graph(settings.GRAPH_URL)
    # dropping uniqueness contraints
    # graph.schema.drop_uniqueness_constraint('Company', 'id')
    # graph.schema.drop_uniqueness_constraint('User', 'id')
    # graph.schema.drop_uniqueness_constraint('User', 'email')
    # graph.schema.create_uniqueness_constraint('Visitor', 'key')
    # graph.schema.drop_uniqueness_constraint('Campaign', 'id')
    # graph.schema.drop_uniqueness_constraint('Impression', 'id')
    # graph.schema.drop_uniqueness_constraint('Postal', 'code')
    # graph.schema.drop_uniqueness_constraint('City', 'name')
    # graph.schema.drop_uniqueness_constraint('Country', 'name')
    # create initial labels
    graph.schema.create_uniqueness_constraint('Company', 'id')
    graph.schema.create_uniqueness_constraint('User', 'id')
    graph.schema.create_uniqueness_constraint('User', 'email')
    graph.schema.create_uniqueness_constraint('Visitor', 'key')
    graph.schema.create_uniqueness_constraint('Campaign', 'id')
    graph.schema.create_uniqueness_constraint('Impression', 'id')
    graph.schema.create_uniqueness_constraint('Postal', 'code')
    graph.schema.create_uniqueness_constraint('City', 'name')
    graph.schema.create_uniqueness_constraint('Country', 'name')
    # importing models (deferred so Django app registry is ready)
    from apps.users.models import User, Visitor
    from apps.companies.models import Company
    from apps.campaigns.models import Campaign
    from apps.impressions.models import Impression
    # importing serializers
    from apps.companies.api.serializers import BaseCompanySerializer
    from apps.campaigns.api.serializers import BaseCampaignSerializer
    from apps.impressions.api.serializers import ImpressionSerializer
    from apps.dashboard.serializers import DashboardUserSerializer
    for company in Company.objects.all():
        # ncompany = Node.cast('Company', CompanySerializer(company).data)
        print 'company: %s' %(company.id)
        # merge_one keeps the import idempotent across reruns.
        ncompany = graph.merge_one('Company', 'id', company.id)
        print BaseCompanySerializer(company).data
        ncompany.properties.update(BaseCompanySerializer(company).data)
        graph.push(ncompany)
        for user in company.users.all():
            nuser = graph.merge_one('User', 'email', user.email)
            nuser.properties.update(DashboardUserSerializer(user).data)
            graph.push(nuser)
            rel = Relationship.cast(ncompany, 'CompanyUser', nuser)
            graph.create_unique(rel)
        for campaign in company.campaigns.all():
            print 'campaign: %s' %(campaign.id)
            ncampaign = graph.merge_one('Campaign', 'id', campaign.id)
            ncampaign.properties.update(BaseCampaignSerializer(campaign).data)
            graph.push(ncampaign)
            rel = Relationship.cast(ncompany, 'CompanyCampaign', ncampaign)
            graph.create_unique(rel)
            for impression in campaign.impressions.all():
                meta = impression.hydrate_meta
                visitor = graph.merge_one('Visitor', 'key', meta['visitor'])
                if meta['country']:
                    country = graph.merge_one('Country', 'name', meta['country'])
                    graph.create_unique(
                        Relationship.cast(visitor, 'CampaignVisitorCountry', country))
                    graph.create_unique(
                        Relationship.cast(visitor, 'CampaignVisitor', ncampaign))
                    graph.create_unique(
                        Relationship.cast(country, 'CampaignCountry', ncampaign))
                if meta['city']:
                    city = graph.merge_one('City', 'name', meta['city'])
                    graph.create_unique(
                        Relationship.cast(city, 'CampaignCity', ncampaign)
                    )
                    graph.create_unique(
                        Relationship.cast(visitor, 'VistitorCity', city))
                    # NOTE(review): 'country' may be unbound (or stale from
                    # a previous impression) when meta['country'] is falsy
                    # but meta['city'] is truthy -- NameError risk; verify.
                    graph.create_unique(
                        Relationship.cast(city, 'CityCountry', country))
                if meta['postal_code']:
                    postal = graph.merge_one('Postal', 'code', meta['postal_code'])
                    # NOTE(review): same unbound/stale risk for 'city' here
                    # when meta['city'] was falsy.
                    graph.create_unique(
                        Relationship.cast(postal, 'CityPostalCode', city))
                    graph.create_unique(
                        Relationship.cast(postal, 'CampaignPostalCode', ncampaign))
                    graph.create_unique(
                        Relationship.cast(visitor, 'VistorPostalCode', postal))
def main():
    """Seed the graph with demo users from users.csv: create each User
    node, link it to City and Job nodes, attach random beer/brewery LIKES,
    then wire random FOLLOWS relationships between users.

    NOTE(review): indentation below is reconstructed from a collapsed
    source line -- verify statement nesting against history. Also note the
    py3-style print(..., end="") call; presumably the file imports
    print_function from __future__ -- confirm.
    """
    graph = Graph()
    graph.cypher.execute("CREATE CONSTRAINT ON (user:User) ASSERT user.username IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (job:Job) ASSERT job.title IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE" )
    # NOTE(review): file handle is never closed -- consider 'with open(...)'.
    userFile = open("users.csv", "r")
    # Skip the CSV header row.
    userFile.readline()
    lineNumber = 0
    for line in userFile.readlines():
        print("\r Processing line " + str(lineNumber), end="")
        lineNumber += 1
        parsedLine = line.split(",")
        # Assumed columns: 0=username, 1=name, 2=city, 3=job title,
        # 4=biography -- TODO confirm against users.csv.
        user = Node("User", username=parsedLine[0], name=parsedLine[1], biography=parsedLine[4], password=bcrypt.encrypt("password"))
        graph.create(user)
        city = graph.merge_one("City", "name", parsedLine[2])
        job = graph.merge_one("Job", "title", parsedLine[3])
        livesIn = Relationship(user, "IS_FROM", city)
        hasJob = Relationship(user, "HAS_JOB_TITLE", job)
        graph.create(livesIn)
        graph.create(hasJob)
        # Pick 100-599 random beers for this user to LIKE.
        result = graph.cypher.execute("MATCH (beer:Beer) "
                                      " RETURN beer, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}", range=random.randrange(100,600))
        for beer in result:
            beerNode = graph.find_one("Beer", "breweryDbId", beer.beer["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", beerNode)
            graph.create(likesBrewery)
        # Pick 0-9 random breweries for this user to LIKE.
        result = graph.cypher.execute("MATCH (brewery:Brewery) "
                                      " RETURN brewery, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}", range=random.randrange(0,10))
        for brewery in result:
            breweryNode = graph.find_one("Brewery", "breweryDbId", brewery.brewery["breweryDbId"])
            likesBrewery = Relationship(user, "LIKES", breweryNode)
            graph.create(likesBrewery)
        # Cap the import at ~300 users.
        if lineNumber > 300:
            break
    # Give each user 5-39 random FOLLOWS edges to other users.
    for user in graph.find("User"):
        userNode = graph.find_one("User", "username", user["username"])
        result = graph.cypher.execute("MATCH (user:User) "
                                      "WHERE user.username <> {me}"
                                      " RETURN user, rand() as rand "
                                      " ORDER BY rand"
                                      " LIMIT {range}", me=userNode["username"], range=random.randrange(5,40))
        for person in result:
            dude = graph.find_one("User", "username", person.user["username"])
            buddiesWith = Relationship(userNode, "FOLLOWS", dude)
            graph.create(buddiesWith)
# Connect to Neo4j (credentials elided in the URL).
graph_db = Graph("http://*****:*****@localhost:7474/db/data/")

filename = 'C:/Users/Gebruiker/Downloads/ingredients.txt'

# Read one ingredient name per line, stripping surrounding whitespace.
ingredients = []
with open(filename) as f:
    for line in f:
        ingredients.append(line.strip())
print(ingredients)

ingredientnumber = 0
grandtotal = 0
for ingredient in ingredients:
    try:
        # Idempotently get-or-create the Ingredient node.
        IngredientNode = graph_db.merge_one("Ingredient", "Name", ingredient)
    except Exception:
        # Fix: was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt (making Ctrl-C impossible mid-import). Keep the
        # best-effort skip of failing merges, but only for real errors.
        continue
    ingredientnumber += 1
    # Elasticsearch match_phrase query body for this ingredient; the huge
    # 'size' effectively requests all matching recipes.
    searchbody = {
        "size": 99999999,
        "query": {
            "match_phrase": {
                "ingredients": {
                    "query": ingredient,
                }
            }
        }
    }
akey = '' asecret = '' #authorization auth = tweepy.OAuthHandler(ckey, csecret) auth.set_access_token(atoken, asecret) #wait for rate limits api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) while(1): #find Followers for Tweet User #check if we've already explored this user's network (DB) users = graph.cypher.execute("MATCH (x:User)-[:POSTS]->(t) WHERE x.Exploration='' RETURN DISTINCT x.Screen_Name") for r in users: scrname=r[0] x=graph.merge_one("User","Screen_Name",scrname) print scrname try: #find Followers for Tweet User followers = api.followers_ids(screen_name=scrname) for page in paginate(followers, 100): results = api.lookup_users(user_ids=page) for result in results: #Only add relationships between users that already exist in the network because of their tweets (get_tweets.py, get_live_tweets.py) mynode = list(graph.find('User',property_key='Screen_Name', property_value=result.screen_name)) if len(mynode) > 0: # use of merge_one in order to avoid duplicates y=graph.merge_one("User","Screen_Name",result.screen_name.encode('utf8')) y.properties.update({"Name": result.name, "Description": result.description.encode('utf8'),"Location":result.location