class MetaInfoCreator(object):
    """Create a meta node plus one dataset node per patient file.

    The meta node is stored in a node index named ``'meta'`` and is linked
    to every dataset node through a ``DATASET`` relationship carrying the
    dataset label.
    """

    def __init__(self, databasePath, datasetDict):
        """Open the graph database and remember the datasets to register.

        :param databasePath: path handed to ``GraphDatabase``.
        :param datasetDict: mapping of dataset label -> path of a patient
            file; the first line of that file is skipped as a header and
            the second line is stored as the barcode.
        """
        # increment to existing db
        self.db = GraphDatabase(databasePath)
        self.datasetDict = datasetDict

    def start(self):
        """Create all info nodes, then shut the database down.

        The database is also shut down when node creation fails, so an
        aborted run does not leave the embedded store open.
        """
        try:
            self._createInfoNodes()
        except BaseException:
            self.db.shutdown()
            raise
        self._finish()

    def _createInfoNodes(self):
        """Create the meta node, its index entry and all dataset nodes."""
        print("Creating info nodes")
        # do all insertions within one transaction: complete failure or success!
        with self.db.transaction:
            metaVertex = self.db.node()  # self.db.reference_node
            print("Meta node created, id %i" % (metaVertex.id))
            index = self.db.node.indexes.create('meta')
            index['meta']['meta'] = metaVertex

            for label, patientPath in self.datasetDict.items():
                # The context manager closes the file even if a read fails.
                with open(patientPath, 'r') as patientFile:
                    patientFile.readline()  # header
                    barcode = patientFile.readline().strip("\n")
                datasetVertex = self.db.node()
                datasetVertex['datalabel'] = label
                datasetVertex['barcode'] = barcode
                metaVertex.relationships.create(
                    'DATASET', datasetVertex, label=label)

    def _finish(self):
        """Shut the database down and report success."""
        self.db.shutdown()
        print("Infonodes created")
def make_custs_embedded(amount=10000, offset=0, every=10000):
    """Create ``amount`` customer nodes inside a single transaction.

    Each node is named ``'Customer NNNNN'`` (zero-padded index) and linked
    to node id 1 with an ``INSTANCE_OF`` relationship.

    :param amount: number of customers to create.
    :param offset: index of the first customer.
    :param every: print a timestamped progress line whenever the index is a
        multiple of this value; ``0`` disables progress output.
    """
    db = GraphDatabase(db_path)
    try:
        customers = db.node[1]
        with db.transaction:
            for i in range(offset, offset + amount):
                name = 'Customer {:0=5}'.format(i)
                customer = db.node(name=name)
                customer.INSTANCE_OF(customers)
                # Guard against every=0, which would divide by zero.
                if every and i % every == 0:
                    print("%s %s" % (datetime.datetime.now(), i))
    finally:
        # Release the embedded store even when the insert loop fails.
        db.shutdown()
def addtodb(lnode, rnode, relationship):
    """Record one ``lnode -[relationship]-> rnode`` observation.

    Nodes are matched on their ``name`` property by scanning every node in
    the database; missing nodes are created. If an outgoing relationship of
    the given type to ``rnode`` already carries a ``hits`` property, that
    counter is incremented; otherwise a new relationship is created with
    ``hits`` set to 1.

    :param lnode: name of the source node.
    :param rnode: name of the target node.
    :param relationship: relationship type name.
    """
    # Create a database
    db = GraphDatabase(workingdb)
    try:
        # rel['subjsym'], rel['objsym'], rel['filler']
        with db.transaction:
            leftnode = None
            rightnode = None
            # When several nodes share a name the last one scanned wins.
            for node in db.nodes:
                for key, value in node.items():
                    if key == "name":
                        if value == lnode:
                            leftnode = node
                        if value == rnode:
                            rightnode = node

            if leftnode is None:
                leftnode = db.node(name=(lnode))
                print("Lnode " + lnode + "created")
            if rightnode is None:
                rightnode = db.node(name=(rnode))
                print("Rnode " + rnode + "created")

            relf = False
            for rel in leftnode.relationships.outgoing:
                if str(rel.type) == relationship and rel.end == rightnode:
                    # Snapshot the properties so the update below does not
                    # modify the container while it is being iterated.
                    for key, value in list(rel.items()):
                        if key == 'hits':
                            rel[key] = value + 1
                            relf = True
                            print("rel found. Increasing number of hits ")

            if not relf:
                rel = leftnode.relationships.create(relationship, rightnode)
                print("created relationship " + relationship)
                rel['hits'] = 1
    finally:
        # Shut the store down even when the transaction fails.
        db.shutdown()
def test_hello_world(self):
    """Run the "hello world" example against a temporary database.

    Two nodes are created and joined by a ``knows`` relationship, their
    ``name`` properties are joined into a message, the data is deleted
    again and the message text is asserted after cleanup.

    NOTE(review): the START/END SNIPPET markers presumably delimit text
    extracted into documentation -- confirm before editing between them.
    """
    folder_to_put_db_in = tempfile.mkdtemp()
    try:
        # START SNIPPET: helloworld
        from neo4j import GraphDatabase

        # Create a database
        db = GraphDatabase(folder_to_put_db_in)

        # All write operations happen in a transaction
        with db.transaction:
            firstNode = db.node(name='Hello')
            secondNode = db.node(name='world!')

            # Create a relationship with type 'knows'
            relationship = firstNode.knows(secondNode, name='graphy')

        # Read operations can happen anywhere
        message = ' '.join(
            [firstNode['name'], relationship['name'], secondNode['name']])

        print message

        # Delete the data
        with db.transaction:
            firstNode.knows.single.delete()
            firstNode.delete()
            secondNode.delete()

        # Always shut down your database when your application exits
        db.shutdown()
        # END SNIPPET: helloworld
    finally:
        # Remove the temporary store whether or not the example succeeded.
        if os.path.exists(folder_to_put_db_in):
            import shutil
            shutil.rmtree(folder_to_put_db_in)

    self.assertEqual(message, 'Hello graphy world!')
def test_hello_world(self):
    """Run the "hello world" example against a temporary database.

    Two nodes are created and joined by a ``knows`` relationship, their
    ``name`` properties are joined into a message, the data is deleted
    again and the message text is asserted after cleanup.

    NOTE(review): the START/END SNIPPET markers presumably delimit text
    extracted into documentation -- confirm before editing between them.
    """
    folder_to_put_db_in = tempfile.mkdtemp()
    try:
        # START SNIPPET: helloworld
        from neo4j import GraphDatabase

        # Create a database
        db = GraphDatabase(folder_to_put_db_in)

        # All write operations happen in a transaction
        with db.transaction:
            firstNode = db.node(name="Hello")
            secondNode = db.node(name="world!")

            # Create a relationship with type 'knows'
            relationship = firstNode.knows(secondNode, name="graphy")

        # Read operations can happen anywhere
        message = " ".join([firstNode["name"], relationship["name"], secondNode["name"]])

        print message

        # Delete the data
        with db.transaction:
            firstNode.knows.single.delete()
            firstNode.delete()
            secondNode.delete()

        # Always shut down your database when your application exits
        db.shutdown()
        # END SNIPPET: helloworld
    finally:
        # Remove the temporary store whether or not the example succeeded.
        if os.path.exists(folder_to_put_db_in):
            import shutil
            shutil.rmtree(folder_to_put_db_in)

    self.assertEqual(message, "Hello graphy world!")
class MetaInfoCreator(object):
    """Create a meta node plus one dataset node per patient file.

    The meta node is stored in a node index named ``'meta'`` and is linked
    to every dataset node through a ``DATASET`` relationship carrying the
    dataset label.
    """

    def __init__(self, databasePath, datasetDict):
        """Open the graph database and remember the datasets to register.

        :param databasePath: path handed to ``GraphDatabase``.
        :param datasetDict: mapping of dataset label -> path of a patient
            file; the first line of that file is skipped as a header and
            the second line is stored as the barcode.
        """
        # increment to existing db
        self.db = GraphDatabase(databasePath)
        self.datasetDict = datasetDict

    def start(self):
        """Create all info nodes, then shut the database down.

        The database is also shut down when node creation fails, so an
        aborted run does not leave the embedded store open.
        """
        try:
            self._createInfoNodes()
        except BaseException:
            self.db.shutdown()
            raise
        self._finish()

    def _createInfoNodes(self):
        """Create the meta node, its index entry and all dataset nodes."""
        print("Creating info nodes")
        # do all insertions within one transaction: complete failure or success!
        with self.db.transaction:
            metaVertex = self.db.node()  # self.db.reference_node
            print("Meta node created, id %i" % (metaVertex.id))
            index = self.db.node.indexes.create('meta')
            index['meta']['meta'] = metaVertex

            for label, patientPath in self.datasetDict.items():
                # The context manager closes the file even if a read fails.
                with open(patientPath, 'r') as patientFile:
                    patientFile.readline()  # header
                    barcode = patientFile.readline().strip("\n")
                datasetVertex = self.db.node()
                datasetVertex['datalabel'] = label
                datasetVertex['barcode'] = barcode
                metaVertex.relationships.create(
                    'DATASET', datasetVertex, label=label)

    def _finish(self):
        """Shut the database down and report success."""
        self.db.shutdown()
        print("Infonodes created")
class NeoCreator:
    """Small helper that creates nodes and ``tweets`` relationships."""

    def __init__(self, path):
        """Open the graph database stored at ``path``."""
        self.db = GraphDatabase(path)

    def shutdown(self):
        """Shut the underlying database down."""
        self.db.shutdown()

    def createNewNode(self, _id, nick):
        """Create a node with ``uid=_id`` and ``Label=nick`` and return it.

        :raises RuntimeError: if the database hands back no node. The
            original used a bare ``raise`` here, which has no active
            exception to re-raise and therefore failed with an unrelated
            error message.
        """
        with self.db.transaction:
            newNode = self.db.node(uid=_id, Label=nick)
            if newNode is None:
                raise RuntimeError(
                    "could not create node for uid %r" % (_id,))
        return newNode

    def createRelationship(self, origin, destiny):
        """Link ``origin`` to ``destiny`` with a ``tweets`` relationship."""
        with self.db.transaction:
            origin.tweets(destiny, Label="tweets")
class NeoCreator:
    """Small helper that creates nodes and ``tweets`` relationships."""

    def __init__(self, path):
        """Open the graph database stored at ``path``."""
        self.db = GraphDatabase(path)

    def shutdown(self):
        """Shut the underlying database down."""
        self.db.shutdown()

    def createNewNode(self, _id, nick):
        """Create a node with ``uid=_id`` and ``Label=nick`` and return it.

        :raises RuntimeError: if the database hands back no node. The
            original used a bare ``raise`` here, which has no active
            exception to re-raise and therefore failed with an unrelated
            error message.
        """
        with self.db.transaction:
            newNode = self.db.node(uid=_id, Label=nick)
            if newNode is None:
                raise RuntimeError(
                    "could not create node for uid %r" % (_id,))
        return newNode

    def createRelationship(self, origin, destiny):
        """Link ``origin`` to ``destiny`` with a ``tweets`` relationship."""
        with self.db.transaction:
            origin.tweets(destiny, Label="tweets")
def addUser(usuario, db_path='/tmp/'):
    """Create a user node, index it by name and link it to the 'lungo' cafe.

    :param usuario: user name; stored as the node's ``name`` property and
        used as the key in the ``users`` index.
    :param db_path: location of the graph database. Defaults to the path
        that was previously hard-coded.
    """
    # Create a database
    db = GraphDatabase(db_path)
    try:
        user_idx = db.node.indexes.get('users')
        cafe_idx = db.node.indexes.get('cafes')

        # All write operations happen in a transaction
        with db.transaction:
            firstNode = db.node(name=usuario, type_record='user')
            user_idx['name'][usuario] = firstNode

            # NOTE(review): no check is made that a 'lungo' cafe is
            # indexed -- confirm what ``.single`` yields when it is absent.
            secondNode = cafe_idx['name']['lungo'].single

            # Create a relationship with type 'toma'
            firstNode.toma(secondNode, cantidad=3)
    finally:
        # Always shut down your database when your application exits
        db.shutdown()
class CableNetwork(object):
    """Export cables and their ngrams from MongoDB into a Neo4j graph.

    Constructing an instance immediately runs the export selected by
    ``graphtype``.
    """

    def __init__(self, config, graphtype, minoccs=1, maxcoocs=1,
                 maxcables=None, year=None):
        """Open both databases and build the requested network.

        :param config: mapping providing ``config['general']['mongodb']``
            and ``config['general']['neo4j']``.
        :param graphtype: ``None`` or ``"occurrences"`` exports ngram nodes
            only; ``"cooccurrences"`` additionally links them. Any other
            value builds nothing.
        :param minoccs: ngrams whose ``occs`` is lower are skipped.
        :param maxcoocs: limit of cooccurrence records fetched per ngram.
        :param maxcables: maximum number of cables to process (default: all).
        :param year: restrict the export to cables of this year.
        """
        self.mongodb = CablegateDatabase(
            config['general']['mongodb'])["cablegate"]
        self.graphdb = GraphDatabase(config['general']['neo4j'])
        self.config = config
        if graphtype is None or graphtype == "occurrences":
            self.update_occurrences_network(
                minoccs, maxcoocs, maxcables, year, documents=False)
        elif graphtype == "cooccurrences":
            (nodecache, ngramcache) = self.update_occurrences_network(
                minoccs, maxcoocs, maxcables, year, documents=False)
            self.update_cooccurrences_network(
                nodecache, ngramcache, minoccs, maxcoocs)

    def update_occurrences_network(self, minoccs=1, maxcoocs=1,
                                   maxcables=None, year=None, documents=True):
        """Create one node per sufficiently frequent ngram.

        When ``documents`` is true a node is also created per cable and
        linked to its ngrams with a weighted ``occurrence`` relationship.

        :returns: ``(nodecache, ngramcache)`` where ``ngramcache`` maps an
            ngram id to its node id as a string and ``nodecache`` maps that
            string to the node.
        """
        nodecache = {}
        ngramcache = {}
        count = 0
        if maxcables is None:
            maxcables = self.mongodb.cables.count()
        if year is None:
            cable_curs = self.mongodb.cables.find(timeout=False)
        else:
            start = datetime(year, 1, 1, 0, 0, 0)
            # NOTE(review): the window ends on 5 January of the following
            # year -- confirm this overlap is intended.
            end = datetime(year + 1, 1, 5, 0, 0, 0)
            cable_curs = self.mongodb.cables.find(
                {"start": {"$gte": start, "$lt": end}}, timeout=False)

        for cable in cable_curs:
            # Checked before processing so that exactly ``maxcables`` cables
            # are exported (the previous post-increment ``>`` test let one
            # extra cable through).
            if count >= maxcables:
                break
            with self.graphdb.transaction as trans:
                cablenode = None
                if documents is True:
                    cable.pop('content', None)
                    cablenode = self.add_node(cable, trans)
                for ngid, occs in cable['edges']['NGram'].items():
                    ngram = self.mongodb.ngrams.find_one({'_id': ngid})
                    if ngram is None:
                        #logging.warning('ngram %s linked to document %s but not found in mongodb'%(ngid, cable['_id']))
                        continue
                    if ngram['occs'] < minoccs:
                        continue
                    ### first time if export this node
                    if ngid not in ngramcache:
                        new_ngramnode = self.add_node(ngram, trans)
                        ngramcache[ngid] = str(new_ngramnode.id)
                        nodecache[str(new_ngramnode.id)] = new_ngramnode
                    if documents is True:
                        # Link to the node itself; ngramcache only stores
                        # the node id as a string.
                        cablenode.occurrence(
                            nodecache[ngramcache[ngid]], weight=occs)
                logging.debug(
                    "done the network around cable %s" % cable["_id"])
                count += 1
        return (nodecache, ngramcache)

    def update_cooccurrences_network(self, nodecache, ngramcache,
                                     minoccs=1, maxcoocs=1):
        """Link the exported ngram nodes with ``cooccurrence`` relationships.

        Only cooccurrences whose two ngrams are both present in
        ``ngramcache`` are written; self-cooccurrences are deleted from
        MongoDB.
        """
        ngram_ids = list(ngramcache.keys())
        logging.debug("cooccurrences within : %d" % self.mongodb.ngrams.find(
            {'_id': {"$in": ngram_ids}}, timeout=False).count())
        with self.graphdb.transaction as trans:
            for ngram in self.mongodb.ngrams.find(
                    {'_id': {"$in": ngram_ids}}, timeout=False):
                # this REGEXP select only edges with source == ngram['_id']
                coocidRE = re.compile("^" + ngram['_id'] + "_[a-z0-9]+$")
                for cooc in self.mongodb.cooc.find(
                        {"_id": {"$regex": coocidRE}},
                        sort=[("value", pymongo.DESCENDING)],
                        limit=maxcoocs,
                        timeout=False):
                    ng1, ng2 = cooc['_id'].split("_")
                    # clean cooc db from self cooccurrences
                    if ng1 == ng2:
                        # NOTE(review): confirm the collection object really
                        # exposes ``delete``; plain PyMongo 2.x collections
                        # use ``remove``.
                        self.mongodb.cooc.delete({"_id": cooc['_id']})
                        continue
                    # exclude ngram2 if not in the previously selected
                    if ng2 not in ngramcache:
                        continue
                    # exclude self relationships
                    if ngramcache[ng2] == ngramcache[ng1]:
                        logging.warning(
                            "not setting relationship on a node itself")
                        continue
                    # write the cooccurrence
                    nodecache[ngramcache[ng1]].cooccurrence(
                        nodecache[ngramcache[ng2]], weight=cooc['value'])

    def set_node_attr(self, record, node):
        """
        Type conversion from python/mongodb to neo4j
        restricts a node's attributes to string or numeric
        """
        for key, value in record.items():
            # Exact type comparison: values of any other type (including
            # subclasses such as bool) are left out of the node.
            if type(value) == unicode:
                node[key.encode("ascii", "ignore")] = value.encode(
                    "ascii", "ignore")
            elif type(value) in (int, float, str):
                node[key.encode("utf-8", "replace")] = value
            elif type(value) == datetime:
                node[key.encode("utf-8", "replace")] = value.strftime(
                    '%Y-%m-%d')

    def add_node(self, record, transact=None):
        """Create a node carrying the storable attributes of ``record``.

        :param transact: an already open transaction; when ``None`` a new
            transaction is opened on the graph database for the creation.
        """
        if transact is None:
            # Was ``self.transaction``, an attribute this class never sets.
            with self.graphdb.transaction:
                node = self.graphdb.node()
                self.set_node_attr(record, node)
                return node
        node = self.graphdb.node()
        self.set_node_attr(record, node)
        return node

    def get_node(self, _id):
        """Return the graph node with id ``_id``."""
        return self.graphdb.node[_id]
class CableNetwork(object):
    """Export cable ngrams from MongoDB into a Neo4j cooccurrence graph.

    Constructing an instance immediately exports the ngram nodes and then
    links them by cooccurrence.
    """

    def __init__(self, config, overwrite=True, minoccs=1, mincoocs=1,
                 maxcables=None, year=None):
        """Open both databases and build the network.

        :param config: mapping providing ``config['general']['mongodb']``
            and ``config['general']['neo4j']``.
        :param overwrite: accepted but not used by this class.
        :param minoccs: ngrams whose ``occs`` is lower are skipped.
        :param mincoocs: passed as the ``limit`` of the cooccurrence query.
        :param maxcables: maximum number of cables to process (default: all).
        :param year: restrict the export to cables of this year.
        """
        self.mongodb = CablegateDatabase(
            config['general']['mongodb'])["cablegate"]
        self.graphdb = GraphDatabase(config['general']['neo4j'])
        self.config = config
        nodecache = self.update_occurrences_network(
            minoccs, mincoocs, maxcables, year)
        self.update_cooccurrences_network(
            nodecache, minoccs, mincoocs, maxcables)

    def update_occurrences_network(self, minoccs=1, mincoocs=1,
                                   maxcables=None, year=None):
        """Create one graph node per sufficiently frequent ngram.

        The id of each created node is written back to the ngram document
        as ``nodeid``.

        :returns: dict mapping ``str(node.id)`` to the node.
        """
        nodecache = {}
        count = 0
        if maxcables is None:
            maxcables = self.mongodb.cables.count()
        if year is None:
            cable_curs = self.mongodb.cables.find(timeout=False)
        else:
            start = datetime(year, 1, 1, 0, 0, 0)
            # NOTE(review): the window ends on 5 January of the following
            # year -- confirm this overlap is intended.
            end = datetime(year + 1, 1, 5, 0, 0, 0)
            cable_curs = self.mongodb.cables.find(
                {"start": {"$gte": start, "$lt": end}}, timeout=False)

        for cable in cable_curs:
            # Checked before processing so that exactly ``maxcables`` cables
            # are exported (the previous post-increment ``>`` test let one
            # extra cable through).
            if count >= maxcables:
                break
            with self.graphdb.transaction as trans:
                cable.pop('content', None)
                #cablenode = self.add_node(cable, trans)
                for ngid in cable['edges']['NGram']:
                    ngram = self.mongodb.ngrams.find_one({'_id': ngid})
                    if ngram is None:
                        logging.warning('ngram %s linked to document %s but not found in mongodb'%(ngid, cable['_id']))
                        continue
                    if ngram['occs'] < minoccs:
                        continue
                    ### first time if export this node
                    if 'nodeid' not in ngram or str(ngram['nodeid']) not in nodecache:
                        new_ngramnode = self.add_node(ngram, trans)
                        ngram['nodeid'] = new_ngramnode.id
                        nodecache[str(new_ngramnode.id)] = new_ngramnode
                        self.mongodb.ngrams.save(ngram)
                    #cablenode.occurrence(nodecache[str(ngram['nodeid'])], weight=occs)
                logging.debug(
                    "done the network around cable %s" % cable["_id"])
                count += 1
        return nodecache

    def update_cooccurrences_network(self, nodecache, minoccs=1, mincoocs=1,
                                     maxcables=None):
        """Link exported ngram nodes with ``cooccurrence`` relationships.

        A cooccurring ngram that has no ``nodeid`` yet gets a node created
        on the fly; that node is added to ``nodecache`` so it can be linked.
        """
        nodecachedkeys = [int(key) for key in nodecache.keys()]
        # Set copy for fast membership tests; the list is still what the
        # MongoDB ``$in`` queries receive.
        known_ids = set(nodecachedkeys)
        logging.debug("cooccurrences processing for %d ngram nodes"%self.mongodb.ngrams.find({'nodeid': {"$in": nodecachedkeys}}, timeout=False).count())
        with self.graphdb.transaction as trans:
            for ngram in self.mongodb.ngrams.find(
                    {'nodeid': {"$in": nodecachedkeys}}, timeout=False):
                # this REGEXP select only edges with source == ngram['_id']
                coocidRE = re.compile("^" + ngram['_id'] + "_[a-z0-9]+$")
                for cooc in self.mongodb.cooc.find(
                        {"_id": {"$regex": coocidRE}},
                        timeout=False,
                        sort=[("value", pymongo.DESCENDING)],
                        limit=mincoocs):
                    #if cooc['value'] < mincoocs: continue
                    ng1, ng2 = cooc['_id'].split("_")
                    if ng1 == ng2:
                        # NOTE(review): confirm the collection object really
                        # exposes ``delete``; plain PyMongo 2.x collections
                        # use ``remove``.
                        self.mongodb.cooc.delete({"_id": cooc['_id']})
                        continue
                    ngram2 = self.mongodb.ngrams.find_one({'_id': ng2})
                    if ngram2 is None:
                        # Dangling cooccurrence: nothing to link to.
                        continue
                    if 'nodeid' not in ngram2:
                        new_ngramnode = self.add_node(ngram2, trans)
                        # Was assigned to ``ngram``, which clobbered the
                        # source id and left ``ngram2`` without a nodeid.
                        ngram2['nodeid'] = new_ngramnode.id
                        nodecache[str(new_ngramnode.id)] = new_ngramnode
                        known_ids.add(new_ngramnode.id)
                        #self.mongodb.ngrams.save(ngram2)
                    if ngram2['nodeid'] == ngram['nodeid']:
                        logging.warning(
                            "not setting relationship on a node itself")
                        continue
                    if ngram2['nodeid'] not in known_ids:
                        #logging.warning("ngram not in nodecache keys, skipping")
                        continue
                    # inserting the cooccurrence
                    nodecache[str(ngram['nodeid'])].cooccurrence(
                        nodecache[str(ngram2['nodeid'])],
                        weight=cooc['value'])

    def set_node_attr(self, record, node):
        """
        Type conversion from python/mongodb to neo4j
        restricts a node's attributes to string or numeric
        """
        for key, value in record.items():
            # Exact type comparison: values of any other type (including
            # subclasses such as bool) are left out of the node.
            if type(value) == unicode:
                node[key.encode("ascii", "ignore")] = value.encode(
                    "ascii", "ignore")
            elif type(value) in (int, float, str):
                node[key.encode("utf-8", "replace")] = value
            elif type(value) == datetime:
                node[key.encode("utf-8", "replace")] = value.strftime(
                    '%Y-%m-%d')

    def add_node(self, record, transact=None):
        """Create a node carrying the storable attributes of ``record``.

        :param transact: an already open transaction; when ``None`` a new
            transaction is opened on the graph database for the creation.
        """
        if transact is None:
            # Was ``self.transaction``, an attribute this class never sets.
            with self.graphdb.transaction:
                node = self.graphdb.node()
                self.set_node_attr(record, node)
                return node
        node = self.graphdb.node()
        self.set_node_attr(record, node)
        return node

    def get_node(self, _id):
        """Return the graph node with id ``_id``."""
        return self.graphdb.node[_id]
def test_invoice_app(self):
    """Run the invoice example application against a temporary database.

    The example creates two customers, sends each of them invoices of
    100 * i for i in 1..11 and asserts that seven invoices of at least 500
    are found for 'Acme Inc.'.

    NOTE(review): the START/END SNIPPET markers presumably delimit text
    extracted into documentation -- confirm before editing between them.
    NOTE(review): ``create_invoice`` returns ``customer`` rather than the
    invoice it created; the return value is unused here -- confirm intent.
    """
    folder_to_put_db_in = tempfile.mkdtemp()
    try:
        # START SNIPPET: invoiceapp-setup
        from neo4j import GraphDatabase, INCOMING, Evaluation

        # Create a database
        db = GraphDatabase(folder_to_put_db_in)

        # All write operations happen in a transaction
        with db.transaction:

            # A node to connect customers to
            customers = db.node()

            # A node to connect invoices to
            invoices = db.node()

            # Connected to the reference node, so
            # that we can always find them.
            db.reference_node.CUSTOMERS(customers)
            db.reference_node.INVOICES(invoices)

            # An index, helps us rapidly look up customers
            customer_idx = db.node.indexes.create('customers')
        # END SNIPPET: invoiceapp-setup

        # START SNIPPET: invoiceapp-domainlogic-create
        def create_customer(name):
            with db.transaction:
                customer = db.node(name=name)
                customer.INSTANCE_OF(customers)

                # Index the customer by name
                customer_idx['name'][name] = customer
            return customer

        def create_invoice(customer, amount):
            with db.transaction:
                invoice = db.node(amount=amount)
                invoice.INSTANCE_OF(invoices)

                invoice.SENT_TO(customer)
            return customer
        # END SNIPPET: invoiceapp-domainlogic-create

        # START SNIPPET: invoiceapp-domainlogic-get-by-idx
        def get_customer(name):
            return customer_idx['name'][name].single
        # END SNIPPET: invoiceapp-domainlogic-get-by-idx

        # START SNIPPET: invoiceapp-domainlogic-get-by-cypher
        def get_invoices_with_amount_over(customer, min_sum):
            # Find all invoices over a given sum for a given customer.
            # Note that we return an iterator over the "invoice" column
            # in the result (['invoice']).
            return db.query('''START customer=node({customer_id}) MATCH invoice-[:SENT_TO]->customer WHERE has(invoice.amount) and invoice.amount >= {min_sum} RETURN invoice''', customer_id = customer.id, min_sum = min_sum)['invoice']
        # END SNIPPET: invoiceapp-domainlogic-get-by-cypher

        # START SNIPPET: invoiceapp-create-and-search
        for name in ['Acme Inc.', 'Example Ltd.']:
            create_customer(name)

        # Loop through customers
        for relationship in customers.INSTANCE_OF:
            customer = relationship.start
            for i in range(1,12):
                create_invoice(customer, 100 * i)

        # Finding large invoices
        large_invoices = get_invoices_with_amount_over(get_customer('Acme Inc.'), 500)

        # Getting all invoices per customer:
        for relationship in get_customer('Acme Inc.').SENT_TO.incoming:
            invoice = relationship.start
        # END SNIPPET: invoiceapp-create-and-search

        self.assertEqual(len(list(large_invoices)), 7)
        db.shutdown()
    finally:
        # Remove the temporary store whether or not the example succeeded.
        if os.path.exists(folder_to_put_db_in):
            import shutil
            shutil.rmtree(folder_to_put_db_in)
#!/usr/bin/python #coding:utf8 # Created: 2013-11-05 # ''' A sample app using cypher and indexes ''' from neo4j import GraphDatabase, INCOMING, Evaluation # Create a database db = GraphDatabase(folder_to_put_db_in) # All write operations happen in a transaction with db.transaction: # A node to connect customers to customers = db.node() # A node to connect invoices to invoices = db.node() # Connected to the reference node, so # that we can always find them. db.reference_node.CUSTOMERS(customers) db.reference_node.INVOICES(invoices) # An index, helps us rapidly look up customers customer_idx = db.node.indexes.create('customers')
class Neo4jHandler:
    """Store parsed event reports ("relatos") as a Neo4j graph.

    Nodes are identified by their ``key`` property, which is looked up
    through the database's ``node_auto_index``.
    """

    def __init__(self, graph_path):
        """Open the database with auto-indexing on ``key`` and ``name``."""
        config = {"node_keys_indexable": 'key,name',
                  "node_auto_indexing": 'true'}
        self.graph_db = GraphDatabase(graph_path, **config)
        self.node_auto_index = self.graph_db.node.indexes.get(
            'node_auto_index')

    def findNode(self, node_dict):
        """Return the node indexed under ``node_dict['key']``, or ``None``."""
        result = self.node_auto_index['key'][node_dict['key']]
        if result:
            # .single goes mad when there is a result set with more than one value
            return result.single
        return None

    def createUniqueNodeByKey(self, node_dict):
        """Return the node for ``node_dict['key']``, creating it if missing.

        A new node receives every entry of ``node_dict`` as a property.
        """
        existing_node = self.findNode(node_dict)
        if existing_node:
            # if it was found return it
            return existing_node
        # if not create it
        with self.graph_db.transaction:
            return self.graph_db.node(**node_dict)

    def findNodeByKey(self, key, value):
        """Return the single node whose indexed ``key`` equals ``value``."""
        return self.node_auto_index[key][value].single

    def addEvent(self, parsed_data):
        """Insert one parsed event and everything attached to it.

        ``parsed_data`` is read for the keys ``id_relato``, ``actos``,
        ``grupo``, ``victimas`` and ``observaciones``. The input is not
        modified, so the same dictionary can be passed again.
        """
        # create event node
        key_of_relato = "relato:" + str(parsed_data['id_relato'])
        with self.graph_db.transaction:
            relato_neo4j_node = self.graph_db.node(
                key=key_of_relato, type="relato")

        # connect victims to events via actos
        actos_processed = [self.getCrimes(acto)
                           for acto in parsed_data['actos']]

        # groups that appear as victims of some acto
        victim_groups = set()
        for acto in actos_processed:
            key_of_victim_node = acto[0]
            if key_of_victim_node.startswith('grupo:'):
                victim_groups.add(key_of_victim_node)

        # get the node dictionary of the armed entities
        list_of_node_dict_armed_entities = list()
        list_of_node_dict_victim_groups = list()
        for armed_entity in parsed_data['grupo']:
            node_dict_entity = self.getArmedEntity(armed_entity)
            if node_dict_entity['key'] not in victim_groups:
                list_of_node_dict_armed_entities.append(node_dict_entity)
            else:
                list_of_node_dict_victim_groups.append(node_dict_entity)

        # add them to neo4j
        for armed_node_dict in list_of_node_dict_armed_entities:
            # create armed group node
            armed_node = self.createUniqueNodeByKey(armed_node_dict)
            # relate armed group with event
            with self.graph_db.transaction:
                armed_node.relationships.create(
                    'responsible_for', relato_neo4j_node)

        for victim_group in list_of_node_dict_victim_groups:
            # make sure the victim group node exists
            self.createUniqueNodeByKey(victim_group)

        # get the node dictionaries for the victims (all of them first, so
        # a malformed victim aborts before any victim node is written)
        victims = [self.getVictim(victima)
                   for victima in parsed_data['victimas']]

        # add them to neo4j
        for victim_node_dict, other_relations in victims:
            # create victim node
            victim_node = self.createUniqueNodeByKey(victim_node_dict)
            # connect to things like rango de edad, sexo, organizacion
            for relation_name, other_node_dict in other_relations:
                other_neo4j_node = self.createUniqueNodeByKey(other_node_dict)
                # connecting victim to other info
                with self.graph_db.transaction:
                    victim_node.relationships.create(
                        relation_name, other_neo4j_node)

        for key_of_victim_node, _key_of_armed_group, arc_dictionary in actos_processed:
            victim_neo4j_node = self.findNodeByKey('key', key_of_victim_node)
            # NOTE(review): both the relationship type and its properties
            # come from ``dict.get`` in getCrimes and may therefore be None;
            # confirm the database accepts that.
            with self.graph_db.transaction:
                relato_neo4j_node.relationships.create(
                    arc_dictionary['agresion_particular'],
                    victim_neo4j_node,
                    **arc_dictionary)

        # transform each observacion in a node connected to the event
        for observacion in parsed_data['observaciones']:
            if 'bienes' in observacion:
                continue
            for key, value in observacion.items():
                # A scalar is handled like a one-element list.
                elements = value if isinstance(value, list) else [value]
                for element in elements:
                    observacion_node_dict = {
                        'key': "observacion:" + element,
                        'type': "observacion",
                    }
                    observacion_neo4j_node = self.createUniqueNodeByKey(
                        observacion_node_dict)
                    with self.graph_db.transaction:
                        relato_neo4j_node.relationships.create(
                            key, observacion_neo4j_node)

    def getVictim(self, dictionary_of_victim_node):
        """Split a parsed victim into node properties and adjacent relations.

        :param dictionary_of_victim_node: parsed victim; must contain
            ``id_persona``. It is left unmodified.
        :returns: ``(victim_node_dict, adjacent_relations)`` where the
            second item is a list of ``(relation_name, node_dict)`` tuples,
            one per remaining field of the victim.
        """
        victim_node_dict = dict()
        victim_node_dict['key'] = (
            'persona:' + dictionary_of_victim_node['id_persona'])
        victim_node_dict['fecha_nacimiento'] = dictionary_of_victim_node.get(
            'fecha_nacimiento')
        victim_node_dict['name'] = dictionary_of_victim_node.get('nombre')
        victim_node_dict['type'] = 'victim'
        # making gender a node property
        victim_node_dict['sexo'] = dictionary_of_victim_node.get('sexo')

        # Fields already stored as properties; skipped below instead of
        # being deleted from the caller's dictionary.
        consumed = ('id_persona', 'fecha_nacimiento', 'nombre', 'sexo')

        # instead of using properties use node and relations to define the other fields
        adjacent_relations = list()
        for relation_name, value in dictionary_of_victim_node.items():
            if relation_name in consumed:
                continue
            other_node_dict = {
                'key': relation_name + ":" + value,
                'type': relation_name,
            }
            adjacent_relations.append((relation_name, other_node_dict))
        return victim_node_dict, adjacent_relations

    def getArmedEntity(self, dictionary_of_armed_entity):
        """Return the node properties for an armed group."""
        armed_node_dict = dict()
        armed_node_dict['key'] = (
            "grupo:" + dictionary_of_armed_entity['id_grupo'])
        armed_node_dict['type'] = "victimario"
        if 'nombre_grupo' in dictionary_of_armed_entity:
            armed_node_dict['name'] = dictionary_of_armed_entity[
                'nombre_grupo']
        return armed_node_dict

    def getCrimes(self, dictionary_of_crime):
        """Return ``(victim_key, armed_group_key, arc_properties)`` for an acto.

        The victim key refers to a group when ``id_grupo_victima`` is
        present, otherwise to the person in ``id_victima_individual``.
        """
        if 'id_grupo_victima' in dictionary_of_crime:
            key_of_victim_node = (
                "grupo:" + dictionary_of_crime['id_grupo_victima'])
        else:
            key_of_victim_node = (
                "persona:" + dictionary_of_crime['id_victima_individual'])
        key_of_armed_group = (
            "grupo:" + dictionary_of_crime['id_presunto_grupo_responsable'])
        arc_dictionary = dict()
        arc_dictionary['agresion_particular'] = dictionary_of_crime.get(
            'agresion_particular')
        arc_dictionary['agresion'] = dictionary_of_crime.get('agresion')
        return (key_of_victim_node, key_of_armed_group, arc_dictionary)
#!/usr/bin/env python
"""Load comma-separated edge records into the embedded graph database.

Usage: pass the input file as the first command-line argument. Each line
is split on commas and read as ``name1,name2,ntype1,ntype2,etype``; two
nodes joined by a ``knows`` relationship are created per line, each line
in its own transaction.
"""
import networkx as nx
import sys
from neo4j import GraphDatabase

#path ='/home/jvipin/scratch/ibm_dns_ocr/misc/graphdb/'
path = '/scratch/vipin/working_dns_data/ccs/precision/ibm_dns_ocr/misc/graphdb/'

# The input file is opened first, as before, so a missing file fails
# before the database is touched; both resources are released on error.
with open(sys.argv[1], 'r') as f:
    db = GraphDatabase(path)
    try:
        count = 0
        for ln in f:
            count += 1
            line = ln.rstrip().split(',')
            if count % 10000 == 0:
                print(count)
            # print line[0],line[1],line[2],line[3],line[4]
            with db.transaction:
                fnode = db.node(name=line[0], ntype=line[2])
                snode = db.node(name=line[1], ntype=line[3])
                rel = fnode.knows(snode, etype=line[4])
    finally:
        db.shutdown()
class Memory(object):
    """Graph-database backed long-term memory of a ``mind`` object.

    Well-known nodes (root, time root, place root, object root, ...) are
    kept in ``self.vars``; named nodes are registered in the ``name`` index.
    """

    def __init__(self, mind, dbpath="data"):
        """Open the database at ``dbpath`` and load or create the base graph.

        The base graph is created when the database holds at most one node.

        :param mind: owner object; its ``log`` and ``error`` methods are
            used for reporting.
        :param dbpath: location of the graph database.
        """
        self.mind = mind
        self.dbpath = dbpath
        self.mind.log("Starting Graph Database at " + dbpath)
        self.db = GraphDatabase(dbpath)
        self.root = None
        memorySize = len(self.db.nodes)
        self.log("starting with " + str(memorySize) + " nodes in memory")
        self.vars = {}
        self.initialGraph(memorySize <= 1)

    def __del__(self):
        # __init__ may have failed before the database was opened.
        if getattr(self, "db", None) is not None:
            self.shutdown()

    def shutdown(self):
        """Log and shut the graph database down."""
        self.log("shutting down Graph Database...")
        self.db.shutdown()

    def log(self, something):
        """Forward a log message to the owning mind."""
        self.mind.log(something)

    def error(self, errorType, command):
        """Forward an error report to the owning mind."""
        self.mind.error(errorType, command)

    def initialTime(self, create=False):
        """Create (``create=True``) or load the time sub-graph and indexes."""
        if create:
            self.log("create time graph...")
            with self.db.transaction:
                self.timeIndex = self.db.node.indexes.create('abstractTime')
                self.eventIndex = self.db.node.indexes.create('events')
                self.vars["timeroot"] = self.createNode("TIME_ROOT")
                self.vars["root"].TIMERELATION(self.vars["timeroot"])
                self.vars["timeroot"].TIMERELATION(
                    self.createNode("JALALI_CALENDAR"), cal_type="J")
                self.vars["timeroot"].TIMERELATION(
                    self.createNode("HIJRI_CALENDAR"), cal_type="H")
                self.vars["timeroot"].TIMERELATION(
                    self.createNode("GREGORY_CALENDAR"), cal_type="G")
        else:
            self.log("initial time...")
            with self.db.transaction:
                self.vars["timeroot"] = (
                    self.vars["root"].TIMERELATION.single.end)
                self.timeIndex = self.db.node.indexes.get('abstractTime')
                # Uses ``db.node`` like every other index lookup here.
                self.eventIndex = self.db.node.indexes.get('events')

    def initialPlace(self, create=False):
        """Create (``create=True``) or load the place root and its index."""
        if create:
            self.log("create place graph ...")
            with self.db.transaction:
                self.placeIndex = self.db.node.indexes.create('abstractPlace')
                self.vars["placeroot"] = self.createNode(name="PLACE_ROOT")
                self.vars["root"].PLACERELATION(self.vars["placeroot"])
        else:
            self.log("initial place ...")
            with self.db.transaction:
                self.vars["placeroot"] = (
                    self.vars["root"].PLACERELATION.single.end)
                self.placeIndex = self.db.node.indexes.get('abstractPlace')

    def initialObjects(self, create=False):
        """Create or load the object, action and event roots plus me/master."""
        if create:
            with self.db.transaction:
                self.vars["objectroot"] = self.createNode("OBJECT_ROOT")
                self.vars["root"].OBJECTRELATION(self.vars["objectroot"])
                self.vars["actionroot"] = self.createNode("ACTION_ROOT")
                self.vars["objectroot"].ACTIONRELATION(
                    self.vars["actionroot"])
                self.vars["eventroot"] = self.createNode("EVENT_ROOT")
                self.vars["objectroot"].EVENTRELATION(self.vars["eventroot"])
                self.vars["me"] = self.createNode('me')
                self.vars["me"].ISA(self.vars["objectroot"], type="me")
                self.vars["master"] = self.createNode('master')
                self.vars["master"].ISA(
                    self.vars["objectroot"], type="master")
        else:
            objectroot = self.vars["root"].OBJECTRELATION.single.end
            self.vars["objectroot"] = objectroot
            self.vars["actionroot"] = objectroot.ACTIONRELATION.single.end
            self.vars["eventroot"] = objectroot.EVENTRELATION.single.end
            self.vars["me"] = [
                rel for rel in objectroot.ISA.incoming
                if rel["type"] == "me"][0].start
            self.vars["master"] = [
                rel for rel in objectroot.ISA.incoming
                if rel["type"] == "master"][0].start

    def initialGraph(self, create=False):
        """Create or load the root node and indexes, then the sub-graphs."""
        if create:
            self.log("create Graph ...")
            with self.db.transaction:
                self.vars["root"] = self.db.node[0]
                self.vars["root"]['name'] = 'ROOT'
                self.nameIndex = self.db.node.indexes.create('name')
                self.indexName(self.vars["root"])
                self.messageIndex = self.db.node.indexes.create(
                    'message', type='fulltext')
        else:
            self.log("initial graph...")
            with self.db.transaction:
                self.vars["root"] = self.db.node[0]
                self.nameIndex = self.db.node.indexes.get('name')
                self.messageIndex = self.db.node.indexes.get('message')
        self.initialTime(create)
        self.initialPlace(create)
        self.initialObjects(create)

    def getNodes(self, str):
        """Resolve a back-quoted id list such as ``"`1,2`"`` to nodes.

        Returns a single node for one id, a list for several ids, and the
        argument unchanged when it is not wrapped in back-quotes.
        """
        if str.startswith("`") and str.endswith("`"):
            result = [self.db.node[int(node_id)]
                      for node_id in str[1:-1].split(",")]
            if len(result) == 1:
                return result[0]
            if len(result) > 1:
                return result
        return str

    def cypher(self, query):
        """Run a Cypher query on the database and return its result."""
        return self.db.query(query)

    def findNodeById(self, id):
        """Return the node with the given id."""
        return self.db.node[id]

    def indexName(self, obj):
        """(Re-)register ``obj`` in the name index under its ``name``."""
        try:
            del self.nameIndex['name'][obj['name']][obj]
        except Exception:
            # Best effort: the node may simply not be indexed yet.
            pass
        self.nameIndex['name'][obj['name']] = obj

    def createNode(self, name=None):
        """Create a node; a named node is also added to the name index."""
        with self.db.transaction:
            if name is None:
                node = self.db.node()
            else:
                node = self.db.node(name=name)
                self.indexName(node)
        return node

    def createNodeOFType(self, typename=None, prop=None):
        """Create a node with properties ``prop`` and link it to its type.

        :param typename: when given, the node gets an ``IS`` relationship
            to the type node returned by ``objectType``.
        :param prop: mapping of property name -> value (default: none).
        """
        if prop is None:
            prop = {}
        # NOTE(review): this goes through ``self.mind.memory.db`` while all
        # other methods use ``self.db`` -- presumably the same database.
        with self.mind.memory.db.transaction:
            node = self.createNode(prop.get("name"))
            self.setNodeProperties(node, prop)
            if typename is not None:
                typevar = self.objectType(typename)
                node.IS(typevar)
        return node

    def setNodeProperties(self, node, prop):
        """Copy ``prop`` onto ``node``, re-indexing when the name is set."""
        with self.db.transaction:
            for k, v in prop.items():
                node[k] = v
                if k == "name":
                    self.indexName(node)

    def setRelationProperties(self, rel, prop):
        """Copy ``prop`` onto the relationship ``rel``."""
        with self.db.transaction:
            for k, v in prop.items():
                rel[k] = v

    def createRelation(self, type, prop, src, target):
        """Create a ``type`` relationship from ``src`` to ``target``."""
        with self.db.transaction:
            rel = src.relationships.create(type, target)
            self.setRelationProperties(rel, prop)

    def isTypeOf(self, node, type):
        """Return whether ``node`` has a ``type`` property equal to ``type``."""
        return (node is not None and node.hasProperty("type")
                and node["type"] == type)

    def getProperty(self, node, name, default=None):
        """Return property ``name`` of ``node``, or ``default`` if absent."""
        if node is None or not node.hasProperty(name):
            return default
        return node[name]

    def typeOfObject(self, node):
        """Return the ``IS`` targets of ``node`` other than the object root."""
        if node is None:
            return []
        objectroot_id = self.vars["objectroot"].id
        return [v.end for v in node.IS.outgoing
                if v.end.id != objectroot_id]

    def objectType(self, typename):
        """Return the type node named ``typename``, creating it if needed.

        ``None`` yields the object root.
        """
        with self.db.transaction:
            if typename is None:
                return self.vars["objectroot"]
            resultVarname = self.mind.workingMemory.getTmpVarname()
            # NOTE(review): the name is interpolated into the query text,
            # so a quote inside ``typename`` breaks or alters the query.
            query = """CYPHER 1.9 start root=node(0),t=node:name(name='%s') MATCH (root)-[:OBJECTRELATION]->()<-[:IS]-(t) RETURN t"""%(typename)
            self.mind.memoryWindow.cypher(resultVarname, query)
            resultVar = self.mind.workingMemory.getVar(resultVarname)
            if len(resultVar) == 0 or resultVar[0] is None:
                typevar = self.createNode(typename)
                typevar.IS(self.mind.workingMemory.getVar("objectroot")[0])
            else:
                typevar = resultVar[0]
            return typevar

    def actionType(self, subject, actionname):
        """Return the action node(s) named ``actionname``.

        With an empty ``subject`` a single "objective" action is looked up
        or created. Otherwise a "subjective" action container is looked up
        or created per subject type and the matching action inside it is
        returned or created.

        NOTE(review): the nesting of the subject loop could not be
        recovered unambiguously from the original text; as written here the
        result list is reset for every subject, so only the last subject's
        actions are returned -- confirm against the intended behaviour.
        """
        with self.db.transaction:
            resultVarname = self.mind.workingMemory.getTmpVarname()
            if len(subject) == 0:
                # NOTE(review): names are interpolated into the query text.
                query = """CYPHER 1.9 start root=node(0),t=node:name(name='%s') MATCH (root)-[:OBJECTRELATION]->()-[:ACTIONRELATION]->()<-[r:IS]-(t) WHERE has(r.type) and r.type="objective" RETURN t"""%(actionname)
                self.mind.memoryWindow.cypher(resultVarname, query)
                resultvar = self.mind.workingMemory.getVar(resultVarname)
                if resultvar is None or len(resultvar) == 0 or resultvar[0] is None:
                    actionvar = self.createNode(actionname)
                    actionvar.IS(self.vars["actionroot"], type="objective")
                else:
                    actionvar = resultvar[0]
                return [actionvar]
            else:
                for sbj in subject:
                    sbjtype = self.typeOfObject(sbj)
                    print(sbjtype)
                    if len(sbjtype) == 0:
                        sbjtype = [sbj]
                    actionvars = []
                    for st in sbjtype:
                        query = """CYPHER 1.9 start root=node(0),t=node:name(name='%s') MATCH (root)-[:OBJECTRELATION]->()-[:ACTIONRELATION]->()<-[r:IS]-(w)-[:WHO]->(t) WHERE has(r.type) and r.type="subjective" RETURN w"""%(self.getProperty(st, "name", "---"))
                        self.mind.memoryWindow.cypher(resultVarname, query)
                        resultvar = self.mind.workingMemory.getVar(
                            resultVarname)
                        if resultvar is None or len(resultvar) == 0 or resultvar[0] is None:
                            actionvar = self.createNode(
                                self.getProperty(st, "name", "---"))
                            actionvar.IS(
                                self.vars["actionroot"], type="subjective")
                            actionvar.WHO(st)
                        else:
                            actionvar = resultvar[0]
                        created = False
                        for ins in actionvar.IS.incoming:
                            if self.getProperty(ins, "name") == actionname:
                                created = True
                                actionvars.append(ins)
                        if not created:
                            ins = self.createNode(actionname)
                            ins.IS(actionvar)
                            actionvars.append(ins)
                return actionvars
Description: Actions related to database only

Copyright (c) 2012 by huxuan. All rights reserved.
License GPLv3
"""

from neo4j import GraphDatabase

from config import DBNAME

# Database handle shared by the module; opened when the module is imported.
GRAPHDB = GraphDatabase(DBNAME)

# Fetch the 'user' node index, creating it when it does not exist yet.
if GRAPHDB.node.indexes.exists('user'):
    USER_IDX = GRAPHDB.node.indexes.get('user')
else:
    USER_IDX = GRAPHDB.node.indexes.create('user')

# Same get-or-create step for the 'tweet' node index.
if GRAPHDB.node.indexes.exists('tweet'):
    TWEET_IDX = GRAPHDB.node.indexes.get('tweet')
else:
    TWEET_IDX = GRAPHDB.node.indexes.create('tweet')

# Index key ('tid') under which the tweet reference node is stored.
TWEET_REF_ID = 1000000
# Create the reference node, with its 'tot_tweet' property at 0, when the
# index has no entry for it yet.
if len(TWEET_IDX['tid'][TWEET_REF_ID]) == 0:
    with GRAPHDB.transaction:
        TWEET_REF = GRAPHDB.node()
        TWEET_REF['tot_tweet'] = 0
        TWEET_IDX['tid'][TWEET_REF_ID] = TWEET_REF
# Always re-read the reference node through the index.
TWEET_REF = TWEET_IDX['tid'][TWEET_REF_ID].single
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Neo4j图形数据库示例 # from neo4j import GraphDatabase, INCOMING # 创建或连接数据库 db = GraphDatabase('neodb') # 在一个事务内完成写或读操作 with db.transaction: #创建用户组节点 users = db.node() # 连接到参考节点,方便查找 db.reference_node.USERS(users) # 为用户组建立索引,便于快速查找 user_idx = db.node.indexes.create('users') #创建用户节点 def create_user(name): with db.transaction: user = db.node(name=name) user.INSTANCE_OF(users) # 建立基于用户name的索引 user_idx['name'][name] = user return user #根据用户名获得用户节点 def get_user(name): return user_idx['name'][name].single
# Created: 2013-11-05
#
'''
Hello, world!
'''
from neo4j import GraphDatabase

# Create a database
db = GraphDatabase(u'store')
try:
    # All write operations happen in a transaction
    with db.transaction:
        firstNode = db.node(name='Hello')
        secondNode = db.node(name='world!')

        # Create a relationship with type 'knows'
        relationship = firstNode.knows(secondNode, name='graphy')

    # Read operations can happen anywhere
    message = ' '.join([firstNode['name'], relationship['name'], secondNode['name']])

    print(message)

    # Delete the data
    with db.transaction:
        firstNode.knows.single.delete()
        firstNode.delete()
        secondNode.delete()
finally:
    # Always shut down your database when your application exits; the
    # script previously never did.
    db.shutdown()
import cPickle  # used below; importing again is harmless if already imported

from neo4j import GraphDatabase

db = GraphDatabase('data')

try:
    # Start from an empty graph: relationships first, then nodes.
    print('deleting')
    with db.transaction:
        db.query('start r=relationship(*) delete r')
        db.query('start n=node(*) delete n;')

    print('reading')
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The file mode is left at the default so it stays consistent with
    # however the pickle was written.
    with open('friends.pickle') as pickle_file:
        friends = cPickle.load(pickle_file)

    # One node per key of the friends mapping.
    print('nodes')
    nodes = {}
    with db.transaction:
        for a in friends:
            nodes[a] = db.node(noscenda_name='person%s' % a)
            if a % 1000 == 0:
                # Same output as the statement form ``print 'n ', a``.
                print('n  %s' % a)

    # One 'friend' relationship per (key, target) pair.
    i = 0
    print('relations')
    with db.transaction:
        for a, targets in friends.items():
            for b in targets:
                i += 1
                nodes[a].friend(nodes[b])
                if i % 1000 == 0:
                    # Same output as the statement form ``print 'r ', i``.
                    print('r  %s' % i)

    print('fini')
finally:
    # Release the embedded database whether or not the load succeeded.
    db.shutdown()
from neo4j import GraphDatabase

# Create a database
db = GraphDatabase('/tmp/')

try:
    # Indexes used to look users and cafes up by name
    user_idx = db.node.indexes.create('users')
    cafe_idx = db.node.indexes.create('cafes')

    # All write operations happen in a transaction
    with db.transaction:
        firstNode = db.node(name='usuario', node_type='user')
        secondNode = db.node(name='lungo', node_type='cafe')

        user_idx['name']['usuario'] = firstNode
        cafe_idx['name']['lungo'] = secondNode

        # Create a relationship with type 'toma' carrying cantidad=3
        relationship = firstNode.toma(secondNode, cantidad=3)
finally:
    # Always shut down your database when your application exits,
    # including when one of the steps above fails.
    db.shutdown()
class SocialGraph:
    """Social graph stored in an embedded Neo4j database.

    ``nodes`` maps a person's name to the node created for it and ``rels``
    collects the ``knows`` relationships created by :meth:`file_to_db`.
    ``dir_patterns`` / ``clip_patterns`` are two-level lookup tables
    (first relationship label -> second relationship label -> predicted
    label) filled by :meth:`load_pattern`.
    """

    # Two hops in the same direction: source -> middleman -> target.
    _DIR_CYPHER = ('START source=node({s_id}) '
                   'MATCH (source)-[r1]->(middleman)-[r2]->(target) '
                   'WHERE target.name = {t_name} RETURN r1, r2')

    # Both ends point at the same middleman: source -> middleman <- target.
    _CLIP_CYPHER = ('START source=node({s_id}) '
                    'MATCH (source)-[r1]->(middleman)<-[r2]-(target) '
                    'WHERE target.name = {t_name} RETURN r1, r2')

    def __init__(self, neo4j_url="graph.db"):
        """Open the database at *neo4j_url* and start with empty caches."""
        self.db = GraphDatabase(neo4j_url)
        self.nodes = {}
        self.rels = []
        # Filled by load_pattern(); defined here so the query methods do not
        # raise AttributeError when no patterns have been loaded yet.
        self.dir_patterns = {}
        self.clip_patterns = {}

    def _get_or_create_node(self, name):
        """Return the cached node for *name*, creating it on first use.

        Must be called inside an open transaction.
        """
        if name not in self.nodes:
            self.nodes[name] = self.db.node(name=name)
        return self.nodes[name]

    def file_to_db(self, data_path):
        """Load the JSON file at *data_path* into the graph.

        The file maps each source name to a collection of target names; one
        ``knows`` relationship is created per (source, target) pair, all in
        a single transaction.  Returns the name -> node cache.
        """
        data = json_io.read_json(data_path)
        with self.db.transaction:
            for source_name, targets in data.iteritems():
                source = self._get_or_create_node(source_name)
                for target_name in targets:
                    target = self._get_or_create_node(target_name)
                    self.rels.append(source.knows(target))
        return self.nodes

    def load_pattern(self, dir_file, clip_file):
        """Read both pattern tables from their JSON files."""
        self.dir_patterns = json_io.read_json(dir_file)
        self.clip_patterns = json_io.read_json(clip_file)

    def has_relationship(self, source_name, target_name):
        """Return True when more than one relationship links source to target.

        NOTE(review): the threshold is "> 1", presumably because file_to_db
        already creates one untagged ``knows`` edge per pair -- confirm.
        Raises KeyError when either name is unknown.
        """
        source = self.nodes[source_name]
        # Looked up only so that an unknown target raises KeyError.
        self.nodes[target_name]
        query = ('start source=node({s_id}) '
                 'match (source)-[r]->(target) '
                 'where target.name = {t_name} return r')
        found = self.db.query(query, s_id=source.id, t_name=target_name)['r']
        return len(found) > 1

    def pattern_matching(self, source_name, target_name, keyword):
        """Check *keyword* against the relationship predicted by the patterns.

        Three queries are tried in order: direct source->target, direct
        target->source, then clip source->target.  The first query that gives
        an answer decides the outcome:

        * prediction equals *keyword* -> True
        * any other truthy prediction -> that prediction
        * False (no matching pattern)  -> False

        A query that yields nothing falls through to the next one; when none
        of them answers, True is returned.
        """
        source = self.nodes[source_name]
        target = self.nodes[target_name]
        attempts = (
            (self.dir_query, source, target_name),
            (self.dir_query, target, source_name),
            (self.clip_query, source, target_name),
        )
        for run_query, start_node, end_name in attempts:
            result = run_query(start_node, end_name)
            if result == keyword:
                return True
            if result:
                return result
            # Equality (not identity) is kept on purpose so the outcome
            # matches the original comparison; None moves on to the next query.
            if result == False:
                return False
        return True

    def _predict(self, cypher, patterns, source, target_name):
        """Run *cypher* and map the first usable row through *patterns*.

        A row is usable when both relationships carry a ``rel`` property.
        Returns the predicted label, False when the table has no entry for
        the pair of labels, or None when no row is usable.
        """
        rows = self.db.query(cypher, s_id=source.id, t_name=target_name)
        for row in rows:
            first_hop, second_hop = row['r1'], row['r2']
            if 'rel' not in first_hop.keys() or 'rel' not in second_hop.keys():
                continue
            # .get() on the outer table too: a first-hop label that the
            # table does not know means "no pattern", not a KeyError.
            followers = patterns.get(first_hop['rel'], {})
            return followers.get(second_hop['rel'], False)
        return None

    def dir_query(self, source, target_name):
        """Predict a label from a source -> middleman -> target path."""
        return self._predict(self._DIR_CYPHER, self.dir_patterns,
                             source, target_name)

    def clip_query(self, source, target_name):
        """Predict a label from a source -> middleman <- target path."""
        return self._predict(self._CLIP_CYPHER, self.clip_patterns,
                             source, target_name)

    def relationship_tagging(self, source_name, target_name, keyword,
                             confidence):
        """Create a ``knows`` edge tagged ``rel=keyword`` from source to target.

        When *confidence* is 2 or lower the reverse edge is created as well
        and the pair is printed.
        """
        source = self.nodes[source_name]
        target = self.nodes[target_name]
        with self.db.transaction:
            source.knows(target, rel=keyword)
            if confidence <= 2:
                print(source_name + ' <-- ' + keyword + ' <-- ' + target_name)
                target.knows(source, rel=keyword)

    def clear(self):
        """Delete every relationship and node, then empty the caches."""
        with self.db.transaction:
            for rel in self.db.relationships:
                rel.delete()
            for node in self.db.nodes:
                node.delete()
        # The cached handles now refer to deleted entities; empty the
        # containers in place so references handed out earlier stay in sync.
        self.nodes.clear()
        del self.rels[:]

    def shutdown(self):
        """Shut the underlying database down."""
        self.db.shutdown()
def test_invoice_app(self):
    # Walks through the invoice example: set up group nodes and an index,
    # create two customers with eleven invoices each (100 .. 1100), then
    # check that six invoices of 'Acme Inc.' are above 500.
    folder_to_put_db_in = tempfile.mkdtemp()
    db = None
    try:
        # START SNIPPET: invoiceapp-setup
        from neo4j import GraphDatabase, INCOMING, Evaluation

        # Create a database
        db = GraphDatabase(folder_to_put_db_in)

        # All write operations happen in a transaction
        with db.transaction:

            # A node to connect customers to
            customers = db.node()

            # A node to connect invoices to
            invoices = db.node()

            # Connected to the reference node, so
            # that we can always find them.
            db.reference_node.CUSTOMERS(customers)
            db.reference_node.INVOICES(invoices)

            # An index, helps us rapidly look up customers
            customer_idx = db.node.indexes.create('customers')
        # END SNIPPET: invoiceapp-setup

        # START SNIPPET: invoiceapp-domainlogic-create
        def create_customer(name):
            with db.transaction:
                customer = db.node(name=name)
                customer.INSTANCE_OF(customers)

                # Index the customer by name
                customer_idx['name'][name] = customer
            return customer

        def create_invoice(customer, amount):
            with db.transaction:
                invoice = db.node(amount=amount)
                invoice.INSTANCE_OF(invoices)

                invoice.RECIPIENT(customer)
            # Hand back the node that was just created, mirroring
            # create_customer (the customer is already known to the caller).
            return invoice
        # END SNIPPET: invoiceapp-domainlogic-create

        # START SNIPPET: invoiceapp-domainlogic-get-by-idx
        def get_customer(name):
            return customer_idx['name'][name].single
        # END SNIPPET: invoiceapp-domainlogic-get-by-idx

        # START SNIPPET: invoiceapp-domainlogic-get-by-traversal
        def get_invoices_with_amount_over(customer, min_sum):
            def evaluator(path):
                node = path.end
                if node.has_key('amount') and node['amount'] > min_sum:
                    return Evaluation.INCLUDE_AND_CONTINUE
                return Evaluation.EXCLUDE_AND_CONTINUE

            return db.traversal()\
                     .relationships('RECIPIENT', INCOMING)\
                     .evaluator(evaluator)\
                     .traverse(customer)\
                     .nodes
        # END SNIPPET: invoiceapp-domainlogic-get-by-traversal

        # START SNIPPET: invoiceapp-create-and-search
        for name in ['Acme Inc.', 'Example Ltd.']:
            create_customer(name)

        # Loop through customers
        for relationship in customers.INSTANCE_OF:
            customer = relationship.start
            for i in range(1, 12):
                create_invoice(customer, 100 * i)

        # Finding large invoices
        large_invoices = get_invoices_with_amount_over(get_customer('Acme Inc.'), 500)

        # Getting all invoices per customer:
        for relationship in get_customer('Acme Inc.').RECIPIENT.incoming:
            invoice = relationship.start
        # END SNIPPET: invoiceapp-create-and-search

        self.assertEqual(len(large_invoices), 6)
    finally:
        try:
            # Shut down even when an assertion or a step above failed, and
            # before the store directory is removed underneath the database.
            if db is not None:
                db.shutdown()
        finally:
            if os.path.exists(folder_to_put_db_in):
                import shutil
                shutil.rmtree(folder_to_put_db_in)