def __init__(self, uri, database, logger=None, model_paths=None):
    """ Connects to the database instance through a Rexster client, and
    initializes a session and identity map.

    :param uri: The URI to the database.
    :type uri: str
    :param database: The database name.
    :type database: str
    :param logger: A logger instance.
    :type logger: logging
    :param model_paths: The fully qualified path to your models.
    :type model_paths: list
    """
    self.logger = logger

    # Init connection
    from bulbs.config import Config
    # config = Config(uri)
    from bulbs.titan import TitanClient
    self.client = TitanClient(db_name=database)
    from bulbs.titan import Graph
    self.graph = Graph(self.client.config)

    # Init identity map
    self.session_delete = []
    self.session_add = []
    self._repositorys = {}
    # BUG FIX: the default used to be a mutable [] shared across every
    # instance; default to None and build a fresh list per instance.
    self.model_paths = model_paths if model_paths is not None else []
def get_sliced_data(query, fields, with_header=False):
    """
    Returns a query result, yielded row by row.

    The gremlin *query* is written to a temporary file, registered in the
    graph's script index, and must define a ``nodes()`` and a ``slice()``
    function.  Results are fetched in blocks of 100 node ids.

    In case of error it raises a ValueError.
    """
    from urllib2 import HTTPError
    from tempfile import mkstemp
    import os

    # from webui.cnmain.utils import get_virtuoso
    # virtuoso = get_virtuoso(instance='master')
    from bulbs.titan import Graph
    from bulbs.config import Config

    header = [field.strip() for field in fields.split(',')]

    handle_, path = mkstemp(text=True)
    # BUG FIX: mkstemp returns an open OS-level file descriptor; it was
    # never closed, leaking one fd per call.
    os.close(handle_)
    with open(path, 'w') as f:
        f.write(query)

    c = Config('http://localhost:8182/graphs/graph')
    g = Graph(c)
    g.scripts.update(path)

    try:
        nodes_fun = g.scripts.get('nodes')
    except KeyError:
        raise ValueError("Malformed query: missing nodes() function")

    nodes_id = g.gremlin.execute(nodes_fun, {}).content['results']

    if with_header:
        yield header

    block_size = 100
    # '//' makes the intended floor division explicit (and keeps the same
    # behavior if this file is ever run with 'from __future__ import division').
    for i in range(len(nodes_id) // block_size + 1):
        start = i * block_size
        block = nodes_id[start:start + block_size]
        try:
            slice_fun = g.scripts.get('slice')
            content = g.gremlin.execute(slice_fun, {'nodes_id': block}).content
        except HTTPError as e:  # 'as' form: valid in py2.6+ and py3
            raise ValueError("Malformed query: {}".format(e.read()))
        for result in content['results']:
            yield result
def test_suite():
    """Assemble the bulbs test suite wired to a Titan backend."""
    # pass in a db_name to test a specific database
    titan_client = TitanClient(db_name=db_name)

    # Point the shared test-case fixtures at the Titan client/graph.
    BulbsTestCase.client = titan_client
    BulbsTestCase.graph = Graph(titan_client.config)
    BulbsTestCase.vertex_index_proxy = VertexIndexProxy
    BulbsTestCase.edge_index_proxy = EdgeIndexProxy
    BulbsTestCase.index_class = KeyIndex

    suite = bulbs_test_suite()
    #suite.addTest(unittest.makeSuite(RestTestCase))
    suite.addTest(unittest.makeSuite(GremlinTestCase))
    return suite
def __init__(self, restUrl):
    """Open a Rexster connection at *restUrl* and expose it as a Graph."""
    self.url = restUrl
    rexster_config = Config(restUrl)
    self.config = rexster_config
    self.graph = Graph(rexster_config)
# Unable to use bulbs to program Titan GraphDB / Rexster:
#
#     >>> from bulbs.titan import Graph
#     >>> g = Graph()
from bulbs.model import Node, Relationship
from bulbs.property import String, Integer, DateTime
from bulbs.utils import current_datetime


class Person(Node):
    element_type = "person"

    name = String(nullable=False)
    age = Integer()


class Knows(Relationship):
    label = "knows"

    created = DateTime(default=current_datetime, nullable=False)


from bulbs.titan import Graph
# BUG FIX: Config and DEBUG were used below but never imported, so the
# script died with a NameError.  Both live in bulbs.config.
from bulbs.config import Config, DEBUG

config = Config("http://titan:8182")
config.set_logger(DEBUG)

g = Graph(config)
g.add_proxy("people", Person)
g.add_proxy("knows", Knows)

james = g.people.create(name="James")
julie = g.people.create(name="Julie")
g.knows.create(james, julie)
class BulbsObjectManager(object):
    """ This is a wrapper around the Titan APIs designed to provide a clean
    an uniformized interface for model persistence in graphs.

    Loading the OGM :
    >>> ogm = BulbsObjectManager("http://localhost:8182/graphs", "graph")

    Querying with simple filters :
    >>> websites = ogm.repository('Website').filter(url='http://www.cuisineaz.com')

    Deleting entities :
    >>> website_del = websites[0]
    >>> ogm.delete(website_del)
    >>> ogm.commit()
    >>> ogm.flush()

    Updating entities :
    >>> website_upd = websites[1]
    >>> website_upd.name = 'CuisineAZ - Forum'
    >>> ogm.add(website_upd)
    >>> ogm.commit()
    >>> ogm.flush()

    Creating entities :
    >>> website_add = Website(name="Marmiton", url="http://www.marmiton.org")
    >>> ogm.add(website_add)
    >>> ogm.commit()
    >>> ogm.flush()

    Performing lazy-loaded traversals :
    >>> website_upd.pages()
    """

    def __init__(self, uri, database, logger=None, model_paths=None):
        """ Connects to the database instance through a Rexster client, and
        initializes a session and identity map.

        :param uri: The URI to the database.
        :type uri: str
        :param database: The database name.
        :type database: str
        :param logger: A logger instance.
        :type logger: logging
        :param model_paths: The fully qualified path to your models.
        :type model_paths: list
        """
        self.logger = logger

        # Init connection
        from bulbs.config import Config
        # config = Config(uri)
        from bulbs.titan import TitanClient
        self.client = TitanClient(db_name=database)
        from bulbs.titan import Graph
        self.graph = Graph(self.client.config)

        # Init identity map
        self.session_delete = []
        self.session_add = []
        self._repositorys = {}
        # BUG FIX: mutable default argument [] was shared across instances.
        self.model_paths = model_paths if model_paths is not None else []

    def repository(self, repository_name):
        """ Returns the repository corresponding to a given model.

        :param repository_name: The name of the repository to load.
        :type repository_name: str
        :returns: graphalchemy.repository.BulbsNodeRepository -- the requested
        repository, loaded with the model metadata.
        """
        if repository_name in self._repositorys:
            return self._repositorys[repository_name]

        # Scan the configured model modules for the requested class.
        found = False
        for module_name in self.model_paths:
            module = importlib.import_module(module_name)
            if repository_name not in module.__dict__:
                continue
            found = True
            break
        if not found:
            raise Exception('Repository not found.')

        # Register proxy and metdata
        model = module.__dict__[repository_name]

        # Build repository
        if issubclass(model, Node):
            repository = BulbsNodeRepository(
                model,
                self.client,
                graph=self.graph,
                logger=self.logger
            )
        elif issubclass(model, Relationship):
            self.graph.add_proxy(repository_name, model)
            repository = BulbsRelationshipRepository(
                model,
                self.client,
                graph=self.graph,
                logger=self.logger
            )
        else:
            raise Exception('Element class seems not to be Node nor Relationship.')

        self._repositorys[repository_name] = repository
        return repository

    def add(self, entity):
        """ Adds an entity to the current session for bulk save / update.

        Identity (``is``), not equality, decides whether the entity is
        already tracked.

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        found = False
        for entity_add in self.session_add:
            if entity_add is entity:
                found = True
                break
        if not found:
            self.session_add.append(entity)
        # An entity scheduled for deletion is no longer deleted once re-added.
        for entity_delete in self.session_delete:
            if entity_delete is entity:
                self.session_delete.remove(entity)
                break
        return self

    def delete(self, entity):
        """ Schedules an entity for removal in the database.

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        found = False
        for entity_delete in self.session_delete:
            if entity is entity_delete:
                found = True
                break
        if not found:
            # BUG FIX: this used to append to session_add, which scheduled
            # the entity for *saving* instead of removal.
            self.session_delete.append(entity)
        # An entity scheduled for saving must not be saved anymore.
        for entity_add in self.session_add:
            if entity is entity_add:
                self.session_add.remove(entity)
                break
        return self

    def commit(self):
        """ Prepares the gremlin queries, start a transaction.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.query('g.commit()', {})
        return self

    def flush(self):
        """ Commits the transaction, flushes the result to the database.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        # We need to save nodes first, so that relations can reference
        # already-persisted vertices.
        for entity in self.session_add:
            if isinstance(entity, Node):
                self._log("Flushed "+str(entity))
                self._flush_one_node(entity)
        for entity in self.session_add:
            if isinstance(entity, Relationship):
                self._log("Flushed "+str(entity))
                self._flush_one_relation(entity)
        # Do not reset the session, we keep them tracked
        # self.session_add = []

        # We need to delete edges first, then the vertices they connect.
        for entity in self.session_delete:
            if entity._client is None:
                # Never persisted: nothing to delete server-side.
                continue
            if isinstance(entity, Relationship):
                self._log("Deleted "+str(entity))
                entity._edges.delete(entity.eid)
        for entity in self.session_delete:
            if entity._client is None:
                continue
            if isinstance(entity, Node):
                self._log("Deleted "+str(entity))
                entity._vertices.delete(entity.eid)

        # All deleted entities are detached
        self.session_delete = []
        return self

    def _flush_one_node(self, entity):
        # Regular case, the entity is either loaded or created via repository
        if entity._client is not None:
            entity.save()
            return self
        # Case where the entity was created from scratch (our hack)
        entity._client = self.graph.client
        entity._create(entity._get_property_data(), {})
        return self

    def _flush_one_relation(self, entity):
        # Regular case, the entity is either loaded or created via repository
        if entity._client is not None:
            entity.save()
            return self
        # Case where the entity was created from scratch (our hack)
        entity._client = self.graph.client
        # At this point, all Nodes are supposed to have been persisted, so we
        # can retrieve their eid.
        if entity._outV_vertex is None:
            raise Exception('Outbound Vertex not set')
        if entity._inV_vertex is None:
            raise Exception('Inbound Vertex not set')
        entity._create(
            entity._outV_vertex,
            entity._inV_vertex,
            entity._get_property_data(),
            {}
        )
        entity._outV_vertex = None
        entity._inV_vertex = None
        return self

    def query(self, gremlin, params):
        """ Performs a gremlin query against the database.

        :param gremlin: The gremlin query.
        :type gremlin: str
        :param params: The set of parameter values.
        :type params: dict
        :returns: The result of the query.
        """
        self._log(gremlin+u" "+unicode(params))
        return self.graph.gremlin.query(gremlin, params)

    def refresh(self, entity):
        """ Fetches information from the database to update the current entity.
        @todo

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        # Drop every memoized attribute so it gets re-fetched lazily.
        caches = [key for key in entity.__dict__ if '_cache' in key]
        for cache in caches:
            delattr(entity, cache)
        # Reload all other properties from DB.
        return self

    def close_all(self):
        """ Detaches every tracked entity from the session.
        @todo we should also free all _clients properties of all loaded
        entities.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.session_add = []
        self.session_delete = []
        return self

    def merge(self, entity):
        """ Merges an entity with its current version in the database.
        @todo

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        # BUG FIX: NotImplementedException is not a Python builtin; raising
        # it produced a NameError instead of the intended exception.
        raise NotImplementedError('Method not implemented.')

    def _log(self, message, level=10):
        """ Thin wrapper for logging purposes.

        :param message: The message to log.
        :type message: str
        :param level: The level of the log.
        :type level: int
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        if self.logger:
            self.logger.log(level, message)
        return self
# -*- coding: utf-8 -*- """ Created on Fri Jun 20 15:00:48 2014 @author: turbosnail9 """ # # Imports # from graphelements import User, Chooses, Sport, Location, Budget, Has, LivesIn from bulbs.titan import Graph import requests import json g = Graph() g.add_proxy("user", User) g.add_proxy("chooses", Chooses) g.add_proxy("has", Has) g.add_proxy("sport", Sport) g.add_proxy("location", Location) g.add_proxy("lives_in", LivesIn) g.add_proxy("budget", Budget) # # Method definitions # def GetOrCreateUser(p_name, p_age, p_gender, p_email, p_sportList): #Create a new person node and pass in the properties
from bulbs.titan import Graph, Config

config = Config("http://localhost:8182/graphs/yourdatabasename/")
g = Graph(config)

# Seed two vertices linked by a 'follow' edge.
g.gremlin.command("""
v1 = g.addVertex([account_id: 1])
v2 = g.addVertex([account_id: 2])
g.addEdge(v1, v2, 'follow')
""")

# Traverse from account 1 across 'follow' and confirm we reach account 2.
followed = g.gremlin.query("g.V().has('account_id', 1).out('follow')")
assert followed.next().get('account_id') == 2
from bulbs.utils import current_datetime


# NOTE(review): Node, Relationship, String, Integer, Config and Graph are
# presumably imported earlier in the full file — confirm against the
# surrounding module.
class Titan(Node):
    element_type = 'titan'

    name = String()
    age = Integer()


class Father(Relationship):
    label = 'father'


config = Config("http://localhost:8182/graphs/yourdatabasename/")
g = Graph(config)
#g.config.set_logger(DEBUG)

# add file to scripts index
g.scripts.update('gremlin.groovy')

g.add_proxy('titan', Titan)
g.add_proxy('father', Father)

'''
Testing scenarios
'''

# locate saturn node
saturn = g.titan.get_or_create('name', 'saturn')

# index name
assert (saturn.get_index_name(config) == 'vertex')

# get saturn's grandchild
class BulbsObjectManager(object):
    """ This is a wrapper around the Titan APIs designed to provide a clean
    an uniformized interface for model persistence in graphs.

    Loading the OGM :
    >>> ogm = BulbsObjectManager("http://localhost:8182/graphs", "graph")

    Querying with simple filters :
    >>> websites = ogm.repository('Website').filter(url='http://www.cuisineaz.com')

    Deleting entities :
    >>> website_del = websites[0]
    >>> ogm.delete(website_del)
    >>> ogm.commit()
    >>> ogm.flush()

    Updating entities :
    >>> website_upd = websites[1]
    >>> website_upd.name = 'CuisineAZ - Forum'
    >>> ogm.add(website_upd)
    >>> ogm.commit()
    >>> ogm.flush()

    Creating entities :
    >>> website_add = Website(name="Marmiton", url="http://www.marmiton.org")
    >>> ogm.add(website_add)
    >>> ogm.commit()
    >>> ogm.flush()

    Performing lazy-loaded traversals :
    >>> website_upd.pages()
    """

    def __init__(self, uri, database, logger=None, model_paths=None):
        """ Connects to the database instance through a Rexster client, and
        initializes a session and identity map.

        :param uri: The URI to the database.
        :type uri: str
        :param database: The database name.
        :type database: str
        :param logger: A logger instance.
        :type logger: logging
        :param model_paths: The fully qualified path to your models.
        :type model_paths: list
        """
        self.logger = logger

        # Init connection
        from bulbs.config import Config
        # config = Config(uri)
        from bulbs.titan import TitanClient
        self.client = TitanClient(db_name=database)
        from bulbs.titan import Graph
        self.graph = Graph(self.client.config)

        # Init identity map
        self.session_delete = []
        self.session_add = []
        self._repositorys = {}
        # BUG FIX: mutable default argument [] was shared across instances.
        self.model_paths = model_paths if model_paths is not None else []

    def repository(self, repository_name):
        """ Returns the repository corresponding to a given model.

        :param repository_name: The name of the repository to load.
        :type repository_name: str
        :returns: graphalchemy.repository.BulbsNodeRepository -- the requested
        repository, loaded with the model metadata.
        """
        if repository_name in self._repositorys:
            return self._repositorys[repository_name]

        # Scan the configured model modules for the requested class.
        found = False
        for module_name in self.model_paths:
            module = importlib.import_module(module_name)
            if repository_name not in module.__dict__:
                continue
            found = True
            break
        if not found:
            raise Exception('Repository not found.')

        # Register proxy and metdata
        model = module.__dict__[repository_name]

        # Build repository
        if issubclass(model, Node):
            repository = BulbsNodeRepository(model, self.client, graph=self.graph, logger=self.logger)
        elif issubclass(model, Relationship):
            self.graph.add_proxy(repository_name, model)
            repository = BulbsRelationshipRepository(model, self.client, graph=self.graph, logger=self.logger)
        else:
            raise Exception(
                'Element class seems not to be Node nor Relationship.')

        self._repositorys[repository_name] = repository
        return repository

    def add(self, entity):
        """ Adds an entity to the current session for bulk save / update.

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.session_add.append(entity)
        # An entity scheduled for deletion is no longer deleted once re-added.
        if entity in self.session_delete:
            self.session_delete.remove(entity)
        return self

    def delete(self, entity):
        """ Schedules an entity for removal in the database.

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.session_delete.append(entity)
        # An entity scheduled for saving must not be saved anymore.
        if entity in self.session_add:
            self.session_add.remove(entity)
        return self

    def commit(self):
        """ Prepares the gremlin queries, start a transaction.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.query('g.commit()', {})
        return self

    def flush(self):
        """ Commits the transaction, flushes the result to the database.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        for entity in self.session_add:
            # Case where the entity was created from scratch (our hack)
            if entity._client is None:
                entity._client = self.graph.client
                entity._create(entity._get_property_data(), {})
            # Regular case, the entity is either loaded or created via repository
            else:
                entity.save()
        # Do not reset the session, we keep them tracked
        # self.session_add = []
        for entity in self.session_delete:
            # Case where the entity was created from scratch (our hack):
            # nothing to delete server-side.
            if entity._client is None:
                continue
            # Regular case, the entity is either loaded or created via repository
            else:
                entity.delete()
        # All deleted entities are detached
        self.session_delete = []
        return self

    def query(self, gremlin, params):
        """ Performs a gremlin query against the database.

        :param gremlin: The gremlin query.
        :type gremlin: str
        :param params: The set of parameter values.
        :type params: dict
        :returns: The result of the query.
        """
        self._log(gremlin + u" " + unicode(params))
        return self.graph.gremlin.query(gremlin, params)

    def refresh(self, entity):
        """ Fetches information from the database to update the current entity.
        @todo

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        # BUG FIX: NotImplementedException is not a Python builtin; raising
        # it produced a NameError instead of the intended exception.
        raise NotImplementedError('Method not implemented.')

    def close_all(self):
        """ Detaches every tracked entity from the session.
        @todo we should also free all _clients properties of all loaded
        entities.

        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        self.session_add = []
        self.session_delete = []
        return self

    def merge(self, entity):
        """ Merges an entity with its current version in the database.
        @todo

        :param entity: The entity to add to the session.
        :type entity: bulbs.model.Node, bulbs.model.Relationship
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        # BUG FIX: see refresh() — was NotImplementedException (undefined).
        raise NotImplementedError('Method not implemented.')

    def _log(self, message, level=10):
        """ Thin wrapper for logging purposes.

        :param message: The message to log.
        :type message: str
        :param level: The level of the log.
        :type level: int
        :returns: graphalchemy.ogm.BulbsObjectManager -- this object itself.
        """
        if self.logger:
            self.logger.log(level, message)
        return self
def enrich(self, g): """ :param g: graph to be merged :param titan: reference to titan database :return: Nonetype NOTE: Merge occurs on node name rather than attributes NOTE: Merge iterates through edges, finds the edge's nodes, looks for the edge & creates if it doesn't exist. Any nodes without edges are iterated through and created if they do not already exist. """ # Get config titan = self.titandb_config # Connect to TitanDB Database titan_graph = TITAN_Graph(titan) # Add schema relationships titan_graph.add_proxy("talks_to", TalksTo) titan_graph.add_proxy("described_by", DescribedBy) titan_graph.add_proxy("influences", Influences) for edge in g.edges(data=True): # print edge # DEBUG # Get the src node src_uri = edge[0] attr = g.node[src_uri] # print "Node {0} with attributes:\n{1}".format(src_uri, attr) # DEBUG # Get/Create node in titan src = titan_graph.vertices.get_or_create( "uri", src_uri, attr) # WARNING: This only works if g was created correctly # Update the times if "start_time" in attr and attr["start_time"] is not "": if "start_time" in src and ( src.start_time == "" or datetime.strptime(src.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ")): src.start_time = attr["start_time"] if "finish_time" in attr: if "finish_time" in src and ( src.finish_time == "" or datetime.strptime( src.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime( attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): src.finish_time = attr["finish_time"] src.save() # Get the dst node dst_uri = edge[1] attr = g.node[dst_uri] # Get/Create node in titan dst = titan_graph.vertices.get_or_create( "uri", dst_uri, attr) # WARNING: This only works if g was created correctly # Update the times if "start_time" in attr and attr["start_time"] is not "": if "start_time" in dst and ( dst.start_time == "" or datetime.strptime(dst.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ")): dst.start_time = 
attr["start_time"] if "finish_time" in attr: if "finish_time" in dst and ( dst.finish_time == "" or datetime.strptime( dst.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime( attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): dst.finish_time = attr["finish_time"] dst.save() # print "edge 2 before relationship is\n{0}".format(edge[2]) # DEBUG # Create the edge if it doesn't exist ## This matches on src, dst, the relationship & it's chain (relationship->described_by->___) and origin # fixed "described_by" relationship for how it's stored in TitanDB try: relationship = edge[2].pop('relationship') except: # default to 'described_by' relationship = 'describedBy' if relationship == 'described_by': relationship = 'describedBy' if relationship == 'talks_to': relationship = 'talksTo' # Match on the relationship chain chain = relationship edge_attr = "" # print "edge 2 before while is\n{0}".format(edge[2]) # DEBUG while chain in edge[2]: edge_attr += "it.{0} == '{1}' & ".format(chain, edge[2][chain]) chain = edge[2][chain] # Remove the irrelevant edge properties # print "edge 2 before origin is\n{0}".format(edge[2]) # DEBUG if 'origin' in edge[2]: edge_attr += "it.origin == '{0}' & ".format(edge[2]['origin']) else: edge_attr = "" if edge_attr: edge_attr = ".filter{0}".format("{" + edge_attr.rstrip(" & ") + "}") # Execute a gremlin query from src to dst to get the edges between them that match the attributes of the edge query = "g.v({0}).outE('{3}'){2}.as('r').inV.retain([g.v({1})]).back('r')".format( src.eid, dst.eid, edge_attr, relationship) # print query # DEBUG edges = titan_graph.gremlin.query(query) # If an edge exists, update it's times, otherwise create the edge if edges: e = edges.next() # print "e is\n".format(e) # DEBUG # print "edge 2 is\n{0}".format(edge[2]) if "start_time" in e and ( e.start_time == "" or datetime.strptime(e.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(edge[2]["start_time"], "%Y-%m-%dT%H:%M:%SZ")): e.start_time = edge[2]["start_time"] if 
"finish_time" in e and ( e.finish_time == "" or datetime.strptime(e.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime(edge[2]["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): e.finish_time = edge[2]["finish_time"] e.save() else: if relationship in edge[2]: edge[2]["rel_{0}".format(relationship)] = edge[2].pop( relationship ) # Titan can't handle a property key being the same as the relationship value try: # print "src:{0}\ndst:{1}\nAttr:\n{2}\n".format(src, dst, edge[2]) if relationship == 'describedBy': titan_graph.described_by.create(src, dst, edge[2]) elif relationship == 'talksTo': titan_graph.talks_to.create(src, dst, edge[2]) elif relationship == 'influences': titan_graph.influences.create(src, dst, edge[2]) else: titan_graph.edges.create( src, ''.join(e for e in str(relationship) if e.isalnum()), dst, edge[2]) except: print "src:{0}\ndst:{1}\nAttr:\n{2}".format( src, dst, edge[2]) raise # raise error, None, sys.exc_info()[2] # print "edge 2 after adding edge is\n{0}".format(edge[2]) # DEBUG # Get all nodes with no neighbors nodes = [k for k, v in g.degree().iteritems() if v == 0] # For those nodes, get or create them in the graph and update the times for node_uri in nodes: attr = g.node[node_uri] # Get/Create node in titan node = titan_graph.vertices.get_or_create( "uri", node_uri, attr) # WARNING: This only works if g was created correctly # Update the times if node.start_time == "" or datetime.strptime(node.start_time, "%Y-%m-%dT%H:%M:%SZ") > \ datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ"): node.start_time = attr["start_time"] if "finish_time" in node and datetime.strptime(node.finish_time, "%Y-%m-%dT%H:%M:%SZ") < \ datetime.strptime(attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ"): node.finish_time = attr["finish_time"] node.save() return
#Run these commands in the terminal. #http://bulbflow.com/quickstart/ #It looks like we can write Gremlin code in and execute it: #http://stackoverflow.com/questions/16954378/bulb-flow-how-to-get-a-range-of-vertices #stack tag: http://stackoverflow.com/questions/tagged/bulbs?page=1&sort=newest&pagesize=15 from people import Person, Knows from bulbs.titan import Graph import requests import json g = Graph() g.add_proxy("people", Person) g.add_proxy("knows", Knows) james = g.people.create(name="James") julie = g.people.create(name="Julie") nodes = g.people.index.lookup(name="James") vertices = list(nodes) vertex = vertices[1] knows = vertex.bothV("knows") for k in knows: print k.data() nodes = g.people.index.lookup(name="mark") vertices = list(nodes)
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 20 15:00:48 2014

@author: turbosnail9
"""

#
# Imports
#
from graphelements import User, Chooses, Sport, Location, Budget, Has, LivesIn
from bulbs.titan import Graph
import requests
import json

g = Graph()

# Register every model proxy on the graph.
g.add_proxy("user", User)
g.add_proxy("chooses", Chooses)
g.add_proxy("has", Has)
g.add_proxy("sport", Sport)
g.add_proxy("location", Location)
g.add_proxy("lives_in", LivesIn)
g.add_proxy("budget", Budget)


#
# Method definitions
#
def GetOrCreateUser(p_name, p_age, p_gender, p_email, p_sportList):
    # Create a new person node and pass in the properties
    person = g.user.get_or_create('email', p_email, {'email': p_email})
def enrich(self, g): """ :param g: graph to be merged :param titan: reference to titan database :return: Nonetype NOTE: Merge occurs on node name rather than attributes NOTE: Merge iterates through edges, finds the edge's nodes, looks for the edge & creates if it doesn't exist. Any nodes without edges are iterated through and created if they do not already exist. """ # Get config titan = self.titandb_config # Connect to TitanDB Database titan_graph = TITAN_Graph(titan) # Add schema relationships titan_graph.add_proxy("talks_to", TalksTo) titan_graph.add_proxy("described_by", DescribedBy) titan_graph.add_proxy("influences", Influences) for edge in g.edges(data=True): # print edge # DEBUG # Get the src node src_uri = edge[0] attr = g.node[src_uri] # print "Node {0} with attributes:\n{1}".format(src_uri, attr) # DEBUG # Get/Create node in titan src = titan_graph.vertices.get_or_create("uri", src_uri, attr) # WARNING: This only works if g was created correctly # Update the times if "start_time" in attr and attr["start_time"] is not "": if "start_time" in src and (src.start_time == "" or datetime.strptime(src.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ")): src.start_time = attr["start_time"] if "finish_time" in attr: if "finish_time" in src and (src.finish_time == "" or datetime.strptime(src.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime(attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): src.finish_time = attr["finish_time"] src.save() # Get the dst node dst_uri = edge[1] attr = g.node[dst_uri] # Get/Create node in titan dst = titan_graph.vertices.get_or_create("uri", dst_uri, attr) # WARNING: This only works if g was created correctly # Update the times if "start_time" in attr and attr["start_time"] is not "": if "start_time" in dst and (dst.start_time == "" or datetime.strptime(dst.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ")): dst.start_time = attr["start_time"] if 
"finish_time" in attr: if "finish_time" in dst and (dst.finish_time == "" or datetime.strptime(dst.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime(attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): dst.finish_time = attr["finish_time"] dst.save() # print "edge 2 before relationship is\n{0}".format(edge[2]) # DEBUG # Create the edge if it doesn't exist ## This matches on src, dst, the relationship & it's chain (relationship->described_by->___) and origin # fixed "described_by" relationship for how it's stored in TitanDB try: relationship = edge[2].pop('relationship') except: # default to 'described_by' relationship = 'describedBy' if relationship == 'described_by': relationship = 'describedBy' if relationship == 'talks_to': relationship = 'talksTo' # Match on the relationship chain chain = relationship edge_attr = "" # print "edge 2 before while is\n{0}".format(edge[2]) # DEBUG while chain in edge[2]: edge_attr += "it.{0} == '{1}' & ".format(chain, edge[2][chain]) chain = edge[2][chain] # Remove the irrelevant edge properties # print "edge 2 before origin is\n{0}".format(edge[2]) # DEBUG if 'origin' in edge[2]: edge_attr += "it.origin == '{0}' & ".format(edge[2]['origin']) else: edge_attr = "" if edge_attr: edge_attr = ".filter{0}".format("{" + edge_attr.rstrip(" & ") + "}") # Execute a gremlin query from src to dst to get the edges between them that match the attributes of the edge query = "g.v({0}).outE('{3}'){2}.as('r').inV.retain([g.v({1})]).back('r')".format( src.eid, dst.eid, edge_attr, relationship ) # print query # DEBUG edges = titan_graph.gremlin.query(query) # If an edge exists, update it's times, otherwise create the edge if edges: e = edges.next() # print "e is\n".format(e) # DEBUG # print "edge 2 is\n{0}".format(edge[2]) if "start_time" in e and (e.start_time == "" or datetime.strptime(e.start_time, "%Y-%m-%dT%H:%M:%SZ") > datetime.strptime(edge[2]["start_time"], "%Y-%m-%dT%H:%M:%SZ")): e.start_time = edge[2]["start_time"] if "finish_time" in e and 
(e.finish_time == "" or datetime.strptime(e.finish_time, "%Y-%m-%dT%H:%M:%SZ") < datetime.strptime(edge[2]["finish_time"], "%Y-%m-%dT%H:%M:%SZ")): e.finish_time = edge[2]["finish_time"] e.save() else: if relationship in edge[2]: edge[2]["rel_{0}".format(relationship)] = edge[2].pop(relationship) # Titan can't handle a property key being the same as the relationship value try: # print "src:{0}\ndst:{1}\nAttr:\n{2}\n".format(src, dst, edge[2]) if relationship == 'describedBy': titan_graph.described_by.create(src, dst, edge[2]) elif relationship == 'talksTo': titan_graph.talks_to.create(src, dst, edge[2]) elif relationship == 'influences': titan_graph.influences.create(src, dst, edge[2]) else: titan_graph.edges.create(src, ''.join(e for e in str(relationship) if e.isalnum()), dst, edge[2]) except: print "src:{0}\ndst:{1}\nAttr:\n{2}".format(src, dst, edge[2]) raise # raise error, None, sys.exc_info()[2] # print "edge 2 after adding edge is\n{0}".format(edge[2]) # DEBUG # Get all nodes with no neighbors nodes = [k for k,v in g.degree().iteritems() if v==0] # For those nodes, get or create them in the graph and update the times for node_uri in nodes: attr = g.node[node_uri] # Get/Create node in titan node = titan_graph.vertices.get_or_create("uri", node_uri, attr) # WARNING: This only works if g was created correctly # Update the times if node.start_time == "" or datetime.strptime(node.start_time, "%Y-%m-%dT%H:%M:%SZ") > \ datetime.strptime(attr["start_time"], "%Y-%m-%dT%H:%M:%SZ"): node.start_time = attr["start_time"] if "finish_time" in node and datetime.strptime(node.finish_time, "%Y-%m-%dT%H:%M:%SZ") < \ datetime.strptime(attr["finish_time"], "%Y-%m-%dT%H:%M:%SZ"): node.finish_time = attr["finish_time"] node.save() return
from bulbs.rexster import Config
from bulbs.titan import Graph
from bulbs.model import Node, Relationship
from bulbs.property import String, Integer, DateTime, Dictionary

# I used this for a Titan import, though any rexster-implementing graph
# database should work.
#
# It's important to note however, there are *much* better ways for batch
# importing graphs into Titan, see:
# http://s3.thinkaurelius.com/docs/titan/0.9.0-M2/bulk-loading.html

config = Config('http://localhost:32813/graphs/graph')
g = Graph(config)


class Entity(Node):
    element_type = "entity"

    oid = String(nullable=False)
    date_added = DateTime()
    date_updated = DateTime()
    fullname = String(nullable=False)
    msgs = Dictionary(default={})
    name = String(nullable=False)
    service = String(default="instagram")
    user_id = Integer()


class Likes(Relationship):
    label = "likes"