class PersistentGraph(Graph):
    """
    Persistent Graph database storing data to a file system.

    See :class:`~.IGraph` for doco.

    .. note::

        Vertices and Edges ID's are retained when the path is loaded.

    .. warning::

        Use this persistent graph if performance is not important.
        There is a performance hit due to the extra disk I/O overhead
        when doing many writing/updating operations.

    .. code::

        path
           |_ vertices
           |     |_ constraints.json (file)
           |     |_ label
           |     |     |_ 0
           |     |        |_ properties.json (file)
           |     |        |_ in-edges
           |     |        |     |_ 0 -> ../../../../edges/label/0 (symlink)
           |     |        |_ out-edges
           |     |              |_
           |     |
           |     |_ label
           |     |     |_ 1
           |     |        |_ properties.json (file)
           |     |        |_ in-edges
           |     |        |     |_
           |     |        |_ out-edges
           |     |              |_ 0 -> ../../../../edges/label/0 (symlink)
           |
           |_ edges
                 |_ label
                       |
                       |_0
                         |_ properties.json (file)
                         |_ head
                         |   |_ 0 -> ../../../vertices/0 (symlink)
                         |_ tail
                             |_ 1 -> ../../../vertices/1 (symlink)

    .. note::

        The arrows above are illustrative. The symlinks are created
        pointing at the ``path`` attribute of the vertex or edge, which is
        built by joining on to the ``path`` given to the graph.

    :param path: Path to ruruki graph data on disk.
    :type path: :class:`str`
    :param auto_create: If True, then missing ``vertices`` or ``edges``
        directories will be created.
    :type auto_create: :class:`bool`
    :raises DatabasePathLocked: If the path is already locked by another
        persistence graph instance.
    """

    def __init__(self, path, auto_create=True):
        super(PersistentGraph, self).__init__()
        self._vclass = PersistentVertex
        self._eclass = PersistentEdge
        self._lock = DirectoryLock(path)

        try:
            self._lock.acquire()
        except interfaces.AcquireError:
            logging.exception(
                "Path %r is already owned by another graph.", path
            )
            raise interfaces.DatabasePathLocked(
                "Path {0!r} is already locked by another persistent graph "
                "instance.".format(path)
            )

        self.path = path
        self.vertices_path = os.path.join(self.path, "vertices")
        self.edges_path = os.path.join(self.path, "edges")
        self.vertices_constraints_path = os.path.join(
            self.vertices_path, "constraints.json"
        )

        # The lock is held from here on. If creating or loading the
        # database fails the caller never receives an instance to call
        # close() on, so release the lock before propagating the error.
        try:
            if auto_create is True:
                self._auto_create()
            self._load_from_path()
        except Exception:
            self._lock.release()
            raise

    @staticmethod
    def _load_properties(prop_file):
        """
        Read the properties stored in a ``properties.json`` file.

        :param prop_file: Properties file to parse, or a false value if
            the entity has no properties file.
        :type prop_file: :class:`str` or :obj:`None`
        :returns: The decoded JSON content, or an empty dictionary if no
            file was given.
        :rtype: :class:`dict`
        """
        if not prop_file:
            return {}

        # Context manager so the handle is closed straight away instead
        # of lingering until garbage collection.
        with open(prop_file) as prop_fh:
            return json.load(prop_fh)

    def _auto_create(self):
        """
        Check that ``vertices`` and ``edges`` directories exists, and
        if not create them and all the other required files and folders.

        Each directory is checked on its own, so a path that only has one
        of the two is completed rather than failing on the directory that
        is already there.
        """
        if not os.path.exists(self.vertices_path):
            self._create_vertex_skel(self.path)

        if not os.path.exists(self.edges_path):
            self._create_edge_skel(self.path)

    def _load_from_path(self):
        """
        Scan through the given database path and load/import up all the
        relevant vertices, vertices constraints, and edges.
        """
        logging.info("Loading graph data from %r", self.path)
        self._load_vconstraints_from_path(self.vertices_constraints_path)
        self._load_vertices_from_path(self.vertices_path)
        self._load_edges_from_path(self.edges_path)
        logging.info("Completed %r graph import", self.path)

    def _load_vconstraints_from_path(self, path):
        """
        Open, parse and load the vertices constraints.

        :param path: Vertices constraints file to open, parse and import.
        :type path: :class:`str`
        """
        logging.info("Loading vertices constraints %r", path)
        with open(path) as vconstraints_fh:
            constraints = json.load(vconstraints_fh)

        for each in constraints:
            # Use the parent implementation, like the vertex and edge
            # loaders do. The overridden add_vertex_constraint rewrites
            # the constraints file on every call, which would truncate
            # the file being imported once per constraint.
            super(PersistentGraph, self).add_vertex_constraint(
                each["label"], each["key"]
            )

    def _load_vertices_from_path(self, path):
        """
        Scan through the given path and load/import all the vertices.

        .. code::

            path
               |_ vertices
                    |_ constraints.json (file)
                    |_ labelA
                    |     |_ 0
                    |        |_ properties.json (file)
                    |
                    |_ labelB
                          |_ 1
                             |_ properties.json (file)

        :param path: Vertices Path to walk and import.
        :type path: :class:`str`
        """
        logging.info("Loading vertices from %r", path)
        sorted_to_import = sorted(
            _search_for_vertex_id(path),
            key=lambda x: x[0]
        )

        for ident, label, prop_file in sorted_to_import:
            properties = self._load_properties(prop_file)

            # reset the id to the id being loaded.
            self._id_tracker.vid = ident
            vertex = super(PersistentGraph, self).add_vertex(
                label, **properties
            )

            # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
            # need to disable assigning-non-slot errors
            vertex.path = os.path.join(path, label, str(ident))  # pylint: disable=assigning-non-slot

    def _load_edges_from_path(self, path):
        """
        Scan through the given path and load/import all the edges.

        .. code::

            path
               |_ edges
                    |_ label
                          |
                          |_0
                            |_ properties.json (file)
                            |_ head
                            |   |_ 0 -> ../../../vertices/0 (symlink)
                            |_ tail
                                |_ 1 -> ../../../vertices/1 (symlink)

        :param path: Edges Path to walk and import.
        :type path: :class:`str`
        :raises KeyError: If the head or tail of the edge being imported
            is unknown.
        """
        logging.info("Loading edges from %r", path)
        sorted_to_import = sorted(
            _search_for_edge_ids(path),
            key=lambda x: x[0]
        )

        for ident, head_id, label, tail_id, prop_file in sorted_to_import:
            properties = self._load_properties(prop_file)
            head = self.get_vertex(head_id)
            tail = self.get_vertex(tail_id)

            # reset the id to the id being loaded.
            self._id_tracker.eid = ident
            edge = super(PersistentGraph, self).add_edge(
                head, label, tail, **properties
            )

            # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
            # need to disable assigning-non-slot errors
            edge.path = os.path.join(path, label, str(ident))  # pylint: disable=assigning-non-slot

    def _create_vertex_skel(self, path):
        """
        Create a vertex skeleton path.

        Creates the ``vertices`` directory and an empty
        ``constraints.json`` file inside of it.

        :param path: Path to create the vertex skeleton structure in.
        :type path: :class:`str`
        """
        self.vertices_path = os.path.join(path, "vertices")
        os.makedirs(self.vertices_path)

        self.vertices_constraints_path = os.path.join(
            self.vertices_path, "constraints.json"
        )
        with open(self.vertices_constraints_path, "w") as constraint_fh:
            constraint_fh.write("[]")

    def _create_edge_skel(self, path):
        """
        Create a edge skeleton path.

        :param path: Path to create the edge skeleton structure in.
        :type path: :class:`str`
        """
        self.edges_path = os.path.join(path, "edges")
        os.makedirs(self.edges_path)

    def add_vertex_constraint(self, label, key):
        """
        Add a vertex constraint and persist all the constraints to disk.

        :param label: Label passed on to the parent implementation.
        :param key: Key passed on to the parent implementation.
        """
        super(PersistentGraph, self).add_vertex_constraint(label, key)

        # Build the payload before opening the file for writing, so a
        # failure while collecting the constraints can not leave the
        # file truncated.
        data = [
            {"label": each_label, "key": each_key}
            for each_label, each_key in self.get_vertex_constraints()
        ]
        with open(self.vertices_constraints_path, "w") as constraint_fh:
            json.dump(data, constraint_fh, indent=4)

    def add_vertex(self, label=None, **kwargs):
        """
        Add a vertex and create its directory structure on disk.

        Creates ``<vertices>/<label>/<ident>`` holding ``in-edges``,
        ``out-edges`` and ``properties.json``.

        :param label: Vertex label, used as a directory name.
        :param kwargs: Properties passed on to the parent implementation.
        :returns: The vertex returned by the parent implementation.
        """
        vertex = super(PersistentGraph, self).add_vertex(label, **kwargs)

        # NOTE(review): the label is used as a path component below, so the
        # default ``label=None`` can not be joined in to a path - confirm
        # whether callers are expected to always supply a label.

        # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
        # need to disable assigning-non-slot errors
        vertex.path = os.path.join(self.vertices_path, label, str(vertex.ident))  # pylint: disable=assigning-non-slot
        os.makedirs(vertex.path)
        os.makedirs(os.path.join(vertex.path, "in-edges"))
        os.makedirs(os.path.join(vertex.path, "out-edges"))

        with open(os.path.join(vertex.path, "properties.json"), "w") as fh:
            json.dump(vertex.properties, fh)

        return vertex

    def add_edge(self, head, label, tail, **kwargs):
        """
        Add an edge and create its directory structure on disk.

        Creates ``<edges>/<label>/<ident>`` holding ``head``, ``tail`` and
        ``properties.json``, then symlinks the edge and its two vertices
        to one another.

        :param head: Head vertex; its ``path`` and ``ident`` are used.
        :param label: Edge label, used as a directory name.
        :param tail: Tail vertex; its ``path`` and ``ident`` are used.
        :param kwargs: Properties passed on to the parent implementation.
        :returns: The edge returned by the parent implementation.
        """
        edge = super(PersistentGraph, self).add_edge(
            head, label, tail, **kwargs
        )

        # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
        # need to disable assigning-non-slot errors
        edge.path = os.path.join(self.edges_path, label, str(edge.ident))  # pylint: disable=assigning-non-slot
        head_path = os.path.join(edge.path, "head")
        tail_path = os.path.join(edge.path, "tail")

        os.makedirs(edge.path)
        os.makedirs(head_path)
        os.makedirs(tail_path)

        with open(os.path.join(edge.path, "properties.json"), "w") as fh:
            json.dump(edge.properties, fh)

        # Link the edge to the vertices it connects ...
        os.symlink(head.path, os.path.join(head_path, str(head.ident)))
        os.symlink(tail.path, os.path.join(tail_path, str(tail.ident)))

        # ... and each vertex back to the edge.
        os.symlink(
            edge.path,
            os.path.join(head.path, "out-edges", str(edge.ident))
        )
        os.symlink(
            edge.path,
            os.path.join(tail.path, "in-edges", str(edge.ident))
        )

        return edge

    def set_property(self, entity, **kwargs):
        """
        Set properties on an entity and rewrite its ``properties.json``.

        :param entity: Vertex or edge with a ``path`` attribute.
        :param kwargs: Properties passed on to the parent implementation.
        """
        super(PersistentGraph, self).set_property(entity, **kwargs)

        # Update the properties to the properties file
        properties_file = os.path.join(entity.path, "properties.json")

        # The "_path" key is left out of what is written to disk.
        persisted = dict(
            (k, v)
            for k, v in entity.properties.items()
            if k != "_path"
        )
        with open(properties_file, "w") as prop_file:
            json.dump(persisted, prop_file, indent=4)

    def remove_edge(self, edge):
        """
        Remove an edge from the graph and delete its directory.

        :param edge: Edge to remove; its ``path`` is deleted from disk.
        """
        super(PersistentGraph, self).remove_edge(edge)

        # NOTE(review): only the edge directory is deleted here; the
        # ``in-edges``/``out-edges`` symlinks created by add_edge inside
        # the vertex directories are not touched - confirm if intended.
        shutil.rmtree(edge.path)

    def remove_vertex(self, vertex):
        """
        Remove a vertex from the graph and delete its directory.

        :param vertex: Vertex to remove; its ``path`` is deleted from disk.
        """
        super(PersistentGraph, self).remove_vertex(vertex)
        shutil.rmtree(vertex.path)

    def close(self):
        """
        Release the lock taken on the database path.
        """
        self._lock.release()
class PersistentGraph(Graph):
    """
    Persistent Graph database storing data to a file system.

    See :class:`~.IGraph` for doco.

    .. note::

        Vertices and Edges ID's are retained when the path is loaded.

    .. warning::

        Use this persistent graph if performance is not important.
        There is a performance hit due to the extra disk I/O overhead
        when doing many writing/updating operations.

    .. code::

        path
           |_ vertices
           |     |_ constraints.json (file)
           |     |_ label
           |     |     |_ 0
           |     |        |_ properties.json (file)
           |     |        |_ in-edges
           |     |        |     |_ 0 -> ../../../../edges/label/0 (symlink)
           |     |        |_ out-edges
           |     |              |_
           |     |
           |     |_ label
           |     |     |_ 1
           |     |        |_ properties.json (file)
           |     |        |_ in-edges
           |     |        |     |_
           |     |        |_ out-edges
           |     |              |_ 0 -> ../../../../edges/label/0 (symlink)
           |
           |_ edges
                 |_ label
                       |
                       |_0
                         |_ properties.json (file)
                         |_ head
                         |   |_ 0 -> ../../../vertices/0 (symlink)
                         |_ tail
                             |_ 1 -> ../../../vertices/1 (symlink)

    .. note::

        The arrows above are illustrative. The symlinks are created
        pointing at the ``path`` attribute of the vertex or edge, which is
        built by joining on to the ``path`` given to the graph.

    :param path: Path to ruruki graph data on disk.
    :type path: :class:`str`
    :param auto_create: If True, then missing ``vertices`` or ``edges``
        directories will be created.
    :type auto_create: :class:`bool`
    :raises DatabasePathLocked: If the path is already locked by another
        persistence graph instance.
    """

    def __init__(self, path, auto_create=True):
        super(PersistentGraph, self).__init__()
        self._vclass = PersistentVertex
        self._eclass = PersistentEdge
        self._lock = DirectoryLock(path)

        try:
            self._lock.acquire()
        except interfaces.AcquireError:
            logging.exception(
                "Path %r is already owned by another graph.", path
            )
            raise interfaces.DatabasePathLocked(
                "Path {0!r} is already locked by another persistent graph "
                "instance.".format(path)
            )

        self.path = path
        self.vertices_path = os.path.join(self.path, "vertices")
        self.edges_path = os.path.join(self.path, "edges")
        self.vertices_constraints_path = os.path.join(
            self.vertices_path, "constraints.json"
        )

        # The lock is held from here on. If creating or loading the
        # database fails the caller never receives an instance to call
        # close() on, so release the lock before propagating the error.
        try:
            if auto_create is True:
                self._auto_create()
            self._load_from_path()
        except Exception:
            self._lock.release()
            raise

    @staticmethod
    def _load_properties(prop_file):
        """
        Read the properties stored in a ``properties.json`` file.

        :param prop_file: Properties file to parse, or a false value if
            the entity has no properties file.
        :type prop_file: :class:`str` or :obj:`None`
        :returns: The decoded JSON content, or an empty dictionary if no
            file was given.
        :rtype: :class:`dict`
        """
        if not prop_file:
            return {}

        # Context manager so the handle is closed straight away instead
        # of lingering until garbage collection.
        with open(prop_file) as prop_fh:
            return json.load(prop_fh)

    def _auto_create(self):
        """
        Check that ``vertices`` and ``edges`` directories exists, and
        if not create them and all the other required files and folders.

        Each directory is checked on its own, so a path that only has one
        of the two is completed rather than failing on the directory that
        is already there.
        """
        if not os.path.exists(self.vertices_path):
            self._create_vertex_skel(self.path)

        if not os.path.exists(self.edges_path):
            self._create_edge_skel(self.path)

    def _load_from_path(self):
        """
        Scan through the given database path and load/import up all the
        relevant vertices, vertices constraints, and edges.
        """
        logging.info("Loading graph data from %r", self.path)
        self._load_vconstraints_from_path(self.vertices_constraints_path)
        self._load_vertices_from_path(self.vertices_path)
        self._load_edges_from_path(self.edges_path)
        logging.info("Completed %r graph import", self.path)

    def _load_vconstraints_from_path(self, path):
        """
        Open, parse and load the vertices constraints.

        :param path: Vertices constraints file to open, parse and import.
        :type path: :class:`str`
        """
        logging.info("Loading vertices constraints %r", path)
        with open(path) as vconstraints_fh:
            constraints = json.load(vconstraints_fh)

        for each in constraints:
            # Use the parent implementation, like the vertex and edge
            # loaders do. The overridden add_vertex_constraint rewrites
            # the constraints file on every call, which would truncate
            # the file being imported once per constraint.
            super(PersistentGraph, self).add_vertex_constraint(
                each["label"], each["key"]
            )

    def _load_vertices_from_path(self, path):
        """
        Scan through the given path and load/import all the vertices.

        .. code::

            path
               |_ vertices
                    |_ constraints.json (file)
                    |_ labelA
                    |     |_ 0
                    |        |_ properties.json (file)
                    |
                    |_ labelB
                          |_ 1
                             |_ properties.json (file)

        :param path: Vertices Path to walk and import.
        :type path: :class:`str`
        """
        logging.info("Loading vertices from %r", path)
        sorted_to_import = sorted(
            _search_for_vertex_id(path),
            key=lambda x: x[0]
        )

        for ident, label, prop_file in sorted_to_import:
            properties = self._load_properties(prop_file)

            # reset the id to the id being loaded.
            self._id_tracker.vid = ident
            vertex = super(PersistentGraph, self).add_vertex(
                label, **properties
            )

            # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
            # need to disable assigning-non-slot errors
            vertex.path = os.path.join(path, label, str(ident))  # pylint: disable=assigning-non-slot

    def _load_edges_from_path(self, path):
        """
        Scan through the given path and load/import all the edges.

        .. code::

            path
               |_ edges
                    |_ label
                          |
                          |_0
                            |_ properties.json (file)
                            |_ head
                            |   |_ 0 -> ../../../vertices/0 (symlink)
                            |_ tail
                                |_ 1 -> ../../../vertices/1 (symlink)

        :param path: Edges Path to walk and import.
        :type path: :class:`str`
        :raises KeyError: If the head or tail of the edge being imported
            is unknown.
        """
        logging.info("Loading edges from %r", path)
        sorted_to_import = sorted(
            _search_for_edge_ids(path),
            key=lambda x: x[0]
        )

        for ident, head_id, label, tail_id, prop_file in sorted_to_import:
            properties = self._load_properties(prop_file)
            head = self.get_vertex(head_id)
            tail = self.get_vertex(tail_id)

            # reset the id to the id being loaded.
            self._id_tracker.eid = ident
            edge = super(PersistentGraph, self).add_edge(
                head, label, tail, **properties
            )

            # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
            # need to disable assigning-non-slot errors
            edge.path = os.path.join(path, label, str(ident))  # pylint: disable=assigning-non-slot

    def _create_vertex_skel(self, path):
        """
        Create a vertex skeleton path.

        Creates the ``vertices`` directory and an empty
        ``constraints.json`` file inside of it.

        :param path: Path to create the vertex skeleton structure in.
        :type path: :class:`str`
        """
        self.vertices_path = os.path.join(path, "vertices")
        os.makedirs(self.vertices_path)

        self.vertices_constraints_path = os.path.join(
            self.vertices_path, "constraints.json"
        )
        with open(self.vertices_constraints_path, "w") as constraint_fh:
            constraint_fh.write("[]")

    def _create_edge_skel(self, path):
        """
        Create a edge skeleton path.

        :param path: Path to create the edge skeleton structure in.
        :type path: :class:`str`
        """
        self.edges_path = os.path.join(path, "edges")
        os.makedirs(self.edges_path)

    def add_vertex_constraint(self, label, key):
        """
        Add a vertex constraint and persist all the constraints to disk.

        :param label: Label passed on to the parent implementation.
        :param key: Key passed on to the parent implementation.
        """
        super(PersistentGraph, self).add_vertex_constraint(label, key)

        # Build the payload before opening the file for writing, so a
        # failure while collecting the constraints can not leave the
        # file truncated.
        data = [
            {"label": each_label, "key": each_key}
            for each_label, each_key in self.get_vertex_constraints()
        ]
        with open(self.vertices_constraints_path, "w") as constraint_fh:
            json.dump(data, constraint_fh, indent=4)

    def add_vertex(self, label=None, **kwargs):
        """
        Add a vertex and create its directory structure on disk.

        Creates ``<vertices>/<label>/<ident>`` holding ``in-edges``,
        ``out-edges`` and ``properties.json``.

        :param label: Vertex label, used as a directory name.
        :param kwargs: Properties passed on to the parent implementation.
        :returns: The vertex returned by the parent implementation.
        """
        vertex = super(PersistentGraph, self).add_vertex(label, **kwargs)

        # NOTE(review): the label is used as a path component below, so the
        # default ``label=None`` can not be joined in to a path - confirm
        # whether callers are expected to always supply a label.

        # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
        # need to disable assigning-non-slot errors
        vertex.path = os.path.join(self.vertices_path, label, str(vertex.ident))  # pylint: disable=assigning-non-slot
        os.makedirs(vertex.path)
        os.makedirs(os.path.join(vertex.path, "in-edges"))
        os.makedirs(os.path.join(vertex.path, "out-edges"))

        with open(os.path.join(vertex.path, "properties.json"), "w") as fh:
            json.dump(vertex.properties, fh)

        return vertex

    def add_edge(self, head, label, tail, **kwargs):
        """
        Add an edge and create its directory structure on disk.

        Creates ``<edges>/<label>/<ident>`` holding ``head``, ``tail`` and
        ``properties.json``, then symlinks the edge and its two vertices
        to one another.

        :param head: Head vertex; its ``path`` and ``ident`` are used.
        :param label: Edge label, used as a directory name.
        :param tail: Tail vertex; its ``path`` and ``ident`` are used.
        :param kwargs: Properties passed on to the parent implementation.
        :returns: The edge returned by the parent implementation.
        """
        edge = super(PersistentGraph, self).add_edge(
            head, label, tail, **kwargs
        )

        # due to pylint bug https://github.com/PyCQA/pylint/issues/379, we
        # need to disable assigning-non-slot errors
        edge.path = os.path.join(self.edges_path, label, str(edge.ident))  # pylint: disable=assigning-non-slot
        head_path = os.path.join(edge.path, "head")
        tail_path = os.path.join(edge.path, "tail")

        os.makedirs(edge.path)
        os.makedirs(head_path)
        os.makedirs(tail_path)

        with open(os.path.join(edge.path, "properties.json"), "w") as fh:
            json.dump(edge.properties, fh)

        # Link the edge to the vertices it connects ...
        os.symlink(head.path, os.path.join(head_path, str(head.ident)))
        os.symlink(tail.path, os.path.join(tail_path, str(tail.ident)))

        # ... and each vertex back to the edge.
        os.symlink(
            edge.path,
            os.path.join(head.path, "out-edges", str(edge.ident))
        )
        os.symlink(
            edge.path,
            os.path.join(tail.path, "in-edges", str(edge.ident))
        )

        return edge

    def set_property(self, entity, **kwargs):
        """
        Set properties on an entity and rewrite its ``properties.json``.

        :param entity: Vertex or edge with a ``path`` attribute.
        :param kwargs: Properties passed on to the parent implementation.
        """
        super(PersistentGraph, self).set_property(entity, **kwargs)

        # Update the properties to the properties file
        properties_file = os.path.join(entity.path, "properties.json")

        # The "_path" key is left out of what is written to disk.
        persisted = dict(
            (k, v)
            for k, v in entity.properties.items()
            if k != "_path"
        )
        with open(properties_file, "w") as prop_file:
            json.dump(persisted, prop_file, indent=4)

    def remove_edge(self, edge):
        """
        Remove an edge from the graph and delete its directory.

        :param edge: Edge to remove; its ``path`` is deleted from disk.
        """
        super(PersistentGraph, self).remove_edge(edge)

        # NOTE(review): only the edge directory is deleted here; the
        # ``in-edges``/``out-edges`` symlinks created by add_edge inside
        # the vertex directories are not touched - confirm if intended.
        shutil.rmtree(edge.path)

    def remove_vertex(self, vertex):
        """
        Remove a vertex from the graph and delete its directory.

        :param vertex: Vertex to remove; its ``path`` is deleted from disk.
        """
        super(PersistentGraph, self).remove_vertex(vertex)
        shutil.rmtree(vertex.path)

    def close(self):
        """
        Release the lock taken on the database path.
        """
        self._lock.release()