class Graph:
    """
    Fluent query interface over a graph stored in Cog.

    Traversal methods (``v``, ``out``, ``inc``, ``tag``) update
    ``last_visited_vertices`` and return ``self`` so calls can be chained;
    ``count`` and ``all`` read the current traversal result.

    https://www.w3.org/TR/WD-rdf-syntax-971002/
    https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md
    """

    def __init__(self, graph_name, cog_dir):
        '''
        :param graph_name: name of the Cog namespace holding this graph.
        :param cog_dir: passed to Cog as its ``db_path``.
        '''
        self.config = cfg
        self.cog = Cog(db_path=cog_dir, config=cfg)
        self.graph_name = graph_name
        self.cog_dir = cog_dir
        self.all_predicates = self.cog.list_tables()
        # None until v() is called; traversal methods iterate over this list.
        self.last_visited_vertices = None
        self.cog.create_namespace(self.graph_name)

    def load_edgelist(self, edgelist_file_path, graph_name, predicate="none"):
        '''
        Loads an edge list file through Cog and refreshes the cached predicates.
        :param edgelist_file_path:
        :param graph_name:
        :param predicate: predicate handed to Cog for the loaded edges.
        :return: None
        '''
        self.cog.load_edgelist(edgelist_file_path, graph_name, predicate)
        self.all_predicates = self.cog.list_tables()

    def load_triples(self, graph_data_path, graph_name):
        '''
        Loads a list of triples and refreshes the cached predicates.
        :param graph_data_path:
        :param graph_name:
        :return: None
        '''
        self.cog.load_triples(graph_data_path, graph_name)
        self.all_predicates = self.cog.list_tables()

    def put(self, vertex1, predicate, vertex2):
        '''
        Stores the edge vertex1 -[predicate]-> vertex2 in this graph.
        :return: self, for chaining.
        '''
        self.cog.use_namespace(self.graph_name).use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        # A previously unseen predicate may have created a new table.
        self.all_predicates = self.cog.list_tables()
        return self

    def list_predicate_tables(self, cog_dir, graph_name):
        '''
        Collects predicate names from the files under ``cog_dir/graph_name``.

        The predicate name is taken as the part of each file name before the
        first "-".
        :param cog_dir:
        :param graph_name:
        :return: set of names; empty when the directory does not exist.
        '''
        path = "/".join([cog_dir, graph_name])
        if not os.path.exists(path):
            return set()
        return {f.split("-")[0] for f in listdir(path) if isfile(join(path, f))}

    def v(self, vertex=None):
        '''
        Starts a traversal.
        :param vertex: id of the start vertex; when None, every record of the
            node set table becomes a start vertex.
        :return: self, for chaining.
        '''
        #TODO: need to check if node exists
        # Compare against None so that falsy ids such as 0 or "" still select
        # a single vertex instead of silently scanning the whole node set.
        if vertex is not None:
            self.last_visited_vertices = [Vertex(vertex)]
        else:
            self.cog.use_namespace(self.graph_name).use_table(
                self.config.GRAPH_NODE_SET_TABLE_NAME)
            self.last_visited_vertices = [Vertex(r) for r in self.cog.scanner()]
        return self

    def out(self, predicates=None):
        '''
        Follows outgoing edges from the current vertices.
        :param predicates: list of string predicates; all known predicates are
            followed when empty or None.
        :return: self, for chaining.
        '''
        if predicates:
            assert isinstance(predicates, list)
        self.__hop("out", predicates)
        return self

    def inc(self, predicates=None):
        '''
        Follows incoming edges into the current vertices.
        :param predicates: predicates to follow; all known predicates are
            followed when empty or None.
        :return: self, for chaining.
        '''
        self.__hop("in", predicates)
        return self

    def __hop(self, direction, predicates=None, tag=NOTAG):
        '''
        Replaces the current vertices with their neighbours.
        :param direction: "out" follows outgoing edges, anything else incoming.
        :param predicates: predicates to follow; falls back to all predicates.
        :param tag: not used by this method.
        '''
        self.cog.use_namespace(self.graph_name)
        predicates = self.all_predicates if not predicates else predicates
        traverse_vertex = []
        for predicate in predicates:
            for v in self.last_visited_vertices:
                if direction == "out":
                    record = self.cog.use_table(predicate).get(out_nodes(v.id))
                else:
                    record = self.cog.use_table(predicate).get(in_nodes(v.id))
                if record:
                    # record[1][1] is parsed as a Python literal and iterated;
                    # presumably the stored list of adjacent vertex ids.
                    for v_adjacent in ast.literal_eval(record[1][1]):
                        v_adjacent_obj = Vertex(v_adjacent)
                        # Carry tags forward so they appear in the final result.
                        v_adjacent_obj.tags.update(v.tags)
                        traverse_vertex.append(v_adjacent_obj)
        self.last_visited_vertices = traverse_vertex

    def tag(self, tag_name):
        '''
        Saves nodes with a tag name and returned in the result set.
        Primarily used to capture nodes while navigating the graph.
        :param tag_name: key under which each current vertex records its id.
        :return: self, for chaining.
        '''
        for v in self.last_visited_vertices:
            v.tags[tag_name] = v.id
        return self

    def count(self):
        '''
        :return: number of vertices in the current traversal result.
        '''
        return len(self.last_visited_vertices)

    def all(self):
        """
        Returns all the nodes in the result, each as a dict holding its "id"
        plus any tags collected along the traversal.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md
        :return: {"result": [...]}
        """
        result = []
        for v in self.last_visited_vertices:
            item = {"id": v.id}
            item.update(v.tags)
            result.append(item)
        return {"result": result}
class Graph:
    """
    Creates a graph object.

    Traversal methods (``v``, ``out``, ``inc``, ``has``, ``hasr``, ``tag``)
    update ``last_visited_vertices`` and return ``self`` so calls can be
    chained; ``count``, ``all`` and ``view`` read the current result.
    """

    def __init__(self, graph_name, cog_home="cog_home", cog_path_prefix=None):
        '''
        :param graph_name:
        :param cog_home: Home directory name, for most use cases use default.
        :param cog_path_prefix: sets the root directory location for Cog db. Default: '/tmp' set in cog.Config. Change this to current directory when running in an IPython environment.
        '''
        self.config = cfg
        self.config.COG_HOME = cog_home
        if cog_path_prefix:
            self.config.COG_PATH_PREFIX = cog_path_prefix
        self.graph_name = graph_name
        self.cache = {}

        dictConfig(self.config.logging_config)
        self.logger = logging.getLogger("torque")
        self.logger.debug("Torque init on graph: " + graph_name + " predicates: ")

        self.cog = Cog(self.cache)
        self.cog.create_or_load_namespace(self.graph_name)
        self.all_predicates = self.cog.list_tables()

        self.views_dir = self.config.cog_views_dir()
        if not os.path.exists(self.views_dir):
            os.mkdir(self.views_dir)
        self.logger.debug("predicates: " + str(self.all_predicates))
        # None until v() is called; traversal methods iterate over this list.
        self.last_visited_vertices = None

    def refresh(self):
        '''
        Asks Cog to refresh all of its state.
        '''
        self.cog.refresh_all()

    def load_triples(self, graph_data_path, graph_name=None):
        '''
        Loads a list of triples
        :param graph_data_path:
        :param graph_name: defaults to this graph's name.
        :return: None
        '''
        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_triples(graph_data_path, graph_name)
        self.all_predicates = self.cog.list_tables()
        return None

    def load_csv(self, csv_path, id_column_name, graph_name=None):
        """
        Loads CSV to a graph. One column must be designated as ID column.
        :param csv_path:
        :param id_column_name: must not be None.
        :param graph_name: defaults to this graph's name.
        :return: None
        :raises Exception: when id_column_name is None.
        """
        if id_column_name is None:
            raise Exception("id_column_name must not be None")
        graph_name = self.graph_name if graph_name is None else graph_name
        self.cog.load_csv(csv_path, id_column_name, graph_name)
        self.all_predicates = self.cog.list_tables()

    def close(self):
        '''
        Closes the underlying Cog instance.
        '''
        self.logger.info("closing graph: "+self.graph_name)
        self.cog.close()

    def put(self, vertex1, predicate, vertex2):
        '''
        Stores the edge vertex1 -[predicate]-> vertex2 in this graph.
        :return: self, for chaining.
        '''
        self.cog.use_namespace(self.graph_name).use_table(predicate)
        self.cog.put_node(vertex1, predicate, vertex2)
        # A previously unseen predicate may have created a new table.
        self.all_predicates = self.cog.list_tables()
        return self

    def v(self, vertex=None, func=None):
        '''
        Starts a traversal.
        :param vertex: a vertex id or a list of vertex ids; when None the node
            set table is scanned instead.
        :param func: optional filter applied to each scanned key; only used
            when vertex is None. Keys for which it returns a falsy value are
            skipped.
        :return: self, for chaining.
        '''
        if vertex is not None:
            if isinstance(vertex, list):
                self.last_visited_vertices = [Vertex(v) for v in vertex]
            else:
                self.last_visited_vertices = [Vertex(vertex)]
            return self

        self.last_visited_vertices = []
        self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)
        for r in self.cog.scanner():
            if func is not None and not func(r.key):
                continue
            self.last_visited_vertices.append(Vertex(r.key))
        return self

    def _resolve_predicates(self, predicates):
        '''
        Turns a predicate argument into the list of tables to traverse.
        :param predicates: a single predicate, a list of predicates, or None.
        :return: the predicates passed through hash_predicate, or
            self.all_predicates when predicates is None.
        '''
        if predicates is None:
            return self.all_predicates
        if not isinstance(predicates, list):
            predicates = [predicates]
        return list(map(hash_predicate, predicates))

    def out(self, predicates=None, func=None):
        '''
        Traverse forward through edges.
        :param predicates: A string or a List of strings.
        :param func: optional filter on adjacent vertex ids; cannot be combined
            with a list of predicates.
        :return: self, for chaining.
        '''
        if func:
            assert callable(func), "func must be a lambda. Example: func = lambda d: int(d) > 5"
            assert not isinstance(predicates, list), "func cannot be used with a list of predicates"

        predicates = self._resolve_predicates(predicates)
        self.logger.debug("OUT: predicates: "+str(predicates))
        self.__hop("out", predicates=predicates, func=func)
        return self

    def inc(self, predicates=None, func=None):
        '''
        Traverse backward through edges.
        :param predicates: A string or a List of strings.
        :param func: optional filter on adjacent vertex ids; cannot be combined
            with a list of predicates.
        :return: self, for chaining.
        '''
        if func:
            assert callable(func), "func must be a lambda. Example: func = lambda d: int(d) > 5"
            assert not isinstance(predicates, list), "func cannot be used with a list of predicates"

        predicates = self._resolve_predicates(predicates)
        self.__hop("in", predicates, func=func)
        return self

    def __adjacent_vertices(self, vertex, predicates, direction='out'):
        '''
        Collects the vertices adjacent to ``vertex`` over ``predicates``.
        :param vertex: vertex whose neighbours are looked up.
        :param predicates: predicate tables to look in.
        :param direction: 'out' or 'in'; any other value yields an empty list.
        :return: list of Vertex objects, each marked with the predicate it was
            reached through.
        '''
        self.cog.use_namespace(self.graph_name)
        adjacent_vertices = []
        for predicate in predicates:
            if direction == 'out':
                record = self.cog.use_table(predicate).get(out_nodes(vertex.id))
            elif direction == 'in':
                record = self.cog.use_table(predicate).get(in_nodes(vertex.id))
            else:
                continue
            # Only existing records carry adjacent vertices. The incoming
            # branch used to test the opposite, so it skipped real records
            # and dereferenced None when there was no record.
            if record is not None:
                for v_adj in record.value:
                    adjacent_vertices.append(Vertex(v_adj).set_edge(predicate))
        return adjacent_vertices

    def has(self, predicates, vertex):
        """
        Keeps the current vertices that have an outgoing edge, over one of the
        given predicates, to the vertex with id ``vertex``. A vertex is kept
        once per matching adjacent vertex.
        :param predicates: a string, a list of strings, or None for all
            predicates.
        :param vertex: id of the target vertex.
        :return: self, for chaining.
        """
        predicates = self._resolve_predicates(predicates)
        has_vertices = []
        for lv in self.last_visited_vertices:
            for av in self.__adjacent_vertices(lv, predicates):
                if av.id == vertex:
                    has_vertices.append(lv)
        self.last_visited_vertices = has_vertices
        return self

    def hasr(self, predicates, vertex):
        """
        'Has' in reverse. Keeps the current vertices that have an incoming
        edge, over one of the given predicates, from the vertex with id
        ``vertex``. A vertex is kept once per matching adjacent vertex.
        :param predicates: a string, a list of strings, or None for all
            predicates.
        :param vertex: id of the source vertex.
        :return: self, for chaining.
        """
        predicates = self._resolve_predicates(predicates)
        has_vertices = []
        for lv in self.last_visited_vertices:
            for av in self.__adjacent_vertices(lv, predicates, 'in'):
                if av.id == vertex:
                    has_vertices.append(lv)
        self.last_visited_vertices = has_vertices
        return self

    def scan(self, limit=10, scan_type='v'):
        '''
        Scans vertices or edges in a graph.
        :param limit: maximum number of records returned.
        :param scan_type: 'v' for vertices or 'e' for edges.
        :return: {"result": [{"id": ...}, ...]}
        '''
        assert type(scan_type) is str, "Scan type must be either 'v' for vertices or 'e' for edges."
        if scan_type == 'e':
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME)
        else:
            self.cog.use_namespace(self.graph_name).use_table(self.config.GRAPH_NODE_SET_TABLE_NAME)

        result = []
        for i, r in enumerate(self.cog.scanner()):
            if i >= limit:
                break
            # Vertex records are identified by key, every other scan type by value.
            v = Vertex(r.key) if scan_type == 'v' else Vertex(r.value)
            result.append({"id": v.id})
        return {"result": result}

    def __hop(self, direction, predicates=None, tag=NOTAG, func=None):
        '''
        Replaces the current vertices with their neighbours.
        :param direction: "out" follows outgoing edges, anything else incoming.
        :param predicates: predicate tables to follow; all predicates when None.
        :param tag: not used by this method.
        :param func: optional filter on adjacent vertex ids; ids for which it
            returns a falsy value are skipped.
        '''
        if predicates is None:
            predicates = self.all_predicates
        self.logger.debug("__hop : direction: " + str(direction) + " predicates: " + str(predicates) + " graph name: "+self.graph_name)
        self.cog.use_namespace(self.graph_name)
        # Materialise the ids: str(map(...)) would only log a map object on Python 3.
        self.logger.debug("hopping from vertices: " + str([x.id for x in self.last_visited_vertices]))
        self.logger.debug("direction: " + str(direction) + " predicates: "+str(self.all_predicates))
        traverse_vertex = []
        for predicate in predicates:
            self.logger.debug("__hop predicate: "+predicate + " of "+ str(predicates))
            for v in self.last_visited_vertices:
                if direction == "out":
                    record = self.cog.use_table(predicate).get(out_nodes(v.id))
                else:
                    record = self.cog.use_table(predicate).get(in_nodes(v.id))
                if record is None:
                    continue
                for v_adjacent in record.value:
                    if func is not None and not func(v_adjacent):
                        continue
                    v_adjacent_obj = Vertex(v_adjacent).set_edge(predicate)
                    # Carry tags forward so they appear in the final result.
                    v_adjacent_obj.tags.update(v.tags)
                    traverse_vertex.append(v_adjacent_obj)
        self.last_visited_vertices = traverse_vertex

    def tag(self, tag_name):
        '''
        Saves vertices with a tag name. Used to capture vertices while traversing a graph.
        :param tag_name: key under which each current vertex records its id.
        :return: self, for chaining.
        '''
        for v in self.last_visited_vertices:
            v.tags[tag_name] = v.id
        return self

    def count(self):
        '''
        :return: number of vertices in the current traversal result.
        '''
        return len(self.last_visited_vertices)

    def all(self, options=None):
        """
        Returns all the vertices that are resultant of the graph query. Options 'e' would include the edges that were traversed.
        https://github.com/cayleygraph/cayley/blob/master/docs/GizmoAPI.md
        :param options: when it contains 'e', each item with edges gets an
            'edges' entry looked up from the edge set table.
        :return: {"result": [...]}; each item holds "id" plus the vertex tags.
        """
        result = []
        show_edge = options is not None and 'e' in options
        for v in self.last_visited_vertices:
            item = {"id": v.id}
            if show_edge and v.edges:
                item['edges'] = [
                    self.cog.use_namespace(self.graph_name)
                    .use_table(self.config.GRAPH_EDGE_SET_TABLE_NAME)
                    .get(edge).value
                    for edge in v.edges
                ]
            item.update(v.tags)
            result.append(item)
        return {"result": result}

    def view(self, view_name, js_src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"):
        """
        Returns html view of the resulting graph from a query.
        :param view_name: name of the html file written under the views directory.
        :param js_src: URL substituted into the graph library script template.
        :return: the persisted View.
        """
        assert view_name is not None, "a view name is required to create a view, it can be any string."
        result = self.all()
        view_html = script_part1 + graph_lib_src.format(js_src=js_src) + graph_template.format(plot_data_insert=json.dumps(result['result'])) + script_part2
        view_path = self.views_dir+"/{view_name}.html".format(view_name=view_name)
        view = View(view_path, view_html)
        view.persist()
        return view

    def getv(self, view_name):
        '''
        Loads a previously created view from the views directory.
        :param view_name: name given to view() when the view was created.
        :return: View built from the stored html.
        '''
        view_path = self.views_dir + "/{view_name}.html".format(view_name=view_name)
        assert os.path.isfile(view_path), "view not found, create a view by calling .view()"
        with open(view_path, 'r') as f:
            view_html = f.read()
        return View(view_path, view_html)

    def lsv(self):
        '''
        Lists view names: each entry of the views directory up to its first ".".
        '''
        return [f.split(".")[0] for f in listdir(self.views_dir)]

    def get_new_graph_instance(self):
        '''
        :return: a new Graph with this graph's name and the current COG_HOME
            and COG_PATH_PREFIX config values.
        '''
        return Graph(self.graph_name, self.config.COG_HOME, self.config.COG_PATH_PREFIX)