def __init__(self, incoming_graph_data=None, default_label=None, **attr):
    """Initialize the graph, optionally populating it from existing data.

    Args:
        incoming_graph_data: Optional data to build the graph from. When
            ``self._is_gs_graph`` accepts it, the graph is initialized via
            ``_init_with_arrow_property_graph``; otherwise it is handed to
            ``to_networkx_graph`` with this object as ``create_using``.
        default_label: Stored unchanged as ``self._default_label``.
        **attr: Graph attributes merged into ``self.graph``. Two hidden keys
            are popped first and never reach ``self.graph``:
            ``create_empty_in_engine`` (default ``True``) and ``dist``
            (default ``False``).
    """
    # Bind the factories on the instance.
    self.graph_attr_dict_factory = self.graph_attr_dict_factory
    self.node_dict_factory = self.node_dict_factory
    self.adjlist_dict_factory = self.adjlist_dict_factory

    self.graph = self.graph_attr_dict_factory()
    self.cache = self.graph_cache_factory(self)

    # Node and adjacency containers must be created after the cache.
    self._node = self.node_dict_factory(self)
    self._adj = self.adjlist_dict_factory(self)
    self._pred = self.adjlist_dict_factory(self, pred=True)
    self._succ = self._adj

    self._key = None
    self._op = None
    self._session_id = None
    self._graph_type = self._graph_type

    self._schema = GraphSchema()
    self._schema.init_nx_schema()

    # Buffers for batched add/remove of nodes and edges.
    self._add_node_cache = []
    self._add_edge_cache = []
    self._remove_node_cache = []
    self._remove_edge_cache = []

    # Hidden parameters: strip them from ``attr`` before it is used below.
    want_empty_in_engine = attr.pop("create_empty_in_engine", True)
    self._distributed = attr.pop("dist", False)

    has_data = incoming_graph_data is not None
    if has_data and self._is_gs_graph(incoming_graph_data):
        # Converting from a gs graph always uses distributed mode.
        self._distributed = True
        if self._session is None:
            self._session = get_session_by_id(incoming_graph_data.session_id)

    self._default_label = default_label

    if self._session is None:
        self._try_to_get_default_session()

    if not self._is_gs_graph(incoming_graph_data) and want_empty_in_engine:
        empty_def = empty_graph_in_engine(
            self, self.is_directed(), self._distributed
        )
        self._key = empty_def.key

    # Attempt to load the graph with the supplied data.
    if has_data:
        if self._is_gs_graph(incoming_graph_data):
            self._init_with_arrow_property_graph(incoming_graph_data)
            self.cache.warmup()
        else:
            converted = to_networkx_graph(incoming_graph_data, create_using=self)
            check_argument(isinstance(converted, Graph))

    # Graph attributes must be loaded after to_networkx_graph.
    self.graph.update(attr)
    self._saved_signature = self.signature
def __init__(self, session_id, incoming_data=None):
    """Construct a :class:`Graph` object.

    Args:
        session_id (str): Session id of the session the graph is created in.
        incoming_data: Graph can be initialized through various type of sources,
            which can be one of:
                - :class:`GraphDef`
                - :class:`nx.Graph`
                - :class:`Graph`
                - :class:`vineyard.Object`, :class:`vineyard.ObjectID`
                  or :class:`vineyard.ObjectName`

    Raises:
        ValueError: If ``incoming_data`` is none of the supported types
            (this includes the default ``None``).
    """
    # Don't import the :code:`NXGraph` in top-level statements to improve the
    # performance of :code:`import graphscope`.
    from graphscope.experimental.nx.classes.graph import Graph as NXGraph

    self._key = None
    self._op = None
    self._graph_type = None
    self.directed = False
    # Keep the private flag defined even when no graph_def is applied below;
    # previously only the public ``directed`` attribute was initialised.
    self._directed = False
    self._vineyard_id = 0
    self._schema = GraphSchema()
    self._session_id = session_id
    self._detached = False
    self._interactive_instance_list = []
    self._learning_instance_list = []

    if isinstance(incoming_data, GraphDef):
        graph_def = incoming_data
    elif isinstance(incoming_data, NXGraph):
        graph_def = self._from_nx_graph(incoming_data)
    elif isinstance(incoming_data, Graph):
        graph_def = self._copy_from(incoming_data)
    elif isinstance(
        incoming_data,
        (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName),
    ):
        graph_def = self._from_vineyard(incoming_data)
    else:
        # Format the message explicitly: exceptions do not apply %-style
        # arguments the way logging calls do.
        raise ValueError(
            "Failed to create a graph on graphscope engine: %s" % (incoming_data,)
        )

    if graph_def:
        self._key = graph_def.key
        self._vineyard_id = graph_def.vineyard_id
        self._graph_type = graph_def.graph_type
        self._directed = graph_def.directed
        self._schema.get_schema_from_def(graph_def.schema_def)
        self._schema_path = graph_def.schema_path

    # init saved_signature (must be after init schema)
    self._saved_signature = self.signature
class Graph:
    """Client-side handle that issues vertex/edge write requests over a connection.

    Every mutating method builds a write request with ``to_write_requests_pb``
    and submits it through ``self._conn.batch_write``, returning whatever that
    call returns.
    """

    def __init__(self, graph_def, conn=None) -> None:
        """Build the schema from ``graph_def`` and remember the connection."""
        self._schema = GraphSchema()
        self._schema.from_graph_def(graph_def)
        self._conn: Connection = conn
        # The schema object shares the same connection.
        self._schema._conn = conn

    def schema(self):
        """Return the schema built from the graph definition."""
        return self._schema

    # ---- vertices ----

    def insert_vertex(self, vertex: VertexRecordKey, properties: dict):
        """Insert one vertex; delegates to :meth:`insert_vertices`."""
        single_batch = [[vertex, properties]]
        return self.insert_vertices(single_batch)

    def insert_vertices(self, vertices: list):
        """Insert a list of ``[vertex_key, properties]`` records."""
        write_request = to_write_requests_pb(
            "VERTEX", vertices, write_service_pb2.INSERT
        )
        return self._conn.batch_write(write_request)

    def update_vertex_properties(self, vertex: VertexRecordKey, properties: dict):
        """Submit an UPDATE request for a single vertex."""
        records = [[vertex, properties]]
        write_request = to_write_requests_pb(
            "VERTEX", records, write_service_pb2.UPDATE
        )
        return self._conn.batch_write(write_request)

    def delete_vertex(self, vertex_pk: VertexRecordKey):
        """Delete one vertex; delegates to :meth:`delete_vertices`."""
        return self.delete_vertices([vertex_pk])

    def delete_vertices(self, vertex_pks: list):
        """Delete vertices by key; each key is paired with empty properties."""
        records = [[key, {}] for key in vertex_pks]
        write_request = to_write_requests_pb(
            "VERTEX", records, write_service_pb2.DELETE
        )
        return self._conn.batch_write(write_request)

    # ---- edges ----

    def insert_edge(self, edge: EdgeRecordKey, properties: dict):
        """Insert one edge; delegates to :meth:`insert_edges`."""
        single_batch = [[edge, properties]]
        return self.insert_edges(single_batch)

    def insert_edges(self, edges: list):
        """Insert a list of ``[edge_key, properties]`` records."""
        write_request = to_write_requests_pb(
            "EDGE", edges, write_service_pb2.INSERT
        )
        return self._conn.batch_write(write_request)

    def update_edge_properties(self, edge: EdgeRecordKey, properties: dict):
        """Submit an UPDATE request for a single edge."""
        records = [[edge, properties]]
        write_request = to_write_requests_pb(
            "EDGE", records, write_service_pb2.UPDATE
        )
        return self._conn.batch_write(write_request)

    def delete_edge(self, edge: EdgeRecordKey):
        """Delete one edge; delegates to :meth:`delete_edges`."""
        return self.delete_edges([edge])

    def delete_edges(self, edge_pks: list):
        """Delete edges by key; each key is paired with empty properties."""
        records = [[key, {}] for key in edge_pks]
        write_request = to_write_requests_pb(
            "EDGE", records, write_service_pb2.DELETE
        )
        return self._conn.batch_write(write_request)
def __init__(
    self,
    graph_node,
):
    """Construct a :class:`Graph` object from a graph node.

    The node's op is replaced by a deep copy, the node is flagged as
    evaluated, and the copied op is registered with the session's DAG.

    Args:
        graph_node: Object exposing ``session``, ``op`` and ``evaluated``.
    """
    self._graph_node = graph_node
    session = self._graph_node.session
    self._session = session

    # Work on a private copy of the op and mark the node as evaluated
    # before handing the op to the session's DAG.
    graph_node.op = deepcopy(graph_node.op)
    graph_node.evaluated = True
    session.dag.add_op(graph_node.op)

    # Engine-side identity; both start out unset.
    self._key = None
    self._vineyard_id = 0

    self._schema = GraphSchema()
    self._detached = False

    # Bookkeeping for instances attached to this graph.
    self._interactive_instance_launching_thread = None
    self._interactive_instance_list = []
    self._learning_instance_list = []
def __init__(
    self,
    session,
    incoming_data=None,
    oid_type="int64",
    directed=True,
    generate_eid=True,
):
    """Construct a :class:`Graph` object.

    Args:
        session: The session the graph is created in; stored as
            ``self._session``.
        incoming_data: Optional source to initialize the graph from, one of:
                - :class:`Operation`
                - :class:`nx.Graph`
                - :class:`Graph`
                - :class:`vineyard.Object`, :class:`vineyard.ObjectID`
                  or :class:`vineyard.ObjectName`
        oid_type (str): Passed through ``utils.normalize_data_type_str``;
            the normalized value must be ``"int64_t"`` or ``"std::string"``.
        directed (bool): Stored as ``self._directed``.
        generate_eid (bool): Stored as ``self._generate_eid``.

    Raises:
        ValueError: If the normalized ``oid_type`` is not supported.
        RuntimeError: If ``incoming_data`` is of an unsupported type.
    """
    self._key = None
    self._graph_type = types_pb2.ARROW_PROPERTY
    self._vineyard_id = 0
    self._schema = GraphSchema()
    self._session = session
    self._detached = False

    self._interactive_instance_launching_thread = None
    self._interactive_instance_list = []
    self._learning_instance_list = []

    # Uncompleted operation, kept for lazy evaluation.
    self._pending_op = None
    # Reference to the base graph of a modify operation, held so that it
    # is not garbage collected.
    self._base_graph = None

    normalized_oid = utils.normalize_data_type_str(oid_type)
    if normalized_oid not in ("int64_t", "std::string"):
        raise ValueError("oid_type can only be int64_t or string.")
    self._oid_type = normalized_oid
    self._directed = directed
    self._generate_eid = generate_eid

    self._unsealed_vertices = {}
    self._unsealed_edges = {}

    # Used to display the schema without loading into vineyard, and to
    # sanity-check newly added vertices and edges.
    self._v_labels = []
    self._e_labels = []
    self._e_relationships = []

    if incoming_data is None:
        return

    # Imported here rather than at module top level to keep
    # :code:`import graphscope` fast.
    from graphscope.experimental import nx

    vineyard_types = (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName)

    if isinstance(incoming_data, Operation):
        self._pending_op = incoming_data
        if self._pending_op.type == types_pb2.PROJECT_GRAPH:
            self._graph_type = types_pb2.ARROW_PROJECTED
        return
    if isinstance(incoming_data, nx.Graph):
        self._pending_op = self._from_nx_graph(incoming_data)
        return
    if isinstance(incoming_data, Graph):
        self._pending_op = self._copy_from(incoming_data)
        return
    if isinstance(incoming_data, vineyard_types):
        self._pending_op = self._from_vineyard(incoming_data)
        return
    raise RuntimeError("Not supported incoming data.")
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema,
    and the graph is directed or not.

    It is worth noting that the graph is stored by the backend such as
    Analytical Engine, Vineyard. In other words, the graph object holds nothing
    but metadata.

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> graph = sess.g()
        >>> graph = graph.add_vertices("person.csv","person")
        >>> graph = graph.add_vertices("software.csv", "software")
        >>> graph = graph.add_edges("knows.csv", "knows", src_label="person", dst_label="person")
        >>> graph = graph.add_edges("created.csv", "created", src_label="person", dst_label="software")
        >>> print(graph)
        >>> print(graph.schema)
    """

    def __init__(
        self,
        session,
        incoming_data=None,
        oid_type="int64",
        directed=True,
        generate_eid=True,
    ):
        """Construct a :class:`Graph` object.

        Args:
            session: The session the graph is created in.
            incoming_data: Graph can be initialized through various type of
                sources, which can be one of:
                    - :class:`Operation`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`vineyard.Object`, :class:`vineyard.ObjectID`
                      or :class:`vineyard.ObjectName`
            oid_type (str): Normalized with ``utils.normalize_data_type_str``;
                the result must be ``"int64_t"`` or ``"std::string"``.
            directed (bool): Stored as ``self._directed``.
            generate_eid (bool): Stored as ``self._generate_eid``.

        Raises:
            ValueError: If the normalized ``oid_type`` is not supported.
            RuntimeError: If ``incoming_data`` is of an unsupported type.
        """
        self._key = None
        self._graph_type = types_pb2.ARROW_PROPERTY
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._session = session
        self._detached = False

        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        # Hold uncompleted operation for lazy evaluation
        self._pending_op = None
        # Hold a reference to base graph of modify operation,
        # to avoid being garbage collected
        self._base_graph = None

        oid_type = utils.normalize_data_type_str(oid_type)
        if oid_type not in ("int64_t", "std::string"):
            raise ValueError("oid_type can only be int64_t or string.")
        self._oid_type = oid_type
        self._directed = directed
        self._generate_eid = generate_eid

        self._unsealed_vertices = {}
        self._unsealed_edges = {}
        # Used to display schema without load into vineyard,
        # and do sanity checking for newly added vertices and edges.
        self._v_labels = []
        self._e_labels = []
        self._e_relationships = []

        if incoming_data is not None:
            # Don't import :code:`nx` in top-level statements to improve the
            # performance of :code:`import graphscope`.
            from graphscope.experimental import nx

            if isinstance(incoming_data, Operation):
                self._pending_op = incoming_data
                if self._pending_op.type == types_pb2.PROJECT_GRAPH:
                    self._graph_type = types_pb2.ARROW_PROJECTED
            elif isinstance(incoming_data, nx.Graph):
                self._pending_op = self._from_nx_graph(incoming_data)
            elif isinstance(incoming_data, Graph):
                self._pending_op = self._copy_from(incoming_data)
            elif isinstance(
                incoming_data,
                (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName),
            ):
                self._pending_op = self._from_vineyard(incoming_data)
            else:
                raise RuntimeError("Not supported incoming data.")

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, querying via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        """Close every attached learning instance and forget them."""
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    def _launch_interactive_instance_impl(self):
        """Thread target: start an interactive instance for this graph."""
        try:
            self._session.gremlin(self)
        except:  # noqa: E722
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpect and suppress all exceptions here.
            pass

    def _from_graph_def(self, graph_def):
        """Copy key, ids, flags and schema from an evaluated ``graph_def``."""
        check_argument(
            self._graph_type == graph_def.graph_type, "Graph type doesn't match."
        )

        self._key = graph_def.key
        self._vineyard_id = graph_def.vineyard_id
        self._oid_type = graph_def.schema_def.oid_type
        self._directed = graph_def.directed
        self._generate_eid = graph_def.generate_eid

        self._schema_path = graph_def.schema_path
        self._schema.get_schema_from_def(graph_def.schema_def)
        self._v_labels = self._schema.vertex_labels
        self._e_labels = self._schema.edge_labels
        self._e_relationships = self._schema.edge_relationships

    def _ensure_loaded(self):
        """Evaluate the pending operation, if any, so the graph has a key.

        Raises:
            RuntimeError: If the graph has been unloaded (no session), or if
                it has neither a key nor a pending operation.
        """
        if self._key is not None and self._pending_op is None:
            return
        # Unloaded
        if self._session is None:
            raise RuntimeError("The graph is not loaded")
        # Empty graph
        if self._key is None and self._pending_op is None:
            raise RuntimeError("Empty graph.")
        # Try to load
        if self._pending_op is not None:
            # Create a graph from scratch.
            graph_def = self._pending_op.eval()
            self._from_graph_def(graph_def)
            self._pending_op = None
            self._base_graph = None
            self._unsealed_vertices.clear()
            self._unsealed_edges.clear()
            # init saved_signature (must be after init schema)
            self._saved_signature = self.signature
            # create gremlin server pod asynchronously
            if gs_config.initializing_interactive_engine:
                self._interactive_instance_launching_thread = threading.Thread(
                    target=self._launch_interactive_instance_impl, args=()
                )
                self._interactive_instance_launching_thread.start()

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        self._ensure_loaded()
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        self._ensure_loaded()
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        self._ensure_loaded()
        return self._schema_path

    @property
    def signature(self):
        """SHA-256 hex digest of ``"<schema signature>.<key>"``."""
        self._ensure_loaded()
        return hashlib.sha256(
            "{}.{}".format(self._schema.signature(), self._key).encode("utf-8")
        ).hexdigest()

    @property
    def template_str(self):
        """C++ fragment template string matching this graph's type.

        Raises:
            ValueError: If the graph type has no known template.
        """
        self._ensure_loaded()

        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._oid_type)
        vid_type = self._schema.vid_type
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)
        if self._graph_type == types_pb2.ARROW_PROPERTY:
            template = f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        elif self._graph_type == types_pb2.ARROW_PROJECTED:
            template = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif self._graph_type == types_pb2.DYNAMIC_PROJECTED:
            template = f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        else:
            raise ValueError(f"Unsupported graph type: {self._graph_type}")
        return template

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        self._ensure_loaded()
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the current session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session.session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the variable for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        """Return True if the graph has a key after trying to load it.

        A ``RuntimeError`` from the load attempt is reported as ``False``.
        """
        try:
            self._ensure_loaded()
        except RuntimeError:
            return False
        return self._key is not None

    def __str__(self):
        v_str = "\n".join([f"VERTEX: {label}" for label in self._v_labels])
        relations = []
        # Edge labels and their relationship lists are kept index-aligned.
        for label, pairs in zip(self._e_labels, self._e_relationships):
            relations.extend([(label, src, dst) for src, dst in pairs])
        e_str = "\n".join(
            [
                f"EDGE: {label}\tsrc: {src}\tdst: {dst}"
                for label, src, dst in relations
            ]
        )

        return f"graphscope.Graph\n{types_pb2.GraphType.Name(self._graph_type)}\n{v_str}\n{e_str}"

    def __repr__(self):
        return self.__str__()

    def unload(self):
        """Unload this graph from graphscope engine.

        Failures while closing interactive or learning instances are logged
        and do not stop the unload.

        Raises:
            RuntimeError: If the graph has no session (already unloaded).
        """
        if self._session is None:
            raise RuntimeError("The graph is not loaded")

        if self._key is None:
            # Nothing was ever created in the engine; just drop references.
            self._session = None
            self._pending_op = None
            return

        # close interactive instances first
        try:
            launcher = self._interactive_instance_launching_thread
            if launcher is not None and launcher.is_alive():
                # join raises a RuntimeError if an attempt is made to join the current thread.
                # this exception occurs when a object collected by gc mechanism contains a running thread.
                if threading.current_thread() != launcher:
                    launcher.join()
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s", e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s", e)
        if not self._detached:
            op = dag_utils.unload_graph(self)
            op.eval()
        self._key = None
        self._session = None
        self._pending_op = None

    def project_to_simple(self, v_label="_", e_label="_", v_prop=None, e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.
        Will translate name represented label or property to index, which is
        broadly used in internal engine.

        Args:
            v_label (str or int, optional): vertex label (name or index) to
                project. Defaults to "_".
            e_label (str or int, optional): edge label (name or index) to
                project. Defaults to "_".
            v_prop (str or int, optional): vertex property (name or index) of
                the v_label. Defaults to None.
            e_prop (str or int, optional): edge property (name or index) of
                the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

        Raises:
            IndexError: If a label or property index is out of range.
            ValueError: If a label or property lookup fails, or the relation
                ``v_label -> e_label <- v_label`` is not in the schema.
        """
        self._ensure_loaded()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._check_unmodified()

        def check_out_of_range(index, length):
            if index >= length or index < 0:
                raise IndexError("id {} is out of range.".format(index))

        try:
            if isinstance(v_label, str):
                v_label_id = self._schema.vertex_label_index(v_label)
            else:
                v_label_id = v_label
                check_out_of_range(v_label_id, self._schema.vertex_label_num)
                v_label = self._schema.vertex_labels[v_label_id]
            if isinstance(e_label, str):
                e_label_id = self._schema.edge_label_index(e_label)
            else:
                e_label_id = e_label
                check_out_of_range(e_label_id, self._schema.edge_label_num)
                e_label = self._schema.edge_labels[e_label_id]
        except ValueError as e:
            raise ValueError("Label does not exists.") from e

        # Check relation v_label -> e_label <- v_label exists.
        relation = (v_label, v_label)
        if relation not in self._schema.edge_relationships[e_label_id]:
            raise ValueError(
                f"Graph doesn't contain such relationship: {v_label} -> {e_label} <- {v_label}."
            )

        try:
            if v_prop is None:
                v_prop_id = -1
                vdata_type = None
            else:
                if isinstance(v_prop, str):
                    v_prop_id = self._schema.vertex_property_index(v_label_id, v_prop)
                else:
                    v_prop_id = v_prop
                properties = self._schema.vertex_properties[v_label_id]
                check_out_of_range(v_prop_id, len(properties))
                vdata_type = list(properties.values())[v_prop_id]
            if e_prop is None:
                e_prop_id = -1
                edata_type = None
            else:
                if isinstance(e_prop, str):
                    e_prop_id = self._schema.edge_property_index(e_label_id, e_prop)
                else:
                    e_prop_id = e_prop
                properties = self._schema.edge_properties[e_label_id]
                check_out_of_range(e_prop_id, len(properties))
                edata_type = list(properties.values())[e_prop_id]
        except ValueError as e:
            raise ValueError("Property does not exists.") from e

        oid_type = self._schema.oid_type
        vid_type = self._schema.vid_type

        op = dag_utils.project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        return Graph(self._session, op)

    def add_column(self, results, selector):
        """Add the results as a column to the graph. Modification rules are given by the selector.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        self._ensure_loaded()
        check_argument(
            isinstance(selector, Mapping), "selector of add column must be a dict"
        )
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._check_unmodified()
        selector = {
            key: results._transform_selector(value) for key, value in selector.items()
        }
        selector = json.dumps(selector)
        op = dag_utils.add_column(self, results, selector)
        return Graph(self._session, op)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()
        selector = utils.transform_labeled_vertex_property_data_selector(self, selector)
        vertex_range = utils.transform_vertex_range(vertex_range)
        op = dag_utils.graph_to_numpy(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_numpy(ret)

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_vineyard_dataframe must be a dict",
        )
        selector = {
            key: utils.transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        vertex_range = utils.transform_vertex_range(vertex_range)

        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_dataframe(ret)

    def is_directed(self):
        """Return the directed flag, loading the graph first if needed."""
        self._ensure_loaded()
        return self._directed

    def _check_unmodified(self):
        """Fail the argument check if the signature changed since it was saved."""
        self._ensure_loaded()
        check_argument(
            self.signature == self._saved_signature, "Graph has been modified!"
        )

    def _from_nx_graph(self, incoming_graph):
        """Create a gs graph from a nx graph.

        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            The result of ``dag_utils.dynamic_to_arrow``; ``__init__`` stores
            it as the pending operation.

        Raises:
            TypeError: If ``incoming_graph`` has a ``_graph`` attribute
                (treated as a graph view).

        Examples:
            >>> nx_g = nx.path_graph(10)
            >>> gs_g = gs.Graph(nx_g)
        """
        if hasattr(incoming_graph, "_graph"):
            msg = "graph view can not convert to gs graph"
            raise TypeError(msg)
        return dag_utils.dynamic_to_arrow(incoming_graph)

    def _copy_from(self, incoming_graph):
        """Copy a graph.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from;
                it must be a loaded ``ARROW_PROPERTY`` graph.

        Returns:
            The result of ``dag_utils.copy_graph``; ``__init__`` stores it as
            the pending operation.
        """
        check_argument(incoming_graph.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(incoming_graph.loaded())
        return dag_utils.copy_graph(incoming_graph)

    def _from_vineyard(self, vineyard_object):
        """Load a graph from a already existed vineyard graph.

        Args:
            vineyard_object (:class:`vineyard.Object`, :class:`vineyard.ObjectID`
                            or :class:`vineyard.ObjectName`): vineyard object,
                            which represents a graph.

        Returns:
            The result of ``dag_utils.create_graph`` for the given id or
            name. Falls through (returning ``None``) for any other type.
        """
        if isinstance(vineyard_object, vineyard.Object):
            return self._from_vineyard_id(vineyard_object.id)
        if isinstance(vineyard_object, vineyard.ObjectID):
            return self._from_vineyard_id(vineyard_object)
        if isinstance(vineyard_object, vineyard.ObjectName):
            return self._from_vineyard_name(vineyard_object)

    def _from_vineyard_id(self, vineyard_id):
        """Build the create-graph call for a graph addressed by vineyard id."""
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = utils.b_to_attr(True)
        config[types_pb2.VINEYARD_ID] = utils.i_to_attr(int(vineyard_id))
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = utils.s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = utils.s_to_attr("uint64_t")
        return dag_utils.create_graph(
            self.session_id, types_pb2.ARROW_PROPERTY, attrs=config
        )

    def _from_vineyard_name(self, vineyard_name):
        """Build the create-graph call for a graph addressed by vineyard name."""
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = utils.b_to_attr(True)
        config[types_pb2.VINEYARD_NAME] = utils.s_to_attr(str(vineyard_name))
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = utils.s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = utils.s_to_attr("uint64_t")
        return dag_utils.create_graph(
            self.session_id, types_pb2.ARROW_PROPERTY, attrs=config
        )

    def _attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def _attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    @staticmethod
    def _vineyard_io_options(sess):
        """Collect the session-derived keyword arguments used by vineyard.io calls.

        Args:
            sess: Object whose ``info`` mapping provides ``type``,
                ``engine_config``, ``engine_hosts`` and, for k8s sessions,
                ``namespace``.

        Returns:
            dict: ``deployment``, ``vineyard_endpoint``,
            ``vineyard_ipc_socket`` and ``hosts``.
        """
        deployment = "kubernetes" if sess.info["type"] == "k8s" else "ssh"
        conf = sess.info["engine_config"]
        vineyard_endpoint = conf["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = conf["vineyard_socket"]
        if sess.info["type"] == "k8s":
            hosts = [
                "{}:{}".format(sess.info["namespace"], s)
                for s in sess.info["engine_hosts"].split(",")
            ]
        else:  # type == "hosts"
            hosts = sess.info["engine_hosts"].split(",")
        return {
            "vineyard_ipc_socket": vineyard_ipc_socket,
            "vineyard_endpoint": vineyard_endpoint,
            "deployment": deployment,
            "hosts": hosts,
        }

    @staticmethod
    def _own_properties(properties):
        """Return a shallow copy of a list argument; pass anything else through.

        ``add_vertices`` and ``add_edges`` default ``properties`` to a shared
        list object. Handing label objects their own copy keeps anything they
        do to it from leaking into later calls.
        """
        if isinstance(properties, list):
            return list(properties)
        return properties

    def save_to(self, path, **kwargs):
        """Serialize graph to a location.
        The meta and data of graph is dumped to specified location,
        and can be restored by `Graph.load_from` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: Forwarded to vineyard as ``storage_options``.
        """
        import vineyard
        import vineyard.io

        self._ensure_loaded()
        io_options = self._vineyard_io_options(self._session)
        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            storage_options=kwargs,
            **io_options,
        )

    @classmethod
    def load_from(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserialize from `path`.
        It will read all serialization files, which is dumped by
        `Graph.save_to`.
        If any serialize file doesn't exists or broken, will error out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be construct in
            **kwargs: Forwarded to vineyard as ``storage_options``.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.
        """
        import vineyard
        import vineyard.io

        io_options = cls._vineyard_io_options(sess)
        graph_id = vineyard.io.deserialize(
            path,
            type="global",
            storage_options=kwargs,
            **io_options,
        )
        return cls(sess, vineyard.ObjectID(graph_id))

    def draw(self, vertices, hop=1):
        """Visualize the graph data in the result cell when the draw functions are invoked

        Args:
            vertices (list): selected vertices.
            hop (int): draw induced subgraph with hop extension. Defaults to 1.

        Returns:
            A GraphModel.
        """
        from ipygraphin import GraphModel

        self._ensure_loaded()
        interactive_query = self._session.gremlin(self)

        graph = GraphModel()
        graph.queryGraphData(vertices, hop, interactive_query)

        # listen on the 1~2 hops operation of node
        graph.on_msg(graph.queryNeighbor)
        return graph

    def _construct_graph(
        self, vertices, edges, v_labels, e_labels, e_relations, mutation_func=None
    ):
        """Construct graph.
           1. Construct a graph from scratch.
              If the vertices and edges is empty, return a empty graph.
           2. Construct a graph from existed builded graph.
              If the vertices and edges is empty, return a copied graph.

        Args:
            vertices (dict): Unsealed vertex labels; its values are passed to
                ``graph_utils.assemble_op_config``.
            edges (dict): Unsealed edge labels; its values are passed to
                ``graph_utils.assemble_op_config``.
            v_labels (list): Vertex label names for the new graph.
            e_labels (list): Edge label names for the new graph.
            e_relations (list): Per-edge-label relationship lists for the
                new graph.
            mutation_func (callable, optional): When given, called as
                ``mutation_func(self, attrs=config)`` to build the operation
                instead of ``dag_utils.create_graph``. Defaults to None.

        Returns:
            Graph: A new graph holding the built operation and the given
            label bookkeeping.
        """
        config = graph_utils.assemble_op_config(
            vertices.values(),
            edges.values(),
            self._oid_type,
            self._directed,
            self._generate_eid,
        )

        # edge case.
        if not vertices and not edges:
            if mutation_func:
                # Rely on `self._key`
                return Graph(self._session, self)
            else:
                return Graph(
                    self._session,
                    None,
                    self._oid_type,
                    self._directed,
                    self._generate_eid,
                )
        if mutation_func:
            op = mutation_func(self, attrs=config)
        else:
            op = dag_utils.create_graph(
                self.session_id, types_pb2.ARROW_PROPERTY, attrs=config
            )

        graph = Graph(
            self._session, op, self._oid_type, self._directed, self._generate_eid
        )
        graph._unsealed_vertices = vertices
        graph._unsealed_edges = edges
        graph._v_labels = v_labels
        graph._e_labels = e_labels
        graph._e_relationships = e_relations
        # propagate info about whether is a loaded graph.
        # graph._key = self._key
        if mutation_func:
            # Keep the base graph alive until the mutation is evaluated.
            graph._base_graph = self._base_graph or self
        return graph

    def add_vertices(self, vertices, label="_", properties=[], vid_field=0):
        """Return a new graph with one more vertex label.

        Args:
            vertices: Loader/source handed to ``VertexLabel`` as ``loader``.
            label (str, optional): Name of the new vertex label. Defaults to "_".
            properties (list, optional): Handed to ``VertexLabel``. A list is
                copied first so the shared default is never passed on.
            vid_field (optional): Handed to ``VertexLabel``. Defaults to 0.

        Returns:
            Graph: The graph built by ``_construct_graph``.

        Raises:
            ValueError: If ``label`` already exists, or if the graph has edge
                labels but no vertex labels.
        """
        is_from_existed_graph = len(self._unsealed_vertices) != len(
            self._v_labels
        ) or len(self._unsealed_edges) != len(self._e_labels)

        if label in self._v_labels:
            raise ValueError(f"Label {label} already existed in graph.")
        if not self._v_labels and self._e_labels:
            raise ValueError("Cannot manually add vertices after inferred vertices.")
        unsealed_vertices = deepcopy(self._unsealed_vertices)
        unsealed_vertices[label] = VertexLabel(
            label=label,
            loader=vertices,
            properties=self._own_properties(properties),
            vid_field=vid_field,
        )
        v_labels = deepcopy(self._v_labels)
        v_labels.append(label)

        # Load after validity check and before create add_vertices op.
        # TODO(zsy): Add ability to add vertices and edges to existed graph simultaneously.
        if is_from_existed_graph and self._unsealed_edges:
            self._ensure_loaded()

        func = dag_utils.add_vertices if is_from_existed_graph else None
        return self._construct_graph(
            unsealed_vertices,
            self._unsealed_edges,
            v_labels,
            self._e_labels,
            self._e_relationships,
            func,
        )

    def add_edges(
        self,
        edges,
        label="_",
        properties=[],
        src_label=None,
        dst_label=None,
        src_field=0,
        dst_field=1,
    ):
        """Add edges to graph.
        1. Add edges to a uninitialized graph.

            i.   src_label and dst_label both unspecified. In this case, current graph must
                 has 0 (we deduce vertex label from edge table, and set vertex label name to '_'),
                 or 1 vertex label (we set src_label and dst label to this).
            ii.  src_label and dst_label both specified and existed in current graph's vertex labels.
            iii. src_label and dst_label both specified and there is no vertex labels in current graph.
                 we deduce all vertex labels from edge tables.
                 Note that you either provide all vertex labels, or let graphscope deduce all vertex labels.
                 We don't support mixed style.

        2. Add edges to a existed graph.
            Must add a new kind of edge label, not a new relation to builded graph.
            But you can add a new relation to uninitialized part of the graph.
            src_label and dst_label must be specified and existed in current graph.

        Args:
            edges: Loader/source handed to ``EdgeSubLabel``.
            label (str, optional): Edge label name. Defaults to "_".
            properties (list, optional): Handed to ``EdgeSubLabel``. A list is
                copied first so the shared default is never passed on.
            src_label (str, optional): Source vertex label. Defaults to None.
            dst_label (str, optional): Destination vertex label. Defaults to None.
            src_field (int, optional): Source id field. Defaults to 0.
            dst_field (int, optional): Destination id field. Defaults to 1.

        Raises:
            ValueError: If the label combination is not allowed (see the
                rules above).

        Returns:
            Graph: The graph built by ``_construct_graph``.
        """
        is_from_existed_graph = len(self._unsealed_vertices) != len(
            self._v_labels
        ) or len(self._unsealed_edges) != len(self._e_labels)

        if is_from_existed_graph:
            if label in self._e_labels and label not in self._unsealed_edges:
                raise ValueError("Cannot add new relation to existed graph.")
            if src_label is None or dst_label is None:
                raise ValueError("src label and dst label cannot be None.")
            if src_label not in self._v_labels or dst_label not in self._v_labels:
                raise ValueError("src label or dst_label not existed in graph.")
        else:
            if src_label is None and dst_label is None:
                check_argument(len(self._v_labels) <= 1, "ambiguous vertex label")
                if len(self._v_labels) == 1:
                    src_label = dst_label = self._v_labels[0]
                else:
                    src_label = dst_label = "_"
            elif src_label is not None and dst_label is not None:
                if self._v_labels:
                    if (
                        src_label not in self._v_labels
                        or dst_label not in self._v_labels
                    ):
                        raise ValueError("src label or dst_label not existed in graph.")
                else:
                    # Infer all v_labels from edge tables.
                    pass
            else:
                raise ValueError(
                    "src and dst label must be both specified or either unspecified."
                )

        check_argument(
            src_field != dst_field, "src and dst field cannot refer to the same field"
        )

        unsealed_edges = deepcopy(self._unsealed_edges)
        e_labels = deepcopy(self._e_labels)
        relations = deepcopy(self._e_relationships)
        if label in unsealed_edges:
            assert label in self._e_labels
            label_idx = self._e_labels.index(label)
            # Will check conflict in `add_sub_label`
            relations[label_idx].append((src_label, dst_label))
            cur_label = unsealed_edges[label]
        else:
            e_labels.append(label)
            relations.append([(src_label, dst_label)])
            cur_label = EdgeLabel(label)
        cur_label.add_sub_label(
            EdgeSubLabel(
                edges,
                self._own_properties(properties),
                src_label,
                dst_label,
                src_field,
                dst_field,
            )
        )
        unsealed_edges[label] = cur_label

        # Load after validity check and before create add_vertices op.
        # TODO(zsy): Add ability to add vertices and edges to existed graph simultaneously.
        if is_from_existed_graph and self._unsealed_vertices:
            self._ensure_loaded()

        func = dag_utils.add_edges if is_from_existed_graph else None
        return self._construct_graph(
            self._unsealed_vertices,
            unsealed_edges,
            self._v_labels,
            e_labels,
            relations,
            func,
        )
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema,
    and the graph is directed or not.

    It is worth noting that the graph is stored by the backend such as
    Analytical Engine, Vineyard. In other words, the graph object holds nothing
    but metadata.

    The graph object should not be created directly from :class:`Graph`.
    Instead, the graph should be created by `Session.load_from`

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> g = sess.load_from(
        ...     edges={
        ...         "knows": (
        ...             Loader("{}/p2p-31_property_e_0".format(property_dir), header_row=True),
        ...             ["src_label_id", "dst_label_id", "dist"],
        ...             ("src_id", "person"),
        ...             ("dst_id", "person"),
        ...         ),
        ...     },
        ...     vertices={
        ...         "person": Loader(
        ...             "{}/p2p-31_property_v_0".format(property_dir), header_row=True
        ...         ),
        ...     }
        ... )
    """

    def __init__(self, session_id, incoming_data=None):
        """Construct a :class:`Graph` object.

        Args:
            session_id (str): Session id of the session the graph is created in.
            incoming_data: Graph can be initialized through various type of sources,
                which can be one of:
                    - :class:`GraphDef`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`VineyardObject`

        Raises:
            ValueError: If `incoming_data` is none of the supported source
                types (this includes the default `None`).
            RuntimeError: Propagated from :attr:`signature` when the source
                produced no graph definition, so no key was registered.
        """
        # Don't import the :code:`NXGraph` in top-level statements to improve the
        # performance of :code:`import graphscope`.
        from graphscope.experimental.nx.classes.graph import Graph as NXGraph

        self._key = None
        self._op = None
        self._graph_type = None
        # Legacy public attribute, kept only for backward compatibility. It is
        # not updated from the graph definition; use `is_directed()` instead.
        self.directed = False
        # Defaults for the attributes read by `is_directed()` and `schema_path`,
        # so they exist even when no graph definition is applied below.
        self._directed = False
        self._schema_path = None
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._session_id = session_id
        self._detached = False
        self._interactive_instance_list = []
        self._learning_instance_list = []

        if isinstance(incoming_data, GraphDef):
            graph_def = incoming_data
        elif isinstance(incoming_data, NXGraph):
            graph_def = self._from_nx_graph(incoming_data)
        elif isinstance(incoming_data, Graph):
            graph_def = self._copy_from(incoming_data)
        elif isinstance(incoming_data, VineyardObject):
            graph_def = self._from_vineyard(incoming_data)
        else:
            # Format the message eagerly: exception constructors do not apply
            # %-style arguments the way logging calls do.
            raise ValueError(
                "Failed to create a graph on graphscope engine: %s"
                % (incoming_data,)
            )

        if graph_def:
            self._key = graph_def.key
            self._vineyard_id = graph_def.vineyard_id
            self._graph_type = graph_def.graph_type
            self._directed = graph_def.directed
            self._schema.get_schema_from_def(graph_def.schema_def)
            self._schema_path = graph_def.schema_path
        # init saved_signature (must be after init schema)
        self._saved_signature = self.signature

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, querying via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        # Close every attached learning instance, then forget them.
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    @property
    def op(self):
        """The DAG op of this graph."""
        return self._op

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
                `None` when no graph definition has been applied.
        """
        return self._schema_path

    @property
    def signature(self):
        """SHA-256 hex digest of ``"<schema signature>.<key>"``.

        Raises:
            RuntimeError: If the graph has no key.
        """
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256(
            "{}.{}".format(self._schema.signature(), self._key).encode("utf-8")
        ).hexdigest()

    @property
    def template_sigature(self):
        """SHA-256 hex digest built from the graph type and the schema's
        oid/vid/vdata/edata types.

        Raises:
            RuntimeError: If the graph has no key.
        """
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256(
            "{}.{}.{}.{}.{}".format(
                self._graph_type,
                self._schema.oid_type,
                self._schema.vid_type,
                self._schema.vdata_type,
                self._schema.edata_type,
            ).encode("utf-8")
        ).hexdigest()

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the current session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the variable for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        """Return True while the graph still has a key."""
        return self._key is not None

    def __repr__(self):
        return "<grape.Graph '%s'>" % self._key

    def unload(self):
        """Unload this graph from graphscope engine.

        Attached interactive and learning instances are closed first on a
        best-effort basis: a failure is logged and does not abort unloading.
        A detached graph is not unloaded from the engine; only the local key
        is dropped.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not registered in remote.")
        # close interactive instances first
        try:
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s", e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s", e)
        if not self._detached:
            op = unload_graph(self)
            op.eval()
        self._key = None

    def project_to_simple(self, v_label="_", e_label="_", v_prop=None, e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.
        Will translate name represented label or property to index, which is broadly used
        in internal engine.

        Args:
            v_label (str, optional): vertex label to project. Defaults to "_".
            e_label (str, optional): edge label to project. Defaults to "_".
            v_prop (str, optional): vertex property of the v_label. Defaults to None.
            e_prop (str, optional): edge property of the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

        Raises:
            RuntimeError: If the graph is not loaded.
            KeyError: If a label or property is given as an integer index that
                is out of range.
            ValueError: If the schema lookup of the vertex label, vertex
                property or edge property fails with `ValueError`.
            InvalidArgumentError: If the schema lookup of the edge label fails
                with `ValueError`.
        """
        if not self.loaded():
            raise RuntimeError(
                "The graph is not registered in remote, and can't project to simple"
            )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(isinstance(v_label, (int, str)))
        check_argument(isinstance(e_label, (int, str)))

        def check_out_of_range(index, length):
            # Integer labels/properties are already indices; only bounds-check them.
            if 0 <= index < length:
                return index
            raise KeyError("id {} is out of range.".format(index))

        try:
            v_label_id = (
                check_out_of_range(v_label, self._schema.vertex_label_num)
                if isinstance(v_label, int)
                else self._schema.vertex_label_index(v_label)
            )
        except ValueError as e:
            raise ValueError(
                "graph not contains the vertex label {}.".format(v_label)
            ) from e

        try:
            e_label_id = (
                check_out_of_range(e_label, self._schema.edge_label_num)
                if isinstance(e_label, int)
                else self._schema.edge_label_index(e_label)
            )
        except ValueError as e:
            raise InvalidArgumentError(
                "graph not contains the edge label {}.".format(e_label)
            ) from e

        if v_prop is None:
            # NB: -1 means vertex property is None
            v_prop_id = -1
            v_properties = None
        else:
            check_argument(isinstance(v_prop, (int, str)))
            v_properties = self._schema.vertex_properties[v_label_id]
            try:
                v_prop_id = (
                    check_out_of_range(v_prop, len(v_properties))
                    if isinstance(v_prop, int)
                    else self._schema.vertex_property_index(v_label_id, v_prop)
                )
            except ValueError as e:
                raise ValueError(
                    "vertex label {} not contains the property {}".format(
                        v_label, v_prop
                    )
                ) from e

        if e_prop is None:
            # NB: -1 means edge property is None
            e_prop_id = -1
            e_properties = None
        else:
            check_argument(isinstance(e_prop, (int, str)))
            e_properties = self._schema.edge_properties[e_label_id]
            try:
                e_prop_id = (
                    check_out_of_range(e_prop, len(e_properties))
                    if isinstance(e_prop, int)
                    else self._schema.edge_property_index(e_label_id, e_prop)
                )
            except ValueError as e:
                raise ValueError(
                    "edge label {} not contains the property {}".format(e_label, e_prop)
                ) from e

        oid_type = self._schema.oid_type
        vid_type = self._schema.vid_type
        # The data types stay None when no property was selected (or the
        # selected label has no properties).
        vdata_type = None
        if v_properties:
            vdata_type = list(v_properties.values())[v_prop_id]
        edata_type = None
        if e_properties:
            edata_type = list(e_properties.values())[e_prop_id]

        op = project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def add_column(self, results, selector):
        """Add the results as a column to the graph. Modification rules are given by the selector.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        check_argument(
            isinstance(selector, Mapping), "selector of add column must be a dict"
        )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        selector = {
            key: results._transform_selector(value) for key, value in selector.items()
        }
        selector = json.dumps(selector)
        op = add_column(self, results, selector)
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self.check_unmodified()
        selector = transform_labeled_vertex_property_data_selector(self, selector)
        vertex_range = transform_vertex_range(vertex_range)
        op = graph_to_numpy(self, selector, vertex_range)
        ret = op.eval()
        return decode_numpy(ret)

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self.check_unmodified()
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_vineyard_dataframe must be a dict",
        )
        selector = {
            key: transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        vertex_range = transform_vertex_range(vertex_range)
        op = graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return decode_dataframe(ret)

    def is_directed(self):
        """Return whether the graph is directed (False until a graph def is applied)."""
        return self._directed

    def check_unmodified(self):
        """Check that the current signature still equals the one saved at construction."""
        check_argument(
            self.signature == self._saved_signature, "Graph has been modified!"
        )

    def _from_nx_graph(self, incoming_graph):
        """Create a gs graph from a nx graph.

        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            The graph def that will be used to construct a gs.Graph

        Raises:
            TypeError: Raise Error if graph type not match.

        Examples:
            >>> nx_g = nx.path_graph(10)
            >>> gs_g = gs.Graph(nx_g)
        """
        # An object carrying a `_graph` attribute is treated as a graph view.
        if hasattr(incoming_graph, "_graph"):
            msg = "graph view can not convert to gs graph"
            raise TypeError(msg)
        op = dynamic_to_arrow(incoming_graph)
        graph_def = op.eval()
        return graph_def

    def _copy_from(self, incoming_graph):
        """Copy a graph.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from

        Returns:
            The graph def of the copy produced by evaluating the copy op.
        """
        check_argument(incoming_graph.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(incoming_graph.loaded())
        op = copy_graph(incoming_graph)
        graph_def = op.eval()
        return graph_def

    def _from_vineyard(self, vineyard_object):
        """Load a graph from a already existed vineyard graph.

        Args:
            vineyard_object (:class:`VineyardObject`): vineyard object, which contains a graph.

        Returns:
            A graph_def, or None when the object carries neither an id nor a name.
        """
        if vineyard_object.object_id is not None:
            return self._from_vineyard_id(vineyard_object.object_id)
        elif vineyard_object.object_name is not None:
            return self._from_vineyard_name(vineyard_object.object_name)
        return None

    def _from_vineyard_id(self, vineyard_id):
        # Build the create-graph op attributes for loading by vineyard object id.
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = b_to_attr(True)
        config[types_pb2.VINEYARD_ID] = i_to_attr(vineyard_id)
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = s_to_attr("uint64_t")

        op = create_graph(self._session_id, types_pb2.ARROW_PROPERTY, attrs=config)
        graph_def = op.eval()
        return graph_def

    def _from_vineyard_name(self, vineyard_name):
        # Build the create-graph op attributes for loading by vineyard object name.
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = b_to_attr(True)
        config[types_pb2.VINEYARD_NAME] = s_to_attr(vineyard_name)
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = s_to_attr("uint64_t")

        op = create_graph(self._session_id, types_pb2.ARROW_PROPERTY, attrs=config)
        graph_def = op.eval()
        return graph_def

    def attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)
class DiGraph(Graph):
    """
    Base class for directed graphs.

    A DiGraph that holds the metadata of a graph, and provides NetworkX-like DiGraph APIs.

    It is worth noticing that the graph is actually stored by the Analytical Engine backend.
    In other words, the Graph object holds nothing but metadata of a graph

    DiGraph support nodes and edges with optional data, or attributes.

    DiGraphs support directed edges.  Self loops are allowed but multiple
    (parallel) edges are not.

    Nodes can be arbitrary int/str/float/bool objects with optional
    key/value attributes.

    Edges are represented as links between nodes with optional
    key/value attributes.

    DiGraph support node label if it's created from a GraphScope graph object.
    nodes are identified by `(label, id)` tuple.

    Parameters
    ----------
    incoming_graph_data : input graph (optional, default: None)
        Data to initialize graph. If None (default) an empty
        graph is created.  The data can be any format that is supported
        by the to_networkx_graph() function, currently including edge list,
        dict of dicts, dict of lists, NetworkX graph, NumPy matrix
        or 2d ndarray, Pandas DataFrame, SciPy sparse matrix, or a GraphScope
        graph object.

    default_label : default node label (optional, default: None)
        if incoming_graph_data is a GraphScope graph object, default label means
        the nodes of the label can be identified by id directly, other label nodes
        need to use `(label, id)` to identify.

    attr : keyword arguments, optional (default= no attributes)
        Attributes to add to graph as key=value pairs.

    See Also
    --------
    Graph

    Examples
    --------
    Create an empty graph structure (a "null graph") with no nodes and
    no edges.

    >>> G = nx.DiGraph()

    G can be grown in several ways.

    **Nodes:**

    Add one node at a time:

    >>> G.add_node(1)

    Add the nodes from any container (a list, dict, set or
    even the lines from a file or the nodes from another graph).

    >>> G.add_nodes_from([2, 3])
    >>> G.add_nodes_from(range(100, 110))
    >>> H = nx.path_graph(10)
    >>> G.add_nodes_from(H)

    In addition integers, strings can represent a node.

    >>> G.add_node('a node')

    **Edges:**

    G can also be grown by adding edges.

    Add one edge,

    >>> G.add_edge(1, 2)

    a list of edges,

    >>> G.add_edges_from([(1, 2), (1, 3)])

    or a collection of edges,

    >>> G.add_edges_from(H.edges)

    If some edges connect nodes not yet in the graph, the nodes
    are added automatically.  There are no errors when adding
    nodes or edges that already exist.

    **Attributes:**

    Each graph, node, and edge can hold key/value attribute pairs
    in an associated attribute dictionary (the keys must be hashable).
    By default these are empty, but can be added or changed using
    add_edge, add_node or direct manipulation of the attribute
    dictionaries named graph, node and edge respectively.

    >>> G = nx.DiGraph(day="Friday")
    >>> G.graph
    {'day': 'Friday'}

    Add node attributes using add_node(), add_nodes_from() or G.nodes

    >>> G.add_node(1, time='5pm')
    >>> G.add_nodes_from([3], time='2pm')
    >>> G.nodes[1]
    {'time': '5pm'}
    >>> G.nodes[1]['room'] = 714
    >>> del G.nodes[1]['room'] # remove attribute
    >>> list(G.nodes(data=True))
    [(1, {'time': '5pm'}), (3, {'time': '2pm'})]

    Add edge attributes using add_edge(), add_edges_from(), subscript
    notation, or G.edges.

    >>> G.add_edge(1, 2, weight=4.7 )
    >>> G.add_edges_from([(3, 4), (4, 5)], color='red')
    >>> G.add_edges_from([(1, 2, {'color':'blue'}), (2, 3, {'weight':8})])
    >>> G[1][2]['weight'] = 4.7
    >>> G.edges[1, 2]['weight'] = 4

    Warning: we protect the graph data structure by making `G.edges[1, 2]` a
    read-only dict-like structure. However, you can assign to attributes
    in e.g. `G.edges[1, 2]`. Thus, use 2 sets of brackets to add/change
    data attributes: `G.edges[1, 2]['weight'] = 4`
    (For multigraphs: `MG.edges[u, v, key][name] = value`).

    **Shortcuts:**

    Many common graph features allow python syntax to speed reporting.

    >>> 1 in G     # check if node in graph
    True
    >>> [n for n in G if n < 3]  # iterate through nodes
    [1, 2]
    >>> len(G)  # number of nodes in graph
    5

    Often the best way to traverse all edges of a graph is via the neighbors.
    The neighbors are reported as an adjacency-dict `G.adj` or `G.adjacency()`

    >>> for n, nbrsdict in G.adjacency():
    ...     for nbr, eattr in nbrsdict.items():
    ...        if 'weight' in eattr:
    ...            # Do something useful with the edges
    ...            pass

    But the edges reporting object is often more convenient:

    >>> for u, v, weight in G.edges(data='weight'):
    ...     if weight is not None:
    ...         # Do something useful with the edges
    ...         pass

    **Transformation**

    Create a graph with GraphScope graph object. First we init a GraphScope graph
    with two node labels: `person` and `comment`

    >>> g = graphscope.g(directed=True).add_vertices("person.csv", label="person").add_vertices("comment.csv", label="comment")

    create a graph with g, set default_label to 'person'

    >>> G = nx.DiGraph(g, default_label="person")

    `person` label nodes can be identified by id directly, for `comment` label,
    we have to use the tuple `("comment", id)` to identify a node. Like, add a
    person label node and a comment label node

    >>> G.add_node(0, type="person")
    >>> G.add_node(("comment", 0), type="comment")

    print property of two nodes

    >>> G.nodes[0]
    {'type': 'person'}
    >>> G.nodes[("comment", 0)]
    {'type': 'comment'}

    **Reporting:**

    Simple graph information is obtained using object-attributes and methods.
    Reporting usually provides views instead of containers to reduce memory
    usage. The views update as the graph is updated similarly to dict-views.
    The objects `nodes`, `edges` and `adj` provide access to data attributes
    via lookup (e.g. `nodes[n]`, `edges[u, v]`, `adj[u][v]`) and iteration
    (e.g. `nodes.items()`, `nodes.data('color')`,
    `nodes.data('color', default='blue')` and similarly for `edges`)
    Views exist for `nodes`, `edges`, `neighbors()`/`adj` and `degree`.

    For details on these and other miscellaneous methods, see below.
    """

    @patch_docstring(Graph.__init__)
    def __init__(self, incoming_graph_data=None, default_label=None, **attr):
        # Copy the class-level factories onto the instance before using them.
        self.graph_attr_dict_factory = self.graph_attr_dict_factory
        self.node_dict_factory = self.node_dict_factory
        self.adjlist_dict_factory = self.adjlist_dict_factory

        self.graph = self.graph_attr_dict_factory()
        self.cache = self.graph_cache_factory(self)

        # init node and adj (must be after cache)
        self._node = self.node_dict_factory(self)
        self._adj = self.adjlist_dict_factory(self)
        self._pred = self.adjlist_dict_factory(self, pred=True)
        # Successors share the adjacency structure; predecessors get their own.
        self._succ = self._adj

        self._key = None
        self._op = None
        self._session_id = None
        self._graph_type = self._graph_type
        self._schema = GraphSchema()
        self._schema.init_nx_schema()

        # cache for add_node and add_edge
        self._add_node_cache = []
        self._add_edge_cache = []
        self._remove_node_cache = []
        self._remove_edge_cache = []

        create_empty_in_engine = attr.pop(
            "create_empty_in_engine", True
        )  # a hidden parameter
        self._distributed = attr.pop("dist", False)
        if incoming_graph_data is not None and self._is_gs_graph(incoming_graph_data):
            # convert from gs graph always use distributed mode
            self._distributed = True
            if self._session is None:
                self._session = get_session_by_id(incoming_graph_data.session_id)
        self._default_label = default_label

        if self._session is None:
            self._try_to_get_default_session()

        if not self._is_gs_graph(incoming_graph_data) and create_empty_in_engine:
            graph_def = empty_graph_in_engine(
                self, self.is_directed(), self._distributed
            )
            self._key = graph_def.key

        # attempt to load graph with data
        if incoming_graph_data is not None:
            if self._is_gs_graph(incoming_graph_data):
                self._init_with_arrow_property_graph(incoming_graph_data)
                self.cache.warmup()
            else:
                g = to_networkx_graph(incoming_graph_data, create_using=self)
                check_argument(isinstance(g, Graph))

        # load graph attributes (must be after to_networkx_graph)
        self.graph.update(attr)
        self._saved_signature = self.signature

    @property
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.adj)
    def adj(self):
        return AdjacencyView(self._succ)

    succ = adj

    @property
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.pred)
    def pred(self):
        return AdjacencyView(self._pred)

    # Take the documentation from the matching reference method,
    # `has_successor`, rather than from `has_predecessor`.
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.has_successor)
    def has_successor(self, u, v):
        return self.has_edge(u, v)

    @clear_mutation_cache
    @patch_docstring(RefDiGraph.has_predecessor)
    def has_predecessor(self, u, v):
        return self.has_edge(v, u)

    @clear_mutation_cache
    @patch_docstring(RefDiGraph.successors)
    def successors(self, n):
        try:
            return iter(self._succ[n])
        except KeyError as err:
            # Chain the lookup failure so the original cause stays in the traceback.
            raise NetworkXError("The node %s is not in the digraph." % (n,)) from err

    # digraph definitions
    neighbors = successors

    @clear_mutation_cache
    @patch_docstring(RefDiGraph.predecessors)
    def predecessors(self, n):
        try:
            return iter(self._pred[n])
        except KeyError as err:
            # Chain the lookup failure so the original cause stays in the traceback.
            raise NetworkXError("The node %s is not in the digraph." % (n,)) from err

    @property
    @clear_mutation_cache
    def edges(self):
        """An OutEdgeView of the DiGraph as G.edges or G.edges().

        edges(self, nbunch=None, data=False, default=None)

        The OutEdgeView provides set-like operations on the edge-tuples
        as well as edge attribute lookup. When called, it also provides
        an EdgeDataView object which allows control of access to edge
        attributes (but does not provide set-like operations).
        Hence, `G.edges[u, v]['color']` provides the value of the color
        attribute for edge `(u, v)` while
        `for (u, v, c) in G.edges.data('color', default='red'):`
        iterates through all the edges yielding the color attribute
        with default `'red'` if no color attribute exists.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.
        data : string or bool, optional (default=False)
            The edge attribute returned in 3-tuple (u, v, ddict[data]).
            If True, return edge attribute dict in 3-tuple (u, v, ddict).
            If False, return 2-tuple (u, v).
        default : value, optional (default=None)
            Value used for edges that don't have the requested attribute.
            Only relevant if data is not True or False.

        Returns
        -------
        edges : OutEdgeView
            A view of edge attributes, usually it iterates over (u, v)
            or (u, v, d) tuples of edges, but can also be used for
            attribute lookup as `edges[u, v]['foo']`.

        See Also
        --------
        in_edges, out_edges

        Notes
        -----
        Nodes in nbunch that are not in the graph will be (quietly) ignored.
        For directed graphs this returns the out-edges.

        Examples
        --------
        >>> G = nx.DiGraph()
        >>> nx.add_path(G, [0, 1, 2])
        >>> G.add_edge(2, 3, weight=5)
        >>> [e for e in G.edges]
        [(0, 1), (1, 2), (2, 3)]
        >>> G.edges.data()  # default data is {} (empty dict)
        OutEdgeDataView([(0, 1, {}), (1, 2, {}), (2, 3, {'weight': 5})])
        >>> G.edges.data("weight", default=1)
        OutEdgeDataView([(0, 1, 1), (1, 2, 1), (2, 3, 5)])
        >>> G.edges([0, 2])  # only edges incident to these nodes
        OutEdgeDataView([(0, 1), (2, 3)])
        >>> G.edges(0)  # only edges incident to a single node (use G.adj[0]?)
        OutEdgeDataView([(0, 1)])

        """
        return OutEdgeView(self)

    # alias out_edges to edges
    out_edges = edges

    @property
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.in_edges)
    def in_edges(self):
        return InEdgeView(self)

    @property
    @clear_mutation_cache
    def degree(self):
        """A DegreeView for the Graph as G.degree or G.degree().

        The node degree is the number of edges adjacent to the node.
        The weighted node degree is the sum of the edge weights for
        edges incident to that node.

        This object provides an iterator for (node, degree) as well as
        lookup for the degree for a single node.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.

        weight : string or None, optional (default=None)
           The name of an edge attribute that holds the numerical value used
           as a weight.  If None, then each edge has weight 1.
           The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
        If a single node is requested
        deg : int
            Degree of the node

        OR if multiple nodes are requested
        nd_iter : iterator
            The iterator returns two-tuples of (node, degree).

        See Also
        --------
        in_degree, out_degree

        Examples
        --------
        >>> G = nx.DiGraph()
        >>> nx.add_path(G, [0, 1, 2, 3])
        >>> G.degree(0) # node 0 with degree 1
        1
        >>> list(G.degree([0, 1, 2]))
        [(0, 1), (1, 2), (2, 2)]

        """
        return DiDegreeView(self)

    @property
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.in_degree)
    def in_degree(self):
        return InDegreeView(self)

    @property
    @clear_mutation_cache
    @patch_docstring(RefDiGraph.out_degree)
    def out_degree(self):
        return OutDegreeView(self)

    @patch_docstring(RefDiGraph.is_directed)
    def is_directed(self):
        return True

    @patch_docstring(RefDiGraph.is_multigraph)
    def is_multigraph(self):
        return False

    @clear_mutation_cache
    @patch_docstring(RefDiGraph.reverse)
    def reverse(self, copy=True):
        self._convert_arrow_to_dynamic()

        if not copy:
            # View mode: reuse this graph's op and key, with a private schema copy.
            g = reverse_view(self)
            g._op = self._op
            g._key = self._key
            g._schema = deepcopy(self._schema)
            g._is_client_view = True
        else:
            # Copy mode: build a graph without an engine-side empty graph, then
            # bind it to the result of the "reverse" copy op.
            g = self.__class__(create_empty_in_engine=False)
            g.graph = self.graph
            g.name = self.name
            op = copy_graph(self, "reverse")
            g._op = op
            graph_def = op.eval()
            g._key = graph_def.key
            g._schema = deepcopy(self._schema)
            g.cache.warmup()
        g._session = self._session
        return g
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema,
    and the graph is directed or not.

    It is worth noting that the graph is stored by the backend such as
    Analytical Engine, Vineyard. In other words, the graph object holds nothing
    but metadata.

    The graph object should not be created directly from :class:`Graph`.
    Instead, the graph should be created by `Session.load_from`

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> g = sess.load_from(
        ...     edges={
        ...         "knows": (
        ...             Loader("{}/p2p-31_property_e_0".format(property_dir), header_row=True),
        ...             ["src_label_id", "dst_label_id", "dist"],
        ...             ("src_id", "person"),
        ...             ("dst_id", "person"),
        ...         ),
        ...     },
        ...     vertices={
        ...         "person": Loader(
        ...             "{}/p2p-31_property_v_0".format(property_dir), header_row=True
        ...         ),
        ...     }
        ... )
    """

    def __init__(self, session_id, incoming_data=None):
        """Construct a :class:`Graph` object.

        Args:
            session_id (str): Session id of the session the graph is created in.
            incoming_data: Graph can be initialized through various type of sources,
                which can be one of:
                    - :class:`GraphDef`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`vineyard.Object`, :class:`vineyard.ObjectId` or :class:`vineyard.ObjectName`

        Raises:
            ValueError: If `incoming_data` is none of the supported types.
            RuntimeError: If no graph key was obtained, so the signature
                cannot be computed.
        """
        # Don't import the :code:`NXGraph` in top-level statements to improve the
        # performance of :code:`import graphscope`.
        from graphscope.experimental.nx.classes.graph import Graph as NXGraph

        self._key = None
        self._op = None
        self._graph_type = None
        # Stored under the private name that `is_directed`, `add_vertices` and
        # `add_edges` read (the original assigned a never-read `self.directed`).
        self._directed = False
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._session_id = session_id
        self._detached = False

        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        if isinstance(incoming_data, GraphDef):
            graph_def = incoming_data
        elif isinstance(incoming_data, NXGraph):
            graph_def = self._from_nx_graph(incoming_data)
        elif isinstance(incoming_data, Graph):
            graph_def = self._copy_from(incoming_data)
        elif isinstance(
                incoming_data,
                (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName)):
            graph_def = self._from_vineyard(incoming_data)
        else:
            # Interpolate the offending value; passing it as a second argument
            # to ValueError would leave the "%s" placeholder unformatted.
            raise ValueError(
                f"Failed to create a graph on graphscope engine: {incoming_data}"
            )

        if graph_def:
            self._key = graph_def.key
            self._vineyard_id = graph_def.vineyard_id
            self._graph_type = graph_def.graph_type
            self._directed = graph_def.directed
            self._generate_eid = graph_def.generate_eid
            self._schema.get_schema_from_def(graph_def.schema_def)
            self._schema_path = graph_def.schema_path
        # init saved_signature (must be after init schema)
        self._saved_signature = self.signature

        # create gremlin server pod asynchronously
        if gs_config.initializing_interactive_engine:
            self._interactive_instance_launching_thread = threading.Thread(
                target=self._launch_interactive_instance_impl, args=())
            self._interactive_instance_launching_thread.start()

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    @staticmethod
    def _close_all(instances):
        """Call ``close()`` on every item of `instances`, then empty the list.

        Each instance gets a close attempt even if an earlier one fails; the
        first failure is re-raised once the list has been cleared.
        """
        first_error = None
        # Iterate over a snapshot so the loop is unaffected if the list changes.
        for instance in list(instances):
            try:
                instance.close()
            except Exception as e:  # pylint: disable=broad-except
                if first_error is None:
                    first_error = e
        instances.clear()
        if first_error is not None:
            raise first_error

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, querying via interactive client is meaningless.
        self._close_all(self._interactive_instance_list)

    def _close_learning_instances(self):
        # Close the learning instances attached to this graph.
        self._close_all(self._learning_instance_list)

    def _launch_interactive_instance_impl(self):
        """Thread target: start an interactive (gremlin) instance for this graph."""
        try:
            sess = get_session_by_id(self.session_id)
            sess.gremlin(self)
        except Exception:  # pylint: disable=broad-except
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpected errors are deliberately suppressed here.
            pass

    @property
    def op(self):
        """The DAG op of this graph."""
        return self._op

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        return self._schema_path

    @property
    def signature(self):
        """SHA-256 hex digest of the schema signature joined with the graph key.

        Raises:
            RuntimeError: If the graph has no key.
        """
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256("{}.{}".format(
            self._schema.signature(), self._key).encode("utf-8")).hexdigest()

    @property
    def template_str(self):
        """C++ template string describing the fragment type of this graph.

        Raises:
            RuntimeError: If the graph has no key.
            ValueError: If the graph type is not one of the handled types.
        """
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        graph_type = self._graph_type
        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._schema.oid_type)
        vid_type = self._schema.vid_type
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)
        if graph_type == types_pb2.ARROW_PROPERTY:
            template = f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        elif graph_type == types_pb2.ARROW_PROJECTED:
            template = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif graph_type == types_pb2.DYNAMIC_PROJECTED:
            template = f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        else:
            raise ValueError(f"Unsupported graph type: {graph_type}")
        return template

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the current session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the variable for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        """Return True while the graph still has a key."""
        return self._key is not None

    def __str__(self):
        return f"graphscope.Graph <{self.template_str}> {self._vineyard_id}"

    def __repr__(self):
        return ("graphscope.Graph\n"
                f"type: {self.template_str.split('<')[0]}\n"
                f"vineyard_id: {self._vineyard_id}\n\n"
                f"{str(self._schema)}")

    def unload(self):
        """Unload this graph from graphscope engine.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not registered in remote.")
        # close interactive instances first
        try:
            if (self._interactive_instance_launching_thread is not None
                    and self._interactive_instance_launching_thread.is_alive()):
                # join raises a RuntimeError if an attempt is made to join the current thread.
                # this exception occurs when a object collected by gc mechanism contains a running thread.
                if (threading.current_thread() !=
                        self._interactive_instance_launching_thread):
                    self._interactive_instance_launching_thread.join()
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s", e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s", e)
        # A detached graph is left in place; only the local key is dropped.
        if not self._detached:
            op = dag_utils.unload_graph(self)
            op.eval()
        self._key = None

    def project_to_simple(self,
                          v_label="_",
                          e_label="_",
                          v_prop=None,
                          e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.
        Will translate name represented label or property to index, which is broadly used
        in internal engine.

        Args:
            v_label (str, optional): vertex label to project. Defaults to "_".
            e_label (str, optional): edge label to project. Defaults to "_".
            v_prop (str, optional): vertex property of the v_label. Defaults to None.
            e_prop (str, optional): edge property of the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

        Raises:
            RuntimeError: If the graph is not loaded.
            KeyError: If an integer label/property index is out of range.
        """
        if not self.loaded():
            raise RuntimeError(
                "The graph is not registered in remote, and can't project to simple"
            )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(isinstance(v_label, (int, str)))
        check_argument(isinstance(e_label, (int, str)))

        def check_out_of_range(index, length):
            # Accept only indices in [0, length).
            if -1 < index < length:
                return index
            raise KeyError("id {} is out of range.".format(index))

        try:
            v_label_id = (check_out_of_range(v_label,
                                             self._schema.vertex_label_num)
                          if isinstance(v_label, int) else
                          self._schema.vertex_label_index(v_label))
        except ValueError as e:
            raise ValueError("graph not contains the vertex label {}.".format(
                v_label)) from e

        try:
            e_label_id = (check_out_of_range(e_label,
                                             self._schema.edge_label_num)
                          if isinstance(e_label, int) else
                          self._schema.edge_label_index(e_label))
        except ValueError as e:
            raise InvalidArgumentError(
                "graph not contains the edge label {}.".format(e_label)) from e

        if v_prop is None:
            # NB: -1 means vertex property is None
            v_prop_id = -1
            v_properties = None
        else:
            check_argument(isinstance(v_prop, (int, str)))
            v_properties = self._schema.vertex_properties[v_label_id]
            try:
                v_prop_id = (check_out_of_range(v_prop, len(v_properties))
                             if isinstance(v_prop, int) else
                             self._schema.vertex_property_index(
                                 v_label_id, v_prop))
            except ValueError as e:
                raise ValueError(
                    "vertex label {} not contains the property {}".format(
                        v_label, v_prop)) from e

        if e_prop is None:
            # NB: -1 means edge property is None
            e_prop_id = -1
            e_properties = None
        else:
            check_argument(isinstance(e_prop, (int, str)))
            e_properties = self._schema.edge_properties[e_label_id]
            try:
                e_prop_id = (check_out_of_range(e_prop, len(e_properties))
                             if isinstance(e_prop, int) else
                             self._schema.edge_property_index(
                                 e_label_id, e_prop))
            except ValueError as e:
                raise ValueError(
                    "edge label {} not contains the property {}".format(
                        e_label, e_prop)) from e

        oid_type = self._schema.oid_type
        vid_type = self._schema.vid_type
        vdata_type = None
        if v_properties:
            vdata_type = list(v_properties.values())[v_prop_id]
        edata_type = None
        if e_properties:
            edata_type = list(e_properties.values())[e_prop_id]

        op = dag_utils.project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def add_column(self, results, selector):
        """Add the results as a column to the graph. Modification rules are given by the selector.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        check_argument(isinstance(selector, Mapping),
                       "selector of add column must be a dict")
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        selector = {
            key: results._transform_selector(value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        op = dag_utils.add_column(self, results, selector)
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self.check_unmodified()
        selector = utils.transform_labeled_vertex_property_data_selector(
            self, selector)
        vertex_range = utils.transform_vertex_range(vertex_range)
        op = dag_utils.graph_to_numpy(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_numpy(ret)

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self.check_unmodified()
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        selector = {
            key:
            utils.transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        vertex_range = utils.transform_vertex_range(vertex_range)

        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_dataframe(ret)

    def is_directed(self):
        """Return the directedness flag taken from the graph definition."""
        return self._directed

    def check_unmodified(self):
        """Check that the current signature still equals the one saved at construction."""
        check_argument(self.signature == self._saved_signature,
                       "Graph has been modified!")

    def _from_nx_graph(self, incoming_graph):
        """Create a gs graph from a nx graph.

        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            The graph_def that will be used to construct a gs.Graph

        Raises:
            TypeError: Raise Error if graph type not match.

        Examples:
            >>> nx_g = nx.path_graph(10)
            >>> gs_g = gs.Graph(nx_g)
        """
        # An object exposing `_graph` is treated as a graph view and rejected.
        if hasattr(incoming_graph, "_graph"):
            msg = "graph view can not convert to gs graph"
            raise TypeError(msg)
        op = dag_utils.dynamic_to_arrow(incoming_graph)
        graph_def = op.eval()
        return graph_def

    def _copy_from(self, incoming_graph):
        """Copy a graph.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from

        Returns:
            The graph_def produced by evaluating the copy op.
        """
        check_argument(incoming_graph.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(incoming_graph.loaded())
        op = dag_utils.copy_graph(incoming_graph)
        graph_def = op.eval()
        return graph_def

    def _from_vineyard(self, vineyard_object):
        """Load a graph from a already existed vineyard graph.

        Args:
            vineyard_object (:class:`vineyard.Object`, :class:`vineyard.ObjectID`
                or :class:`vineyard.ObjectName`): vineyard object,
                which represents a graph.

        Returns:
            A graph_def, or None (implicitly) for any other argument type.
        """
        if isinstance(vineyard_object, vineyard.Object):
            return self._from_vineyard_id(vineyard_object.id)
        if isinstance(vineyard_object, vineyard.ObjectID):
            return self._from_vineyard_id(vineyard_object)
        if isinstance(vineyard_object, vineyard.ObjectName):
            return self._from_vineyard_name(vineyard_object)

    def _from_vineyard_id(self, vineyard_id):
        """Create the graph in the engine from a vineyard object id; return its graph_def."""
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = utils.b_to_attr(True)
        config[types_pb2.VINEYARD_ID] = utils.i_to_attr(int(vineyard_id))
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = utils.s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = utils.s_to_attr("uint64_t")
        op = dag_utils.create_graph(self._session_id,
                                    types_pb2.ARROW_PROPERTY,
                                    attrs=config)
        graph_def = op.eval()
        return graph_def

    def _from_vineyard_name(self, vineyard_name):
        """Create the graph in the engine from a vineyard object name; return its graph_def."""
        config = {}
        config[types_pb2.IS_FROM_VINEYARD_ID] = utils.b_to_attr(True)
        config[types_pb2.VINEYARD_NAME] = utils.s_to_attr(str(vineyard_name))
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        config[types_pb2.OID_TYPE] = utils.s_to_attr("int64_t")
        config[types_pb2.VID_TYPE] = utils.s_to_attr("uint64_t")
        op = dag_utils.create_graph(self._session_id,
                                    types_pb2.ARROW_PROPERTY,
                                    attrs=config)
        graph_def = op.eval()
        return graph_def

    def attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    @staticmethod
    def _vineyard_io_kwargs(sess):
        """Build the session-derived keyword arguments shared by `serialize` and `deserialize`."""
        deployment = "kubernetes" if sess.info["type"] == "k8s" else "ssh"
        conf = sess.info["engine_config"]
        vineyard_endpoint = conf["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = conf["vineyard_socket"]
        if sess.info["type"] == "k8s":
            # On k8s each host is prefixed with the session namespace.
            hosts = [
                "{}:{}".format(sess.info["namespace"], s)
                for s in sess.info["engine_hosts"].split(",")
            ]
        else:  # type == "hosts"
            hosts = sess.info["engine_hosts"].split(",")
        return {
            "vineyard_ipc_socket": vineyard_ipc_socket,
            "vineyard_endpoint": vineyard_endpoint,
            "deployment": deployment,
            "hosts": hosts,
        }

    def serialize(self, path, **kwargs):
        """Serialize graph to a location.
        The meta and data of graph is dumped to specified location,
        and can be restored by `Graph.deserialize` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: forwarded to vineyard as `storage_options`.
        """
        import vineyard
        import vineyard.io

        sess = get_session_by_id(self.session_id)
        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            storage_options=kwargs,
            **self._vineyard_io_kwargs(sess),
        )

    @classmethod
    def deserialize(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserialize from `path`.
        It will read all serialization files, which is dumped by
        `Graph.serialize`.
        If any serialize file doesn't exists or broken, will error out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be construct in
            **kwargs: forwarded to vineyard as `storage_options`.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.
        """
        import vineyard
        import vineyard.io

        graph_id = vineyard.io.deserialize(
            path,
            type="global",
            storage_options=kwargs,
            **cls._vineyard_io_kwargs(sess),
        )
        return cls(sess.session_id, vineyard.ObjectID(graph_id))

    def draw(self, vertices, hop=1):
        """Visualize the graph data in the result cell when the draw functions are invoked

        Args:
            vertices (list): selected vertices.
            hop (int): draw induced subgraph with hop extension. Defaults to 1.

        Returns:
            A GraphModel.
        """
        from ipygraphin import GraphModel

        sess = get_session_by_id(self.session_id)
        interactive_query = sess.gremlin(self)

        graph = GraphModel()
        graph.queryGraphData(vertices, hop, interactive_query)

        # listen on the 1~2 hops operation of node
        graph.on_msg(graph.queryNeighbor)
        return graph

    def add_vertices(self, vertices):
        """Return a new :class:`Graph` built by adding `vertices` to this one.

        Every new label must differ from the existing vertex labels; a
        duplicate is rejected through `check_argument`.
        """
        vertices = graph_utils.normalize_parameter_vertices(vertices)
        # Configurations inherited from input graph
        # oid_type
        # CHECK label name not in existed vertex labels
        vertex_labels = self._schema.vertex_labels
        for vertex in vertices:
            check_argument(
                vertex.label not in vertex_labels,
                f"Duplicate label name with existing vertex labels: {vertex.label}",
            )

        config = graph_utils.assemble_op_config([], vertices, self._directed,
                                                self._schema.oid_type,
                                                self._generate_eid)
        op = dag_utils.add_vertices(self, attrs=config)
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def add_edges(self, edges):
        """Return a new :class:`Graph` built by adding `edges` to this one.

        The edges are validated against the existing vertex labels, and every
        new label must differ from the existing edge labels.
        """
        edges = graph_utils.normalize_parameter_edges(edges)
        # directed, oid_type, generate_eid
        # CHECK:
        # 1. edge's src/dst labels must existed in vertex_labels
        # 2. label name not in existed edge labels
        vertex_labels = self._schema.vertex_labels
        edge_labels = self._schema.edge_labels
        graph_utils.check_edge_validity(edges, vertex_labels)
        for edge in edges:
            check_argument(
                edge.label not in edge_labels,
                f"Duplicate label name with existing edge labels: {edge.label}",
            )

        config = graph_utils.assemble_op_config(edges, [], self._directed,
                                                self._schema.oid_type,
                                                self._generate_eid)
        op = dag_utils.add_edges(self, attrs=config)
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)
def __init__(self, graph_def, conn=None) -> None:
    """Populate a fresh schema from ``graph_def`` and remember the connection.

    Args:
        graph_def: Graph definition passed to ``GraphSchema.from_graph_def``.
        conn: Connection stored on this object and on its schema.
            Defaults to None.
    """
    # Bind the schema to ``self`` before loading it, as the original did.
    self._schema = schema = GraphSchema()
    schema.from_graph_def(graph_def)
    self._conn: Connection = conn
    schema._conn = conn
class DiGraph(Graph):
    """
    Base class for directed graphs.

    A DiGraph stores nodes and edges with optional data, or attributes.

    DiGraphs hold directed edges.  Self loops are allowed but multiple
    (parallel) edges are not.

    Nodes can be strings or integers objects with optional key/value attributes.

    Edges are represented as links between nodes with optional
    key/value attributes.

    Parameters
    ----------
    incoming_graph_data : input graph (optional, default: None)
        Data to initialize graph. If None (default) an empty
        graph is created.  The data can be any format that is supported
        by the to_networkx_graph() function, currently including edge list,
        dict of dicts, dict of lists, NetworkX graph, NumPy matrix
        or 2d ndarray, SciPy sparse matrix, or a graphscope graph.

    attr : keyword arguments, optional (default= no attributes)
        Attributes to add to graph as key=value pairs.

    See Also
    --------
    Graph
    graphscope.Graph

    Examples
    --------
    Create an empty graph structure (a "null graph") with no nodes and
    no edges.

    >>> G = nx.DiGraph()

    G can be grown in several ways.

    **Nodes:**

    Add one node at a time:

    >>> G.add_node(1)

    Add the nodes from any container (a list, dict, set or
    even the lines from a file or the nodes from another graph).

    >>> G.add_nodes_from([2, 3])
    >>> G.add_nodes_from(range(100, 110))
    >>> H = nx.path_graph(10)
    >>> G.add_nodes_from(H)

    In addition integers, strings can represent a node.

    >>> G.add_node('a node')

    **Edges:**

    G can also be grown by adding edges.

    Add one edge,

    >>> G.add_edge(1, 2)

    a list of edges,

    >>> G.add_edges_from([(1, 2), (1, 3)])

    or a collection of edges,

    >>> G.add_edges_from(H.edges)

    If some edges connect nodes not yet in the graph, the nodes
    are added automatically.  There are no errors when adding
    nodes or edges that already exist.

    **Attributes:**

    Each graph, node, and edge can hold key/value attribute pairs
    in an associated attribute dictionary (the keys must be hashable).
    By default these are empty, but can be added or changed using
    add_edge, add_node or direct manipulation of the attribute
    dictionaries named graph, node and edge respectively.

    >>> G = nx.DiGraph(day="Friday")
    >>> G.graph
    {'day': 'Friday'}

    Add node attributes using add_node(), add_nodes_from() or G.nodes

    >>> G.add_node(1, time='5pm')
    >>> G.add_nodes_from([3], time='2pm')
    >>> G.nodes[1]
    {'time': '5pm'}
    >>> G.nodes[1]['room'] = 714
    >>> del G.nodes[1]['room'] # remove attribute
    >>> list(G.nodes(data=True))
    [(1, {'time': '5pm'}), (3, {'time': '2pm'})]

    Add edge attributes using add_edge(), add_edges_from(), subscript
    notation, or G.edges.

    >>> G.add_edge(1, 2, weight=4.7 )
    >>> G.add_edges_from([(3, 4), (4, 5)], color='red')
    >>> G.add_edges_from([(1, 2, {'color':'blue'}), (2, 3, {'weight':8})])
    >>> G[1][2]['weight'] = 4.7
    >>> G.edges[1, 2]['weight'] = 4

    Warning: we protect the graph data structure by making `G.edges[1, 2]` a
    read-only dict-like structure. However, you can assign to attributes
    in e.g. `G.edges[1, 2]`. Thus, use 2 sets of brackets to add/change
    data attributes: `G.edges[1, 2]['weight'] = 4`
    (For multigraphs: `MG.edges[u, v, key][name] = value`).

    **Shortcuts:**

    Many common graph features allow python syntax to speed reporting.

    >>> 1 in G     # check if node in graph
    True
    >>> [n for n in G if n < 3]  # iterate through nodes
    [1, 2]
    >>> len(G)  # number of nodes in graph
    5

    Often the best way to traverse all edges of a graph is via the neighbors.
    The neighbors are reported as an adjacency-dict `G.adj` or `G.adjacency()`

    >>> for n, nbrsdict in G.adjacency():
    ...     for nbr, eattr in nbrsdict.items():
    ...        if 'weight' in eattr:
    ...            # Do something useful with the edges
    ...            pass

    But the edges reporting object is often more convenient:

    >>> for u, v, weight in G.edges(data='weight'):
    ...     if weight is not None:
    ...         # Do something useful with the edges
    ...         pass

    **Reporting:**

    Simple graph information is obtained using object-attributes and methods.
    Reporting usually provides views instead of containers to reduce memory
    usage. The views update as the graph is updated similarly to dict-views.
    The objects `nodes`, `edges` and `adj` provide access to data attributes
    via lookup (e.g. `nodes[n]`, `edges[u, v]`, `adj[u][v]`) and iteration
    (e.g. `nodes.items()`, `nodes.data('color')`,
    `nodes.data('color', default='blue')` and similarly for `edges`)
    Views exist for `nodes`, `edges`, `neighbors()`/`adj` and `degree`.

    For details on these and other miscellaneous methods, see below.
    """

    def __init__(self, incoming_graph_data=None, **attr):
        """Initialize a graph with edges, name, or graph attributes

        Parameters
        ----------
        incoming_graph_data : input graph (optional, default: None)
            Data to initialize graph. If None (default) an empty
            graph is created.  The data can be any format that is supported
            by the to_nx_graph() function, currently including edge list,
            dict of dicts, dict of lists, NetworkX graph, NumPy matrix
            or 2d ndarray, Pandas DataFrame, SciPy sparse matrix, or a graphscope
            graph.

        attr : keyword arguments, optional (default= no attributes)
            Attributes to add to graph as key=value pairs.

        Raises
        ------
        ValueError
            If no default session is registered.

        See Also
        --------
        convert

        Examples
        --------
        >>> G = nx.Graph()  # or DiGraph
        >>> G = nx.Graph(name='my graph')
        >>> e = [(1, 2), (2, 3), (3, 4)]  # list of edges
        >>> G = nx.Graph(e)

        Arbitrary graph attribute pairs (key=value) may be assigned

        >>> G = nx.Graph(e, day="Friday")
        >>> G.graph
        {'day': 'Friday'}

        """
        sess = get_default_session()
        if sess is None:
            raise ValueError(
                "Cannot find a default session. "
                "Please register a session using graphscope.session(...).as_default()"
            )
        self._session_id = sess.session_id

        self._key = None
        self._op = None
        # Copy the class-level graph type onto the instance.
        self._graph_type = self._graph_type
        self._schema = GraphSchema()
        self._schema.init_nx_schema()
        create_empty_in_engine = attr.pop("create_empty_in_engine",
                                          True)  # a hidden parameter
        if not self.is_gs_graph(
                incoming_graph_data) and create_empty_in_engine:
            graph_def = empty_graph_in_engine(self, self.is_directed())
            self._key = graph_def.key

        self.graph_attr_dict_factory = self.graph_attr_dict_factory
        self.node_dict_factory = self.node_dict_factory
        self.adjlist_dict_factory = self.adjlist_dict_factory

        self.graph = self.graph_attr_dict_factory()
        self._node = self.node_dict_factory(self)
        self._adj = self.adjlist_dict_factory(self)
        self._pred = self.adjlist_dict_factory(self, types_pb2.PREDS_BY_NODE)
        self._succ = self._adj

        # attempt to load graph with data
        if incoming_graph_data is not None:
            if self.is_gs_graph(incoming_graph_data):
                graph_def = from_gs_graph(incoming_graph_data, self)
                self._key = graph_def.key
                self._schema.init_nx_schema(incoming_graph_data.schema)
            else:
                to_nx_graph(incoming_graph_data, create_using=self)

        # load graph attributes (must be after to_nx_graph)
        self.graph.update(attr)
        self._saved_signature = self.signature

    def __repr__(self):
        # Separate the header lines from the schema text, in the same layout
        # as graphscope.Graph.__repr__ (previously they ran together).
        s = "graphscope.nx.DiGraph\n"
        s += "type: " + self.template_str.split("<")[0] + "\n\n"
        s += str(self._schema)
        return s

    @property
    def adj(self):
        """Graph adjacency object holding the successors of each node.

        This object is a read-only dict-like structure with node keys
        and neighbor-dict values.  The neighbor-dict is keyed by neighbor
        to the edge-data-dict.  So `G.succ[3][2]['color'] = 'blue'` sets
        the color of the edge `(3, 2)` to `"blue"`.

        Iterating over G.succ behaves like a dict. Useful idioms include
        `for nbr, datadict in G.succ[n].items():`.  A data-view not provided
        by dicts also exists: `for nbr, foovalue in G.succ[node].data('foo'):`
        and a default can be set via a `default` argument to the `data` method.

        The neighbor information is also provided by subscripting the graph.
        So `for nbr, foovalue in G[node].data('foo', default=1):` works.

        For directed graphs, `G.adj` is identical to `G.succ`.
        """
        return AdjacencyView(self._succ)

    succ = adj

    @property
    def pred(self):
        """Graph adjacency object holding the predecessors of each node.

        This object is a read-only dict-like structure with node keys
        and neighbor-dict values.  The neighbor-dict is keyed by neighbor
        to the edge-data-dict.  So `G.pred[2][3]['color'] = 'blue'` sets
        the color of the edge `(3, 2)` to `"blue"`.

        Iterating over G.pred behaves like a dict. Useful idioms include
        `for nbr, datadict in G.pred[n].items():`.  A data-view not provided
        by dicts also exists: `for nbr, foovalue in G.pred[node].data('foo'):`
        A default can be set via a `default` argument to the `data` method.
        """
        return AdjacencyView(self._pred)

    def is_gs_graph(self, incoming_graph_data):
        """Return True if the argument has a `graph_type` equal to ARROW_PROPERTY."""
        return (hasattr(incoming_graph_data, "graph_type") and
                incoming_graph_data.graph_type == types_pb2.ARROW_PROPERTY)

    def has_successor(self, u, v):
        """Returns True if node u has successor v.

        This is true if graph has the edge u->v.
        """
        return self.has_edge(u, v)

    def has_predecessor(self, u, v):
        """Returns True if node u has predecessor v.

        This is true if graph has the edge u<-v.
        """
        return self.has_edge(v, u)

    def successors(self, n):
        """Returns an iterator over successor nodes of n.

        A successor of n is a node m such that there exists a directed
        edge from n to m.

        Parameters
        ----------
        n : node
           A node in the graph

        Raises
        ------
        NetworkXError
           If n is not in the graph.

        See Also
        --------
        predecessors

        Notes
        -----
        neighbors() and successors() are the same.
        """
        try:
            return iter(self._succ[n])
        except KeyError as err:
            # Chain explicitly so the original lookup failure stays attached.
            raise NetworkXError("The node %s is not in the digraph." %
                                (n, )) from err

    # digraph definitions
    neighbors = successors

    def predecessors(self, n):
        """Returns an iterator over predecessor nodes of n.

        A predecessor of n is a node m such that there exists a directed
        edge from m to n.

        Parameters
        ----------
        n : node
           A node in the graph

        Raises
        ------
        NetworkXError
           If n is not in the graph.

        See Also
        --------
        successors
        """
        try:
            return iter(self._pred[n])
        except KeyError as err:
            # Chain explicitly so the original lookup failure stays attached.
            raise NetworkXError("The node %s is not in the digraph." %
                                (n, )) from err

    @property
    def edges(self):
        """An OutEdgeView of the DiGraph as G.edges or G.edges().

        edges(self, nbunch=None, data=False, default=None)

        The OutEdgeView provides set-like operations on the edge-tuples
        as well as edge attribute lookup. When called, it also provides
        an EdgeDataView object which allows control of access to edge
        attributes (but does not provide set-like operations).
        Hence, `G.edges[u, v]['color']` provides the value of the color
        attribute for edge `(u, v)` while
        `for (u, v, c) in G.edges.data('color', default='red'):`
        iterates through all the edges yielding the color attribute
        with default `'red'` if no color attribute exists.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.
        data : string or bool, optional (default=False)
            The edge attribute returned in 3-tuple (u, v, ddict[data]).
            If True, return edge attribute dict in 3-tuple (u, v, ddict).
            If False, return 2-tuple (u, v).
        default : value, optional (default=None)
            Value used for edges that don't have the requested attribute.
            Only relevant if data is not True or False.

        Returns
        -------
        edges : OutEdgeView
            A view of edge attributes, usually it iterates over (u, v)
            or (u, v, d) tuples of edges, but can also be used for
            attribute lookup as `edges[u, v]['foo']`.

        See Also
        --------
        in_edges, out_edges

        Notes
        -----
        Nodes in nbunch that are not in the graph will be (quietly) ignored.
        For directed graphs this returns the out-edges.

        Examples
        --------
        >>> G = nx.DiGraph()   # or MultiDiGraph, etc
        >>> nx.add_path(G, [0, 1, 2])
        >>> G.add_edge(2, 3, weight=5)
        >>> [e for e in G.edges]
        [(0, 1), (1, 2), (2, 3)]
        >>> G.edges.data()  # default data is {} (empty dict)
        OutEdgeDataView([(0, 1, {}), (1, 2, {}), (2, 3, {'weight': 5})])
        >>> G.edges.data('weight', default=1)
        OutEdgeDataView([(0, 1, 1), (1, 2, 1), (2, 3, 5)])
        >>> G.edges([0, 2])  # only edges incident to these nodes
        OutEdgeDataView([(0, 1), (2, 3)])
        >>> G.edges(0)  # only edges incident to a single node (use G.adj[0]?)
        OutEdgeDataView([(0, 1)])

        """
        return OutEdgeView(self)

    # alias out_edges to edges
    out_edges = edges

    @property
    def in_edges(self):
        """An InEdgeView of the Graph as G.in_edges or G.in_edges().

        in_edges(self, nbunch=None, data=False, default=None):

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.
        data : string or bool, optional (default=False)
            The edge attribute returned in 3-tuple (u, v, ddict[data]).
            If True, return edge attribute dict in 3-tuple (u, v, ddict).
            If False, return 2-tuple (u, v).
        default : value, optional (default=None)
            Value used for edges that don't have the requested attribute.
            Only relevant if data is not True or False.

        Returns
        -------
        in_edges : InEdgeView
            A view of edge attributes, usually it iterates over (u, v)
            or (u, v, d) tuples of edges, but can also be used for
            attribute lookup as `edges[u, v]['foo']`.

        See Also
        --------
        edges
        """
        return InEdgeView(self)

    @property
    def degree(self):
        """A DegreeView for the Graph as G.degree or G.degree().

        The node degree is the number of edges adjacent to the node.
        The weighted node degree is the sum of the edge weights for
        edges incident to that node.

        This object provides an iterator for (node, degree) as well as
        lookup for the degree for a single node.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.

        weight : string or None, optional (default=None)
           The name of an edge attribute that holds the numerical value used
           as a weight.  If None, then each edge has weight 1.
           The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
        If a single node is requested
        deg : int
            Degree of the node

        OR if multiple nodes are requested
        nd_iter : iterator
            The iterator returns two-tuples of (node, degree).

        See Also
        --------
        in_degree, out_degree

        Examples
        --------
        >>> G = nx.DiGraph()   # or MultiDiGraph
        >>> nx.add_path(G, [0, 1, 2, 3])
        >>> G.degree(0) # node 0 with degree 1
        1
        >>> list(G.degree([0, 1, 2]))
        [(0, 1), (1, 2), (2, 2)]

        """
        return DiDegreeView(self)

    @property
    def in_degree(self):
        """An InDegreeView for (node, in_degree) or in_degree for single node.

        The node in_degree is the number of edges pointing to the node.
        The weighted node degree is the sum of the edge weights for
        edges incident to that node.

        This object provides an iteration over (node, in_degree) as well as
        lookup for the degree for a single node.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.

        weight : string or None, optional (default=None)
           The name of an edge attribute that holds the numerical value used
           as a weight.  If None, then each edge has weight 1.
           The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
        If a single node is requested
        deg : int
            In-degree of the node

        OR if multiple nodes are requested
        nd_iter : iterator
            The iterator returns two-tuples of (node, in-degree).

        See Also
        --------
        degree, out_degree

        Examples
        --------
        >>> G = nx.DiGraph()
        >>> nx.add_path(G, [0, 1, 2, 3])
        >>> G.in_degree(0) # node 0 with degree 0
        0
        >>> list(G.in_degree([0, 1, 2]))
        [(0, 0), (1, 1), (2, 1)]

        """
        return InDegreeView(self)

    @property
    def out_degree(self):
        """An OutDegreeView for (node, out_degree)

        The node out_degree is the number of edges pointing out of the node.
        The weighted node degree is the sum of the edge weights for
        edges incident to that node.

        This object provides an iterator over (node, out_degree) as well as
        lookup for the degree for a single node.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.

        weight : string or None, optional (default=None)
           The name of an edge attribute that holds the numerical value used
           as a weight.  If None, then each edge has weight 1.
           The degree is the sum of the edge weights adjacent to the node.

        Returns
        -------
        If a single node is requested
        deg : int
            Out-degree of the node

        OR if multiple nodes are requested
        nd_iter : iterator
            The iterator returns two-tuples of (node, out-degree).

        See Also
        --------
        degree, in_degree

        Examples
        --------
        >>> G = nx.DiGraph()
        >>> nx.add_path(G, [0, 1, 2, 3])
        >>> G.out_degree(0) # node 0 with degree 1
        1
        >>> list(G.out_degree([0, 1, 2]))
        [(0, 1), (1, 1), (2, 1)]

        """
        return OutDegreeView(self)

    def is_directed(self):
        """Returns True if graph is directed, False otherwise."""
        return True

    def is_multigraph(self):
        """Returns True if graph is a multigraph, False otherwise."""
        return False

    def reverse(self, copy=True):
        """Returns the reverse of the graph.

        The reverse is a graph with the same nodes and edges
        but with the directions of the edges reversed.

        Parameters
        ----------
        copy : bool optional (default=True)
            If True, return a new DiGraph holding the reversed edges.
            If False, the reverse graph is created using a view of
            the original graph.
        """
        if not copy:
            return reverse_view(self)
        g = self.__class__(create_empty_in_engine=False)
        # NOTE(review): the attribute dict is shared with the original graph,
        # not copied, and `_op` keeps this graph's op rather than the
        # "reverse" copy op created below -- confirm both are intended.
        g.graph = self.graph
        g.name = self.name
        g._op = self._op
        op = copy_graph(self, "reverse")
        graph_def = op.eval()
        g._key = graph_def.key
        g._schema = deepcopy(self._schema)
        return g
def __init__(self, incoming_graph_data=None, **attr):
    """Initialize a graph with edges, name, or graph attributes

    The graph is bound to the current default session; construction fails
    if no default session has been registered.

    Parameters
    ----------
    incoming_graph_data : input graph (optional, default: None)
        Data to initialize graph. If None (default) an empty
        graph is created.  The data can be any format that is supported
        by the to_nx_graph() function, currently including edge list,
        dict of dicts, dict of lists, NetworkX graph, NumPy matrix
        or 2d ndarray, Pandas DataFrame, SciPy sparse matrix, or a graphscope
        graph.

    attr : keyword arguments, optional (default= no attributes)
        Attributes to add to graph as key=value pairs.

    Raises
    ------
    ValueError
        If there is no default session.

    See Also
    --------
    convert

    Examples
    --------
    >>> G = nx.Graph()  # or DiGraph
    >>> G = nx.Graph(name='my graph')
    >>> e = [(1, 2), (2, 3), (3, 4)]  # list of edges
    >>> G = nx.Graph(e)

    Arbitrary graph attribute pairs (key=value) may be assigned

    >>> G = nx.Graph(e, day="Friday")
    >>> G.graph
    {'day': 'Friday'}

    """
    default_session = get_default_session()
    if default_session is None:
        no_session_msg = (
            "Cannot find a default session. "
            "Please register a session using graphscope.session(...).as_default()"
        )
        raise ValueError(no_session_msg)

    # Engine-facing metadata.
    self._session_id = default_session.session_id
    self._key = None
    self._op = None
    self._graph_type = self._graph_type
    self._schema = GraphSchema()
    self._schema.init_nx_schema()

    # Hidden keyword: it is removed from `attr` here so that it never ends
    # up among the graph attributes stored at the end of construction.
    want_empty_graph = attr.pop("create_empty_in_engine", True)
    if not self.is_gs_graph(incoming_graph_data):
        if want_empty_graph:
            empty_def = empty_graph_in_engine(self, self.is_directed())
            self._key = empty_def.key

    # Bind the class-level factories on the instance, then build the
    # attribute/node/adjacency containers from them.
    self.graph_attr_dict_factory = self.graph_attr_dict_factory
    self.node_dict_factory = self.node_dict_factory
    self.adjlist_dict_factory = self.adjlist_dict_factory

    self.graph = self.graph_attr_dict_factory()
    self._node = self.node_dict_factory(self)
    self._adj = self.adjlist_dict_factory(self)
    self._pred = self.adjlist_dict_factory(self, types_pb2.PREDS_BY_NODE)
    self._succ = self._adj

    # Populate the graph from the supplied data, if any.
    if incoming_graph_data is not None and self.is_gs_graph(incoming_graph_data):
        converted_def = from_gs_graph(incoming_graph_data, self)
        self._key = converted_def.key
        self._schema.init_nx_schema(incoming_graph_data.schema)
    elif incoming_graph_data is not None:
        to_nx_graph(incoming_graph_data, create_using=self)

    # Graph attributes are applied last (must be after to_nx_graph).
    self.graph.update(attr)
    self._saved_signature = self.signature
class Graph(GraphInterface):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema, and the graph is directed or not.

    It is worth noticing that the graph is stored by the backend such as Analytical Engine, Vineyard.
    In other words, the graph object holds nothing but metadata.

    Attributes that are not set on the :class:`Graph` object itself are looked
    up on the wrapped graph node (see :meth:`__getattr__`).

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> sess = gs.session()
        >>> graph = sess.g()
        >>> graph = graph.add_vertices("person.csv", "person")
        >>> graph = graph.add_vertices("software.csv", "software")
        >>> graph = graph.add_edges("knows.csv", "knows", src_label="person", dst_label="person")
        >>> graph = graph.add_edges("created.csv", "created", src_label="person", dst_label="software")
        >>> print(graph)
        >>> print(graph.schema)
    """

    def __init__(
        self,
        graph_node,
    ):
        """Construct a :class:`Graph` object.

        Args:
            graph_node: The graph node this object wraps. Its `op` is replaced
                by a deep copy, it is marked as evaluated, and the copied op is
                added to the session's dag.
        """

        self._graph_node = graph_node
        self._session = self._graph_node.session
        # copy and set op evaluated
        self._graph_node.op = deepcopy(self._graph_node.op)
        self._graph_node.evaluated = True
        self._session.dag.add_op(self._graph_node.op)

        self._key = None
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._detached = False

        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

    def __del__(self):
        # Cleanly ignore all exceptions, because the session may already be
        # closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        """Close and forget every attached interactive instance."""
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, querying via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        """Close and forget every attached learning instance."""
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    def _launch_interactive_instance_impl(self):
        """Thread target that launches an interactive instance for this graph."""
        try:
            self._session.gremlin(self)
        except Exception:  # pylint: disable=broad-except
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpected errors are deliberately suppressed here.
            pass

    def update_from_graph_def(self, graph_def):
        """Refresh this object's metadata from an evaluated graph definition.

        Args:
            graph_def: Graph definition whose key, directedness, multigraph
                flag, vineyard info and schema are copied onto this object.
        """
        if graph_def.graph_type == graph_def_pb2.ARROW_FLATTENED:
            self._graph_node._graph_type = graph_def_pb2.ARROW_FLATTENED
        check_argument(
            self._graph_node.graph_type == graph_def.graph_type,
            "Graph type doesn't match {} versus {}".format(
                self._graph_node.graph_type, graph_def.graph_type
            ),
        )
        self._key = graph_def.key
        self._directed = graph_def.directed
        self._is_multigraph = graph_def.is_multigraph
        vy_info = graph_def_pb2.VineyardInfoPb()
        graph_def.extension.Unpack(vy_info)
        self._vineyard_id = vy_info.vineyard_id
        self._oid_type = data_type_to_cpp(vy_info.oid_type)
        self._generate_eid = vy_info.generate_eid

        self._schema_path = vy_info.schema_path
        self._schema.from_graph_def(graph_def)
        self._v_labels = self._schema.vertex_labels
        self._e_labels = self._schema.edge_labels
        self._e_relationships = self._schema.edge_relationships
        # init saved_signature (must be after init schema)
        self._saved_signature = self.signature
        # create gremlin server pod asynchronously
        if self._session.eager() and gs_config.initializing_interactive_engine:
            self._interactive_instance_launching_thread = threading.Thread(
                target=self._launch_interactive_instance_impl, args=()
            )
            self._interactive_instance_launching_thread.start()

    def __getattr__(self, name):
        """Fall back to the wrapped graph node for attributes missing here.

        Raises:
            AttributeError: If neither this object nor the graph node has `name`.
        """
        # `__getattr__` only runs after normal lookup has failed. If the
        # missing attribute is `_graph_node` itself (instance created without
        # `__init__`, e.g. by copy/pickle, or `__init__` failed early), reading
        # `self._graph_node` below would re-enter this method without bound.
        if name == "_graph_node":
            raise AttributeError("{0} not found.".format(name))
        graph_node = self._graph_node
        if hasattr(graph_node, name):
            return getattr(graph_node, name)
        raise AttributeError("{0} not found.".format(name))

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        return self._key

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        return self._schema_path

    @property
    def signature(self):
        """SHA-256 hex digest of ``"<schema signature>.<key>"``."""
        return hashlib.sha256(
            "{}.{}".format(self._schema.signature(), self._key).encode("utf-8")
        ).hexdigest()

    @property
    def op(self):
        """The op held by the wrapped graph node."""
        return self._graph_node.op

    @property
    def template_str(self):
        """C++ fragment template string matching this graph's type.

        Raises:
            ValueError: If the graph type is not one of the supported kinds.
        """
        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._oid_type)
        vid_type = utils.data_type_to_cpp(self._schema._vid_type)
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)
        if self._graph_type == graph_def_pb2.ARROW_PROPERTY:
            template = f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        elif self._graph_type == graph_def_pb2.ARROW_PROJECTED:
            template = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif self._graph_type == graph_def_pb2.ARROW_FLATTENED:
            # Type name was previously misspelled as "ArrowFlattenedFragmen".
            # NOTE(review): the sibling templates carry a `gs::` qualifier;
            # confirm whether this one needs it as well.
            template = f"ArrowFlattenedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif self._graph_type == graph_def_pb2.DYNAMIC_PROJECTED:
            template = f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        else:
            raise ValueError(f"Unsupported graph type: {self._graph_type}")
        return template

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the current session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session.session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the variable for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        """True if current graph has been loaded in the session."""
        return self._session.info["status"] == "active" and self._key is not None

    def __str__(self):
        v_str = "\n".join([f"VERTEX: {label}" for label in self._v_labels])
        # One (label, src, dst) triple per relationship of every edge label.
        relations = [
            (label, src, dst)
            for label, relationships in zip(self._e_labels, self._e_relationships)
            for src, dst in relationships
        ]
        e_str = "\n".join(
            [f"EDGE: {label}\tsrc: {src}\tdst: {dst}" for label, src, dst in relations]
        )

        return f"graphscope.Graph\n{graph_def_pb2.GraphTypePb.Name(self._graph_type)}\n{v_str}\n{e_str}"

    def __repr__(self):
        return self.__str__()

    def unload(self):
        """Unload this graph from graphscope engine.

        Does nothing when the session is not active or the graph has no key.
        Attached interactive and learning instances are closed first; failures
        while closing them are logged and do not abort the unload. A detached
        graph is not unloaded from the engine.

        Returns:
            The wrapped result of the graph node's unload, or None when the
            graph is detached or nothing was unloaded.
        """
        if self._session.info["status"] != "active" or self._key is None:
            return

        # close interactive instances first
        try:
            if (
                self._interactive_instance_launching_thread is not None
                and self._interactive_instance_launching_thread.is_alive()
            ):
                # join raises a RuntimeError if an attempt is made to join the current thread.
                # this exception occurs when a object collected by gc mechanism contains a running thread.
                if (
                    threading.current_thread()
                    != self._interactive_instance_launching_thread
                ):
                    self._interactive_instance_launching_thread.join()
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s", e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s", e)
        rlt = None
        if not self._detached:
            rlt = self._session._wrapper(self._graph_node.unload())
        self._key = None
        return rlt

    def _project_to_simple(self, v_prop=None, e_prop=None):
        """Delegate to the graph node's `_project_to_simple` via the session wrapper."""
        return self._session._wrapper(
            self._graph_node._project_to_simple(v_prop, e_prop)
        )

    def add_column(self, results, selector):
        """Delegate to the graph node's `add_column` via the session wrapper."""
        return self._session._wrapper(self._graph_node.add_column(results, selector))

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self._check_unmodified()
        return self._session._wrapper(self._graph_node.to_numpy(selector, vertex_range))

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self._check_unmodified()
        return self._session._wrapper(
            self._graph_node.to_dataframe(selector, vertex_range)
        )

    def to_directed(self):
        """Returns a directed representation of the graph.

        Returns:
            :class:`Graph`: A directed graph with the same name, same nodes, and
                with each edge (u, v, data) replaced by two directed edges (u, v, data) and (v, u, data).
                If the graph is already directed, the graph itself is returned.
        """
        if self._directed:
            return self
        return self._session._wrapper(self._graph_node.to_directed())

    def to_undirected(self):
        """Returns an undirected representation of the digraph.

        Returns:
            :class:`Graph`: An undirected graph with the same name and nodes and
                with edge (u, v, data) if either (u, v, data) or (v, u, data) is in the digraph.
                If both edges exist in digraph, they will both be preserved.
                You must check and correct for this manually if desired.
                If the graph is already undirected, the graph itself is returned.
        """
        if not self._directed:
            return self
        return self._session._wrapper(self._graph_node.to_undirected())

    def is_directed(self):
        """Return the stored directedness flag of the graph."""
        return self._directed

    def is_multigraph(self):
        """Return the stored multigraph flag of the graph."""
        return self._is_multigraph

    def _check_unmodified(self):
        """Raise (via `check_argument`) if the signature differs from the saved one."""
        check_argument(
            self.signature == self._saved_signature, "Graph has been modified!"
        )

    def _attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def _attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    @staticmethod
    def _vineyard_io_options(sess):
        """Build the connection/deployment keyword arguments for `vineyard.io` from `sess.info`."""
        info = sess.info
        conf = info["engine_config"]
        engine_hosts = info["engine_hosts"].split(",")
        if info["type"] == "k8s":
            deployment = "kubernetes"
            hosts = ["{}:{}".format(info["namespace"], s) for s in engine_hosts]
        else:  # type == "hosts"
            deployment = "ssh"
            hosts = engine_hosts
        return {
            "vineyard_ipc_socket": conf["vineyard_socket"],
            "vineyard_endpoint": conf["vineyard_rpc_endpoint"],
            "deployment": deployment,
            "hosts": hosts,
        }

    def save_to(self, path, **kwargs):
        """Serialize graph to a location.
        The meta and data of graph is dumped to specified location,
        and can be restored by `Graph.load_from` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: Passed to `vineyard.io.serialize` as `storage_options`.

        Raises:
            RuntimeError: If `vineyard` / `vineyard.io` cannot be imported.
        """
        try:
            import vineyard
            import vineyard.io
        except ImportError as e:
            raise RuntimeError(
                "Saving a graph to locations requires 'vineyard' and 'vineyard-io', "
                "please install those two dependencies via "
                "\n"
                "\n"
                "    pip3 install vineyard vineyard-io"
                "\n"
                "\n"
            ) from e

        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            storage_options=kwargs,
            **self._vineyard_io_options(self._session),
        )

    @classmethod
    def load_from(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserialize from `path`.
        It will read all serialization files, which is dumped by
        `Graph.save_to`.
        If any serialize file doesn't exists or broken, will error out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be construct in
            **kwargs: Passed to `vineyard.io.deserialize` as `storage_options`.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.

        Raises:
            RuntimeError: If `vineyard` / `vineyard.io` cannot be imported.
        """
        try:
            import vineyard
            import vineyard.io
        except ImportError as e:
            raise RuntimeError(
                "Loading a graph from locations requires 'vineyard' and 'vineyard-io', "
                "please install those two dependencies via "
                "\n"
                "\n"
                "    pip3 install vineyard vineyard-io"
                "\n"
                "\n"
            ) from e

        graph_id = vineyard.io.deserialize(
            path,
            type="global",
            storage_options=kwargs,
            **cls._vineyard_io_options(sess),
        )
        return sess._wrapper(GraphDAGNode(sess, vineyard.ObjectID(graph_id)))

    def add_vertices(self, vertices, label="_", properties=None, vid_field=0):
        """Delegate to the graph node's `add_vertices` via the session wrapper.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not loaded")
        return self._session._wrapper(
            self._graph_node.add_vertices(vertices, label, properties, vid_field)
        )

    def add_edges(
        self,
        edges,
        label="_",
        properties=None,
        src_label=None,
        dst_label=None,
        src_field=0,
        dst_field=1,
    ):
        """Delegate to the graph node's `add_edges` via the session wrapper.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not loaded")
        return self._session._wrapper(
            self._graph_node.add_edges(
                edges, label, properties, src_label, dst_label, src_field, dst_field
            )
        )

    def project(
        self,
        vertices: Mapping[str, Union[List[str], None]],
        edges: Mapping[str, Union[List[str], None]],
    ):
        """Delegate to the graph node's `project` via the session wrapper.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not loaded")
        return self._session._wrapper(self._graph_node.project(vertices, edges))