Ejemplo n.º 1
0
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as its key, its schema, and whether the graph is directed.

    It is worth noting that the graph itself is stored by a backend such as the Analytical Engine or Vineyard.
    In other words, the graph object holds nothing but metadata.

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> graph = sess.g()
        >>> graph = graph.add_vertices("person.csv","person")
        >>> graph = graph.add_vertices("software.csv", "software")
        >>> graph = graph.add_edges("knows.csv", "knows", src_label="person", dst_label="person")
        >>> graph = graph.add_edges("created.csv", "created", src_label="person", dst_label="software")
        >>> print(graph)
        >>> print(graph.schema)
    """
    def __init__(
        self,
        session,
        incoming_data=None,
        oid_type="int64",
        directed=True,
        generate_eid=True,
    ):
        """Construct a :class:`Graph` object.

        Nothing is evaluated here: when ``incoming_data`` is given, the
        operation that would build the graph is only recorded as pending.

        Args:
            session: Session the graph is created in. It is kept on the
                instance and used by later operations.
            incoming_data: Optional source the graph is initialized from,
                which can be one of:

                    - :class:`Operation`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`vineyard.Object`, :class:`vineyard.ObjectID` or :class:`vineyard.ObjectName`
            oid_type (str): Type of the original vertex ids. It is normalized
                with ``utils.normalize_data_type_str`` and must resolve to
                ``int64_t`` or ``std::string``. Defaults to "int64".
            directed (bool): Whether the graph is directed. Defaults to True.
            generate_eid (bool): Stored and forwarded to the op config when
                the graph is constructed. Defaults to True.

        Raises:
            ValueError: If ``oid_type`` does not normalize to a supported type.
            RuntimeError: If ``incoming_data`` is of an unsupported type.
        """

        # Identity and metadata of the graph; filled in once it is loaded.
        self._key = None
        self._graph_type = types_pb2.ARROW_PROPERTY
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._session = session
        self._detached = False

        # Engine instances attached to this graph.
        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        # Uncompleted operation, kept for lazy evaluation.
        self._pending_op = None
        # Reference to the base graph of a modify operation, held so that
        # it is not garbage collected.
        self._base_graph = None

        normalized_oid_type = utils.normalize_data_type_str(oid_type)
        if normalized_oid_type not in ("int64_t", "std::string"):
            raise ValueError("oid_type can only be int64_t or string.")
        self._oid_type = normalized_oid_type
        self._directed = directed
        self._generate_eid = generate_eid

        self._unsealed_vertices = {}
        self._unsealed_edges = {}
        # Used to display the schema without loading into vineyard,
        # and to sanity check newly added vertices and edges.
        self._v_labels = []
        self._e_labels = []
        self._e_relationships = []

        if incoming_data is None:
            return

        # Don't import :code:`nx` in top-level statements, to improve the
        # performance of :code:`import graphscope`.
        from graphscope.experimental import nx

        if isinstance(incoming_data, Operation):
            self._pending_op = incoming_data
            if incoming_data.type == types_pb2.PROJECT_GRAPH:
                self._graph_type = types_pb2.ARROW_PROJECTED
        elif isinstance(incoming_data, nx.Graph):
            self._pending_op = self._from_nx_graph(incoming_data)
        elif isinstance(incoming_data, Graph):
            self._pending_op = self._copy_from(incoming_data)
        elif isinstance(
                incoming_data,
            (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName)):
            self._pending_op = self._from_vineyard(incoming_data)
        else:
            raise RuntimeError("Not supported incoming data.")

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, quering via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    def _launch_interactive_instance_impl(self):
        try:
            self._session.gremlin(self)
        except:  # noqa: E722
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpect and suppress all exceptions here.
            pass

    def _from_graph_def(self, graph_def):
        """Populate this object's metadata from an evaluated graph definition.

        Args:
            graph_def: Graph definition produced by evaluating an operation.
                Its ``graph_type`` must equal this object's graph type.
        """
        check_argument(self._graph_type == graph_def.graph_type,
                       "Graph type doesn't match.")

        schema_def = graph_def.schema_def

        self._key, self._vineyard_id = graph_def.key, graph_def.vineyard_id
        self._oid_type = schema_def.oid_type
        self._directed = graph_def.directed
        self._generate_eid = graph_def.generate_eid

        self._schema_path = graph_def.schema_path
        # Refresh the schema first; the label caches are read back from it.
        self._schema.get_schema_from_def(schema_def)
        self._v_labels = self._schema.vertex_labels
        self._e_labels = self._schema.edge_labels
        self._e_relationships = self._schema.edge_relationships

    def _ensure_loaded(self):
        if self._key is not None and self._pending_op is None:
            return
        # Unloaded
        if self._session is None:
            raise RuntimeError("The graph is not loaded")
        # Empty graph
        if self._key is None and self._pending_op is None:
            raise RuntimeError("Empty graph.")
        # Try to load
        if self._pending_op is not None:
            # Create a graph from scratch.
            graph_def = self._pending_op.eval()
            self._from_graph_def(graph_def)
            self._pending_op = None
            self._base_graph = None
            self._unsealed_vertices.clear()
            self._unsealed_edges.clear()
            # init saved_signature (must be after init schema)
            self._saved_signature = self.signature
            # create gremlin server pod asynchronously
            if gs_config.initializing_interactive_engine:
                self._interactive_instance_launching_thread = threading.Thread(
                    target=self._launch_interactive_instance_impl, args=())
                self._interactive_instance_launching_thread.start()

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        self._ensure_loaded()
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        self._ensure_loaded()
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        self._ensure_loaded()
        return self._schema_path

    @property
    def signature(self):
        self._ensure_loaded()
        return hashlib.sha256("{}.{}".format(
            self._schema.signature(), self._key).encode("utf-8")).hexdigest()

    @property
    def template_str(self):
        """C++ fragment type string matching this graph's type.

        The graph is loaded first if needed.

        Raises:
            ValueError: If the graph type has no known fragment template.
        """
        self._ensure_loaded()

        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._oid_type)
        vid_type = self._schema.vid_type
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)

        graph_type = self._graph_type
        if graph_type == types_pb2.ARROW_PROPERTY:
            return f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        if graph_type == types_pb2.ARROW_PROJECTED:
            return f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        if graph_type == types_pb2.DYNAMIC_PROJECTED:
            return f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        raise ValueError(f"Unsupported graph type: {self._graph_type}")

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        self._ensure_loaded()
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the currrent session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session.session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the varaible for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        try:
            self._ensure_loaded()
        except RuntimeError:
            return False
        return self._key is not None

    def __str__(self):
        """Describe the graph: its type, vertex labels and edge relations.

        Built from the locally cached labels, so it does not load the graph.
        """
        vertex_lines = [f"VERTEX: {label}" for label in self._v_labels]
        v_str = "\n".join(vertex_lines)

        # One (label, src, dst) triple per relation of every edge label.
        triples = []
        for idx, edge_label in enumerate(self._e_labels):
            for src, dst in self._e_relationships[idx]:
                triples.append((edge_label, src, dst))
        e_str = "\n".join(
            f"EDGE: {label}\tsrc: {src}\tdst: {dst}"
            for label, src, dst in triples)

        type_name = types_pb2.GraphType.Name(self._graph_type)
        return f"graphscope.Graph\n{type_name}\n{v_str}\n{e_str}"

    def __repr__(self):
        """Return the same description as :meth:`__str__`."""
        return self.__str__()

    def unload(self):
        """Unload this graph from graphscope engine."""
        if self._session is None:
            raise RuntimeError("The graph is not loaded")

        if self._key is None:
            self._session = None
            self._pending_op = None
            return

        # close interactive instances first
        try:
            if (self._interactive_instance_launching_thread is not None and
                    self._interactive_instance_launching_thread.is_alive()):
                # join raises a RuntimeError if an attempt is made to join the current thread.
                # this exception occurs when a object collected by gc mechanism contains a running thread.
                if (threading.current_thread() !=
                        self._interactive_instance_launching_thread):
                    self._interactive_instance_launching_thread.join()
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s" % e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s" % e)
        if not self._detached:
            op = dag_utils.unload_graph(self)
            op.eval()
        self._key = None
        self._session = None
        self._pending_op = None

    def project_to_simple(self,
                          v_label="_",
                          e_label="_",
                          v_prop=None,
                          e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.

        Labels and properties may be given by name or by index; names are
        translated to the indices that the internal engine works with.

        Args:
            v_label (str or int, optional): vertex label to project. Defaults to "_".
            e_label (str or int, optional): edge label to project. Defaults to "_".
            v_prop (str or int, optional): vertex property of the v_label. Defaults to None.
            e_prop (str or int, optional): edge property of the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

        Raises:
            IndexError: If a numeric label or property id is out of range.
            ValueError: If a label or property lookup fails, or the graph has
                no ``v_label -> e_label <- v_label`` relationship.
        """
        self._ensure_loaded()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)

        self._check_unmodified()

        def ensure_in_range(index, size):
            # A numeric id must address an existing label/property slot.
            if index < 0 or index >= size:
                raise IndexError("id {} is out of range.".format(index))

        schema = self._schema

        # Resolve both labels to an (index, name) pair.
        try:
            if isinstance(v_label, str):
                v_label_id = schema.vertex_label_index(v_label)
            else:
                v_label_id = v_label
                ensure_in_range(v_label_id, schema.vertex_label_num)
                v_label = schema.vertex_labels[v_label_id]
            if isinstance(e_label, str):
                e_label_id = schema.edge_label_index(e_label)
            else:
                e_label_id = e_label
                ensure_in_range(e_label_id, schema.edge_label_num)
                e_label = schema.edge_labels[e_label_id]
        except ValueError as e:
            raise ValueError("Label does not exists.") from e

        # Check relation v_label -> e_label <- v_label exists.
        if (v_label, v_label) not in schema.edge_relationships[e_label_id]:
            raise ValueError(
                f"Graph doesn't contain such relationship: {v_label} -> {e_label} <- {v_label}."
            )

        # Resolve the optional properties; -1 / None stand for "no property".
        try:
            v_prop_id, vdata_type = -1, None
            if v_prop is not None:
                if isinstance(v_prop, str):
                    v_prop_id = schema.vertex_property_index(
                        v_label_id, v_prop)
                else:
                    v_prop_id = v_prop
                v_props = schema.vertex_properties[v_label_id]
                ensure_in_range(v_prop_id, len(v_props))
                vdata_type = list(v_props.values())[v_prop_id]

            e_prop_id, edata_type = -1, None
            if e_prop is not None:
                if isinstance(e_prop, str):
                    e_prop_id = schema.edge_property_index(
                        e_label_id, e_prop)
                else:
                    e_prop_id = e_prop
                e_props = schema.edge_properties[e_label_id]
                ensure_in_range(e_prop_id, len(e_props))
                edata_type = list(e_props.values())[e_prop_id]
        except ValueError as e:
            raise ValueError("Property does not exists.") from e

        op = dag_utils.project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            schema.oid_type,
            schema.vid_type,
        )
        return Graph(self._session, op)

    def add_column(self, results, selector):
        """Add the results as a column to the graph. Modification rules are given by the selector.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        self._ensure_loaded()
        check_argument(isinstance(selector, Mapping),
                       "selector of add column must be a dict")
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._check_unmodified()

        # Each selector value is transformed by the context before the whole
        # mapping is serialized for the operation.
        transformed = {}
        for column, value in selector.items():
            transformed[column] = results._transform_selector(value)
        op = dag_utils.add_column(self, results, json.dumps(transformed))
        return Graph(self._session, op)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Only supported on ``ARROW_PROPERTY`` graphs that are unmodified since
        they were loaded.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.
        Returns:
            `numpy.ndarray`
        """
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()
        transformed_selector = (
            utils.transform_labeled_vertex_property_data_selector(
                self, selector))
        transformed_range = utils.transform_vertex_range(vertex_range)
        op = dag_utils.graph_to_numpy(self, transformed_selector,
                                      transformed_range)
        return utils.decode_numpy(op.eval())

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Only supported on ``ARROW_PROPERTY`` graphs that are unmodified since
        they were loaded.

        Args:
            selector (dict): Select some portions of graph. Every value is
                transformed with
                ``utils.transform_labeled_vertex_property_data_selector``.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()
        # The message names this method; it used to say
        # `to_vineyard_dataframe`, which is not the method being called.
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        selector = {
            key:
            utils.transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        vertex_range = utils.transform_vertex_range(vertex_range)

        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_dataframe(ret)

    def is_directed(self):
        self._ensure_loaded()
        return self._directed

    def _check_unmodified(self):
        """Check that the graph's signature still equals the one saved at load time."""
        self._ensure_loaded()
        unchanged = self.signature == self._saved_signature
        check_argument(unchanged, "Graph has been modified!")

    def _from_nx_graph(self, incoming_graph):
        """Create a gs graph from a nx graph.
        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            that will be used to construct a gs.Graph

        Raises:
            TypeError: Raise Error if graph type not match.

        Examples:
            >>> nx_g = nx.path_graph(10)
            >>> gs_g = gs.Graph(nx_g)
        """
        if hasattr(incoming_graph, "_graph"):
            msg = "graph view can not convert to gs graph"
            raise TypeError(msg)
        return dag_utils.dynamic_to_arrow(incoming_graph)

    def _copy_from(self, incoming_graph):
        """Build the operation that copies a graph.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from.
                It must be an ``ARROW_PROPERTY`` graph and must be loaded.

        Returns:
            The result of ``dag_utils.copy_graph`` for the source graph, which
            the caller records as the pending operation.
        """
        source = incoming_graph
        check_argument(source.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(source.loaded())
        return dag_utils.copy_graph(source)

    def _from_vineyard(self, vineyard_object):
        """Load a graph from a already existed vineyard graph.

        Args:
            vineyard_object (:class:`vineyard.Object`, :class:`vineyard.ObjectID`
                            or :class:`vineyard.ObjectName`): vineyard object,
                            which represents a graph.

        Returns:
            The result of ``dag_utils.create_graph`` for the object, or None
            when ``vineyard_object`` is none of the supported types.
        """
        if isinstance(vineyard_object, vineyard.Object):
            # A full object is addressed through its id.
            return self._from_vineyard_id(vineyard_object.id)
        if isinstance(vineyard_object, vineyard.ObjectID):
            return self._from_vineyard_id(vineyard_object)
        if isinstance(vineyard_object, vineyard.ObjectName):
            return self._from_vineyard_name(vineyard_object)
        return None

    def _from_vineyard_id(self, vineyard_id):
        """Build the create-graph operation for a graph addressed by vineyard id.

        Args:
            vineyard_id: Vineyard object id; it is converted with ``int()``.

        Returns:
            The result of ``dag_utils.create_graph`` with the assembled attrs.
        """
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        attrs = {
            types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(True),
            types_pb2.VINEYARD_ID: utils.i_to_attr(int(vineyard_id)),
            types_pb2.OID_TYPE: utils.s_to_attr("int64_t"),
            types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
        }
        return dag_utils.create_graph(self.session_id,
                                      types_pb2.ARROW_PROPERTY,
                                      attrs=attrs)

    def _from_vineyard_name(self, vineyard_name):
        """Build the create-graph operation for a graph addressed by vineyard name.

        Args:
            vineyard_name: Vineyard object name; it is converted with ``str()``.

        Returns:
            The result of ``dag_utils.create_graph`` with the assembled attrs.
        """
        # NOTE(review): IS_FROM_VINEYARD_ID is set even though the graph is
        # addressed by name; presumably the engine expects that. Confirm.
        # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
        #
        # the metadata should be retrieved from vineyard
        attrs = {
            types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(True),
            types_pb2.VINEYARD_NAME: utils.s_to_attr(str(vineyard_name)),
            types_pb2.OID_TYPE: utils.s_to_attr("int64_t"),
            types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
        }
        return dag_utils.create_graph(self.session_id,
                                      types_pb2.ARROW_PROPERTY,
                                      attrs=attrs)

    def _attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def _attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    def save_to(self, path, **kwargs):
        """Serialize graph to a location.

        The meta and data of graph is dumped to specified location,
        and can be restored by `Graph.load_from` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: Forwarded to ``vineyard.io.serialize`` as
                ``storage_options``.
        """
        import vineyard
        import vineyard.io

        self._ensure_loaded()
        info = self._session.info
        deployment = "kubernetes" if info["type"] == "k8s" else "ssh"
        engine_config = info["engine_config"]
        vineyard_endpoint = engine_config["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = engine_config["vineyard_socket"]

        if info["type"] == "k8s":
            # On k8s every engine host is prefixed with the namespace.
            engine_hosts = info["engine_hosts"].split(",")
            hosts = [
                "{}:{}".format(info["namespace"], host)
                for host in engine_hosts
            ]
        else:  # type == "hosts"
            hosts = info["engine_hosts"].split(",")

        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            vineyard_ipc_socket=vineyard_ipc_socket,
            vineyard_endpoint=vineyard_endpoint,
            storage_options=kwargs,
            deployment=deployment,
            hosts=hosts,
        )

    @classmethod
    def load_from(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserialize from `path`.

        It will read all serialization files, which is dumped by
        `Graph.save_to`.
        If any serialize file doesn't exists or broken, will error out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be construct in
            **kwargs: Forwarded to ``vineyard.io.deserialize`` as
                ``storage_options``.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.
        """
        import vineyard
        import vineyard.io

        info = sess.info
        deployment = "kubernetes" if info["type"] == "k8s" else "ssh"
        engine_config = info["engine_config"]
        vineyard_endpoint = engine_config["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = engine_config["vineyard_socket"]

        if info["type"] == "k8s":
            # On k8s every engine host is prefixed with the namespace.
            engine_hosts = info["engine_hosts"].split(",")
            hosts = [
                "{}:{}".format(info["namespace"], host)
                for host in engine_hosts
            ]
        else:  # type == "hosts"
            hosts = info["engine_hosts"].split(",")

        graph_id = vineyard.io.deserialize(
            path,
            type="global",
            vineyard_ipc_socket=vineyard_ipc_socket,
            vineyard_endpoint=vineyard_endpoint,
            storage_options=kwargs,
            deployment=deployment,
            hosts=hosts,
        )
        return cls(sess, vineyard.ObjectID(graph_id))

    def draw(self, vertices, hop=1):
        """Visualize the graph data in the result cell when the draw functions are invoked

        Args:
            vertices (list): selected vertices.
            hop (int): draw induced subgraph with hop extension. Defaults to 1.

        Returns:
            A GraphModel.
        """
        from ipygraphin import GraphModel

        self._ensure_loaded()
        # The model fetches its data through an interactive query on this graph.
        query = self._session.gremlin(self)

        model = GraphModel()
        model.queryGraphData(vertices, hop, query)

        # listen on the 1~2 hops operation of node
        model.on_msg(model.queryNeighbor)
        return model

    def _construct_graph(self,
                         vertices,
                         edges,
                         v_labels,
                         e_labels,
                         e_relations,
                         mutation_func=None):
        """Construct graph.
           1. Construct a graph from scratch.
              If the vertices and edges is empty, return a empty graph.
           2. Construct a graph from existed builded graph.
              If the vertices and edges is empty, return a copied graph.

        Args:
            vertices (dict): Unsealed vertex labels, keyed by label name.
            edges (dict): Unsealed edge labels, keyed by label name.
            v_labels (list): Vertex label names of the resulting graph.
            e_labels (list): Edge label names of the resulting graph.
            e_relations (list): Per edge label, its (src, dst) label pairs.
            mutation_func (callable, optional): Called as
                ``mutation_func(self, attrs=config)`` to build the operation
                that modifies this graph. When None the graph is created from
                scratch. Defaults to None.

        Returns:
            :class:`Graph`: A new graph object holding the pending operation.
        """
        config = graph_utils.assemble_op_config(
            vertices.values(),
            edges.values(),
            self._oid_type,
            self._directed,
            self._generate_eid,
        )

        # edge case: nothing to add.
        if not (vertices or edges):
            if mutation_func:
                # Copy of the existing graph; relies on `self._key`.
                return Graph(self._session, self)
            return Graph(
                self._session,
                None,
                self._oid_type,
                self._directed,
                self._generate_eid,
            )

        if mutation_func:
            op = mutation_func(self, attrs=config)
        else:
            op = dag_utils.create_graph(self.session_id,
                                        types_pb2.ARROW_PROPERTY,
                                        attrs=config)

        new_graph = Graph(self._session, op, self._oid_type, self._directed,
                          self._generate_eid)
        new_graph._unsealed_vertices = vertices
        new_graph._unsealed_edges = edges
        new_graph._v_labels = v_labels
        new_graph._e_labels = e_labels
        new_graph._e_relationships = e_relations
        if mutation_func:
            # Keep the graph being modified alive until the op is evaluated.
            new_graph._base_graph = self._base_graph or self
        return new_graph

    def add_vertices(self, vertices, label="_", properties=[], vid_field=0):
        is_from_existed_graph = len(self._unsealed_vertices) != len(
            self._v_labels) or len(self._unsealed_edges) != len(self._e_labels)

        if label in self._v_labels:
            raise ValueError(f"Label {label} already existed in graph.")
        if not self._v_labels and self._e_labels:
            raise ValueError(
                "Cannot manually add vertices after inferred vertices.")
        unsealed_vertices = deepcopy(self._unsealed_vertices)
        unsealed_vertices[label] = VertexLabel(label=label,
                                               loader=vertices,
                                               properties=properties,
                                               vid_field=vid_field)
        v_labels = deepcopy(self._v_labels)
        v_labels.append(label)

        # Load after validity check and before create add_vertices op.
        # TODO(zsy): Add ability to add vertices and edges to existed graph simultaneously.
        if is_from_existed_graph and self._unsealed_edges:
            self._ensure_loaded()

        func = dag_utils.add_vertices if is_from_existed_graph else None
        return self._construct_graph(
            unsealed_vertices,
            self._unsealed_edges,
            v_labels,
            self._e_labels,
            self._e_relationships,
            func,
        )

    def add_edges(
        self,
        edges,
        label="_",
        properties=[],
        src_label=None,
        dst_label=None,
        src_field=0,
        dst_field=1,
    ):
        """Add edges to graph.
        1. Add edges to a uninitialized graph.

            i.   src_label and dst_label both unspecified. In this case, current graph must
                 has 0 (we deduce vertex label from edge table, and set vertex label name to '_'),
                 or 1 vertex label (we set src_label and dst label to this).
            ii.  src_label and dst_label both specified and existed in current graph's vertex labels.
            iii. src_label and dst_label both specified and there is no vertex labels in current graph.
                 we deduce all vertex labels from edge tables.
                 Note that you either provide all vertex labels, or let graphscope deduce all vertex labels.
                 We don't support mixed style.

        2. Add edges to a existed graph.
            Must add a new kind of edge label, not a new relation to builded graph.
            But you can add a new relation to uninitialized part of the graph.
            src_label and dst_label must be specified and existed in current graph.

        Args:
            edges ([type]): [description]
            label (str, optional): [description]. Defaults to "_".
            properties ([type], optional): [description]. Defaults to None.
            src_label ([type], optional): [description]. Defaults to None.
            dst_label ([type], optional): [description]. Defaults to None.
            src_field (int, optional): [description]. Defaults to 0.
            dst_field (int, optional): [description]. Defaults to 1.

        Raises:
            RuntimeError: [description]

        Returns:
            Graph: [description]
        """
        is_from_existed_graph = len(self._unsealed_vertices) != len(
            self._v_labels) or len(self._unsealed_edges) != len(self._e_labels)

        if is_from_existed_graph:
            if label in self._e_labels and label not in self._unsealed_edges:
                raise ValueError("Cannot add new relation to existed graph.")
            if src_label is None or dst_label is None:
                raise ValueError("src label and dst label cannot be None.")
            if src_label not in self._v_labels or dst_label not in self._v_labels:
                raise ValueError(
                    "src label or dst_label not existed in graph.")
        else:
            if src_label is None and dst_label is None:
                check_argument(
                    len(self._v_labels) <= 1, "ambiguous vertex label")
                if len(self._v_labels) == 1:
                    src_label = dst_label = self._v_labels[0]
                else:
                    src_label = dst_label = "_"
            elif src_label is not None and dst_label is not None:
                if self._v_labels:
                    if (src_label not in self._v_labels
                            or dst_label not in self._v_labels):
                        raise ValueError(
                            "src label or dst_label not existed in graph.")
                else:
                    # Infer all v_labels from edge tables.
                    pass
            else:
                raise ValueError(
                    "src and dst label must be both specified or either unspecified."
                )

        check_argument(src_field != dst_field,
                       "src and dst field cannot refer to the same field")

        unsealed_edges = deepcopy(self._unsealed_edges)
        e_labels = deepcopy(self._e_labels)
        relations = deepcopy(self._e_relationships)
        if label in unsealed_edges:
            assert label in self._e_labels
            label_idx = self._e_labels.index(label)
            # Will check conflict in `add_sub_label`
            relations[label_idx].append((src_label, dst_label))
            cur_label = unsealed_edges[label]
        else:
            e_labels.append(label)
            relations.append([(src_label, dst_label)])
            cur_label = EdgeLabel(label)
        cur_label.add_sub_label(
            EdgeSubLabel(edges, properties, src_label, dst_label, src_field,
                         dst_field))
        unsealed_edges[label] = cur_label

        # Load after validity check and before create add_vertices op.
        # TODO(zsy): Add ability to add vertices and edges to existed graph simultaneously.
        if is_from_existed_graph and self._unsealed_vertices:
            self._ensure_loaded()

        func = dag_utils.add_edges if is_from_existed_graph else None
        return self._construct_graph(
            self._unsealed_vertices,
            unsealed_edges,
            self._v_labels,
            e_labels,
            relations,
            func,
        )
Ejemplo n.º 2
0
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema, and the graph is directed or not.

    It is worth noting that the graph is stored by the backend such as Analytical Engine, Vineyard.
    In other words, the graph object holds nothing but metadata.

    The graph object should not be created directly from :class:`Graph`.
    Instead, the graph should be created by `Session.load_from`

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> g = sess.load_from(
        ...     edges={
        ...         "knows": (
        ...             Loader("{}/p2p-31_property_e_0".format(property_dir), header_row=True),
        ...             ["src_label_id", "dst_label_id", "dist"],
        ...             ("src_id", "person"),
        ...             ("dst_id", "person"),
        ...         ),
        ...     },
        ...     vertices={
        ...         "person": Loader(
        ...             "{}/p2p-31_property_v_0".format(property_dir), header_row=True
        ...         ),
        ...     }
        ... )
    """
    def __init__(self, session_id, incoming_data=None):
        """Construct a :class:`Graph` object.

        Args:
            session_id (str): Session id of the session the graph is created in.
            incoming_data: Source the graph is initialized from. It must be
                one of:
                    - :class:`GraphDef`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`vineyard.Object`, :class:`vineyard.ObjectID` or :class:`vineyard.ObjectName`

        Raises:
            ValueError: If `incoming_data` is none of the types above, which
                includes the default value ``None``.
        """

        # Don't import the :code:`NXGraph` in top-level statements to improve the
        # performance of :code:`import graphscope`.
        from graphscope.experimental.nx.classes.graph import Graph as NXGraph

        self._key = None
        self._op = None
        self._graph_type = None
        # Historical public attribute, retained so existing readers keep working.
        # The state consulted by `is_directed` is `_directed`.
        self.directed = False
        self._directed = False
        # Unknown until a graph_def is applied below.
        self._generate_eid = None
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._schema_path = None
        self._saved_signature = None

        self._session_id = session_id
        self._detached = False

        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        if isinstance(incoming_data, GraphDef):
            graph_def = incoming_data
        elif isinstance(incoming_data, NXGraph):
            graph_def = self._from_nx_graph(incoming_data)
        elif isinstance(incoming_data, Graph):
            graph_def = self._copy_from(incoming_data)
        elif isinstance(
                incoming_data,
            (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName)):
            graph_def = self._from_vineyard(incoming_data)
        else:
            # Exceptions do not interpolate logging-style arguments, so the
            # message is formatted explicitly.
            raise ValueError(
                f"Failed to create a graph on graphscope engine: {incoming_data}"
            )

        if graph_def:
            self._key = graph_def.key
            self._vineyard_id = graph_def.vineyard_id
            self._graph_type = graph_def.graph_type
            self._directed = graph_def.directed
            self._generate_eid = graph_def.generate_eid
            self._schema.get_schema_from_def(graph_def.schema_def)
            self._schema_path = graph_def.schema_path
            # init saved_signature (must be after init schema)
            self._saved_signature = self.signature

            # create gremlin server pod asynchronously
            if gs_config.initializing_interactive_engine:
                self._interactive_instance_launching_thread = threading.Thread(
                    target=self._launch_interactive_instance_impl, args=())
                self._interactive_instance_launching_thread.start()

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, quering via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    def _launch_interactive_instance_impl(self):
        try:
            sess = get_session_by_id(self.session_id)
            sess.gremlin(self)
        except:  # noqa: E722
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpect and suppress all exceptions here.
            pass

    @property
    def op(self):
        """The DAG op of this graph."""
        return self._op

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        return self._schema_path

    @property
    def signature(self):
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256("{}.{}".format(
            self._schema.signature(), self._key).encode("utf-8")).hexdigest()

    @property
    def template_str(self):
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        graph_type = self._graph_type
        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._schema.oid_type)
        vid_type = self._schema.vid_type
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)
        if graph_type == types_pb2.ARROW_PROPERTY:
            template = f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        elif graph_type == types_pb2.ARROW_PROJECTED:
            template = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif graph_type == types_pb2.DYNAMIC_PROJECTED:
            template = f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        else:
            raise ValueError(f"Unsupported graph type: {graph_type}")
        return template

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the currrent session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session_id

    def detach(self):
        """Detaching a graph makes it being left in vineyard even when the varaible for
        this :class:`Graph` object leaves the lexical scope.

        The graph can be accessed using the graph's :code:`ObjectID` or its name later.
        """
        self._detached = True

    def loaded(self):
        return self._key is not None

    def __str__(self):
        return f"graphscope.Graph <{self.template_str}> {self._vineyard_id}"

    def __repr__(self):
        return ("graphscope.Graph\n"
                f"type: {self.template_str.split('<')[0]}\n"
                f"vineyard_id: {self._vineyard_id}\n\n"
                f"{str(self._schema)}")

    def unload(self):
        """Unload this graph from graphscope engine."""
        if not self.loaded():
            raise RuntimeError("The graph is not registered in remote.")
        # close interactive instances first
        try:
            if (self._interactive_instance_launching_thread is not None and
                    self._interactive_instance_launching_thread.is_alive()):
                # join raises a RuntimeError if an attempt is made to join the current thread.
                # this exception occurs when a object collected by gc mechanism contains a running thread.
                if (threading.current_thread() !=
                        self._interactive_instance_launching_thread):
                    self._interactive_instance_launching_thread.join()
            self._close_interactive_instances()
        except Exception as e:
            logger.error("Failed to close interactive instances: %s" % e)
        try:
            self._close_learning_instances()
        except Exception as e:
            logger.error("Failed to close learning instances: %s" % e)
        if not self._detached:
            op = dag_utils.unload_graph(self)
            op.eval()
        self._key = None

    def project_to_simple(self,
                          v_label="_",
                          e_label="_",
                          v_prop=None,
                          e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.
        Will translate name represented label or property to index, which is broadedly used
        in internal engine.

        Args:
            v_label (str, optional): vertex label to project. Defaults to "_".
            e_label (str, optional): edge label to project. Defaults to "_".
            v_prop (str, optional): vertex property of the v_label. Defaults to None.
            e_prop (str, optional): edge property of the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`
        """
        if not self.loaded():
            raise RuntimeError(
                "The graph is not registered in remote, and can't project to simple"
            )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(isinstance(v_label, (int, str)))
        check_argument(isinstance(e_label, (int, str)))

        def check_out_of_range(id, length):
            if id < length and id > -1:
                return id
            else:
                raise KeyError("id {} is out of range.".format(id))

        try:
            v_label_id = (check_out_of_range(
                v_label, self._schema.vertex_label_num) if isinstance(
                    v_label, int) else
                          self._schema.vertex_label_index(v_label))
        except ValueError as e:
            raise ValueError("graph not contains the vertex label {}.".format(
                v_label)) from e

        try:
            e_label_id = (check_out_of_range(
                e_label, self._schema.edge_label_num) if isinstance(
                    e_label, int) else self._schema.edge_label_index(e_label))
        except ValueError as e:
            raise InvalidArgumentError(
                "graph not contains the edge label {}.".format(e_label)) from e

        if v_prop is None:
            # NB: -1 means vertex property is None
            v_prop_id = -1
            v_properties = None
        else:
            check_argument(isinstance(v_prop, (int, str)))
            v_properties = self._schema.vertex_properties[v_label_id]
            try:
                v_prop_id = (check_out_of_range(v_prop, len(v_properties))
                             if isinstance(v_prop, int) else
                             self._schema.vertex_property_index(
                                 v_label_id, v_prop))
            except ValueError as e:
                raise ValueError(
                    "vertex label {} not contains the property {}".format(
                        v_label, v_prop)) from e

        if e_prop is None:
            # NB: -1 means edge property is None
            e_prop_id = -1
            e_properties = None
        else:
            check_argument(isinstance(e_prop, (int, str)))
            e_properties = self._schema.edge_properties[e_label_id]
            try:
                e_prop_id = (check_out_of_range(e_prop, len(e_properties))
                             if isinstance(e_prop, int) else
                             self._schema.edge_property_index(
                                 e_label_id, e_prop))
            except ValueError as e:
                raise ValueError(
                    "edge label {} not contains the property {}".format(
                        e_label, e_prop)) from e

        oid_type = self._schema.oid_type
        vid_type = self._schema.vid_type
        vdata_type = None
        if v_properties:
            vdata_type = list(v_properties.values())[v_prop_id]
        edata_type = None
        if e_properties:
            edata_type = list(e_properties.values())[e_prop_id]

        op = dag_utils.project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def add_column(self, results, selector):
        """Add the results as a column to the graph. Modification rules are given by the selector.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        check_argument(isinstance(selector, Mapping),
                       "selector of add column must be a dict")
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        # Every selector value is transformed by the context, then the whole
        # mapping is passed to the op as a JSON string.
        transformed = {}
        for name, raw in selector.items():
            transformed[name] = results._transform_selector(raw)
        encoded = json.dumps(transformed)
        new_def = dag_utils.add_column(self, results, encoded).eval()
        return Graph(self.session_id, new_def)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self.check_unmodified()
        parsed_selector = utils.transform_labeled_vertex_property_data_selector(
            self, selector)
        parsed_range = utils.transform_vertex_range(vertex_range)
        encoded = dag_utils.graph_to_numpy(
            self, parsed_selector, parsed_range).eval()
        return utils.decode_numpy(encoded)

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Args:
            selector (dict): Select some portions of graph. Every value is
                transformed with
                `utils.transform_labeled_vertex_property_data_selector`.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self.check_unmodified()
        # The message names this method; it previously referred to
        # `to_vineyard_dataframe`, which is a different method.
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        selector = {
            key:
            utils.transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        # The op receives the selector mapping as a JSON string.
        selector = json.dumps(selector)
        vertex_range = utils.transform_vertex_range(vertex_range)

        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_dataframe(ret)

    def is_directed(self):
        return self._directed

    def check_unmodified(self):
        """Check that the current signature still equals the saved signature.

        The comparison result is handed to `check_argument` together with the
        message "Graph has been modified!".
        """
        check_argument(
            self.signature == self._saved_signature,
            "Graph has been modified!",
        )

    def _from_nx_graph(self, incoming_graph):
        """Create a gs graph from a nx graph.
        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            that will be used to construct a gs.Graph

        Raises:
            TypeError: Raise Error if graph type not match.

        Examples:
            >>> nx_g = nx.path_graph(10)
            >>> gs_g = gs.Graph(nx_g)
        """
        if hasattr(incoming_graph, "_graph"):
            msg = "graph view can not convert to gs graph"
            raise TypeError(msg)
        op = dag_utils.dynamic_to_arrow(incoming_graph)
        graph_def = op.eval()
        return graph_def

    def _copy_from(self, incoming_graph):
        """Copy a graph.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from.
                It must be loaded and of type ARROW_PROPERTY.

        Returns:
            The graph_def produced by evaluating the `copy_graph` op.
        """
        check_argument(incoming_graph.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(incoming_graph.loaded())
        copy_op = dag_utils.copy_graph(incoming_graph)
        return copy_op.eval()

    def _from_vineyard(self, vineyard_object):
        """Load a graph from an already existing vineyard graph.

        Args:
            vineyard_object (:class:`vineyard.Object`, :class:`vineyard.ObjectID`
                            or :class:`vineyard.ObjectName`): vineyard object,
                            which represents a graph.

        Returns:
            A graph_def, or ``None`` when the argument is none of the three
            supported types.
        """
        graph_def = None
        if isinstance(vineyard_object, vineyard.Object):
            # A full object is resolved through its id.
            graph_def = self._from_vineyard_id(vineyard_object.id)
        elif isinstance(vineyard_object, vineyard.ObjectID):
            graph_def = self._from_vineyard_id(vineyard_object)
        elif isinstance(vineyard_object, vineyard.ObjectName):
            graph_def = self._from_vineyard_name(vineyard_object)
        return graph_def

    def _from_vineyard_id(self, vineyard_id):
        """Evaluate a create-graph op that refers to a vineyard object id.

        Args:
            vineyard_id: Id of the vineyard object; it is converted with `int`.

        Returns:
            The graph_def produced by evaluating the op.
        """
        attrs = {
            types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(True),
            types_pb2.VINEYARD_ID: utils.i_to_attr(int(vineyard_id)),
            # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
            #
            # the metadata should be retrieved from vineyard
            types_pb2.OID_TYPE: utils.s_to_attr("int64_t"),
            types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
        }
        create_op = dag_utils.create_graph(self._session_id,
                                           types_pb2.ARROW_PROPERTY,
                                           attrs=attrs)
        return create_op.eval()

    def _from_vineyard_name(self, vineyard_name):
        """Evaluate a create-graph op that refers to a vineyard object name.

        Args:
            vineyard_name: Name of the vineyard object; it is converted with `str`.

        Returns:
            The graph_def produced by evaluating the op.
        """
        attrs = {
            # NOTE(review): the IS_FROM_VINEYARD_ID flag is set here as well,
            # although a name is supplied; presumably the engine uses the flag
            # for both cases. Confirm against the engine side.
            types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(True),
            types_pb2.VINEYARD_NAME: utils.s_to_attr(str(vineyard_name)),
            # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
            #
            # the metadata should be retrieved from vineyard
            types_pb2.OID_TYPE: utils.s_to_attr("int64_t"),
            types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
        }
        create_op = dag_utils.create_graph(self._session_id,
                                           types_pb2.ARROW_PROPERTY,
                                           attrs=attrs)
        return create_op.eval()

    def attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    def serialize(self, path, **kwargs):
        """Serialize graph to a location.

        The meta and data of graph is dumped to specified location,
        and can be restored by `Graph.deserialize` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: Forwarded to `vineyard.io.serialize` as `storage_options`.
        """
        import vineyard
        import vineyard.io

        info = get_session_by_id(self.session_id).info
        on_k8s = info["type"] == "k8s"
        engine_conf = info["engine_config"]
        rpc_endpoint = engine_conf["vineyard_rpc_endpoint"]
        ipc_socket = engine_conf["vineyard_socket"]
        hosts = info["engine_hosts"].split(",")
        if on_k8s:
            # On kubernetes every host is prefixed with the session namespace.
            hosts = ["{}:{}".format(info["namespace"], h) for h in hosts]
        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            vineyard_ipc_socket=ipc_socket,
            vineyard_endpoint=rpc_endpoint,
            storage_options=kwargs,
            deployment="kubernetes" if on_k8s else "ssh",
            hosts=hosts,
        )

    @classmethod
    def deserialize(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserializing from `path`.

        It reads the serialization files that were dumped by `Graph.serialize`.
        If any serialization file does not exist or is broken, it errors out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be construct in
            **kwargs: Forwarded to `vineyard.io.deserialize` as `storage_options`.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.
        """
        import vineyard
        import vineyard.io

        info = sess.info
        on_k8s = info["type"] == "k8s"
        engine_conf = info["engine_config"]
        rpc_endpoint = engine_conf["vineyard_rpc_endpoint"]
        ipc_socket = engine_conf["vineyard_socket"]
        hosts = info["engine_hosts"].split(",")
        if on_k8s:
            # On kubernetes every host is prefixed with the session namespace.
            hosts = ["{}:{}".format(info["namespace"], h) for h in hosts]
        restored_id = vineyard.io.deserialize(
            path,
            type="global",
            vineyard_ipc_socket=ipc_socket,
            vineyard_endpoint=rpc_endpoint,
            storage_options=kwargs,
            deployment="kubernetes" if on_k8s else "ssh",
            hosts=hosts,
        )
        return cls(sess.session_id, vineyard.ObjectID(restored_id))

    def draw(self, vertices, hop=1):
        """Visualize the graph data in the result cell when the draw functions are invoked

        Args:
            vertices (list): selected vertices.
            hop (int): draw induced subgraph with hop extension. Defaults to 1.

        Returns:
            A GraphModel.
        """
        from ipygraphin import GraphModel

        # The model pulls its data through an interactive query on this graph.
        query = get_session_by_id(self.session_id).gremlin(self)

        model = GraphModel()
        model.queryGraphData(vertices, hop, query)

        # listen on the 1~2 hops operation of node
        model.on_msg(model.queryNeighbor)
        return model

    def add_vertices(self, vertices):
        """Add new vertex labels to this graph and return the result as a new graph.

        Args:
            vertices: Vertex description; it is normalized with
                `graph_utils.normalize_parameter_vertices`.

        Returns:
            :class:`Graph`: A new `Graph` built from the evaluated op's graph_def.
        """
        normalized = graph_utils.normalize_parameter_vertices(vertices)
        # A new label must not clash with a vertex label already in the schema.
        known_labels = self._schema.vertex_labels
        for item in normalized:
            check_argument(
                item.label not in known_labels,
                f"Duplicate label name with existing vertex labels: {item.label}",
            )

        # directed, oid_type and generate_eid are inherited from this graph.
        attrs = graph_utils.assemble_op_config([], normalized, self._directed,
                                               self._schema.oid_type,
                                               self._generate_eid)
        new_def = dag_utils.add_vertices(self, attrs=attrs).eval()
        return Graph(self.session_id, new_def)

    def add_edges(self, edges):
        """Add new edge labels to this graph and return the result as a new graph.

        Args:
            edges: Edge description; it is normalized with
                `graph_utils.normalize_parameter_edges`.

        Returns:
            :class:`Graph`: A new `Graph` built from the evaluated op's graph_def.
        """
        normalized = graph_utils.normalize_parameter_edges(edges)
        known_vertex_labels = self._schema.vertex_labels
        known_edge_labels = self.schema.edge_labels
        # Check 1: validity of the edges against the existing vertex labels.
        graph_utils.check_edge_validity(normalized, known_vertex_labels)
        # Check 2: a new label must not clash with an existing edge label.
        for item in normalized:
            check_argument(
                item.label not in known_edge_labels,
                f"Duplicate label name with existing edge labels: {item.label}",
            )

        # directed, oid_type and generate_eid are inherited from this graph.
        attrs = graph_utils.assemble_op_config(normalized, [], self._directed,
                                               self._schema.oid_type,
                                               self._generate_eid)
        new_def = dag_utils.add_edges(self, attrs=attrs).eval()
        return Graph(self.session_id, new_def)
Ejemplo n.º 3
0
class Graph(object):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema, and the graph is directed or not.

    It is worth noting that the graph is stored by the backend such as Analytical Engine, Vineyard.
    In other words, the graph object holds nothing but metadata.

    The graph object should not be created directly from :class:`Graph`.
    Instead, the graph should be created by `Session.load_from`

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> from graphscope.framework.loader import Loader
        >>> sess = gs.session()
        >>> g = sess.load_from(
        ...     edges={
        ...         "knows": (
        ...             Loader("{}/p2p-31_property_e_0".format(property_dir), header_row=True),
        ...             ["src_label_id", "dst_label_id", "dist"],
        ...             ("src_id", "person"),
        ...             ("dst_id", "person"),
        ...         ),
        ...     },
        ...     vertices={
        ...         "person": Loader(
        ...             "{}/p2p-31_property_v_0".format(property_dir), header_row=True
        ...         ),
        ...     }
        ... )
    """

    def __init__(self, session_id, incoming_data=None):
        """Construct a :class:`Graph` object.

        Args:
            session_id (str): Session id of the session the graph is created in.
            incoming_data: Source the graph is initialized from. It must be
                one of:
                    - :class:`GraphDef`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`VineyardObject`

        Raises:
            ValueError: If `incoming_data` is none of the types above, which
                includes the default value ``None``.
        """

        # Don't import the :code:`NXGraph` in top-level statements to improve the
        # performance of :code:`import graphscope`.
        from graphscope.experimental.nx.classes.graph import Graph as NXGraph

        self._key = None
        self._op = None
        self._graph_type = None
        # Historical public attribute, retained so existing readers keep working.
        # The state filled in from the graph definition is `_directed`.
        self.directed = False
        self._directed = False
        self._vineyard_id = 0
        self._schema = GraphSchema()
        self._schema_path = None
        self._saved_signature = None

        self._session_id = session_id
        self._detached = False

        self._interactive_instance_list = []
        self._learning_instance_list = []

        if isinstance(incoming_data, GraphDef):
            graph_def = incoming_data
        elif isinstance(incoming_data, NXGraph):
            graph_def = self._from_nx_graph(incoming_data)
        elif isinstance(incoming_data, Graph):
            graph_def = self._copy_from(incoming_data)
        elif isinstance(incoming_data, VineyardObject):
            graph_def = self._from_vineyard(incoming_data)
        else:
            # Exceptions do not interpolate logging-style arguments, so the
            # message is formatted explicitly.
            raise ValueError(
                "Failed to create a graph on graphscope engine: %s"
                % (incoming_data,)
            )

        if graph_def:
            self._key = graph_def.key
            self._vineyard_id = graph_def.vineyard_id
            self._graph_type = graph_def.graph_type
            self._directed = graph_def.directed
            self._schema.get_schema_from_def(graph_def.schema_def)
            self._schema_path = graph_def.schema_path
            # init saved_signature (must be after init schema)
            self._saved_signature = self.signature

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, quering via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    @property
    def op(self):
        """The DAG op of this graph."""
        return self._op

    @property
    def key(self):
        """The key of the corresponding graph in engine."""
        return self._key

    @property
    def graph_type(self):
        """The type of the graph object.

        Returns:
            type (`types_pb2.GraphType`): the type of the graph.
        """
        return self._graph_type

    @property
    def schema(self):
        """Schema of the graph.

        Returns:
            :class:`GraphSchema`: the schema of the graph
        """
        return self._schema

    @property
    def schema_path(self):
        """Path that Coordinator will write interactive schema path to.

        Returns:
            str: The path contains the schema. for interactive engine.
        """
        return self._schema_path

    @property
    def signature(self):
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256(
            "{}.{}".format(self._schema.signature(), self._key).encode("utf-8")
        ).hexdigest()

    @property
    def template_sigature(self):
        if self._key is None:
            raise RuntimeError("graph should be registered in remote.")
        return hashlib.sha256(
            "{}.{}.{}.{}.{}".format(
                self._graph_type,
                self._schema.oid_type,
                self._schema.vid_type,
                self._schema.vdata_type,
                self._schema.edata_type,
            ).encode("utf-8")
        ).hexdigest()

    @property
    def vineyard_id(self):
        """Get the vineyard object_id of this graph.

        Returns:
            str: return vineyard id of this graph
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Get the currrent session_id.

        Returns:
            str: Return session id that the graph belongs to.
        """
        return self._session_id

    def detach(self):
        """Mark this graph as detached.

        A detached graph is left in vineyard even when the variable holding this
        :class:`Graph` object leaves the lexical scope. It can be accessed later
        through the graph's :code:`ObjectID` or its name.
        """
        self._detached = True

    def loaded(self):
        """Return True when this graph currently has a key, False otherwise."""
        has_key = self._key is not None
        return has_key

    def __repr__(self):
        return "<grape.Graph '%s'>" % self._key

    def unload(self):
        """Unload this graph from graphscope engine.

        Attached interactive and learning instances are closed first; a failure
        while closing either group is logged and does not stop the unload. Unless
        the graph was detached, an unload op is then evaluated. Finally the key
        is cleared.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if not self.loaded():
            raise RuntimeError("The graph is not registered in remote.")

        # Interactive instances go first, then learning instances. The closer is
        # looked up inside the try so a lookup failure is logged like any other.
        cleanup_steps = (
            ("_close_interactive_instances", "Failed to close interactive instances: %s"),
            ("_close_learning_instances", "Failed to close learning instances: %s"),
        )
        for closer_name, failure_message in cleanup_steps:
            try:
                getattr(self, closer_name)()
            except Exception as err:
                logger.error(failure_message % err)

        if not self._detached:
            unload_graph(self).eval()
        self._key = None

    def project_to_simple(self, v_label="_", e_label="_", v_prop=None, e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.

        Labels and properties given by name are translated to their index in the
        schema; integer arguments are taken as indices and range-checked.

        Args:
            v_label (str, optional): vertex label to project. Defaults to "_".
            e_label (str, optional): edge label to project. Defaults to "_".
            v_prop (str, optional): vertex property of the v_label. Defaults to None.
            e_prop (str, optional): edge property of the e_label. Defaults to None.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

        Raises:
            RuntimeError: If the graph is not loaded.
            KeyError: If an integer label/property index is out of range.
            ValueError: If a lookup of the vertex label or of a property raises
                ``ValueError``.
            InvalidArgumentError: If the edge label lookup raises ``ValueError``.
        """
        if not self.loaded():
            raise RuntimeError(
                "The graph is not registered in remote, and can't project to simple"
            )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(isinstance(v_label, (int, str)))
        check_argument(isinstance(e_label, (int, str)))

        schema = self._schema

        def bounded(index, upper):
            # Valid indices satisfy 0 <= index < upper.
            if index >= upper or index <= -1:
                raise KeyError("id {} is out of range.".format(index))
            return index

        # Resolve the vertex label to its index.
        try:
            if isinstance(v_label, int):
                v_label_id = bounded(v_label, schema.vertex_label_num)
            else:
                v_label_id = schema.vertex_label_index(v_label)
        except ValueError as e:
            raise ValueError(
                "graph not contains the vertex label {}.".format(v_label)
            ) from e

        # Resolve the edge label to its index.
        try:
            if isinstance(e_label, int):
                e_label_id = bounded(e_label, schema.edge_label_num)
            else:
                e_label_id = schema.edge_label_index(e_label)
        except ValueError as e:
            raise InvalidArgumentError(
                "graph not contains the edge label {}.".format(e_label)
            ) from e

        # NB: -1 means vertex property is None
        v_prop_id = -1
        v_properties = None
        if v_prop is not None:
            check_argument(isinstance(v_prop, (int, str)))
            v_properties = schema.vertex_properties[v_label_id]
            try:
                if isinstance(v_prop, int):
                    v_prop_id = bounded(v_prop, len(v_properties))
                else:
                    v_prop_id = schema.vertex_property_index(v_label_id, v_prop)
            except ValueError as e:
                raise ValueError(
                    "vertex label {} not contains the property {}".format(
                        v_label, v_prop
                    )
                ) from e

        # NB: -1 means edge property is None
        e_prop_id = -1
        e_properties = None
        if e_prop is not None:
            check_argument(isinstance(e_prop, (int, str)))
            e_properties = schema.edge_properties[e_label_id]
            try:
                if isinstance(e_prop, int):
                    e_prop_id = bounded(e_prop, len(e_properties))
                else:
                    e_prop_id = schema.edge_property_index(e_label_id, e_prop)
            except ValueError as e:
                raise ValueError(
                    "edge label {} not contains the property {}".format(e_label, e_prop)
                ) from e

        oid_type = schema.oid_type
        vid_type = schema.vid_type
        # The data type is taken from the property table only when one was
        # fetched above and it is non-empty.
        vdata_type = list(v_properties.values())[v_prop_id] if v_properties else None
        edata_type = list(e_properties.values())[e_prop_id] if e_properties else None

        op = project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def add_column(self, results, selector):
        """Add query results to the graph as new columns.

        Each selector value is transformed through ``results._transform_selector``
        and the resulting mapping is passed on as a JSON string.

        Args:
            results (:class:`Context`): A `Context` that created by doing a query.
            selector (dict): Select results to add as column. Format is similar to selectors in `Context`

        Returns:
            :class:`Graph`: A new `Graph` with new columns.
        """
        check_argument(
            isinstance(selector, Mapping), "selector of add column must be a dict"
        )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)

        transformed = {}
        for name, raw_selector in selector.items():
            transformed[name] = results._transform_selector(raw_selector)

        op = add_column(self, results, json.dumps(transformed))
        graph_def = op.eval()
        return Graph(self.session_id, graph_def)

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self.check_unmodified()
        normalized_selector = transform_labeled_vertex_property_data_selector(
            self, selector
        )
        normalized_range = transform_vertex_range(vertex_range)
        raw = graph_to_numpy(self, normalized_selector, normalized_range).eval()
        return decode_numpy(raw)

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        Every selector value is transformed through
        ``transform_labeled_vertex_property_data_selector`` and the resulting
        mapping is passed on as a JSON string.

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self.check_unmodified()
        # The message names this method; it previously referred to a
        # non-existent "to_vineyard_dataframe".
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        selector = {
            key: transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(selector)
        vertex_range = transform_vertex_range(vertex_range)

        op = graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return decode_dataframe(ret)

    def is_directed(self):
        """Return the directedness flag stored in ``self._directed``."""
        directed = self._directed
        return directed

    def check_unmodified(self):
        """Check that the current signature still equals the saved one.

        The comparison result is handed to ``check_argument`` together with the
        message "Graph has been modified!".
        """
        unchanged = self.signature == self._saved_signature
        check_argument(unchanged, "Graph has been modified!")

    def _from_nx_graph(self, incoming_graph):
        """Build a graph definition from a nx graph.

        Args:
            incoming_graph (:class:`nx.graph`): A nx graph that contains graph data.

        Returns:
            The result of evaluating the ``dynamic_to_arrow`` op.

        Raises:
            TypeError: If ``incoming_graph`` has a ``_graph`` attribute, which
                the original code treats as the sign of a graph view.
        """
        is_view = hasattr(incoming_graph, "_graph")
        if is_view:
            raise TypeError("graph view can not convert to gs graph")
        return dynamic_to_arrow(incoming_graph).eval()

    def _copy_from(self, incoming_graph):
        """Copy a graph.

        The source graph's type is checked against ``ARROW_PROPERTY`` and its
        loaded state is checked before the copy op is evaluated.

        Args:
            incoming_graph (:class:`Graph`): Source graph to be copied from

        Returns:
            The result of evaluating the ``copy_graph`` op.
        """
        source = incoming_graph
        check_argument(source.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(source.loaded())
        return copy_graph(source).eval()

    def _from_vineyard(self, vineyard_object):
        """Load a graph from a already existed vineyard graph.

        Args:
            vineyard_object (:class:`VineyardObject`): vineyard object, which contains a graph.

        Returns:
            A graph_def.
        """
        if vineyard_object.object_id is not None:
            return self._from_vineyard_id(vineyard_object.object_id)
        elif vineyard_object.object_name is not None:
            return self._from_vineyard_name(vineyard_object.object_name)

    def _from_vineyard_id(self, vineyard_id):
        """Create and evaluate a graph op that refers to a vineyard object id.

        Args:
            vineyard_id: Id stored under the ``VINEYARD_ID`` attribute.

        Returns:
            The result of evaluating the ``create_graph`` op.
        """
        attrs = {
            types_pb2.IS_FROM_VINEYARD_ID: b_to_attr(True),
            types_pb2.VINEYARD_ID: i_to_attr(vineyard_id),
            # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
            #
            # the metadata should be retrieved from vineyard
            types_pb2.OID_TYPE: s_to_attr("int64_t"),
            types_pb2.VID_TYPE: s_to_attr("uint64_t"),
        }
        op = create_graph(self._session_id, types_pb2.ARROW_PROPERTY, attrs=attrs)
        return op.eval()

    def _from_vineyard_name(self, vineyard_name):
        """Create and evaluate a graph op that refers to a vineyard object name.

        Args:
            vineyard_name: Name stored under the ``VINEYARD_NAME`` attribute.

        Returns:
            The result of evaluating the ``create_graph`` op.
        """
        attrs = {
            types_pb2.IS_FROM_VINEYARD_ID: b_to_attr(True),
            types_pb2.VINEYARD_NAME: s_to_attr(vineyard_name),
            # FIXME(hetao) hardcode oid/vid type for codegen, when loading from vineyard
            #
            # the metadata should be retrieved from vineyard
            types_pb2.OID_TYPE: s_to_attr("int64_t"),
            types_pb2.VID_TYPE: s_to_attr("uint64_t"),
        }
        op = create_graph(self._session_id, types_pb2.ARROW_PROPERTY, attrs=attrs)
        return op.eval()

    def attach_interactive_instance(self, instance):
        """Remember a newly started interactive instance.

        Args:
            instance: interactive instance appended to the tracked list.
        """
        tracked = self._interactive_instance_list
        tracked.append(instance)

    def attach_learning_instance(self, instance):
        """Remember a newly created learning instance.

        Args:
            instance: learning instance appended to the tracked list.
        """
        tracked = self._learning_instance_list
        tracked.append(instance)
Ejemplo n.º 4
0
class Graph(GraphInterface):
    """A class for representing metadata of a graph in the GraphScope.

    A :class:`Graph` object holds the metadata of a graph, such as key, schema, and the graph is directed or not.

    It is worth noticing that the graph is stored by the backend such as Analytical Engine, Vineyard.
    In other words, the graph object holds nothing but metadata.

    The following example demonstrates its usage:

    .. code:: python

        >>> import graphscope as gs
        >>> sess = gs.session()
        >>> graph = sess.g()
        >>> graph = graph.add_vertices("person.csv", "person")
        >>> graph = graph.add_vertices("software.csv", "software")
        >>> graph = graph.add_edges("knows.csv", "knows", src_label="person", dst_label="person")
        >>> graph = graph.add_edges("created.csv", "created", src_label="person", dst_label="software")
        >>> print(graph)
        >>> print(graph.schema)
    """

    def __init__(
        self,
        graph_node,
    ):
        """Construct a :class:`Graph` object around ``graph_node``.

        The node's op is replaced by a deep copy, the node is flagged as
        evaluated, and the copied op is added to the session's DAG.

        Args:
            graph_node: Node providing ``session`` and ``op``.
        """
        node = graph_node
        self._graph_node = node
        self._session = node.session

        # copy and set op evaluated
        node.op = deepcopy(node.op)
        node.evaluated = True
        self._session.dag.add_op(node.op)

        # Bookkeeping for interactive / learning instances attached later.
        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        # Metadata placeholders; update_from_graph_def assigns the real key,
        # vineyard id and schema contents.
        self._detached = False
        self._key = None
        self._vineyard_id = 0
        self._schema = GraphSchema()

    def __del__(self):
        # cleanly ignore all exceptions, cause session may already closed / destroyed.
        try:
            self.unload()
        except Exception:  # pylint: disable=broad-except
            pass

    def _close_interactive_instances(self):
        # Close related interactive instances when graph unloaded.
        # Since the graph is gone, quering via interactive client is meaningless.
        for instance in self._interactive_instance_list:
            instance.close()
        self._interactive_instance_list.clear()

    def _close_learning_instances(self):
        for instance in self._learning_instance_list:
            instance.close()
        self._learning_instance_list.clear()

    def _launch_interactive_instance_impl(self):
        try:
            self._session.gremlin(self)
        except:  # noqa: E722
            # Record error msg in `InteractiveQuery` when launching failed.
            # Unexpect and suppress all exceptions here.
            pass

    def update_from_graph_def(self, graph_def):
        """Refresh the metadata cached on this object from ``graph_def``.

        Copies the key, directedness, multigraph flag, vineyard information and
        schema out of ``graph_def``, records the resulting signature, and, when
        the session is eager and ``gs_config.initializing_interactive_engine``
        is set, starts a background thread that launches an interactive
        instance.

        Args:
            graph_def: Graph definition to read from; presumably a
                ``graph_def_pb2`` graph message (TODO confirm against caller).
        """
        # A flattened graph_def overrides the type recorded on the graph node,
        # so the type comparison below is made against the updated value.
        if graph_def.graph_type == graph_def_pb2.ARROW_FLATTENED:
            self._graph_node._graph_type = graph_def_pb2.ARROW_FLATTENED
        # NOTE(review): check_argument presumably raises when the condition is
        # false; its definition is not visible here.
        check_argument(
            self._graph_node.graph_type == graph_def.graph_type,
            "Graph type doesn't match {} versus {}".format(
                self._graph_node.graph_type, graph_def.graph_type
            ),
        )
        self._key = graph_def.key
        self._directed = graph_def.directed
        self._is_multigraph = graph_def.is_multigraph
        # The vineyard-specific fields are unpacked from the extension field.
        vy_info = graph_def_pb2.VineyardInfoPb()
        graph_def.extension.Unpack(vy_info)
        self._vineyard_id = vy_info.vineyard_id
        self._oid_type = data_type_to_cpp(vy_info.oid_type)
        self._generate_eid = vy_info.generate_eid

        self._schema_path = vy_info.schema_path
        self._schema.from_graph_def(graph_def)
        # Cache label and relationship lists from the schema; __str__ reads them.
        self._v_labels = self._schema.vertex_labels
        self._e_labels = self._schema.edge_labels
        self._e_relationships = self._schema.edge_relationships
        # init saved_signature (must be after init schema)
        self._saved_signature = self.signature
        # create gremlin server pod asynchronously
        if self._session.eager() and gs_config.initializing_interactive_engine:
            self._interactive_instance_launching_thread = threading.Thread(
                target=self._launch_interactive_instance_impl, args=()
            )
            self._interactive_instance_launching_thread.start()

    def __getattr__(self, name):
        if hasattr(self._graph_node, name):
            return getattr(self._graph_node, name)
        raise AttributeError("{0} not found.".format(name))

    @property
    def key(self):
        """Return the identifier under which the engine knows this graph.

        The value is whatever is currently stored in ``self._key``.
        """
        return self._key

    @property
    def schema(self):
        """Return the schema object attached to this graph.

        Returns:
            :class:`GraphSchema`: the object stored in ``self._schema``.
        """
        return self._schema

    @property
    def schema_path(self):
        """Return the path of the schema written for the interactive engine.

        Returns:
            str: the value stored in ``self._schema_path``.
        """
        return self._schema_path

    @property
    def signature(self):
        """Return a SHA-256 hex digest of the schema signature and the graph key.

        The two values are joined by a dot before hashing.
        """
        payload = "{}.{}".format(self._schema.signature(), self._key)
        digest = hashlib.sha256(payload.encode("utf-8"))
        return digest.hexdigest()

    @property
    def op(self):
        """Return the op held by the underlying graph node."""
        node = self._graph_node
        return node.op

    @property
    def template_str(self):
        """Return the C++ fragment template string matching this graph's type.

        The oid, vid, vdata and edata types are first converted to their C++
        spellings, then substituted into the template for the graph type.

        Returns:
            str: The fragment template, e.g. ``vineyard::ArrowFragment<...>``.

        Raises:
            ValueError: If the graph type is not one of the supported types.
        """
        # transform str/string to std::string
        oid_type = utils.normalize_data_type_str(self._oid_type)
        vid_type = utils.data_type_to_cpp(self._schema._vid_type)
        vdata_type = utils.data_type_to_cpp(self._schema.vdata_type)
        edata_type = utils.data_type_to_cpp(self._schema.edata_type)
        if self._graph_type == graph_def_pb2.ARROW_PROPERTY:
            template = f"vineyard::ArrowFragment<{oid_type},{vid_type}>"
        elif self._graph_type == graph_def_pb2.ARROW_PROJECTED:
            template = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif self._graph_type == graph_def_pb2.ARROW_FLATTENED:
            # Previously misspelled "ArrowFlattenedFragmen" and missing the
            # gs:: prefix used by the other gs fragments in this method.
            template = f"gs::ArrowFlattenedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>"
        elif self._graph_type == graph_def_pb2.DYNAMIC_PROJECTED:
            template = f"gs::DynamicProjectedFragment<{vdata_type},{edata_type}>"
        else:
            raise ValueError(f"Unsupported graph type: {self._graph_type}")
        return template

    @property
    def vineyard_id(self):
        """Return the vineyard object id recorded for this graph.

        Returns:
            The value stored in ``self._vineyard_id``.
        """
        return self._vineyard_id

    @property
    def session_id(self):
        """Return the id of the session this graph belongs to.

        Returns:
            str: ``session_id`` of the session held in ``self._session``.
        """
        session = self._session
        return session.session_id

    def detach(self):
        """Mark this graph as detached.

        A detached graph is left in vineyard even when the variable holding this
        :class:`Graph` object leaves the lexical scope. It can be accessed later
        through the graph's :code:`ObjectID` or its name.
        """
        self._detached = True

    def loaded(self):
        """True if the session is active and this graph currently has a key."""
        if self._session.info["status"] != "active":
            return False
        return self._key is not None

    def __str__(self):
        """Return a multi-line summary: graph type, vertex labels, edge relations.

        Each vertex label yields one ``VERTEX:`` line. Each (src, dst) pair
        listed for an edge label yields one ``EDGE:`` line.
        """
        vertex_lines = [f"VERTEX: {label}" for label in self._v_labels]

        edge_lines = []
        for idx, e_label in enumerate(self._e_labels):
            # _e_relationships[idx] holds the (src, dst) pairs for this label.
            for src, dst in self._e_relationships[idx]:
                edge_lines.append(f"EDGE: {e_label}\tsrc: {src}\tdst: {dst}")

        v_str = "\n".join(vertex_lines)
        e_str = "\n".join(edge_lines)
        return f"graphscope.Graph\n{graph_def_pb2.GraphTypePb.Name(self._graph_type)}\n{v_str}\n{e_str}"

    def __repr__(self):
        return self.__str__()

    def unload(self):
        """Unload this graph from graphscope engine.

        Does nothing when the session is not active or the graph has no key.
        Otherwise waits for a still-running interactive launch thread, closes
        the attached interactive and learning instances (failures are logged),
        unloads the graph node unless the graph was detached, and clears the key.

        Returns:
            The wrapped result of the graph node's unload, or None when the
            graph is detached or nothing was unloaded.
        """
        if self._session.info["status"] != "active" or self._key is None:
            return

        # close interactive instances first
        try:
            launcher = self._interactive_instance_launching_thread
            still_launching = launcher is not None and launcher.is_alive()
            # join raises a RuntimeError if an attempt is made to join the current thread.
            # this happens when an object collected by gc contains a running thread.
            if still_launching and threading.current_thread() != launcher:
                launcher.join()
            self._close_interactive_instances()
        except Exception as err:
            logger.error("Failed to close interactive instances: %s" % err)

        try:
            self._close_learning_instances()
        except Exception as err:
            logger.error("Failed to close learning instances: %s" % err)

        result = None
        if not self._detached:
            wrap = self._session._wrapper
            result = wrap(self._graph_node.unload())
        self._key = None
        return result

    def _project_to_simple(self, v_prop=None, e_prop=None):
        return self._session._wrapper(
            self._graph_node._project_to_simple(v_prop, e_prop)
        )

    def add_column(self, results, selector):
        """Delegate to the graph node's ``add_column`` and wrap the result.

        Args:
            results: Forwarded unchanged to the graph node.
            selector: Forwarded unchanged to the graph node.

        Returns:
            Whatever the session's ``_wrapper`` returns for the node result.
        """
        wrap = self._session._wrapper
        return wrap(self._graph_node.add_column(results, selector))

    def to_numpy(self, selector, vertex_range=None):
        """Select some elements of the graph and output to numpy.

        The unmodified check runs first; the request is then delegated to the
        graph node and wrapped by the session.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
            vertex_range(dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self._check_unmodified()
        wrap = self._session._wrapper
        return wrap(self._graph_node.to_numpy(selector, vertex_range))

    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame

        The unmodified check runs first; the request is then delegated to the
        graph node and wrapped by the session.

        Args:
            selector (dict): Select some portions of graph.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        self._check_unmodified()
        wrap = self._session._wrapper
        return wrap(self._graph_node.to_dataframe(selector, vertex_range))

    def to_directed(self):
        """Returns a directed representation of the graph.

        An already directed graph is returned as is.

        Returns:
            :class:`Graph`: A directed graph with the same name, same nodes, and
                with each edge (u, v, data) replaced by two directed edges (u, v, data) and (v, u, data).
        """
        if not self._directed:
            wrap = self._session._wrapper
            return wrap(self._graph_node.to_directed())
        return self

    def to_undirected(self):
        """Returns an undirected representation of the digraph.

        An already undirected graph is returned as is.

        Returns:
            :class:`Graph`: An undirected graph with the same name and nodes and
                with edge (u, v, data) if either (u, v, data) or (v, u, data) is in the digraph.
                If both edges exist in digraph, they will both be preserved.
                You must check and correct for this manually if desired.
        """
        if self._directed:
            wrap = self._session._wrapper
            return wrap(self._graph_node.to_undirected())
        return self

    def is_directed(self):
        """Return the directedness flag stored in ``self._directed``."""
        directed = self._directed
        return directed

    def is_multigraph(self):
        """Return the multigraph flag stored in ``self._is_multigraph``."""
        multigraph = self._is_multigraph
        return multigraph

    def _check_unmodified(self):
        """Check that the current signature still equals the saved one.

        The comparison result is handed to ``check_argument`` together with the
        message "Graph has been modified!".
        """
        unchanged = self.signature == self._saved_signature
        check_argument(unchanged, "Graph has been modified!")

    def _attach_interactive_instance(self, instance):
        """Store the instance when a new interactive instance is started.

        Args:
            instance: interactive instance
        """
        self._interactive_instance_list.append(instance)

    def _attach_learning_instance(self, instance):
        """Store the instance when a new learning instance is created.

        Args:
            instance: learning instance
        """
        self._learning_instance_list.append(instance)

    def save_to(self, path, **kwargs):
        """Serialize graph to a location.

        The meta and data of the graph are dumped to the specified location,
        and can be restored by `Graph.load_from` in other sessions.

        Each worker will write a `path_{worker_id}.meta` file and
        a `path_{worker_id}` file to storage.

        Args:
            path (str): supported storages are local, hdfs, oss, s3
            **kwargs: Passed to `vineyard.io.serialize` as `storage_options`.

        Raises:
            RuntimeError: If `vineyard` or `vineyard.io` cannot be imported.
        """
        try:
            import vineyard
            import vineyard.io
        except ImportError as e:
            # Chain the ImportError so the missing module stays visible in
            # the traceback.
            raise RuntimeError(
                "Saving graph to locations requires 'vineyard', "
                "please install those two dependencies via "
                "\n"
                "\n"
                "    pip3 install vineyard vineyard-io"
                "\n"
                "\n"
            ) from e

        sess = self._session
        on_k8s = sess.info["type"] == "k8s"
        deployment = "kubernetes" if on_k8s else "ssh"
        conf = sess.info["engine_config"]
        vineyard_endpoint = conf["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = conf["vineyard_socket"]
        engine_hosts = sess.info["engine_hosts"].split(",")
        if on_k8s:
            # On k8s every host is prefixed with the session namespace.
            hosts = ["{}:{}".format(sess.info["namespace"], s) for s in engine_hosts]
        else:  # type == "hosts"
            hosts = engine_hosts
        vineyard.io.serialize(
            path,
            vineyard.ObjectID(self._vineyard_id),
            type="global",
            vineyard_ipc_socket=vineyard_ipc_socket,
            vineyard_endpoint=vineyard_endpoint,
            storage_options=kwargs,
            deployment=deployment,
            hosts=hosts,
        )

    @classmethod
    def load_from(cls, path, sess, **kwargs):
        """Construct a `Graph` by deserializing from `path`.

        It reads the serialization files that were dumped by `Graph.save_to`.
        If any serialization file is missing or broken, it will error out.

        Args:
            path (str): Path contains the serialization files.
            sess (`graphscope.Session`): The target session
                that the graph will be constructed in
            **kwargs: Passed to `vineyard.io.deserialize` as `storage_options`.

        Returns:
            `Graph`: A new graph object. Schema and data is supposed to be
                identical with the one that called serialized method.

        Raises:
            RuntimeError: If `vineyard` or `vineyard.io` cannot be imported.
        """
        try:
            import vineyard
            import vineyard.io
        except ImportError as e:
            # Chain the ImportError so the missing module stays visible in
            # the traceback.
            raise RuntimeError(
                "Loading graph from locations requires 'vineyard', "
                "please install those two dependencies via "
                "\n"
                "\n"
                "    pip3 install vineyard vineyard-io"
                "\n"
                "\n"
            ) from e

        on_k8s = sess.info["type"] == "k8s"
        deployment = "kubernetes" if on_k8s else "ssh"
        conf = sess.info["engine_config"]
        vineyard_endpoint = conf["vineyard_rpc_endpoint"]
        vineyard_ipc_socket = conf["vineyard_socket"]
        engine_hosts = sess.info["engine_hosts"].split(",")
        if on_k8s:
            # On k8s every host is prefixed with the session namespace.
            hosts = ["{}:{}".format(sess.info["namespace"], s) for s in engine_hosts]
        else:  # type == "hosts"
            hosts = engine_hosts
        graph_id = vineyard.io.deserialize(
            path,
            type="global",
            vineyard_ipc_socket=vineyard_ipc_socket,
            vineyard_endpoint=vineyard_endpoint,
            storage_options=kwargs,
            deployment=deployment,
            hosts=hosts,
        )
        return sess._wrapper(GraphDAGNode(sess, vineyard.ObjectID(graph_id)))

    def add_vertices(self, vertices, label="_", properties=None, vid_field=0):
        """Delegate to the graph node's ``add_vertices`` and wrap the result.

        All arguments are forwarded unchanged to the graph node.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if self.loaded():
            wrap = self._session._wrapper
            return wrap(
                self._graph_node.add_vertices(vertices, label, properties, vid_field)
            )
        raise RuntimeError("The graph is not loaded")

    def add_edges(
        self,
        edges,
        label="_",
        properties=None,
        src_label=None,
        dst_label=None,
        src_field=0,
        dst_field=1,
    ):
        """Delegate to the graph node's ``add_edges`` and wrap the result.

        All arguments are forwarded unchanged, in declaration order, to the
        graph node.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if self.loaded():
            wrap = self._session._wrapper
            forwarded = (
                edges,
                label,
                properties,
                src_label,
                dst_label,
                src_field,
                dst_field,
            )
            return wrap(self._graph_node.add_edges(*forwarded))
        raise RuntimeError("The graph is not loaded")

    def project(
        self,
        vertices: Mapping[str, Union[List[str], None]],
        edges: Mapping[str, Union[List[str], None]],
    ):
        """Delegate to the graph node's ``project`` and wrap the result.

        Args:
            vertices: Mapping from a string key to a list of strings or None;
                forwarded unchanged to the graph node.
            edges: Mapping of the same shape; forwarded unchanged.

        Raises:
            RuntimeError: If the graph is not loaded.
        """
        if self.loaded():
            wrap = self._session._wrapper
            return wrap(self._graph_node.project(vertices, edges))
        raise RuntimeError("The graph is not loaded")