Example #1
0
    def __init__(self, incoming_graph_data=None, default_label=None, **attr):
        """Initialize the graph, optionally populating it from existing data.

        Args:
            incoming_graph_data: Optional data to initialize the graph with.
                When `self._is_gs_graph` accepts it, the graph is built via
                `_init_with_arrow_property_graph`; any other non-None value
                is handed to `to_networkx_graph` with this instance as
                `create_using`.
            default_label: Stored unchanged as `self._default_label`.
            **attr: Graph attributes merged into `self.graph`. Two keys are
                popped first and are never stored as graph attributes:
                `create_empty_in_engine` (default True) and `dist`
                (default False).
        """

        # Re-bind the factories on the instance.
        # NOTE(review): presumably these are class-level attributes being
        # copied onto the instance -- confirm against the class body.
        self.graph_attr_dict_factory = self.graph_attr_dict_factory
        self.node_dict_factory = self.node_dict_factory
        self.adjlist_dict_factory = self.adjlist_dict_factory
        self.graph = self.graph_attr_dict_factory()
        self.cache = self.graph_cache_factory(self)

        # init node and adj (must be after cache)
        self._node = self.node_dict_factory(self)
        self._adj = self.adjlist_dict_factory(self)
        self._pred = self.adjlist_dict_factory(self, pred=True)
        # `_succ` is an alias of `_adj` (same object, not a copy).
        self._succ = self._adj

        self._key = None
        self._op = None
        self._session_id = None
        self._graph_type = self._graph_type
        self._schema = GraphSchema()
        self._schema.init_nx_schema()

        # Pending-operation buffers for node/edge additions and removals.
        self._add_node_cache = []
        self._add_edge_cache = []
        self._remove_node_cache = []
        self._remove_edge_cache = []

        # Both options are popped so they do not end up in `self.graph`
        # when the remaining `attr` is merged at the end.
        create_empty_in_engine = attr.pop("create_empty_in_engine",
                                          True)  # a hidden parameter
        self._distributed = attr.pop("dist", False)
        if incoming_graph_data is not None and self._is_gs_graph(
                incoming_graph_data):
            # Converting from a gs graph always uses distributed mode,
            # overriding whatever `dist` was passed.
            self._distributed = True
            # NOTE(review): `self._session` is read here before this method
            # assigns it -- presumably a class-level default; confirm.
            if self._session is None:
                self._session = get_session_by_id(
                    incoming_graph_data.session_id)
        self._default_label = default_label

        # No session inherited from the incoming graph: fall back to the
        # default-session lookup.
        if self._session is None:
            self._try_to_get_default_session()

        # Create an empty graph on the engine side and remember its key,
        # unless the data is a gs graph or the caller opted out through
        # `create_empty_in_engine`.
        if not self._is_gs_graph(
                incoming_graph_data) and create_empty_in_engine:
            graph_def = empty_graph_in_engine(self, self.is_directed(),
                                              self._distributed)
            self._key = graph_def.key

        # attempt to load graph with data
        if incoming_graph_data is not None:
            if self._is_gs_graph(incoming_graph_data):
                self._init_with_arrow_property_graph(incoming_graph_data)
                self.cache.warmup()
            else:
                g = to_networkx_graph(incoming_graph_data, create_using=self)
                check_argument(isinstance(g, Graph))

        # load graph attributes (must be after to_networkx_graph)
        self.graph.update(attr)
        # Snapshot of the signature as of the end of construction.
        self._saved_signature = self.signature
Example #2
0
    def output(self, fd, selector, vertex_range=None, **kwargs):
        """Write the selected results to the location named by `fd`.

        The results are first materialized as a vineyard dataframe, which is
        then opened as a readable stream and handed to vineyard to be written
        to the destination. Local (with respect to the pod) files, hdfs and
        oss are supported.

        `fd` must follow one of these formats, with auth information if
        needed:

            - local
                `file:///tmp/result_path`
            - oss
                `oss:///bucket/object`
            - hdfs
                `hdfs:///tmp/result_path`

        Args:
            fd (str): Output location.
            selector (dict): Similar to `to_dataframe`.
            vertex_range (dict, optional): Similar to `to_dataframe`.
                Defaults to None.
            kwargs (dict, optional): Storage options for the output storage
                type, for example:
                    key, secret, endpoint for oss,
                    key, secret, client_kwargs for s3,
                    host, port for hdfs,
                    None for local.
        """
        import vineyard
        import vineyard.io

        frame_id = self.to_vineyard_dataframe(selector, vertex_range)
        session = get_session_by_id(self.session_id)

        on_k8s = session.info["type"] == "k8s"
        engine_conf = session.info["engine_config"]
        rpc_endpoint = engine_conf["vineyard_rpc_endpoint"]
        ipc_socket = engine_conf["vineyard_socket"]

        host_names = session.info["engine_hosts"].split(",")
        if on_k8s:
            deployment = "kubernetes"
            # On k8s every engine host is qualified with the namespace.
            hosts = [
                "{}:{}".format(session.info["namespace"], name)
                for name in host_names
            ]
        else:  # type == "hosts"
            deployment = "ssh"
            hosts = host_names

        # Connection arguments shared by the reader and the writer.
        connection = dict(
            vineyard_ipc_socket=ipc_socket,
            vineyard_endpoint=rpc_endpoint,
            deployment=deployment,
            hosts=hosts,
        )

        # Expose the vineyard dataframe as a readable stream ...
        reader = vineyard.io.open(
            "vineyard://" + str(frame_id),
            mode="r",
            **connection,
        )
        # ... and let vineyard write that stream to the destination.
        vineyard.io.open(
            fd,
            reader,
            mode="w",
            storage_options=kwargs,
            **connection,
        )
Example #3
0
    def serialize(self, path, **kwargs):
        """Dump this graph's meta and data to a storage location.

        The dumped graph can later be restored with `Graph.deserialize`,
        including from other sessions.

        Each worker writes a `path_{worker_id}.meta` file and a
        `path_{worker_id}` file to storage.

        Args:
            path (str): Destination; supported storages are local, hdfs,
                oss and s3.
            kwargs (dict, optional): Passed to vineyard as `storage_options`.
        """
        import vineyard
        import vineyard.io

        session = get_session_by_id(self.session_id)

        on_k8s = session.info["type"] == "k8s"
        engine_conf = session.info["engine_config"]
        rpc_endpoint = engine_conf["vineyard_rpc_endpoint"]
        ipc_socket = engine_conf["vineyard_socket"]

        host_names = session.info["engine_hosts"].split(",")
        if on_k8s:
            deployment = "kubernetes"
            # On k8s every engine host is qualified with the namespace.
            hosts = [
                "{}:{}".format(session.info["namespace"], name)
                for name in host_names
            ]
        else:  # type == "hosts"
            deployment = "ssh"
            hosts = host_names

        target = vineyard.ObjectID(self._vineyard_id)
        vineyard.io.serialize(
            path,
            target,
            type="global",
            vineyard_ipc_socket=ipc_socket,
            vineyard_endpoint=rpc_endpoint,
            storage_options=kwargs,
            deployment=deployment,
            hosts=hosts,
        )
Example #4
0
 def _launch_interactive_instance_impl(self):
     try:
         sess = get_session_by_id(self.session_id)
         sess.gremlin(self)
     except:  # noqa: E722
         # Record error msg in `InteractiveQuery` when launching failed.
         # Unexpect and suppress all exceptions here.
         pass
Example #5
0
    def eval(self):
        """Run this object through its session and return the result.

        The session is looked up from `self._session_id`.
        """
        # Imported lazily: a module-level import would be circular.
        # pylint: disable=import-outside-toplevel, cyclic-import
        from graphscope.client.session import get_session_by_id

        return get_session_by_id(self._session_id).run(self)
Example #6
0
    def eval(self, leaf=True):
        """Evaluate this operation with :code:`sess.run` and return the result.

        Args:
            leaf (bool, optional): Whether this is a leaf operation, i.e. one
                with no successor. Stored on `self._leaf`. A non-leaf
                operation is added to the session's dag before it is run.
                Defaults to True.
        """
        # Imported lazily: a module-level import would be circular.
        # pylint: disable=import-outside-toplevel, cyclic-import
        from graphscope.client.session import get_session_by_id

        self._leaf = leaf
        session = get_session_by_id(self._session_id)
        if self._leaf:
            return session.run(self)
        session.dag.add_op(self)
        return session.run(self)
Example #7
0
def empty_graph_in_engine(graph, directed):
    """Create an empty graph in the engine from the python graph's metadata.

    Parameters:
    -----------
    graph: the graph instance in python; its `_session_id` selects the
        session and its `_graph_type` is forwarded as the graph type.
    directed: forwarded as the `directed` flag of the created graph.

    Returns:
    --------
    The result of running the create-graph op in the session.
    """
    session = get_session_by_id(graph._session_id)
    # Empty edge/vertex files: the graph is created without any data.
    create_op = dag_utils.create_graph(
        session.session_id,
        graph_type=graph._graph_type,
        directed=directed,
        efile="",
        vfile="",
    )
    return session.run(create_op)
Example #8
0
    def draw(self, vertices, hop=1):
        """Render graph data for the given vertices in the result cell.

        Args:
            vertices (list): selected vertices.
            hop (int): draw the induced subgraph with this hop extension.
                Defaults to 1.

        Returns:
            A GraphModel.
        """
        from ipygraphin import GraphModel

        session = get_session_by_id(self.session_id)
        query = session.gremlin(self)

        model = GraphModel()
        model.queryGraphData(vertices, hop, query)

        # React to the 1~2 hop neighbor operations triggered on a node.
        model.on_msg(model.queryNeighbor)
        return model