예제 #1
0
    def gremlin(self, graph):
        """Return an interactive engine handle for running gremlin queries.

        If an instance is already recorded for ``graph.vineyard_id`` it is
        returned as is. Otherwise an interactive engine is requested through
        the gRPC client, wrapped in an :class:`InteractiveQuery`, recorded on
        this session and attached to the graph.

        Args:
            graph (:class:`Graph`): The graph to run queries on.

        Raises:
            InvalidArgumentError: :code:`graph` is unloaded or is not a
                property graph.

        Returns:
            :class:`InteractiveQuery`
        """
        cached = self._interactive_instance_dict.get(graph.vineyard_id)
        if cached is not None:
            return cached

        if not graph.loaded():
            raise InvalidArgumentError("The graph has already been unloaded")
        if graph.graph_type != types_pb2.ARROW_PROPERTY:
            raise InvalidArgumentError("The graph should be a property graph.")

        # Imported at call time rather than at module import.
        from graphscope.interactive.query import InteractiveQuery

        engine = self._grpc_client.create_interactive_engine(
            graph.vineyard_id, graph.schema_path)
        query = InteractiveQuery(
            graphscope_session=self,
            object_id=graph.vineyard_id,
            front_ip=engine.frontend_host,
            front_port=engine.frontend_port,
        )
        self._interactive_instance_dict[graph.vineyard_id] = query
        graph.attach_interactive_instance(query)
        return query
예제 #2
0
    def _not_compatible_for(not_compatible_for_func, *args, **kwargs):
        """Call the wrapped function unless the graph's type is rejected.

        The first positional argument is taken as the graph. ``graph_types``
        is a name defined outside this function; it lists the graph type
        names that the wrapped function must not be called with.

        Raises:
            InvalidArgumentError: The graph has no ``graph_type`` attribute,
                ``graph_types`` contains an unknown name, or the graph's type
                is one of the rejected types.

        Returns:
            Whatever ``not_compatible_for_func(*args, **kwargs)`` returns.
        """
        graph = args[0]
        if not hasattr(graph, "graph_type"):
            raise InvalidArgumentError(
                "Missing graph_type attribute in graph object.")

        actual_type = graph.graph_type
        known_types = {
            "arrow_property": types_pb2.ARROW_PROPERTY,
            "dynamic_property": types_pb2.DYNAMIC_PROPERTY,
            "arrow_projected": types_pb2.ARROW_PROJECTED,
            "dynamic_projected": types_pb2.DYNAMIC_PROJECTED,
        }
        try:
            # Stops at the first matching name, so names after a match are
            # never looked up.
            rejected = any(
                actual_type == known_types[name] for name in graph_types)
        except KeyError:
            raise InvalidArgumentError(
                "Use one or more of arrow_property,dynamic_property,arrow_projected,dynamic_projected",
            )
        if not rejected:
            return not_compatible_for_func(*args, **kwargs)
        raise InvalidArgumentError("Not compatible for %s type" %
                                   " ".join(graph_types))
예제 #3
0
def load_app(gar=None, algo=None, context=None, **kwargs):
    """Load an app from a gar resource.

    The resource may be given as raw bytes, a ``BytesIO`` or a file path.

    Args:
        gar: bytes or BytesIO or str
          A str is treated as the path of the resource file.
          For giraph apps (``algo`` starting with ``giraph:``) gar must be
          None.
        algo: str
          Algo name inside the resource. Forwarded unchanged to
          :class:`AppAssets`.
        context: str
          Context type forwarded to :class:`AppAssets`; giraph apps always
          use ``"vertex_data"``.
        **kwargs: Extra keyword arguments forwarded to :class:`AppAssets`.

    Returns:
        Instance of <graphscope.framework.app.AppAssets>

    Raises:
        FileNotFoundError: The path does not exist (raised by ``open``).
        PermissionError: The path cannot be read (raised by ``open``).
        InvalidArgumentError: The file is not a zip file, a giraph app was
            given a gar resource, or ``gar`` has an unsupported type.

    Examples:
        >>> sssp = load_app(gar='./resource.gar', algo='sssp')
        >>> sssp(src=4)

        which will have following `.gs_conf.yaml` in resource.gar:
          app:
            - algo: sssp
              type: cpp_pie
              class_name: grape:SSSP
              context_type: vertex_data
              src: sssp/sssp.h
              compatible_graph:
                - gs::ArrowProjectedFragment
    """
    # In-memory resource: hand it over as is.
    if isinstance(gar, (BytesIO, bytes)):
        return AppAssets(algo, context, gar, **kwargs)

    # Path on disk: read the bytes first, then validate the archive format.
    if isinstance(gar, str):
        with open(gar, "rb") as resource:
            payload = resource.read()
        if zipfile.is_zipfile(gar):
            return AppAssets(algo, context, payload, **kwargs)
        raise InvalidArgumentError("{} is not a zip file.".format(gar))

    # Anything else is only acceptable for a giraph app without a resource.
    if not (isinstance(algo, str) and algo.startswith("giraph:")):
        raise InvalidArgumentError("Wrong type with {}".format(gar))
    if gar is not None:
        raise InvalidArgumentError(
            "Running giraph app expect no gar resource")
    return AppAssets(algo, "vertex_data", None, **kwargs)
예제 #4
0
    def gremlin(self, graph, engine_params=None):
        """Return an interactive engine handle for running gremlin queries.

        If an instance is already recorded for ``graph.vineyard_id`` it is
        returned as is and ``engine_params`` is not used. Otherwise an
        interactive engine is requested through the gRPC client, wrapped in
        an :class:`InteractiveQuery`, recorded on this session and attached
        to the graph.

        Args:
            graph (:class:`Graph`): Use the graph to create interactive instance.
            engine_params (dict, optional): Startup parameters of the
                interactive engine. Keys and values are converted to ``str``
                before being sent. See a list of configurable keys in
                `interactive_engine/deploy/docker/dockerfile/executor.vineyard.properties`

        Raises:
            InvalidArgumentError: :code:`graph` is unloaded or is not a
                property graph.

        Returns:
            :class:`InteractiveQuery`
        """
        cached = self._interactive_instance_dict.get(graph.vineyard_id)
        if cached is not None:
            return cached

        if not graph.loaded():
            raise InvalidArgumentError("The graph has already been unloaded")
        if graph.graph_type != types_pb2.ARROW_PROPERTY:
            raise InvalidArgumentError("The graph should be a property graph.")

        if engine_params is None:
            engine_params = {}
        else:
            engine_params = {
                str(name): str(setting)
                for name, setting in engine_params.items()
            }

        # Imported at call time rather than at module import.
        from graphscope.interactive.query import InteractiveQuery

        engine = self._grpc_client.create_interactive_engine(
            object_id=graph.vineyard_id,
            schema_path=graph.schema_path,
            gremlin_server_cpu=gs_config.k8s_gie_gremlin_server_cpu,
            gremlin_server_mem=gs_config.k8s_gie_gremlin_server_mem,
            engine_params=engine_params,
        )
        query = InteractiveQuery(
            graphscope_session=self,
            object_id=graph.vineyard_id,
            front_ip=engine.frontend_host,
            front_port=engine.frontend_port,
        )
        self._interactive_instance_dict[graph.vineyard_id] = query
        graph.attach_interactive_instance(query)
        return query
예제 #5
0
 def _check_selector(self, selector):
     """
     Raises:
         InvalidArgumentError:
             - Selector in vertex data context is None
         SyntaxError:
             - The syntax of selector is incorrect
         NotImplementedError:
             - Selector of e not supported
     """
     if selector is None:
         raise InvalidArgumentError(
             "Selector in vertex data context cannot be None")
     segments = selector.split(".")
     err_msg = f"Invalid selector: `{selector}`. "
     err_msg += (
         "Please inspect the result with `ret.schema` and choose a valid selector."
     )
     if segments[0] == "v":
         if selector not in ("v.id", "v.data"):
             raise SyntaxError(err_msg)
     elif segments[0] == "e":
         raise NotImplementedError("Selector of e is not supported yet")
         if selector not in ("e.src", "e.dst", "e.data"):
             raise SyntaxError(err_msg)
     elif segments[0] == "r":
         if selector != "r":
             raise SyntaxError(err_msg)
     else:
         raise SyntaxError(err_msg)
     return True
예제 #6
0
 def _check_selector(self, selector):
     """
     Raises:
         InvalidArgumentError:
             - Selector in labeled vertex data context is None
         SyntaxError:
             - The syntax of selector is incorrect
         NotImplementedError:
             - Selector of e not supported
     """
     if selector is None:
         raise InvalidArgumentError(
             "Selector in labeled vertex data context cannot be None")
     segments = selector.split(":")
     err_msg = f"Invalid selector: `{selector}`. "
     err_msg += (
         "Please inspect the result with `ret.schema` and choose a valid selector."
     )
     if len(segments) != 2:
         raise SyntaxError(err_msg)
     stype, segments = segments[0], segments[1]
     segments = segments.split(".")
     if stype == "v":
         if len(segments) != 2:
             raise SyntaxError(err_msg)
     elif stype == "e":
         raise NotImplementedError("Selector of e not supported yet")
     elif stype == "r":
         if len(segments) != 1:
             raise SyntaxError(err_msg)
     else:
         raise SyntaxError(err_msg)
     return True
예제 #7
0
 def _check_selector(self, selector):
     """
     Raises:
         InvalidArgumentError:
             - Selector in labeled vertex data context is None
         SyntaxError:
             - The syntax of selector is incorrect
         NotImplementedError:
             - Selector of e not supported
     """
     if selector is None:
         raise InvalidArgumentError(
             "Selector in vertex property context cannot be None")
     segments = selector.split(".")
     err_msg = f"Invalid selector: `{selector}`. "
     err_msg += (
         "Please inspect the result with `ret.schema` and choose a valid selector."
     )
     if len(segments) != 2:
         raise SyntaxError(err_msg)
     if segments[0] == "v":
         if selector not in ("v.id", "v.data", "v.label_id"):
             raise SyntaxError(err_msg)
     elif segments[0] == "e":
         raise NotImplementedError("Selector of e not supported yet")
     elif segments[0] == "r":
         # The second part of selector or r is user defined name.
         # So we will allow any str
         pass
     else:
         raise SyntaxError(err_msg)
     return True
예제 #8
0
    def __init__(self, algo, context=None, gar=None):
        """Init assets of the algorithm.

        Args:
            algo (str): Represent specific algo inside resource.
            context (str): Type of context that hold the calculation results.
                May be replaced by the value found in the gar meta info.
                Defaults to None.
            gar (bytes or BytesIO, optional): The bytes that encodes the
                application's source code. Defaults to None.

        Raises:
            InvalidArgumentError: The resulting context type is not one of
                ``self._support_context_type``.
        """
        self._algo = algo
        self._context_type = context

        # An algo name containing "giraph:" marks a java app; everything
        # else starts out as "cpp_pie".
        uses_giraph = isinstance(self._algo, str) and "giraph:" in self._algo
        self._type = "java_pie" if uses_giraph else "cpp_pie"
        self._meta = {}

        # used for gar resource
        if gar and isinstance(gar, (BytesIO, bytes)):
            if isinstance(gar, bytes):
                self._gar = gar
            else:
                self._gar = gar.getvalue()
            self._extract_meta_info()
        else:
            # No usable gar resource was given.
            self._gar = None

        if self._context_type not in self._support_context_type:
            raise InvalidArgumentError(
                "Unsupport context type: {0}".format(self._context_type)
            )

        self._op = create_app(self)
예제 #9
0
    def resolve_src_dst_value(
        value: Union[int, str, Tuple[Union[int, str], str]]
    ):
        """Resolve the edge's source or destination into ``(vid, label)``.

        Args:
            value (Union[int, str, Tuple[Union[int, str], str]]):
            1. an int or a str giving the vid; the label is then ``""``
            2. a two-element sequence ``(vid, label)`` where vid is an int
               or str and label is a str

        Returns:
            tuple: ``(vid, label)``.

        Raises:
            InvalidArgumentError: ``value`` is neither an int, a str nor a
                sequence. The individual field checks are delegated to
                ``check_argument``.
        """
        if isinstance(value, (int, str)):
            vid_ok = isinstance(value, int) or (
                isinstance(value, str) and not value.isdecimal())
            check_argument(vid_ok, "Column name cannot be decimal")
            return value, ""

        if not isinstance(value, Sequence):
            raise InvalidArgumentError(
                "Source / destination format incorrect. Expect vid or [vid, source_label]"
            )

        check_argument(len(value) == 2)
        vid, label = value[0], value[1]
        vid_ok = isinstance(vid, int) or (
            isinstance(vid, str) and not vid.isdecimal())
        check_argument(vid_ok, "Column name cannot be decimal")
        check_argument(isinstance(label, str), "Label must be str")
        return vid, label
예제 #10
0
    def learning(self, graph, nodes=None, edges=None, gen_labels=None):
        """Start a graph learning engine.

        If an instance is already recorded for ``graph.vineyard_id`` it is
        returned as is. Otherwise a learning engine is created through the
        gRPC client and the resulting learning graph is recorded on this
        session and attached to ``graph``.

        Args:
            graph (:class:`Graph`): The graph to train on.
            nodes (list): The node types that will be used for gnn training.
            edges (list): The edge types that will be used for gnn training.
            gen_labels (list): Extra node and edge labels on original graph for gnn training.

        Returns:
            `graphscope.learning.Graph`: An instance of `graphscope.learning.Graph`
                that could be feed to the learning engine.

        Raises:
            RuntimeError: ``sys.platform`` is neither ``linux`` nor ``linux2``.
            InvalidArgumentError: ``graph`` is unloaded or is not a property
                graph.
        """
        cached = self._learning_instance_dict.get(graph.vineyard_id)
        if cached is not None:
            return cached

        if sys.platform not in ("linux", "linux2"):
            raise RuntimeError(
                "The learning engine currently supports Linux only, doesn't support %s"
                % sys.platform
            )

        if not graph.loaded():
            raise InvalidArgumentError("The graph has already been unloaded")
        if graph.graph_type != types_pb2.ARROW_PROPERTY:
            raise InvalidArgumentError("The graph should be a property graph.")

        # Imported at call time rather than at module import.
        from graphscope.learning.graph import Graph as LearningGraph

        encoded_handle = self._get_gl_handle(graph)
        raw_config = LearningGraph.preprocess_args(
            encoded_handle, nodes, edges, gen_labels)
        config_json = json.dumps(raw_config)
        config = base64.b64encode(config_json.encode("utf-8")).decode("utf-8")
        endpoints = self._grpc_client.create_learning_engine(
            graph.vineyard_id, encoded_handle, config
        )

        # Decode the handle back into a dict and fill in the server endpoints.
        handle_json = base64.b64decode(encoded_handle.encode("utf-8"))
        handle = json.loads(handle_json.decode("utf-8"))
        handle["server"] = endpoints
        handle["client_count"] = 1

        learning_graph = LearningGraph(handle, config, graph.vineyard_id, self)
        self._learning_instance_dict[graph.vineyard_id] = learning_graph
        graph.attach_learning_instance(learning_graph)
        return learning_graph
예제 #11
0
def load_app(algo, gar=None, **kwargs):
    """Load an app from a gar resource.

    The resource may be given as raw bytes, a ``BytesIO`` or a file path.

    Args:
        algo: str
          Algo name inside resource. It is converted with ``str()`` before
          use.
        gar: bytes or BytesIO or str
          A str is treated as the path of the resource file.
        **kwargs: Extra keyword arguments forwarded to :class:`AppAssets`.

    Returns:
        Instance of <graphscope.AppAssets>

    Raises:
        FileNotFoundError: The path does not exist (raised by ``open``).
        PermissionError: The path cannot be read (raised by ``open``).
        InvalidArgumentError: The file is not a zip file, or ``gar`` has an
            unsupported type.

    Examples:

        >>> sssp = load_app('sssp', gar='./resource.gar')
        >>> sssp(src=4)

        which will have following `.gs_conf.yaml` in resource.gar:
          app:
            - algo: sssp
              type: cpp_pie
              class_name: grape:SSSP
              src: sssp/sssp.h
              compatible_graph:
                - gs::ArrowProjectedFragment
    """
    # In-memory resource: hand it over as is.
    if isinstance(gar, (BytesIO, bytes)):
        return AppAssets(str(algo), gar, **kwargs)

    if not isinstance(gar, str):
        raise InvalidArgumentError("Wrong type with {}".format(gar))

    # Path on disk: read the bytes first, then validate the archive format.
    with open(gar, "rb") as resource:
        payload = resource.read()
    if zipfile.is_zipfile(gar):
        return AppAssets(str(algo), payload, **kwargs)
    raise InvalidArgumentError("{} is not a zip file.".format(gar))
예제 #12
0
    def is_compatible(self, graph):
        """Determine if this algorithm can run on this type of graph.

        As a side effect ``self._type`` is set: to ``"cpp_pie"`` for a
        builtin app (no gar resource), otherwise to the ``type`` of the
        matching entry in the gar config file.

        Args:
            graph (:class:`Graph`): A graph instance.

        Returns:
            None for a builtin app, True when the matching app entry lists
            the graph's type as compatible.

        Raises:
            InvalidArgumentError:
                - :code:`graph` is not one of the supported graph classes.
                - App is not compatible with graph or
                - Algo not found in gar resource.
            KeyError:
                - The config file is missing from the gar resource (raised
                  by ``ZipFile.read``).

            ScannerError:
                - Yaml file format is incorrect.
        """
        if not isinstance(
                graph,
            (
                graphscope.framework.graph.Graph,
                graphscope.experimental.nx.classes.graph.Graph,
                graphscope.experimental.nx.classes.digraph.DiGraph,
            ),
        ):
            raise InvalidArgumentError("Wrong type of graph.")
        # builtin app
        if self._gar is None:
            self._type = "cpp_pie"
            return
        # check yaml file; the archive is closed as soon as the config is read
        with zipfile.ZipFile(BytesIO(self._gar), "r") as archive:
            config = yaml.safe_load(archive.read(DEFAULT_GS_CONFIG_FILE))

        # check the compatibility with graph
        for application in config["app"]:
            if self._algo == application["algo"]:
                self._type = application["type"]
                graph_type = graph_type_to_cpp_class(graph.graph_type)
                if graph_type not in application["compatible_graph"]:
                    raise InvalidArgumentError(
                        "App is uncompatible with graph {}".format(graph_type))
                return True
        raise InvalidArgumentError("App not found in gar: {}".format(
            self._algo))
예제 #13
0
    def __call__(self, graph: Graph, *args, **kwargs):
        """Run this java app on ``graph``.

        ``app_class=self.java_app_class`` is added to the keyword arguments.
        When ``self.java_app_type`` contains ``"simple"`` and the graph is an
        ``ARROW_PROPERTY`` graph, the graph is first projected with
        ``graph._project_to_simple()``.

        Raises:
            InvalidArgumentError: ``graph`` has no ``graph_type`` attribute.

        Returns:
            The result of calling the wrapped ``JavaAppDagNode`` with
            ``*args`` and the extended keyword arguments.
        """
        kwargs_extend = dict(app_class=self.java_app_class, **kwargs)
        if not hasattr(graph, "graph_type"):
            raise InvalidArgumentError("Missing graph_type attribute in graph object.")

        wants_simple_graph = "simple" in self.java_app_type
        if wants_simple_graph and graph.graph_type == graph_def_pb2.ARROW_PROPERTY:
            graph = graph._project_to_simple()

        dag_node = JavaAppDagNode(graph, self)
        wrapped_app = graph.session._wrapper(dag_node)
        return wrapped_app(*args, **kwargs_extend)
예제 #14
0
def create_context(context_type, session_id, context_key, graph):
    """A context factory, create concrete context class by context_type.

    Args:
        context_type (str): One of ``tensor``, ``vertex_data``,
            ``labeled_vertex_data``, ``vertex_property`` or
            ``labeled_vertex_property``.
        session_id: Passed through to the context constructor.
        context_key: Passed through to the context constructor.
        graph: Passed through to the context constructor.

    Returns:
        An instance of the context class registered for ``context_type``.

    Raises:
        InvalidArgumentError: ``context_type`` is not one of the names above.
    """
    context_classes = {
        "tensor": TensorContext,
        "vertex_data": VertexDataContext,
        "labeled_vertex_data": LabeledVertexDataContext,
        "vertex_property": VertexPropertyContext,
        "labeled_vertex_property": LabelVertexPropertyContext,
    }
    context_class = context_classes.get(context_type)
    if context_class is None:
        raise InvalidArgumentError("Not supported context type: " +
                                   context_type)
    return context_class(session_id, context_key, graph)
예제 #15
0
 def wrapper(*args, **kwargs):
     """Project an ``ARROW_PROPERTY`` graph to simple, then call ``func``.

     The first positional argument is taken as the graph. When the caller
     passed ``weight`` it is used as the edge property of the projection;
     otherwise a passed ``attribute`` is used as the vertex property;
     otherwise the projection takes no property. Graphs of any other type
     are handed to ``func`` unchanged.

     Raises:
         InvalidArgumentError: The graph has no ``graph_type`` attribute.
     """
     graph = args[0]
     if not hasattr(graph, "graph_type"):
         raise InvalidArgumentError(
             "Missing graph_type attribute in graph object.")

     if graph.graph_type != graph_def_pb2.ARROW_PROPERTY:
         return func(graph, *args[1:], **kwargs)

     if "weight" in kwargs:
         # caller supplied a 'weight' argument
         graph = graph._project_to_simple(e_prop=kwargs["weight"])
     elif "attribute" in kwargs:
         # caller supplied an 'attribute' argument
         graph = graph._project_to_simple(v_prop=kwargs["attribute"])
     else:
         graph = graph._project_to_simple()
     return func(graph, *args[1:], **kwargs)
예제 #16
0
def louvain(graph, min_progress=1000, progress_tries=1):
    """Compute the best partition of `graph` with the louvain algorithm.

    Args:
        graph (:class:`graphscope.Graph`): A simple undirected graph.
        min_progress: The minimum delta X required to be considered progress,
                      where X is the number of nodes that have changed their
                      community on a particular pass. Delta X is the
                      difference in that number between the current pass and
                      the previous pass.
        progress_tries: Number of times the min_progress setting is not met
                        before exiting from the current level and compressing
                        the graph.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with id of community it belongs to, evaluated in eager mode.

    Raises:
        InvalidArgumentError: ``graph.is_directed()`` is true; louvain only
            supports undirected graphs.

    References:
        [1] Blondel, V.D. et al. Fast unfolding of communities in large networks. J. Stat. Mech 10008, 1-12(2008).

        [2] https://github.com/Sotera/distributed-graph-analytics

        [3] https://sotera.github.io/distributed-graph-analytics/louvain/

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess, directed=False)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.louvain(pg, min_progress=1000, progress_tries=1)
        >>> sess.close()
    """
    if graph.is_directed():
        raise InvalidArgumentError("Louvain not support directed graph.")
    louvain_app = AppAssets(algo="louvain", context="vertex_data")
    return louvain_app(graph, min_progress, progress_tries)
예제 #17
0
 def wrapper(*args, **kwargs):
     """Project a property graph to simple, then call ``func``.

     The first positional argument is taken as the graph. For
     ``DYNAMIC_PROPERTY`` and ``ARROW_PROPERTY`` graphs the projection uses
     the caller's ``attribute`` / ``weight`` keyword values, each only when
     ``func`` declares a parameter of that name; otherwise None is used.
     Graphs of any other type are handed to ``func`` unchanged.

     Raises:
         InvalidArgumentError: The graph has no ``graph_type`` attribute.
     """
     graph = args[0]
     if not hasattr(graph, "graph_type"):
         raise InvalidArgumentError(
             "Unsupported graph to project to simple.")

     if graph.graph_type in (
             graph_def_pb2.DYNAMIC_PROPERTY,
             graph_def_pb2.ARROW_PROPERTY,
     ):
         declared_params = inspect.getfullargspec(func)[0]
         attribute = (kwargs.get("attribute", None)
                      if "attribute" in declared_params else None)
         weight = (kwargs.get("weight", None)
                   if "weight" in declared_params else None)
         graph = graph._project_to_simple(v_prop=attribute, e_prop=weight)
     return func(graph, *args[1:], **kwargs)
예제 #18
0
    def _parse_value(self, op, response: message_pb2.RunStepResponse):
        """Extract the value of a finished op from its run-step response.

        ``response.metrics`` is first attached to ``op`` as its output, which
        marks the op as already run.

        Args:
            op: The operation that was run; its ``output_types`` selects how
                the response is interpreted.
            response (message_pb2.RunStepResponse): The response of the step.

        Returns:
            ``response.graph_def`` for GRAPH outputs, ``response.result``
            decoded as UTF-8 for APP, RESULTS, VINEYARD_TENSOR and
            VINEYARD_DATAFRAME outputs, and the raw ``response.result`` for
            TENSOR and DATAFRAME outputs.

        Raises:
            InvalidArgumentError: ``op.output_types`` is none of the above.
        """
        # attach an output to op, indicating the op is already run.
        op.set_output(response.metrics)

        output_type = op.output_types
        if output_type == types_pb2.GRAPH:
            return response.graph_def
        if output_type in (
                types_pb2.APP,
                types_pb2.RESULTS,
                types_pb2.VINEYARD_TENSOR,
                types_pb2.VINEYARD_DATAFRAME,
        ):
            return response.result.decode("utf-8")
        if output_type in (types_pb2.TENSOR, types_pb2.DATAFRAME):
            return response.result
        # Interpolate the type into the message; exceptions do not apply
        # logging-style lazy "%s" arguments.
        raise InvalidArgumentError("Not recognized output type: %s" %
                                   (output_type, ))
예제 #19
0
    def is_compatible(self, graph):
        """Determine if this algorithm can run on this type of graph.

        Args:
            graph (:class:`GraphDAGNode`): A graph instance.

        Returns:
            None for a builtin app (no gar resource), True when the graph's
            C++ class is listed in the app's ``compatible_graph`` meta entry.

        Raises:
            InvalidArgumentError:
                - App is not compatible with graph
        """
        # A builtin app carries no gar resource and nothing is checked.
        if self._gar is None:
            return

        # Compare against the graph classes listed in the app's meta info.
        graph_type = graph_type_to_cpp_class(graph.graph_type)
        if graph_type in self._meta["compatible_graph"]:
            return True
        raise InvalidArgumentError(
            "App is uncompatible with graph {}".format(graph_type))
예제 #20
0
 def _extract_meta_info(self):
     """Extract app meta info from gar resource.

     Reads the config file out of the zip archive held in ``self._gar`` and
     looks for the entry whose ``algo`` equals ``self._algo``. When
     ``self._algo`` is None and the config lists exactly one app, that app
     is used. On a match, ``self._type`` and ``self._meta`` are set from the
     entry, and ``self._context_type`` too when the entry has a
     ``context_type``.

     Raises:
         InvalidArgumentError:
             - App not found in gar resource.
         KeyError:
             - The config file is missing from the gar resource (raised by
               ``ZipFile.read``).
     """
     # The archive is closed as soon as the config has been read.
     with zipfile.ZipFile(BytesIO(self._gar), "r") as archive:
         config = yaml.safe_load(archive.read(DEFAULT_GS_CONFIG_FILE))

     apps = config["app"]
     # default app will used if there is only one app in it
     if self._algo is None and len(apps) == 1:
         self._algo = apps[0]["algo"]
         logger.info("Default app %s will be used.", self._algo)

     for meta in apps:
         if self._algo == meta["algo"]:
             if "context_type" in meta:
                 self._context_type = meta["context_type"]
             self._type = meta["type"]
             self._meta = meta
             return
     raise InvalidArgumentError("App not found in gar: {}".format(self._algo))
예제 #21
0
def louvain(graph, min_progress=1000, progress_tries=1):
    """Compute the best partition of `graph` with the louvain algorithm.

    Args:
        graph (:class:`Graph`): A projected simple graph.
        min_progress: The minimum delta X required to be considered progress,
                      where X is the number of nodes that have changed their
                      community on a particular pass. Delta X is the
                      difference in that number between the current pass and
                      the previous pass.
        progress_tries: Number of times the min_progress setting is not met
                        before exiting from the current level and compressing
                        the graph.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with id of community it belongs to.

    Raises:
        InvalidArgumentError: ``graph.is_directed()`` is true.

    References:
    .. [1] Blondel, V.D. et al. Fast unfolding of communities in
    large networks. J. Stat. Mech 10008, 1-12(2008).
    .. [2] https://github.com/Sotera/distributed-graph-analytics
    .. [3] https://sotera.github.io/distributed-graph-analytics/louvain/

    Examples:

    .. code:: python

        import graphscope as gs
        s = gs.session()
        g = s.load_from('The parameters for loading a graph...')
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel', v_prop=None, e_prop='weight')
        r = gs.louvain(pg)
        s.close()

    """
    if graph.is_directed():
        raise InvalidArgumentError("Louvain not support directed graph.")
    louvain_app = AppAssets(algo="louvain")
    return louvain_app(graph, min_progress, progress_tries)
예제 #22
0
 def wrapper(*args, **kwargs):
     """Project a property graph to simple, then call ``func``.

     The first positional argument is taken as the graph. Only
     ``DYNAMIC_PROPERTY`` and ``ARROW_PROPERTY`` graphs are projected:

     - if ``func`` declares a ``weight`` parameter, the caller's ``weight``
       is used as edge property, or None when looking it up on the first
       edge label raises ``KeyError``;
     - else if ``func`` declares an ``attribute`` parameter, the caller's
       ``attribute`` is used as vertex property;
     - otherwise the projection takes no property.

     Raises:
         InvalidArgumentError: The graph has no ``graph_type`` attribute.
     """
     graph = args[0]
     if not hasattr(graph, "graph_type"):
         raise InvalidArgumentError(
             "Missing graph_type attribute in graph object.")

     if graph.graph_type not in (
             graph_def_pb2.DYNAMIC_PROPERTY,
             graph_def_pb2.ARROW_PROPERTY,
     ):
         return func(graph, *args[1:], **kwargs)

     declared_params = inspect.getfullargspec(func)[0]
     if "weight" in declared_params:
         # func has 'weight' argument
         weight = kwargs.get("weight", None)
         try:
             first_edge_label = graph.schema.edge_labels[0]
             graph.schema.get_edge_property_id(first_edge_label, weight)
         except KeyError:
             # The lookup failed, so project without an edge property.
             weight = None
         graph = graph._project_to_simple(e_prop=weight)
     elif "attribute" in declared_params:
         graph = graph._project_to_simple(
             v_prop=kwargs.get("attribute", None))
     else:
         graph = graph._project_to_simple()
     return func(graph, *args[1:], **kwargs)
예제 #23
0
    def _get_gl_handle(self, graph):
        """Dump a handle for GraphLearn describing ``graph``.

        Each node schema entry is a ``:``-joined string of:

        + the vertex label
        + whether a property is named ``weight`` (``true`` / ``false``)
        + whether a property is named ``label`` (``true`` / ``false``)
        + the number of int attributes (every type that is neither string
          nor float/double)
        + the number of float attributes
        + the number of string attributes

        Each edge schema entry has the same fields, but starts with the
        first element of a relationship, the edge label and the second
        element of the relationship (presumably source and destination
        vertex labels).

        Shape of the handle before encoding:

        .. code:: python

            {
                "hosts": ...,            # self.info["engine_hosts"]
                "client_count": 1,
                "vineyard_id": ...,      # graph.vineyard_id
                "vineyard_socket": ...,  # self._engine_config["vineyard_socket"]
                "node_schema": [
                    "user:false:false:10:0:0",
                    "item:true:false:0:0:5"
                ],
                "edge_schema": [
                    "user:click:item:true:false:0:0:0",
                    "user:buy:item:true:true:0:0:0",
                    "item:similar:item:false:false:10:0:0"
                ],
                "node_attribute_types": {
                    "person": {
                        "age": "i",
                        "name": "s",
                    },
                },
                "edge_attribute_types": {
                    "knows": {
                        "weight": "f",
                    },
                },
            }

        The handle can be decoded using:

        .. code:: python

           base64.b64decode(handle.encode('utf-8')).decode('utf-8')

        Args:
            graph (:class:`Graph`): A Property Graph.

        Returns:
            str: Base64 encoded handle

        Raises:
            InvalidArgumentError: If the graph is not loaded, or graph_type isn't
                `ARROW_PROPERTY`.
        """
        if not graph.loaded():
            raise InvalidArgumentError("The graph has already been unloaded")
        if graph.graph_type != types_pb2.ARROW_PROPERTY:
            raise InvalidArgumentError("The graph should be a property graph.")

        def summarize_properties(properties):
            # Tag every property as string ("s"), float ("f") or anything
            # else ("i"), count each tag, and note the special names.
            tally = {"i": 0, "f": 0, "s": 0}
            type_codes = {}
            has_weight = "false"
            has_label = "false"
            for name, dtype in properties.items():
                if dtype in (types_pb2.STRING, ):
                    code = "s"
                elif dtype in (types_pb2.FLOAT, types_pb2.DOUBLE):
                    code = "f"
                else:
                    code = "i"
                tally[code] += 1
                type_codes[name] = code
                if name == "weight":
                    has_weight = "true"
                elif name == "label":
                    has_label = "true"
            return (has_weight, has_label, tally["i"], tally["f"],
                    tally["s"], type_codes)

        schema = graph.schema

        node_schema = []
        node_attribute_types = {}
        for position, label in enumerate(schema.vertex_labels):
            summary = summarize_properties(schema.vertex_properties[position])
            node_schema.append(
                "{}:{}:{}:{}:{}:{}".format(label, *summary[:5]))
            node_attribute_types[label] = summary[5]

        edge_schema = []
        edge_attribute_types = {}
        for position, label in enumerate(schema.edge_labels):
            summary = summarize_properties(schema.edge_properties[position])
            flags_and_counts = summary[:5]
            # One schema entry per relationship of this edge label.
            edge_schema.extend(
                "{}:{}:{}:{}:{}:{}:{}:{}".format(
                    rel[0], label, rel[1], *flags_and_counts)
                for rel in schema.edge_relationships[position])
            edge_attribute_types[label] = summary[5]

        handle = {
            "hosts": self.info["engine_hosts"],
            "client_count": 1,
            "vineyard_id": graph.vineyard_id,
            "vineyard_socket": self._engine_config["vineyard_socket"],
            "node_schema": node_schema,
            "edge_schema": edge_schema,
            "node_attribute_types": node_attribute_types,
            "edge_attribute_types": edge_attribute_types,
        }
        encoded = base64.b64encode(json.dumps(handle).encode("utf-8"))
        return encoded.decode("utf-8")
예제 #24
0
    def project_to_simple(self, v_label="_", e_label="_", v_prop=None, e_prop=None):
        """Project a property graph to a simple graph, useful for analytical engine.

        Labels and properties may be passed either by name or by integer
        index. Names are translated to indices through the graph schema, since
        indices are what the projection operation is built with.

        Args:
            v_label (int or str, optional): vertex label to project. Defaults to "_".
            e_label (int or str, optional): edge label to project. Defaults to "_".
            v_prop (int or str, optional): vertex property of the v_label.
                Defaults to None, meaning no vertex property is selected.
            e_prop (int or str, optional): edge property of the e_label.
                Defaults to None, meaning no edge property is selected.

        Raises:
            RuntimeError: the graph is not loaded.
            KeyError: an integer label or property index is out of range.
            ValueError: the schema lookup of the vertex label, the vertex
                property or the edge property raised ``ValueError``; it is
                re-raised with a descriptive message.
            InvalidArgumentError: the schema lookup of the edge label raised
                ``ValueError``.

        Returns:
            :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`
        """
        if not self.loaded():
            raise RuntimeError(
                "The graph is not registered in remote, and can't project to simple"
            )
        self.check_unmodified()
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        check_argument(isinstance(v_label, (int, str)))
        check_argument(isinstance(e_label, (int, str)))

        def within_bounds(index, limit):
            # Only indices in [0, limit) are accepted.
            if index >= limit or index <= -1:
                raise KeyError("id {} is out of range.".format(index))
            return index

        schema = self._schema

        # Resolve the vertex label to its index.
        try:
            if isinstance(v_label, int):
                v_label_id = within_bounds(v_label, schema.vertex_label_num)
            else:
                v_label_id = schema.vertex_label_index(v_label)
        except ValueError as err:
            raise ValueError(
                "graph not contains the vertex label {}.".format(v_label)
            ) from err

        # Resolve the edge label to its index.
        try:
            if isinstance(e_label, int):
                e_label_id = within_bounds(e_label, schema.edge_label_num)
            else:
                e_label_id = schema.edge_label_index(e_label)
        except ValueError as err:
            raise InvalidArgumentError(
                "graph not contains the edge label {}.".format(e_label)
            ) from err

        # NB: a property id of -1 means that no vertex property is selected.
        v_prop_id, v_properties = -1, None
        if v_prop is not None:
            check_argument(isinstance(v_prop, (int, str)))
            v_properties = schema.vertex_properties[v_label_id]
            try:
                if isinstance(v_prop, int):
                    v_prop_id = within_bounds(v_prop, len(v_properties))
                else:
                    v_prop_id = schema.vertex_property_index(v_label_id, v_prop)
            except ValueError as err:
                raise ValueError(
                    "vertex label {} not contains the property {}".format(
                        v_label, v_prop
                    )
                ) from err

        # NB: a property id of -1 means that no edge property is selected.
        e_prop_id, e_properties = -1, None
        if e_prop is not None:
            check_argument(isinstance(e_prop, (int, str)))
            e_properties = schema.edge_properties[e_label_id]
            try:
                if isinstance(e_prop, int):
                    e_prop_id = within_bounds(e_prop, len(e_properties))
                else:
                    e_prop_id = schema.edge_property_index(e_label_id, e_prop)
            except ValueError as err:
                raise ValueError(
                    "edge label {} not contains the property {}".format(e_label, e_prop)
                ) from err

        oid_type = schema.oid_type
        vid_type = schema.vid_type
        # The data type of a selected property is looked up by its position
        # among the label's properties; it stays None when nothing is selected.
        vdata_type = (
            list(v_properties.values())[v_prop_id] if v_properties else None
        )
        edata_type = (
            list(e_properties.values())[e_prop_id] if e_properties else None
        )

        projection = project_arrow_property_graph(
            self,
            v_label_id,
            v_prop_id,
            e_label_id,
            e_prop_id,
            vdata_type,
            edata_type,
            oid_type,
            vid_type,
        )
        graph_def = projection.eval()
        return Graph(self.session_id, graph_def)
예제 #25
0
    def preprocess_args(handle, nodes, edges, gen_labels):  # noqa: C901
        """Normalise node, edge and label-split arguments against a graph handle.

        Args:
            handle: base64 encoded UTF-8 JSON text. The keys read here are
                ``"node_schema"``, ``"node_attribute_types"`` and
                ``"edge_attribute_types"``.
            nodes: ``None`` to take every node type listed in the handle's
                ``"node_schema"``, otherwise an iterable whose items are
                either a node type name or a ``(name, attributes)`` tuple.
            edges: ``None`` or an iterable whose items take one of four forms:
                an edge label string; a tuple whose first two items are
                strings (kept as given); ``(label, [attributes])``; or
                ``(triple, [attributes])`` where ``triple[1]`` is the edge
                label. The two label-only forms require that at most one node
                type is selected, and that type is used for both ends.
            gen_labels: ``None`` or an iterable of 3- or 4-item entries, which
                are grouped by their second item. Within a group of 3-item
                entries the third items are summed, and every entry becomes
                ``(first, second, total, (start, end))`` with cumulative
                ``start`` / ``end`` offsets.

        Raises:
            InvalidArgumentError: duplicate node type, malformed node / edge /
                gen_labels entry, or an edge label given without end points
                while several node types are selected.

        Returns:
            dict: ``"nodes"`` and ``"edges"`` (lists, or ``None`` when empty),
            ``"node_attributes"`` and ``"edge_attributes"`` (name mapped to
            ``(attributes, [int_count, float_count, string_count])``) and
            ``"gen_labels"`` (list of normalised entries).
        """
        meta = json.loads(
            base64.b64decode(handle).decode("utf-8", errors="ignore"))
        node_names, node_attributes = [], {}
        edge_names, edge_attributes = [], {}

        def kind_counts(kind_of, chosen):
            # Count the chosen attributes per one-letter kind ("i", "f", "s").
            tally = collections.Counter(kind_of[attr] for attr in chosen)
            return [tally["i"], tally["f"], tally["s"]]

        def lone_node_type():
            # An edge given without end points is only unambiguous when a
            # single node type is in play.
            if len(node_names) > 1:
                raise InvalidArgumentError(
                    "Cannot inference edge type when multiple kinds of nodes exists"
                )
            return node_names[0]

        if nodes is None:
            # The node type name is the first ":"-separated field.
            node_names.extend(
                entry.split(":")[0] for entry in meta["node_schema"])
        else:
            for node in nodes:
                if isinstance(node, str):
                    name, with_attrs = node, False
                elif isinstance(node, tuple):
                    name, with_attrs = node[0], True
                else:
                    raise InvalidArgumentError(
                        "The node parameter is in bad format: %s" % node)
                if name in node_names:
                    raise InvalidArgumentError("Duplicate node type: %s" %
                                               name)
                node_names.append(name)
                if with_attrs:
                    kinds = meta["node_attribute_types"][name]
                    node_attributes[name] = (node[1],
                                             kind_counts(kinds, node[1]))

        if edges is not None:
            for edge in edges:
                if isinstance(edge, str):
                    only = lone_node_type()
                    edge_names.append((only, edge, only))
                    continue
                if isinstance(edge, tuple):
                    head = edge[0]
                    if isinstance(head, str):
                        tail = edge[1]
                        if isinstance(tail, str):
                            # Already a fully specified edge; keep it as is.
                            edge_names.append(edge)
                            continue
                        if isinstance(tail, list):
                            # (label, [attributes]) over the single node type.
                            only = lone_node_type()
                            edge_names.append((only, head, only))
                            kinds = meta["edge_attribute_types"][head]
                            edge_attributes[head] = (tail,
                                                     kind_counts(kinds, tail))
                            continue
                    elif (isinstance(head, (list, tuple))
                          and isinstance(edge[1], list)):
                        # (triple, [attributes]); the label is triple[1].
                        edge_names.append(head)
                        kinds = meta["edge_attribute_types"][head[1]]
                        edge_attributes[head[1]] = (edge[1],
                                                    kind_counts(kinds, edge[1]))
                        continue
                raise InvalidArgumentError(
                    "The edge parameter is in bad format: %s" % edge)

        split_groups = collections.defaultdict(list)
        if gen_labels is not None:
            for entry in gen_labels:
                if len(entry) not in (3, 4):
                    raise InvalidArgumentError("Bad gen_labels arguments: %s" %
                                               gen_labels)
                split_groups[entry[1]].append(entry)

        split_labels = []
        for group in split_groups.values():
            # Every entry of a group must have the same arity.
            sizes = [len(entry) for entry in group]
            check_argument(sizes[:-1] == sizes[1:],
                           "Invalid gen labels: %s" % group)
            if len(group[0]) == 3:
                # Replace each size by the group total plus its own
                # cumulative (start, end) range.
                total = sum(entry[2] for entry in group)
                offset, ranged = 0, []
                for entry in group:
                    ranged.append((entry[0], entry[1], total,
                                   (offset, offset + entry[2])))
                    offset += entry[2]
                group = ranged
            split_labels.extend(group)

        return {
            "nodes": node_names or None,
            "edges": edge_names or None,
            "node_attributes": node_attributes,
            "edge_attributes": edge_attributes,
            "gen_labels": split_labels,
        }
예제 #26
0
    def gremlin(self, graph, engine_params=None):
        """Get an interactive engine handler to execute gremlin queries.

        Note that this method will be executed implicitly when a property graph is
        created, caching an instance of InteractiveQuery in the session, if
        `initializing_interactive_engine` is True. If you want to create a new
        instance under the same graph with different params, you should close the
        existing instance first.

        .. code:: python

            >>> # close and recreate InteractiveQuery.
            >>> interactive_query = sess.gremlin(g)
            >>> interactive_query.close()
            >>> interactive_query = sess.gremlin(g, engine_params={"xxx":"xxx"})


        Args:
            graph (:class:`Graph`): Use the graph to create interactive instance.
            engine_params (dict, optional): Configure startup parameters of interactive engine.
                Keys and values are converted to strings before being sent.
                You can also configure this param by `graphscope.set_option(engine_params={})`.
                See a list of configurable keys in
                `interactive_engine/deploy/docker/dockerfile/executor.vineyard.properties`

        Raises:
            InvalidArgumentError: :code:`graph` is not a property graph or unloaded.
            InteractiveEngineInternalError: the cached instance has failed, or
                creating the interactive engine raised an exception.

        Returns:
            :class:`InteractiveQuery`
        """
        # The cache entry is None once the InteractiveQuery has been closed,
        # which is treated the same as having no entry at all.
        cached = self._interactive_instance_dict.get(graph.vineyard_id)
        if cached is not None:
            # Poll once per second while the instance is still initializing.
            # An unbounded loop is ok, as the status is either running or
            # failed eventually after timeout.
            while True:
                if cached.status == InteractiveQueryStatus.Running:
                    return cached
                if cached.status == InteractiveQueryStatus.Failed:
                    raise InteractiveEngineInternalError(cached.error_msg)
                time.sleep(1)

        if not graph.loaded():
            raise InvalidArgumentError("The graph has already been unloaded")
        if graph.graph_type != types_pb2.ARROW_PROPERTY:
            raise InvalidArgumentError("The graph should be a property graph.")

        # Register the new instance before the engine is created.
        interactive_query = InteractiveQuery(session=self,
                                             object_id=graph.vineyard_id)
        self._interactive_instance_dict[graph.vineyard_id] = interactive_query

        if engine_params is None:
            engine_params = {}
        else:
            engine_params = {
                str(name): str(setting)
                for name, setting in engine_params.items()
            }

        try:
            response = self._grpc_client.create_interactive_engine(
                object_id=graph.vineyard_id,
                schema_path=graph.schema_path,
                gremlin_server_cpu=gs_config.k8s_gie_gremlin_server_cpu,
                gremlin_server_mem=gs_config.k8s_gie_gremlin_server_mem,
                engine_params=engine_params,
            )
        except Exception as e:
            # Record the failure on the cached instance, then surface it.
            interactive_query.status = InteractiveQueryStatus.Failed
            interactive_query.error_msg = str(e)
            raise InteractiveEngineInternalError(str(e)) from e

        interactive_query.set_frontend(front_ip=response.frontend_host,
                                       front_port=response.frontend_port)
        interactive_query.status = InteractiveQueryStatus.Running
        graph.attach_interactive_instance(interactive_query)
        return interactive_query