Exemple #1
0
    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame.

        Args:
            selector (dict): Select some portions of graph. Keys are kept
                as-is; every value is rewritten with
                `utils.transform_labeled_vertex_property_data_selector`
                before the whole mapping is serialized to JSON.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `pandas.DataFrame`
        """
        # Only graphs whose type is ARROW_PROPERTY pass this check.
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()
        # Name this method in the message (it previously named
        # to_vineyard_dataframe, which misled callers).
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        transformed = {
            key: utils.transform_labeled_vertex_property_data_selector(self, value)
            for key, value in selector.items()
        }
        selector = json.dumps(transformed)
        vertex_range = utils.transform_vertex_range(vertex_range)

        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        ret = op.eval()
        return utils.decode_dataframe(ret)
Exemple #2
0
    def to_vineyard_dataframe(self, selector=None, vertex_range=None):
        """Store the results as a vineyard dataframe and return its object id.

        Args:
            selector: dict, optional, default to None
                Key is used as column name of the dataframe,
                and the value describes how to select values of context.
                See more details in derived context class.
                When None, no selector is serialized and None is forwarded.
            vertex_range: dict, optional, default to None
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Only the sub-ranges of vertices data will be retrieved.

        Returns:
            The ``object_id`` field of the JSON-decoded evaluation result,
            i.e. the object id of the vineyard dataframe.
        """
        self._check_unmodified()

        encoded_selector = selector
        if selector is not None:
            check_argument(
                isinstance(selector, Mapping),
                "selector of to_vineyard_dataframe must be a dict",
            )
            # Rewrite each column's selector, keeping the column names.
            columns = {}
            for column, spec in selector.items():
                columns[column] = self._transform_selector(spec)
            encoded_selector = json.dumps(columns)

        sliced = utils.transform_vertex_range(vertex_range)
        response = dag_utils.to_vineyard_dataframe(
            self, encoded_selector, sliced
        ).eval()
        return json.loads(response)["object_id"]
Exemple #3
0
    def to_numpy(self, selector, vertex_range=None, axis=0):
        """Return context data as a numpy array.

        Args:
            selector (str): Describes how to select values of context.
                See more details in derived context class.
            vertex_range (dict): optional, default to None.
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Omitting the first index starts the slice at the beginning of
                the vertices, and omitting the second index extends the slice
                to the end of the vertices.
                Note the comparison is not based on numeric order, but on
                alphabetic order.
            axis (int): optional, default to 0. Forwarded unchanged to
                `dag_utils.context_to_numpy`.

        Returns:
            numpy.ndarray.
        """
        self._check_unmodified()
        # Arguments are evaluated left to right: selector first, then range.
        op = dag_utils.context_to_numpy(
            self,
            self._transform_selector(selector),
            utils.transform_vertex_range(vertex_range),
            axis,
        )
        return decode_numpy(op.eval())
Exemple #4
0
    def to_dataframe(self, selector, vertex_range=None):
        """Return results as a pandas DataFrame.

        Args:
            selector: dict
                The key is column name in dataframe, and the value describes
                how to select values of context.
                See more details in derived context class.
            vertex_range: dict, optional, default to None.
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Only the sub-ranges of vertices data will be retrieved.
                Note the comparison is not based on numeric order, but on
                alphabetic order.

        Returns:
            pandas.DataFrame
        """
        self._check_unmodified()
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )

        # Rewrite each column's selector, keeping the column names.
        columns = {}
        for column, spec in selector.items():
            columns[column] = self._transform_selector(spec)
        encoded_selector = json.dumps(columns)

        sliced = utils.transform_vertex_range(vertex_range)
        op = dag_utils.context_to_dataframe(self, encoded_selector, sliced)
        return decode_dataframe(op.eval())
Exemple #5
0
    def to_vineyard_dataframe(self, selector=None, vertex_range=None):
        """Get the context data as a vineyard dataframe and return the vineyard object id.

        Args:
            selector: dict, optional, default to None
                Key is used as column name of the dataframe, and the value
                describes how to select values of context. See more details in
                derived context DAG node class. When None, validation is
                skipped and None is forwarded.
            vertex_range: dict, optional, default to None
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Only the sub-ranges of vertices data will be retrieved.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result hold the object id of vineyard dataframe, evaluated in eager mode.
        """
        encoded_selector = None
        if selector is not None:
            check_argument(
                isinstance(selector, Mapping),
                "selector of to_vineyard_dataframe must be a dict",
            )
            # Validate each column's selector before the label check.
            for spec in selector.values():
                self._check_selector(spec)
            _ensure_consistent_label(self.context_type, selector)
            encoded_selector = json.dumps(selector)

        sliced = utils.transform_vertex_range(vertex_range)
        return ResultDAGNode(
            self,
            dag_utils.to_vineyard_dataframe(self, encoded_selector, sliced),
        )
Exemple #6
0
    def to_dataframe(self, selector, vertex_range=None):
        """Get the context data as a pandas DataFrame.

        Args:
            selector: dict
                The key is column name in dataframe, and the value describes
                how to select values of context. See more details in derived
                context DAG node class.
            vertex_range: dict, optional, default to None.
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Only the sub-ranges of vertices data will be retrieved.
                Note the comparison is not based on numeric order, but on
                alphabetic order.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result holds the `pandas.DataFrame`, evaluated in eager mode.
        """
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        # Validate each column's selector before the label check.
        for spec in selector.values():
            self._check_selector(spec)
        _ensure_consistent_label(self.context_type, selector)

        encoded_selector = json.dumps(selector)
        sliced = utils.transform_vertex_range(vertex_range)
        return ResultDAGNode(
            self,
            dag_utils.context_to_dataframe(self, encoded_selector, sliced),
        )
Exemple #7
0
    def to_vineyard_tensor(self, selector=None, vertex_range=None, axis=0):
        """Get the context data as a vineyard tensor and return the vineyard object id.

        Args:
            selector: optional, default to None. Checked with
                `self._check_selector` and then forwarded unchanged.
            vertex_range: optional, default to None. Converted with
                `utils.transform_vertex_range` before being forwarded.
            axis (int): optional, default to 0. Forwarded unchanged.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result hold the object id of vineyard tensor, evaluated in eager mode.
        """
        self._check_selector(selector)
        sliced = utils.transform_vertex_range(vertex_range)
        return ResultDAGNode(
            self,
            dag_utils.to_vineyard_tensor(self, selector, sliced, axis),
        )
Exemple #8
0
    def to_numpy(self, selector, vertex_range=None):
        """Export a selected portion of the graph as a numpy array.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
                Rewritten with
                `transform_labeled_vertex_property_data_selector` before use.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        self.check_unmodified()
        # Arguments are evaluated left to right: selector first, then range.
        op = graph_to_numpy(
            self,
            transform_labeled_vertex_property_data_selector(self, selector),
            transform_vertex_range(vertex_range),
        )
        return decode_numpy(op.eval())
Exemple #9
0
    def to_vineyard_tensor(self, selector=None, vertex_range=None, axis=0):
        """Store the results as a vineyard tensor and return its object id.

        Args:
            selector: optional, default to None. Rewritten with
                `self._transform_selector` before being forwarded.
            vertex_range: optional, default to None. Converted with
                `utils.transform_vertex_range` before being forwarded.
            axis (int): optional, default to 0. Forwarded unchanged.

        Returns:
            The ``object_id`` field of the JSON-decoded evaluation result,
            i.e. the object id of the vineyard tensor.
        """
        self._check_unmodified()
        # Arguments are evaluated left to right: selector first, then range.
        op = dag_utils.to_vineyard_tensor(
            self,
            self._transform_selector(selector),
            utils.transform_vertex_range(vertex_range),
            axis,
        )
        response = op.eval()
        return json.loads(response)["object_id"]
Exemple #10
0
    def to_numpy(self, selector, vertex_range=None):
        """Export a selected portion of the graph as a numpy array.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
                Rewritten with
                `utils.transform_labeled_vertex_property_data_selector`
                before use.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            `numpy.ndarray`
        """
        # Only graphs whose type is ARROW_PROPERTY pass this check.
        check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
        self._ensure_loaded()
        self._check_unmodified()

        # Arguments are evaluated left to right: selector first, then range.
        op = dag_utils.graph_to_numpy(
            self,
            utils.transform_labeled_vertex_property_data_selector(self, selector),
            utils.transform_vertex_range(vertex_range),
        )
        return utils.decode_numpy(op.eval())
Exemple #11
0
    def to_numpy(self, selector, vertex_range=None):
        """Export a selected portion of the graph as a numpy array.

        Args:
            selector (str): Select a portion of graph as a numpy.ndarray.
                Forwarded unchanged to `dag_utils.graph_to_numpy`.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result holds the `numpy.ndarray`, evaluated in eager mode.
        """
        # avoid circular import
        from graphscope.framework.context import ResultDAGNode

        # Only graphs whose type is ARROW_PROPERTY pass this check.
        check_argument(self.graph_type == graph_def_pb2.ARROW_PROPERTY)
        sliced = utils.transform_vertex_range(vertex_range)
        return ResultDAGNode(
            self,
            dag_utils.graph_to_numpy(self, selector, sliced),
        )
Exemple #12
0
    def output(self, fd, selector, vertex_range=None, **kwargs):
        """Dump results to `fd`.

        Supports dumping data to local (with respect to pod) files, hdfs or oss.
        For the protocols ``file``, ``hdfs``, ``hive``, ``oss`` and ``s3`` the
        results are first written to a vineyard dataframe, and vineyard does
        the data dumping job. Any other `fd` is validated here and handed to
        `dag_utils.output` directly.

        `fd` must meet specific formats, with auth information if needed. As follows:

            - local
                `file:///tmp/result_path`
            - oss
                `oss:///bucket/object`
            - hdfs
                `hdfs:///tmp/result_path`

        Args:
            fd (str): Output location.
            selector (dict): Similar to `to_dataframe`.
            vertex_range (dict, optional): Similar to `to_dataframe`. Defaults to None.
            kwargs (dict, optional): Storage options with respect to output storage type.
                    for example:
                    key, secret, endpoint for oss,
                    key, secret, client_kwargs for s3,
                    host, port for hdfs,
                    None for local.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`, evaluated in eager mode.
        """
        # Everything before the first "://"; the whole string if it is absent.
        protocol = fd.partition("://")[0]
        # Still use the stream to write to file,
        # as the C++ adaptor in Vineyard requires arrow >= 4.0.0
        if protocol in ("file", "hdfs", "hive", "oss", "s3"):
            # to_vineyard_dataframe does its own selector validation.
            df = self.to_vineyard_dataframe(selector, vertex_range)
            op = dag_utils.to_data_sink(df, fd, **kwargs)
        else:
            # Name this method in the message (it previously named
            # to_dataframe, although the caller invoked output).
            check_argument(
                isinstance(selector, Mapping),
                "selector of output must be a dict",
            )
            for value in selector.values():
                self._check_selector(value)
            _ensure_consistent_label(self.context_type, selector)
            selector = json.dumps(selector)
            vertex_range = utils.transform_vertex_range(vertex_range)
            op = dag_utils.output(self, fd, selector, vertex_range, **kwargs)
        return ResultDAGNode(self, op)
Exemple #13
0
    def to_numpy(self, selector, vertex_range=None, axis=0):
        """Get the context data as a numpy array.

        Args:
            selector (str): Describes how to select values of context.
                See more details in derived context DAG node class.
            vertex_range (dict): optional, default to None.
                Works as slicing. The expression {'begin': m, 'end': n} select a
                portion of vertices from `m` to, but not including `n`. Type of
                `m`, `n` must be identical with vertices' oid type.
                Omitting the first index starts the slice at the beginning of
                the vertices, and omitting the second index extends the slice
                to the end of the vertices.
                Note the comparison is not based on numeric order, but on
                alphabetic order.
            axis (int): optional, default to 0. Forwarded unchanged to
                `dag_utils.context_to_numpy`.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result holds the `numpy.ndarray`, evaluated in eager mode.
        """
        self._check_selector(selector)
        sliced = utils.transform_vertex_range(vertex_range)
        return ResultDAGNode(
            self,
            dag_utils.context_to_numpy(self, selector, sliced, axis),
        )
Exemple #14
0
    def to_dataframe(self, selector, vertex_range=None):
        """Select some elements of the graph and output as a pandas.DataFrame.

        Args:
            selector (dict): Select some portions of graph. Serialized to JSON
                as-is before being forwarded.
            vertex_range (dict, optional): Slice vertices. Defaults to None.

        Returns:
            :class:`graphscope.framework.context.ResultDAGNode`:
                A result holds the `pandas.DataFrame`, evaluated in eager mode.
        """
        # avoid circular import
        from graphscope.framework.context import ResultDAGNode

        # Only graphs whose type is ARROW_PROPERTY pass this check.
        check_argument(self.graph_type == graph_def_pb2.ARROW_PROPERTY)
        # Spell the method name with its underscore, matching the sibling
        # to_dataframe messages (it previously read "to dataframe").
        check_argument(
            isinstance(selector, Mapping),
            "selector of to_dataframe must be a dict",
        )
        selector = json.dumps(selector)
        vertex_range = utils.transform_vertex_range(vertex_range)
        op = dag_utils.graph_to_dataframe(self, selector, vertex_range)
        return ResultDAGNode(self, op)