Ejemplo n.º 1
0
    def from_series(self, src_series, dst_series=None, store_transposed=False):
        """
        Build the numbering map from one or two series of external
        vertex identifiers.

        The series are copied into a temporary dataframe (column "s" for
        the sources and, when given, column "d" for the destinations)
        which is handed to the single-GPU or multi-GPU implementation
        depending on the exact type of ``src_series``.  The
        implementation's ``compute()`` is invoked before returning.

        Parameters
        ----------
        src_series: cudf.Series or dask_cudf.Series
            External vertex identifiers (source side) to be numbered
            by the NumberMap class.
        dst_series: cudf.Series or dask_cudf.Series, optional
            External vertex identifiers (destination side) to be
            numbered by the NumberMap class.  When supplied it must be
            of exactly the same type as ``src_series``.
        store_transposed : bool
            Identify how the graph adjacency will be used.
            If True, the graph will be organized by destination.
            If False, the graph will be organized by source

        Raises
        ------
        Exception
            If the map has already been populated, if the two series
            are of different types, or if ``src_series`` is neither a
            cudf.Series nor a dask_cudf.Series.
        """
        # A map may only be populated once.
        if self.implementation is not None:
            raise Exception("NumberMap is already populated")

        series_kind = type(src_series)
        if dst_series is not None and series_kind != type(dst_series):
            raise Exception("src_series and dst_series must have same type")

        # Choose the empty frame and the backend class from the exact
        # (not subclass-aware) type of the source series.
        if series_kind is cudf.Series:
            frame = cudf.DataFrame()
            backend_cls = NumberMap.SingleGPU
        elif series_kind is dask_cudf.Series:
            frame = dask_cudf.DataFrame()
            backend_cls = NumberMap.MultiGPU
        else:
            raise Exception("src_series must be cudf.Series or "
                            "dask_cudf.Series")

        frame["s"] = src_series

        # The destination column list stays None when no destination
        # series was provided.
        dst_columns = None
        if dst_series is not None:
            frame["d"] = dst_series
            dst_columns = ["d"]

        self.implementation = backend_cls(
            frame, ["s"], dst_columns, self.id_type, store_transposed
        )
        self.implementation.compute()
Ejemplo n.º 2
0
    def to_internal_vertex_id(self, df, col_names=None):
        """
        Translate a collection of external vertex ids into internal
        vertex ids.

        A Series input is first wrapped in a one-column dataframe whose
        column is named "0"; any other input is forwarded unchanged
        together with ``col_names``.  The lookup itself is delegated to
        the populated implementation object.

        Parameters
        ----------
        df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series
            Contains a list of external vertex identifiers that will be
            converted into internal vertex identifiers

        col_names: (optional) list of strings
            This list of 1 or more strings contain the names
            of the columns that uniquely identify an external
            vertex identifier.  Ignored when ``df`` is a Series.

        Returns
        ---------
        vertex_ids : cudf.Series or dask_cudf.Series
            The vertex identifiers.  Note that to_internal_vertex_id
            does not guarantee order or partitioning (in the case of
            dask_cudf) of vertex ids. If order matters use
            add_internal_vertex_id

        """
        input_kind = type(df)

        # Only exact Series types get wrapped; pick the matching
        # dataframe class for the wrapper.
        if input_kind is cudf.Series:
            wrapper_cls = cudf.DataFrame
        elif input_kind is dask_cudf.Series:
            wrapper_cls = dask_cudf.DataFrame
        else:
            wrapper_cls = None

        if wrapper_cls is None:
            lookup_df, lookup_cols = df, col_names
        else:
            lookup_df = wrapper_cls()
            lookup_df["0"] = df
            lookup_cols = ["0"]

        reply = self.implementation.to_internal_vertex_id(
            lookup_df, lookup_cols)

        # NOTE(review): "0" is the column name used above for wrapped
        # Series input, yet the reply is indexed by "0" only when the
        # caller passed a DataFrame -- confirm against the
        # implementation's return value that this is intended.
        if input_kind in [cudf.DataFrame, dask_cudf.DataFrame]:
            return reply["0"]
        return reply