Python symmetrize Examples, cugraph.structure.symmetrize.symmetrize Python Examples

Example #1

0

Show file

File: simpleGraph.py Project: mattf/cugraph

 def to_undirected(self, G):
     """
     Fill ``G`` with the undirected form of this graph.

     The renumbering flag and the renumber map are copied onto ``G``
     first. When this graph is already undirected, ``G`` simply shares
     this graph's edge list, adjacency list and transposed adjacency
     list. Otherwise the 'src'/'dst' columns of the edge list (plus
     'weights' when the edge list is weighted) are passed through
     ``symmetrize`` and the result is stored on ``G`` as a new edge list.

     Parameters
     ----------
     G : graph object
         Target graph. It is modified in place; nothing is returned.
     """
     G.properties.renumbered = self.properties.renumbered
     G.renumber_map = self.renumber_map

     # Already undirected: share the existing representations as they are.
     if self.properties.directed is False:
         G.edgelist = self.edgelist
         G.adjlist = self.adjlist
         G.transposedadjlist = self.transposedadjlist
         return

     edges = self.edgelist.edgelist_df
     weights = None
     if self.edgelist.weights:
         src, dst, weights = symmetrize(
             edges["src"], edges["dst"], edges["weights"])
     else:
         src, dst = symmetrize(edges["src"], edges["dst"])
     G.edgelist = simpleGraphImpl.EdgeList(src, dst, weights)

Example #2

0

Show file

File: graph.py Project: h2oai/cugraph

    def to_undirected(self):
        """
        Build the undirected counterpart of this graph.

        An object that is exactly a ``Graph`` is handed back as-is (the
        same object, not a copy). For an object that is exactly a
        ``DiGraph``, the 'src'/'dst' columns of its edge list (plus
        'weights' when the edge list is weighted) are passed through
        ``symmetrize`` and stored in a new ``Graph`` that carries the same
        renumbering flag and renumber map.

        Returns
        -------
        G : Graph or None
            ``self`` for a ``Graph``, a new ``Graph`` for a ``DiGraph``,
            and ``None`` for any other type (subclasses included, since
            the check is on the exact type).

        Raises
        ------
        Exception
            If the graph is distributed.

        Examples
        --------
        >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
        ...                   dtype=['int32', 'int32', 'float32'], header=None)
        >>> DiG = cugraph.DiGraph()
        >>> DiG.from_cudf_edgelist(M, '0', '1')
        >>> G = DiG.to_undirected()

        """
        if self.distributed:
            raise Exception("Not supported for distributed graph")

        kind = type(self)
        if kind is Graph:
            return self
        if kind is not DiGraph:
            # Neither of the two supported exact types.
            return None

        undirected = Graph()
        edges = self.edgelist.edgelist_df
        undirected.renumbered = self.renumbered
        undirected.renumber_map = self.renumber_map

        weights = None
        if self.edgelist.weights:
            src, dst, weights = symmetrize(
                edges["src"], edges["dst"], edges["weights"]
            )
        else:
            src, dst = symmetrize(edges["src"], edges["dst"])

        undirected.edgelist = Graph.EdgeList(src, dst, weights)
        return undirected

Example #3

0

Show file

File: graph.py Project: zeta1999/cugraph

    def from_cudf_edgelist(self,
                           input_df,
                           source='source',
                           destination='destination',
                           edge_attr=None,
                           renumber=True):
        """
        Populate this (empty) graph from an edge list held in a dataframe.

        The source, destination and optional edge attribute columns are
        looked up in ``input_df`` by name. With ``renumber`` enabled the
        endpoints are passed through a renumbering helper and the
        resulting map is stored with the edge list. Unless the graph is
        flagged as symmetrized or multi, the columns are passed through
        ``symmetrize`` before the edge list is stored.

        Parameters
        ----------
        input_df : cudf.DataFrame
            Dataframe holding one row per edge.
        source : str or list of str
            Name(s) of the source column(s). A list (together with a list
            for ``destination``) requires ``renumber=True``.
        destination : str or list of str
            Name(s) of the destination column(s).
        edge_attr : str or list of str, optional
            Name of the weights column. For a multi graph this must be a
            list of column names.
        renumber : bool
            Whether to renumber the vertex ids. Per the original
            documentation, this should be True when the ids are not
            already in the range [0, V), V being the number of vertices.

        Raises
        ------
        Exception
            If the graph already holds an edge list or adjacency list, if
            ``edge_attr`` is not a list for a multi graph, or if list
            valued ``source``/``destination`` are given with
            ``renumber=False``.

        Examples
        --------
        >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
        ...                   dtype=['int32', 'int32', 'float32'], header=None)
        >>> G = cugraph.Graph()
        >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2',
        ...                      renumber=False)
        """

        if not (self.edgelist is None and self.adjlist is None):
            raise Exception('Graph already has values')

        # Gather the edge attribute column(s).
        if self.multi:
            if type(edge_attr) is not list:
                raise Exception('edge_attr should be a list of column names')
            value_col = {name: input_df[name] for name in edge_attr}
        elif edge_attr is None:
            value_col = None
        else:
            value_col = input_df[edge_attr]

        # Resolve the endpoint columns, renumbering them when requested.
        multi_column_ids = type(source) is list and type(destination) is list
        renumber_map = None
        if not renumber:
            if multi_column_ids:
                raise Exception('set renumber to True for multi column ids')
            source_col = input_df[source]
            dest_col = input_df[destination]
        elif multi_column_ids:
            source_col, dest_col, renumber_map = multi_rnb(
                input_df, source, destination)
            self.renumbered = True
        else:
            source_col, dest_col, renumber_map = rnb(
                input_df[source], input_df[destination])
            self.renumbered = True

        if not (self.symmetrized or self.multi):
            if value_col is None:
                source_col, dest_col = symmetrize(source_col, dest_col)
            else:
                source_col, dest_col, value_col = symmetrize(
                    source_col, dest_col, value_col)

        self.edgelist = Graph.EdgeList(source_col, dest_col, value_col,
                                       renumber_map)

Example #4

0

Show file

File: graph.py Project: h2oai/cugraph

    def from_cudf_edgelist(
        self,
        input_df,
        source="source",
        destination="destination",
        edge_attr=None,
        renumber=True,
    ):
        """
        Initialize a graph from the edge list. It is an error to call this
        method on an initialized Graph object.

        The source, destination and optional weight columns are looked up
        in ``input_df`` by the names given in ``source``, ``destination``
        and ``edge_attr``.

        By default, renumbering is enabled to map the source and destination
        vertices into an index in the range [0, V) where V is the number
        of vertices.  If the input vertices are a single column of integers
        in the range [0, V), renumbering can be disabled and the original
        external vertex ids will be used.

        Unless the graph is flagged as symmetrized or multi, the columns
        are passed through ``symmetrize`` before the edge list is stored.

        Parameters
        ----------
        input_df : cudf.DataFrame or dask_cudf.DataFrame
            Dataframe holding one row per edge.
            If a dask_cudf.DataFrame is passed it is gathered into a single
            cudf.DataFrame (``compute()``). For the distributed path please
            use from_dask_cudf_edgelist.
        source : str or list of str
            Name(s) of the source column(s). A list (together with a list
            for ``destination``) requires ``renumber=True``.
        destination : str or list of str
            Name(s) of the destination column(s).
        edge_attr : str or list of str, optional
            Name of the weights column. For a multi graph this must be a
            list of column names.
        renumber : bool
            If source and destination indices are not in range 0 to V where V
            is number of vertices, renumber argument should be True.

        Raises
        ------
        Exception
            If the graph already has values, if ``input_df`` is neither a
            cudf nor a dask_cudf dataframe, if the edge list exceeds the
            single-GPU size limit, if list-valued ids are used with
            ``renumber=False``, or if ``edge_attr`` is not a list for a
            multi graph.

        Examples
        --------
        >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ',
        ...                    dtype=['int32', 'int32', 'float32'], header=None)
        >>> G = cugraph.Graph()
        >>> G.from_cudf_edgelist(df, source='0', destination='1',
        ...                      edge_attr='2', renumber=False)

        """
        if self.edgelist is not None or self.adjlist is not None:
            raise Exception("Graph already has values")

        # Largest edge count accepted for a single GPU.
        # NOTE(review): just under 2**31 - 1, presumably a 32-bit index
        # limit with some headroom - confirm.
        max_edges = 2147483100

        # Consolidation
        # The messages are built from adjacent string literals; a backslash
        # continuation inside the literal would embed the next line's
        # indentation in the message text.
        if isinstance(input_df, cudf.DataFrame):
            if len(input_df[source]) > max_edges:
                raise Exception("cudf dataFrame edge list is too big "
                                "to fit in a single GPU")
            elist = input_df
        elif isinstance(input_df, dask_cudf.DataFrame):
            if len(input_df[source]) > max_edges:
                raise Exception("dask_cudf dataFrame edge list is too big "
                                "to fit in a single GPU")
            elist = input_df.compute().reset_index(drop=True)
        else:
            raise Exception("input should be a cudf.DataFrame or "
                            "a dask_cudf dataFrame")

        renumber_map = None
        if renumber:
            # FIXME: Should SG do lazy evaluation like MG?
            elist, renumber_map = NumberMap.renumber(
                elist, source, destination,
                store_transposed=False
            )
            # After renumbering the endpoints are read from these columns.
            source = 'src'
            destination = 'dst'
            self.renumbered = True
            self.renumber_map = renumber_map
        else:
            if type(source) is list and type(destination) is list:
                raise Exception('set renumber to True for multi column ids')

        source_col = elist[source]
        dest_col = elist[destination]

        if self.multi:
            if type(edge_attr) is not list:
                raise Exception("edge_attr should be a list of column names")
            value_col = {col_name: elist[col_name] for col_name in edge_attr}
        elif edge_attr is not None:
            value_col = elist[edge_attr]
        else:
            value_col = None

        if not self.symmetrized and not self.multi:
            if value_col is not None:
                source_col, dest_col, value_col = symmetrize(
                    source_col, dest_col, value_col
                )
            else:
                source_col, dest_col = symmetrize(source_col, dest_col)

        self.edgelist = Graph.EdgeList(
            source_col, dest_col, value_col
        )

        if self.batch_enabled:
            self._replicate_edgelist()

        # Assigned again here so the attribute is reset to None when
        # renumbering was not requested.
        self.renumber_map = renumber_map

Example #5

0

Show file

File: path_retrieval.py Project: goncaloperes/cugraph

def get_traversed_cost(df, source, source_col, dest_col, value_col):
    """
    Sum edge weights along each vertex's path back to the start vertex.

    Takes the DataFrame result of a BFS or SSSP call and attaches to each
    (vertex, predecessor) pair the weight of the matching edge from the
    symmetrized edge list, then delegates the accumulation along the
    predecessor chain to ``path_retrieval_wrapper.get_traversed_cost``.
    The source_col, dest_col identifiers need to match with the vertex and
    predecessor columns of df.

    Input Parameters
    ----------
    df : cudf.DataFrame
        Result of a BFS or SSSP call; must contain the columns 'vertex',
        'distance' and 'predecessor'.
    source : int
        Index of the source vertex. Its weight is set to 0.
    source_col : cudf.Series
        Source index for each edge.
    dest_col : cudf.Series
        Destination index for each edge.
    value_col : cudf.Series
        Value associated with each edge. Should be a floating type, since
        the fill value for unmatched rows comes from ``np.finfo``.

    Returns
    ---------
    df : cudf.DataFrame
        The frame produced by the wrapper, with its 'vertex' column
        replaced by the un-renumbered vertex ids. Rows with no matching
        edge were given the maximum value of the weight dtype as weight.

    Raises
    ------
    ValueError
        If 'vertex', 'distance' or 'predecessor' is missing from ``df``.
    """

    # Required result columns, checked in this order, with their messages.
    missing_column_errors = {
        'vertex': ("DataFrame does not appear to be a BFS or "
                   "SSP result - 'vertex' column missing"),
        'distance': ("DataFrame does not appear to be a BFS or "
                     "SSP result - 'distance' column missing"),
        'predecessor': ("DataFrame does not appear to be a BFS or "
                        "SSP result - 'predecessor' column missing"),
    }
    for column, message in missing_column_errors.items():
        if column not in df.columns:
            raise ValueError(message)

    src, dst, val = symmetrize(source_col, dest_col, value_col)

    edges = cudf.DataFrame()
    edges['source'] = src
    edges['destination'] = dst
    edges['weights'] = val

    # Left join keeps every traversal row, matched to an edge or not.
    merged = df.merge(edges,
                      left_on=['vertex', 'predecessor'],
                      right_on=['source', 'destination'],
                      how="left")

    # Unmatched rows get the max float; the source vertex gets weight 0.
    unmatched_weight = np.finfo(val.dtype).max
    merged[['weights']] = merged[['weights']].fillna(unmatched_weight)
    is_source = merged['vertex'] == source
    merged.loc[is_source, 'weights'] = 0

    # Renumber
    renumbered_gdf, renumber_map = NumberMap.renumber(merged, ["vertex"],
                                                      ["predecessor"],
                                                      preserve_order=True)
    restored_names = {'src': 'vertex', 'dst': 'predecessor'}
    renumbered_gdf = renumbered_gdf.rename(columns=restored_names)

    # Internal id that external id -1 maps to; passed to the wrapper as
    # the stop vertex.
    stop_ids = renumber_map.to_internal_vertex_id(cudf.Series(-1))
    stop_vertex = stop_ids.values[0]

    out_df = path_retrieval_wrapper.get_traversed_cost(renumbered_gdf,
                                                       stop_vertex)

    # Unrenumber
    external = renumber_map.unrenumber(renumbered_gdf,
                                       'vertex',
                                       preserve_order=True)
    out_df['vertex'] = external["vertex"]
    return out_df

Example #6

0

Show file

File: simpleGraph.py Project: mattf/cugraph

    def __from_edgelist(
        self,
        input_df,
        source="source",
        destination="destination",
        edge_attr=None,
        renumber=True,
    ):
        """
        Populate this graph's edge list from a cudf or dask_cudf dataframe.

        Steps, in order: check that the named source/destination columns
        exist, gather a dask_cudf input into a single dataframe, optionally
        renumber the vertices, pass the columns through ``symmetrize``
        (which is told whether the graph is multi-edge and whether it is
        undirected), store the edge list and, if batching is enabled,
        replicate it.

        Parameters
        ----------
        input_df : cudf.DataFrame or dask_cudf.DataFrame
            Dataframe holding one row per edge.
        source : str or list of str
            Name(s) of the source column(s).
        destination : str or list of str
            Name(s) of the destination column(s).
        edge_attr : str or list of str, optional
            Name(s) of the edge value column(s); marks the graph weighted.
        renumber : bool
            Whether to renumber the vertex ids. Required when ``source``
            and ``destination`` are both lists.

        Raises
        ------
        Exception
            If a source/destination column is missing, if the input type
            is unsupported, if the edge list exceeds the single-GPU size
            limit, or if list-valued ids are used with ``renumber=False``.
        """

        # Verify column names present in input DataFrame
        src_names = source if isinstance(source, list) else [source]
        dst_names = destination if isinstance(destination, list) \
            else [destination]
        not_found = (set(src_names) | set(dst_names)) - set(input_df.columns)
        if not_found:
            # FIXME: Raise concrete Exceptions
            raise Exception("source column names and/or destination column "
                            "names not found in input. Recheck the source and "
                            "destination parameters")

        # FIXME: check if the consolidated graph fits on the
        # device before gathering all the edge lists

        # Consolidation
        if isinstance(input_df, cudf.DataFrame):
            too_big = ("cudf dataFrame edge list is too big "
                       "to fit in a single GPU")
            must_gather = False
        elif isinstance(input_df, dask_cudf.DataFrame):
            too_big = ("dask_cudf dataFrame edge list is too big "
                       "to fit in a single GPU")
            must_gather = True
        else:
            raise Exception("input should be a cudf.DataFrame or "
                            "a dask_cudf dataFrame")

        if len(input_df[source]) > 2147483100:
            raise Exception(too_big)

        if must_gather:
            elist = input_df.compute().reset_index(drop=True)
        else:
            elist = input_df

        # Renumbering
        self.renumber_map = None
        if renumber:
            # FIXME: Should SG do lazy evaluation like MG?
            elist, renumber_map = NumberMap.renumber(elist,
                                                     source,
                                                     destination,
                                                     store_transposed=False)
            # After renumbering the endpoints are read from these columns.
            source, destination = "src", "dst"
            self.properties.renumbered = True
            self.renumber_map = renumber_map
        elif type(source) is list and type(destination) is list:
            raise Exception("set renumber to True for multi column ids")

        # Populate graph edgelist
        source_col = elist[source]
        dest_col = elist[destination]

        value_col = None
        if edge_attr is not None:
            self.weighted = True
            value_col = elist[edge_attr]

        # TODO: Update Symmetrize to work on Graph and/or DataFrame
        options = {
            "multi": self.properties.multi_edge,
            "symmetrize": not self.properties.directed,
        }
        if value_col is None:
            source_col, dest_col = symmetrize(source_col, dest_col, **options)
        else:
            source_col, dest_col, value_col = symmetrize(
                source_col, dest_col, value_col, **options)
            # A dataframe of values is stored as a dict of its columns.
            if isinstance(value_col, cudf.DataFrame):
                value_col = {name: value_col[name]
                             for name in value_col.columns}

        self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col,
                                                 value_col)

        if self.batch_enabled:
            self._replicate_edgelist()