Example #1
0
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0).A[0]
    degrees[degrees == 0] = 1  # Avoid division by 0
    D = sp.sparse.diags((1.0 / degrees), offsets=0)
    P = (D @ X).tolil()
    P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
Example #2
0
def local_and_global_consistency(G, alpha=0.99,
                                 max_iter=30,
                                 label_name='label'):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    ----------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]['label'] = 'A'
    >>> G.nodes[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']


    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "local_and_global_consistency() requires numpy: ",
            "http://scipy.org/ ")
    try:
        from scipy import sparse
    except ImportError:
        raise ImportError(
            "local_and_global_consistensy() requires scipy: ",
            "http://scipy.org/ ")

    def _build_propagation_matrix(X, labels, alpha):
        """Build propagation matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor

        Returns
        ----------
        S : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix

        """
        degrees = X.sum(axis=0).A[0]
        degrees[degrees == 0] = 1  # Avoid division by 0
        D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0))
        S = alpha * D2.dot(X).dot(D2)
        return S

    def _build_base_matrix(X, labels, alpha, n_classes):
        """Build base matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        ----------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """

        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = _init_label_matrix(n_samples, n_classes)

    P = _build_propagation_matrix(X, labels, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
Example #3
0
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    def _build_propagation_matrix(X, labels):
        """Build propagation matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id

        Returns
        -------
        P : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix

        """
        degrees = X.sum(axis=0).A[0]
        degrees[degrees == 0] = 1  # Avoid division by 0
        D = sp.sparse.diags((1.0 / degrees), offsets=0)
        P = (D @ X).tolil()
        P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
        return P

    def _build_base_matrix(X, labels, n_classes):
        """Build base matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        -------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError("No node on the input graph is labeled by '" +
                               label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)

    P = _build_propagation_matrix(X, labels)
    B = _build_base_matrix(X, labels, n_classes)

    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
Example #4
0
def harmonic_function(G, max_iter=30, label_name='label'):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Raises
    ----------
    `NetworkXError` if no nodes on `G` has `label_name`.

    Returns
    ----------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.node[0]['label'] = 'A'
    >>> G.node[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "harmonic_function() requires numpy: http://scipy.org/ ")
    try:
        from scipy import sparse
    except ImportError:
        raise ImportError(
            "harmonic_function() requires scipy: http://scipy.org/ ")

    def _build_propagation_matrix(X, labels):
        """Build propagation matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id

        Returns
        ----------
        P : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix

        """
        degrees = X.sum(axis=0).A[0]
        degrees[degrees == 0] = 1  # Avoid division by 0
        D = sparse.diags((1.0 / degrees), offsets=0)
        P = D.dot(X).tolil()
        P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
        return P

    def _build_base_matrix(X, labels, n_classes):
        """Build base matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        ----------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)

    P = _build_propagation_matrix(X, labels)
    B = _build_base_matrix(X, labels, n_classes)

    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
Example #5
0
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
    P = alpha * ((D2 @ X) @ D2)
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1 - alpha

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
Example #6
0
def local_and_global_consistency(G, alpha=0.99,
                                 max_iter=30,
                                 label_name='label'):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Raises
    ----------
    `NetworkXError` if no nodes on `G` has `label_name`.

    Returns
    ----------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.node[0]['label'] = 'A'
    >>> G.node[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']


    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "local_and_global_consistency() requires numpy: ",
            "http://scipy.org/ ")
    try:
        from scipy import sparse
    except ImportError:
        raise ImportError(
            "local_and_global_consistensy() requires scipy: ",
            "http://scipy.org/ ")

    def _build_propagation_matrix(X, labels, alpha):
        """Build propagation matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor

        Returns
        ----------
        S : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix

        """
        degrees = X.sum(axis=0).A[0]
        degrees[degrees == 0] = 1  # Avoid division by 0
        D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0))
        S = alpha * D2.dot(X).dot(D2)
        return S

    def _build_base_matrix(X, labels, alpha, n_classes):
        """Build base matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        ----------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """

        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = _init_label_matrix(n_samples, n_classes)

    P = _build_propagation_matrix(X, labels, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
Example #7
0
def local_and_global_consistency(G,
                                 alpha=0.99,
                                 max_iter=30,
                                 label_name='label',
                                 return_prob=True):
    """MODIFIED TO RETURN PROBABILITIES ON F
    Node classification by Local and Global Consistency

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    from networkx.algorithms.node_classification.utils import (
        _get_label_info,
        _init_label_matrix,
        _propagate,
        _predict,
    )
    import numpy as np
    from scipy import sparse

    def _build_propagation_matrix(X, labels, alpha):
        degrees = X.sum(axis=0).A[0]
        degrees[degrees == 0] = 1  # Avoid division by 0
        D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0))
        S = alpha * D2.dot(X).dot(D2)
        return S

    def _build_base_matrix(X, labels, alpha, n_classes):
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError("No node on the input graph is labeled by '" +
                               label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = _init_label_matrix(n_samples, n_classes)

    P = _build_propagation_matrix(X, labels, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)
    if return_prob:
        return (predicted,
                pd.DataFrame(
                    {label_dict[i]: F[:, i]
                     for i in range(F.shape[1])},
                    index=list(G.nodes())))
    return predicted