Example #1
0
def build_laplacian_regularized(X,
                                laplacian_regularization,
                                var=1.0,
                                eps=0.0,
                                k=0,
                                laplacian_normalization=""):
    """
    Construct a regularized graph Laplacian Q = L + gamma * I from data.

    The similarity graph is obtained from ``build_similarity_graph`` and the
    Laplacian from ``build_laplacian``; this function only adds the diagonal
    regularization term on top of them.

    :param X: (n x m) matrix of m-dimensional samples
    :param laplacian_regularization: gamma, the multiple of the identity added to the Laplacian
    :param var: the sigma value for the exponential function, already squared
                (forwarded to ``build_similarity_graph``)
    :param eps: threshold eps for epsilon graphs (forwarded to ``build_similarity_graph``)
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
              (forwarded to ``build_similarity_graph``)
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    (forwarded to ``build_laplacian``)
                                    'unn':  unnormalized,
                                    'sym': symmetric normalization
                                    'rw':  random-walk normalization
    :return: Q (n x n) matrix, the regularized Laplacian
    """
    # Similarity graph over the samples; its first dimension gives the node count.
    similarity = build_similarity_graph(X, var, eps, k)
    n_nodes = similarity.shape[0]

    # Laplacian of that graph, in the requested normalization.
    laplacian = build_laplacian(similarity, laplacian_normalization)

    # Regularized Laplacian: shift the diagonal by gamma.
    return laplacian + laplacian_regularization * np.eye(n_nodes)
Example #2
0
def soft_hfs(X,
             Y,
             c_l,
             c_u,
             laplacian_regularization=0.000001,
             var=1,
             eps=0,
             k=10,
             laplacian_normalization=""):
    """
    Perform soft (unconstrained) HFS label propagation.

    With Q the regularized Laplacian and C the diagonal matrix of per-node
    confidence weights, the solution is ``f* = (C^-1 Q + I)^-1 y``. It is
    computed here from the algebraically equivalent linear system
    ``(Q + C) f* = C y``, which needs neither ``C^-1`` nor an explicit matrix
    inverse and therefore stays well defined when ``c_l`` or ``c_u`` is 0.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: vector of n node labels in [1, ..., num_classes] (0 is unlabeled)
    :param c_l: coefficient of the C matrix for labelled nodes
    :param c_u: coefficient of the C matrix for unlabelled nodes
    :param laplacian_regularization: multiple of the identity added to the Laplacian
    :param var: forwarded to ``helper.build_similarity_graph``
    :param eps: forwarded to ``helper.build_similarity_graph``
    :param k: forwarded to ``helper.build_similarity_graph``
    :param laplacian_normalization: forwarded to ``helper.build_laplacian``
    :return: labels, class assignment (starting at 1) for each of the n nodes
    """
    num_samples = np.size(X, 0)

    # Indices of labelled and unlabelled data
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Diagonal of the C matrix: one confidence weight per node
    diagC = np.zeros((num_samples, ))
    diagC[l_idx] = c_l
    diagC[u_idx] = c_u

    # Target matrix; the first column of the encoding is dropped
    # (presumably the column of the "unlabelled" class 0 -- depends on
    # one_hot_target, which is defined elsewhere).
    y = one_hot_target(Y)[:, 1:]

    # Build similarity graph and Laplacian
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)

    # Q matrix (regularized laplacian)
    Q = L + laplacian_regularization * np.eye(num_samples)

    # Solve (Q + C) f* = C y; multiplying rows of y by diagC is C y.
    fstar = np.linalg.solve(Q + np.diag(diagC), diagC[:, np.newaxis] * y)

    # +1 : labels start at 1
    labels = np.argmax(fstar, axis=1) + 1

    return labels
Example #3
0
def hard_hfs(X,
             Y,
             laplacian_regularization=0.000001,
             var=1,
             eps=0,
             k=10,
             laplacian_normalization=""):
    """
    Perform hard (constrained) HFS label propagation.

    Labelled nodes keep their given label. Scores of unlabelled nodes solve
    ``(L_uu + gamma * I) f_u = -L_ul f_l``, where the blocks are taken from
    the Laplacian in the requested normalization. For an unnormalized
    Laplacian ``L = D - W`` the right-hand side ``-L_ul f_l`` equals
    ``W_ul f_l``; using the Laplacian block keeps the system consistent for
    the normalized variants as well.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: vector of n node labels in [1, ..., num_classes] (0 is unlabeled)
    :param laplacian_regularization: gamma, multiple of the identity added to L_uu
    :param var: forwarded to ``helper.build_similarity_graph``
    :param eps: forwarded to ``helper.build_similarity_graph``
    :param k: forwarded to ``helper.build_similarity_graph``
    :param laplacian_normalization: forwarded to ``helper.build_laplacian``
    :return: labels, integer class assignment for each of the n nodes
    """
    num_samples = np.size(X, 0)

    # Indices of labelled and unlabelled data
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Build similarity graph and Laplacian
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)

    # Extract blocks corresponding to unlabelled and labelled data
    Luu = L[u_idx, :][:, u_idx]
    Lul = L[u_idx, :][:, l_idx]

    # fl with one hot encoding
    fl = one_hot_target(Y[l_idx])

    # Compute fu from the regularized Laplacian block; a linear solve
    # replaces the explicit inverse (cheaper and numerically more stable).
    Q = Luu + laplacian_regularization * np.eye(u_idx.shape[0])
    fu = np.linalg.solve(Q, -np.dot(Lul, fl))

    # Infer label from computed fu: highest-scoring column wins
    fu_lab = np.argmax(fu, axis=1)

    # Consolidate labels from fu and fl
    labels = np.zeros((num_samples, ), dtype=int)
    # +1 because labels start at 1
    labels[u_idx] = fu_lab + 1
    labels[l_idx] = Y[l_idx]

    return labels
Example #4
0
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Function to perform hard (constrained) HFS.

    Labelled nodes keep their given label. Scores of unlabelled nodes are
    ``f_u = -Q_uu^+ Q_ul f_l`` where ``Q = L + laplacian_regularization * I``
    and ``f_l`` is the one-hot encoding of the labelled nodes.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with nodes labels [0, 1, ... , num_classes] (0 is unlabeled).
              Non-zero labels need not be contiguous: e.g. with distinct values
              [0, 3, 5] the first score column codes class 3 and the second class 5.
    :param laplacian_regularization: regularization to add to the Laplacian
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: string selecting which version of the laplacian matrix to construct
                                    'unn':  unnormalized,
                                    'sym': symmetric normalization
                                    'rw':  random-walk normalization
    :return: labels, float array with the class assignment for each of the n nodes,
             expressed with the same label values as the non-zero entries of Y
    :raises ValueError: if Y contains no labelled (non-zero) node
    """
    num_samples = np.size(X, 0)
    y = np.asarray(Y).ravel()

    # Sorted distinct class labels; 0 only marks "unlabelled" and is not a class.
    classes = np.unique(y[y != 0])
    num_classes = len(classes)
    if num_classes == 0:
        raise ValueError("hard_hfs requires at least one labelled node")

    # Indices of labelled and unlabelled nodes
    l_idx = np.where(y != 0)[0]
    u_idx = np.where(y == 0)[0]

    # Graph and regularized Laplacian, honouring every caller-supplied setting.
    W = build_similarity_graph(X, var, eps, k)
    L = build_laplacian(W, laplacian_normalization)
    Q = L + laplacian_regularization * np.eye(num_samples)

    # f_l: (l x num_classes) one-hot encoding. The column of a label is its
    # rank among the sorted distinct labels, so gaps in the label values
    # cannot index out of bounds.
    f_l = np.zeros((len(l_idx), num_classes))
    f_l[np.arange(len(l_idx)), np.searchsorted(classes, y[l_idx])] = 1

    # f_u: (u x num_classes) harmonic solution on the unlabelled nodes.
    # The pseudo-inverse is kept so a singular block (e.g. zero
    # regularization with a disconnected component) does not raise.
    Q_uu = Q[u_idx][:, u_idx]
    Q_ul = Q[u_idx][:, l_idx]
    f_u = np.linalg.pinv(Q_uu).dot(-np.dot(Q_ul, f_l))

    # Label assignment: labelled nodes keep their label, unlabelled nodes
    # take the class of their highest-scoring column.
    labels = np.zeros(num_samples)
    labels[l_idx] = y[l_idx]
    labels[u_idx] = classes[np.argmax(f_u, axis=1)]

    return labels