def build_laplacian_regularized(X, laplacian_regularization, var=1.0, eps=0.0, k=0, laplacian_normalization=""):
    """
    Construct a regularized graph Laplacian from data.

    :param X: (n x m) matrix of m-dimensional samples
    :param laplacian_regularization: amount gamma added to the diagonal of L
    :param var: sigma value for the exponential similarity, already squared
    :param eps: threshold eps for epsilon-graphs
    :param k: number of neighbours for k-nn; if zero, an epsilon-graph is built
    :param laplacian_normalization: which Laplacian to build —
        'unn' (unnormalized), 'sym' (symmetric) or 'rw' (random-walk)
    :return: (n x n) regularized Laplacian Q = L + gamma * I
    """
    # Similarity graph first, then its (possibly normalized) Laplacian.
    similarity = build_similarity_graph(X, var, eps, k)
    laplacian = build_laplacian(similarity, laplacian_normalization)
    # Shift the diagonal by gamma to regularize the spectrum.
    n = similarity.shape[0]
    return laplacian + laplacian_regularization * np.eye(n)
def soft_hfs(X, Y, c_l, c_u, laplacian_regularization=0.000001, var=1, eps=0, k=10, laplacian_normalization=""):
    """
    Soft (unconstrained) Harmonic Function Solution.

    Both labeled and unlabeled nodes may deviate from their targets; the
    confidence coefficients c_l (labeled) and c_u (unlabeled) control the
    trade-off between fitting the labels and graph smoothness.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n,) vector of labels in [1, ..., num_classes]; 0 marks unlabeled
    :param c_l: confidence coefficient for labeled nodes
    :param c_u: confidence coefficient for unlabeled nodes
    :param laplacian_regularization: gamma added to the Laplacian diagonal
    :param var: sigma value for the exponential similarity, already squared
    :param eps: threshold eps for epsilon-graphs
    :param k: number of neighbours for k-nn; if zero, an epsilon-graph is used
    :param laplacian_normalization: '' / 'unn', 'sym' or 'rw'
    :return: (n,) array of class assignments in [1, ..., num_classes]
    """
    num_samples = np.size(X, 0)

    # Indices of labeled and unlabeled nodes.
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Inverse of the diagonal confidence matrix C.
    # (C itself was previously built but never used — removed.)
    diagC = np.zeros((num_samples,))
    diagC[l_idx] = c_l
    diagC[u_idx] = c_u
    C_inv = np.diag(1 / diagC)

    # One-hot targets; drop column 0, which codes "unlabeled".
    y = one_hot_target(Y)[:, 1:]

    # Regularized Laplacian Q = L + gamma * I.
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)
    Q = L + laplacian_regularization * np.eye(num_samples)

    # Closed-form solution f* = (C^{-1} Q + I)^{-1} y.
    # Solve the linear system directly instead of forming the explicit
    # inverse — cheaper and numerically more stable.
    fstar = np.linalg.solve(np.dot(C_inv, Q) + np.eye(num_samples), y)

    # +1 because classes are numbered from 1.
    labels = np.argmax(fstar, axis=1) + 1
    return labels
def hard_hfs(X, Y, laplacian_regularization=0.000001, var=1, eps=0, k=10, laplacian_normalization=""):
    """
    Hard (constrained) Harmonic Function Solution.

    Labeled nodes keep their given labels; each unlabeled node receives the
    class maximizing the harmonic score f_u = (L_uu + gamma I)^{-1} W_ul f_l.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n,) vector of labels in [1, ..., num_classes]; 0 marks unlabeled
    :param laplacian_regularization: gamma added to the L_uu diagonal
    :param var: sigma value for the exponential similarity, already squared
    :param eps: threshold eps for epsilon-graphs
    :param k: number of neighbours for k-nn; if zero, an epsilon-graph is used
    :param laplacian_normalization: '' / 'unn', 'sym' or 'rw'
    :return: (n,) int array of class assignments
    """
    n = np.size(X, 0)

    # Split the nodes into labeled and unlabeled index sets.
    labeled = np.argwhere(Y != 0)[:, 0]
    unlabeled = np.argwhere(Y == 0)[:, 0]

    # Graph and Laplacian, plus the blocks coupling the two node sets.
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)
    Luu = L[unlabeled, :][:, unlabeled]
    Wul = W[unlabeled, :][:, labeled]

    # One-hot encoding of the known labels.
    # NOTE(review): unlike soft_hfs, column 0 is not stripped here — confirm
    # one_hot_target's column layout matches the argmax+1 mapping below.
    fl = one_hot_target(Y[labeled])

    # Regularize L_uu so it is invertible, then recover the unlabeled scores.
    Q = Luu + laplacian_regularization * np.eye(unlabeled.shape[0])
    fu = np.dot(np.linalg.inv(Q), np.dot(Wul, fl))

    # Final labeling: thresholded predictions for unlabeled nodes
    # (+1 because classes start at 1), the given labels everywhere else.
    labels = np.zeros((n,), dtype=int)
    labels[unlabeled] = np.argmax(fu, axis=1) + 1
    labels[labeled] = Y[labeled]
    return labels
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Hard (constrained) Harmonic Function Solution.

    Labeled nodes keep their labels; each unlabeled node gets the class that
    maximizes the harmonic solution f_u = (L_uu + gamma I)^{-1} (-L_ul f_l).

    NOTE(review): this redefines hard_hfs from earlier in the file and shadows
    that implementation at import time — confirm which version is intended.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with labels [0, 1, ..., num_classes] (0 = unlabeled)
    :param laplacian_regularization: gamma added to the Laplacian diagonal
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon-graphs
    :param k: number of neighbours k for k-nn; if zero, use epsilon-graph
    :param laplacian_normalization: '' / 'unn', 'sym' or 'rw'
    :return: (n,) int array of class assignments in [1, ..., num_classes]
    """
    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    # Actual class values, 0 (unlabeled) excluded; e.g. Cl=[0,3,5] -> [3,5].
    classes = Cl[Cl != 0]
    num_classes = len(classes)

    # l_idx: indices of labeled nodes; u_idx: indices of unlabeled nodes.
    l_idx = np.where(Y != 0)[0]
    u_idx = np.where(Y == 0)[0]

    # BUG FIX: the graph options were silently ignored — the similarity graph
    # was always built with defaults and the Laplacian always unnormalized.
    # Forward var/eps/k and laplacian_normalization as documented.
    W = build_similarity_graph(X, var, eps, k)
    L = build_laplacian(W, laplacian_normalization)

    # One-hot encoding of the labeled targets: column j codes class classes[j].
    # BUG FIX: the previous int(c)-1 indexing went out of bounds for
    # non-contiguous class values (the docstring's own Cl=[0,3,5] example);
    # searchsorted on the sorted class values is equivalent for classes 1..K.
    f_l = np.zeros((len(l_idx), num_classes))
    f_l[np.arange(len(l_idx)), np.searchsorted(classes, Y[l_idx])] = 1

    # Harmonic solution on the unlabeled block.
    # BUG FIX: laplacian_regularization was accepted but never used; adding
    # gamma I makes L_uu invertible, so a direct solve replaces the
    # pseudo-inverse.
    L_uu = L[u_idx][:, u_idx] + laplacian_regularization * np.eye(len(u_idx))
    L_ul = L[u_idx][:, l_idx]
    f_u = np.linalg.solve(L_uu, -np.dot(L_ul, f_l))

    # labels: (n,) class assignments in [1, ..., num_classes].
    # (int dtype — the previous float array leaked float labels to callers.)
    labels = np.zeros(num_samples, dtype=int)
    labels[l_idx] = Y[l_idx]
    labels[u_idx] = classes[np.argmax(f_u, axis=1)]
    return labels