def build_laplacian_regularized(X, laplacian_regularization, var=1.0, eps=0.0, k=0, laplacian_normalization=""):
    """
    Construct a regularized graph Laplacian from data.

    :param X: (n x m) matrix of m-dimensional samples
    :param laplacian_regularization: regularization (gamma) added to the Laplacian diagonal
    :param var: the sigma value for the exponential similarity, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours for k-nn; if zero, an epsilon-graph is used
    :param laplacian_normalization: which Laplacian to build:
        'unn' unnormalized, 'sym' symmetric normalization, 'rw' random-walk normalization
    :return: Q, the (n x n) regularized Laplacian
    """
    # Similarity graph from the raw samples, then its (possibly normalized) Laplacian.
    similarity = build_similarity_graph(X, var, eps, k)
    laplacian = build_laplacian(similarity, laplacian_normalization)
    # Q = L + gamma * I  (the regularization makes Q invertible)
    return laplacian + laplacian_regularization * np.eye(similarity.shape[0])
def soft_hfs(X, Y, c_l, c_u, laplacian_regularization=0.000001, var=1, eps=0, k=10, laplacian_normalization=""):
    """
    Perform soft (unconstrained) HFS.

    Solves f* = (C^-1 Q + I)^-1 y, where Q is the regularized Laplacian and C
    weighs the confidence in labeled (c_l) vs unlabeled (c_u) targets.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with node labels [1, ..., num_classes] (0 is unlabeled)
    :param c_l: confidence coefficient for labeled samples (diagonal of C)
    :param c_u: confidence coefficient for unlabeled samples (diagonal of C)
    :param laplacian_regularization: regularization added to the Laplacian diagonal
    :param var: sigma value (squared) for the exponential similarity
    :param eps: threshold for epsilon graphs
    :param k: number of neighbours for k-nn graphs
    :param laplacian_normalization: '', 'sym' or 'rw' Laplacian variant
    :return: labels, (n,) class assignments in [1, ..., num_classes]
    """
    num_samples = np.size(X, 0)

    # Indices of labelled and unlabelled data.
    l_idx = np.argwhere(Y != 0)[:, 0]
    u_idx = np.argwhere(Y == 0)[:, 0]

    # Only the inverse of C is needed in the closed form; the original also
    # built C = np.diag(diagC), which was never used — removed.
    diagC = np.zeros((num_samples, ))
    diagC[l_idx] = c_l
    diagC[u_idx] = c_u
    C_inv = np.diag(1 / diagC)

    # Target vector: drop the first one-hot column, which codes "unlabeled" (0).
    y = one_hot_target(Y)[:, 1:]

    # Build similarity graph and regularized Laplacian Q = L + gamma * I.
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)
    Q = L + laplacian_regularization * np.eye(num_samples)

    # Closed-form soft-HFS solution: f* = (C^-1 Q + I)^-1 y.
    D = np.dot(C_inv, Q) + np.eye(num_samples)
    fstar = np.dot(np.linalg.inv(D), y)

    # +1 because class labels start at 1.
    labels = np.argmax(fstar, axis=1) + 1
    return labels
def soft_hfs(X, Y, c_l, c_u, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Perform soft (unconstrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with node labels [1, ..., num_classes] (0 is unlabeled)
    :param c_l: C-matrix coefficient for labeled samples
    :param c_u: C-matrix coefficient for unlabeled samples
    :param laplacian_regularization: regularization added to the Laplacian diagonal
    :param var: sigma value (squared) for the exponential similarity
    :param eps: threshold for epsilon graphs
    :param k: number of neighbours for k-nn graphs (0 -> epsilon-graph)
    :param laplacian_normalization: '', 'sym' or 'rw' Laplacian variant
    :return: (labels, unlabeled_indices) — class assignments for each of the n
             nodes, and the indices of the originally unlabeled nodes
    """
    Cl = np.unique(Y)
    num_classes = len(Cl) - 1

    # Target y: one-hot encoding of the labeled nodes (rows of zeros for
    # unlabeled ones); column c-1 codes class c.
    y = np.zeros((len(Y), num_classes))
    for ind, c in enumerate(Y):
        if c != 0:
            y[ind][int(c) - 1] = 1

    # C: diagonal confidence matrix — c_l on labeled entries, c_u otherwise.
    labeled_ind = (Y != 0) * 1
    unlabeled_ind = (Y == 0) * 1
    C = np.diag(c_l * labeled_ind + c_u * unlabeled_ind)

    # BUG FIX: build_laplacian_regularized expects the raw data X (it builds
    # the similarity graph itself); the original passed an already-built W,
    # computing a similarity graph *of the similarity graph*. Also forward
    # var/eps/k and the requested normalization instead of the defaults.
    Q = build_laplacian_regularized(X, laplacian_regularization, var, eps, k, laplacian_normalization)

    # Closed-form soft-HFS solution: f* = (C^-1 Q + I)^-1 y.
    f_star = np.linalg.inv(np.linalg.inv(C).dot(Q) + np.eye(len(X))).dot(y)

    # +1 because class labels start at 1.
    labels = np.argmax(f_star, axis=1) + 1
    return labels, np.where(Y == 0)[0]
def hard_hfs(X, Y, laplacian_regularization=0.000001, var=1, eps=0, k=10, laplacian_normalization=""):
    """
    Perform hard (constrained) HFS.

    Labeled nodes keep their labels; unlabeled nodes receive
    f_u = (L_uu + gamma I)^-1 W_ul f_l, then argmax over classes.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with node labels [1, ..., num_classes] (0 is unlabeled)
    :param laplacian_regularization: regularization added to L_uu for invertibility
    :param var: sigma value (squared) for the exponential similarity
    :param eps: threshold for epsilon graphs
    :param k: number of neighbours for k-nn graphs
    :param laplacian_normalization: '', 'sym' or 'rw' Laplacian variant
    :return: labels, (n,) class assignments for each node
    """
    n = np.size(X, 0)

    # Split node indices into labelled / unlabelled sets.
    labelled = np.argwhere(Y != 0)[:, 0]
    unlabelled = np.argwhere(Y == 0)[:, 0]

    # Graph and Laplacian.
    W = helper.build_similarity_graph(X, var, eps, k)
    L = helper.build_laplacian(W, laplacian_normalization)

    # Blocks needed by the closed form.
    Luu = L[unlabelled, :][:, unlabelled]
    Wul = W[unlabelled, :][:, labelled]

    # One-hot targets for the labelled nodes.
    fl = one_hot_target(Y[labelled])

    # fu = (Luu + gamma I)^-1 Wul fl.
    Q = Luu + laplacian_regularization * np.eye(unlabelled.shape[0])
    fu = np.linalg.inv(Q) @ (Wul @ fl)

    # Assemble final labelling: thresholded fu (+1: labels start at 1) for
    # unlabelled nodes, original labels for labelled ones.
    labels = np.zeros((n, ), dtype=int)
    labels[unlabelled] = np.argmax(fu, axis=1) + 1
    labels[labelled] = Y[labelled]
    return labels
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=0, laplacian_normalization=""):
    """
    Perform hard (constrained) HFS.

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with node labels [0, 1, ..., num_classes] (0 is unlabeled)
    :param laplacian_regularization: regularization to add to the Laplacian
        NOTE(review): unused here — the pseudo-inverse of L_uu already handles
        singularity; kept in the signature for interface compatibility.
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn; if zero, use epsilon-graph
    :param laplacian_normalization: '', 'sym' or 'rw' Laplacian variant
    :return: labels, (n,) class assignments in [1, ..., num_classes]
    """
    Cl = np.unique(Y)
    num_classes = len(Cl) - 1

    # Indices of labeled / unlabeled nodes.
    l_idx = np.where(Y != 0)[0]
    u_idx = np.where(Y == 0)[0]

    # BUG FIX: the original ignored var/eps/k (build_similarity_graph(X)) and
    # hard-coded laplacian_normalization="" — forward the caller's parameters.
    W = build_similarity_graph(X, var, eps, k)
    L = build_laplacian(W, laplacian_normalization)

    # f_l: one-hot encoding of the labeled nodes; column c-1 codes class c.
    f_l = np.zeros((len(l_idx), num_classes))
    for row, c in enumerate(Y[l_idx]):
        f_l[row][int(c) - 1] = 1

    # Harmonic solution for the unlabeled block: f_u = -L_uu^+ L_ul f_l.
    L_uu = L[u_idx][:, u_idx]
    L_ul = L[u_idx][:, l_idx]
    f_u = np.linalg.pinv(L_uu).dot(-np.dot(L_ul, f_l))

    # Class assignment: argmax over classes, +1 because labels start at 1.
    labels = np.zeros(len(X))
    labels[l_idx] = np.argmax(f_l, axis=1) + 1
    labels[u_idx] = np.argmax(f_u, axis=1) + 1
    return labels
def hard_hfs(X, Y, laplacian_regularization, var=1, eps=0, k=6, laplacian_normalization=""):
    """
    Perform hard (constrained) HFS.

    /!\\ ASSUMES X AND Y ARE SORTED SO THAT LABELED SAMPLES COME FIRST /!\\

    :param X: (n x m) matrix of m-dimensional samples
    :param Y: (n x 1) vector with node labels [0, 1, ..., num_classes] (0 is unlabeled)
    :param laplacian_regularization: regularization to add to the Laplacian
    :param var: the sigma value for the exponential function, already squared
    :param eps: threshold eps for epsilon graphs
    :param k: number of neighbours k for k-nn. If zero, use epsilon-graph
    :param laplacian_normalization: '', 'sym' or 'rw' Laplacian variant
    :return: labels, class assignments [1, ..., num_classes] for each of the n nodes
    """
    num_samples = np.size(X, 0)
    Cl = np.unique(Y)
    num_classes = len(Cl) - 1

    # Labeled nodes occupy the first num_labels positions (see assumption above).
    l_idx = [i for i in range(num_samples) if Y[i] != 0]
    num_labels = len(l_idx)

    # BUG FIX: build_laplacian_regularized requires laplacian_regularization as
    # its second argument — the original call build_laplacian_regularized(X, k=k)
    # raised a TypeError; var/eps/normalization are now forwarded as well.
    # (Debug prints removed.)
    L = build_laplacian_regularized(X, laplacian_regularization, var, eps, k, laplacian_normalization)
    Luu = L[num_labels:, num_labels:]
    inv_Luu = np.linalg.inv(Luu)

    # BUG FIX: build_similarity_graph(X, k) passed k into the var slot;
    # pass the arguments in their declared order instead.
    W = build_similarity_graph(X, var, eps, k)
    Wul = W[num_labels:, :num_labels]

    # One-hot targets for the labeled block; column c-1 codes class c.
    f_l = np.zeros((num_labels, num_classes))
    for i in l_idx:
        f_l[i][int(Y[i]) - 1] = 1

    # Closed-form solution shown in class: f_u = Luu^-1 Wul f_l.
    f_u = inv_Luu @ (Wul @ f_l)

    # Class assignment for every node, labeled block first (+1: labels start at 1).
    labels = [np.argmax(row) + 1 for row in f_l]
    labels += [np.argmax(row) + 1 for row in f_u]
    return labels