def __LGC(self, X, W, Y, labeledIndexes, alpha=0.1, useEstimatedFreq=None, hook=None):
    """ Init """
    import scipy.sparse
    if scipy.sparse.issparse(W):
        W = W.todense()
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    """ Estimate frequency of classes """
    num_labeled = Y[labeledIndexes].shape[0]
    num_classes = Y.shape[1]
    if useEstimatedFreq is not None:
        if isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq
    else:
        estimatedFreq = np.repeat(1 / num_classes, num_classes)
    omega = estimatedFreq

    mu = (1 - alpha) / alpha
    n = Y.shape[0]
    c = Y.shape[1]
    I = np.identity(Y.shape[0])
    S = I - gutils.lap_matrix(W, is_normalized=True)

    """ Matrices involving products with theta """
    theta = (1 / mu) * np.asarray(np.linalg.inv(I - alpha * S))
    F_lgc = (theta @ Y) * mu  #Standard (unconstrained) LGC solution
    theta_1n = np.sum(theta, axis=1).flatten()
    theta_1n_ratio = (theta_1n / np.sum(theta_1n))[:, np.newaxis]  #Shape: nx1

    """ Intermediate calc """
    zeta = n * omega - np.sum(F_lgc, axis=0)  #Shape: 1xc
    zeta = np.reshape(zeta, (1, c))
    ypsilon = np.ones(shape=(n, 1)) - np.sum(F_lgc, axis=1)[:, np.newaxis] -\
        theta_1n_ratio * (n - np.sum(F_lgc.flatten()))  #Shape: nx1

    #Correct F_lgc so that each row sums to 1 and each column sums to n*omega
    F = F_lgc
    F += theta_1n_ratio @ zeta
    F += (1 / c) * (ypsilon @ np.ones((1, c)))

    log_args = [np.round(x, 3) for x in [np.sum(F, axis=1)[0:10], np.sum(F, axis=0), n * omega]]
    LOG.info("F sum on rows: {} (expected 1,1,...,1); F sum col: {} (expected {})".format(*log_args))
    return F
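
# A minimal, self-contained sketch of the constraint algebra above (hypothetical
# names, numpy only; assumes gutils.lap_matrix returns the normalized Laplacian
# I - D^{-1/2} W D^{-1/2}, so that S = D^{-1/2} W D^{-1/2}). It checks that the
# zeta/ypsilon correction makes each row of F sum to 1 and each column to n*omega.
def _demo_lgc_constraint_algebra(alpha=0.1):
    import numpy as np
    W = np.array([[0., 1., 0., 0.],
                  [1., 0., 1., 0.],
                  [0., 1., 0., 1.],
                  [0., 0., 1., 0.]])          #4-node chain graph
    d = W.sum(axis=1)
    S = W / np.sqrt(np.outer(d, d))           #D^{-1/2} W D^{-1/2}
    Y = np.array([[1., 0.], [0., 0.], [0., 0.], [0., 1.]])  #nodes 0 and 3 labeled
    n, c = Y.shape
    omega = np.array([0.5, 0.5])              #estimated class frequencies
    mu = (1 - alpha) / alpha
    theta = (1 / mu) * np.linalg.inv(np.eye(n) - alpha * S)
    F_lgc = (theta @ Y) * mu
    r = theta.sum(axis=1)[:, None] / theta.sum()
    zeta = (n * omega - F_lgc.sum(axis=0))[None, :]
    ypsilon = np.ones((n, 1)) - F_lgc.sum(axis=1)[:, None] - r * (n - F_lgc.sum())
    F = F_lgc + r @ zeta + (1 / c) * (ypsilon @ np.ones((1, c)))
    assert np.allclose(F.sum(axis=1), 1.0)         #each row sums to 1
    assert np.allclose(F.sum(axis=0), n * omega)   #each column sums to n*omega
    return F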
def __LGC(self, X, W, Y, labeledIndexes, alpha=0.1, hook=None):
    import scipy.sparse
    if scipy.sparse.issparse(W):
        W = W.todense()
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    I = np.identity(Y.shape[0])
    #S = D^{-1/2} W D^{-1/2} is the symmetrically normalized affinity matrix
    S = I - gutils.lap_matrix(W, is_normalized=True)
    #Closed-form LGC solution: F = (I - alpha*S)^{-1} Y
    return np.matmul(np.linalg.inv(I - alpha * S), Y)
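
# A small sanity check (illustrative, numpy only): the closed form
# (I - alpha*S)^{-1} Y is, up to the constant (1 - alpha), the fixed point of
# the classic LGC iteration F <- alpha*S*F + (1 - alpha)*Y.
def _demo_lgc_closed_form_vs_iteration(alpha=0.1, num_iter=200):
    import numpy as np
    W = np.array([[0., 1., 1., 0.],
                  [1., 0., 1., 0.],
                  [1., 1., 0., 1.],
                  [0., 0., 1., 0.]])
    d = W.sum(axis=1)
    S = W / np.sqrt(np.outer(d, d))
    Y = np.array([[1., 0.], [0., 0.], [0., 0.], [0., 1.]])
    F_closed = np.linalg.inv(np.eye(4) - alpha * S) @ Y
    F_iter = np.zeros_like(Y)
    for _ in range(num_iter):
        F_iter = alpha * (S @ F_iter) + (1 - alpha) * Y
    assert np.allclose((1 - alpha) * F_closed, F_iter, atol=1e-8)
    return F_closed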
def __MR(self, X, W, Y, labeledIndexes, p, tuning_iter, hook=None):
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y[np.logical_not(labeledIndexes)] = 0
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
    num_lab = l.shape[0]

    if not isinstance(p, int):
        p = int(p * num_lab)
    if p > Y.shape[0]:
        p = Y.shape[0]
        LOG.warn("Warning: p greater than the number of instances; truncating", LOG.ll.FILTER)

    W = scipy_to_np(W)
    L = gutils.lap_matrix(W, is_normalized=False)
    D = gutils.deg_matrix(W)

    def check_symmetric(a, tol=1e-8):
        return np.allclose(a, a.T, atol=tol)

    #Solve the generalized eigenproblem L v = lambda D v,
    #skipping the trivial constant eigenvector
    if check_symmetric(L):
        E = sp.eigh(L, D, eigvals=(1, p))[1]
    else:
        LOG.warn("Warning: Laplacian not symmetric", LOG.ll.FILTER)
        eigenValues, eigenVectors = sp.eig(L, D)
        idx = eigenValues.argsort()
        eigenValues = eigenValues[idx]
        assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
        eigenVectors = eigenVectors[:, idx]
        E = eigenVectors[:, 1:(p + 1)]

    e_lab = E[labeledIndexes, :]
    """ TIKHONOV REGULARIZATION. Currently set to 0. """
    TIK = np.zeros(shape=e_lab.shape)
    try:
        A = np.linalg.inv(e_lab.T @ e_lab + TIK.T @ TIK) @ e_lab.T
    except np.linalg.LinAlgError:
        A = np.linalg.pinv(e_lab.T @ e_lab + TIK.T @ TIK) @ e_lab.T

    #Least-squares fit of each one-vs-rest label vector in the eigenbasis
    F = np.zeros(shape=Y.shape)
    y_m = np.argmax(Y, axis=1)[labeledIndexes]
    for i in range(Y.shape[1]):
        c = np.ones(num_lab)
        c[y_m != i] = -1
        a = A @ np.transpose(c)
        LOG.debug(a, LOG.ll.FILTER)
        for j in np.arange(F.shape[0]):
            F[j, i] = np.dot(a, E[j, :])

    #Reconstruction error of each labeled instance's assigned class
    ERmat = -1 * np.ones((Y.shape[0],))
    Y_amax = np.argmax(Y, axis=1)
    for i in np.where(labeledIndexes)[0]:
        ERmat[i] = np.square(Y[i, Y_amax[i]] - F[i, Y_amax[i]])

    #Remove the labels that are worst reconstructed by the eigenbasis
    removed_Lids = np.argsort(ERmat)
    removed_Lids = removed_Lids[::-1]

    labeledIndexes = np.array(labeledIndexes)
    Y = np.copy(Y)
    for i in range(tuning_iter):
        labeledIndexes[removed_Lids[i]] = False
        if hook is not None:
            hook._step(step=i, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)

    return Y, labeledIndexes
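
# Illustrative sketch of the fit above (numpy/scipy only; names hypothetical).
# The smallest nontrivial generalized eigenvectors of (L, D) give a smooth basis;
# labels are then fit by least squares on the labeled rows. Note: newer SciPy
# releases expose subset_by_index; the eigvals keyword used above is the older
# spelling of the same range.
def _demo_eigenbasis_fit(p=2):
    import numpy as np
    import scipy.linalg
    W = np.array([[0., 1., 1., 0., 0.],
                  [1., 0., 1., 0., 0.],
                  [1., 1., 0., 1., 0.],
                  [0., 0., 1., 0., 1.],
                  [0., 0., 0., 1., 0.]])
    D = np.diag(W.sum(axis=1))
    L = D - W                                     #unnormalized Laplacian
    #p smallest nontrivial generalized eigenvectors of L v = lambda D v
    E = scipy.linalg.eigh(L, D, subset_by_index=(1, p))[1]
    labeled = np.array([True, False, False, False, True])
    c = np.array([1., -1.])                       #+1/-1 one-vs-rest targets
    e_lab = E[labeled, :]
    a, *_ = np.linalg.lstsq(e_lab, c, rcond=None)
    F = E @ a                                     #smooth extension to all nodes
    return F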
def __GTAM(self, X, W, Y, labeledIndexes, mu=99.0, useEstimatedFreq=True, num_iter=None,
           constant_prop=False, hook=None):
    '''BEGIN initialization'''
    Y = np.copy(Y)
    labeledIndexes = np.array(labeledIndexes)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    num_labeled = Y[labeledIndexes].shape[0]
    num_unlabeled = Y.shape[0] - num_labeled
    num_classes = Y.shape[1]

    """ Estimate frequency of classes """
    if useEstimatedFreq is not None:
        if isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq
    else:
        estimatedFreq = np.repeat(1 / num_classes, num_classes)
    LOG.debug("Estimated frequency: {}".format(estimatedFreq), LOG.ll.CLASSIFIER)

    """ IMPORTANT! ERASES LABELS """
    Y[np.logical_not(labeledIndexes), :] = 0

    D = gutils.deg_matrix(W, flat=True)
    #Identity matrix
    I = np.identity(W.shape[0])
    #Get graph laplacian
    L = gutils.lap_matrix(W, is_normalized=True)
    #Propagation matrix
    P = np.linalg.inv(I + L / mu)
    P_t = P.transpose()
    #Matrix A of the GTAM quadratic cost
    A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))
    A = np.asarray(A)
    #A = A + A.transpose()

    W = scipy.sparse.coo_matrix(W)
    Z = []
    Q = None

    #Determine number of iterations
    if num_iter is None:
        num_iter = num_unlabeled
    else:
        num_iter = min(num_iter, num_unlabeled)

    id_min_line, id_min_col = -1, -1
    '''END initialization'''
    #######################################################################################
    '''BEGIN iterations'''
    for i in np.arange(num_iter):
        '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
           Then, we normalize each row so that it sums to its estimated influence '''
        ul = np.logical_not(labeledIndexes)
        Z = gutils.calc_Z(Y, labeledIndexes, D, estimatedFreq, weigh_by_degree=self.weigh_by_degree)
        if Q is None:
            #Compute graph gradient
            Q = np.matmul(A, Z)
            if hook is not None:
                Q_pure = np.copy(Q)
            Q[labeledIndexes, :] = np.inf
        else:
            #Incremental update: only the newly labeled line/column changed
            Q[id_min_line, :] = np.inf
            new_el_pct = Z[id_min_line, id_min_col] / np.sum(Z[:, id_min_col])
            Q[ul, id_min_col] =\
                (1 - new_el_pct) * Q[ul, id_min_col] + Z[id_min_line, id_min_col] * A[ul, id_min_line]

        #Find minimum unlabeled index
        if constant_prop:
            expectedNumLabels = estimatedFreq * sum(labeledIndexes)
            actualNumLabels = np.sum(Y[labeledIndexes], axis=0)
            class_to_label = np.argmax(expectedNumLabels - actualNumLabels)
            id_min_col = class_to_label
            id_min_line = np.argmin(Q[:, class_to_label])
        else:
            id_min = np.argmin(Q)
            id_min_line = id_min // num_classes
            id_min_col = id_min % num_classes

        #Update Y and labeledIndexes
        labeledIndexes[id_min_line] = True
        Y[id_min_line, id_min_col] = 1

        #Maybe plot current iteration
        if hook is not None:
            hook._step(step=i, Y=Y, labeledIndexes=labeledIndexes, P=P, Z=Z, Q=Q_pure,
                       id_min_line=id_min_line, id_min_col=id_min_col)
    '''END iterations'''
    ######################################################################################################
    return np.asarray(P @ Z)
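
# Small illustrative check (numpy only): decoding a flat argmin over an (n, c)
# matrix into (row, col) with // and % — as done in the greedy step above —
# matches np.unravel_index.
def _demo_flat_argmin():
    import numpy as np
    rng = np.random.default_rng(0)
    Q = rng.normal(size=(5, 3))
    id_min = np.argmin(Q)
    line, col = id_min // Q.shape[1], id_min % Q.shape[1]
    assert (line, col) == np.unravel_index(id_min, Q.shape)
    return line, col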
def __MR(self, X, W, Y, labeledIndexes, p, hook=None):
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
    num_lab = l.shape[0]

    if not isinstance(p, int):
        p = int(p * num_lab)
    if p > Y.shape[0]:
        p = Y.shape[0]
        LOG.warn("Warning: p greater than the number of instances; truncating", LOG.ll.CLASSIFIER)

    W = gutils.scipy_to_np(W)
    W = 0.5 * (W + W.T)
    L = gutils.lap_matrix(W, is_normalized=False)
    D = gutils.deg_matrix(W)

    def check_symmetric(a, tol=1e-8):
        return np.allclose(a, a.T, atol=tol)

    #Solve the generalized eigenproblem L v = lambda D v,
    #skipping the trivial constant eigenvector
    if check_symmetric(L):
        E = sp.eigh(L, D, eigvals=(1, p))[1]
    else:
        LOG.warn("Warning: Laplacian not symmetric", LOG.ll.CLASSIFIER)
        eigenValues, eigenVectors = sp.eig(L, D)
        idx = eigenValues.argsort()
        eigenValues = eigenValues[idx]
        assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
        eigenVectors = eigenVectors[:, idx]
        E = eigenVectors[:, 1:(p + 1)]

    e_lab = E[labeledIndexes, :]
    """ TIKHONOV REGULARIZATION. Currently set to 0. """
    TIK = np.zeros(shape=e_lab.shape)
    try:
        A = np.linalg.inv(e_lab.T @ e_lab + TIK.T @ TIK) @ e_lab.T
    except np.linalg.LinAlgError:
        A = np.linalg.pinv(e_lab.T @ e_lab + TIK.T @ TIK) @ e_lab.T

    F = np.zeros(shape=Y.shape)
    y_m = np.argmax(Y, axis=1)[labeledIndexes]

    if hook is not None:
        for i in range(p):
            hook._step(step=i, X=X, W=W, Y=E[:, i])

    #Least-squares fit of each one-vs-rest label vector in the eigenbasis,
    #clipping negative responses at zero
    for i in range(Y.shape[1]):
        c = np.ones(num_lab)
        c[y_m != i] = -1
        a = A @ np.transpose(c)
        LOG.debug(a, LOG.ll.CLASSIFIER)
        for j in np.arange(F.shape[0]):
            F[j, i] = np.dot(a, E[j, :])
            F[j, i] = max(F[j, i], 0)

    return F
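
# Quick illustrative check (numpy only): with the Tikhonov term set to zero,
# the normal-equations solve above reduces to ordinary least squares.
def _demo_tikhonov_reduces_to_lstsq():
    import numpy as np
    rng = np.random.default_rng(1)
    E = rng.normal(size=(6, 3))           #labeled rows of the eigenbasis
    c = rng.choice([-1.0, 1.0], size=6)   #one-vs-rest targets
    TIK = np.zeros_like(E)
    a_normal = np.linalg.inv(E.T @ E + TIK.T @ TIK) @ E.T @ c
    a_lstsq, *_ = np.linalg.lstsq(E, c, rcond=None)
    assert np.allclose(a_normal, a_lstsq)
    return a_normal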
def LGCLVO(self, X, W, Y, labeledIndexes, mu=99.0, useEstimatedFreq=True, tuning_iter=0,
           hook=None, constant_prop=False, useLGCMat=False, useZ=False):
    '''BEGIN initialization'''
    Y = np.copy(Y)
    #We make a deep copy of labeledIndexes
    labeledIndexes = np.array(labeledIndexes)

    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")

    num_labeled = Y[labeledIndexes].shape[0]
    num_unlabeled = Y.shape[0] - num_labeled
    num_classes = Y.shape[1]

    D = np.sum(W, axis=0)
    if useEstimatedFreq:
        estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
    else:
        estimatedFreq = np.repeat(1 / num_classes, num_classes)

    if useLGCMat:
        W = self.get_prop_W(W, Y, mu)
        W = 0.5 * (W + W.transpose())

    #Identity matrix
    I = np.identity(W.shape[0])
    #Get graph laplacian
    L = gutils.lap_matrix(W, is_normalized=True)
    #Propagation matrix
    P = np.linalg.inv(I + 0.5 * (L + L.transpose()) / mu)
    P_t = P.transpose()
    #Matrix A of the quadratic cost, symmetrized
    A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))
    A = A + A.transpose()

    Z = []
    #######################################################################################
    '''BEGIN iterations'''
    for i in np.arange(tuning_iter):
        '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
           Then, we normalize each row so that it sums to its estimated influence '''
        if useZ:
            Z = gutils.calc_Z(Y, labeledIndexes, D, estimatedFreq, reciprocal=False)
            Q = np.matmul(A, Z)
        else:
            Q = np.matmul(A, Y)

        #During label tuning, we'll also 'unlabel' the argmax
        unlabeledIndexes = np.logical_not(labeledIndexes)
        temp = Q[unlabeledIndexes, :]
        Q[unlabeledIndexes, :] = -np.inf
        id_max = np.argmax(Q)
        id_max_line = id_max // num_classes
        id_max_col = id_max % num_classes
        Q[unlabeledIndexes, :] = temp
        Q[labeledIndexes, :] = np.inf

        #Find minimum unlabeled index
        if constant_prop:
            id_min_line = np.argmin(Q[:, id_max_col])
            id_min_col = id_max_col
        else:
            id_min = np.argmin(Q)
            id_min_line = id_min // num_classes
            id_min_col = id_min % num_classes

        #Label OP
        labeledIndexes[id_min_line] = True
        Y[id_min_line, id_min_col] = 1
        #Unlabel OP
        labeledIndexes[id_max_line] = False
        Y[id_max_line, id_max_col] = 0

        if hook is not None:
            hook._step(step=i, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes,
                       l_i=id_max_line, l_j=id_max_col, ul_i=id_min_line, ul_j=id_min_col)
    '''END iterations'''

    return Y, labeledIndexes
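
# Illustrative check (numpy only): the matrix A = P^T L P + mu (P - I)^T (P - I)
# used above is the Hessian of a sum of two quadratic costs (smoothness plus
# fidelity), so it should be symmetric positive semidefinite whenever L is.
def _demo_A_is_psd(mu=99.0):
    import numpy as np
    W = np.array([[0., 1., 1.],
                  [1., 0., 1.],
                  [1., 1., 0.]])
    d = W.sum(axis=1)
    L = np.eye(3) - W / np.sqrt(np.outer(d, d))   #normalized Laplacian
    P = np.linalg.inv(np.eye(3) + L / mu)
    A = P.T @ L @ P + mu * (P.T - np.eye(3)) @ (P - np.eye(3))
    assert np.allclose(A, A.T, atol=1e-10)
    assert np.all(np.linalg.eigvalsh(A) >= -1e-10)
    return A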
def __SIIS(self, X, W, Y, labeledIndexes, m, alpha, beta, rho, max_iter, hook=None):
    Y = np.copy(Y)
    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    if m is None:
        m = W.shape[0]
    c = Y.shape[1]

    W = scipy.sparse.csr_matrix(W)
    W = W / np.mean(W.data)  #normalize edge weights by their mean
    D = gutils.deg_matrix(W, pwr=1.0)
    L = gutils.lap_matrix(W, is_normalized=True)

    U, SIGMA = gutils.extract_lap_eigvec(L, m, remove_first_eig=True)
    U = scipy.sparse.csr_matrix(U)
    SIGMA = _to_np(SIGMA)

    J = gutils.labels_indicator(labeledIndexes)
    """ Edge incidence-style matrix used for the L2,1 smoothness term """
    P = SIISClassifier.edge_mat(W)

    """ Initialize params """
    LAMB_1 = np.ones((P.shape[0], c))
    LAMB_2 = np.ones((Y.shape[0], c))
    mu = 1.0
    mu_max = 10000000.0
    eps = 1e-04

    """ Reusable matrices """
    JU = _to_np(J @ U)
    PU = _to_np(P @ U)
    PU_T = PU.transpose()
    JU_T = JU.transpose()

    A = np.zeros((m, c))
    Q = None
    B = None
    improvement = 1
    it = 0

    """ TODO: Tensorflow version
    import tensorflow as tf
    with tf.Session() as sess:
        A = tf.Variable(1e-06*tf.ones((m,c),dtype=tf.float64))
        sess.run(tf.global_variables_initializer())
        C = tf.reduce_sum(tf.linalg.norm(tf.matmul(PU,A),axis=1)) +\
            alpha*tf.reduce_sum(tf.linalg.norm(tf.matmul(_to_np(U)[labeledIndexes,:],A)-Y[labeledIndexes,:],axis=1)) +\
            beta* tf.trace(tf.matmul(tf.matmul(tf.transpose(A),SIGMA),A))
        opt = tf.train.AdamOptimizer(learning_rate=0.5*1e-02)
        opt_min = opt.minimize(C)
        sess.run(tf.global_variables_initializer())
        for i in range(2000):
            sess.run(opt_min)
            LOG.debug(sess.run(C),LOG.ll.CLASSIFIER)
        LOG.debug(sess.run(C),LOG.ll.CLASSIFIER)
        F = _to_np(U)@sess.run(A)
        LOG.debug(F.shape,LOG.ll.CLASSIFIER)
    """

    while it <= max_iter and improvement > eps:
        """ Update Q: row-wise shrinkage (prox of the L2,1 norm), threshold 1/mu """
        N = PU @ A - (1 / mu) * LAMB_1
        N_norm = np.linalg.norm(N, axis=1)
        to_zero = N_norm <= (1 / mu)
        mult = (N_norm - (1 / mu)) / (N_norm + 1e-100)
        N = N * mult[:, np.newaxis]
        N[to_zero, :] = 0.0
        Q = N

        """ Update B: same shrinkage, threshold alpha/mu """
        M = JU @ A - Y - (1 / mu) * LAMB_2
        M_norm = np.linalg.norm(M, axis=1)
        to_zero = M_norm <= (alpha / mu)
        mult = (M_norm - (alpha / mu)) / (M_norm + 1e-100)
        M = M * mult[:, np.newaxis]
        M[to_zero, :] = 0.0
        B = M

        old_A = A
        """ Update A: solve the quadratic subproblem """
        A_inv_term = 2 * beta * SIGMA + mu * PU_T @ PU + mu * JU_T @ JU
        A_inv_term = np.linalg.inv(A_inv_term)
        A = A_inv_term @ \
            (PU_T @ LAMB_1 + JU_T @ LAMB_2 +
             mu * PU_T @ Q + mu * JU_T @ (B + Y))

        """ Update Lagrangian coeffs """
        LAMB_1 = LAMB_1 + mu * (Q - PU @ A)
        LAMB_2 = LAMB_2 + mu * (B - JU @ A + Y)
        """ Update penalty coefficient """
        mu = min(rho * mu, mu_max)

        denom = np.max(np.abs(old_A))
        improvement = np.inf if denom == 0 else np.max(np.abs(A - old_A)) / denom

        LOG.debug("Iter {}".format(it), LOG.ll.CLASSIFIER)
        it += 1

    C = np.sum(np.linalg.norm(PU @ A, axis=1)) + alpha * np.sum(np.linalg.norm(JU @ A - Y, axis=1)) +\
        beta * np.trace(A.T @ SIGMA @ A)
    LOG.debug("Iter {} - Cost {}".format(it, C), LOG.ll.CLASSIFIER)

    F = U @ A
    #Harden predictions to one-hot
    for i in range(F.shape[0]):
        mx = np.argmax(F[i, :])
        F[i, :] = 0.0
        F[i, mx] = 1.0
    return F
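
# Illustrative sketch (numpy only) of the row-wise shrinkage used in the Q and
# B updates above: it is the proximal operator of t * ||.||_{2,1}, which scales
# each row toward zero by t and zeroes rows whose norm falls below t.
def _demo_row_shrinkage(t=0.5):
    import numpy as np
    N = np.array([[3., 4.],      #norm 5  -> scaled by (5 - t)/5
                  [0.1, 0.2]])   #norm < t -> zeroed
    norms = np.linalg.norm(N, axis=1)
    out = N * np.maximum(norms - t, 0.0)[:, None] / (norms + 1e-100)[:, None]
    assert np.allclose(out[0], N[0] * (5 - t) / 5)
    assert np.allclose(out[1], 0.0)
    return out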
def LGCLVO(self, X, W, Y, labeledIndexes, mu=99.0, useEstimatedFreq=True, tuning_iter=0,
           hook=None, constant_prop=False, useZ=True, normalize_rows=True):
    Y = np.copy(Y)
    #We make a deep copy of labeledIndexes
    labeledIndexes = np.array(labeledIndexes)
    lids = np.where(labeledIndexes)[0]

    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    W = 0.5 * (W + W.transpose())

    num_labeled = Y[labeledIndexes].shape[0]
    num_unlabeled = Y.shape[0] - num_labeled
    num_classes = Y.shape[1]

    D = gutils.deg_matrix(W, flat=True)
    if useEstimatedFreq is not None:
        if isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq
    else:
        estimatedFreq = np.repeat(1 / num_classes, num_classes)

    if scipy.sparse.issparse(W):
        l = np.sum(labeledIndexes)
        itertool_prod = [[i, j] for i in range(l) for j in range(l)]

        row = np.asarray([lids[i] for i in range(l)])
        col = np.asarray([i for i in range(l)])
        data = np.asarray([1.0] * l)
        temp_Y = _to_np(scipy.sparse.coo_matrix((data, (row, col)), shape=(W.shape[0], l)))

        PL = LGC_iter_TF(X, W, Y=temp_Y, labeledIndexes=labeledIndexes,
                         alpha=1 / (1 + mu), num_iter=10000)
        PL = PL[labeledIndexes, :]
        PL[range(PL.shape[0]), range(PL.shape[0])] = 0  #Set diagonal to 0
        del temp_Y

        row = np.asarray([lids[x[0]] for x in itertool_prod if x[0] != x[1]])
        col = np.asarray([lids[x[1]] for x in itertool_prod if x[0] != x[1]])
        data = [PL[x[0], x[1]] for x in itertool_prod if x[0] != x[1]]
        P = scipy.sparse.coo_matrix((data, (row, col)), shape=W.shape).tocsr()
    else:
        #Identity matrix
        I = np.identity(W.shape[0])
        #Get graph laplacian
        L = gutils.lap_matrix(W, is_normalized=True)
        #Propagation matrix, restricted to labeled rows/columns, with zeroed
        #diagonal and columns normalized to sum to 1
        P = np.zeros(W.shape)
        P[np.ix_(labeledIndexes, labeledIndexes)] = np.linalg.inv(
            I + 0.5 * (L + L.transpose()) / mu)[np.ix_(labeledIndexes, labeledIndexes)]
        P[labeledIndexes, labeledIndexes] = 0
        P[np.ix_(labeledIndexes, labeledIndexes)] = P[np.ix_(labeledIndexes, labeledIndexes)] /\
            np.sum(P[np.ix_(labeledIndexes, labeledIndexes)], axis=0, keepdims=False)
        W = scipy.sparse.csr_matrix(W)

    Z = []
    detected_noisylabels = []
    suggested_labels = []
    where_noisylabels = []
    Q_values = []

    Y_flat = np.argmax(Y, axis=1)

    def divide_row_by_sum(e):
        e = _to_np(e)
        if normalize_rows:
            e = e / np.sum(e + 1e-100, axis=1, keepdims=True)
        return e

    def find_argmin(Q, class_to_unlabel):
        id_min_line = np.argmin(Q[:, class_to_unlabel])
        id_min_col = class_to_unlabel
        return id_min_line, id_min_col, Q[id_min_line, id_min_col]

    #######################################################################################
    '''BEGIN iterations'''
    Q = None
    cleanIndexes = np.copy(labeledIndexes)
    found_noisy = True
    for i_iter in range(tuning_iter):
        if np.sum(labeledIndexes) > 0 and found_noisy:
            '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
               Then, we normalize each row so that it sums to its estimated influence '''
            if (not self.use_baseline) or Q is None:
                if useZ:
                    Z = gutils.calc_Z(Y, labeledIndexes, D, estimatedFreq, weigh_by_degree=False)
                    F = P @ Z
                    if scipy.sparse.issparse(F):
                        F = np.asarray(F.toarray())
                    #Compute graph gradient
                    Q = divide_row_by_sum(F) - divide_row_by_sum(Z)
                else:
                    F = P @ Y
                    if scipy.sparse.issparse(F):
                        F = np.asarray(F.toarray())
                    Q = divide_row_by_sum(F) - divide_row_by_sum(Y)

            #During label tuning, we'll also 'unlabel' the argmax
            unlabeledIndexes = np.logical_not(cleanIndexes)
            if self.early_stop:
                Q[np.sum(F, axis=1) == 0.0, :] = 9999
            Q[unlabeledIndexes, :] = np.inf

            #Find minimum unlabeled index
            if constant_prop:
                expectedNumLabels = estimatedFreq * np.sum(labeledIndexes)
                actualNumLabels = np.sum(Y[labeledIndexes, :], axis=0)
                temp = expectedNumLabels - actualNumLabels
                class_priority = np.argsort(temp)

                found_noisy = False
                for class_to_unlabel in class_priority:
                    id_min_line, id_min_col, val = find_argmin(Q, class_to_unlabel)
                    if val < 0:
                        #The instance would receive a different label under the modified label prop
                        found_noisy = True
                        break
            else:
                id_min = np.argmin(Q)
                id_min_line = id_min // num_classes
                #The class previously assigned to instance X_{id_min_line}
                id_min_col = id_min % num_classes
                found_noisy = Q[id_min_line, id_min_col] < 0

            if found_noisy:
                id_max_col = np.argmax(Q[id_min_line, :])  #The new, suggested class

                detected_noisylabels.append(id_min_col)
                where_noisylabels.append(id_min_line)
                suggested_labels.append(id_max_col)
                Q_values.append(Q[id_min_line, id_min_col])

                #Unlabel OP
                if not labeledIndexes[id_min_line]:
                    raise Exception("Error: unlabeled instance was selected")
                if not Y[id_min_line, id_min_col] == 1:
                    raise Exception("Error: picked wrong class to unlabel")
                labeledIndexes[id_min_line] = False
                cleanIndexes[id_min_line] = False
                Y[id_min_line, id_min_col] = 0

                if self.relabel:
                    labeledIndexes[id_min_line] = True
                    Y[id_min_line, :] = 0
                    Y[id_min_line, id_max_col] = 1

        if hook is not None:
            hook._step(step=(i_iter + 1), X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)

    ''' MATPLOTLIB stuff (optional diagnostic plotting, disabled) '''
    """
    import cv2 as cv
    #ret2,th2 = cv.threshold(255*np.asarray(Q_values).astype(np.uint8),0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    from skimage.filters import threshold_multiotsu
    Q_values = np.asarray(Q_values)
    th = threshold_multiotsu(Q_values)
    th = np.where(Q_values < th[0])[0]
    for i in range(th.shape[0]):
        th2 = max(0,i - 1)
        if not th[i] == i:
            break
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(5,2))
    ax = fig.add_subplot()
    ax.plot(np.arange(len(Q_values)),Q_values)
    ax.axvline(10,color='red')
    #plt.axvline(th2,color='purple')
    #plt.axhline(-0.5,color='green')
    print(th2)
    plt.show()
    """
    '''END iterations'''

    LOG.info("NUMBER OF DETECTED NOISY INSTANCES:{}".format(len(detected_noisylabels)), LOG.ll.FILTER)
    return Y, labeledIndexes
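
# A toy end-to-end illustration (numpy only, hypothetical setup) of the
# detection criterion above: Q = rownorm(P @ Z) - rownorm(Z) is negative at a
# labeled instance's own class when its neighbors vote for a different class,
# and the most negative entry flags the noisy label. A simple row-normalized
# adjacency stands in for the propagation matrix P.
def _demo_noisy_label_detection():
    import numpy as np
    #Two clusters: {0, 1, 2} and {3, 4, 5}; instance 2 is mislabeled as class 1
    W = np.zeros((6, 6))
    for i, j in [(0, 1), (0, 2), (1, 2), (3, 4), (3, 5), (4, 5)]:
        W[i, j] = W[j, i] = 1.0
    Y = np.zeros((6, 2))
    Y[[0, 1], 0] = 1
    Y[2, 1] = 1            #the noisy label
    Y[[3, 4, 5], 1] = 1
    P = W / W.sum(axis=1, keepdims=True)   #stand-in propagation, zero diagonal
    F = P @ Y
    Q = F / (F.sum(axis=1, keepdims=True) + 1e-100) - Y
    id_min = np.argmin(Q)
    line, col = id_min // 2, id_min % 2
    assert (line, col) == (2, 1)           #instance 2's class-1 label is flagged
    return Q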
def LDST(self, X, W, Y, labeledIndexes, mu=99.0, useEstimatedFreq=True, tuning_iter=0,
         hook=None, constant_prop=False, useZ=True):
    '''BEGIN initialization'''
    Y = np.copy(Y)
    #We make a deep copy of labeledIndexes
    labeledIndexes = np.array(labeledIndexes)

    if Y.ndim == 1:
        Y = gutils.init_matrix(Y, labeledIndexes)
    Y[np.logical_not(labeledIndexes), :] = 0
    if W.shape[0] != Y.shape[0]:
        raise ValueError("W,Y shape not compatible")
    W = 0.5 * (W + W.transpose())

    num_labeled = Y[labeledIndexes].shape[0]
    num_unlabeled = Y.shape[0] - num_labeled
    num_classes = Y.shape[1]

    D = gutils.deg_matrix(W, flat=True)
    """ Estimate frequency of classes """
    if useEstimatedFreq is not None:
        if isinstance(useEstimatedFreq, bool):
            estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
        else:
            estimatedFreq = useEstimatedFreq
    else:
        estimatedFreq = np.repeat(1 / num_classes, num_classes)

    #Identity matrix
    I = np.identity(W.shape[0])
    #Get graph laplacian
    L = gutils.lap_matrix(W, is_normalized=True)
    #Propagation matrix (full, unlike the labeled-only variant commented below)
    P = np.linalg.inv(I + 0.5 * (L + L.transpose()) / mu)
    #P = np.zeros(W.shape)
    #P[np.ix_(labeledIndexes,labeledIndexes)] = np.linalg.inv( I + 0.5*(L + L.transpose())/mu )[np.ix_(labeledIndexes,labeledIndexes)]
    P_t = P.transpose()
    #Matrix A of the quadratic cost
    A = ((P_t @ L) @ P) + mu * ((P_t - I) @ (P - I))
    Z = []

    #######################################################################################
    '''BEGIN iterations'''
    for i_iter in np.arange(tuning_iter):
        if np.sum(labeledIndexes) > 0:
            '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
               Then, we normalize each row so that it sums to its estimated influence '''
            if useZ:
                Z = gutils.calc_Z(Y, labeledIndexes, D, estimatedFreq,
                                  weigh_by_degree=self.weigh_by_degree)
                #Compute graph gradient
                Q = np.matmul(A, Z)
            else:
                Q = np.matmul(A, Y)

            #For each labeled instance, keep only the gradient at its assigned
            #class, optionally offset by the best alternative class
            for i_labeled in np.where(labeledIndexes)[0]:
                assigned_class = np.argmax(Y[i_labeled, :])
                other_classes = list(range(Y.shape[1]))
                other_classes.remove(assigned_class)
                best_other = min([Q[i_labeled, j] for j in other_classes])
                if self.gradient_fix:
                    Q[i_labeled, assigned_class] = -best_other
                Q[i_labeled, other_classes] = -np.inf

            #During label tuning, we'll also 'unlabel' the argmax
            unlabeledIndexes = np.logical_not(labeledIndexes)
            Q[unlabeledIndexes, :] = -np.inf

            #Find maximum labeled index
            if constant_prop:
                raise NotImplementedError("constant_prop is not supported for LDST")
                """expectedNumLabels = estimatedFreq * sum(labeledIndexes)
                actualNumLabels = np.sum(Y[labeledIndexes],axis=0)
                class_to_unlabel = np.argmax(actualNumLabels - expectedNumLabels)
                id_max_line = np.argmax(Q[:,class_to_unlabel])
                id_max_col = class_to_unlabel
                """
            else:
                id_max = np.argmax(Q)
                id_max_line = id_max // num_classes
                id_max_col = id_max % num_classes

            if not Y[id_max_line, id_max_col] == 1:
                raise Exception("Tried to remove label from unlabeled instance: Y row = {}".format(
                    Y[id_max_line, :]))

            #Unlabel OP
            labeledIndexes[id_max_line] = False
            Y[id_max_line, id_max_col] = 0

        if hook is not None:
            hook._step(step=i_iter + 1, X=X, W=W, Y=Y, labeledIndexes=labeledIndexes)
    '''END iterations'''

    return Y, labeledIndexes
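
# Illustrative sketch (numpy only, hypothetical values) of the masking above:
# after alternatives are set to -inf (and, with gradient_fix, the assigned class
# is offset by the best alternative), the argmax of Q picks the labeled instance
# selected for unlabeling under the LDST criterion.
def _demo_ldst_masking(gradient_fix=True):
    import numpy as np
    Q = np.array([[0.9, 0.1],     #instance 0, labeled class 0
                  [0.2, 0.8],     #instance 1, labeled class 1
                  [0.5, 0.5]])    #instance 2, unlabeled
    Y = np.array([[1., 0.], [0., 1.], [0., 0.]])
    labeled = np.array([True, True, False])
    for i in np.where(labeled)[0]:
        assigned = np.argmax(Y[i])
        others = [j for j in range(Q.shape[1]) if j != assigned]
        if gradient_fix:
            Q[i, assigned] = -min(Q[i, j] for j in others)
        Q[i, others] = -np.inf
    Q[~labeled, :] = -np.inf
    id_max = np.argmax(Q)
    line, col = id_max // Q.shape[1], id_max % Q.shape[1]
    assert (line, col) == (0, 0)   #instance 0's label is removed first
    return line, col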