import time
from os import path

import numpy as np
import scipy.linalg
import scipy.optimize as optm
from numpy import linalg as LA

# Project-local helpers used throughout this file. The module names below are
# assumptions (the original source did not show its imports); adjust them to
# match the actual package layout.
from convergence import Convergence
from result_manager import ClusterONMFManager
from utils import calculate_accuracy


class KM_Solver(object):
    def __init__(self, data_mat=None, W=None, H=None, res_dir=None, rank=4,
                 SNR=-5, seed_num=1, true_labels=None):
        if data_mat is None or W is None or H is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_mat = data_mat
        self.W, self.H = W, H
        self.rank = rank
        self.SNR = SNR
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.labels = true_labels
        np.random.seed(seed_num)  # set the seed so that each run gets the same initial values
        (m, n) = self.data_mat.shape
        self.flag = 0  # 0: update W by least squares (K-means style); otherwise by gradient descent
        m_name = 'km' + str(self.flag)
        self.output_dir = path.join(self.res_dir, 'onmf', m_name,
                                    'rank' + str(self.rank), 'data' + str(SNR),
                                    'seed' + str(self.seed_num))
        self.time_used = 0  # record the time elapsed when running the simulations

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_nmf_cost(self, W, H):
        return LA.norm(self.data_mat - W * H, 'fro')**2

    def get_iter_num(self):
        return self.converge.len()

    def update_scheme(self, verbose=False):
        '''
        Update W and H in a K-means-like fashion.

        For each column h_j of H, every position k is tried as the single
        non-zero entry:
            H[k, j] = argmin_{c > 0} ||x_j - c * w_k||_2^2
                    = (x_j^T w_k) / ||w_k||_2^2  if w_k != 0, and 1 otherwise.
        Trying all k = 1, ..., K yields the h_j with the lowest objective value.

        For each column w_k of W (with h~_k denoting the k-th row of H):
            w_k = argmin_{w >= 0} sum_{j in C_k} ||x_j - H[k, j] * w||_2^2
                = X * h~_k^T / ||h~_k||_2^2.
        '''
        # update H, column by column
        (ha, hb) = self.H.shape
        H_pre = np.asmatrix(np.copy(self.H))
        for j in range(hb):
            tmp = LA.norm(self.data_mat[:, j] - self.W * H_pre[:, j], 2)**2
            for k in range(ha):
                h_j_new = np.asmatrix(np.zeros((ha, 1)))
                if LA.norm(self.W[:, k], 2) == 0:
                    # the k-th column of W is 0
                    h_j_new[k, 0] = 1
                else:
                    h_j_new[k, 0] = self.data_mat[:, j].transpose() * self.W[:, k] \
                        / (LA.norm(self.W[:, k], 2)**2)
                # keep h_j_new only if it attains a smaller objective value
                val = LA.norm(self.data_mat[:, j] - self.W * h_j_new, 2)**2
                if val < tmp:
                    self.H[:, j] = np.copy(h_j_new)
                    tmp = val
        if verbose:
            print 'KM: iter = ' + str(self.get_iter_num()) + ', after update H -' + \
                ', nmf cost = ' + str(self.get_nmf_cost(self.W, self.H))

        # update W
        if self.flag == 0:  # least-squares (K-means centroid) update of W
            for k in range(ha):
                if LA.norm(self.H[k, :], 2) == 0:  # no data point belongs to cluster k
                    self.W[:, k].fill(0)
                else:
                    self.W[:, k] = self.data_mat * self.H[k, :].transpose() \
                        / (LA.norm(self.H[k, :], 2)**2)
        else:  # gradient-descent update of W
            Hessian = self.H * self.H.transpose()
            eigenvals, _ = LA.eigh(Hessian)
            c = 0.51 * np.max(eigenvals)
            grad_W_pre = self.W * Hessian - self.data_mat * self.H.transpose()
            self.W = np.maximum(0, self.W - grad_W_pre / c)
        if verbose:
            print 'KM: iter = ' + str(self.get_iter_num()) + ', after update W -' + \
                ', nmf cost = ' + str(self.get_nmf_cost(self.W, self.H))

    def solve(self):
        start_time = time.time()
        self.set_tol(1e-5)
        end_time = time.time()
        self.time_used += end_time - start_time

        cost = self.get_nmf_cost(self.W, self.H)
        self.converge.add_obj_value(cost)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)
        print self.H[:, 0]

        acc_km = []   # record the clustering accuracy for each KM iteration
        time_km = []  # record the time used after each KM iteration

        # calculate the clustering accuracy
        pre_labels = np.argmax(np.asarray(self.H), 0)
        if self.labels is None:
            raise ValueError('Error: no labels!')
        acc = calculate_accuracy(pre_labels, self.labels)
        acc_km.append(acc)
        time_km.append(self.time_used)

        print 'Start to solve the problem by KM ----------'
        while not self.converge.d():
            # update the variables W, H
            start_time = time.time()
            self.update_scheme(verbose=False)
            end_time = time.time()
            self.time_used += end_time - start_time
            time_km.append(self.time_used)
            print 'time used: ' + str(self.time_used)

            # calculate the clustering accuracy
            pre_labels = np.argmax(np.asarray(self.H), 0)
            if self.labels is None:
                raise ValueError('Error: no labels!')
            acc = calculate_accuracy(pre_labels, self.labels)
            acc_km.append(acc)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective value
            cost = self.get_nmf_cost(self.W, self.H)
            self.converge.add_obj_value(cost)
            print 'onmf_KM: iter = ' + str(self.get_iter_num()) + ', nmf_cost = ' + str(cost)
            print 'HTH:'
            print self.H * self.H.transpose()

        # save the recorded time and accuracy per iteration
        self.converge.save_data(time_km, self.output_dir, 'time_km.csv')
        self.converge.save_data(acc_km, self.output_dir, 'acc_km.csv')
        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_nmf_cost(self.W, self.H), None

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = self.output_dir
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
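
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition, not part of the original solver):
# the K-means-style column update performed by KM_Solver.update_scheme on a
# tiny synthetic problem. All sizes and data below are hypothetical; it only
# relies on the numpy imports at the top of this file.
def _km_update_H_demo():
    rng = np.random.RandomState(0)
    X = np.asmatrix(rng.rand(6, 10))  # data matrix, one sample per column
    W = np.asmatrix(rng.rand(6, 3))   # current centroids
    H = np.asmatrix(np.zeros((3, 10)))
    for j in range(10):
        best_val = np.inf
        for k in range(3):
            h = np.asmatrix(np.zeros((3, 1)))
            wk_norm2 = LA.norm(W[:, k], 2) ** 2
            # closed-form scale: argmin_c ||x_j - c * w_k||_2^2 = x_j^T w_k / ||w_k||^2
            h[k, 0] = 1 if wk_norm2 == 0 else (X[:, j].transpose() * W[:, k]) / wk_norm2
            val = LA.norm(X[:, j] - W * h, 2) ** 2
            if val < best_val:
                best_val = val
                H[:, j] = h
    # every column of H ends up with a single non-zero entry, as in K-means
    print 'KM demo: non-zeros per column of H =', (np.asarray(H) > 0).sum(axis=0)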
class PLAM_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if None in [data_manager, res_dir]:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        (m, n) = self.data_mat.shape
        self.n_factor = m * n  # normalization factor for the objective value
        self.time_used = 0     # record the time used by the method
        print 'data_mat'
        print self.data_mat

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        return LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name):
        # one projected-gradient step with backtracking line search
        if var_name == 'W':
            step_size = 1
            beta = 0.5
            gx = LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            gradient = (self.W * self.H - self.data_mat) * self.H.transpose()
            while True:
                tmp = self.W - step_size * gradient
                fz = LA.norm(self.data_mat - tmp * self.H, 'fro')**2
                if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.W)):
                    self.W = tmp
                    break
                step_size = step_size * beta
            self.W = np.maximum(self.W, 0)
        elif var_name == 'H':
            step_size = 1
            beta = 0.5
            gx = LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            gradient = self.W.transpose() * (self.W * self.H - self.data_mat)
            while True:
                tmp = self.H - step_size * gradient
                fz = LA.norm(self.data_mat - self.W * tmp, 'fro')**2
                if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.H)):
                    self.H = tmp
                    break
                step_size = beta * step_size
            self.H = np.maximum(self.H, 0)
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):
        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print 'Start to solve the problem by PALM ----------'
        while not self.converge.d():
            # update the variables W, H alternately according to the PALM method
            start_time = time.time()
            self.update_prim_var('W')
            self.update_prim_var('H')
            end_time = time.time()
            self.time_used += end_time - start_time

            obj_val = self.get_obj_val()
            print 'nmf_PLAM: iter = ' + str(self.get_iter_num()) + ', after update H, obj = ' + str(obj_val)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'nmf_PLAM: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                          'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)
        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('HTH_I')

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = path.join(self.res_dir, 'onmf', 'plam_k++',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
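
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): one backtracking projected-
# gradient step for W, mirroring PLAM_Solver.update_prim_var above. The
# function and the toy data in the commented call are hypothetical; only the
# numpy imports at the top of this file are needed.
def _palm_w_step_demo(X, W, H, beta=0.5):
    step_size = 1.0
    gx = LA.norm(X - W * H, 'fro') ** 2
    gradient = (W * H - X) * H.transpose()
    while True:
        tmp = W - step_size * gradient
        fz = LA.norm(X - tmp * H, 'fro') ** 2
        # the same sufficient-decrease test as the solver uses
        if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - W)):
            return np.maximum(tmp, 0)  # project onto the nonnegative orthant
        step_size = step_size * beta

# example call on random data:
# rng = np.random.RandomState(0)
# X, W, H = [np.asmatrix(rng.rand(*s)) for s in [(5, 8), (5, 2), (2, 8)]]
# W = _palm_w_step_demo(X, W, H)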
class NMF_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        (m, n) = self.data_mat.shape
        # normalization factor for the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2
        # a flag to indicate which (problem, method) pair is used:
        #   0: nmf_fro + multiplicative rule
        #   1: nmf_kl  + multiplicative rule
        #   2: nmf_fro + palm
        self.flag = 0
        self.time_used = 0  # record the time used by the method

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self, flag=0):
        if flag == 0 or flag == 2:
            res = LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor
        else:
            # reconstruction
            R = self.W * self.H
            # compute the KL-divergence; in Matlab notation:
            # errs(k) = sum(V(:) .* log(V(:) ./ R(:)) - V(:) + R(:));
            tmp = np.multiply(self.data_mat.flatten('F'),
                              np.log(np.divide(self.data_mat.flatten('F'), R.flatten('F'))))
            res = np.sum(tmp - self.data_mat.flatten('F') + R.flatten('F')) / np.sqrt(self.n_factor)
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name, flag=0):
        if flag == 1:
            # preallocate a matrix of ones for the KL updates
            (m, n) = self.data_mat.shape
            Onm = np.asmatrix(np.ones((m, n)))
        if var_name == 'W':
            if flag == 0:
                # W = W .* ((V * H') ./ max(W * (H * H'), myeps));
                temp = np.divide(self.data_mat * self.H.transpose(),
                                 np.maximum(self.W * (self.H * self.H.transpose()), 1e-20))
                self.W = np.multiply(self.W, temp)
            elif flag == 1:
                R = self.W * self.H  # reconstruction
                # W = W .* (((V ./ R) * H') ./ max(Onm * H', myeps));
                temp = np.divide(np.divide(self.data_mat, R) * self.H.transpose(),
                                 np.maximum(Onm * self.H.transpose(), 1e-20))
                self.W = np.multiply(self.W, temp)
            else:
                # projected-gradient step with backtracking line search
                step_size = 1
                beta = 0.5
                gx = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2
                gradient = (self.W * self.H - self.data_mat) * self.H.transpose()
                while True:
                    tmp = self.W - step_size * gradient
                    fz = LA.norm(self.data_mat - tmp * self.H, 'fro') ** 2
                    if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.W)):
                        self.W = tmp
                        break
                    step_size = step_size * beta
                self.W = np.maximum(self.W, 0)
        elif var_name == 'H':
            if flag == 0:
                # H = H .* ((W' * V) ./ max((W' * W) * H, myeps))
                temp = np.divide(self.W.transpose() * self.data_mat,
                                 np.maximum(self.W.transpose() * self.W * self.H, 1e-20))
                self.H = np.multiply(self.H, temp)
            elif flag == 1:
                R = self.W * self.H  # reconstruction
                # H = H .* ((W' * (V ./ R)) ./ max(W' * Onm, myeps));
                temp = np.divide(self.W.transpose() * np.divide(self.data_mat, R),
                                 np.maximum(self.W.transpose() * Onm, 1e-20))
                self.H = np.multiply(self.H, temp)
            else:
                step_size = 1
                beta = 0.5
                gx = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2
                gradient = self.W.transpose() * (self.W * self.H - self.data_mat)
                while True:
                    tmp = self.H - step_size * gradient
                    fz = LA.norm(self.data_mat - self.W * tmp, 'fro') ** 2
                    if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.H)):
                        self.H = tmp
                        break
                    step_size = beta * step_size
                self.H = np.maximum(self.H, 0)
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):
        obj_val = self.get_obj_val(self.flag)
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print 'Start to solve the problem by NMF ----------'
        while not self.converge.d():
            # update the variables W, H iteratively according to the selected rules
            start_time = time.time()  # record the start time
            self.update_prim_var('W', self.flag)
            self.update_prim_var('H', self.flag)
            end_time = time.time()    # record the end time
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val(self.flag)
            self.converge.add_obj_value(obj_val)
            print 'NMF solver: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                          'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)
        print 'Stop solving the problem ----------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(self.flag), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        if self.flag == 0:
            sub_folder = 'nmf_fro_mul'
        elif self.flag == 1:
            sub_folder = 'nmf_kl'
        else:
            sub_folder = 'nmf_fro_palm'
        dir_name = path.join(self.res_dir, 'nmf', sub_folder,
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
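
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): the flag == 0 branch of
# NMF_Solver.update_prim_var is the classical Frobenius-norm multiplicative
# update; run standalone on random data, the relative error decreases
# monotonically. Sizes and seed are hypothetical.
def _mu_nmf_demo(iters=50):
    rng = np.random.RandomState(1)
    V = np.asmatrix(rng.rand(8, 12))
    W = np.asmatrix(rng.rand(8, 3))
    H = np.asmatrix(rng.rand(3, 12))
    for _ in range(iters):
        # W = W .* ((V * H') ./ max(W * (H * H'), myeps));
        W = np.multiply(W, np.divide(V * H.transpose(),
                                     np.maximum(W * (H * H.transpose()), 1e-20)))
        # H = H .* ((W' * V) ./ max((W' * W) * H, myeps))
        H = np.multiply(H, np.divide(W.transpose() * V,
                                     np.maximum((W.transpose() * W) * H, 1e-20)))
    print 'MU demo: relative error =', LA.norm(V - W * H, 'fro') ** 2 / LA.norm(V, 'fro') ** 2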
class SNCP2_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1, mul=0, nu=0):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=True)
        self.data_mat = self.data_manager.get_data_mat()
        self.mul = mul
        self.nu = nu
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.true_labels = self.data_manager.get_labels()
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.W_bound = False  # whether to constrain W by upper and lower bounds
        self.W_step = 0.51
        self.H_step = 0.51
        self.time_used = 0  # record the time elapsed when running the simulation

        start_time = time.time()
        self.initialize_penalty_para()
        end_time = time.time()
        self.time_used += end_time - start_time

        self.set_tol(1e-3)
        self.set_max_iters(400)

        W_bound = 'W_bound' if self.W_bound else 'W_nobound'
        self.output_dir = path.join(
            self.res_dir, 'onmf', 'sncp2_W1H1',
            W_bound + '_' + 'epsilon' + str(self.inner_tol) + '&gamma' + str(self.gamma)
            + '&mul' + str(self.mul) + '&nu' + str(self.nu),
            'rank' + str(self.rank), self.data_manager.get_data_name(),
            'seed' + str(self.seed_num))

        # construct a result manager to manage and save the generated results
        res_dir1 = path.join(
            res_dir, 'onmf', 'sncp2_new', self.data_manager.get_data_name(),
            'cls' + str(rank),
            W_bound + 'W' + str(self.W_step) + 'H' + str(self.H_step),
            'inner' + str(self.inner_tol) + '&gamma' + str(self.gamma)
            + '&mul' + str(self.mul) + '&nu' + str(self.nu),
            'seed' + str(self.seed_num))
        self.res_manager = ClusterONMFManager(root_dir=res_dir1, save_pdv=False)

        # initialize some lists to store per-iteration info
        self.acc_iter = []       # clustering accuracy for each iteration
        self.time_iter = []      # time used after each iteration
        self.nmf_cost_iter = []  # nmf cost after each iteration
        self.pobj_iter = []      # penalized objective value after each iteration
        self.obj_iter = []       # objective value for each iteration

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def initialize_penalty_para(self):
        self.rho = 1e-8
        self.gamma = 1.1
        self.inner_tol = 3e-3
        (ha, hb) = self.H.shape
        self.I_ha = np.asmatrix(np.eye(ha))
        self.B = np.zeros_like(self.H)
        self.all_1_mat = np.asmatrix(np.ones((ha, hb)))
        self.max_val = np.max(self.data_mat)
        self.min_val = np.min(self.data_mat)

    def get_nmf_cost(self, W, H):
        return LA.norm(self.data_mat - np.asmatrix(W) * np.asmatrix(H),
                       'fro')**2 / self.n_factor

    def get_obj_val(self, W, H):
        res = LA.norm(self.data_mat - np.asmatrix(W) * np.asmatrix(H), 'fro')**2 / self.n_factor \
            + 0.5 * self.nu * LA.norm(H, 'fro') ** 2 + 0.5 * self.mul * LA.norm(W, 'fro') ** 2
        return res

    def get_penalized_obj(self, W, H):
        '''
        penalized objective:
            ||X - WH||_F^2 / n_factor + rho * sum_j (||h_j||_1 - ||h_j||_inf)
            + 0.5 * nu * ||H||_F^2 + 0.5 * mul * ||W||_F^2
        '''
        (ha, hb) = H.shape
        tmp = 0
        for k in range(hb):
            tmp = tmp + (LA.norm(H[:, k], 1) - LA.norm(H[:, k], np.inf))
        return LA.norm(self.data_mat - W * H, 'fro') ** 2 / self.n_factor + self.rho * tmp \
            + 0.5 * self.nu * LA.norm(H, 'fro') ** 2 + 0.5 * self.mul * LA.norm(W, 'fro') ** 2

    def get_onmf_cost(self, W, H, nu=0, mul=0):
        '''
        Return the (regularized) approximation error of ONMF for the current W and H.

        Args:
            W (numpy array or mat): the factor W
            H (numpy array or mat): the factor H
            nu (float): the parameter of nu * ||H||_F^2
            mul (float): the parameter of mul * ||W||_F^2
        Returns:
            the cost
        '''
        res = LA.norm(self.data_mat - W * H, 'fro')**2 / self.n_factor \
            + 0.5 * nu * LA.norm(H, 'fro') ** 2 \
            + 0.5 * mul * LA.norm(W, 'fro') ** 2
        return res

    def get_sncp_cost(self, W, H, nu=0, mul=0, rho=0):
        '''
        Return the cost of the penalized subproblem solved by SNCP.

        Args:
            W (numpy array or mat): the factor W
            H (numpy array or mat): the factor H
            nu (float): the parameter of nu * ||H||_F^2
            mul (float): the parameter of mul * ||W||_F^2
            rho (float): the penalty parameter of rho * sum_j (||h_j||_1 - ||h_j||_inf)
        Returns:
            the cost
        '''
        (ha, hb) = H.shape
        tmp = 0
        for k in range(hb):
            tmp = tmp + (LA.norm(H[:, k], 1) - LA.norm(H[:, k], np.inf))
        return LA.norm(self.data_mat - W * H, 'fro') ** 2 / self.n_factor + rho * tmp \
            + 0.5 * nu * LA.norm(H, 'fro') ** 2 + 0.5 * mul * LA.norm(W, 'fro') ** 2

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var_by_PALM0(self, k, W_init=None, H_init=None, max_iter=1000,
                                 tol=1e-1, verbose=False):
        '''
        Alternately update the primal variables in a Gauss-Seidel fashion.
        H is updated by the proximal gradient method and W by the proximal
        subgradient method.

        Input:
            k        ------ the outer iteration number
            W_init   ------ the initialization for W
            H_init   ------ the initialization for H
            max_iter ------ the max number of iterations for PALM
            tol      ------ the tolerance for stopping PALM
            verbose  ------ flag to control the output of debug info
        '''
        if W_init is None or H_init is None:
            raise ValueError('Error: inner iterations by PALM lack initializations!')

        start_time = time.time()  # record the start time
        H_j_pre = np.asmatrix(np.copy(H_init))
        W_j_pre = np.asmatrix(np.copy(W_init))
        (ha, hb) = H_j_pre.shape
        end_time = time.time()
        self.time_used += end_time - start_time

        for j in range(max_iter):
            # update H and W by one proximal (sub)gradient step each
            start_time = time.time()
            self.B.fill(0)
            self.B[H_j_pre.argmax(0), np.arange(hb)] = 1
            Hessian = 2 * W_j_pre.transpose() * W_j_pre / self.n_factor + self.nu * self.I_ha
            t = self.H_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_H_pre = Hessian * H_j_pre - 2 * W_j_pre.transpose() * self.data_mat / self.n_factor \
                + self.rho * (self.all_1_mat - self.B)
            H_j_cur = np.maximum(0, H_j_pre - grad_H_pre / t)

            Hessian = 2 * H_j_cur * H_j_cur.transpose() / self.n_factor + self.mul * self.I_ha
            c = self.W_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_W_pre = W_j_pre * Hessian - 2 * self.data_mat * H_j_cur.transpose() / self.n_factor
            if self.W_bound:
                W_j_cur = np.minimum(self.max_val, np.maximum(0, W_j_pre - grad_W_pre / c))
            else:
                W_j_cur = np.maximum(0, W_j_pre - grad_W_pre / c)

            if verbose:
                obj = self.get_obj_val(W_j_cur, H_j_cur)
                pobj = self.get_penalized_obj(W_j_cur, H_j_cur)
                # calculate and store the clustering accuracy
                pre_labels = np.argmax(np.asarray(H_j_cur), 0)
                if self.true_labels is None:
                    raise ValueError('Error: no labels!')
                acc = calculate_accuracy(pre_labels, self.true_labels)
                self.acc_iter.append(acc)
                self.obj_iter.append(obj)
                self.pobj_iter.append(pobj)
                cost = self.get_nmf_cost(W_j_cur, H_j_cur)
                self.nmf_cost_iter.append(cost)
                onmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, self.nu, self.mul)
                sncp_cost = self.get_sncp_cost(W_j_cur, H_j_cur, self.nu, self.mul, self.rho)
                self.res_manager.add_cost_value('onmf_cost_palm', onmf_cost)  # store obj val
                self.res_manager.add_cost_value('palm_cost', sncp_cost)
                nmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, 0, 0)
                self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)

            # check the convergence
            H_j_change = LA.norm(H_j_cur - H_j_pre, 'fro') / LA.norm(H_j_pre, 'fro')
            W_j_change = LA.norm(W_j_cur - W_j_pre, 'fro') / LA.norm(W_j_pre, 'fro')

            # update the previous iterates
            H_j_pre = np.asmatrix(np.copy(H_j_cur))
            W_j_pre = np.asmatrix(np.copy(W_j_cur))
            end_time = time.time()
            self.time_used += end_time - start_time
            self.time_iter.append(self.time_used)

            if H_j_change + W_j_change < tol:
                self.res_manager.push_iters(self.rho, j + 1)
                break
        return (W_j_cur, H_j_cur, j + 1)

    def update_prim_var_by_PALM1(self, k, W_init=None, H_init=None, max_iter=1000,
                                 tol=1e-1, verbose=False):
        '''
        Alternately update the primal variables in a Gauss-Seidel fashion.
        Each update is performed by the proximal gradient method, keeping the
        infinity norm as the non-smooth part.

        Input: (same as update_prim_var_by_PALM0)
        '''
        if W_init is None or H_init is None:
            raise ValueError('Error: inner iterations by PALM lack initializations!')

        start_time = time.time()  # record the start time
        H_j_pre, W_j_pre, H_j_cur, W_j_cur = H_init, W_init, H_init, W_init
        (ha, hb) = H_j_pre.shape
        end_time = time.time()
        self.time_used += end_time - start_time

        for j in range(max_iter):
            # update H and W by one proximal gradient step each
            start_time = time.time()
            # keep the infinity norm as the non-smooth part
            Hessian = 2 * W_j_pre.transpose() * W_j_pre / self.n_factor + self.nu * self.I_ha
            t = self.H_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_H_pre = Hessian * H_j_pre - 2 * W_j_pre.transpose() * self.data_mat / self.n_factor \
                + self.rho * self.all_1_mat
            H_j_cur = H_j_pre - grad_H_pre / t
            self.B.fill(0)
            self.B[H_j_cur.argmax(0), np.arange(hb)] = 1
            H_j_cur += (self.rho / t) * self.B
            H_j_cur = np.maximum(H_j_cur, 0)

            Hessian = 2 * H_j_cur * H_j_cur.transpose() / self.n_factor + self.mul * self.I_ha
            c = self.W_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_W_pre = W_j_pre * Hessian - 2 * self.data_mat * H_j_cur.transpose() / self.n_factor
            if self.W_bound:
                W_j_cur = np.minimum(self.max_val, np.maximum(0, W_j_pre - grad_W_pre / c))
            else:
                W_j_cur = np.maximum(0, W_j_pre - grad_W_pre / c)

            if verbose:
                obj = self.get_obj_val(W_j_cur, H_j_cur)
                pobj = self.get_penalized_obj(W_j_cur, H_j_cur)
                # calculate and store the clustering accuracy
                pre_labels = np.argmax(np.asarray(H_j_cur), 0)
                if self.true_labels is None:
                    raise ValueError('Error: no labels!')
                acc = calculate_accuracy(pre_labels, self.true_labels)
                self.acc_iter.append(acc)
                self.obj_iter.append(obj)
                self.pobj_iter.append(pobj)
                cost = self.get_nmf_cost(W_j_cur, H_j_cur)
                self.nmf_cost_iter.append(cost)
                onmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, self.nu, self.mul)
                sncp_cost = self.get_sncp_cost(W_j_cur, H_j_cur, self.nu, self.mul, self.rho)
                self.res_manager.add_cost_value('onmf_cost_palm', onmf_cost)  # store obj val
                self.res_manager.add_cost_value('palm_cost', sncp_cost)
                nmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, 0, 0)
                self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)

            # check the convergence
            H_j_change = LA.norm(H_j_cur - H_j_pre, 'fro') / LA.norm(H_j_pre, 'fro')
            W_j_change = LA.norm(W_j_cur - W_j_pre, 'fro') / LA.norm(W_j_pre, 'fro')

            # update the previous iterates
            H_j_pre = np.asmatrix(np.copy(H_j_cur))
            W_j_pre = np.asmatrix(np.copy(W_j_cur))
            end_time = time.time()
            self.time_used += end_time - start_time
            self.time_iter.append(self.time_used)

            if H_j_change + W_j_change < tol:
                self.res_manager.push_iters(self.rho, j + 1)
                break
        return (W_j_cur, H_j_cur, j + 1)

    def update_scheme(self):
        '''
        One outer iteration: update the primal variables W, H by PALM, then
        increase the penalty parameter rho.
        '''
        # (self.W, self.H, inner_iter_num) = self.update_prim_var_by_PALM0(
        #     self.get_iter_num(), self.W, self.H, 3000, self.inner_tol, verbose=False)
        (self.W, self.H, inner_iter_num) = self.update_prim_var_by_PALM1(
            self.get_iter_num(), self.W, self.H, 3000, self.inner_tol, verbose=False)

        # show the feasibility satisfaction level of H * H^T - I
        (ha, hb) = self.H.shape
        H_norm = np.asmatrix(np.diag(np.diag(self.H * self.H.transpose())**(-0.5))) * self.H
        fea = LA.norm(H_norm * H_norm.transpose() - np.asmatrix(np.eye(ha)), 'fro') / (ha * ha)

        start_time = time.time()
        self.rho = np.minimum(self.rho * self.gamma, 1e10)
        print self.rho
        end_time = time.time()
        self.time_used += end_time - start_time
        return inner_iter_num

    def solve(self):
        '''
        problem formulation:
            min ||X - WH||_F^2 + rho * sum_j (||h_j||_1 - ||h_j||_inf)
                + 0.5 * nu * ||H||_F^2 + 0.5 * mul * ||W||_F^2
        '''
        obj = self.get_obj_val(self.W, self.H)
        p_obj = self.get_penalized_obj(self.W, self.H)
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj_val = ' + str(obj) \
            + ', penalized_obj = ' + str(p_obj)
        self.converge.add_obj_value(obj)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)
        print self.H[:, 0]

        inner_iter_nums = []  # record the number of inner iterations
        acc_sncp = []         # record the clustering accuracy for each SNCP iteration
        time_sncp = []        # record the time used after each SNCP iteration
        nmf_cost_sncp = []    # record the nmf cost after each SNCP iteration
        pobj_sncp = []        # record the penalized objective value after each iteration

        cost = self.get_nmf_cost(self.W, self.H)
        nmf_cost_sncp.append(cost)
        pobj_sncp.append(p_obj)
        self.pobj_iter.append(p_obj)
        self.nmf_cost_iter.append(cost)
        self.obj_iter.append(obj)

        # calculate the clustering accuracy
        pre_labels = np.argmax(np.asarray(self.H), 0)
        if self.true_labels is None:
            raise ValueError('Error: no labels!')
        print len(self.true_labels)
        acc = calculate_accuracy(pre_labels, self.true_labels)
        acc_sncp.append(acc)
        self.acc_iter.append(acc)
        time_sncp.append(self.time_used)
        self.time_iter.append(self.time_used)
        fea = 100

        self.res_manager.push_W(self.W)       # store W
        self.res_manager.push_H(self.H)       # store H
        self.res_manager.push_H_norm_ortho()  # store feasibility
        nmf_cost = self.get_onmf_cost(self.W, self.H, 0, 0)
        onmf_cost = self.get_onmf_cost(self.W, self.H, self.nu, self.mul)
        sncp_cost = self.get_sncp_cost(self.W, self.H, self.nu, self.mul, self.rho)
        self.res_manager.add_cost_value('onmf_cost_sncp', onmf_cost)  # store obj val
        self.res_manager.add_cost_value('sncp_cost', sncp_cost)
        self.res_manager.add_cost_value('onmf_cost_palm', onmf_cost)  # store obj val
        self.res_manager.add_cost_value('palm_cost', sncp_cost)
        self.res_manager.add_cost_value('nmf_cost_sncp', nmf_cost)
        self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)
        cls_assign = self.res_manager.calculate_cluster_quality(self.true_labels)
        self.res_manager.push_time(self.time_used)

        print 'Start to solve the problem by SNCP2 ----------'
        while not self.converge.d() or fea > 1e-10:
            # update the variables W, H
            num = self.update_scheme()
            inner_iter_nums.append(num)
            time_sncp.append(self.time_used)
            print 'time used: ' + str(self.time_used) + ', inner_num: ' + str(num)

            # calculate the clustering accuracy
            pre_labels = np.argmax(np.asarray(self.H), 0)
            if self.true_labels is None:
                raise ValueError('Error: no labels!')
            acc = calculate_accuracy(pre_labels, self.true_labels)
            acc_sncp.append(acc)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective values
            obj = self.get_obj_val(self.W, self.H)
            p_obj = self.get_penalized_obj(self.W, self.H)
            nmf_cost_sncp.append(self.get_nmf_cost(self.W, self.H))
            self.converge.add_obj_value(obj)
            pobj_sncp.append(p_obj)
            print 'onmf_SNCP2: iter = ' + str(self.get_iter_num()) + ', obj_val = ' + str(obj) \
                + ', penalized_obj = ' + str(p_obj)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            H_norm = np.asmatrix(np.diag(np.diag(self.H * self.H.transpose())**(-0.5))) * self.H
            fea = LA.norm(H_norm * H_norm.transpose() - np.asmatrix(np.eye(ha)), 'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)

            # store the generated results by the result manager
            self.res_manager.push_W(self.W)       # store W
            self.res_manager.push_H(self.H)       # store H
            self.res_manager.push_H_norm_ortho()  # store feasibility
            self.res_manager.push_W_norm_residual()
            self.res_manager.push_H_norm_residual()
            nmf_cost = self.get_onmf_cost(self.W, self.H, 0, 0)
            onmf_cost = self.get_onmf_cost(self.W, self.H, self.nu, self.mul)
            sncp_cost = self.get_sncp_cost(self.W, self.H, self.nu, self.mul, self.rho)
            self.res_manager.add_cost_value('onmf_cost_sncp', onmf_cost)  # store obj val
            self.res_manager.add_cost_value('sncp_cost', sncp_cost)
            self.res_manager.add_cost_value('onmf_cost_palm', onmf_cost)  # store obj val
            self.res_manager.add_cost_value('palm_cost', sncp_cost)
            self.res_manager.add_cost_value('nmf_cost_sncp', nmf_cost)
            self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)
            cls_assign = self.res_manager.calculate_cluster_quality(self.true_labels)
            self.res_manager.push_time(self.time_used)

            print 'HTH:'
            print self.H * self.H.transpose()
            print 'the L2-norm of columns of H:'
            print LA.norm(self.H, axis=0)

        # save the recorded per-iteration statistics
        self.converge.save_data(inner_iter_nums, self.output_dir, 'inner_nums.csv')
        self.converge.save_data(acc_sncp, self.output_dir, 'acc_sncp.csv')
        self.converge.save_data(nmf_cost_sncp, self.output_dir, 'nmf_cost_sncp.csv')
        self.converge.save_data(pobj_sncp, self.output_dir, 'pobj_sncp.csv')
        self.converge.save_data(self.obj_iter, self.output_dir, 'obj_iters.csv')
        self.converge.save_data(self.acc_iter, self.output_dir, 'acc_iters.csv')
        self.converge.save_data(self.nmf_cost_iter, self.output_dir, 'nmf_cost_iters.csv')
        self.converge.save_data(self.pobj_iter, self.output_dir, 'pobj_iters.csv')
        self.converge.save_data(self.time_iter, self.output_dir, 'time_iters.csv')
        print 'Stop solving the problem ---------'
        self.converge_analysis()
        self.res_manager.write_to_csv()  # store the generated results in csv files

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_nmf_cost(self.W, self.H), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = self.output_dir
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
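
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): the penalty driving SNCP2,
# sum_j (||h_j||_1 - ||h_j||_inf), vanishes exactly when every column of H
# has at most one non-zero entry, and B marks the largest entry per column
# (the subgradient of the infinity-norm term), just as in
# update_prim_var_by_PALM0/1. The toy H below is hypothetical.
def _sncp_penalty_demo():
    H = np.asmatrix([[0.9, 0.0], [0.1, 1.2]])
    pen = sum(LA.norm(H[:, j], 1) - LA.norm(H[:, j], np.inf)
              for j in range(H.shape[1]))
    Ha = np.asarray(H)
    B = np.zeros_like(Ha)
    B[Ha.argmax(0), np.arange(Ha.shape[1])] = 1
    print 'SNCP demo: penalty =', pen, '(column 1 is single-nonzero, column 0 is not)'
    print B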
class HALS_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        # HALS works on X^T: samples are stored in the rows of data_mat
        self.data_mat = np.asmatrix(np.copy(self.data_mat).transpose())
        W_init, H_init = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.F, self.G = H_init.transpose(), W_init
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        (m, n) = self.data_mat.shape
        # flag to indicate whether G can be negative:
        #   flag = 0: G must be nonnegative
        #   flag = 1: G can be negative
        self.flag = 0
        # normalization factor for the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.time_used = 0  # record the time used by the method
        self.U = None       # running sum of the columns of F, used to update F

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        return LA.norm(self.data_mat - self.F * self.G.transpose(), 'fro')**2 / self.n_factor

    def get_iter_num(self):
        return self.converge.len()

    def update_F(self):
        A = self.data_mat * self.G
        B = self.G.transpose() * self.G
        for j in range(self.rank):
            Fj = self.U - self.F[:, j]  # sum of the other columns of F
            h = A[:, j] - self.F * B[:, j] + B[j, j] * self.F[:, j]
            # orthogonalize h against Fj, then project onto the nonnegative orthant
            tmp = np.multiply(Fj.transpose() * h, Fj) / np.asscalar(Fj.transpose() * Fj)
            tmp = h - tmp
            fj = np.maximum(1e-30, tmp)
            fj = fj / LA.norm(fj, 2)  # normalize to unit length
            self.F[:, j] = fj
            self.U = Fj + fj

    def update_G(self):
        C = self.data_mat.transpose() * self.F
        D = self.F.transpose() * self.F
        for j in range(self.rank):
            if self.flag == 0:
                temp = C[:, j] - self.G * D[:, j] + D[j, j] * self.G[:, j]
                self.G[:, j] = np.maximum(temp, 1e-30)
            else:
                self.G[:, j] = C[:, j] - self.G * D[:, j] + D[j, j] * self.G[:, j]

    def solve(self):
        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('F', self.F)
        self.converge.add_prim_value('G', self.G)

        # initialize U as the sum of the columns of F
        start_time = time.time()
        self.U = self.F * np.asmatrix(np.ones(self.rank)).transpose()
        end_time = time.time()
        self.time_used += end_time - start_time

        print 'Start to solve the problem by HALS ONMF ----------'
        while not self.converge.d():
            # update the variables F, G iteratively according to the HALS method
            start_time = time.time()  # record the start time
            self.update_F()
            self.update_G()
            end_time = time.time()    # record the end time
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('F', self.F)
            self.converge.add_prim_value('G', self.G)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_HALS: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            fea = LA.norm(self.F.transpose() * self.F - np.asmatrix(np.eye(self.rank)),
                          'fro') / (self.rank * self.rank)
            self.converge.add_fea_condition_value('FTF_I', fea)
        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.G, self.F.transpose()

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('FTF_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from F (which plays the role of H^T) '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.F), 1)
        if len(labels) != self.data_mat.shape[0]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        m_name = 'hals_' + str(self.flag)
        dir_name = path.join(self.res_dir, 'onmf', m_name,
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
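
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): one HALS column update as in
# HALS_Solver.update_F. The candidate column h is orthogonalized against the
# sum of the remaining columns Fj, clipped, and normalized, so the updated
# column keeps unit length. All data below are hypothetical.
def _hals_column_demo():
    rng = np.random.RandomState(2)
    X = np.asmatrix(rng.rand(10, 4))  # samples in the rows, as in HALS_Solver
    F = np.asmatrix(rng.rand(10, 3))
    G = np.asmatrix(rng.rand(4, 3))
    U = F * np.asmatrix(np.ones((3, 1)))  # running column sum, as in solve()
    A = X * G
    B = G.transpose() * G
    j = 0
    Fj = U - F[:, j]
    h = A[:, j] - F * B[:, j] + B[j, j] * F[:, j]
    f = h - np.multiply(Fj.transpose() * h, Fj) / float(Fj.transpose() * Fj)
    f = np.maximum(1e-30, f)
    f = f / LA.norm(f, 2)
    print 'HALS demo: ||f||_2 =', LA.norm(f, 2)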
class ONPMF_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: input is missing!')
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=True)
        self.W = np.asmatrix(self.W, dtype=np.float64)
        self.H = np.asmatrix(self.H, dtype=np.float64)
        # normalization factor for the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2
        self.time_used = 0
        # flag to indicate whether W can be negative, depending on the data:
        #   flag = 0: W must be nonnegative
        #   flag = 1: W can be negative
        self.flag = 0

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    ''' initialize the primal variables '''
    def initialize_prim_vars(self):
        self.W = np.asmatrix(self.W)
        self.converge.add_prim_value('W', self.W)  # store the initial values
        self.converge.add_prim_value('H', self.H)

    ''' initialize the dual variables '''
    def initialize_dual_vars(self):
        (m, n) = self.data_mat.shape
        self.Z = np.asmatrix(np.zeros(shape=(self.rank, n), dtype=np.float64))
        self.converge.add_dual_value('Z', self.Z)  # store the initial dual value

    ''' initialize the penalty parameters '''
    def initialize_penalty_para(self, flag=0):
        self.rol = 0.01
        self.alpha = 100
        self.gamma = 1.01
        self.scale = 0.00001  # a scale of 0.001 was used for the mnist#8 data in earlier runs
        if self.flag == 0:
            self.mul = 0
        else:
            self.mul = 1e-10  # used when W can be negative

    ''' compute the augmented Lagrangian value, for testing '''
    def get_lag_val(self):
        sum_t = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2 / 2 \
            + self.mul * LA.norm(self.W, 'fro')**2 + np.trace(self.Z.transpose() * (-self.H)) \
            + 0.5 * self.rol * LA.norm(np.minimum(self.H, 0), 'fro')**2
        return sum_t

    def get_obj_val(self):
        return LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor

    def get_iter_num(self):
        return self.converge.len()

    ''' update the primal variable with a given name at each iteration of ADMM '''
    def update_prim_var(self, var_name):
        if var_name == 'H':
            # update H by a gradient step projected onto the Stiefel manifold,
            # with backtracking line search
            beta = 0.01
            step_size = 1
            gradient = self.W.transpose() * (self.W * self.H - self.data_mat) \
                - self.Z + self.rol * np.minimum(0, self.H)
            Lx = 0.5 * LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            Lx = Lx + np.trace(self.Z.transpose() * (-self.H))
            Lx = Lx + 0.5 * self.rol * LA.norm(np.minimum(self.H, 0), 'fro') ** 2
            while True:
                B = self.H - step_size * gradient
                U, s, V = scipy.linalg.svd(B)
                (a, b) = B.shape
                E = np.asmatrix(np.eye(a, b))
                # scipy returns V already transposed, so no extra transpose is needed
                H_new = np.asmatrix(U) * E * np.asmatrix(V)
                Lz = 0.5 * LA.norm(self.data_mat - self.W * H_new, 'fro')**2 \
                    + np.trace(self.Z.transpose() * (-H_new)) \
                    + 0.5 * self.rol * LA.norm(np.minimum(H_new, 0), 'fro') ** 2
                if Lz <= Lx + self.scale * np.trace(gradient.transpose() * (H_new - self.H)):
                    break
                step_size = step_size * beta
            self.H = np.asmatrix(np.copy(H_new))
        elif var_name == 'W':
            if self.flag == 0:
                # solve a nonnegative least-squares problem for each row of W
                (wa, wb) = self.W.shape
                for i in range(wa):
                    b = np.array(self.data_mat[i, :]).flatten()
                    t, _ = optm.nnls(self.H.transpose(), b)
                    self.W[i, :] = np.asmatrix(t)
            else:
                # unconstrained (ridge-regularized) least squares
                (ha, hb) = self.H.shape
                I_ha = np.asmatrix(np.eye(ha))
                self.W = self.data_mat * self.H.transpose() \
                    * LA.inv(self.H * self.H.transpose() + self.mul * I_ha)
        else:
            raise ValueError('Error: no primal variable with this name to be updated!')

    ''' update the dual variable with a given name at each iteration of ADMM '''
    def update_dual_var(self, var_name):
        if var_name == 'Z':
            k = np.maximum(self.get_iter_num(), 1)
            self.Z = np.maximum(0, self.Z - (self.alpha / k) * self.H)
        else:
            raise ValueError('Error: no dual variable with this name to be updated!')

    ''' update the penalty parameter rol adaptively '''
    def update_penalty_parameters(self):
        self.rol = self.gamma * self.rol

    def solve(self):
        self.initialize_prim_vars()
        self.initialize_dual_vars()
        self.initialize_penalty_para()
        data_name = self.data_manager.get_data_name()
        if data_name.startswith('tdt2') or data_name.startswith('tcga'):
            self.set_max_iters(500)
        print self.H[:, 0]

        print 'Start to solve the problem by ADMM ------------'
        while not self.converge.d():
            start_time = time.time()
            # update the primal and dual variables according to the ADMM algorithm
            self.update_prim_var('W')
            self.update_prim_var('H')
            self.update_dual_var('Z')
            self.update_penalty_parameters()
            end_time = time.time()
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis; the change of
            # each primal and dual variable is also computed and recorded
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)
            self.converge.add_dual_value('Z', self.Z)

            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_onpmf: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                          'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)
        print 'Stop solving the problem ------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal objective value and feasibility level '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        m_name = 'onp_mf1_' + str(self.flag)
        dir_name = path.join(self.res_dir, 'onmf', m_name, 'alpha100',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the dual change ------'
        self.converge.plot_convergence_dual_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
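
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): the SVD-based projection used in
# ONPMF_Solver.update_prim_var('H'). Replacing the singular values of B with
# ones projects B onto the set of matrices with orthonormal rows, so
# H_new * H_new^T is the identity. The data below are hypothetical.
def _stiefel_projection_demo():
    rng = np.random.RandomState(3)
    B = np.asmatrix(rng.randn(3, 8))
    U, s, V = scipy.linalg.svd(B)  # V is returned already transposed
    E = np.asmatrix(np.eye(3, 8))
    H_new = np.asmatrix(U) * E * np.asmatrix(V)
    print 'Stiefel demo: H_new * H_new.T ='
    print np.round(H_new * H_new.transpose(), 6)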
class DTPP_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.data_mat = self.data_manager.get_data_mat()
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        np.random.seed(seed_num)  # set the seed so that each run gets the same initial values
        (m, n) = self.data_mat.shape
        # normalization factor for the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.time_used = 0  # record the time used by the method

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        return LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name):
        if var_name == 'W':
            # W = W .* ((V * H') ./ max(W * (H * H'), myeps));
            temp = np.divide(self.data_mat * self.H.transpose(),
                             np.maximum(self.W * (self.H * self.H.transpose()), 1e-20))
            self.W = np.multiply(self.W, temp)
        elif var_name == 'H':
            # H = H .* (((W' * V) ./ max(W' * V * (H' * H), myeps)) .^ (1/2));
            temp = np.divide(self.W.transpose() * self.data_mat,
                             np.maximum(self.W.transpose() * self.data_mat * (self.H.transpose() * self.H), 1e-20))
            self.H = np.multiply(self.H, np.power(temp, 0.5))
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):
        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)
        print 'H0'
        print self.H

        print 'Start to solve the problem by DTPP ----------'
        while not self.converge.d():
            # update the variables W, H iteratively according to the DTPP method
            start_time = time.time()  # record the start time
            self.update_prim_var('W')
            self.update_prim_var('H')
            end_time = time.time()    # record the end time
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_DTPP: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                          'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)
        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = path.join(self.res_dir, 'onmf', 'dtpp',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables
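
# ---------------------------------------------------------------------------
# Illustrative sketch (added for exposition): the DTPP multiplicative updates
# above, iterated on random data. The square-root step on H is intended to
# steer H * H^T towards the identity, which the same feasibility measure used
# by the solver tracks. Sizes, seed, and the iteration count are hypothetical.
def _dtpp_demo(iters=200):
    rng = np.random.RandomState(4)
    V = np.asmatrix(rng.rand(8, 12))
    W = np.asmatrix(rng.rand(8, 3))
    H = np.asmatrix(rng.rand(3, 12))
    for _ in range(iters):
        W = np.multiply(W, np.divide(V * H.transpose(),
                                     np.maximum(W * (H * H.transpose()), 1e-20)))
        temp = np.divide(W.transpose() * V,
                         np.maximum(W.transpose() * V * (H.transpose() * H), 1e-20))
        H = np.multiply(H, np.power(temp, 0.5))
    fea = LA.norm(H * H.transpose() - np.asmatrix(np.eye(3)), 'fro') / 9
    print 'DTPP demo: ||HH^T - I||_F / K^2 =', fea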