def fit_new_doc(self, a):
    # With W fixed, solve for h given a new document a (fold-in).
    # Scale the random init so that (W h).mean() is roughly a.mean().
    avg = np.sqrt(a.mean() / self.k)
    h = avg * self.rng.randn(self.k, 1).astype(np.float32)
    np.abs(h, out=h)  # in-place absolute value to enforce nonnegativity
    # Initial error and subproblem tolerance.
    tolh = max(0.001, self.tol)
    error_at_init = self.error_new_doc(a, h)
    print(error_at_init)
    self.error_array = [error_at_init]
    for i in range(self.max_iter):
        # With W fixed, solve the NNLS subproblem for h.
        h, _, iter_count = nmf.nlssubprob(a, self.w, h, tolh, 1000)
        if iter_count == 1:
            # Subproblem met its tolerance immediately: tighten it.
            tolh *= 0.1
        error = self.error_new_doc(a, h)
        print(error)
        self.error_array.append(error)
        if i + 1 >= self.min_iter and \
                np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
            print(f'Convergence reached; stopping early after iteration {i}')
            break
    return h
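# All three fit_* methods delegate their nonnegative least-squares
# subproblems to nmf.nlssubprob. A minimal sketch of what such a helper
# does, assuming it follows C.-J. Lin's projected-gradient NNLS reference
# implementation (an illustration, not the project's actual module):
def nlssubprob_sketch(V, W, Hinit, tol, maxiter):
    """Solve min_{H>=0} ||V - W H||_F^2 by projected gradient.

    Returns (H, grad, n_iter); n_iter == 1 signals the tolerance was
    already met, which is why callers then shrink tol by 0.1.
    """
    H = Hinit
    WtV = W.T @ V
    WtW = W.T @ W
    alpha, beta = 1.0, 0.1
    for n_iter in range(1, maxiter + 1):
        grad = WtW @ H - WtV
        # Projected gradient norm: only components still free to move.
        projgrad = np.linalg.norm(grad[(grad < 0) | (H > 0)])
        if projgrad < tol:
            break
        # Backtracking line search on the step size alpha.
        for inner in range(1, 21):
            Hn = np.maximum(H - alpha * grad, 0)
            d = Hn - H
            gradd = np.sum(grad * d)
            dQd = np.sum((WtW @ d) * d)
            suff_decr = 0.99 * gradd + 0.5 * dQd < 0
            if inner == 1:
                decr_alpha = not suff_decr
                Hp = H
            if decr_alpha:
                if suff_decr:
                    H = Hn
                    break
                alpha *= beta
            else:
                if not suff_decr or (Hp == Hn).all():
                    H = Hp
                    break
                alpha /= beta
                Hp = Hn
    return H, grad, n_iter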
def fit_original_nmf_cjlin_nnls(self):
    """
    Compute the original NMF. Loss function:
        min ||A - W H||_F^2  s.t.  W >= 0, H >= 0,
    solved by alternating NNLS subproblems.
    """
    tolw = max(0.001, self.tol)
    tolh = tolw
    error_at_init = self.error_original_nmf()
    self.error_array = [error_at_init]
    self.callback_set_nmf_training_progress(0, error_at_init)
    at = self.a.T
    for i in range(self.max_iter):
        if i % 2 == 0:
            # With H fixed, solve for W via the transposed problem
            # min ||A^T - H^T W^T||_F^2.
            self.w, _, iter_count = nmf.nlssubprob(at, self.h.T, self.w.T,
                                                   tolw, 1000)
            self.w = self.w.T
            if iter_count == 1:
                tolw *= 0.1
        else:
            # With W fixed, solve for H.
            self.h, _, iter_count = nmf.nlssubprob(self.a, self.w, self.h,
                                                   tolh, 1000)
            if iter_count == 1:
                tolh *= 0.1
        error = self.error_original_nmf()
        print(error)
        self.error_array.append(error)
        self.callback_set_nmf_training_progress(i + 1, error)
        if i + 1 >= self.min_iter and \
                np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
            print(f'Convergence reached; stopping early after iteration {i}')
            break
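# Why the W half-step above can reuse the same H solver: by symmetry of the
# Frobenius norm, ||A - W H||_F == ||A^T - H^T W^T||_F, so fixing H and
# solving the transposed NNLS problem for W^T updates W. A standalone
# sanity check of that identity (illustrative shapes, not project data):
def _check_transpose_trick():
    rng = np.random.default_rng(0)
    a = rng.random((6, 4))   # m x n data matrix
    w = rng.random((6, 3))   # m x k basis
    h = rng.random((3, 4))   # k x n coefficients
    assert np.isclose(np.linalg.norm(a - w @ h),
                      np.linalg.norm(a.T - h.T @ w.T))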
def fit_semi_supervised_nmf_cjlin_nnls(self):
    """
    Compute the semi-supervised NMF. Loss function (as implemented by the
    update steps below):
        min ||A - W H||_F^2 + ||MW (W - WR)^T||_F^2
            + sum_j MH[j,j]^2 * ||H[:,j] - DH[j,j] * HR[:,j]||^2
        s.t. W >= 0, H >= 0,
    where WR / HR are the reference factors, MW / MH are diagonal penalty
    weights, and DH[j,j] is the optimal scale of HR[:,j] against H[:,j].
    """
    tolw = max(0.001, self.tol)
    tolh_list = [tolw] * self.n
    error_at_init = self.error_semi_supervised_nmf()
    self.error_array = [error_at_init]
    self.callback_set_nmf_training_progress(0, error_at_init)
    # Pre-build the stacked matrices for the W subproblem; only the H^T
    # block changes between iterations, so it is refreshed in place.
    vstack_ht_mw = np.vstack((self.h.T, self.mw))
    vstack_at_hstack_mw_wrt = np.vstack(
        (self.a.T, np.dot(self.mw, self.wr.T)))
    for i in range(self.max_iter):
        if i % 2 == 0:
            # With H fixed, solve for W.
            vstack_ht_mw[0:self.n, 0:self.k] = self.h.T
            self.w, _, iter_count = nmf.nlssubprob(vstack_at_hstack_mw_wrt,
                                                   vstack_ht_mw,
                                                   self.w.T, tolw, 1000)
            self.w = self.w.T
            if iter_count == 1:
                tolw *= 0.1
            print(f'Subproblem iterations: {iter_count}')
        else:
            # With W fixed, solve for H one column at a time, since each
            # column carries its own penalty weight MH[ci, ci].
            for ci in range(self.n):
                vstack_w_mhi_ik = np.vstack(
                    (self.w, self.mh[ci, ci] * self.ik))
                vstack_aci_mhi_dhi_hrci = np.vstack(
                    (self.a[:, ci].reshape((self.m, 1)),
                     (self.mh[ci, ci] * self.dh[ci, ci] *
                      self.hr[:, ci]).reshape((self.k, 1))))
                hci, _, iter_count = nmf.nlssubprob(
                    vstack_aci_mhi_dhi_hrci, vstack_w_mhi_ik,
                    self.h[:, ci].reshape((self.k, 1)),
                    tolh_list[ci], 1000)
                self.h[:, ci] = hci.reshape(self.k)
                if iter_count == 1:
                    tolh_list[ci] *= 0.1
            # Update DH: least-squares scale of HR[:,j] onto H[:,j].
            for j in range(self.n):
                if self.mh[j, j] == 0:
                    self.dh[j, j] = 0
                else:
                    hri = self.hr[:, j]
                    # No reshape needed for a vector-vector product:
                    # for 1-D arrays, np.dot(v1, v2) is the inner
                    # product v1.T @ v2.
                    self.dh[j, j] = (np.dot(hri, self.h[:, j]) /
                                     np.dot(hri, hri))
        error = self.error_semi_supervised_nmf()
        print(error)
        self.error_array.append(error)
        self.callback_set_nmf_training_progress(i + 1, error)
        if i + 1 >= self.min_iter and \
                np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
            print(f'Convergence reached; stopping early after iteration {i}')
            break
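# The semi-supervised subproblems fold their penalty terms into plain NNLS
# by row-stacking: min_h ||[a_i; mu*d*hr_i] - [W; mu*I] h||^2 equals
# ||a_i - W h||^2 + mu^2 * ||d*hr_i - h||^2, which is the per-column H
# objective above (mu = MH[ci,ci], d = DH[ci,ci]). A standalone check with
# illustrative values (mu and d are stand-ins, not project attributes):
def _check_stacking_trick():
    rng = np.random.default_rng(0)
    m, k = 6, 3
    a_i = rng.random((m, 1))    # one document column
    w = rng.random((m, k))
    hr_i = rng.random((k, 1))   # reference topic vector
    h = rng.random((k, 1))
    mu, d = 0.5, 1.2
    stacked = np.linalg.norm(
        np.vstack((a_i, mu * d * hr_i))
        - np.vstack((w, mu * np.eye(k))) @ h) ** 2
    separate = (np.linalg.norm(a_i - w @ h) ** 2
                + mu ** 2 * np.linalg.norm(d * hr_i - h) ** 2)
    assert np.isclose(stacked, separate)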