Example #1
0
    def fit_new_doc(self, a):
        """Infer the coefficient vector h for a new document.

        Keeps the learned basis matrix ``self.w`` fixed and repeatedly
        solves the non-negative least-squares subproblem for ``h`` only.

        Parameters
        ----------
        a : ndarray
            The new document as a column vector.
            NOTE(review): presumably shape (m, 1) and non-negative, to
            match ``self.w`` (m, k) — confirm against callers.

        Returns
        -------
        ndarray
            Non-negative coefficient vector h of shape (k, 1).
        """
        # Initialize h with scaled random values so w @ h has roughly the
        # same mean magnitude as a; np.abs in place enforces non-negativity.
        avg = np.sqrt(a.mean() / self.k)
        h = avg * self.rng.randn(self.k, 1).astype(np.float32)
        np.abs(h, h)

        # Initial tolerance for the inner NNLS solver, and initial error.
        tolh = max(0.001, self.tol)
        error_at_init = self.error_new_doc(a, h)
        print(error_at_init)
        self.error_array = [error_at_init]

        for i in range(self.max_iter):
            # With w fixed, solve the NNLS subproblem for h.
            h, _, iter_count = nmf.nlssubprob(a, self.w, h, tolh, 1000)
            if iter_count == 1:
                # Subproblem converged immediately -> tighten the tolerance.
                tolh *= 0.1

            error = self.error_new_doc(a, h)
            print(error)
            self.error_array.append(error)

            # Early stop once the relative error decrease falls below tol
            # (only after the minimum number of iterations has run).
            # error_array[i] is the error from the previous iteration.
            if i + 1 >= self.min_iter and \
                    np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
                print(f'达到收敛条件,在第{i}次迭代后提前结束')
                break

        return h
Example #2
0
    def fit_original_nmf_cjlin_nnls(self):
        """Fit the original (unsupervised) NMF using the C.-J. Lin style
        NNLS subproblem solver (``nmf.nlssubprob``).

        Alternates between updating W (even iterations, H fixed) and H
        (odd iterations, W fixed). Errors are accumulated in
        ``self.error_array`` and progress is reported through
        ``self.callback_set_nmf_training_progress``. Stops early once the
        relative error decrease falls below ``self.tol``, after at least
        ``self.min_iter`` iterations.
        """
        # Inner-solver tolerances; never tighter than 0.001 initially.
        tolw = max(0.001, self.tol)
        tolh = tolw
        error_at_init = self.error_original_nmf()
        self.error_array = [error_at_init]
        self.callback_set_nmf_training_progress(0, error_at_init)

        at = self.a.T
        for i in range(self.max_iter):
            if i % 2 == 0:
                # Fix H, solve for W. The subproblem is posed on the
                # transposed system A^T ~= H^T @ W^T, so W is solved as W^T
                # and transposed back afterwards.
                self.w, _, iter_count = nmf.nlssubprob(at, self.h.T, self.w.T,
                                                       tolw, 1000)
                self.w = self.w.T
                if iter_count == 1:
                    # Subproblem converged immediately -> tighten tolerance.
                    tolw *= 0.1

            else:
                # Fix W, solve for H.
                self.h, _, iter_count = nmf.nlssubprob(self.a, self.w, self.h,
                                                       tolh, 1000)
                if iter_count == 1:
                    tolh *= 0.1

            error = self.error_original_nmf()
            print(error)
            self.error_array.append(error)
            self.callback_set_nmf_training_progress(i + 1, error)

            # Early stop: relative improvement below tol, after min_iter.
            # error_array[i] is the error from the previous iteration.
            if i + 1 >= self.min_iter and \
                    np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
                print(f'达到收敛条件,在第{i}次迭代后提前结束')
                break
Example #3
0
    def fit_semi_supervised_nmf_cjlin_nnls(self):
        """Fit the semi-supervised NMF using the C.-J. Lin style NNLS
        subproblem solver (``nmf.nlssubprob``).

        Alternates between a W update (even iterations) and a per-column H
        update followed by a refresh of the diagonal matrix DH (odd
        iterations). The supervision terms (``self.mw``, ``self.wr``,
        ``self.mh``, ``self.hr``, ``self.dh``) are folded into each NNLS
        subproblem by vertically stacking them under the data matrix.
        Errors go to ``self.error_array``; progress is reported via
        ``self.callback_set_nmf_training_progress``.
        """
        # Inner-solver tolerances: one for W, one per column of H.
        tolw = max(0.001, self.tol)
        tolh_list = [tolw] * self.n
        error_at_init = self.error_semi_supervised_nmf()
        self.error_array = [error_at_init]
        self.callback_set_nmf_training_progress(0, error_at_init)

        # Pre-build the stacked matrices for the W subproblem; only the
        # H^T block changes between iterations and is overwritten in place.
        vstack_ht_mw = np.vstack((self.h.T, self.mw))
        vstack_at_hstack_mw_wrt = np.vstack(
            (self.a.T, np.dot(self.mw, self.wr.T)))

        for i in range(self.max_iter):
            if i % 2 == 0:
                # Fix H, solve for W: refresh the H^T block of the stacked
                # coefficient matrix instead of rebuilding the whole stack.
                vstack_ht_mw[0:self.n, 0:self.k] = self.h.T
                '''
                vstack_ht_mw = np.vstack((self.h.T, self.mw))
                vstack_at_hstack_mw_wrt = np.vstack(
                    (self.a.T, np.dot(self.mw, self.wr.T))
                )
                '''

                self.w, _, iter_count = nmf.nlssubprob(vstack_at_hstack_mw_wrt,
                                                       vstack_ht_mw, self.w.T,
                                                       tolw, 1000)
                self.w = self.w.T
                if iter_count == 1:
                    # Subproblem converged immediately -> tighten tolerance.
                    tolw *= 0.1
                print(f'迭代次数{iter_count}')
            else:
                # Fix W, solve for each column of H independently, stacking
                # the supervision row block under the data column.
                for ci in range(self.n):
                    vstack_w_mhi_ik = np.vstack(
                        (self.w, self.mh[ci, ci] * self.ik))

                    vstack_aci_mhi_dhi_hrci = np.vstack((self.a[:, ci].reshape(
                        (self.m, 1)), (self.mh[ci, ci] * self.dh[ci, ci] *
                                       self.hr[:, ci]).reshape((self.k, 1))))
                    hci, _, iter_count = nmf.nlssubprob(
                        vstack_aci_mhi_dhi_hrci, vstack_w_mhi_ik,
                        self.h[:, ci].reshape(
                            (self.k, 1)), tolh_list[ci], 1000)

                    self.h[:, ci] = hci.reshape(self.k)
                    if iter_count == 1:
                        tolh_list[ci] *= 0.1

                # Refresh the diagonal scaling matrix DH: for supervised
                # columns (mh[j, j] != 0), dh[j, j] is the least-squares
                # projection of h[:, j] onto the reference column hr[:, j].
                for j in range(self.n):
                    if self.mh[j, j] == 0:
                        self.dh[j, j] = 0
                    else:
                        hri = self.hr[:, j]
                        # No reshape needed for this vector-vector product:
                        # np.dot(v1, v2) on 1-D arrays is the inner product,
                        # i.e. v1.T @ v2 for column vectors v1, v2 in (n, 1).
                        self.dh[j, j] = (np.dot(hri, self.h[:, j]) /
                                         np.dot(hri, hri))

            error = self.error_semi_supervised_nmf()
            print(error)
            self.error_array.append(error)
            self.callback_set_nmf_training_progress(i + 1, error)

            # Early stop: relative improvement below tol, after min_iter.
            # error_array[i] is the error from the previous iteration.
            if i + 1 >= self.min_iter and \
                    np.abs(error - self.error_array[i]) / error_at_init <= self.tol:
                print(f'达到收敛条件,在第{i}次迭代后提前结束')
                break