Example #1
# Common imports assumed by all of the examples in this file (they are not
# shown in the original snippets). Convergence, ClusterONMFManager,
# calculate_accuracy, and the data_manager objects are project-local helpers
# whose definitions are not included here.
import time
from os import path

import numpy as np
import scipy.linalg
from numpy import linalg as LA
class KM_Solver(object):
    def __init__(self,
                 data_mat=None,
                 W=None,
                 H=None,
                 res_dir=None,
                 rank=4,
                 SNR=-5,
                 seed_num=1,
                 true_labels=None):
        if data_mat is None or W is None or H is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_mat = data_mat
        self.W, self.H = W, H
        self.rank = rank
        self.SNR = SNR
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.labels = true_labels
        np.random.seed(
            seed_num
        )  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        self.flag = 0  # flag to indicate whether to use LS or gradient descent to update W
        m_name = 'km' + str(self.flag)
        self.output_dir = path.join(self.res_dir, 'onmf', m_name,
                                    'rank' + str(self.rank), 'data' + str(SNR),
                                    'seed' + str(self.seed_num))
        self.time_used = 0  # record the time elapsed when running the simulations

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_nmf_cost(self, W, H):
        res = LA.norm(self.data_mat - W * H, 'fro')**2
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_scheme(self, verbose=False):
        '''
        Update W and H in a K-means-like fashion.
        Specifically,
            for each column h_j of H, try each position k as the single
            nonzero entry. For example, if H_{k, j} != 0, then
                H_{k, j} = arg min_{c > 0} ||x_j - w_k * c||_2^2
            which leads to
                H_{k, j} = (x_j^T w_k) / ||w_k||_2^2 if w_k != 0, and otherwise = 1.
            Trying all k = 1, ..., K gives the h_j with the lowest objective value.
            for each column w_k of W,
                w_k = arg min_{w >= 0} sum_{j in C_k} ||x_j - w * H_{k, j}||_2^2
            which leads to
                w_k = X * ~h_k^T / ||~h_k||_2^2
        '''
        # update H
        (ha, hb) = self.H.shape
        H_pre = np.asmatrix(np.copy(self.H))
        for j in range(hb):
            tmp = LA.norm(self.data_mat[:, j] - self.W * H_pre[:, j], 2)**2
            p_cost = self.get_nmf_cost(self.W, self.H)
            for k in range(ha):
                h_j_new = np.asmatrix(np.zeros((ha, 1)))
                if LA.norm(self.W[:, k], 2) == 0:
                    #print 'the k th column of W is 0'
                    h_j_new[k, 0] = 1
                else:
                    h_j_new[k, 0] = self.data_mat[:, j].transpose(
                    ) * self.W[:, k] / (LA.norm(self.W[:, k], 2)**2)
                # check if a smaller obj value is obtained
                val = LA.norm(self.data_mat[:, j] - self.W * h_j_new, 2)**2
                #print 'val: ' + str(val) + ', tmp: ' +str(tmp)
                if val < tmp:
                    self.H[:, j] = np.copy(h_j_new)
                    tmp = val
            # sanity check (disabled): the per-column update should never
            # increase the objective
            #c_cost = self.get_nmf_cost(self.W, self.H)
            #if c_cost > p_cost:
            #    print 'cur cost: ' + str(c_cost) + ', p_cost: ' + str(p_cost)
            #    raise ValueError('Error: j = ' + str(j))
        if verbose:
            print 'KM: iter = ' + str(self.get_iter_num()) + ', after update H -' + \
                                ', nmf cost = ' + str(self.get_nmf_cost(self.W, self.H))
            #c_cost = self.get_nmf_cost(self.W, self.H)
            #if c_cost > p_cost:
            #    print self.H
            #    raise ValueError('Error')
        # update W
        if self.flag == 0:  # use the LS or K-means way to update W (centroids)
            for k in range(ha):
                if LA.norm(self.H[k, :], 2) == 0:  # no data points belong to cluster k
                    self.W[:, k].fill(0)
                else:
                    self.W[:, k] = self.data_mat * self.H[k, :].transpose() / (
                        LA.norm(self.H[k, :], 2)**2)
        else:  # use the gradient descent to update W
            Hessian = self.H * self.H.transpose()
            #c = 0.5 * LA.norm(Hessian, 'fro')
            eigenvals, _ = LA.eigh(Hessian)
            c = 0.51 * np.max(eigenvals)
            grad_W_pre = self.W * Hessian - self.data_mat * self.H.transpose()
            self.W = np.maximum(0, self.W - grad_W_pre / c)

        if verbose:
            print 'KM: iter = ' + str(self.get_iter_num()) + ', after update W -' + \
           ', nmf cost = ' + str(self.get_nmf_cost(self.W, self.H))

    def solve(self):
        start_time = time.time()
        self.set_tol(1e-5)
        end_time = time.time()
        self.time_used += end_time - start_time

        cost = self.get_nmf_cost(self.W, self.H)
        self.converge.add_obj_value(cost)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print self.H[:, 0]  # debug: inspect the first column of H

        acc_km = []  # record the clustering accuracy for each KM iteration
        time_km = []  # record the time used after each KM iteration

        # calculate the clustering accuracy
        pre_labels = np.argmax(np.asarray(self.H), 0)
        if self.labels is None:
            raise ValueError('Error: no labels!')
        acc = calculate_accuracy(pre_labels, self.labels)
        acc_km.append(acc)

        time_km.append(self.time_used)

        print 'Start to solve the problem by KM ----------'
        while not self.converge.d():

            # update the variable W , H
            start_time = time.time()
            self.update_scheme(verbose=False)
            end_time = time.time()
            self.time_used += end_time - start_time
            time_km.append(self.time_used)
            print 'time used: ' + str(self.time_used)

            # calculate the clustering accuracy
            pre_labels = np.argmax(np.asarray(self.H), 0)
            if self.labels is None:
                raise ValueError('Error: no labels!')
            acc = calculate_accuracy(pre_labels, self.labels)
            acc_km.append(acc)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the obj_val
            cost = self.get_nmf_cost(self.W, self.H)
            self.converge.add_obj_value(cost)

            print 'onmf_KM: iter = ' + str(
                self.get_iter_num()) + ', nmf_cost = ' + str(cost)

        print 'HTH:'
        print self.H * self.H.transpose()

        # show the number of inner iterations
        self.converge.save_data(time_km, self.output_dir, 'time_km.csv')
        self.converge.save_data(acc_km, self.output_dir, 'acc_km.csv')

        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''

    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''

    def get_opt_obj_and_fea(self):
        return self.get_nmf_cost(self.W, self.H), None

    ''' return the iteration number and time used '''

    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    ''' simulation result analysis (convergence plot) '''

    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        #dir_name = path.join(self.res_dir, 'onmf', 'penalty', 'inner<1e-3', 'rank' + str(self.rank), 'SNR-3', 'seed' + str(self.seed_num))
        dir_name = self.output_dir
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(
            -1, dir_name)  # store the last element of the primal variable
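# --- Illustration (not part of the original example) ---
# A minimal standalone sketch of the closed-form updates described in the
# update_scheme docstring above, using plain NumPy arrays. The toy matrix X,
# the rank K, and all sizes are made up for illustration.
import numpy as np
from numpy import linalg as LA

rng = np.random.RandomState(0)
X = rng.rand(5, 8)   # toy data: 5 features, 8 samples
K = 3                # toy rank
W = rng.rand(5, K)
H = np.zeros((K, 8))

# H-update: for each column x_j, try every position k as the single nonzero
# entry and keep the one with the smallest residual ||x_j - w_k * c||_2^2,
# where c = (x_j^T w_k) / ||w_k||_2^2
for j in range(X.shape[1]):
    best_val, best_h = np.inf, np.zeros(K)
    for k in range(K):
        h = np.zeros(K)
        wk_norm2 = LA.norm(W[:, k]) ** 2
        h[k] = X[:, j].dot(W[:, k]) / wk_norm2 if wk_norm2 > 0 else 1.0
        val = LA.norm(X[:, j] - W.dot(h)) ** 2
        if val < best_val:
            best_val, best_h = val, h
    H[:, j] = best_h

# W-update (LS / K-means centroid step): w_k = X * h_k^T / ||h_k||_2^2
for k in range(K):
    hk_norm2 = LA.norm(H[k, :]) ** 2
    W[:, k] = X.dot(H[k, :]) / hk_norm2 if hk_norm2 > 0 else 0.0

print(LA.norm(X - W.dot(H), 'fro') ** 2)  # NMF cost after one sweep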
Example #2
class PLAM_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if None in [data_manager, res_dir]:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        self.n_factor = m * n  # set the normalization factor to normalize the objective value
        self.time_used = 0  # record the time used by the method
        print 'data_mat'
        print self.data_mat

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        res = LA.norm(self.data_mat - self.W * self.H,
                      'fro')**2 / self.n_factor
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name):
        if var_name == 'W':
            step_size = 1
            beta = 0.5
            gx = LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            gradient = (self.W * self.H - self.data_mat) * self.H.transpose()
            while True:
                tmp = self.W - step_size * gradient
                fz = LA.norm(self.data_mat - tmp * self.H, 'fro')**2
                if fz <= gx + 0.5 * np.trace(gradient.transpose() *
                                             (tmp - self.W)):
                    self.W = tmp
                    break
                step_size = step_size * beta
            self.W = np.maximum(self.W, 0)
        elif var_name == 'H':
            step_size = 1
            beta = 0.5
            gx = LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            gradient = self.W.transpose() * (self.W * self.H - self.data_mat)
            while True:
                tmp = self.H - step_size * gradient
                fz = LA.norm(self.data_mat - self.W * tmp, 'fro')**2
                if fz <= gx + 0.5 * np.trace(gradient.transpose() *
                                             (tmp - self.H)):
                    self.H = tmp
                    break
                step_size = beta * step_size
            self.H = np.maximum(self.H, 0)
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):

        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(
            self.get_iter_num()) + ', obj =' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print 'Start to solve the problem by PALM ----------'
        while not self.converge.d():
            # update the variables W, H iteratively according to the PALM method
            start_time = time.time()
            self.update_prim_var('W')
            #obj_val = self.get_obj_val()
            #print 'nmf_PLAM: iter = ' + str(self.get_iter_num()) + ',after update W, obj = ' + str(obj_val)

            self.update_prim_var('H')
            end_time = time.time()
            self.time_used += end_time - start_time

            obj_val = self.get_obj_val()
            print 'nmf_PLAM: iter = ' + str(
                self.get_iter_num()) + ',after update H, obj = ' + str(obj_val)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'nmf_PLAM: iter = ' + str(
                self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of feasible conditions
            (ha, hb) = self.H.shape
            fea = LA.norm(
                self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)

        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''

    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''

    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value(
            'HTH_I')

    ''' simulation result analysis (convergence plot) '''

    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = path.join(self.res_dir, 'onmf', 'plam_k++',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(
            -1, dir_name)  # store the last element of the primal variable
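# --- Illustration (not part of the original example) ---
# The backtracking loop in update_prim_var accepts a trial point Z once it
# satisfies the sufficient-decrease test fz <= gx + 0.5 * trace(grad^T (Z - W)).
# A self-contained sketch of the same W-step on a toy problem; all sizes and
# data are illustrative.
import numpy as np
from numpy import linalg as LA

rng = np.random.RandomState(1)
X = rng.rand(6, 10)
W, H = rng.rand(6, 3), rng.rand(3, 10)

step_size, beta = 1.0, 0.5
gx = LA.norm(X - W.dot(H), 'fro') ** 2
gradient = (W.dot(H) - X).dot(H.T)
while True:
    Z = W - step_size * gradient
    fz = LA.norm(X - Z.dot(H), 'fro') ** 2
    # trace(grad^T (Z - W)) = -step_size * ||grad||_F^2 < 0, so this accepts
    # the step once the actual decrease is at least half the linear prediction
    if fz <= gx + 0.5 * np.trace(gradient.T.dot(Z - W)):
        W = np.maximum(Z, 0)  # project back onto the nonnegative orthant
        break
    step_size *= beta  # shrink the step and retry
print(LA.norm(X - W.dot(H), 'fro') ** 2)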
Example #3
class NMF_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random', seed=seed_num, H_ortho=False)
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)   # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        #self.n_factor = m * n # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2
        self.flag = 0 # a flag to indicate which (problem, method) pair is used:
                      # 0: nmf_fro + multiplicative rule
                      # 1: nmf_kl + multiplicative rule
                      # 2: nmf_fro + palm
        self.time_used = 0 # record the time used by the method

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self, flag = 0):
        if flag == 0 or flag == 2:
            res = LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor
        else:
            # initial reconstruction
            R = self.W * self.H
            # compute KL-divergence
            #errs(k) = sum(V(:) .* log(V(:) ./ R(:)) - V(:) + R(:));
            tmp = np.multiply(self.data_mat.flatten('F'), np.log(np.divide(self.data_mat.flatten('F'), R.flatten('F'))))
            res = np.sum(tmp - self.data_mat.flatten('F') + R.flatten('F')) / np.sqrt(self.n_factor)
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name, flag = 0):
        if flag == 1:
            #preallocate matrix of ones
            (m, n) = self.data_mat.shape
            Onm  = np.asmatrix(np.ones((m, n)))

        if var_name == 'W':
            if flag == 0:
                #W = W .* ((V * H') ./ max(W * (H * H'), myeps));
                temp = np.divide(self.data_mat * self.H.transpose(), \
                        np.maximum(self.W * (self.H * self.H.transpose()), 1e-20))
                self.W = np.multiply(self.W, temp)
            elif flag == 1:
                # initial reconstruction
                R = self.W * self.H
                #W = W .* (((V ./ R) * H') ./ max(Onm * H', myeps));
                temp = np.divide(np.divide(self.data_mat, R) * self.H.transpose(), \
                        np.maximum(Onm * self.H.transpose(), 1e-20))
                self.W = np.multiply(self.W, temp)
            else:
                step_size = 1
                beta = 0.5
                gx = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2
                gradient = (self.W * self.H - self.data_mat) * self.H.transpose()
                while True:
                    tmp = self.W - step_size * gradient
                    fz = LA.norm(self.data_mat - tmp * self.H, 'fro') ** 2
                    if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.W)):
                        self.W = tmp
                        break
                    step_size = step_size * beta
                self.W = np.maximum(self.W, 0)
        elif var_name == 'H':
            if flag == 0:
                #H = H .* ( (W'* V) ./ max((W' * W) * H, myeps))
                temp = np.divide(self.W.transpose() * self.data_mat, \
                        np.maximum(self.W.transpose() * self.W * self.H, 1e-20))
                self.H = np.multiply(self.H, temp)
            elif flag == 1:
                # initial reconstruction
                R = self.W * self.H
                #H = H .* ((W' * (V ./ R)) ./ max(W' * Onm, myeps));
                temp = np.divide(self.W.transpose() * np.divide(self.data_mat, R), \
                        np.maximum(self.W.transpose() * Onm, 1e-20))
                self.H = np.multiply(self.H, temp)
            else:
                step_size = 1
                beta = 0.5
                gx = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2
                gradient = self.W.transpose() * (self.W * self.H - self.data_mat)
                while True:
                    tmp = self.H - step_size * gradient
                    fz = LA.norm(self.data_mat - self.W * tmp, 'fro') ** 2
                    if fz <= gx + 0.5 * np.trace(gradient.transpose() * (tmp - self.H)):
                        self.H = tmp
                        break
                    step_size = beta * step_size
                self.H = np.maximum(self.H, 0)
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):

        obj_val = self.get_obj_val(self.flag)
        print 'The initial error: iter = ' + str(self.get_iter_num()) + ', obj =' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print 'Start to solve the problem by NMF ----------'
        while not self.converge.d():
            # update the variable W, H iteratively according to NMF multiplicative rules
            start_time = time.time()   # record the start time
            self.update_prim_var('W', self.flag)
            self.update_prim_var('H', self.flag)
            end_time = time.time()    # record the end time
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val(self.flag)
            self.converge.add_obj_value(obj_val)
            print 'NMF solver: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of feasible conditions
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)), 'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)


        print 'Stop solving the problem ----------'
        self.converge_analysis()


    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        if self.flag == 0:
            sub_folder = 'nmf_fro_mul'
        elif self.flag == 1:
            sub_folder = 'nmf_kl'
        else:
            sub_folder = 'nmf_fro_palm'

        dir_name = path.join(self.res_dir, 'nmf', sub_folder, 'rank' + str(self.rank), self.data_manager.get_data_name(), 'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name) # store the last element of the primal variable
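# --- Illustration (not part of the original example) ---
# For flag == 0, update_prim_var applies the standard Lee-Seung multiplicative
# rules for the Frobenius objective. A compact standalone version with plain
# NumPy arrays; the toy data and the 1e-20 guard mirror the class above.
import numpy as np
from numpy import linalg as LA

rng = np.random.RandomState(2)
V = rng.rand(6, 10)
W, H = rng.rand(6, 3), rng.rand(3, 10)

for it in range(50):
    # W <- W .* (V H^T) ./ max(W (H H^T), eps)
    W *= V.dot(H.T) / np.maximum(W.dot(H.dot(H.T)), 1e-20)
    # H <- H .* (W^T V) ./ max((W^T W) H, eps)
    H *= W.T.dot(V) / np.maximum(W.T.dot(W).dot(H), 1e-20)

# normalized objective, as in get_obj_val with n_factor = ||V||_F^2
print(LA.norm(V - W.dot(H), 'fro') ** 2 / LA.norm(V, 'fro') ** 2)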
Example #4
class SNCP2_Solver(object):
    def __init__(self,
                 data_manager=None,
                 res_dir=None,
                 rank=4,
                 seed_num=1,
                 mul=0,
                 nu=0):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=True)
        self.data_mat = self.data_manager.get_data_mat()
        self.mul = mul
        self.nu = nu
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.true_labels = self.data_manager.get_labels()
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.W_bound = False  # flag to indicate whether to constrain W by upper bound and lower bound
        self.W_step = 0.51
        self.H_step = 0.51

        self.time_used = 0  # record the time elapsed when running the simulation
        start_time = time.time()
        self.initialize_penalty_para()
        end_time = time.time()
        self.time_used += end_time - start_time
        self.set_tol(1e-3)
        self.set_max_iters(400)

        W_bound = 'W_bound' if self.W_bound else 'W_nobound'
        self.output_dir = path.join(self.res_dir, 'onmf', 'sncp2_W1H1', \
     W_bound + '_' + 'epsilon' + str(self.inner_tol) + '&gamma' + str(self.gamma) + '&mul' + str(self.mul) + '&nu' + str(self.nu), \
     'rank' + str(self.rank), self.data_manager.get_data_name(), 'seed' + str(self.seed_num))

        # we construct a result manager to manage and save the result
        res_dir1 = path.join(res_dir, 'onmf', 'sncp2_new', self.data_manager.get_data_name(), 'cls' + str(rank), W_bound + 'W'+ str(self.W_step) + 'H' + str(self.H_step), \
            'inner' + str(self.inner_tol) + '&gamma' + str(self.gamma) + '&mul' + str(self.mul) + '&nu' + str(self.nu), 'seed' + str(self.seed_num))
        self.res_manager = ClusterONMFManager(
            root_dir=res_dir1, save_pdv=False
        )  # get an instance of ClusterONMFManager to manage the generated result

        # initialize some variables to store info
        self.acc_iter = []  # record the clustering accuracy for each iteration
        self.time_iter = []  # record the time for each iteration
        self.nmf_cost_iter = []  # record the nmf cost after each iteration
        self.pobj_iter = [
        ]  # record the penalized objective value after each iteration
        self.obj_iter = []  # record the objective value for each iteration

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def initialize_penalty_para(self):
        self.rho = 1e-8
        self.gamma = 1.1
        self.inner_tol = 3e-3

        (ha, hb) = self.H.shape
        self.I_ha = np.asmatrix(np.eye(ha))
        self.B = np.zeros_like(self.H)
        self.all_1_mat = np.asmatrix(np.ones((ha, hb)))
        self.max_val = np.max(self.data_mat)
        self.min_val = np.min(self.data_mat)

    def get_nmf_cost(self, W, H):
        res = LA.norm(self.data_mat - np.asmatrix(W) * np.asmatrix(H),
                      'fro')**2 / self.n_factor
        return res

    def get_obj_val(self, W, H):
        res = LA.norm(self.data_mat - np.asmatrix(W) * np.asmatrix(H), 'fro')**2 / self.n_factor \
     + 0.5 * self.nu * LA.norm(H, 'fro') ** 2 + 0.5 * self.mul * LA.norm(W, 'fro') ** 2
        return res

    def get_penalized_obj(self, W, H):
        '''
        penalized objective:
            ||X - WH||_F^2 / n_factor + rho * sum_j (||h_j||_1 - ||h_j||_inf)
            + 0.5 * nu * ||H||_F^2 + 0.5 * mul * ||W||_F^2
        '''
        (ha, hb) = H.shape
        tmp = 0
        for k in range(hb):
            tmp = tmp + (LA.norm(H[:, k], 1) - LA.norm(H[:, k], np.inf))
        return LA.norm(self.data_mat - W * H, 'fro') ** 2 / self.n_factor + self.rho * tmp \
            + 0.5 * self.nu * LA.norm(H, 'fro') ** 2 + 0.5 * self.mul * LA.norm(W, 'fro') ** 2

    def get_onmf_cost(self, W, H, nu=0, mul=0):
        ''' This function returns the approximation error of ONMF based on current W and H

        Args:
            W (numpy array or mat): the factor W
            H (numpy array or mat): the factor H
            nu (float): the penalty parameter
        Returns:
            the cost
        '''
        res = LA.norm(self.data_mat - W * H, 'fro')**2 / self.n_factor \
  + 0.5 * nu * LA.norm(H, 'fro')** 2 \
                + 0.5 * mul * LA.norm(W, 'fro') ** 2
        return res

    def get_sncp_cost(self, W, H, nu=0, mul=0, rho=0):
        ''' This function returns the cost of the penalized subproblem when using SNCP

        Args:
            W (numpy array or mat): the factor W
            H (numpy array or mat): the factor H
            nu (float): the parameter nu * ||H||_F^2
            mul (float): the parameter mul * ||W||_F^2
            rho (float): the penalty parameter rho * \sum_j (||hj||_1 - ||hj||_{\infty})
        Returns:
            the cost
        '''
        (ha, hb) = H.shape
        tmp = 0
        for k in range(hb):
            tmp = tmp + (LA.norm(H[:, k], 1) - LA.norm(H[:, k], np.inf))
        return LA.norm(self.data_mat - W * H, 'fro') ** 2 / self.n_factor + rho * tmp \
            + 0.5 * nu * LA.norm(H, 'fro') ** 2 + 0.5 * mul * LA.norm(W, 'fro') ** 2

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var_by_PALM0(self,
                                 k,
                                 W_init=None,
                                 H_init=None,
                                 max_iter=1000,
                                 tol=1e-1,
                                 verbose=False):
        '''
        This function alternately updates the primal variables in a Gauss-Seidel fashion.
        The update of H is performed using the proximal gradient method.
        The update of W is performed using the proximal subgradient method.
        Input:
            k           ------ the outer iteration number
            W_init      ------ the initialization for W
            H_init      ------ the initialization for H
            max_iter    ------ the max number of iterations for PALM
            tol         ------ the tolerance for stopping PALM
            verbose     ------ flag to control output of debug info
        '''
        if W_init is None or H_init is None:
            raise ValueError(
                'Error: PALM inner iterations lack initializations!')

        start_time = time.time()  # record the start time
        H_j_pre, W_j_pre = np.asmatrix(np.copy(H_init)), np.asmatrix(
            np.copy(W_init))
        (ha, hb) = H_j_pre.shape
        end_time = time.time()
        self.time_used += end_time - start_time

        for j in range(max_iter):
            # update H and W by proximal gradient method respectively

            start_time = time.time()
            self.B.fill(0)
            self.B[H_j_pre.argmax(0), np.arange(hb)] = 1
            Hessian = 2 * W_j_pre.transpose(
            ) * W_j_pre / self.n_factor + self.nu * self.I_ha
            t = self.H_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_H_pre = Hessian * H_j_pre - 2 * W_j_pre.transpose() * self.data_mat / self.n_factor + \
   self.rho * (self.all_1_mat - self.B)
            H_j_cur = np.maximum(0, H_j_pre - grad_H_pre / t)

            Hessian = 2 * H_j_cur * H_j_cur.transpose(
            ) / self.n_factor + self.mul * self.I_ha
            c = self.W_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_W_pre = W_j_pre * Hessian - 2 * self.data_mat * H_j_cur.transpose(
            ) / self.n_factor
            if self.W_bound:
                W_j_cur = np.minimum(self.max_val,
                                     np.maximum(0, W_j_pre - grad_W_pre / c))
            else:
                W_j_cur = np.maximum(0, W_j_pre - grad_W_pre / c)

            if verbose:
                obj = self.get_obj_val(W_j_cur, H_j_cur)
                pobj = self.get_penalized_obj(W_j_cur, H_j_cur)

                # store the info
                # calculate the clustering accuracy
                pre_labels = np.argmax(np.asarray(H_j_cur), 0)
                if self.true_labels is None:
                    raise ValueError('Error: no labels!')
                acc = calculate_accuracy(pre_labels, self.true_labels)
                self.acc_iter.append(acc)

                self.obj_iter.append(obj)
                self.pobj_iter.append(pobj)

                cost = self.get_nmf_cost(W_j_cur, H_j_cur)
                self.nmf_cost_iter.append(cost)

                onmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, self.nu,
                                               self.mul)
                sncp_cost = self.get_sncp_cost(W_j_cur, H_j_cur, self.nu,
                                               self.mul, self.rho)
                self.res_manager.add_cost_value('onmf_cost_palm',
                                                onmf_cost)  # store obj val
                self.res_manager.add_cost_value('palm_cost', sncp_cost)
                nmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, 0, 0)
                self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)

            # check the convergence
            H_j_change = LA.norm(H_j_cur - H_j_pre, 'fro') / LA.norm(
                H_j_pre, 'fro')
            W_j_change = LA.norm(W_j_cur - W_j_pre, 'fro') / LA.norm(
                W_j_pre, 'fro')

            # update the previous iterates
            H_j_pre = np.asmatrix(np.copy(H_j_cur))
            W_j_pre = np.asmatrix(np.copy(W_j_cur))

            end_time = time.time()
            self.time_used += end_time - start_time
            self.time_iter.append(self.time_used)
            #self.res_manager.push_time(self.time_used)
            #self.res_manager.push_iters(self.rho, j+1)

            # stop when the combined relative change falls below tol
            if H_j_change + W_j_change < tol:
                self.res_manager.push_iters(self.rho, j + 1)
                break

        return (W_j_cur, H_j_cur, j + 1)

    def update_prim_var_by_PALM1(self,
                                 k,
                                 W_init=None,
                                 H_init=None,
                                 max_iter=1000,
                                 tol=1e-1,
                                 verbose=False):
        '''
        This function alternately updates the primal variables in a Gauss-Seidel fashion.
        Each update is performed using the proximal gradient method.
        Input:
            k           ------ the outer iteration number
            W_init      ------ the initialization for W
            H_init      ------ the initialization for H
            max_iter    ------ the max number of iterations for PALM
            tol         ------ the tolerance for stopping PALM
            verbose     ------ flag to control output of debug info
        '''
        if W_init is None or H_init is None:
            raise ValueError(
                'Error: PALM inner iterations lack initializations!')

        start_time = time.time()  # record the start time
        H_j_pre, W_j_pre, H_j_cur, W_j_cur = H_init, W_init, H_init, W_init
        (ha, hb) = H_j_pre.shape
        end_time = time.time()
        self.time_used += end_time - start_time

        for j in range(max_iter):
            # update H and W by proximal gradient method respectively
            #if verbose:
            #    print 'PALM1: inner iter = ' + str(j) + ', before H, obj_val = ' + \
            #			str(self.get_obj_val(W_j_pre, H_j_pre)) + ', penalized_obj = ' + str(self.get_penalized_obj(W_j_pre, H_j_pre))

            start_time = time.time()
            # keep the infinity norm as a non-smooth part
            Hessian = 2 * W_j_pre.transpose(
            ) * W_j_pre / self.n_factor + self.nu * self.I_ha
            t = self.H_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_H_pre = Hessian * H_j_pre - 2 * W_j_pre.transpose(
            ) * self.data_mat / self.n_factor + self.rho * self.all_1_mat

            H_j_cur = H_j_pre - grad_H_pre / t
            self.B.fill(0)
            self.B[H_j_cur.argmax(0), np.arange(hb)] = 1
            H_j_cur += (self.rho / t) * self.B
            H_j_cur = np.maximum(H_j_cur, 0)

            Hessian = 2 * H_j_cur * H_j_cur.transpose(
            ) / self.n_factor + self.mul * self.I_ha
            c = self.W_step * LA.eigvalsh(Hessian)[ha - 1]
            grad_W_pre = W_j_pre * Hessian - 2 * self.data_mat * H_j_cur.transpose(
            ) / self.n_factor
            if self.W_bound:
                W_j_cur = np.minimum(self.max_val,
                                     np.maximum(0, W_j_pre - grad_W_pre / c))
            else:
                W_j_cur = np.maximum(0, W_j_pre - grad_W_pre / c)

            if verbose:
                obj = self.get_obj_val(W_j_cur, H_j_cur)
                pobj = self.get_penalized_obj(W_j_cur, H_j_cur)
                # calculate the clustering accuracy
                pre_labels = np.argmax(np.asarray(H_j_cur), 0)
                if self.true_labels is None:
                    raise ValueError('Error: no labels!')
                acc = calculate_accuracy(pre_labels, self.true_labels)
                self.acc_iter.append(acc)

                self.obj_iter.append(obj)
                self.pobj_iter.append(pobj)

                cost = self.get_nmf_cost(W_j_cur, H_j_cur)
                self.nmf_cost_iter.append(cost)
                onmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, self.nu,
                                               self.mul)
                sncp_cost = self.get_sncp_cost(W_j_cur, H_j_cur, self.nu,
                                               self.mul, self.rho)
                self.res_manager.add_cost_value('onmf_cost_palm',
                                                onmf_cost)  # store obj val
                self.res_manager.add_cost_value('palm_cost', sncp_cost)
                nmf_cost = self.get_onmf_cost(W_j_cur, H_j_cur, 0, 0)
                self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)

            # check the convergence
            H_j_change = LA.norm(H_j_cur - H_j_pre, 'fro') / LA.norm(
                H_j_pre, 'fro')
            W_j_change = LA.norm(W_j_cur - W_j_pre, 'fro') / LA.norm(
                W_j_pre, 'fro')

            # update the previous iterates
            H_j_pre = np.asmatrix(np.copy(H_j_cur))
            W_j_pre = np.asmatrix(np.copy(W_j_cur))

            end_time = time.time()
            self.time_used += end_time - start_time
            self.time_iter.append(self.time_used)
            #self.res_manager.push_time(self.time_used)
            #self.res_manager.push_iters(self.rho, j+1)

            if H_j_change + W_j_change < tol:
                self.res_manager.push_iters(self.rho, j + 1)
                break

        return (W_j_cur, H_j_cur, j + 1)

    def update_scheme(self):
        '''
        The updating rules for the primal variables W, H and the penalty parameter rho:
        use the proximal gradient method to update each variable once per iteration.
        '''
        # update
        # (self.W, self.H, inner_iter_num) = self.update_prim_var_by_PALM0(self.get_iter_num(), self.W, self.H, 3000, self.inner_tol, verbose = False)
        (self.W, self.H,
         inner_iter_num) = self.update_prim_var_by_PALM1(self.get_iter_num(),
                                                         self.W,
                                                         self.H,
                                                         3000,
                                                         self.inner_tol,
                                                         verbose=False)

        # show the feasibility satisfaction level HH^{T} - I
        (ha, hb) = self.H.shape
        H_norm = np.asmatrix(
            np.diag(np.diag(self.H * self.H.transpose())**(-0.5))) * self.H
        fea = LA.norm(H_norm * H_norm.transpose() - np.asmatrix(np.eye(ha)),
                      'fro') / (ha * ha)

        start_time = time.time()
        #if self.get_iter_num() > 0 and fea > 1e-10:
        self.rho = np.minimum(self.rho * self.gamma, 1e10)
        print self.rho
        end_time = time.time()
        self.time_used += end_time - start_time

        return inner_iter_num

    def solve(self):
        '''
        problem formulation:
            min ||X - WH||_F^2 + rho * sum_j (||h_j||_1 - ||h_j||_inf)
                + 0.5 * nu * ||H||_F^2 + 0.5 * mul * ||W||_F^2
        '''
        obj = self.get_obj_val(self.W, self.H)
        p_obj = self.get_penalized_obj(self.W, self.H)
        print 'The initial error: iter = ' + str(self.get_iter_num(
        )) + ', obj_val =' + str(obj) + ', penalized_obj =' + str(p_obj)
        #self.converge.add_obj_value(cost)
        self.converge.add_obj_value(obj)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print self.H[:, 0]  # debug: inspect the first column of H

        inner_iter_nums = []  # record the inner iterations number
        acc_sncp = []  # record the clustering accuracy for each SNCP iteration
        time_sncp = []  # record the time used after each SNCP iteration
        nmf_cost_sncp = []  # record the nmf cost after each SNCP iteration
        pobj_sncp = [
        ]  # record the penalized objective value after each iteration

        cost = self.get_nmf_cost(self.W, self.H)
        nmf_cost_sncp.append(cost)
        pobj_sncp.append(p_obj)

        self.pobj_iter.append(p_obj)
        self.nmf_cost_iter.append(cost)
        self.obj_iter.append(obj)

        # calculate the clustering accuracy
        pre_labels = np.argmax(np.asarray(self.H), 0)
        if self.true_labels is None:
            raise ValueError('Error: no labels!')
        print len(self.true_labels)
        acc = calculate_accuracy(pre_labels, self.true_labels)
        acc_sncp.append(acc)
        self.acc_iter.append(acc)

        time_sncp.append(self.time_used)
        self.time_iter.append(self.time_used)

        fea = 100

        self.res_manager.push_W(self.W)  # store W
        self.res_manager.push_H(self.H)  # store H
        self.res_manager.push_H_norm_ortho()  # store feasibility
        nmf_cost = self.get_onmf_cost(self.W, self.H, 0, 0)
        onmf_cost = self.get_onmf_cost(self.W, self.H, self.nu, self.mul)
        sncp_cost = self.get_sncp_cost(self.W, self.H, self.nu, self.mul,
                                       self.rho)
        self.res_manager.add_cost_value('onmf_cost_sncp',
                                        onmf_cost)  # store obj val
        self.res_manager.add_cost_value('sncp_cost', sncp_cost)
        self.res_manager.add_cost_value('onmf_cost_palm',
                                        onmf_cost)  # store obj val
        self.res_manager.add_cost_value('palm_cost', sncp_cost)
        self.res_manager.add_cost_value('nmf_cost_sncp', nmf_cost)
        self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)
        cls_assign = self.res_manager.calculate_cluster_quality(
            self.true_labels)  # calculate and store clustering quality
        self.res_manager.push_time(self.time_used)

        print 'Start to solve the problem by SNCP2 ----------'
        while not self.converge.d() or fea > 1e-10:

            # update the variable W , H
            num = self.update_scheme()
            inner_iter_nums.append(num)
            time_sncp.append(self.time_used)
            print 'time used: ' + str(
                self.time_used) + ', inner_num: ' + str(num)

            # calculate the clustering accuracy
            pre_labels = np.argmax(np.asarray(self.H), 0)
            if self.true_labels is None:
                raise ValueError('Error: no labels!')
            acc = calculate_accuracy(pre_labels, self.true_labels)
            acc_sncp.append(acc)

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the nmf approximation error value
            obj = self.get_obj_val(self.W, self.H)
            p_obj = self.get_penalized_obj(self.W, self.H)
            nmf_cost_sncp.append(self.get_nmf_cost(self.W, self.H))

            self.converge.add_obj_value(obj)
            pobj_sncp.append(p_obj)
            print 'onmf_SNCP2: iter = ' + str(self.get_iter_num(
            )) + ', obj_val = ' + str(obj) + ' penalized_obj = ' + str(p_obj)

            # store the satisfaction of feasible conditions
            (ha, hb) = self.H.shape
            H_norm = np.asmatrix(
                np.diag(np.diag(self.H * self.H.transpose())**(-0.5))) * self.H
            fea = LA.norm(
                H_norm * H_norm.transpose() - np.asmatrix(np.eye(ha)),
                'fro') / (ha * ha)

            #print 'normalized orthogonality: ' + str(fea)
            self.converge.add_fea_condition_value('HTH_I', fea)

            # store the generated results by result manager
            self.res_manager.push_W(self.W)  # store W
            self.res_manager.push_H(self.H)  # store H
            self.res_manager.push_H_norm_ortho()  # store feasibility
            self.res_manager.push_W_norm_residual()
            self.res_manager.push_H_norm_residual()
            nmf_cost = self.get_onmf_cost(self.W, self.H, 0, 0)
            onmf_cost = self.get_onmf_cost(self.W, self.H, self.nu, self.mul)
            sncp_cost = self.get_sncp_cost(self.W, self.H, self.nu, self.mul,
                                           self.rho)
            self.res_manager.add_cost_value('onmf_cost_sncp',
                                            onmf_cost)  # store obj val
            self.res_manager.add_cost_value('sncp_cost', sncp_cost)
            self.res_manager.add_cost_value('onmf_cost_palm',
                                            onmf_cost)  # store obj val
            self.res_manager.add_cost_value('palm_cost', sncp_cost)
            self.res_manager.add_cost_value('nmf_cost_sncp', nmf_cost)
            self.res_manager.add_cost_value('nmf_cost_palm', nmf_cost)
            cls_assign = self.res_manager.calculate_cluster_quality(
                self.true_labels)  # calculate and store clustering quality
            self.res_manager.push_time(self.time_used)

        print 'HTH:'
        print self.H * self.H.transpose()
        print 'the L2-norm of columns of H:'
        print LA.norm(self.H, axis=0)

        # show the number of inner iterations
        self.converge.save_data(inner_iter_nums, self.output_dir,
                                'inner_nums.csv')
        #self.converge.save_data(time_sncp, self.output_dir, 'time_sncp.csv')
        self.converge.save_data(acc_sncp, self.output_dir, 'acc_sncp.csv')
        self.converge.save_data(nmf_cost_sncp, self.output_dir,
                                'nmf_cost_sncp.csv')
        self.converge.save_data(pobj_sncp, self.output_dir, 'pobj_sncp.csv')

        self.converge.save_data(self.obj_iter, self.output_dir,
                                'obj_iters.csv')
        self.converge.save_data(self.acc_iter, self.output_dir,
                                'acc_iters.csv')
        self.converge.save_data(self.nmf_cost_iter, self.output_dir,
                                'nmf_cost_iters.csv')
        self.converge.save_data(self.pobj_iter, self.output_dir,
                                'pobj_iters.csv')

        self.converge.save_data(self.time_iter, self.output_dir,
                                'time_iters.csv')

        print 'Stop solving the problem ---------'
        self.converge_analysis()
        self.res_manager.write_to_csv(
        )  # store the generated results to csv files

    ''' return the solution W, H '''

    def get_solution(self):
        return self.W, self.H

    ''' return the optimal obj val '''

    def get_opt_obj_and_fea(self):
        return self.get_nmf_cost(
            self.W,
            self.H), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''

    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''

    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError(
                'Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''

    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        #dir_name = path.join(self.res_dir, 'onmf', 'penalty', 'inner<1e-3', 'rank' + str(self.rank), 'SNR-3', 'seed' + str(self.seed_num))
        dir_name = self.output_dir
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(
            -1, dir_name)  # store the last element of the primal variable
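# --- Illustration (not part of the original example) ---
# The SNCP penalty rho * sum_j (||h_j||_1 - ||h_j||_inf) used above is zero
# exactly when each (nonnegative) column of H has at most one nonzero entry,
# i.e. when the rows of H have disjoint support. A small check of that
# property on made-up H matrices:
import numpy as np
from numpy import linalg as LA

def sncp_penalty(H, rho):
    # rho * sum_j (||h_j||_1 - ||h_j||_inf)
    return rho * sum(LA.norm(H[:, j], 1) - LA.norm(H[:, j], np.inf)
                     for j in range(H.shape[1]))

H_ortho = np.array([[1.0, 0.0, 0.0],
                    [0.0, 2.0, 3.0]])  # one nonzero per column
H_dense = np.array([[1.0, 0.5, 0.2],
                    [0.3, 2.0, 3.0]])  # overlapping support
print(sncp_penalty(H_ortho, 1.0))  # 0.0
print(sncp_penalty(H_dense, 1.0))  # > 0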
Example #5
class HALS_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.data_mat = np.asmatrix(np.copy(self.data_mat).transpose())
        W_init, H_init = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.F, self.G = H_init.transpose(), W_init
        self.res_dir = res_dir
        self.rank = rank
        #self.SNR = SNR
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        self.flag = 0  # flag to indicate whether G can be negative or not
        # flag = 0 : the G should be nonnegative
        # flag = 1: the G can be negative
        #self.n_factor = m * n # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.time_used = 0  # record the time used by the method
        self.U = None  # used  for update F

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        res = LA.norm(self.data_mat - self.F * self.G.transpose(),
                      'fro')**2 / self.n_factor
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_F(self):
        A = self.data_mat * self.G
        B = self.G.transpose() * self.G
        #print self.F.shape
        #print self.G.shape
        for j in range(self.rank):
            Fj = self.U - self.F[:, j]
            #print 'B[ji]'
            #print B[j, j]
            h = A[:, j] - self.F * B[:, j] + B[j, j] * self.F[:, j]
            #print 'Fj' + str(Fj.shape)
            #print 'h' + str(h.shape)
            #print 'Fj * Fj' + str(Fj.transpose() * Fj)
            tmp = np.multiply(Fj.transpose() * h, Fj) / np.asscalar(
                Fj.transpose() * Fj)
            tmp = h - tmp
            fj = np.maximum(1e-30, tmp)
            #print (Fj.transpose() * Fj)[0, 0]
            #print fj
            #print LA.norm(fj, 2)
            fj = fj / LA.norm(fj, 2)
            self.F[:, j] = fj
            self.U = Fj + fj

    def update_G(self):
        C = self.data_mat.transpose() * self.F
        D = self.F.transpose() * self.F
        #print D
        for j in range(self.rank):
            if self.flag == 0:
                temp = C[:, j] - self.G * D[:, j] + D[j, j] * self.G[:, j]
                self.G[:, j] = np.maximum(temp, 1e-30)
            else:
                self.G[:,
                       j] = C[:, j] - self.G * D[:, j] + D[j, j] * self.G[:, j]

    def solve(self):

        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(
            self.get_iter_num()) + ', obj =' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('F', self.F)
        self.converge.add_prim_value('G', self.G)

        # initialize U
        start_time = time.time()
        self.U = self.F * np.asmatrix(np.ones(self.rank)).transpose()
        end_time = time.time()
        self.time_used += end_time - start_time
        #print self.F[0, :]
        #print self.U[0]

        print 'Start to solve the problem by HALS ONMF ----------'
        while not self.converge.d():
            # update the variables F, G iteratively according to the HALS method
            start_time = time.time()  # record the start time
            self.update_F()
            self.update_G()
            end_time = time.time()  # record the end time
            self.time_used += end_time - start_time
            #print self.F[0:5, 0:5]
            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('F', self.F)
            self.converge.add_prim_value('G', self.G)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_HALS: iter = ' + str(
                self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of feasible conditions
            #(ha, hb) = self.F.shape
            fea = LA.norm(
                self.F.transpose() * self.F - np.asmatrix(np.eye(self.rank)),
                'fro') / (self.rank * self.rank)
            self.converge.add_fea_condition_value('FTF_I', fea)

        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''

    def get_solution(self):
        return self.G, self.F.transpose()

    ''' return the optimal obj val '''

    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value(
            'FTF_I')

    ''' return the iteration number and time used '''

    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''

    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.F), 1)
        #print len(labels)
        #print self.data_mat.shape[1]
        if len(labels) != self.data_mat.shape[0]:
            raise ValueError(
                'Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''

    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        m_name = 'hals_' + str(self.flag)
        dir_name = path.join(self.res_dir, 'onmf', m_name,
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(
            -1, dir_name)  # store the last element of primal variable
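# --- Illustration (not part of the original example) ---
# HALS updates one column of each factor at a time against the residual.
# A standalone sketch of the column-wise G-update from update_G for the
# nonnegative case (flag == 0); sizes are illustrative and the 1e-30 floor
# matches the class above.
import numpy as np

rng = np.random.RandomState(3)
X = rng.rand(10, 6)  # data with samples as rows, as in HALS_Solver
F = rng.rand(10, 3)  # cluster-indicator-like factor
G = rng.rand(6, 3)

C = X.T.dot(F)  # C = X^T F
D = F.T.dot(F)  # D = F^T F
for j in range(3):
    # closed-form column update: g_j <- max(c_j - G d_j + D_jj g_j, eps)
    G[:, j] = np.maximum(C[:, j] - G.dot(D[:, j]) + D[j, j] * G[:, j], 1e-30)
print(G[:, 0])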
Example #6
class ONPMF_Solver(object):

    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: input is missing!')
        self.rank = rank
        self.res_dir = res_dir
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        self.data_manager = data_manager
        self.data_mat = self.data_manager.get_data_mat()
        self.W, self.H = self.data_manager.gen_inits_WH(init='random', seed=seed_num, H_ortho=True)
        self.W = np.asmatrix(self.W, dtype=np.float64)
        self.H = np.asmatrix(self.H, dtype=np.float64)
        #np.random.seed(seed_num)  # set the seed so that each run will get the same initial values
        #(m, n) = self.data_mat.shape
        self.n_factor = LA.norm(self.data_mat, 'fro') ** 2 # normalization factor for the objective value
        self.time_used = 0
        self.flag = 0 # the flag indicates whether W can be negative or not, depending on the data
                      # flag = 0: W must be nonnegative
                      # flag = 1: W can be negative


    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)


    ''' initialize the primal variables '''
    def initialize_prim_vars(self):
        self.W = np.asmatrix(self.W)
        '''
        # alternative initialization of H from the leading right singular vectors
        U, s, V = LA.svd(self.data_mat)
        self.H = np.asmatrix(V[0:10, :])
        print self.H * self.H.transpose()
        (ha, hb) = self.H.shape
        labels = np.argmax(np.asarray(np.abs(self.H)), 0)
        H = np.zeros((ha, hb))
        for j in range(hb):
            H[labels[j], j] = 1
        H = np.asmatrix(H)
        H = np.asmatrix(np.diag(np.diag(H * H.transpose()) ** (-0.5))) * H
        print H * H.transpose()
        self.H = H
        '''
        # self.H = np.maximum(self.H, 0)
        self.converge.add_prim_value('W', self.W)  # store the initial values
        self.converge.add_prim_value('H', self.H)


    def initialize_dual_vars(self):
        (m, n) = self.data_mat.shape
        self.Z = np.asmatrix(np.zeros(shape = (self.rank, n), dtype = np.float64))
        self.converge.add_dual_value('Z', self.Z) # store the initial value for dual variables

    def initialize_penalty_para(self, flag=0):
        # set the penalty parameter rol
        self.rol = 0.01
        self.alpha = 100
        self.gamma = 1.01
        '''
        if self.data_manager.get_data_name() == 'mnist#8':
            self.scale = 0.001
        else:
            self.scale = 0.00001
        '''
        self.scale = 0.00001
        if self.flag == 0:
            self.mul = 0
        else:
            self.mul = 1e-10  # used when W can be negative

    ''' compute the lagrangian function value for testing '''
    def get_lag_val(self):
        sum_t = LA.norm(self.data_mat - self.W * self.H, 'fro') ** 2 / 2 \
                + self.mul * LA.norm(self.W, 'fro')**2 + np.trace(self.Z.transpose() * (-self.H)) + \
                0.5 * self.rol * LA.norm(np.minimum(self.H, 0), 'fro')**2
        return sum_t
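
    # For reference, get_lag_val above evaluates the penalized Lagrangian
    #   L(W, H, Z) = 0.5 * ||X - W H||_F^2 + mul * ||W||_F^2
    #                - tr(Z^T H) + 0.5 * rol * ||min(H, 0)||_F^2
    # where Z is the dual variable for the constraint H >= 0 and the last
    # term penalizes any negative entries of H.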


    def get_obj_val(self):
        res = LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / self.n_factor
        return res

    def get_iter_num(self):
        return self.converge.len()

    ''' update the primal variable with a given name at each iteration of ADMM '''
    def update_prim_var(self, var_name):
        if var_name == 'H':  # update primal variable H
            beta = 0.01
            step_size = 1
            # gradient of the Lagrangian w.r.t. H:
            #   d/dH [ 0.5 ||X - W H||_F^2 ]          = W^T (W H - X)
            #   d/dH [ -tr(Z^T H) ]                   = -Z
            #   d/dH [ 0.5 * rol * ||min(H,0)||_F^2 ] = rol * min(0, H)
            gradient = self.W.transpose() * (self.W * self.H - self.data_mat) \
                    - self.Z + self.rol * np.minimum(0, self.H)
            Lx = 0.5 * LA.norm(self.data_mat - self.W * self.H, 'fro')**2
            Lx = Lx + np.trace(self.Z.transpose() * (-self.H))
            Lx = Lx + 0.5 * self.rol * LA.norm(np.minimum(self.H, 0), 'fro')**2
            #print 'Lx: ' + str(Lx)
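            # Backtracking line search: take a gradient step on H, project the
            # result back onto the set of matrices with orthonormal rows via an
            # SVD (orthogonal Procrustes step), and shrink step_size by beta
            # until the sufficient-decrease condition below holds.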
            while True:
                B = self.H - step_size * gradient
                U, s, V = scipy.linalg.svd(B)
                (a, b) = B.shape
                E = np.asmatrix(np.eye(a, b))
                H_new = np.asmatrix(U) * E * np.asmatrix(V)  # scipy.linalg.svd returns V already transposed (Vh); do not transpose again
                Lz = 0.5 * LA.norm(self.data_mat - self.W * H_new, 'fro')**2 \
                        + np.trace(self.Z.transpose() * (-H_new)) \
                        + 0.5 * self.rol * LA.norm(np.minimum(H_new, 0), 'fro')**2
                #print 'update H : ' + str(step_size) + ' ' + str(Lz)
                if Lz <= Lx + self.scale * np.trace(gradient.transpose() * (H_new - self.H)):
                    break
                step_size = step_size * beta
            self.H = np.asmatrix(np.copy(H_new))

        elif var_name == 'W':  # update primal variable W
            if self.flag == 0:
                '''
                # alternative: projected gradient descent with backtracking
                for j in range(10):
                    beta = 0.1
                    step_size = 1
                    gradient = (self.W * self.H - self.data_mat) * self.H.transpose()
                    f_x = LA.norm(self.data_mat - self.W * self.H, 'fro')**2 / 2
                    while True:
                        W_new = np.maximum(0, self.W - step_size * gradient)
                        f_z = LA.norm(self.data_mat - W_new * self.H, 'fro')**2 / 2
                        if f_z <= f_x + 0.00001 * np.trace(gradient.transpose() * (W_new - self.W)):
                            break
                        step_size = step_size * beta
                    test = LA.norm(W_new - self.W, 'fro') / LA.norm(self.W, 'fro')
                    self.W = np.asmatrix(np.copy(W_new))
                    if test < 1e-5:
                        break
                '''
                # solve each row of W by nonnegative least squares:
                #   w_i = arg min_{w >= 0} ||H^T w^T - x_i^T||_2^2
                (wa, wb) = self.W.shape
                for i in range(wa):
                    b = np.array(self.data_mat[i, :]).flatten()
                    t, _res = optm.nnls(self.H.transpose(), b)
                    self.W[i, :] = np.asmatrix(t)
            else:
                # unconstrained, ridge-regularized least squares in closed form
                (ha, hb) = self.H.shape
                I_ha = np.asmatrix(np.eye(ha))
                self.W = self.data_mat * self.H.transpose() * \
                        LA.inv(self.H * self.H.transpose() + self.mul * I_ha)
        else:
            raise ValueError('Error: no primal variable with this name to be updated!')

    ''' update the dual variable with a given name at each iteration of ADMM'''
    def update_dual_var(self, var_name):
        if var_name == 'Z': # update dual variable Z
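            # dual ascent with a diminishing step size alpha/k; the projection
            # max(0, .) keeps the multipliers for the constraint H >= 0 nonnegative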
            k = np.maximum(self.get_iter_num(), 1)
            self.Z = np.maximum(0, self.Z - (self.alpha / k) * self.H)
        else:
            raise ValueError('Error: no dual variable with this name to be updated!')

    ''' update the penalty parameter rol adaptively '''
    def update_penalty_parameters(self):
        self.rol = self.gamma * self.rol  # geometric growth with factor gamma

    def solve(self):
        self.initialize_prim_vars()
        self.initialize_dual_vars()
        self.initialize_penalty_para()
        data_name = self.data_manager.get_data_name()
        if data_name.startswith('tdt2') or data_name.startswith('tcga'):
            self.set_max_iters(500)
        #self.set_max_iters(500)
        print self.H[:, 0]

        #obj_val = self.get_obj_val()
        #print 'The initial error: iter' + str(self.get_iter_num()) + ', obj =' + str(obj_val)
        print 'Start to solve the problem by ADMM ------------'
        while not self.converge.d():
            start_time = time.time()

            # update the primal and dual variables according to the ADMM algorithm
            #print 'before update, lag_val: ' + str(self.get_lag_val())
            # update primal variable W
            self.update_prim_var('W')
            #print 'after update W, lag_val: ' + str(self.get_lag_val())
            # update primal variable H
            self.update_prim_var('H')
            #print 'after update H, lag_val: ' + str(self.get_lag_val())

            # update dual variable Z
            self.update_dual_var('Z')

            self.update_penalty_parameters()

            end_time = time.time()
            self.time_used += end_time - start_time


            # store the newly obtained values for convergence analysis
            # note that the change of each primal and dual variable will also be computed and added
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)
            self.converge.add_dual_value('Z', self.Z)

            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_onpmf: iter = ' + str(self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(self.H * self.H.transpose() - np.asmatrix(np.eye(ha)), 'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)

        print 'Stop solving the problem ------'
        self.converge_analysis()

    ''' return the solution W, H '''
    def get_solution(self):
        return self.W, self.H

    ''' return the optimal objective value and feasibility level '''
    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value('HTH_I')

    ''' return the iteration number and time used '''
    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''
    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError('Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''
    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        #dir_name = path.join(self.res_dir, path.basename(path.normpath(self.data_path)), 'rank' + str(self.rank))
        m_name = 'onp_mf1_' + str(self.flag)
        dir_name = path.join(self.res_dir, 'onmf', m_name, 'alpha100',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the dual change ------'
        self.converge.plot_convergence_dual_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(-1, dir_name)  # store the last element of the primal variables list
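The H-update in update_prim_var above reduces to one reusable step: after a gradient move, the iterate is projected back onto the set of matrices with orthonormal rows via an SVD (an orthogonal Procrustes projection). A minimal, self-contained sketch of just that projection follows; the function name is illustrative, not part of the original code:

import numpy as np
from scipy.linalg import svd

def project_orthonormal_rows(B):
    # nearest matrix to B (in Frobenius norm) with orthonormal rows:
    # if B = U diag(s) Vh, the projection is U Vh; this matches the
    # U * E * V computation in the solver, where E = eye(a, b) picks
    # out the economic part of the full SVD
    U, s, Vh = svd(B, full_matrices=False)
    return np.dot(U, Vh)

B = np.random.rand(4, 50)
H = project_orthonormal_rows(B)
print(np.allclose(np.dot(H, H.T), np.eye(4)))  # True: rows are orthonormal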
Example #7
class DTPP_Solver(object):
    def __init__(self, data_manager=None, res_dir=None, rank=4, seed_num=1):
        if data_manager is None or res_dir is None:
            raise ValueError('Error: some inputs are missing!')
        self.data_manager = data_manager
        self.W, self.H = self.data_manager.gen_inits_WH(init='random',
                                                        seed=seed_num,
                                                        H_ortho=False)
        self.data_mat = self.data_manager.get_data_mat()
        #self.true_labels = self.data_manager.get_labels()
        self.res_dir = res_dir
        self.rank = rank
        self.seed_num = seed_num
        self.converge = Convergence(res_dir)
        np.random.seed(
            seed_num
        )  # set the seed so that each run will get the same initial values
        (m, n) = self.data_mat.shape
        #self.n_factor = m * n # set the normalization factor to normalize the objective value
        self.n_factor = LA.norm(self.data_mat, 'fro')**2
        self.time_used = 0  # record the time used by the method
        #self.set_max_iters(1000)

    def set_max_iters(self, num):
        self.converge.set_max_iters(num)

    def set_tol(self, tol):
        self.converge.set_tolerance(tol)

    def get_obj_val(self):
        #print self.data_mat.shape
        #print self.W.shape
        #print self.H.shape
        res = LA.norm(self.data_mat - self.W * self.H,
                      'fro')**2 / self.n_factor
        return res

    def get_iter_num(self):
        return self.converge.len()

    def update_prim_var(self, var_name):
        if var_name == 'W':
            #W = W .* ((V * H') ./ max(W * (H * H'), myeps));
            temp = np.divide(self.data_mat * self.H.transpose(), \
                    np.maximum(self.W * (self.H * self.H.transpose()), 1e-20))
            self.W = np.multiply(self.W, temp)
        elif var_name == 'H':
            #H = H .* (((W' * V) ./ max(W' * V * (H' * H), myeps)) .^ (1/2));
            temp = np.divide(self.W.transpose() * self.data_mat, np.maximum(self.W.transpose() * self.data_mat * (self.H.transpose() * self.H), \
                    1e-20))
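            # the element-wise square root (power 0.5 below) is the damping
            # prescribed by the DTPP multiplicative rule for the
            # orthogonality-constrained factor H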
            self.H = np.multiply(self.H, np.power(temp, 0.5))
        else:
            raise ValueError('Error: no other variable should be updated!')

    def solve(self):

        obj_val = self.get_obj_val()
        print 'The initial error: iter = ' + str(
            self.get_iter_num()) + ', obj = ' + str(obj_val)
        self.converge.add_obj_value(obj_val)
        self.converge.add_prim_value('W', self.W)
        self.converge.add_prim_value('H', self.H)

        print 'H0'
        print self.H

        print 'Start to solve the problem by DTPP ----------'
        while not self.converge.d():
            # update the variable W , H iteratively according to DTPP method
            start_time = time.time()  # record the start time
            self.update_prim_var('W')
            self.update_prim_var('H')
            end_time = time.time()  # record the end time
            self.time_used += end_time - start_time

            # store the newly obtained values for convergence analysis
            self.converge.add_prim_value('W', self.W)
            self.converge.add_prim_value('H', self.H)

            # store the objective function value
            obj_val = self.get_obj_val()
            self.converge.add_obj_value(obj_val)
            print 'onmf_DTPP: iter = ' + str(
                self.get_iter_num()) + ', obj = ' + str(obj_val)

            # store the satisfaction of the feasibility condition
            (ha, hb) = self.H.shape
            fea = LA.norm(
                self.H * self.H.transpose() - np.asmatrix(np.eye(ha)),
                'fro') / (ha * ha)
            self.converge.add_fea_condition_value('HTH_I', fea)

        print 'Stop solving the problem ---------'
        self.converge_analysis()

    ''' return the solution W, H '''

    def get_solution(self):
        return self.W, self.H

    ''' return the optimal objective value and feasibility level '''

    def get_opt_obj_and_fea(self):
        return self.get_obj_val(), self.converge.get_last_fea_condition_value(
            'HTH_I')

    ''' return the iteration number and time used '''

    def get_iter_and_time(self):
        return self.get_iter_num(), self.time_used

    def get_time(self):
        return self.time_used

    ''' return the cluster assignment from H '''

    def get_cls_assignment_from_H(self):
        labels = np.argmax(np.asarray(self.H), 0)
        if len(labels) != self.data_mat.shape[1]:
            raise ValueError(
                'Error: the size of data samples must = the length of labels!')
        return labels

    ''' simulation result analysis (convergence plot) '''

    def converge_analysis(self):
        # get the dirname to store the result: data file and figure
        dir_name = path.join(self.res_dir, 'onmf', 'dtpp',
                             'rank' + str(self.rank),
                             self.data_manager.get_data_name(),
                             'seed' + str(self.seed_num))
        print 'Start to plot and store the obj convergence ------'
        self.converge.plot_convergence_obj(dir_name)
        print 'Start to plot and store the primal change ------'
        self.converge.plot_convergence_prim_var(dir_name)
        print 'Start to plot and store the fea condition change ------'
        self.converge.plot_convergence_fea_condition(dir_name)
        print 'Start to store the obj values and the factors'
        self.converge.store_obj_val(dir_name)
        self.converge.store_prim_val(
            -1, dir_name)  # store the last element of the primal variables list
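For intuition, here is a self-contained run of the two multiplicative rules used in update_prim_var above on random nonnegative data; this is a sketch, not the original driver code. The 1e-20 floor mirrors the guard in the class, and the relative fit printed at the end corresponds to get_obj_val's normalized objective:

import numpy as np

np.random.seed(0)
m, n, k = 30, 40, 4
V = np.abs(np.random.rand(m, n))   # data matrix
W = np.abs(np.random.rand(m, k))
H = np.abs(np.random.rand(k, n))
eps = 1e-20

for it in range(100):
    # W <- W .* (V H^T) ./ max(W (H H^T), eps)
    W = W * (V.dot(H.T) / np.maximum(W.dot(H.dot(H.T)), eps))
    # H <- H .* sqrt((W^T V) ./ max((W^T V)(H^T H), eps))
    WtV = W.T.dot(V)
    H = H * np.sqrt(WtV / np.maximum(WtV.dot(H.T.dot(H)), eps))

# normalized residual ||V - W H||_F^2 / ||V||_F^2, as in get_obj_val
print(np.linalg.norm(V - W.dot(H), 'fro')**2 / np.linalg.norm(V, 'fro')**2)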