Example #1

import time

import numpy as np
from sklearn.linear_model import LassoLars

# Helpers such as initialize_X_W_hat, get_g_hat_value, eval_g_hat_with_DnX,
# get_clustering_assignment_1/2, evaluation_clustering and update_W_X_hat
# (Example #2) are assumed to be defined elsewhere in the same module.

def cvx_online_dict_learning(X,
                             y_true,
                             n_hat,
                             k_cluster,
                             T,
                             lmda,
                             eps,
                             flag=True,
                             version='Rr'):
    '''
    X: R^(n * m)
    y_true: str^n
    W_0: R^(n_hat * k)
    x_i: R^m
    alpha: R^k

    cvx_online problem:
        min ||x_i - X.T * W * alpha|| + lambda * ||alpha||

    In the online setting there is no full X in R^(n * m);
    instead, we store a candidate set and solve the subproblem:
        min ||x_i - X_hat * W_hat * alpha|| + lambda * ||alpha||

    X_hat: R^(m * n_hat)
    W_hat: R^(n_hat * k)

    version: 'Rr' -- restricted, heuristic approach
             'Ru' -- uniform, random assignment
    '''
    n_dim, m_dim = X.shape

    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    x_sum = 0
    alpha_sum = 0
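    # A_t, B_t, x_sum and alpha_sum are running sufficient statistics of the
    # streamed samples (in the style of online dictionary learning): sums of
    # alpha_i alpha_i^T, x_i alpha_i^T, ||x_i||^2 and lmda * ||alpha_i||_1.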

    # step 1: sample n_hat points from X as the initial candidate matrix X_0.
    X_0 = np.zeros((m_dim, n_hat))
    for idx in range(n_hat):
        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]
        X_0[:, idx] = x_sample

    # step 2: initialization, get X_hat (including cluster info)
    # and W_hat from X_0, using the same initialization as in CNMF.
    # representative_size_count holds n_1_hat, n_2_hat, ..., n_k_hat.
    t1 = time.time()
    X_hat, W_hat, representative_size_count = initialize_X_W_hat(
        X_0, k_cluster)
    X_0, W_0 = X_hat.copy(), W_hat.copy()
    t2 = time.time()
    # print('init cost {:.4f}'.format(t2 - t1))

    # step 3: after initializing X_hat, update alpha, W_hat and X_hat alternately.
    t_start = time.time()
    print(lmda, eps)
    g_hat_list = []
    error_eval_list = []
    t_cur = 0
    for t in range(T):
        # t_start_online = time.time()
        g_hat_i = get_g_hat_value(t, W_hat, X_hat, A_t, B_t, x_sum, alpha_sum)
        D_tmp_dummy = X_hat @ W_hat
        error_i = eval_g_hat_with_DnX(X, D_tmp_dummy.T, n_dim, m_dim)
        g_hat_list.append((t_cur, g_hat_i))
        error_eval_list.append((t_cur, error_i))
        if t % 50 == 0 and flag:
            D_t = np.matmul(X_hat, W_hat)
            tmp_assignment = get_clustering_assignment_1(X, D_t, k_cluster)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print('1)iteration {}, distance acc = {:.4f}, AMI = {:.4f}'.format(
                t, tmp_acc, tmp_AMI))

            tmp_assignment = get_clustering_assignment_2(
                X, D_t, k_cluster, lmda)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print(
                '2)iteration {}, kmeans of weights acc = {:.4f}, AMI = {:.4f}'.
                format(t, tmp_acc, tmp_AMI))
            t_end = time.time()
            print('time elapse = {:.4f}s'.format(t_end - t_start))
            t_start = t_end

            print('-' * 7)

        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]

        # update alpha
        t1 = time.time()
        lars_lasso = LassoLars(alpha=lmda, max_iter=500)
        D_t = np.matmul(X_hat, W_hat)
        lars_lasso.fit(D_t, x_sample)
        alpha_t = lars_lasso.coef_
        # t2 = time.time()
        # print('lasso cost {:.4f}s'.format(t2 - t1))

        # cluster assignment of the current sample
        if version == 'Rr':
            # restricted: assign x_i to the cluster with the largest coefficient
            cluster_of_x_i = np.argmax(alpha_t)
        else:
            # 'Ru': uniform random assignment
            cluster_of_x_i = np.random.randint(0, k_cluster)

        # rank-one updates of the sufficient statistics
        A_t += np.outer(alpha_t, alpha_t)
        B_t += np.outer(x_sample, alpha_t)
        x_sum += np.linalg.norm(x_sample) ** 2
        alpha_sum += lmda * np.linalg.norm(alpha_t, 1)

        # update X_hat and W_hat given the new sample and statistics
        W_hat, X_hat = update_W_X_hat(W_hat, X_hat, representative_size_count,
                                      x_sample, cluster_of_x_i, A_t, B_t,
                                      x_sum, alpha_sum, t, eps)
        t2 = time.time()
        t_cur += (t2 - t1)  # optimization time this iteration (from the lasso step on)

    print('Dictionary update done! Time elapsed {:.4f}s'.format(time.time() -
                                                                t_start))

    return W_hat, X_hat, representative_size_count, X_0, W_0, g_hat_list, error_eval_list
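
A minimal usage sketch on synthetic data (hypothetical shapes and parameter
values; flag=False skips the intermediate clustering printouts):

# Hypothetical smoke test: 200 samples in R^50, 4 clusters.
X = np.random.randn(200, 50)
y_true = np.array([str(c) for c in np.random.randint(0, 4, size=200)])
W_hat, X_hat, sizes, X_0, W_0, g_hist, err_hist = cvx_online_dict_learning(
    X, y_true, n_hat=40, k_cluster=4, T=500, lmda=0.1, eps=1e-6,
    flag=False, version='Rr')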
Example #2

import numpy as np

def update_W_X_hat(W_hat, X_hat, repre_size_count, x_sample, cluster_of_x_i, 
        A_t, B_t, x_sum, alpha_sum, t, eps):
    # Enforce the block-diagonal structure of W_hat via projection.

    cluster_separation_idx = np.cumsum(repre_size_count)
    end_idx = cluster_separation_idx[cluster_of_x_i]
    start_idx = end_idx - repre_size_count[cluster_of_x_i]
    A_t_inv = np.linalg.pinv(A_t)

    # W_opt_old_X = opt_cal_W_hat_numpy(W_hat, X_hat, A_t, B_t, x_sum, alpha_sum, eps, t)
    W_opt_old_X = opt_cal_W_hat_solve(W_hat, X_hat, A_t_inv, B_t, x_sum, alpha_sum, eps, t)
    g_hat_old_X = get_g_hat_value(t, W_opt_old_X, X_hat, A_t, B_t, x_sum, alpha_sum)

    list_of_W_opt_new_X = [W_opt_old_X]
    list_of_g_hat_new_X = [g_hat_old_X]
    list_of_new_X = [X_hat]

    # print('starting loop in update_W_X, total {}'.format(end_idx - start_idx))
    for idx in range(start_idx, end_idx):
        # print('iter # {}'.format(idx))
        # Candidate X_hat: replace representative column idx of this cluster
        # with the new sample.
        X_hat_new = X_hat.copy()
        X_hat_new[:, idx] = x_sample
        list_of_new_X.append(X_hat_new)

        # Re-solve for W_hat under the candidate X_hat and evaluate the surrogate.
        # W_opt_new_X = opt_cal_W_hat_numpy(W_hat, X_hat_new, A_t, B_t, x_sum, alpha_sum, eps, t)
        W_opt_new_X = opt_cal_W_hat_solve(W_hat, X_hat_new, A_t_inv, B_t, x_sum, alpha_sum, eps, t)
        g_hat_new_X = get_g_hat_value(t, W_opt_new_X, X_hat_new, A_t, B_t, x_sum, alpha_sum)

        list_of_W_opt_new_X.append(W_opt_new_X)
        list_of_g_hat_new_X.append(g_hat_new_X)

    # Keep whichever candidate (including the unmodified X_hat at index 0)
    # attains the smallest surrogate value.
    min_g_idx = np.argmin(list_of_g_hat_new_X)

    X_hat_new = list_of_new_X[min_g_idx]
    W_hat_new = list_of_W_opt_new_X[min_g_idx]

    return W_hat_new, X_hat_new
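
The surrogate value that drives the candidate selection above can be computed
from the accumulated statistics alone. A minimal sketch of how get_g_hat_value
plausibly works (an assumption based on the standard online dictionary-learning
surrogate; the actual helper is not shown in these examples):

def get_g_hat_value_sketch(t, W_hat, X_hat, A_t, B_t, x_sum, alpha_sum):
    # Sketch of (1/t) * sum_i (1/2 ||x_i - D alpha_i||^2 + lmda * ||alpha_i||_1)
    # with D = X_hat @ W_hat, expanded through the statistics:
    #   x_sum = sum_i ||x_i||^2,        B_t = sum_i x_i alpha_i^T,
    #   A_t = sum_i alpha_i alpha_i^T,  alpha_sum = lmda * sum_i ||alpha_i||_1
    D = X_hat @ W_hat
    fit = 0.5 * x_sum - np.trace(D.T @ B_t) + 0.5 * np.trace(D.T @ D @ A_t)
    return (fit + alpha_sum) / max(t, 1)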
Example #3

# Fragment of a benchmarking routine: the enclosing loop that accumulates
# x_sum and alpha_sum over the samples is omitted from this excerpt.
        x_sum += (np.linalg.norm(x_sample)**2)
        alpha_sum += lmda * np.linalg.norm(alpha_i, 1)

        # T1 = 1/2 * (np.linalg.norm(x_sample.reshape(m_dim, 1) -
        #     X_hat @ W_hat @ alpha_i.reshape(k, 1)) ** 2)
        # T2 = lmda * np.linalg.norm(alpha_i, 1)
        # g_val_cumulative += (T1 + T2)

    t1 = time.time()
    for _ in range(10):
        W_cuda = update_W_hat_skcuda(W_hat, X_hat, A_t, B_t, x_sum, alpha_sum,
                                     eps, T)
    t2 = time.time()
    print('pycuda cost {:.4f}s over 10 calls'.format(t2 - t1))
    g_hat_val_cuda = get_g_hat_value(T, W_cuda.get(), X_hat, A_t, B_t, x_sum,
                                     alpha_sum)
    print('g value = {:.4f}'.format(g_hat_val_cuda))

    print('-' * 7)

    t1 = time.time()
    for _ in range(10):
        W_numpy = update_W_hat_numpy(W_hat, X_hat, A_t, B_t, x_sum, alpha_sum,
                                     eps, T)
    t2 = time.time()
    print('numpy dense cost {:.4f}s over 10 calls'.format(t2 - t1))
    g_hat_val_np = get_g_hat_value(T, W_numpy, X_hat, A_t, B_t, x_sum,
                                   alpha_sum)
    print('g value = {:.4f}'.format(g_hat_val_np))
    print('-' * 7)
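
A sanity check one might append after the two timing loops (a sketch; it
assumes both solvers converge to the same optimum up to numerical tolerance):

    # Sketch: the skcuda and numpy solvers should agree numerically.
    assert np.allclose(W_cuda.get(), W_numpy, atol=1e-5), \
        'skcuda and numpy W_hat updates diverge'
    assert abs(g_hat_val_cuda - g_hat_val_np) < 1e-6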