def bcv(v, r, iteration, seed, row_sep_num, column_sep_num, error_print):
    """Accumulate the bi-cross-validation (BCV) error of a rank-``r`` NMF.

    ``v`` is cut into ``row_sep_num`` x ``column_sep_num`` folds.  For each
    fold the held-out block ``A`` is predicted from the rest of the matrix:
    ``D`` (everything outside the fold's rows and columns) is factorised as
    ``W_D H_D`` by repeated ``ff.update`` calls, and ``A`` is estimated as
    ``B pinv(H_D) pinv(W_D) C`` where ``B`` shares A's rows and ``C`` shares
    A's columns.  The squared Frobenius prediction errors are summed over all
    folds.

    Args:
        v: 2-D array to factorise.
        r: Factorisation rank.
        iteration: Number of ``ff.update`` steps run for every fold.
        seed: Seed forwarded to ``ff.two_seeds``; the two resulting seeds
            initialise ``W_D`` and ``H_D`` (the same pair for every fold).
        row_sep_num: Number of row folds.
        column_sep_num: Number of column folds.
        error_print: When truthy, the per-iteration squared residual
            ``||D - W_D H_D||_F^2`` of the last fold (the one ending at the
            last row and last column) is stored in the returned array.

    Returns:
        tuple: ``(bcv_error, error)`` where ``bcv_error`` is the summed
        squared prediction error and ``error`` is an array of length
        ``iteration`` (all zeros unless ``error_print`` is truthy).

    Raises:
        SystemExit: With exit code 1 when removing one fold leaves fewer than
            ``r`` rows or fewer than ``r`` columns.
    """
    n, m = v.shape
    n_sep_size = n // row_sep_num
    m_sep_size = m // column_sep_num

    # Validate before doing any work.  The original condition used ``|``,
    # which binds tighter than ``<`` and turned the test into the chained
    # comparison ``a < (r | b) < r``; ``or`` expresses the intended check.
    # NOTE(review): the message text is kept unchanged, although the check
    # fires when D would have *fewer* than r rows/columns.
    if n - n_sep_size < r or m - m_sep_size < r:
        print("Error: row or column size of D is bigger than R",
              file=sys.stderr)
        sys.exit(1)

    counter = 1
    bic_tmp = 0
    seeds = ff.two_seeds(seed)
    error = np.zeros(iteration)

    for r_sep in range(row_sep_num):
        row_s = n_sep_size * r_sep
        # The last fold absorbs the rows left over by the integer division.
        row_e = n if r_sep == row_sep_num - 1 else n_sep_size * (r_sep + 1)

        for c_sep in range(column_sep_num):  # for folded row and column
            column_s = m_sep_size * c_sep
            # Likewise the last column fold runs to the final column.
            if c_sep == column_sep_num - 1:
                column_e = m
            else:
                column_e = m_sep_size * (c_sep + 1)

            # set Matrices ABCD  --------------------------------------------
            # A: held-out block; B: same rows, other columns;
            # C: other rows, same columns; D: other rows, other columns.
            a = v[row_s:row_e, column_s:column_e]
            b = np.concatenate(
                [v[row_s:row_e, :column_s], v[row_s:row_e, column_e:]], 1)
            c = np.concatenate(
                [v[:row_s, column_s:column_e], v[row_e:, column_s:column_e]],
                0)
            d_upper = np.concatenate(
                [v[:row_s, :column_s], v[:row_s, column_e:]], 1)
            d_lower = np.concatenate(
                [v[row_e:, :column_s], v[row_e:, column_e:]], 1)
            d = np.concatenate([d_upper, d_lower], 0)

            # fit NMF to D  -------------------------------------------------
            w_d = ff.generate_w(d.shape[0], r, seeds[0], c_mode=0)
            h_d = ff.generate_h(r, d.shape[1], seeds[1], c_mode=0)

            # Residuals are only recorded for the fold that reaches both the
            # last row and the last column.
            record_error = bool(error_print) and row_e == n and column_e == m

            for i in range(iteration):
                w_d, h_d = ff.update(d, w_d, h_d, 0)
                if record_error:
                    error[i] = np.linalg.norm(d - np.dot(w_d, h_d))**2
                if i % 1000 == 999:
                    print("BCV  NMF ( r={}  {} / {}) : {} times update".format(
                        r, counter, row_sep_num * column_sep_num, i + 1))

            # calculate BCV: predict A from B, C and the factors of D.
            a_r = np.dot(np.dot(b, np.linalg.pinv(h_d)),
                         np.dot(np.linalg.pinv(w_d), c))
            bic_tmp += np.linalg.norm(a - a_r)**2
            counter += 1
    return bic_tmp, error
def bic(v, r, iteration, seed):
    """Fit a rank-``r`` factorisation to ``v`` and score it BIC-style.

    ``W`` and ``H`` are initialised through ``ff.generate_w`` /
    ``ff.generate_h`` (seeds derived from ``seed`` via ``ff.two_seeds``) and
    refined with ``iteration`` calls to ``ff.update``.

    Args:
        v: 2-D array to factorise.
        r: Factorisation rank.
        iteration: Number of update steps.
        seed: Seed handed to ``ff.two_seeds``.

    Returns:
        tuple: ``(score, error)``.  ``score`` is
        ``log(||V - WH||_F^2) + r * a * log(1 / a)`` with
        ``a = (n + m) / (n * m)``; ``error`` holds the squared Frobenius
        residual after every update step.
    """
    rows, cols = v.shape
    seed_pair = ff.two_seeds(seed)
    basis = ff.generate_w(rows, r, seed_pair[0], c_mode=0)
    coef = ff.generate_h(r, cols, seed_pair[1], c_mode=0)
    history = np.zeros(iteration)

    for step in range(iteration):
        basis, coef = ff.update(v, basis, coef, 0)
        residual = v - np.dot(basis, coef)
        history[step] = np.linalg.norm(residual)**2
        # Progress message once every 1000 updates.
        if step % 1000 == 999:
            print("BIC  NMF ( r={} ) : {} times update".format(r, step + 1))

    # Penalty scale depends only on the shape of v.
    penalty_scale = (rows + cols) / (rows * cols)
    final_sq_error = np.linalg.norm(v - np.dot(basis, coef))**2
    score = (np.log(final_sq_error) +
             r * penalty_scale * np.log(1 / penalty_scale))
    return score, history
def parallel_least_square_w_eval(n, m, r, approximate_size, v, o_w, iteration,
                                 wh_seed, c_mode, column_sketching):
    """Run plain and sketched factorisation side by side and collect errors.

    Both runs start from factors generated with the same two seeds (derived
    from ``wh_seed`` by ``ff.two_seeds``).  The plain run works on ``v``; the
    sketched run works on ``ff.uniform_sampling(v, approximate_size, ...)``.
    Each step is delegated to ``calculate`` (defined outside this block),
    which returns the updated factors and one error value.

    Args:
        n: Row count used for the generated ``W`` factors.
        m: Column count used for the generated ``H`` factor of the plain run.
        r: Factorisation rank.
        approximate_size: Size passed to ``ff.uniform_sampling``; also the
            column count of the sketched ``H`` factor.
        v: Matrix to factorise.
        o_w: Reference matrix forwarded unchanged to ``calculate``.
        iteration: Number of ``calculate`` steps per run.
        wh_seed: Seed handed to ``ff.two_seeds``.
        c_mode: Mode flag forwarded to the ``ff`` generators and ``calculate``.
        column_sketching: Forwarded as ``right_product`` to
            ``ff.uniform_sampling``.

    Returns:
        tuple: ``(nmf_error, snmf_error, w, h, w_s, h_os)`` where ``h_os`` is
        ``ff.calculate_h(v, w_s)`` computed after the sketched run.
    """
    seed_pair = ff.two_seeds(wh_seed)
    w_seed = seed_pair[0]
    h_seed = seed_pair[1]

    # Initial factors and the sketched matrix (same call order as before).
    full_w = ff.generate_w(n, r, w_seed, c_mode=c_mode)
    full_h = ff.generate_h(r, m, h_seed, c_mode=c_mode)
    sketched_v = ff.uniform_sampling(v,
                                     approximate_size,
                                     w_seed + 1,
                                     right_product=column_sketching)
    sketch_w = ff.generate_w(n, r, w_seed, c_mode=c_mode)
    sketch_h = ff.generate_h(r, approximate_size, h_seed, c_mode=c_mode)

    full_errors = np.zeros(iteration)
    sketch_errors = np.zeros(iteration)

    # Plain run on the full matrix.
    for step in range(iteration):
        full_w, full_h, full_errors[step] = calculate(
            v, o_w, full_w, full_h, c_mode, step)

    # Sketched run on the sampled matrix.
    for step in range(iteration):
        sketch_w, sketch_h, sketch_errors[step] = calculate(
            sketched_v, o_w, sketch_w, sketch_h, c_mode, step)

    # Coefficient matrix for the full v given the sketched W.
    full_h_from_sketch = ff.calculate_h(v, sketch_w)

    return (full_errors, sketch_errors, full_w, full_h, sketch_w,
            full_h_from_sketch)
# preparation ----------------------------------------------------------------------------------------------------------
# Script fragment: mode, c_mode, n, m, r, approximate_size, v_seed, wh_seed,
# program_num, convergence_D_mode and CDC must already be defined earlier in
# the script (not visible in this excerpt).
print("-----------------   START   ------------------")

# Build the identifier string for this run from its configuration.
# NOTE(review): the seventh positional argument is the literal "_unknown";
# presumably a placeholder for the iteration count, which is only determined
# further down - confirm against ff.program_name.
program_code = ff.program_name(mode,
                               c_mode,
                               n,
                               m,
                               r,
                               approximate_size,
                               "_unknown",
                               v_seed,
                               wh_seed,
                               program_num,
                               convergence_D_mode=convergence_D_mode,
                               CDC=CDC)
# Derive the seed pair used for W/H initialisation from the single WH seed.
seeds = ff.two_seeds(wh_seed)

# Echo the run configuration.
print("mode = " + str(mode) + "\nstart  " + program_code + "  >>>>>\n")
print("seed of V=" + str(v_seed) + "  seed of WH=" + str(wh_seed))

# generate V------------------------------------------------------------------------------------------------------------
# ff.generate_wh returns the data matrix together with the factors it was
# built from (as the unpacked names suggest - TODO confirm in ff).
V, original_W, original_H = ff.generate_wh(n, m, r, v_seed)

# get iteration---------------------------------------------------------------------------------------------------------
print("\n\n\n--------------  get iteration  --------------")

if mode == 0:
    iteration_list = ct.get_v_ite(V,
                                  n,
                                  m,
                                  r,
Example #5
0
def error_calculate(r_size, ap_size, v_origin, w_origin, ite, wh_seed, c_mode,
                    nmfqp, t_flag):
    """Track V- and W-recovery errors for plain NMF and sketched NMF.

    Two factorisations are run for ``ite`` steps each, both starting from
    factors generated with the same seed pair (``ff.two_seeds(wh_seed)``):
    one on ``v_origin`` and one on ``ff.uniform_sampling(v_origin, ap_size,
    0)``.  After every step two errors are recorded:

    * V error: ``||v_origin - W H||_F^2``.  For the plain run ``H`` is the
      current factor, or ``ff.calculate_h(v_origin, W)`` when ``nmfqp`` is
      truthy.  For the sketched run ``H`` is always
      ``ff.calculate_h(v_origin, W_s)``.
    * W error: squared Frobenius residual of the least-squares fit of
      ``w_origin`` onto the learned basis (``W``; or, when ``t_flag`` is
      truthy, the transposed coefficient matrix).

    Args:
        r_size: Factorisation rank.
        ap_size: Sketch size passed to ``ff.uniform_sampling``.
        v_origin: 2-D matrix to factorise.
        w_origin: Reference matrix the learned basis is compared against.
        ite: Number of update steps per run.  Must be at least 1: the last
            return element is only assigned inside the sketched loop.
        wh_seed: Seed handed to ``ff.two_seeds``.
        c_mode: ``2`` selects ``ff.fgd_update`` (with both theta values
            starting at 5); any other value is forwarded to ``ff.update``.
        nmfqp: When truthy, the plain-run V error uses an ``H`` recomputed by
            ``ff.calculate_h``.
        t_flag: When truthy, W errors are measured on the transposed
            coefficient matrices and the returned factors are transposed and
            swapped.

    Returns:
        tuple: ``(v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error, w, h,
        w_s, h_s, os_M)``; with ``t_flag`` the four factors are returned as
        ``h.T, w.T, h_s.T, w_s.T`` instead.  ``os_M`` is the last
        ``ff.calculate_h(v_origin, w_s)`` result (never transposed).
    """
    initial_theta = 5
    use_fgd = c_mode == 2
    seed_pair = ff.two_seeds(wh_seed)

    v_nmf_error = np.zeros(ite)
    v_snmf_error = np.zeros(ite)
    w_nmf_error = np.zeros(ite)
    w_snmf_error = np.zeros(ite)

    def subspace_residual(basis):
        # Squared Frobenius residual of the least-squares fit of w_origin
        # onto the columns of `basis`.
        coef = np.linalg.lstsq(basis, w_origin)[0]
        return np.linalg.norm(w_origin - np.dot(basis, coef))**2

    rows, cols = v_origin.shape
    w = ff.generate_w(rows, r_size, seed_pair[0], c_mode=c_mode)
    h = ff.generate_h(r_size, cols, seed_pair[1], c_mode=c_mode)
    sketched_v = ff.uniform_sampling(v_origin, ap_size, 0)

    # NMF calculate  --------------------------------------------------------
    if use_fgd:
        theta_w = theta_h = initial_theta

    if nmfqp:
        print("NMF matrix H is calculated by QP.")

    for step in range(ite):
        if use_fgd:
            w, h, theta_w, theta_h = ff.fgd_update(v_origin, w, h, theta_w,
                                                   theta_h)
        else:
            w, h = ff.update(v_origin, w, h, c_mode)

        # H used for the V error: recomputed from W when nmfqp is set.
        if nmfqp:
            h_for_error = ff.calculate_h(v_origin, w, print_interim=False)
        else:
            h_for_error = h

        v_nmf_error[step] = np.linalg.norm(
            v_origin - np.dot(w, h_for_error))**2
        w_nmf_error[step] = subspace_residual(h.T if t_flag else w)

        # Progress on the first step and then every 100th.
        if step == 0 or step % 100 == 99:
            print("".join([
                "NMF ( r=", str(r_size), "  k=", str(ap_size), " ) : ",
                str(step + 1), " times update"
            ]))

    # SNMF calculate  -------------------------------------------------------
    # The sketched run restarts from fresh theta values and fresh factors.
    if use_fgd:
        theta_w = theta_h = initial_theta

    w_s = ff.generate_w(rows, r_size, seed_pair[0], c_mode=c_mode)
    h_s = ff.generate_h(r_size, ap_size, seed_pair[1], c_mode=c_mode)

    for step in range(ite):
        if use_fgd:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(
                sketched_v, w_s, h_s, theta_w, theta_h)
        else:
            w_s, h_s = ff.update(sketched_v, w_s, h_s, c_mode)

        # Coefficients for the full matrix given the sketched basis.
        os_M = ff.calculate_h(v_origin, w_s, print_interim=False)
        v_snmf_error[step] = np.linalg.norm(v_origin - np.dot(w_s, os_M))**2
        w_snmf_error[step] = subspace_residual(os_M.T if t_flag else w_s)

        if step == 0 or step % 100 == 99:
            print(
                "SketchingNMF ( r={}  k={} seed={} ) : {} times update".format(
                    r_size, ap_size, wh_seed, step + 1))

    errors = (v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error)
    if t_flag:
        return errors + (h.T, w.T, h_s.T, w_s.T, os_M)
    return errors + (w, h, w_s, h_s, os_M)
def parallel_v_error_eval(r,
                          approximate_size,
                          v,
                          iteration,
                          wh_seed,
                          c_mode,
                          nmfqp,
                          t_flag,
                          snmf_only=False):
    """Record per-iteration reconstruction errors of NMF and sketched NMF.

    The plain run factorises ``v``; the sketched run factorises
    ``ff.uniform_sampling(v, approximate_size, seeds[0] + 1)`` and, after
    every step, rebuilds the coefficient matrix for the full ``v`` with
    ``ff.calculate_h`` to measure ``||v - W_s H_os||_F^2``.  Both runs start
    from factors generated with the same seed pair.

    Args:
        r: Factorisation rank.
        approximate_size: Sketch size (column count of the sketched ``H``).
        v: 2-D matrix to factorise.
        iteration: Number of update steps per run; must be at least 1.
        wh_seed: Seed handed to ``ff.two_seeds``.
        c_mode: ``2`` selects ``ff.fgd_update``; any other value is forwarded
            to ``ff.update``.
        nmfqp: When truthy, the plain-run error is measured with an ``H``
            recomputed by ``ff.calculate_h`` after every step, and that
            recomputed ``H`` is the one returned.
        t_flag: When truthy, the returned factors are transposed and swapped.
        snmf_only: When truthy, the plain run is skipped; its error array
            stays all zeros and its factors are returned as ``None``.

    Returns:
        tuple: ``(nmf_error, snmf_error, w, h, w_s, h_os, h_s)``.  With
        ``t_flag`` the factor part becomes ``h.T, w.T, h_os.T, w_s.T,
        h_s.T``.  With ``snmf_only`` the first two factor slots are ``None``.

    Raises:
        ValueError: If ``iteration`` is smaller than 1 (previously this ended
            in an ``UnboundLocalError`` at the end of the function).
    """
    if iteration < 1:
        raise ValueError(
            "iteration must be at least 1, got {}".format(iteration))

    theta_start = 5
    n, m = v.shape
    seeds = ff.two_seeds(wh_seed)
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1)

    # NMF  ------------------------------------------------------------------
    if not snmf_only:
        theta_w = theta_h = theta_start
        # The QP variant reports every 100 steps, the plain one every 2000.
        report_every = 100 if nmfqp else 2000
        if nmfqp:
            print("NMF matrix H is calculated by QP.")

        h_eval = h
        for i in range(iteration):
            if c_mode != 2:
                w, h = ff.update(v, w, h, c_mode)
            else:
                w, h, theta_w, theta_h = ff.fgd_update(
                    v, w, h, theta_w, theta_h)

            # The iterated h keeps driving the updates; h_eval is only used
            # for the error (and becomes the returned H afterwards).
            if nmfqp:
                h_eval = ff.calculate_h(v, w, print_interim=False)
            else:
                h_eval = h
            nmf_error[i] = np.linalg.norm(v - np.dot(w, h_eval))**2

            if i == 0 or i % report_every == report_every - 1:
                print("NMF ( r=" + str(r) + "  k=" +
                      str(approximate_size) + " ) : " + str(i + 1) +
                      " times update")
        h = h_eval

    # SNMF  -----------------------------------------------------------------
    # Restart the theta values so the sketched run does not inherit state
    # from the NMF run above; otherwise results with c_mode == 2 would depend
    # on whether snmf_only was set.
    theta_w = theta_h = theta_start
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
    for i in range(iteration):
        if c_mode != 2:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        else:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s, theta_w,
                                                       theta_h)
        # Coefficients for the full matrix given the sketched basis.
        h_os = ff.calculate_h(v, w_s, print_interim=False)
        snmf_error[i] = np.linalg.norm(v - np.dot(w_s, h_os))**2
        if i == 0 or i % 100 == 99:
            print(
                "SketchingNMF ( r={}  k={} seed={} ) : {} times update".format(
                    r, approximate_size, wh_seed, i + 1))

    if t_flag:
        if snmf_only:
            return nmf_error, snmf_error, None, None, h_os.T, w_s.T, h_s.T
        return nmf_error, snmf_error, h.T, w.T, h_os.T, w_s.T, h_s.T
    if snmf_only:
        return nmf_error, snmf_error, None, None, w_s, h_os, h_s
    return nmf_error, snmf_error, w, h, w_s, h_os, h_s
Example #7
0
def apply_data(v, r, approximate_size, iteration, program_num, v_seed, wh_seed, c_mode, mode, w_path, nmfqp,
               column_sketching=True, convergence_D_mode=0, CDC=0):
    """Run one experiment on a real data matrix and save its error curves.

    Args:
        v: 2-D data matrix.
        r: Factorisation rank.
        approximate_size: Sketch size forwarded to the evaluation routine.
        iteration: Number of iterations forwarded to the evaluation routine.
        program_num: Run number, used only in the generated program code.
        v_seed: Seed of V, used for the program code and the log output.
        wh_seed: Seed handed to ``ff.two_seeds``; the resulting pair is
            forwarded to the evaluation routine.
        c_mode: Mode flag forwarded to the evaluation routine.
        mode: Selects the experiment: ``0`` ``tm.time_measurement``, ``1``
            ``qr_eval.pivoting_qr_q_eval``, ``2`` ``v_eval.v_error_eval``,
            ``3`` ``ls_eval.least_square_w_eval``.
        w_path: Output directory; the sub-directories ``graph``, ``error``,
            ``time`` and ``matrix`` are created below it.
        nmfqp: Forwarded to ``v_eval.v_error_eval`` (mode 2 only).
        column_sketching: Forwarded to the evaluation routine.
        convergence_D_mode: Used only in the generated program code.
        CDC: Used only in the generated program code.

    Returns:
        For ``mode == 0`` the first element returned by
        ``tm.time_measurement``.  Otherwise the tuple
        ``(nmf_error, snmf_error)``; ``snmf_error`` is ``None`` for modes 1
        and 3, which produce no sketched error curve.

    Raises:
        ValueError: If ``mode`` is not one of 0, 1, 2, 3.
    """
    # Fail fast: an unknown mode used to surface as a NameError only after
    # the output directories had been created.
    if mode not in (0, 1, 2, 3):
        raise ValueError("mode must be one of 0, 1, 2, 3; got {!r}".format(mode))

    n, m = v.shape

    program_code = "realdata_" + ff.program_name(mode, c_mode, n, m, r, approximate_size, iteration, v_seed, wh_seed,
                                                 program_num, convergence_D_mode=convergence_D_mode, CDC=CDC)
    seeds = ff.two_seeds(wh_seed)

    print("-----------------   START   ------------------")
    print("start  " + program_code + "  >>>>>\n")
    print("seed of V=" + str(v_seed) + "  seed of WH=" + str(wh_seed))

    # calculate ----------------------------
    # Only mode 2 yields a sketched error curve; keep the name bound for the
    # other modes so the final return cannot raise UnboundLocalError.
    snmf_error = None
    if mode == 0:
        t_result, w, h, w_s, h_os = tm.time_measurement(n, m, r, approximate_size, v, iteration, seeds, c_mode, column_sketching)
    elif mode == 1:
        nmf_error, _, w, h, w_s, h_os = qr_eval.pivoting_qr_q_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode, column_sketching)
    elif mode == 2:
        nmf_error, snmf_error, w, h, w_s, h_os = v_eval.v_error_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode, nmfqp, column_sketching)
    else:  # mode == 3
        nmf_error, _, w, h, w_s, h_os = ls_eval.least_square_w_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode, column_sketching)

    # result----------------------------
    # make directory
    # NOTE: only the error CSV is written below; error-graph plotting, the
    # timing CSV and the factor-matrix CSV export are not performed, but
    # their directories are still created as before.
    for sub_dir in ("graph", "error", "time", "matrix"):
        os.makedirs(w_path + "/" + sub_dir, exist_ok=True)

    # save error list (one column per recorded error curve)
    if mode != 0:
        curves = [nmf_error]
        labels = ["NMF error"]
        if mode == 2:
            curves.append(snmf_error)
            labels.append("SNMF error")
        e_result = pd.DataFrame(curves, index=labels)
        e_result.T.to_csv(w_path + "/error/" + program_code + "_error.csv")

    print("\nfinish  " + program_code + "  >>>>>\n")

    if mode == 0:
        return t_result
    return nmf_error, snmf_error