def bcv(v, r, iteration, seed, row_sep_num, column_sep_num, error_print):
    """Return a bi-cross-validation (BCV) score for a rank-``r`` NMF of ``v``.

    ``v`` is cut into ``row_sep_num`` x ``column_sep_num`` folds.  For each
    fold the matrix is partitioned into four parts:

    * ``a`` -- the held-out block (fold rows x fold columns),
    * ``b`` -- fold rows, remaining columns,
    * ``c`` -- remaining rows, fold columns,
    * ``d`` -- remaining rows, remaining columns.

    ``d`` is factorised as ``w_d @ h_d`` through ``ff.generate_w``,
    ``ff.generate_h`` and ``iteration`` calls to ``ff.update``; ``a`` is then
    predicted as ``b @ pinv(h_d) @ pinv(w_d) @ c`` and the squared Frobenius
    norm of the prediction error is accumulated over all folds.

    Args:
        v: 2-D array to evaluate (it is sliced along both axes).
        r: Factorisation rank handed to the ``ff`` generators.
        iteration: Number of ``ff.update`` steps run for every fold.
        seed: Seed forwarded to ``ff.two_seeds``.
        row_sep_num: Number of folds along the rows.
        column_sep_num: Number of folds along the columns.
        error_print: When truthy, the squared reconstruction error of ``d`` is
            recorded per iteration for the fold that reaches the last row and
            the last column of ``v``.

    Returns:
        A tuple ``(score, error)`` where ``score`` is the accumulated squared
        prediction error and ``error`` is an array of length ``iteration``
        (all zeros unless ``error_print`` is truthy).

    Raises:
        SystemExit: With exit code 1 when ``r`` exceeds the number of rows or
            columns left in ``d`` after removing one regular-sized fold.
    """
    n, m = v.shape
    n_sep_size = n // row_sep_num
    m_sep_size = m // column_sep_num

    # The original condition used ``|``, which binds tighter than ``<`` and
    # turned the test into a chained comparison that could never be true for
    # non-negative sizes.  ``or`` expresses the intended check.
    if n - n_sep_size < r or m - m_sep_size < r:
        print("Error: R is bigger than row or column size of D",
              file=sys.stderr)
        sys.exit(1)

    counter = 1
    bic_tmp = 0
    seeds = ff.two_seeds(seed)
    error = np.zeros(iteration)
    fold_total = row_sep_num * column_sep_num

    for r_sep in range(row_sep_num):
        # The last fold absorbs the rows left over by the integer division.
        row_s = n_sep_size * r_sep
        row_e = n if r_sep == row_sep_num - 1 else n_sep_size * (r_sep + 1)

        for c_sep in range(column_sep_num):
            column_s = m_sep_size * c_sep
            if c_sep == column_sep_num - 1:
                column_e = m
            else:
                column_e = m_sep_size * (c_sep + 1)

            # set Matrices ABCD ------------------------------------------------
            a = v[row_s:row_e, column_s:column_e]
            b = np.concatenate(
                [v[row_s:row_e, :column_s], v[row_s:row_e, column_e:]], 1)
            c = np.concatenate(
                [v[:row_s, column_s:column_e], v[row_e:, column_s:column_e]], 0)
            d_1 = np.concatenate([v[:row_s, :column_s], v[:row_s, column_e:]], 1)
            d_2 = np.concatenate([v[row_e:, :column_s], v[row_e:, column_e:]], 1)
            d = np.concatenate([d_1, d_2], 0)

            # fit NMF to D -----------------------------------------------------
            w_d = ff.generate_w(d.shape[0], r, seeds[0], c_mode=0)
            h_d = ff.generate_h(r, d.shape[1], seeds[1], c_mode=0)
            # Only the fold touching the last row and last column is traced.
            record_error = bool(error_print) and row_e == n and column_e == m
            for i in range(iteration):
                w_d, h_d = ff.update(d, w_d, h_d, 0)
                if record_error:
                    error[i] = np.linalg.norm(d - np.dot(w_d, h_d))**2
                if i % 1000 == 999:
                    print("BCV NMF ( r={} {} / {}) : {} times update".format(
                        r, counter, fold_total, i + 1))

            # calculate BCV ----------------------------------------------------
            a_r = np.dot(np.dot(b, np.linalg.pinv(h_d)),
                         np.dot(np.linalg.pinv(w_d), c))
            bic_tmp += np.linalg.norm(a - a_r)**2
            counter += 1

    return bic_tmp, error
def bic(v, r, iteration, seed):
    """Return a BIC-style score for a rank-``r`` factorisation of ``v``.

    The factors are created with ``ff.generate_w`` / ``ff.generate_h`` and
    refined with ``iteration`` calls to ``ff.update``.  With ``n, m`` the shape
    of ``v`` and ``a = (n + m) / (n * m)``, the score is
    ``log(||v - w h||^2) + r * a * log(1 / a)``.

    Args:
        v: 2-D array to factorise.
        r: Factorisation rank.
        iteration: Number of update steps.
        seed: Seed forwarded to ``ff.two_seeds``.

    Returns:
        A tuple ``(score, error)`` where ``error[i]`` is the squared Frobenius
        reconstruction error after update ``i + 1``.
    """
    n_rows, n_cols = v.shape
    seed_pair = ff.two_seeds(seed)
    basis = ff.generate_w(n_rows, r, seed_pair[0], c_mode=0)
    coef = ff.generate_h(r, n_cols, seed_pair[1], c_mode=0)
    error = np.zeros(iteration)

    for step in range(iteration):
        basis, coef = ff.update(v, basis, coef, 0)
        error[step] = np.linalg.norm(v - np.dot(basis, coef))**2
        # Progress report after every 1000th update.
        if (step + 1) % 1000 == 0:
            print("BIC NMF ( r={} ) : {} times update".format(r, step + 1))

    # Fit term: squared error of the final factors.
    residual = np.linalg.norm(v - np.dot(basis, coef))**2
    # Complexity term, scaled by the rank.
    ratio = (n_rows + n_cols) / (n_rows * n_cols)
    penalty = r * ratio * np.log(1 / ratio)
    return np.log(residual) + penalty, error
def parallel_least_square_w_eval(n, m, r, approximate_size, v, o_w, iteration,
                                 wh_seed, c_mode, column_sketching):
    """Run ``calculate`` on ``v`` and on a sampled version of ``v``.

    Two factor pairs are initialised from the same seeds: one sized for the
    full matrix (``n x r`` and ``r x m``) and one sized for the sampled matrix
    (``n x r`` and ``r x approximate_size``).  Each pair is advanced
    ``iteration`` times by ``calculate``, which also yields the per-step error
    stored in the returned arrays.

    Args:
        n, m: Dimensions used to size the full-matrix factors.
        r: Factorisation rank.
        approximate_size: Size passed to ``ff.uniform_sampling`` and used as
            the column count of the sampled ``h``.
        v: Matrix to factorise.
        o_w: Reference matrix forwarded unchanged to ``calculate``.
        iteration: Number of ``calculate`` steps for each of the two runs.
        wh_seed: Seed forwarded to ``ff.two_seeds``.
        c_mode: Mode flag forwarded to the ``ff`` generators and ``calculate``.
        column_sketching: Forwarded to ``ff.uniform_sampling`` as
            ``right_product``.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_os)`` where ``h_os`` is
        ``ff.calculate_h(v, w_s)``.
    """
    seed_pair = ff.two_seeds(wh_seed)
    w_seed = seed_pair[0]
    h_seed = seed_pair[1]

    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)

    w = ff.generate_w(n, r, w_seed, c_mode=c_mode)
    h = ff.generate_h(r, m, h_seed, c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, w_seed + 1,
                              right_product=column_sketching)
    w_s = ff.generate_w(n, r, w_seed, c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, h_seed, c_mode=c_mode)

    # Run on the full matrix.
    for step in range(iteration):
        w, h, nmf_error[step] = calculate(v, o_w, w, h, c_mode, step)

    # Run on the sampled matrix.
    for step in range(iteration):
        w_s, h_s, snmf_error[step] = calculate(v_s, o_w, w_s, h_s, c_mode, step)

    # NOTE(review): the flattened source does not show whether this call sat
    # inside the loop above; only its final value is returned -- confirm.
    h_os = ff.calculate_h(v, w_s)
    return nmf_error, snmf_error, w, h, w_s, h_os
# preparation ---------------------------------------------------------------------------------------------------------- print("----------------- START ------------------") program_code = ff.program_name(mode, c_mode, n, m, r, approximate_size, "_unknown", v_seed, wh_seed, program_num, convergence_D_mode=convergence_D_mode, CDC=CDC) seeds = ff.two_seeds(wh_seed) print("mode = " + str(mode) + "\nstart " + program_code + " >>>>>\n") print("seed of V=" + str(v_seed) + " seed of WH=" + str(wh_seed)) # generate V------------------------------------------------------------------------------------------------------------ V, original_W, original_H = ff.generate_wh(n, m, r, v_seed) # get iteration--------------------------------------------------------------------------------------------------------- print("\n\n\n-------------- get iteration --------------") if mode == 0: iteration_list = ct.get_v_ite(V, n, m, r,
def error_calculate(r_size, ap_size, v_origin, w_origin, ite, wh_seed, c_mode,
                    nmfqp, t_flag):
    """Trace V- and W-approximation errors for NMF and sketched NMF.

    Two factorisations are run for ``ite`` updates each: one on ``v_origin``
    and one on ``ff.uniform_sampling(v_origin, ap_size, 0)``.  After every
    update two squared Frobenius errors are stored:

    * the reconstruction error of ``v_origin``;
    * the residual of a least-squares fit of ``w_origin`` onto the current
      basis (``w`` / ``w_s``, or ``h.T`` / ``os_M.T`` when ``t_flag`` is set).

    Args:
        r_size: Factorisation rank.
        ap_size: Sample size for the sketched run.
        v_origin: 2-D matrix being factorised.
        w_origin: Reference matrix used for the W evaluation.
        ite: Number of updates per run.  ``os_M`` is assigned only inside the
            sketched loop, so ``ite`` must be at least 1.
        wh_seed: Seed forwarded to ``ff.two_seeds``.
        c_mode: ``2`` selects ``ff.fgd_update``; any other value is passed to
            ``ff.update``.
        nmfqp: When truthy, ``H`` for the V evaluation of the plain run is
            recomputed with ``ff.calculate_h``.
        t_flag: When truthy, the W evaluation uses the transposed ``H`` side
            and the returned factors are transposed and swapped.

    Returns:
        A 9-tuple: the four error arrays (V/NMF, V/SNMF, W/NMF, W/SNMF)
        followed by ``h.T, w.T, h_s.T, w_s.T, os_M`` when ``t_flag`` is truthy,
        otherwise ``w, h, w_s, h_s, os_M``.
    """
    theta_start = 5
    use_fgd = c_mode == 2
    seed_pair = ff.two_seeds(wh_seed)

    v_nmf_error = np.zeros(ite)
    v_snmf_error = np.zeros(ite)
    w_nmf_error = np.zeros(ite)
    w_snmf_error = np.zeros(ite)

    n_size, m_size = v_origin.shape

    def fit_residual(basis):
        # Squared residual of the least-squares fit of w_origin onto `basis`.
        coef = np.linalg.lstsq(basis, w_origin)[0]
        return np.linalg.norm(w_origin - np.dot(basis, coef))**2

    w = ff.generate_w(n_size, r_size, seed_pair[0], c_mode=c_mode)
    h = ff.generate_h(r_size, m_size, seed_pair[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v_origin, ap_size, 0)

    # NMF calculate ------------------------------------------------------------
    if use_fgd:
        theta_w = theta_h = theta_start
    if nmfqp:
        print("NMF matrix H is calculated by QP.")

    for step in range(ite):
        if use_fgd:
            w, h, theta_w, theta_h = ff.fgd_update(v_origin, w, h,
                                                   theta_w, theta_h)
        else:
            w, h = ff.update(v_origin, w, h, c_mode)

        if nmfqp:
            h_result = ff.calculate_h(v_origin, w, print_interim=False)
        else:
            h_result = h

        # v evaluate
        v_nmf_error[step] = np.linalg.norm(v_origin - np.dot(w, h_result))**2
        # w evaluate (uses h, not h_result, on the transposed path)
        w_nmf_error[step] = fit_residual(h.T if t_flag else w)

        if step == 0 or (step + 1) % 100 == 0:
            print("NMF ( r=" + str(r_size) + " k=" + str(ap_size) + " ) : "
                  + str(step + 1) + " times update")

    # SNMF calculate -----------------------------------------------------------
    if use_fgd:
        # The sketched run starts again from the initial theta values.
        theta_w = theta_h = theta_start
    w_s = ff.generate_w(n_size, r_size, seed_pair[0], c_mode=c_mode)
    h_s = ff.generate_h(r_size, ap_size, seed_pair[1], c_mode=c_mode)

    for step in range(ite):
        if use_fgd:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s,
                                                       theta_w, theta_h)
        else:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)

        # v evaluate
        os_M = ff.calculate_h(v_origin, w_s, print_interim=False)
        v_snmf_error[step] = np.linalg.norm(v_origin - np.dot(w_s, os_M))**2
        # w evaluate
        w_snmf_error[step] = fit_residual(os_M.T if t_flag else w_s)

        if step == 0 or (step + 1) % 100 == 0:
            print(
                "SketchingNMF ( r={} k={} seed={} ) : {} times update".format(
                    r_size, ap_size, wh_seed, step + 1))

    errors = (v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error)
    if t_flag:
        return errors + (h.T, w.T, h_s.T, w_s.T, os_M)
    return errors + (w, h, w_s, h_s, os_M)
def parallel_v_error_eval(r, approximate_size, v, iteration, wh_seed, c_mode,
                          nmfqp, t_flag, snmf_only=False):
    """Trace the V reconstruction error of NMF and of sketched NMF.

    The plain run factorises ``v``; the sketched run factorises
    ``ff.uniform_sampling(v, approximate_size, seeds[0] + 1)`` and, after each
    update, recomputes ``H`` for the full ``v`` with ``ff.calculate_h`` before
    measuring the squared Frobenius error.

    Args:
        r: Factorisation rank.
        approximate_size: Sample size for the sketched run.
        v: 2-D matrix being factorised.
        iteration: Number of updates per run.  ``h_os`` is assigned only
            inside the sketched loop, so ``iteration`` must be at least 1.
        wh_seed: Seed forwarded to ``ff.two_seeds``.
        c_mode: ``2`` selects ``ff.fgd_update``; any other value is passed to
            ``ff.update``.
        nmfqp: When truthy, ``H`` of the plain run is recomputed with
            ``ff.calculate_h`` for the error measurement, progress is printed
            every 100 updates (instead of every 2000) and the returned ``h``
            is the recomputed one.
        t_flag: When truthy, returned factors are transposed and swapped.
        snmf_only: When truthy, the plain run is skipped; ``nmf_error`` stays
            all zeros and the plain factors are returned as ``None``.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_os, h_s)``; with ``t_flag`` the
        factor part becomes ``h.T, w.T, h_os.T, w_s.T, h_s.T``.
    """
    theta_start = 5
    use_fgd = c_mode == 2
    n, m = v.shape
    seeds = ff.two_seeds(wh_seed)
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1)

    # NMF ----------------------------------------------------------------------
    theta_w = theta_h = theta_start
    if not snmf_only:
        if nmfqp:
            print("NMF matrix H is calculated by QP.")
        report_every = 100 if nmfqp else 2000
        h_eval = h
        for i in range(iteration):
            if use_fgd:
                w, h, theta_w, theta_h = ff.fgd_update(
                    v, w, h, theta_w, theta_h)
            else:
                w, h = ff.update(v, w, h, c_mode)
            if nmfqp:
                h_eval = ff.calculate_h(v, w, print_interim=False)
            else:
                h_eval = h
            nmf_error[i] = np.linalg.norm(v - np.dot(w, h_eval))**2
            if i == 0 or (i + 1) % report_every == 0:
                print("NMF ( r=" + str(r) + " k=" + str(approximate_size)
                      + " ) : " + str(i + 1) + " times update")
        # NOTE(review): the flattened source does not show whether the
        # original ``h = h_qp`` sat inside or after the loop; it is applied
        # after the loop here, so the updates never consume the QP result --
        # confirm against the original layout.
        h = h_eval

    # SNMF ---------------------------------------------------------------------
    # Restart theta so the sketched run does not inherit the state left by the
    # plain run; otherwise its result would depend on ``snmf_only``.
    theta_w = theta_h = theta_start
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
    for i in range(iteration):
        if use_fgd:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s,
                                                       theta_w, theta_h)
        else:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        h_os = ff.calculate_h(v, w_s, print_interim=False)
        snmf_error[i] = np.linalg.norm(v - np.dot(w_s, h_os))**2
        if i == 0 or (i + 1) % 100 == 0:
            print(
                "SketchingNMF ( r={} k={} seed={} ) : {} times update".format(
                    r, approximate_size, wh_seed, i + 1))

    if t_flag:
        if snmf_only:
            return nmf_error, snmf_error, None, None, h_os.T, w_s.T, h_s.T
        return nmf_error, snmf_error, h.T, w.T, h_os.T, w_s.T, h_s.T
    if snmf_only:
        return nmf_error, snmf_error, None, None, w_s, h_os, h_s
    return nmf_error, snmf_error, w, h, w_s, h_os, h_s
def apply_data(v, r, approximate_size, iteration, program_num, v_seed, wh_seed,
               c_mode, mode, w_path, nmfqp, column_sketching=True,
               convergence_D_mode=0, CDC=0):
    """Run one evaluation on real data and store its error trace as CSV.

    ``mode`` selects the evaluation:

    * ``0`` -- ``tm.time_measurement``
    * ``1`` -- ``qr_eval.pivoting_qr_q_eval``
    * ``2`` -- ``v_eval.v_error_eval``
    * ``3`` -- ``ls_eval.least_square_w_eval``

    The output directories ``graph``, ``error``, ``time`` and ``matrix`` are
    created under ``w_path``.  For every mode except 0 the error trace is
    written to ``<w_path>/error/<program_code>_error.csv``.

    NOTE: graph plotting, the timing CSV and the factor-matrix dumps were
    commented out in the original code and are not performed; only the error
    CSV is written.

    Args:
        v: 2-D data matrix.
        r: Factorisation rank.
        approximate_size: Sketch size forwarded to the evaluation routine.
        iteration: Number of iterations forwarded to the evaluation routine.
        program_num, v_seed: Used to build the program code and log output.
        wh_seed: Seed forwarded to ``ff.two_seeds``.
        c_mode: Mode flag forwarded to the evaluation routine.
        mode: Evaluation selector, see above.
        w_path: Output root directory.
        nmfqp: Forwarded to ``v_eval.v_error_eval`` (mode 2 only).
        column_sketching: Forwarded to the evaluation routine.
        convergence_D_mode, CDC: Forwarded to ``ff.program_name``.

    Returns:
        ``t_result`` for mode 0, otherwise ``(nmf_error, snmf_error)``.
        ``snmf_error`` is ``None`` for modes 1 and 3, which do not keep one
        (the original raised ``UnboundLocalError`` there).

    Raises:
        ValueError: If ``mode`` is not one of 0, 1, 2, 3.
    """
    n, m = v.shape
    program_code = "realdata_" + ff.program_name(
        mode, c_mode, n, m, r, approximate_size, iteration, v_seed, wh_seed,
        program_num, convergence_D_mode=convergence_D_mode, CDC=CDC)
    seeds = ff.two_seeds(wh_seed)

    print("----------------- START ------------------")
    print("start " + program_code + " >>>>>\n")
    print("seed of V=" + str(v_seed) + " seed of WH=" + str(wh_seed))

    # calculate ----------------------------
    t_result = None
    nmf_error = None
    snmf_error = None  # stays None for the modes that discard it
    # The factor matrices are unpacked but unused while matrix export is
    # disabled.
    if mode == 0:
        t_result, w, h, w_s, h_os = tm.time_measurement(
            n, m, r, approximate_size, v, iteration, seeds, c_mode,
            column_sketching)
    elif mode == 1:
        nmf_error, _, w, h, w_s, h_os = qr_eval.pivoting_qr_q_eval(
            n, m, r, approximate_size, v, iteration, seeds, c_mode,
            column_sketching)
    elif mode == 2:
        nmf_error, snmf_error, w, h, w_s, h_os = v_eval.v_error_eval(
            n, m, r, approximate_size, v, iteration, seeds, c_mode, nmfqp,
            column_sketching)
    elif mode == 3:
        nmf_error, _, w, h, w_s, h_os = ls_eval.least_square_w_eval(
            n, m, r, approximate_size, v, iteration, seeds, c_mode,
            column_sketching)
    else:
        raise ValueError("unsupported mode: {!r}".format(mode))

    # result ----------------------------
    # make directory
    for sub_dir in ("/graph", "/error", "/time", "/matrix"):
        os.makedirs(w_path + sub_dir, exist_ok=True)

    # save error list
    if mode != 0:
        if mode == 2:
            e_result = pd.DataFrame([nmf_error, snmf_error],
                                    index=["NMF error", "SNMF error"])
        else:
            e_result = pd.DataFrame([nmf_error], index=["NMF error"])
        e_result.T.to_csv(w_path + "/error/" + program_code + "_error.csv")

    print("\nfinish " + program_code + " >>>>>\n")

    if mode == 0:
        return t_result
    return nmf_error, snmf_error