def time_measurement(n, m, r, approximate_size, v, iteration, seeds, c_mode,
                     NMF_or_SNMF):
    """Time ``iteration`` factor updates of plain NMF or of sketching NMF.

    Args:
        n, m: Sizes handed to ``ff.generate_w`` / ``ff.generate_h`` for the
            full problem (presumably the row/column counts of ``v`` --
            confirm against callers).
        r: Factorisation rank passed to both generators.
        approximate_size: Sketch size; only used on the sketching path.
        v: Matrix to factorise.
        iteration: Number of ``ff.update`` calls to time.
        seeds: Indexable pair; ``seeds[0]`` seeds W and ``seeds[1]`` seeds H.
        c_mode: Mode flag forwarded unchanged to the ``ff`` helpers.
        NMF_or_SNMF: ``0`` times plain NMF, any other value times
            sketching NMF.

    Returns:
        Elapsed seconds. The measured span covers factor generation and all
        updates; on the sketching path it also covers the sampling step and
        one final ``ff.calculate_h`` call.
    """
    # perf_counter() is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    if NMF_or_SNMF == 0:
        # NMF ------------------------------------
        print("\n\n\n------------------ NMF -------------------")
        start = time.perf_counter()
        w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
        for _ in range(iteration):
            w, h = ff.update(v, w, h, c_mode)
        t_result = time.perf_counter() - start
        print("\nNMF time: " + str(t_result))
    else:
        # Sketching NMF --------------------------
        print("\n\n------------- Sketching NMF --------------")
        start = time.perf_counter()
        v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
        w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
        for _ in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        # The result is discarded: only the cost of recovering H for the
        # full matrix is of interest here.
        # NOTE(review): assumed to run once after the loop -- confirm.
        ff.calculate_h(v, w_s, True)
        t_result = time.perf_counter() - start
        print("\nSketching NMF time: " + str(t_result) + "\n")
    return t_result
def v_error_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode,
                 nmfqp, column_sketching):
    """Track the squared reconstruction error of NMF and sketching NMF.

    Both methods run for ``iteration`` updates and the squared Frobenius
    norm of ``v - W H`` is stored after each update.

    Args:
        n, m, r: Sizes forwarded to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: Sketch size used for the sampled matrix.
        v: Matrix to factorise.
        iteration: Number of updates per method.
        seeds: Indexable pair of seeds for W (``seeds[0]``) and H
            (``seeds[1]``); ``seeds[0] + 1`` seeds the sampling.
        c_mode: Mode flag forwarded unchanged to the ``ff`` helpers.
        nmfqp: When truthy, the NMF error is evaluated with the H returned
            by ``ff.calculate_h`` and that H is returned in place of the
            iterated one.
        column_sketching: Selects which side is sketched; forwarded to
            ``ff.uniform_sampling`` as ``right_product``.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_s)``.
    """

    def residual(left, right):
        # Squared Frobenius distance between v and the product left @ right.
        return np.linalg.norm(v - np.dot(left, right))**2

    def report(step, value):
        # Log the first update and then every hundredth one.
        if step == 0 or step % 100 == 99:
            print(str(step + 1) + " times update error: " + str(value))

    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1,
                              right_product=column_sketching)

    print("\n\n\n------------------ NMF -------------------")
    if not nmfqp:
        for step in range(iteration):
            w, h = ff.update(v, w, h, c_mode)
            nmf_error[step] = residual(w, h)
            report(step, nmf_error[step])
    else:
        print("NMF matrix H is calculated by QP.")
        for step in range(iteration):
            w, h = ff.update(v, w, h, c_mode)
            h_qp = ff.calculate_h(v, w, print_interim=True)
            nmf_error[step] = residual(w, h_qp)
            report(step, nmf_error[step])
        # Hand back the H that the reported errors were measured with.
        h = h_qp

    print("\n\n------------- Sketching NMF --------------")
    if column_sketching:
        w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
        for step in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            h_os = ff.calculate_h(v, w_s, print_interim=True)
            snmf_error[step] = residual(w_s, h_os)
            report(step, snmf_error[step])
        # The sketched H is replaced by the one recovered for the full v.
        h_s = h_os
    else:
        w_s = ff.generate_w(approximate_size, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
        for step in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            w_os = ff.calculate_h(v, h_s, print_interim=True)
            snmf_error[step] = residual(w_os, h_s)
            report(step, snmf_error[step])
        # The sketched W is replaced by the one recovered for the full v.
        w_s = w_os

    return nmf_error, snmf_error, w, h, w_s, h_s
def get_v_ite(v, n, m, r, approximate_size, seeds, c_mode, CDC, NMFQP=False):
    """Count the updates NMF and sketching NMF need to reach error ``CDC``.

    Each method is iterated until its squared Frobenius reconstruction
    error of ``v`` falls below ``CDC``.

    NOTE: there is no iteration cap. If ``CDC`` is never reached the
    corresponding loop does not terminate.

    Args:
        v: Matrix to factorise.
        n, m, r: Sizes forwarded to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: Sketch size used for the sampled matrix.
        seeds: Indexable pair of seeds for W (``seeds[0]``) and H
            (``seeds[1]``).
        c_mode: Mode flag forwarded unchanged to the ``ff`` helpers.
        CDC: Error threshold; iteration stops once the error is below it.
        NMFQP: When truthy, the NMF error is evaluated with the H returned
            by ``ff.calculate_h`` instead of the iterated H.

    Returns:
        Integer array of length 2: ``[nmf_updates, snmf_updates]``.
    """
    iteration_list = np.zeros([2], dtype="int")

    # NMF ------------------------------------
    print("\n------------------ NMF -------------------")
    # Start from +inf rather than a finite placeholder so that at least one
    # update is always performed, however large CDC is.
    nmf_error = np.inf
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    if NMFQP:
        print("NMF matrix H is calculated by QP.")
    while nmf_error >= CDC:
        w, h = ff.update(v, w, h, c_mode)
        # With NMFQP only the evaluation uses the recomputed H; the
        # iteration itself continues from the updated h.
        h_eval = ff.calculate_h(v, w, True) if NMFQP else h
        nmf_error = np.linalg.norm(v - np.dot(w, h_eval))**2
        if iteration_list[0] == 0 or iteration_list[0] % 100 == 99:
            print(str(iteration_list[0] + 1) + " times update error: "
                  + str(nmf_error))
        iteration_list[0] += 1

    # Sketching NMF --------------------------
    print("\n------------- Sketching NMF --------------")
    snmf_error = np.inf
    v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
    while snmf_error >= CDC:
        w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        # The error is always measured on the full matrix, using an H
        # recomputed for the current w_s.
        h_os = ff.calculate_h(v, w_s, True)
        snmf_error = np.linalg.norm(v - np.dot(w_s, h_os))**2
        if iteration_list[1] == 0 or iteration_list[1] % 100 == 99:
            print(str(iteration_list[1] + 1) + " times update error: "
                  + str(snmf_error))
        iteration_list[1] += 1

    print(iteration_list)
    return iteration_list
def pivoting_qr_q_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode,
                       o_w=0):
    """Compare W factors through their column-pivoted QR products.

    Without a reference (``o_w`` all zero) the NMF and sketching-NMF W
    factors are compared with each other after every update; the result is
    stored in ``nmf_error`` and ``snmf_error`` stays zero. With a reference,
    each method is compared against ``Q R`` of ``o_w`` via ``calculate``.

    NOTE(review): this relies on the ``calculate(v, o_w, w, h, c_mode, i)``
    variant that returns ``(w, h, error)``. Several functions named
    ``calculate`` with different signatures appear in this file -- confirm
    the intended one is the one in scope.

    Args:
        n, m, r: Sizes forwarded to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: Sketch size used for the sampled matrix.
        v: Matrix to factorise.
        iteration: Number of updates per method.
        seeds: Indexable pair of seeds for W (``seeds[0]``) and H
            (``seeds[1]``).
        c_mode: Mode flag forwarded unchanged to the ``ff`` helpers.
        o_w: Reference W, or ``0`` (the default) for "no reference".

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_s)`` without a reference, or
        ``(nmf_error, snmf_error, w, h, w_s, h_os)`` with one, where
        ``h_os`` comes from ``ff.calculate_h(v, w_s, True)``.
    """
    print("\n\n\n------------------ NMF -------------------")
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    if np.all(o_w == 0):
        for i in range(iteration):
            # Same four-argument call as the sketched update below; passing
            # r as well would shift c_mode into the wrong position.
            w, h = ff.update(v, w, h, c_mode)
            qr_q, qr_r, _ = linalg.qr(w, pivoting=True)
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            qr_q_s, qr_r_s, _ = linalg.qr(w_s, pivoting=True)
            frobenius_norm = np.linalg.norm(
                np.dot(qr_q_s, qr_r_s) - np.dot(qr_q, qr_r))**2
            nmf_error[i] = frobenius_norm
            if i == 0 or i % 100 == 99:
                print(str(i + 1) + " times update error: "
                      + str(frobenius_norm))
        return nmf_error, snmf_error, w, h, w_s, h_s

    # Put the reference into the same pivoted Q R form the candidates use.
    original_q, original_r, _ = linalg.qr(o_w, pivoting=True)
    o_w = np.dot(original_q, original_r)

    # NMF ------------------------------------
    print("\n\n\n------------------ NMF -------------------")
    for i in range(iteration):
        # calculate() returns the updated factors together with the error;
        # they must be carried over or every step restarts from the same W.
        w, h, nmf_error[i] = calculate(v, o_w, w, h, c_mode, i)

    print("\n\n------------- Sketching NMF --------------")
    for i in range(iteration):
        w_s, h_s, snmf_error[i] = calculate(v_s, o_w, w_s, h_s, c_mode, i)
    h_os = ff.calculate_h(v, w_s, True)
    return nmf_error, snmf_error, w, h, w_s, h_os
def bcv(v, r, iteration, seed, row_sep_num, column_sep_num, error_print):
    """Accumulate a bi-cross-validation error for rank ``r`` over all folds.

    ``v`` is cut into ``row_sep_num * column_sep_num`` blocks. For each
    held-out block A, NMF is fitted to D (all other rows and columns) and A
    is predicted as ``B pinv(H_D) pinv(W_D) C``, where B shares A's rows
    and C shares A's columns. The squared Frobenius errors are summed.

    Args:
        v: 2-D matrix to evaluate.
        r: Factorisation rank.
        iteration: Number of ``ff.update`` calls per fold.
        seed: Seed expanded into a W/H pair by ``ff.two_seeds``.
        row_sep_num: Number of row folds.
        column_sep_num: Number of column folds.
        error_print: When truthy, the per-update fit error of D is recorded
            for the last fold (the one ending at the last row and column).

    Returns:
        ``(bic_tmp, error)``: the summed prediction error and the recorded
        per-update errors (all zeros unless ``error_print`` is truthy).

    Raises:
        SystemExit: With status 1 if D would have fewer than ``r`` rows or
            columns.
    """
    n, m = v.shape
    n_sep_size = n // row_sep_num
    m_sep_size = m // column_sep_num

    # Validate before doing any work. The comparisons are joined with `or`:
    # `|` binds tighter than `<` and would turn this into a chained
    # comparison around `r | m`.
    if (n - n_sep_size < r) or (m - m_sep_size < r):
        print("Error: row or column size of D is smaller than R",
              file=sys.stderr)
        sys.exit(1)

    counter = 1
    bic_tmp = 0
    seeds = ff.two_seeds(seed)
    error = np.zeros(iteration)

    for r_sep in range(row_sep_num):
        for c_sep in range(column_sep_num):
            # Bounds of the held-out block; the last fold in each direction
            # absorbs the remainder of the integer division.
            row_s = n_sep_size * r_sep
            column_s = m_sep_size * c_sep
            row_e = n_sep_size * (r_sep + 1) if r_sep != row_sep_num - 1 else n
            column_e = (m_sep_size * (c_sep + 1)
                        if c_sep != column_sep_num - 1 else m)
            is_last_fold = (row_e == n) and (column_e == m)

            # Set matrices A, B, C, D ---------------------------------------
            a = v[row_s:row_e, column_s:column_e]
            b = np.concatenate(
                [v[row_s:row_e, :column_s], v[row_s:row_e, column_e:]], 1)
            c = np.concatenate(
                [v[:row_s, column_s:column_e], v[row_e:, column_s:column_e]],
                0)
            d_1 = np.concatenate(
                [v[:row_s, :column_s], v[:row_s, column_e:]], 1)
            d_2 = np.concatenate(
                [v[row_e:, :column_s], v[row_e:, column_e:]], 1)
            d = np.concatenate([d_1, d_2], 0)

            # Fit NMF to D --------------------------------------------------
            w_d = ff.generate_w(d.shape[0], r, seeds[0], c_mode=0)
            h_d = ff.generate_h(r, d.shape[1], seeds[1], c_mode=0)
            for i in range(iteration):
                w_d, h_d = ff.update(d, w_d, h_d, 0)
                if error_print and is_last_fold:
                    error[i] = np.linalg.norm(d - np.dot(w_d, h_d))**2
                if i % 1000 == 999:
                    print("BCV NMF ( r={} {} / {}) : {} times update".format(
                        r, counter, row_sep_num * column_sep_num, i + 1))

            # Calculate BCV -------------------------------------------------
            a_r = np.dot(np.dot(b, np.linalg.pinv(h_d)),
                         np.dot(np.linalg.pinv(w_d), c))
            bic_tmp += np.linalg.norm(a - a_r)**2
            counter += 1

    return bic_tmp, error
def least_square_w_eval(n, m, r, approximate_size, v, iteration, seeds,
                        c_mode, o_w=0):
    """Score W factors against a reference W through ``calculate``.

    Without a reference (``o_w`` all zero) the sketching-NMF W is scored
    against the plain-NMF W of the same step; the result is stored in
    ``nmf_error`` and ``snmf_error`` stays zero. With a reference, both
    methods are scored against ``o_w``.

    NOTE(review): this relies on a ``calculate(v, o_w, w, h, c_mode, i)``
    variant that returns ``(w, h, error)``. Several functions named
    ``calculate`` with different signatures appear in this file -- confirm
    the intended one is the one in scope.

    Args:
        n, m, r: Sizes forwarded to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: Sketch size used for the sampled matrix.
        v: Matrix to factorise.
        iteration: Number of updates per method.
        seeds: Indexable pair of seeds for W (``seeds[0]``) and H
            (``seeds[1]``); ``seeds[0] + 1`` seeds the sampling.
        c_mode: Mode flag forwarded unchanged to the ``ff`` helpers.
        o_w: Reference W, or ``0`` (the default) for "no reference".

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_s)`` without a reference, or
        ``(nmf_error, snmf_error, w, h, w_s, h_os)`` with one, where
        ``h_os`` comes from ``ff.calculate_h(v, w_s, print_interim=True)``.
    """
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    if np.all(o_w == 0):
        print("\n\n\n------------------ NMF -------------------")
        for i in range(iteration):
            # ff.update takes (v, w, h, c_mode) everywhere else in this
            # file; an extra r would shift c_mode out of position.
            w, h = ff.update(v, w, h, c_mode)
            # The sketched factors are generated for v_s (h_s uses
            # approximate_size), so they are updated against v_s, exactly
            # as in the reference branch below.
            w_s, h_s, nmf_error[i] = calculate(v_s, w, w_s, h_s, c_mode, i)
        return nmf_error, snmf_error, w, h, w_s, h_s

    print("\n\n\n------------------ NMF -------------------")
    for i in range(iteration):
        w, h, nmf_error[i] = calculate(v, o_w, w, h, c_mode, i)

    print("\n\n------------- Sketching NMF --------------")
    for i in range(iteration):
        w_s, h_s, snmf_error[i] = calculate(v_s, o_w, w_s, h_s, c_mode, i)
    h_os = ff.calculate_h(v, w_s, print_interim=True)
    return nmf_error, snmf_error, w, h, w_s, h_os
def calculate(v, o_w, w, h, c_mode, i):
    """Run one update and score W against ``o_w`` via column-pivoted QR.

    NOTE(review): several functions named ``calculate`` with different
    signatures appear in this file; only the last definition survives at
    import time -- confirm which one callers expect.

    Args:
        v: Matrix being factorised.
        o_w: Reference matrix that the pivoted ``Q R`` of W is compared to.
        w, h: Current factors.
        c_mode: Mode flag forwarded to ``ff.update``.
        i: Zero-based update index, used only for progress output.

    Returns:
        ``(w, h, frobenius_norm)``: the updated factors and the squared
        Frobenius norm of ``o_w - Q R``.
    """
    w, h = ff.update(v, w, h, c_mode)
    q_factor, r_factor, _ = linalg.qr(w, pivoting=True)
    pivoted_w = np.dot(q_factor, r_factor)
    frobenius_norm = np.linalg.norm(o_w - pivoted_w)**2
    # Log the first update and then every hundredth one.
    should_log = i == 0 or i % 100 == 99
    if should_log:
        print(str(i + 1) + " times update error: " + str(frobenius_norm))
    return w, h, frobenius_norm
def calculate(v, o_w, w, h, c_mode, i):
    """Run one update and score how well W's columns reproduce ``o_w``.

    ``o_w`` is fitted by least squares as ``w @ d`` and the squared
    Frobenius norm of the remaining residual is reported.

    NOTE(review): several functions named ``calculate`` with different
    signatures appear in this file; only the last definition survives at
    import time -- confirm which one callers expect.

    Args:
        v: Matrix being factorised.
        o_w: Reference matrix to be reproduced from W.
        w, h: Current factors.
        c_mode: Mode flag forwarded to ``ff.update``.
        i: Zero-based update index, used only for progress output.

    Returns:
        ``(w, h, frobenius_norm)``: the updated factors and the squared
        least-squares residual ``||o_w - w d||_F^2``.
    """
    w, h = ff.update(v, w, h, c_mode)
    d = np.linalg.lstsq(w, o_w)[0]
    # d solves w @ d ~= o_w, so the residual is measured against o_w (the
    # right-hand side), not against w itself.
    frobenius_norm = np.linalg.norm(o_w - np.dot(w, d))**2
    if i == 0 or i % 100 == 99:
        print(str(i + 1) + " times update error: " + str(frobenius_norm))
    return w, h, frobenius_norm
def calculate(v, w, h, c_mode, i, qp_opt=False):
    """Run one update and return the squared reconstruction error of ``v``.

    NOTE(review): several functions named ``calculate`` with different
    signatures appear in this file; only the last definition survives at
    import time -- confirm which one callers expect.

    Args:
        v: Matrix being factorised.
        w, h: Current factors.
        c_mode: Mode flag forwarded to ``ff.update``.
        i: Zero-based update index, used only for progress output.
        qp_opt: When truthy, the error is evaluated with the H returned by
            ``ff.calculate_h`` instead of the updated ``h``.

    Returns:
        ``(w, h, frobenius_norm)``. The returned ``h`` is always the one
        produced by ``ff.update``, even when ``qp_opt`` is set.
    """
    w, h = ff.update(v, w, h, c_mode)
    # Only the evaluation uses the recomputed H; the iterated h is returned.
    h_for_error = ff.calculate_h(v, w, print_interim=True) if qp_opt else h
    frobenius_norm = np.linalg.norm(v - np.dot(w, h_for_error))**2
    # Log the first update and then every hundredth one.
    should_log = i == 0 or i % 100 == 99
    if should_log:
        print(str(i + 1) + " times update error: " + str(frobenius_norm))
    return w, h, frobenius_norm
def bic(v, r, iteration, seed):
    """Fit NMF of rank ``r`` to ``v`` and return an information score.

    With ``a = (n + m) / (n * m)`` for an ``n x m`` input, the score is
    ``log(||v - W H||_F^2) + r * a * log(1 / a)``.

    Args:
        v: 2-D matrix to factorise.
        r: Factorisation rank.
        iteration: Number of ``ff.update`` calls.
        seed: Seed expanded into a W/H pair by ``ff.two_seeds``.

    Returns:
        ``(score, error)`` where ``error[k]`` is the squared Frobenius
        reconstruction error after update ``k + 1``.
    """
    rows, cols = v.shape
    w_seed_h_seed = ff.two_seeds(seed)
    w = ff.generate_w(rows, r, w_seed_h_seed[0], c_mode=0)
    h = ff.generate_h(r, cols, w_seed_h_seed[1], c_mode=0)
    error = np.zeros(iteration)

    for step in range(iteration):
        w, h = ff.update(v, w, h, 0)
        error[step] = np.linalg.norm(v - np.dot(w, h))**2
        # Progress line after every thousandth update.
        if (step + 1) % 1000 == 0:
            print("BIC NMF ( r={} ) : {} times update".format(r, step + 1))

    a = (rows + cols) / (rows * cols)
    d_eu = np.linalg.norm(v - np.dot(w, h))**2
    fit_term = np.log(d_eu)
    penalty_term = r * a * np.log(1 / a)
    return fit_term + penalty_term, error
def error_calculate(r_size, ap_size, v_origin, w_origin, ite, wh_seed, c_mode,
                    nmfqp, t_flag):
    """Record V- and W-errors of NMF and sketching NMF over ``ite`` updates.

    After every update two quantities are stored per method: the squared
    Frobenius reconstruction error of ``v_origin``, and the squared residual
    of fitting ``w_origin`` by least squares from one of the factors.

    Args:
        r_size: Factorisation rank.
        ap_size: Sketch size used for the sampled matrix.
        v_origin: 2-D matrix to factorise.
        w_origin: Reference matrix for the W-error.
        ite: Number of updates per method.
        wh_seed: Seed expanded into a W/H pair by ``ff.two_seeds``.
        c_mode: Mode flag for the ``ff`` helpers; the value ``2`` selects
            ``ff.fgd_update`` (with theta state) instead of ``ff.update``.
        nmfqp: When truthy, the NMF V-error uses the H returned by
            ``ff.calculate_h``.
        t_flag: When truthy, the W-error is fitted from the transposed
            H-side factor and the returned factors are transposed and
            swapped.

    Returns:
        ``(v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error, ...)``
        followed by ``h.T, w.T, h_s.T, w_s.T, os_M`` when ``t_flag`` is
        truthy, or by ``w, h, w_s, h_s, os_M`` otherwise.
    """
    theta_start = 5
    seeds = ff.two_seeds(wh_seed)
    v_nmf_error = np.zeros(ite)
    v_snmf_error = np.zeros(ite)
    w_nmf_error = np.zeros(ite)
    w_snmf_error = np.zeros(ite)
    n_size, m_size = v_origin.shape
    w = ff.generate_w(n_size, r_size, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r_size, m_size, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v_origin, ap_size, 0)
    use_fgd = c_mode == 2

    def v_residual(left, right):
        # Squared Frobenius distance between v_origin and left @ right.
        return np.linalg.norm(v_origin - np.dot(left, right))**2

    def w_residual(basis):
        # Squared residual of the least-squares fit of w_origin from basis.
        coefficients = np.linalg.lstsq(basis, w_origin)[0]
        return np.linalg.norm(w_origin - np.dot(basis, coefficients))**2

    # NMF calculate ---------------------------------------------------------
    if use_fgd:
        theta_w = theta_h = theta_start
    if nmfqp:
        print("NMF matrix H is calculated by QP.")
    for step in range(ite):
        if use_fgd:
            w, h, theta_w, theta_h = ff.fgd_update(v_origin, w, h,
                                                   theta_w, theta_h)
        else:
            w, h = ff.update(v_origin, w, h, c_mode)
        h_result = (ff.calculate_h(v_origin, w, print_interim=False)
                    if nmfqp else h)

        # v evaluate -------------------
        v_nmf_error[step] = v_residual(w, h_result)
        # w evaluate -------------------
        # NOTE(review): the transposed case fits from h, not h_result --
        # confirm that this is intended when nmfqp is set.
        w_nmf_error[step] = w_residual(h.T if t_flag else w)

        if step == 0 or step % 100 == 99:
            print("NMF ( r=" + str(r_size) + " k=" + str(ap_size) + " ) : "
                  + str(step + 1) + " times update")

    # SNMF calculate --------------------------------------------------------
    if use_fgd:
        theta_w = theta_h = theta_start
    w_s = ff.generate_w(n_size, r_size, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r_size, ap_size, seeds[1], c_mode=c_mode)
    for step in range(ite):
        if use_fgd:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s,
                                                       theta_w, theta_h)
        else:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)

        # v evaluate -------------------
        os_M = ff.calculate_h(v_origin, w_s, print_interim=False)
        v_snmf_error[step] = v_residual(w_s, os_M)
        # w evaluate -------------------
        w_snmf_error[step] = w_residual(os_M.T if t_flag else w_s)

        if step == 0 or step % 100 == 99:
            print(
                "SketchingNMF ( r={} k={} seed={} ) : {} times update".format(
                    r_size, ap_size, wh_seed, step + 1))

    errors = (v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error)
    if t_flag:
        return errors + (h.T, w.T, h_s.T, w_s.T, os_M)
    return errors + (w, h, w_s, h_s, os_M)
def parallel_v_error_eval(r, approximate_size, v, iteration, wh_seed, c_mode,
                          nmfqp, t_flag, snmf_only=False):
    """Track the squared reconstruction error of NMF and sketching NMF.

    Args:
        r: Factorisation rank.
        approximate_size: Sketch size used for the sampled matrix.
        v: 2-D matrix to factorise.
        iteration: Number of updates per method.
        wh_seed: Seed expanded into a W/H pair by ``ff.two_seeds``.
        c_mode: Mode flag for the ``ff`` helpers; the value ``2`` selects
            ``ff.fgd_update`` (with theta state) instead of ``ff.update``.
        nmfqp: When truthy, the NMF error is evaluated with the H returned
            by ``ff.calculate_h`` and that H is returned in place of the
            iterated one.
        t_flag: When truthy, the returned factors are transposed and
            swapped.
        snmf_only: When truthy, the plain NMF pass is skipped; its error
            array stays zero and its factors are returned as ``None``.

    Returns:
        A 7-tuple starting with ``nmf_error, snmf_error``, followed by
        ``w, h, w_s, h_os, h_s`` (or ``h.T, w.T, h_os.T, w_s.T, h_s.T``
        when ``t_flag`` is truthy). The two NMF factor slots are ``None``
        when ``snmf_only`` is truthy.
    """
    theta_start = 5
    n, m = v.shape
    seeds = ff.two_seeds(wh_seed)
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1)

    # NMF -------------------------------------------------------------------
    theta_w = theta_h = theta_start
    if not snmf_only:
        if nmfqp:
            print("NMF matrix H is calculated by QP.")
            for i in range(iteration):
                if c_mode != 2:
                    w, h = ff.update(v, w, h, c_mode)
                else:
                    w, h, theta_w, theta_h = ff.fgd_update(
                        v, w, h, theta_w, theta_h)
                h_qp = ff.calculate_h(v, w, print_interim=False)
                nmf_error[i] = np.linalg.norm(v - np.dot(w, h_qp))**2
                if i == 0 or i % 100 == 99:
                    print("NMF ( r=" + str(r) + " k=" + str(approximate_size)
                          + " ) : " + str(i + 1) + " times update")
            # Hand back the H that the reported errors were measured with.
            h = h_qp
        else:
            for i in range(iteration):
                if c_mode != 2:
                    w, h = ff.update(v, w, h, c_mode)
                else:
                    w, h, theta_w, theta_h = ff.fgd_update(
                        v, w, h, theta_w, theta_h)
                nmf_error[i] = np.linalg.norm(v - np.dot(w, h))**2
                if i == 0 or i % 2000 == 1999:
                    print("NMF ( r=" + str(r) + " k=" + str(approximate_size)
                          + " ) : " + str(i + 1) + " times update")

    # SNMF ------------------------------------------------------------------
    # Restart the theta state so the sketched run does not inherit whatever
    # the NMF pass left behind; otherwise its result would depend on
    # snmf_only.
    theta_w = theta_h = theta_start
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
    for i in range(iteration):
        if c_mode != 2:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        else:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(
                v_s, w_s, h_s, theta_w, theta_h)
        # The error is always measured on the full matrix, using an H
        # recomputed for the current w_s.
        h_os = ff.calculate_h(v, w_s, print_interim=False)
        snmf_error[i] = np.linalg.norm(v - np.dot(w_s, h_os))**2
        if i == 0 or i % 100 == 99:
            print(
                "SketchingNMF ( r={} k={} seed={} ) : {} times update".format(
                    r, approximate_size, wh_seed, i + 1))

    if t_flag and snmf_only:
        return nmf_error, snmf_error, None, None, h_os.T, w_s.T, h_s.T
    if t_flag:
        return nmf_error, snmf_error, h.T, w.T, h_os.T, w_s.T, h_s.T
    if snmf_only:
        return nmf_error, snmf_error, None, None, w_s, h_os, h_s
    return nmf_error, snmf_error, w, h, w_s, h_os, h_s