def print_posterior(Z, W, data_dim):
    """Print every binary feature-assignment pattern whose posterior
    probability exceeds 0.01, along with its reconstruction under W.

    Z        -- (N, K) binary assignment matrix (used for the posterior).
    W        -- (K, T) feature matrix; pattern . W gives the reconstruction.
    data_dim -- display dimensions forwarded to display_W.
    """
    _, num_feats = Z.shape
    for code in range(2 ** num_feats):
        # Zero-padded binary expansion of `code` as a list of 0/1 ints.
        bits = [int(c) for c in format(code, 'b').zfill(num_feats)]
        posterior = np.exp(Z_posterior(bits, Z))
        if posterior <= 0.01:
            continue
        bits = np.array(bits)
        print("pad binary, reconstruct, probability")
        print(bits)
        print(posterior)
        display_W(np.dot(bits, W), data_dim, 'four')
def print_paintbox(tree, W, data_dim, flag='four'):
    """Print every paintbox pattern with probability above 0.01 and
    display its reconstruction under W.

    tree     -- paintbox tree; get_FD gives (#features F, #patterns D).
    W        -- feature matrix used for the reconstruction.
    data_dim -- (small_x, small_y, big_x, big_y) display dimensions.
    flag     -- layout flag forwarded to display_W.

    Always returns 0.
    """
    small_x, small_y, big_x, big_y = data_dim
    vec = get_vec(tree)
    F, D = get_FD(tree)
    depth = int(math.log(D, 2))  # number of binary features: D = 2**depth
    print("outputting paintbox")
    for code in range(D):
        # Zero-padded binary expansion of `code` as a list of 0/1 ints.
        bits = [int(c) for c in format(code, 'b').zfill(depth)]
        weight = Z_paintbox(bits, vec)
        if weight <= 0.01:
            continue
        bits = np.array(bits)
        print("pad binary, reconstruct, probability")
        print(bits)
        print(weight)
        display_W(np.dot(bits, W), data_dim, flag)
    return 0
data_type, corr_value=2) Y = full_data[:data_count, :] held_out = full_data[data_count:, :] Z, W, ll_set, pred_ll = ugibbs_sampler(Y, held_out, alpha, sig, sig_w, iterate, select) approx = np.dot(Z, W) for i in range(10): print("sample: " + str(i)) print("features probability") #print(prob_matrix[i,:]) print("features selected") print(Z[i, :]) display_W(approx[i:i + 1, :], data_dim, 'nine') print("data: " + str(i)) display_W(Y[i:i + 1, :], data_dim, 'nine') #Output the posterior Z_trunc, W_trunc = truncate(Z, W, select) print_posterior(Z_trunc, W_trunc) # display_W(W) # Z_final = Z_set[iterate-1] # plotRowHistogram(Z_final) # plotMatrixHistogram(Z_final) # title = 'Slice Sampler: log likelihood vs. iterations' # x_axis = 'iterations' # y_axis = 'log likelihood' # data_x = [i for i in range(1,iterate+1)] # data_y = ll_set
# Post-run reporting: feature-count-vs-time plot, reconstruction check,
# and the learned paintbox.
title = 'Nonparametric Paintbox: feature vs. time'
x_axis = 'time'
y_axis = 'features'
data_x = time_avg
data_y = f_avg
plot(title, x_axis, y_axis, data_x, data_y)
#visual verification of data reconstruction
# BUG FIX: display_W and print_paintbox take a single data_dim tuple (see
# print_paintbox's definition and the corrected twin of this section
# elsewhere in this file); passing the four ints unpacked raised TypeError.
data_dim = (small_x, small_y, big_x, big_y)
approx = np.dot(Z, W)
for i in range(10):
    print("sample: " + str(i))
    print("features probability")
    print(prob_matrix[i, :])
    print("features selected")
    print(Z[i, :])
    display_W(approx[i:i + 1, :], data_dim, flag)
    print("data: " + str(i))
    display_W(Y[i:i + 1, :], data_dim, flag)
#printing paintbox (only makes sense for single run)
print_paintbox(tree, W, data_dim, flag)
#display_W(W,small_x,small_y,big_x,big_y,'nine')
# pb_scale = scale(pb)
# plt.imshow(pb_scale,interpolation='nearest')
# plt.show()
#print(ll_list)
#plt.imshow(W,interpolation='nearest')
#plt.show()
#display_W(W)
D = res**F #discretization T = 36 #length of datapoint N = 100 #data size sig = 0.1 sig_w = 5.0 print("GENERATE DATA") Y,Z_gen,gen_pb = generate_data(res,D,F,N,T,sig) print('FINISH GENERATE') #gen_pb = scale(gen_pb) #plt.imshow(gen_pb,interpolation='nearest') #plt.show() iterate = 100 #print("DATA") #print(Y) #init variables #profile.run('gibbs_sample(Y,sig,iterate,D,F,N,T)') ll_list,Z,W,pb = gibbs_sample(Y,sig,sig_w,iterate,D,F,N,T) approx = np.dot(Z,W) #print(Z) pb_scale = scale(pb) plt.imshow(pb_scale,interpolation='nearest') plt.show() #print(ll_list) #plt.imshow(W,interpolation='nearest') #plt.show() display_W(W) plt.plot(ll_list) plt.show()
data_type = 'random' #full_data,Z_gen = generate_data(feature_count,data_count + held_out,T,sig,data_type) full_data, Z_gen = construct_data(small_x, small_y, big_x, big_y, data_count + held_out, sig, data_type, corr_value=2) Y = full_data[:data_count, :] held_out = full_data[data_count:, :] sig_alg = 0.1 nu_set, phi_set, Phi_set, tau_set, pred_ll = run_vi( Y, held_out, alpha, sig_w, sig_alg, iterate, feature_count) W = phi_set[iterate - 1] display_W(W, small_x, small_y, big_x, big_y, 'nine') print(pred_ll) #A = phi_set[34] # print(elbo_set) # nu = nu_set[iterate-1] # print(nu) # print(Phi_set[iterate-1]) # N,K = nu.shape # nu_var = np.zeros((N,K)) # for n in range(N): # for k in range(K): # nu_var[n,k] = nu[n,k]*(1-nu[n,k]) # variance = nu_var.sum() # print(variance)
def upaintbox_sample(data_dim, log_res, hold, Y, held_out, ext, sig, sig_w,
                     iterate, K, truncate):
    """Run the uncollapsed paintbox Gibbs sampler.

    Parameters
    ----------
    data_dim : (small_x, small_y, big_x, big_y) tuple, used only for display.
    log_res  : resolution cap; res is doubled until it reaches 2**log_res.
    hold     : iteration interval between resolution doublings.
    Y        : (N, T) data matrix.
    held_out : held-out data for predictive / recovery log likelihood.
    ext, K, truncate : feature-extension, initial feature count, and
        truncation controls forwarded to the helper samplers.
    sig, sig_w : noise and feature-prior standard deviations.
    iterate  : number of Gibbs iterations.

    Returns
    -------
    (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix, pred_ll,
     tree) — traces, final state, and the learned paintbox tree.
    """
    small_x, small_y, big_x, big_y = data_dim
    N, T = Y.shape
    #technically this is a bug
    #generating tree with res = 1 is wrong.
    #but you fixed it in tree paintbox hard coded 0.5
    res = 1
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    #Z = np.loadtxt('assignments.txt')
    print(Z)
    #W = sample_W(Y,Z,sig,sig_w)
    # Initialize features from the Gaussian prior N(0, sig_w).
    W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))
    #W = np.loadtxt('features.txt')
    # full = generate_gg_blocks()
    # W = np.zeros((3,T))
    # W[0,:] = full[0,:]
    # W[1,:] = full[2,:]
    # W[2,:] = full[0,:] + full[2,:]
    display_W(W, 'four')
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    pred = 0
    rec = 0
    # BUG FIX: prob_matrix was only bound inside the sampling loop, so the
    # final return raised NameError when iterate == 0.
    prob_matrix = None
    for redo in range(1):
        # Dead branch: redo only takes the value 0 with range(1); kept for
        # parity with earlier multi-restart experiments.
        if redo == 1:
            res = 1
            N, K = Z.shape
            tree = gen_tree(K, res)
            ctree, ptree = tree
        for it in range(iterate):
            # Anneal the paintbox resolution: double every `hold` iterations
            # until it reaches 2**log_res.
            if it % hold == 0:
                if res < 2**log_res:
                    res = res * 2
            start = time.time()
            N, K = Z.shape
            #sample Z
            Z, prob_matrix = sample_Z(Y, Z, W, sig, sig_w, tree)
            if it % 10 == 0:
                print("iteration: " + str(it))
                print("Sparsity: " + str(np.sum(Z, axis=0)))
                print('predictive log likelihood: ' + str(pred))
                print('recover log likelihood: ' + str(rec))
            #sample paintbox
            tree, lapse = sample_pb(Z, tree, res)
            #sample W
            W = sample_W(Y, Z, sig, sig_w)
            #add new features
            ll_list.append(log_data_zw(Y, Z, W, sig))
            F, D = get_FD(tree)
            f_count.append(F)
            #predictive log likelihood
            if it % 100 == 0:
                pred = pred_ll_paintbox(held_out, W, tree, sig)
                pred_ll.append(pred)
                # BUG FIX: integer division for the slice bound — under
                # Python 3, T / 2 is a float and raises TypeError as an
                # index (T // 2 is identical for ints under Python 2).
                rec = recover_paintbox(held_out, held_out[:, :T // 2], W,
                                       tree, sig)
            if it % 500 == 0 and it > 0:
                print_paintbox(tree, W, data_dim, 'four')
            #if it%200 == 0 and it > 0:
            #    display_W(W,data_dim,'nine')
            #handling last iteration edge case
            drop = 0
            if it == iterate - 1:
                drop = 1
            Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w,
                                     drop, truncate)
            end = time.time()
            iter_time.append(end - start)
            lapse_data.append(lapse)
    #iter_time = np.cumsum(iter_time)
    # BUG FIX: the result tuple must sit on the return line — a bare
    # `return` followed by the tuple on the next line returns None and
    # leaves the tuple as dead code.
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, tree)
# Post-run reporting for the paintbox sampler: feature-count-vs-time plot,
# reconstruction check on the first 10 samples, and the learned paintbox.
title = 'Nonparametric Paintbox: feature vs. time'
x_axis = 'time'
y_axis = 'features'
data_x = time_avg
data_y = f_avg
plot(title, x_axis, y_axis, data_x, data_y)
#visual verification of data reconstruction
# Reconstruction Z.W, compared sample-by-sample against the raw data Y.
approx = np.dot(Z, W)
for i in range(10):
    print("sample: " + str(i))
    print("features probability")
    print(prob_matrix[i, :])
    print("features selected")
    print(Z[i, :])
    display_W(approx[i:i + 1, :], data_dim, flag)
    print("data: " + str(i))
    display_W(Y[i:i + 1, :], data_dim, flag)
#printing paintbox (only makes sense for single run)
print_paintbox(tree, W, data_dim, flag)
#display_W(W,small_x,small_y,big_x,big_y,'nine')
# pb_scale = scale(pb)
# plt.imshow(pb_scale,interpolation='nearest')
# plt.show()
#print(ll_list)
#plt.imshow(W,interpolation='nearest')
#plt.show()
#display_W(W)