Example no. 1
def print_posterior(Z, W, data_dim):
    N, K = Z.shape
    for j in range(2**K):
        binary = list(map(int, "{0:b}".format(j)))
        pad_binary = [0] * (K - len(binary)) + binary
        prob = np.exp(Z_posterior(pad_binary, Z))
        if prob > 0.01:
            pad_binary = np.array(pad_binary)
            reconstruct = np.dot(pad_binary, W)
            print("pad binary, reconstruct, probability")
            print(pad_binary)
            print(prob)
            display_W(reconstruct, data_dim, 'four')
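A note on the enumeration above: each integer j in range(2**K) is rendered as a bit string and left-padded with zeros to length K, yielding every possible binary feature-assignment vector. Below is a minimal, self-contained sketch of just that step (Z_posterior and display_W are helpers defined elsewhere in the codebase and are not reproduced here):

import numpy as np

def all_binary_vectors(K):
    # Enumerate all 2**K binary vectors of length K by left-padding
    # each integer's bit string with zeros.
    vectors = []
    for j in range(2**K):
        bits = list(map(int, "{0:b}".format(j)))
        vectors.append(np.array([0] * (K - len(bits)) + bits))
    return vectors

print(all_binary_vectors(2))  # [array([0, 0]), array([0, 1]), array([1, 0]), array([1, 1])]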
Example no. 2
def print_paintbox(tree, W, data_dim, flag='four'):
    small_x, small_y, big_x, big_y = data_dim
    vec = get_vec(tree)
    F, D = get_FD(tree)
    K = int(math.log(D, 2))
    print("outputting paintbox")
    for j in range(D):
        binary = list(map(int, "{0:b}".format(j)))
        pad_binary = [0] * (K - len(binary)) + binary
        prob = Z_paintbox(pad_binary, vec)
        if prob > 0.01:
            pad_binary = np.array(pad_binary)
            reconstruct = np.dot(pad_binary, W)
            print("pad binary, reconstruct, probability")
            print(pad_binary)
            print(prob)
            display_W(reconstruct, data_dim, flag)
    return 0
Example no. 3
    full_data, Z_gen = construct_data(small_x,
                                      small_y,
                                      big_x,
                                      big_y,
                                      data_count + held_out,
                                      sig,
                                      data_type,
                                      corr_value=2)

    Y = full_data[:data_count, :]
    held_out = full_data[data_count:, :]
    Z, W, ll_set, pred_ll = ugibbs_sampler(Y, held_out, alpha, sig, sig_w,
                                           iterate, select)

    approx = np.dot(Z, W)
    for i in range(10):
        print("sample: " + str(i))
        print("features probability")
        #print(prob_matrix[i,:])
        print("features selected")
        print(Z[i, :])
        display_W(approx[i:i + 1, :], data_dim, 'nine')
        print("data: " + str(i))
        display_W(Y[i:i + 1, :], data_dim, 'nine')

    #Output the posterior
    Z_trunc, W_trunc = truncate(Z, W, select)
    print_posterior(Z_trunc, W_trunc, data_dim)
#    display_W(W)
#    Z_final = Z_set[iterate-1]
#    plotRowHistogram(Z_final)
#    plotMatrixHistogram(Z_final)
#    title = 'Slice Sampler: log likelihood vs. iterations'
#    x_axis = 'iterations'
#    y_axis = 'log likelihood'
#    data_x = [i for i in range(1,iterate+1)]
#    data_y = ll_set
Example no. 4
    title = 'Nonparametric Paintbox: feature vs. time'
    x_axis = 'time'
    y_axis = 'features'
    data_x = time_avg
    data_y = f_avg
    plot(title, x_axis, y_axis, data_x, data_y)

    #visual verification of data reconstruction
    approx = np.dot(Z, W)
    for i in range(10):
        print("sample: " + str(i))
        print("features probability")
        print(prob_matrix[i, :])
        print("features selected")
        print(Z[i, :])
        display_W(approx[i:i + 1, :], small_x, small_y, big_x, big_y, flag)
        print("data: " + str(i))
        display_W(Y[i:i + 1, :], small_x, small_y, big_x, big_y, flag)

    #printing paintbox (only makes sense for single run)
    print_paintbox(tree, W, small_x, small_y, big_x, big_y, flag)
    #display_W(W,small_x,small_y,big_x,big_y,'nine')

#    pb_scale = scale(pb)
#    plt.imshow(pb_scale,interpolation='nearest')
#    plt.show()
#print(ll_list)

#plt.imshow(W,interpolation='nearest')
#plt.show()
#display_W(W)
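For reference, the reconstruction checked visually above is the linear-Gaussian model Y ≈ ZW: each binary row of Z selects a subset of the feature rows of W, and their sum approximates the corresponding data row up to noise. A self-contained sketch with arbitrary sizes (not the snippet's actual data or helpers):

import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(0.0, 1.0, size=(4, 36))            # 4 features, 36-dimensional data
Z = rng.integers(0, 2, size=(10, 4))              # binary feature assignments
Y = Z @ W + rng.normal(0.0, 0.1, size=(10, 36))   # noisy observations
approx = Z @ W                                    # same reconstruction as np.dot(Z, W)
print(np.mean((Y - approx) ** 2))                 # roughly sig**2 = 0.01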
Example no. 5
    D = res**F  # discretization
    T = 36  # length of datapoint
    N = 100  # data size
    sig = 0.1
    sig_w = 5.0
    print("GENERATE DATA")
    Y, Z_gen, gen_pb = generate_data(res, D, F, N, T, sig)
    print('FINISH GENERATE')
    #gen_pb = scale(gen_pb)
    #plt.imshow(gen_pb, interpolation='nearest')
    #plt.show()
    iterate = 100
    #print("DATA")
    #print(Y)
    #init variables
    #profile.run('gibbs_sample(Y,sig,iterate,D,F,N,T)')
    ll_list, Z, W, pb = gibbs_sample(Y, sig, sig_w, iterate, D, F, N, T)
    approx = np.dot(Z, W)
    #print(Z)
    pb_scale = scale(pb)
    plt.imshow(pb_scale, interpolation='nearest')
    plt.show()
    #print(ll_list)

    #plt.imshow(W, interpolation='nearest')
    #plt.show()
    display_W(W)

    plt.plot(ll_list)
    plt.show()
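The scale helper used before plt.imshow is not shown in these snippets; the sketch below is a plausible stand-in that min-max normalizes a matrix into [0, 1] for display. This is an assumption about its behavior, not the actual implementation:

import numpy as np

def scale_sketch(M):
    # Hypothetical stand-in for scale(): min-max normalize into [0, 1]
    # so plt.imshow renders the full dynamic range (assumption, not the
    # snippet's real scale() implementation).
    M = np.asarray(M, dtype=float)
    span = M.max() - M.min()
    return (M - M.min()) / span if span > 0 else np.zeros_like(M)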
Example no. 6
    data_type = 'random'
    #full_data,Z_gen = generate_data(feature_count,data_count + held_out,T,sig,data_type)
    full_data, Z_gen = construct_data(small_x,
                                      small_y,
                                      big_x,
                                      big_y,
                                      data_count + held_out,
                                      sig,
                                      data_type,
                                      corr_value=2)
    Y = full_data[:data_count, :]
    held_out = full_data[data_count:, :]
    sig_alg = 0.1
    nu_set, phi_set, Phi_set, tau_set, pred_ll = run_vi(
        Y, held_out, alpha, sig_w, sig_alg, iterate, feature_count)
    W = phi_set[iterate - 1]
    display_W(W, small_x, small_y, big_x, big_y, 'nine')
    print(pred_ll)
    #A = phi_set[34]
#    print(elbo_set)
#    nu = nu_set[iterate-1]
#    print(nu)
#    print(Phi_set[iterate-1])
#    N,K  = nu.shape
#    nu_var = np.zeros((N,K))
#    for n in range(N):
#        for k in range(K):
#            nu_var[n,k] = nu[n,k]*(1-nu[n,k])
#    variance = nu_var.sum()
#    print(variance)
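The commented-out block above computes the element-wise Bernoulli variance nu[n, k] * (1 - nu[n, k]) of the variational assignment probabilities and sums it as a rough measure of posterior uncertainty over Z. A vectorized, self-contained equivalent of that check (nu here is a random stand-in for nu_set[iterate - 1]):

import numpy as np

nu = np.random.rand(5, 3)      # stand-in for nu_set[iterate - 1]
nu_var = nu * (1.0 - nu)       # Bernoulli variance of each q(z_nk = 1)
variance = nu_var.sum()        # total uncertainty over the Z matrix
print(variance)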
Example no. 7
def upaintbox_sample(data_dim, log_res, hold, Y, held_out, ext, sig, sig_w,
                     iterate, K, truncate):
    small_x, small_y, big_x, big_y = data_dim
    N, T = Y.shape
    #note: generating the tree with res = 1 is technically a bug,
    #but the tree paintbox code compensates by hard-coding 0.5
    res = 1
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    #Z = np.loadtxt('assignments.txt')
    print(Z)
    #W = sample_W(Y,Z,sig,sig_w)
    W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))
    #W = np.loadtxt('features.txt')
    #    full = generate_gg_blocks()
    #    W = np.zeros((3,T))
    #    W[0,:] = full[0,:]
    #    W[1,:] = full[2,:]
    #    W[2,:] = full[0,:] + full[2,:]
    display_W(W, 'four')
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    pred = 0
    rec = 0
    for redo in range(1):
        if redo == 1:
            res = 1
            N, K = Z.shape
            tree = gen_tree(K, res)
            ctree, ptree = tree
        for it in range(iterate):
            if it % hold == 0:
                if res < 2**log_res:
                    res = res * 2

            start = time.time()
            N, K = Z.shape
            #sample Z
            Z, prob_matrix = sample_Z(Y, Z, W, sig, sig_w, tree)
            if it % 10 == 0:
                print("iteration: " + str(it))
                print("Sparsity: " + str(np.sum(Z, axis=0)))
                print('predictive log likelihood: ' + str(pred))
                print('recover log likelihood: ' + str(rec))
            #sample paintbox
            tree, lapse = sample_pb(Z, tree, res)
            #sample W
            W = sample_W(Y, Z, sig, sig_w)
            #add new features
            ll_list.append(log_data_zw(Y, Z, W, sig))
            F, D = get_FD(tree)
            f_count.append(F)
            #predictive log likelihood
            if it % 100 == 0:
                pred = pred_ll_paintbox(held_out, W, tree, sig)
                pred_ll.append(pred)
                rec = recover_paintbox(held_out, held_out[:, :T // 2], W, tree,
                                       sig)
            if it % 500 == 0 and it > 0:
                print_paintbox(tree, W, data_dim, 'four')
            #if it%200 == 0 and it > 0:
            #    display_W(W,data_dim,'nine')
            #handling last iteration edge case
            drop = 0
            if it == iterate - 1:
                drop = 1
            Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w,
                                     drop, truncate)
            end = time.time()
            iter_time.append(end - start)
            lapse_data.append(lapse)
        #iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, tree)
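The it % hold check at the top of the sampler loop implements a resolution-doubling schedule: the paintbox discretization starts at res = 1 (doubled to 2 on the first iteration) and doubles every hold iterations until it reaches 2**log_res. A self-contained sketch of just that schedule, reusing the snippet's parameter names:

def resolution_schedule(iterate, hold, log_res):
    # Double res every `hold` iterations, capped at 2**log_res,
    # mirroring the schedule inside upaintbox_sample.
    res = 1
    schedule = []
    for it in range(iterate):
        if it % hold == 0 and res < 2**log_res:
            res = res * 2
        schedule.append(res)
    return schedule

print(resolution_schedule(12, 4, 3))  # [2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8]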
Example no. 8
    title = 'Nonparametric Paintbox: feature vs. time'
    x_axis = 'time'
    y_axis = 'features'
    data_x = time_avg
    data_y = f_avg
    plot(title, x_axis, y_axis, data_x, data_y)

    #visual verification of data reconstruction
    approx = np.dot(Z, W)
    for i in range(10):
        print("sample: " + str(i))
        print("features probability")
        print(prob_matrix[i, :])
        print("features selected")
        print(Z[i, :])
        display_W(approx[i:i + 1, :], data_dim, flag)
        print("data: " + str(i))
        display_W(Y[i:i + 1, :], data_dim, flag)

    #printing paintbox (only makes sense for single run)
    print_paintbox(tree, W, data_dim, flag)
    #display_W(W,small_x,small_y,big_x,big_y,'nine')

#    pb_scale = scale(pb)
#    plt.imshow(pb_scale,interpolation='nearest')
#    plt.show()
#print(ll_list)

#plt.imshow(W,interpolation='nearest')
#plt.show()
#display_W(W)