예제 #1
0
def upaintbox_sample(log_res, hold, Y, held_out, ext, sig, sig_w, iterate, K,
                     data_run):
    """Run the paintbox sampler on ``Y`` for ``iterate`` sweeps.

    Each sweep resamples the assignments ``Z`` (``sample_Z``), the paintbox
    tree (``sample_pb``) and the weights ``W`` (``sample_W``), records the
    data log-likelihood and the feature count, and then calls
    ``new_feature`` to adjust the feature set.

    Args:
        log_res: the resolution ``res`` starts at 1 and is doubled until it
            reaches ``2**log_res``.
        hold: ``res`` is only doubled on sweeps whose index is a multiple of
            ``hold`` (``hold == 0`` raises ZeroDivisionError).
        Y: data array whose shape is read as ``(N, T)``.
        held_out: not used by the active code; the predictive
            log-likelihood step is disabled, so ``pred_ll`` stays empty.
        ext: forwarded to ``new_feature``.
        sig: forwarded to the samplers and to ``log_data_zw``.
        sig_w: scale of the normal draw that initialises ``W``; also
            forwarded to the samplers.
        iterate: number of sweeps to run.
        K: passed to ``gen_tree`` and used as the row count of the initial
            ``W``.
        data_run: trial number; only printed.

    Returns:
        Tuple ``(ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
        pred_ll, tree)``.  ``iter_time`` is the cumulative sum of the
        per-sweep wall-clock durations.  ``prob_matrix`` is the value from
        the last ``sample_Z`` call, or ``None`` when ``iterate`` is 0.
    """
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    res = 1
    tree = gen_tree(K, res)
    Z = draw_Z_tree(tree, N)
    W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    # Defined up front so the return statement is valid even with zero sweeps.
    prob_matrix = None
    max_res = 2**log_res
    for it in range(iterate):
        if it % hold == 0 and res < max_res:
            res = res * 2

        start = time.time()
        # The feature count can change between sweeps (see new_feature), so
        # K is re-read from Z on every sweep.
        _, K = Z.shape
        # sample Z
        Z, prob_matrix = sample_Z(Y, Z, W, sig, sig_w, tree)
        # sample paintbox
        tree, lapse = sample_pb(Z, tree, res)
        # sample W
        W = sample_W(Y, Z, sig, sig_w)
        ll_list.append(log_data_zw(Y, Z, W, sig))
        F, _ = get_FD(tree)
        f_count.append(F)
        # Last-iteration edge case: tell new_feature not to add features.
        drop = 1 if it == iterate - 1 else 0
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop)
        iter_time.append(time.time() - start)
        lapse_data.append(lapse)
    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, tree)
예제 #2
0
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w):
    """Drop features via ``drop_feature`` and, if few remain, add new ones.

    After ``drop_feature`` runs, the feature count ``F`` is read from
    ``get_FD``.  When ``F > 8`` nothing is added.  Otherwise ``more`` new
    features are added, where ``more`` is ``K - F`` if ``F + ext < K`` and
    ``ext`` otherwise: the tree is grown with ``add``, ``Z`` is extended with
    ``draw_feature`` and ``more`` rows drawn from ``Normal(0, sig_w)`` are
    appended to ``W``.

    Args:
        Y, sig: accepted for interface compatibility; not used here.
        Z: assignment matrix, passed through the helpers.
        W: weight matrix; assumed to be a 2-D NumPy array after
            ``drop_feature`` (its second dimension sets the width of the
            new rows).
        tree: paintbox tree as understood by the helper functions.
        ext: number of features to add when ``F + ext >= K``.
        K: feature count used in the ``F + ext < K`` comparison.
        res: resolution forwarded to ``add`` and ``draw_feature``.
        sig_w: scale of the normal draw for the new rows of ``W``.

    Returns:
        Tuple ``(Z, W, tree)``.
    """
    Z, W, tree = drop_feature(Z, W, tree)
    F, _ = get_FD(tree)
    if F > 8:
        return (Z, W, tree)
    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The original referenced an undefined name `T` here.  np.vstack needs
    # the new rows to have as many columns as W, so read the width from W.
    n_cols = W.shape[1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, n_cols))))
    return (Z, W, tree)
예제 #3
0
def print_paintbox(tree,W,data_dim,flag='four'):
    """Walk every paintbox leaf and reconstruct the likely ones.

    For each leaf index in ``range(D)`` the index is written as a bit list
    left-padded with zeros to ``K = int(log2(D))`` entries, and its
    probability is looked up with ``Z_paintbox``.  Leaves with probability
    above 0.01 are reconstructed as ``bits . W``.  All printing/plotting is
    currently disabled, so the results are discarded.

    Args:
        tree: paintbox tree as understood by ``get_vec`` / ``get_FD``.
        W: weight matrix multiplied by each bit pattern.
        data_dim: four-element sequence; only unpacked here.
        flag: not used by the active code.

    Returns:
        Always 0.
    """
    small_x, small_y, big_x, big_y = data_dim
    vec = get_vec(tree)
    F, D = get_FD(tree)
    K = int(math.log(D, 2))
    for leaf in range(D):
        # Binary digits of the leaf index, zero-padded on the left to K bits.
        bits = [int(ch) for ch in format(leaf, "b").zfill(K)]
        if Z_paintbox(bits, vec) > 0.01:
            reconstruct = np.dot(np.array(bits), W)
    return 0
예제 #4
0
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop):
    """Drop features via ``drop_feature`` and optionally add new ones.

    After ``drop_feature`` runs, the feature count ``F`` is read from
    ``get_FD``.  Nothing is added when ``F >= 12`` or when ``drop`` is
    truthy.  Otherwise ``more`` new features are added, where ``more`` is
    ``K - F`` if ``F + ext < K`` and ``ext`` otherwise: the tree is grown
    with ``add``, ``Z`` is extended with ``draw_feature`` and ``more`` rows
    drawn from ``Normal(0, sig_w)`` are appended to ``W``.

    Args:
        Y, sig: accepted for interface compatibility; not used here.
        Z: assignment matrix, passed through the helpers.
        W: weight matrix; assumed to be a 2-D NumPy array after
            ``drop_feature`` (its second dimension sets the width of the
            new rows).
        tree: paintbox tree as understood by the helper functions.
        ext: number of features to add when ``F + ext >= K``.
        K: feature count used in the ``F + ext < K`` comparison.
        res: resolution forwarded to ``add`` and ``draw_feature``.
        sig_w: scale of the normal draw for the new rows of ``W``.
        drop: when truthy, only the drop step is performed.

    Returns:
        Tuple ``(Z, W, tree)``.
    """
    # The original labelled this call a "debugging invariant" and never used
    # its result; the call is kept in case get_vec has side effects.
    get_vec(tree)
    Z, W, tree = drop_feature(Z, W, tree)
    F, _ = get_FD(tree)
    if F >= 12 or drop:
        return (Z, W, tree)
    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The original referenced an undefined name `T` here.  np.vstack needs
    # the new rows to have as many columns as W, so read the width from W.
    n_cols = W.shape[1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, n_cols))))
    return (Z, W, tree)
예제 #5
0
def draw_Z_tree(tree, N):
    """Draw an ``N x F`` binary assignment matrix from the paintbox tree.

    The leaf vector from ``get_vec`` is normalised to sum to one and a single
    multinomial draw splits the ``N`` rows among the ``D`` leaves.  Rows
    belonging to leaf ``i`` are filled with the binary digits of ``i``,
    left-padded with zeros to ``F`` entries.  Rows are laid out in leaf order
    (they are not shuffled).

    Args:
        tree: paintbox tree; must unpack into two components.
        N: number of rows to generate.

    Returns:
        Array of shape ``(N, F)``.
    """
    F, D = get_FD(tree)
    leaf_mass = get_vec(tree)
    leaf_probs = 1. / np.sum(leaf_mass) * leaf_mass
    counts = np.random.multinomial(N, leaf_probs)
    ctree, ptree = tree
    Z = np.zeros((N, F))
    stops = np.cumsum(counts)
    # `begin` tracks where the block of rows for the current leaf starts.
    begin = 0
    for leaf in range(D):
        bits = [int(ch) for ch in format(leaf, "b").zfill(F)]
        stop = stops[leaf]
        Z[begin:stop, :] = np.tile(bits, (counts[leaf], 1))
        begin = stop
    return Z
예제 #6
0
def ugibbs_sample(Y, ext, sig, sig_w, iterate, K, data_run):
    """Run the Gibbs sampler on ``Y`` for ``iterate`` sweeps.

    Each sweep resamples ``Z`` (``sample_Z``), the paintbox tree
    (``sample_pb``) and ``W`` (``sample_W``), records the data
    log-likelihood and the feature count, and then calls ``new_feature``.
    Progress is printed every 10th sweep.

    Args:
        Y: data array whose shape is read as ``(N, T)``.
        ext: forwarded to ``new_feature``.
        sig: forwarded to the samplers and to ``log_data_zw``.
        sig_w: scale of the normal draw that initialises ``W``; also
            forwarded to the samplers.
        iterate: number of sweeps to run.
        K: passed to ``gen_tree`` and used as the row count of the initial
            ``W``.
        data_run: trial number; only printed.

    Returns:
        Tuple ``(ll_list, iter_time, f_count, Z, W)`` where ``iter_time`` is
        the cumulative sum of the per-sweep wall-clock durations.
    """
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    # NOTE(review): `res` is never assigned in this function, so it must be
    # supplied by the enclosing module; otherwise this raises NameError.
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    W = np.random.normal(0, sig_w, K * T).reshape((K, T))
    log_liks, durations, feature_counts = [], [], []
    for sweep in range(iterate):
        tic = time.time()
        # The feature count may have changed in the previous sweep.
        N, K = Z.shape
        Z = sample_Z(Y, Z, W, sig, sig_w, tree)
        if sweep % 10 == 0:
            print(f"iteration: {sweep!s}")
            print(f"Sparsity: {np.sum(Z, axis=0)!s}")
        tree = sample_pb(Z, tree, res)
        W = sample_W(Y, Z, sig, sig_w)
        # Result unused; the call is retained from the original.
        vec = get_vec(tree)
        log_liks.append(log_data_zw(Y, Z, W, sig))
        F, D = get_FD(tree)
        feature_counts.append(F)
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w)
        toc = time.time()
        durations.append(toc - tic)
    return (log_liks, np.cumsum(durations), feature_counts, Z, W)
예제 #7
0
def upaintbox_sample(log_res,hold,Y,held_out,ext,sig,sig_w,iterate,K,truncate,obs_indices,limit,Z_init=[],W_init=[],data_dim = [3,3,2,2],init=False,display=False):
    """Run the paintbox sampler on ``Y`` under a sweep and time budget.

    Each sweep resamples ``Z`` (``sample_Z``), the paintbox tree
    (``sample_pb``) and ``W`` (``sample_W``), records the data
    log-likelihood and the feature count, and then calls ``new_feature``.
    On every 50th sweep (index 49, 99, ...) the recovery log-likelihood of
    the held-out data is computed with ``recover_paintbox``.

    Args:
        log_res: the resolution ``res`` starts at 1 and is doubled until it
            reaches ``2**log_res``.
        hold: ``res`` is only doubled on sweeps whose index is a multiple of
            ``hold`` (``hold == 0`` raises ZeroDivisionError).
        Y: data array whose shape is read as ``(N, T)``.
        held_out: held-out data; ``held_out[:, obs_indices]`` is the observed
            part passed to ``recover_paintbox``.
        ext: forwarded to ``new_feature``.
        sig: forwarded to the samplers and likelihood helpers.
        sig_w: scale of the normal draw that initialises ``W`` (when
            ``init`` is false); also forwarded to the samplers.
        iterate: maximum number of sweeps.
        K: passed to ``gen_tree`` and used as the row count of the initial
            ``W``.
        truncate: forwarded to ``new_feature``.
        obs_indices: column indices of ``held_out`` treated as observed.
        limit: sampling stops once ``sum(iter_time)`` exceeds this value.
        Z_init: accepted but not used; ``Z`` is always drawn from the tree.
        W_init: initial ``W``, used only when ``init`` is true.
        data_dim: accepted but not used by the active code.
        init: when true, start from ``W_init`` instead of a random ``W``.
        display: when true, print progress every 10th sweep.

    Returns:
        Tuple ``(ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
        pred_ll, rec_ll, tree)``.  ``iter_time`` is a cumulative sum.
        ``prob_matrix`` is the value from the last ``sample_Z`` call, or
        ``None`` when ``iterate`` is 0.  ``pred_ll`` holds a 0 placeholder
        per recovery step because the predictive likelihood is disabled.

    NOTE(review): a duration is appended to ``iter_time`` only on every 50th
    sweep, and it covers that single sweep (the timer restarts each sweep),
    so ``limit`` is compared against the sum of those samples rather than
    total elapsed time.  Kept as in the original; confirm the intent.
    """
    # The list defaults in the signature are never mutated here, so sharing
    # them between calls is harmless; they are kept for interface stability.
    N, T = Y.shape
    # Original author's note: generating the tree with res = 1 is technically
    # wrong, but is compensated by a hard-coded 0.5 in the tree paintbox code.
    res = 1
    tree = gen_tree(K, res)
    Z = draw_Z_tree(tree, N)
    if init:
        W = W_init
    else:
        W = np.reshape(np.random.normal(0, sig_w, K*T), (K, T))
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    rec_ll = []
    rec = 0
    # Defined up front so the return statement is valid even with zero sweeps.
    prob_matrix = None
    observe = held_out[:, obs_indices]
    max_res = 2**log_res
    for it in range(iterate):
        # Time budget check (never applied before the first sweep).
        if it > 0 and np.sum(iter_time) > limit:
            break
        if it % hold == 0 and res < max_res:
            res = res*2
        start = time.time()
        # The feature count can change between sweeps (see new_feature).
        _, K = Z.shape
        # sample Z
        Z, prob_matrix = sample_Z(Y, Z, W, sig, tree)
        if it % 10 == 0 and display:
            print("iteration: " + str(it))
            print("Sparsity: " + str(np.sum(Z, axis=0)))
            print('recover log likelihood: ' + str(rec))
        # sample paintbox
        tree, lapse = sample_pb(Z, tree, res)
        # sample W
        W = sample_W(Y, Z, sig, sig_w)
        ll_list.append(log_data_zw(Y, Z, W, sig))
        F, _ = get_FD(tree)
        f_count.append(F)
        # recovered log likelihood, every 50th sweep
        if it % 50 == 49:
            # Predictive log-likelihood is disabled; keep a placeholder so
            # pred_ll stays aligned with rec_ll.
            pred_ll.append(0)
            rec = recover_paintbox(held_out, observe, W, tree, sig, obs_indices)
            rec_ll.append(rec)
            iter_time.append(time.time() - start)
        # Last-iteration edge case: tell new_feature not to add features.
        drop = 1 if it == iterate - 1 else 0
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop, truncate)
        lapse_data.append(lapse)
    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix, pred_ll, rec_ll, tree)
예제 #8
0
def sample_pb(Z,tree,res):
    """Resample the split probability of every paintbox tree node.

    For node ``j`` in layer ``i`` the leaves ``start_zero..end_one`` of the
    leaf vector are split into a "zero" half and a "one" half.  The current
    split probability ``old_prob`` is the mass of the "one" half divided by
    the node's total mass.  Candidate probabilities ``k / res`` are taken
    from a window of at most three grid points around ``round(res *
    old_prob)``; each candidate leaf vector is scored with ``excise2`` and
    one candidate is sampled in proportion to ``exp(score)``.

    Nodes whose leaves have no entries in ``Z_compact(Z)`` or whose total
    mass is zero are skipped.

    Args:
        Z: assignment matrix, summarised via ``Z_compact``.
        tree: pair ``(ctree, ptree)``; ``ctree[i, j]`` is overwritten with
            the chosen probability.
        res: number of grid steps for the split probability.

    Returns:
        Tuple ``(tree, lapse)`` with the tree rebuilt by ``update`` and the
        wall-clock seconds spent in the node loop.

    Raises:
        SystemExit: if every candidate for a node scores as inf, NaN or -1.
    """
    F, D = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    compact = Z_compact(Z)
    start_pb = time.time()
    # iterate over features (rows of the paintbox)
    for i in range(F):
        # iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j*2**(F-i)
            end_one = (j+1)*2**(F-i) - 1
            if np.sum(compact[start_zero:end_one+1]) == 0:
                continue
            end_zero = j*2**(F-i) + 2**(F-i-1) - 1
            start_one = end_zero + 1
            tot = np.sum(vec[start_zero:end_one+1])
            if tot == 0:
                continue
            old_prob = float(np.sum(vec[start_one:end_one+1]))/tot
            # Mass represented by one grid step of the split probability.
            unit = float(tot)/res
            # Candidate window [lbound, ubound] on the grid, clamped to
            # [0, res].
            center = int(round(res*old_prob))
            if center == res:
                lbound = res - 1
                ubound = res
            elif center == 0:
                lbound = 0
                ubound = 1
            else:
                lbound = center - 1
                ubound = center + 1
            n_candidates = ubound - lbound + 1
            # One candidate leaf vector per row.
            mat_vec = np.tile(vec, (n_candidates, 1))
            # Rows of mat_vec that are still eligible, in scoring order.
            wheel = list(range(n_candidates))
            log_roulette = []
            for k in range(lbound, ubound+1):
                mat_pos = k - lbound
                new_prob = float(k)/res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # "One" half is empty: shrink the zero half and put
                        # the new mass on the first "one" leaf.
                        ratio_zero = float((1 - new_prob))/(1 - old_prob)
                        mat_vec[mat_pos, start_zero:end_zero+1] = ratio_zero*mat_vec[mat_pos, start_zero:end_zero+1]
                        mat_vec[mat_pos, start_one] = unit*k
                    elif old_prob == 1:
                        # "Zero" half is empty: shrink the one half and put
                        # the new mass on the last "zero" leaf.
                        ratio_one = float(new_prob)/old_prob
                        mat_vec[mat_pos, start_one:end_one+1] = ratio_one*mat_vec[mat_pos, start_one:end_one+1]
                        mat_vec[mat_pos, end_zero] = unit*(res-k)
                    else:
                        ratio_one = float(new_prob)/old_prob
                        ratio_zero = float((1 - new_prob))/(1 - old_prob)
                        mat_vec[mat_pos, start_one:end_one+1] = ratio_one*mat_vec[mat_pos, start_one:end_one+1]
                        mat_vec[mat_pos, start_zero:end_zero+1] = ratio_zero*mat_vec[mat_pos, start_zero:end_zero+1]
                # bottleneck line
                val = excise2(compact, mat_vec[mat_pos, :], start_zero, end_one)
                if math.isinf(val) or math.isnan(val) or val == -1:
                    wheel.remove(mat_pos)
                else:
                    log_roulette.append(val)
            if not log_roulette:
                # The original exited silently with status 0 here; report
                # the failure instead.
                sys.exit("paintbox update broken")
            # Subtract the maximum before exponentiating to avoid underflow.
            shift = max(log_roulette)
            roulette = [np.exp(lr - shift) for lr in log_roulette]
            total = np.sum(roulette)
            normal_roulette = [r/total for r in roulette]
            draw = np.random.multinomial(1, normal_roulette)
            hits = np.flatnonzero(draw == 1)
            if hits.size == 1:
                chosen = wheel[int(hits[0])]
            else:
                # HACK carried over from the original's TypeError fallback:
                # pick row 1.  Original warning: this changes if the
                # exponent of res is adjusted.
                chosen = 1
            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen+lbound)/res
    end_pb = time.time()
    lapse = end_pb - start_pb
    tree = update((ctree, ptree))
    return tree, lapse
예제 #9
0
def sample_pb(Z, tree, res):
    """Resample the split probability of every paintbox tree node.

    For node ``j`` in layer ``i`` the leaves ``start_zero..end_one`` of the
    leaf vector are split into a "zero" half and a "one" half.  The current
    split probability ``old_prob`` is the mass of the "one" half divided by
    the node's total mass.  Every grid value ``k / res`` for ``k`` in
    ``0..res`` is tried: the candidate leaf vector is scored with
    ``excise`` and one candidate is sampled in proportion to ``exp(score)``.
    Scores that are inf, NaN or exactly 0 get weight 0; if all weights are
    0 the candidates are sampled uniformly.

    Nodes whose total mass is zero are skipped.

    Args:
        Z: assignment matrix forwarded to ``excise``.
        tree: pair ``(ctree, ptree)``; ``ctree[i, j]`` is overwritten with
            the chosen probability.
        res: number of grid steps for the split probability.

    Returns:
        The tree rebuilt by ``update``.
    """
    F, D = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    # iterate over features (rows of the paintbox)
    for i in range(F):
        # iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j * 2**(F - i)
            end_zero = j * 2**(F - i) + 2**(F - i - 1) - 1
            start_one = j * 2**(F - i) + 2**(F - i - 1)
            end_one = (j + 1) * 2**(F - i) - 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                continue
            # Binary digits of the node's first leaf, left-padded to F
            # entries.  list() is required: on Python 3 map() returns an
            # iterator, which has no len().
            binary = list(map(int, "{0:b}".format(int(start_zero))))
            start = np.concatenate((np.zeros(F - len(binary)), binary))
            # One candidate leaf vector per row, row k <-> probability k/res.
            mat_vec = np.tile(vec, (res + 1, 1))
            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            # Mass represented by one grid step of the split probability.
            unit = float(tot) / res
            roulette = []
            for k in range(res + 1):
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # "One" half is empty: shrink the zero half and put
                        # the new mass on the first "one" leaf.
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                        mat_vec[k, start_one] = unit * k
                    elif old_prob == 1:
                        # "Zero" half is empty: shrink the one half and put
                        # the new mass on the last "zero" leaf.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, end_zero] = unit * (res - k)
                    else:
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                # bottleneck line
                val = excise(Z, mat_vec[k, :], start, i)
                if math.isinf(val) or math.isnan(val) or val == 0:
                    roulette.append(0.0)
                else:
                    roulette.append(np.exp(val))
            if np.sum(roulette) == 0:
                # No usable candidate: fall back to equal weights (they are
                # normalised below).
                roulette = 1. / res * np.ones(res + 1)
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]
            draw = np.random.multinomial(1, normal_roulette)
            hits = np.flatnonzero(draw == 1)
            if hits.size == 1:
                chosen = int(hits[0])
            else:
                # HACK carried over from the original's TypeError fallback:
                # keep the grid point nearest the current probability.
                chosen = int(round(res * old_prob))
            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen) / res
    tree = update((ctree, ptree))
    return tree