예제 #1
0
def pred_ll_paintbox(held,W,tree,sig):
    """Return the predictive log likelihood of the held-out rows.

    For each row of ``held`` the binary feature vector is marginalised out
    by enumerating all ``2**K`` candidates (``K`` = number of rows of ``W``).
    Each candidate contributes ``log_data_zw(row, z, W, sig)`` plus the log
    of the mass ``Z_paintbox`` gives it under ``get_vec(tree)``.

    The per-row sum is accumulated in log space (``np.logaddexp``), so a row
    whose terms are all far below the float underflow threshold still gets a
    finite value instead of ``log(0) = -inf``.

    Args:
        held: 2-D array, one held-out observation per row.
        W: 2-D array with one row per feature.
        tree: paintbox tree, passed to ``get_vec``.
        sig: forwarded unchanged to ``log_data_zw``.

    Returns:
        Sum over rows of the log marginal likelihood (``0`` when ``held``
        has no rows).
    """
    R = held.shape[0]
    K = W.shape[0]
    vec = get_vec(tree)

    # The candidate vectors and their prior mass do not depend on the data
    # row, so build the table once instead of once per row.
    # NOTE(review): assumes Z_paintbox is a pure function of (z, vec) — confirm.
    candidates = []
    for j in range(2**K):
        pad_binary = [int(c) for c in format(j, "b").zfill(K)]
        prob = Z_paintbox(pad_binary, vec)
        if prob == 0:
            # exp(x + log 0) contributes nothing; skipping avoids np.log(0).
            continue
        candidates.append((np.array(pad_binary), np.log(prob)))

    log_pred = 0
    for i in range(R):
        log_row = -np.inf
        for total_z, log_z_post in candidates:
            term = log_data_zw(held[i,:], total_z, W, sig) + log_z_post
            log_row = np.logaddexp(log_row, term)
        log_pred = log_pred + log_row
    return log_pred
예제 #2
0
def print_paintbox(tree,W,data_dim,flag='four'):
    """Walk every paintbox assignment and rebuild the likely ones.

    Each index below ``D`` (second value of ``get_FD(tree)``) is expanded to
    a zero-padded bit list. Assignments whose ``Z_paintbox`` mass exceeds
    0.01 are multiplied into ``W``. The printing/display calls that used the
    result are currently disabled, so the function only performs the
    computation and returns 0. ``flag`` is accepted but not used.
    """
    # data_dim must unpack into exactly four values; they are not used here.
    small_x,small_y,big_x,big_y = data_dim
    leaf_mass = get_vec(tree)
    n_feat,n_leaves = get_FD(tree)
    width = int(math.log(n_leaves,2))
    for idx in range(n_leaves):
        bits = [int(ch) for ch in format(idx, "b").zfill(width)]
        mass = Z_paintbox(bits,leaf_mass)
        if not mass > 0.01:
            continue
        # Reconstruction for this assignment; its display is switched off.
        np.dot(np.array(bits),W)
    return 0
예제 #3
0
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop=False,
                max_features=12):
    """Prune features via ``drop_feature`` and optionally append new ones.

    After ``drop_feature`` runs, nothing is added when ``drop`` is true or
    the tree already has ``max_features`` or more features. Otherwise
    ``more`` features are added: ``K - F`` when ``F + ext < K`` (top back up
    to ``K``), else ``ext``. The tree is extended with ``add``, ``Z`` with
    ``draw_feature``, and ``W`` gets ``more`` new rows drawn from
    ``N(0, sig_w)``.

    Args:
        Y, sig: accepted for signature compatibility; not used here.
        Z, W, tree: current state, passed through ``drop_feature``.
        ext: number of extra features to propose.
        K: feature count before this call.
        res: resolution forwarded to ``add`` / ``draw_feature``.
        sig_w: scale of the normal draw for the new rows of ``W``.
        drop: when true, only drop features. Defaults to False so the
            nine-argument call form works.
        max_features: cap on the feature count (previously hard-coded 12).

    Returns:
        Tuple ``(Z, W, tree)``.
    """
    # Kept from the original "debugging invariant": the result is unused.
    # NOTE(review): presumably this only exercises the tree as a sanity check.
    get_vec(tree)
    Z, W, tree = drop_feature(Z, W, tree)
    F, D = get_FD(tree)
    if F >= max_features or drop:
        return (Z, W, tree)

    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The original referenced an undefined ``T`` here. The new rows must
    # match W's column count for vstack, so take it from W.
    T = W.shape[1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, T))))
    return (Z, W, tree)
예제 #4
0
def draw_Z_tree(tree, N):
    """Draw an ``N``-row binary matrix from the paintbox leaf masses.

    The vector from ``get_vec(tree)`` is normalised and used as multinomial
    probabilities for ``N`` draws. Leaf ``i`` then contributes as many rows
    as it was drawn, each row being the zero-padded binary expansion of
    ``i``. Rows are laid out in leaf order (not shuffled).
    """
    n_feat, n_leaves = get_FD(tree)
    leaf_mass = get_vec(tree)
    leaf_prob = 1. / np.sum(leaf_mass) * leaf_mass
    counts = np.random.multinomial(N, leaf_prob)
    ctree, ptree = tree  # unpacked as in the original; values are unused
    Z = np.zeros((N, n_feat))
    stops = np.cumsum(counts)
    for leaf in range(n_leaves):
        bits = [int(ch) for ch in format(leaf, "b").zfill(n_feat)]
        begin = 0 if leaf == 0 else stops[leaf - 1]
        # A leaf with zero draws yields an empty block and an empty slice.
        Z[begin:stops[leaf], :] = np.tile(bits, (counts[leaf], 1))
    return Z
예제 #5
0
def add_feature(i,Y,Z,W,tree,vec,prior,sig,sig_w):
    """Propose one new feature owned by row ``i`` and accept it at random.

    A column that is 1 only in row ``i`` is appended to ``Z`` and a row drawn
    from ``N(0, sig_w)`` is appended to ``W``. The proposal is accepted with
    probability proportional to ``exp(new_ll - old_ll) * prior[1]`` against
    ``prior[0]`` for keeping the current state, where both log likelihoods
    come from ``log_data_zw``.

    Args:
        i: index of the row that receives the new feature.
        Y: 2-D data array.
        Z: 2-D assignment array with the same number of rows as ``Y``.
        W: 2-D array with one row per feature.
        tree, vec: current tree and its vector; replaced on acceptance.
        prior: two weights, ``[keep, add]``.
        sig: forwarded to ``log_data_zw``.
        sig_w: scale of the normal draw for the new row of ``W``.

    Returns:
        Tuple ``(Z, W, tree, vec)``; unchanged inputs if rejected.
    """
    T = Y.shape[1]
    N = Z.shape[0]
    old = log_data_zw(Y,Z,W,sig)
    col = np.zeros((N,1))
    col[i,0] = 1
    Z_new = np.hstack((Z,col))
    W_new = np.vstack((W,np.random.normal(0,sig_w,(1,T))))
    new = log_data_zw(Y,Z_new,W_new,sig)

    # Work with the log ratio and subtract the larger exponent before
    # exponentiating. A large improvement then no longer overflows to inf,
    # which previously made both probabilities nan.
    log_ratio = new - old
    shift = max(0, log_ratio)
    roulette = [np.exp(-shift)*prior[0],np.exp(log_ratio - shift)*prior[1]]
    total = np.sum(roulette)
    normal_roulette = [float(r)/total for r in roulette]
    # argmax of the one-hot draw gives the chosen index without converting
    # a 1-element array to int (deprecated in recent NumPy).
    chosen = int(np.argmax(np.random.multinomial(1,normal_roulette)))
    if chosen:
        Z = Z_new
        W = W_new
        # NOTE(review): ``res`` is not a parameter of this function; it must
        # exist as a module-level name for this line to run — confirm.
        tree = add(tree,res)
        vec = get_vec(tree)
    return (Z,W,tree,vec)
예제 #6
0
def recover_paintbox(held, observe, W, tree, sig):
    """Return the log probability of full rows given their observed part.

    For each row the feature vector is marginalised out by enumerating all
    ``2**K`` binary candidates (``K`` = number of rows of ``W``). The
    marginal is computed twice: for the full row of ``held`` against ``W``,
    and for the matching row of ``observe`` against the first ``half``
    columns of ``W`` (``half`` = column count of ``observe``). The result is
    the sum over rows of ``log full - log observed``.

    Both marginals are accumulated in log space (``np.logaddexp``).
    Summing raw ``exp`` terms could underflow both to 0 and return
    ``log(0) - log(0) = nan``.

    Args:
        held: 2-D array of complete rows.
        observe: 2-D array holding the observed columns of the same rows; it
            is indexed with the row indices of ``held``.
        W: 2-D array with one row per feature.
        tree: paintbox tree, passed to ``get_vec``.
        sig: forwarded unchanged to ``log_data_zw``.

    Returns:
        Sum over rows of the log ratio (``0`` when ``held`` has no rows).
    """
    half = observe.shape[1]
    R = held.shape[0]
    K = W.shape[0]
    vec = get_vec(tree)
    W_observed = W[:, :half]

    # Candidate vectors and their prior mass are row-independent: build once.
    # NOTE(review): assumes Z_paintbox is a pure function of (z, vec) — confirm.
    candidates = []
    for j in range(2**K):
        pad_binary = [int(c) for c in format(j, "b").zfill(K)]
        prob = Z_paintbox(pad_binary, vec)
        if prob == 0:
            # Zero-mass candidates add nothing; skipping avoids np.log(0).
            continue
        candidates.append((np.array(pad_binary), np.log(prob)))

    log_recover = 0
    for i in range(R):
        log_full = -np.inf
        log_observe = -np.inf
        for total_z, log_z_post in candidates:
            log_full = np.logaddexp(
                log_full,
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
            log_observe = np.logaddexp(
                log_observe,
                log_data_zw(observe[i, :], total_z, W_observed, sig) +
                log_z_post)
        log_recover = log_recover + log_full - log_observe
    return log_recover
예제 #7
0
def sample_Z(Y, Z, W, sig, tree):
    """Resample every entry of ``Z`` once and record ``P(entry = 1)``.

    For entry ``(i, j)`` the row is evaluated with the entry set to 1 and to
    0. If the paintbox (``Z_paintbox`` on ``get_vec(tree)``) gives one of
    the two rows zero mass, the entry is forced to the other value.
    Otherwise it is drawn from a Bernoulli whose odds are the likelihood
    ratio from ``log_uncollapsed`` times the ratio of paintbox masses.

    ``Z`` is modified in place and also returned.

    Args:
        Y: 2-D data array with one row per row of ``Z``.
        Z: 2-D binary assignment array (rows x features).
        W: feature weights forwarded to ``log_uncollapsed``.
        sig: forwarded to ``log_uncollapsed``.
        tree: paintbox tree, passed to ``get_vec``.

    Returns:
        Tuple ``(Z, prob_matrix)`` where ``prob_matrix[i, j]`` is the
        probability used for entry ``(i, j)``.
    """
    N, K = Z.shape
    prob_matrix = np.zeros([N, K])
    vec = get_vec(tree)
    for i in range(N):
        for j in range(K):
            # Only row i is ever inspected, so copy the row rather than the
            # whole matrix (the original copied Z twice per entry).
            row_one = np.copy(Z[i, :])
            row_zero = np.copy(Z[i, :])
            row_one[j] = 1
            row_zero[j] = 0
            zp_one = Z_paintbox(row_one, vec)
            zp_zero = Z_paintbox(row_zero, vec)

            # Forced moves. The original wrote ``Z[i, j] == 1`` / ``== 0``,
            # which compares and discards, so Z was never updated although
            # prob_matrix was. When both masses are zero the "forced to 1"
            # branch wins, matching the final prob_matrix of the original.
            if zp_zero == 0:
                Z[i, j] = 1
                prob_matrix[i, j] = 1
                continue
            if zp_one == 0:
                Z[i, j] = 0
                prob_matrix[i, j] = 0
                continue

            # Only the difference of the two log likelihoods matters; using
            # it directly keeps the exponent as small as possible.
            log_odds = (log_uncollapsed(Y[i, :], row_one, W, sig) -
                        log_uncollapsed(Y[i, :], row_zero, W, sig))
            weight_one = np.exp(log_odds)
            if math.isinf(weight_one):
                # exp overflowed: the "1" state dominates completely.
                p_one = 1
            else:
                p_one = float(weight_one * zp_one) / (
                    weight_one * zp_one + zp_zero)
            Z[i, j] = np.random.binomial(1, p_one)
            prob_matrix[i, j] = p_one
    return (Z, prob_matrix)
예제 #8
0
def sample_Z(Y, Z, W, sig, sig_w, tree):
    """Resample every entry of ``Z`` once, in place, and return ``Z``.

    For entry ``(i, j)`` the row is evaluated with the entry set to 1 and to
    0. If ``Z_paintbox`` gives one of the two rows zero mass, the entry is
    forced to the other value. Otherwise it is drawn from a Bernoulli whose
    odds are the likelihood ratio from ``log_uncollapsed`` times the ratio of
    paintbox masses.

    Args:
        Y: 2-D data array with one row per row of ``Z``.
        Z: 2-D binary assignment array (rows x features); modified in place.
        W: feature weights forwarded to ``log_uncollapsed``.
        sig: forwarded to ``Z_paintbox`` and ``log_uncollapsed``.
        sig_w: accepted for signature compatibility; not used here.
        tree: paintbox tree, passed to ``get_vec``.

    Returns:
        The updated ``Z``.
    """
    N, K = Z.shape
    vec = get_vec(tree)
    for i in range(N):
        for j in range(K):
            # Only row i is ever inspected, so copy the row rather than the
            # whole matrix (the original copied Z twice per entry).
            row_one = np.copy(Z[i, :])
            row_zero = np.copy(Z[i, :])
            row_one[j] = 1
            row_zero[j] = 0
            # NOTE(review): ``D`` is not defined in this function; it must
            # exist as a module-level name for these calls to run — confirm
            # what it is meant to be.
            zp_one = Z_paintbox(row_one, vec, sig, D)
            zp_zero = Z_paintbox(row_zero, vec, sig, D)

            # Forced moves. The original wrote ``Z[i, j] == 1`` / ``== 0``,
            # which compares and discards, so the entry was never updated.
            # When both masses are zero the "forced to 1" branch wins, which
            # is the branch the original reached last.
            if zp_zero == 0:
                # Warn before writing: the feature had no owner and is being
                # switched on only because the paintbox forbids 0 here.
                if np.sum(Z, axis=0)[j] == 0:
                    print("Feature Kick: Undesirable Behavior")
                Z[i, j] = 1
                continue
            if zp_one == 0:
                Z[i, j] = 0
                continue

            # Only the difference of the two log likelihoods matters; using
            # it directly keeps the exponent as small as possible.
            log_odds = (log_uncollapsed(Y[i, :], row_one, W, sig) -
                        log_uncollapsed(Y[i, :], row_zero, W, sig))
            weight_one = np.exp(log_odds)
            if math.isinf(weight_one):
                # exp overflowed: the "1" state dominates completely.
                p_one = 1
            else:
                p_one = float(weight_one * zp_one) / (
                    weight_one * zp_one + zp_zero)
            Z[i, j] = np.random.binomial(1, p_one)
    return Z
예제 #9
0
def sample_recover(held,observe,W,tree,sig,obs_indices):
    """Score the hidden columns of ``held`` under a sampled assignment.

    An assignment matrix is initialised with ``draw_Z_tree`` and refined by
    100 ``sample_Z`` sweeps that only see the columns in ``obs_indices``.
    The returned value is the sum of ``log Z_paintbox(row)`` over the rows
    of the final matrix, plus ``log_data_zw`` evaluated on the columns that
    are not in ``obs_indices``.
    """
    n_obs_rows,n_obs_cols = observe.shape  # shape check only; values unused
    n_rows,_ = held.shape
    _,n_cols = W.shape
    leaf_mass = get_vec(tree)
    W_seen = W[:,obs_indices]
    Z = draw_Z_tree(tree,n_rows)
    hidden = [col for col in range(n_cols) if col not in obs_indices]

    sweeps = 100
    for _sweep in range(sweeps):
        # The per-entry probabilities returned by sample_Z are not needed.
        Z,_probs = sample_Z(held[:,obs_indices],Z,W_seen,sig,tree)

    total = 0
    for z_row in Z:
        total = total + np.log(Z_paintbox(z_row,leaf_mass))
    total = total + log_data_zw(held[:,hidden],Z,W[:,hidden],sig)
    return total
예제 #10
0
def ugibbs_sample(Y, ext, sig, sig_w, iterate, K, data_run):
    """Run ``iterate`` sweeps of the sampler and collect diagnostics.

    Each sweep, in order: resample ``Z``, resample the paintbox tree,
    resample ``W``, record ``log_data_zw`` and the feature count, then call
    ``new_feature``. ``res`` is read from the enclosing module scope.

    Returns:
        Tuple ``(log likelihoods, cumulative sweep times, feature counts,
        Z, W)``.
    """
    print("Trial Number: " + str(data_run))
    n_rows, n_cols = Y.shape
    tree = gen_tree(K, res)
    ctree, ptree = tree  # unpacked as in the original; values are unused
    Z = draw_Z_tree(tree, n_rows)
    W = np.random.normal(0, sig_w, K * n_cols).reshape((K, n_cols))

    log_liks, durations, feature_counts = [], [], []
    for sweep in range(iterate):
        tic = time.time()
        # The feature count may have changed in the previous sweep.
        K = Z.shape[1]

        Z = sample_Z(Y, Z, W, sig, sig_w, tree)
        if sweep % 10 == 0:
            print("iteration: " + str(sweep))
            print("Sparsity: " + str(np.sum(Z, axis=0)))

        tree = sample_pb(Z, tree, res)
        W = sample_W(Y, Z, sig, sig_w)

        get_vec(tree)  # result unused; call kept from the original
        log_liks.append(log_data_zw(Y, Z, W, sig))
        feature_counts.append(get_FD(tree)[0])

        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w)
        durations.append(time.time() - tic)

    return (log_liks, np.cumsum(durations), feature_counts, Z, W)
예제 #11
0
def recover_paintbox(held,observe,W,tree,sig,obs_indices):
    """Approximate the log probability of full rows given observed columns.

    For each row, all ``2**K`` binary feature vectors are enumerated
    (``K`` = number of rows of ``W``). Instead of summing over them, only
    the largest joint log term is kept, once for the full row (``held``
    against ``W``) and once for the observed part (``observe`` against
    ``W[:, obs_indices]``). The return value is the sum over rows of
    ``f_max - o_max``.

    Upper and lower bounds are also accumulated, but they are only
    referenced by the commented-out prints at the end and are not returned.

    Note: ``observe`` is indexed with the row indices of ``held``.
    """
    # N and obs are not used below.
    N,obs = observe.shape 
    R,T = held.shape
    K,T = W.shape
    log_recover = 0
    upper_bound = 0
    lower_bound = 0
    vec = get_vec(tree)
    for i in range(R):
        # f_* track the full row, o_* the observed columns.
        # *_max is the largest term seen so far; *_error is the value that
        # the most recent new maximum displaced. 0 doubles as "nothing
        # displaced yet" (see the ``== 0`` checks below).
        f_max = 0
        o_max = 0
        f_error = 0
        o_error = 0
        # Per-row upper/lower values for numerator (full) and denominator
        # (observed).
        numu = 0
        numl = 0
        denu = 0
        denl = 0
        # True until the first vector with non-zero paintbox mass is seen.
        valid = True
        for j in range(2**K):
            # Zero-padded binary expansion of j, length K.
            binary = list(map(int,"{0:b}".format(j)))
            pad_binary = [0]*(K-len(binary)) + binary
            log_z_post = np.log(Z_paintbox(pad_binary,vec))
            # Vectors with zero mass (log = -inf) are skipped. If every
            # vector is skipped, f_max and o_max stay at their initial 0.
            if math.isinf(log_z_post):
                continue
            total_z = np.array(pad_binary)
            fll = log_data_zw(held[i,:],total_z,W,sig) + log_z_post
            oll = log_data_zw(observe[i,:],total_z,W[:,obs_indices],sig) + log_z_post
            if valid:
                # First usable vector initialises both maxima.
                f_max = fll
                o_max = oll
                valid = False
            else:
                if fll > f_max:
                    f_error = f_max
                    f_max = fll
                if oll > o_max:
                    o_error = o_max
                    o_max = oll
        # The estimate uses only the dominant term of each sum.
        log_recover = log_recover + f_max - o_max
        # Upper value for the numerator: if nothing was displaced, or the
        # maximum exceeds the displaced value by more than 10 nats, use the
        # maximum alone. Otherwise add a term that charges the displaced
        # value to (2**K - 1) other vectors.
        if f_error == 0:
            numu = numu + f_max
        elif f_max - f_error > 10:
            numu = numu + f_max
        else:
            numu = numu + np.log(np.exp(f_max-f_error) + (2**K - 1)) + f_error
        
        # Lower value for the numerator: the maximum term alone.
        numl = numl + f_max
        
        # Same construction for the denominator (observed columns).
        if o_error == 0:
            denu = denu + o_max
        elif o_max - o_error > 10:
            denu = denu + o_max
        else:
            denu = denu + np.log(np.exp(o_max-o_error) + (2**K - 1)) + o_error
        
        denl = denl + o_max
        
        # Accumulated but never returned; see the disabled prints below.
        upper_bound = upper_bound + numu - denl
        lower_bound = lower_bound + numl - denu
        #if math.isinf(lower_bound):
            #print("lower bound isinf")
            
    #print("estimate")
    #print(log_recover)
    #print("lower bound")
    #print(lower_bound)
    #print("upper bound")
    #print(upper_bound)
    return log_recover
예제 #12
0
def sample_pb(Z,tree,res):
    """Resample the node probabilities of the paintbox tree given ``Z``.

    Nodes are visited layer by layer (layer ``i`` has ``2**i`` nodes). Each
    node owns a contiguous slice of the leaf vector ``vec``, split into a
    "zero" half and a "one" half. Its probability is the share of the
    slice's mass in the "one" half. Candidate probabilities ``k/res`` are
    tried only in a small window around the current value; each candidate
    is scored with ``excise2`` and one is drawn from the normalised scores.

    Returns:
        Tuple ``(tree, lapse)``: the tree rebuilt by ``update`` and the
        wall-clock time spent in the loop.

    Note: the process is terminated with ``sys.exit()`` if no candidate of
    a node gets a usable score.
    """
    bound = 2 #the exponent (not used below)
    F,D = get_FD(tree)
    ctree,ptree = tree
    vec = get_vec(tree)
    # NOTE(review): presumably a per-leaf summary of Z aligned with vec —
    # confirm against Z_compact.
    compact = Z_compact(Z)
    #iterate over features (row of paintbox)
    start_pb = time.time()
    # count tallies the nodes that pass the data check; it is never read.
    count = 0
    for i in range(F):
        #iterate over nodes j in tree layer i 
        for j in range(2**i):
            # Leaf slice owned by node (i, j): [start_zero, end_one].
            start_zero = j*2**(F-i)
            end_one = (j+1)*2**(F-i) - 1
            # Skip nodes whose slice of compact sums to zero.
            if np.sum(compact[start_zero:end_one+1]) == 0:
                continue
            count = count + 1
            # First half of the slice is the "zero" branch, second half the
            # "one" branch.
            end_zero = j*2**(F-i) + 2**(F-i-1) - 1
            #start_one =  j*2**(F-i) + 2**(F-i-1)
            start_one = end_zero + 1
            tot = np.sum(vec[start_zero:end_one+1])
            # A node with no mass cannot be rescaled.
            if tot == 0:
                continue
            else:
                # binary is a lazy map object that is never consumed.
                binary = map(int,"{0:b}".format(int(start_zero)))
                #start = np.concatenate((np.zeros(F-len(binary)), binary))
                # Current probability of the "one" branch at this node.
                old_prob = float(np.sum(vec[start_one:end_one+1]))/tot 
                # Mass corresponding to one grid step of 1/res.
                unit = float(np.sum(vec[start_zero:end_one+1]))/res
                log_roulette = []
                # Nearest grid index to the current probability; candidates
                # are limited to its neighbours, clipped to [0, res].
                center = int(round(res*old_prob))
                if center == res:
                    lbound = res - 1
                    ubound = res
                elif center == 0:
                    lbound = 0
                    ubound = 1
                else:
                    lbound = center - 1
                    ubound = center + 1
                #lbound = 0
                #ubound = res
                # One copy of vec per candidate; row mat_pos holds the leaf
                # vector that results from choosing grid index lbound+mat_pos.
                mat_vec = np.tile(vec,(ubound-lbound+1,1))
                # wheel lists the candidate rows that are still eligible.
                wheel = [w for w in range(ubound-lbound+1)]
                for k in range(lbound,ubound+1):
                    mat_pos = k - lbound
                    new_prob = float(k)/res
                    if old_prob != new_prob:
                        if old_prob == 0:
                            # "One" half is empty, so it cannot be scaled:
                            # shrink the "zero" half and put all the new mass
                            # on the first "one" leaf.
                            ratio_zero = float((1 - new_prob))/(1 - old_prob) 
                            mat_vec[mat_pos,start_zero:end_zero+1] = ratio_zero*mat_vec[mat_pos,start_zero:end_zero+1]                       
                            mat_vec[mat_pos,start_one] = unit*k
                        elif old_prob == 1:
                            # Mirror case: "zero" half is empty; the new mass
                            # goes on the last "zero" leaf.
                            ratio_one = float(new_prob)/old_prob
                            mat_vec[mat_pos,start_one:end_one+1] = ratio_one*mat_vec[mat_pos,start_one:end_one+1]
                            mat_vec[mat_pos,end_zero] = unit*(res-k)
                        else:        
                            # General case: rescale both halves proportionally.
                            ratio_one = float(new_prob)/old_prob
                            ratio_zero = float((1 - new_prob))/(1 - old_prob)
                            mat_vec[mat_pos,start_one:end_one+1] = ratio_one*mat_vec[mat_pos,start_one:end_one+1]
                            mat_vec[mat_pos,start_zero:end_zero+1] = ratio_zero*mat_vec[mat_pos,start_zero:end_zero+1]
                    #bottleneck line  
                    #val = excise(Z,mat_vec[mat_pos,:],start,i)
                    # Log score of this candidate; inf, nan and -1 are
                    # treated as unusable and drop the candidate from wheel.
                    val = excise2(compact,mat_vec[mat_pos,:],start_zero,end_one)
                    if math.isinf(val) or math.isnan(val) or val == -1:
                        wheel.remove(mat_pos)
                    else:
                        log_roulette.append(val)
                if len(log_roulette) == 0:
                    #print("paintbox update broken")
                    # No candidate is usable: abort the whole process.
                    sys.exit()
                # Subtract the maximum before exponentiating so the largest
                # weight is exactly 1.
                shift = max(log_roulette)
                roulette = [np.exp(lr - shift) for lr in log_roulette] 
                normal_roulette = [r/np.sum(roulette) for r in roulette]
                #Hacked Solution Beware
                try:
                    # bucket indexes the surviving candidates; wheel maps it
                    # back to a row of mat_vec.
                    bucket = int(np.where(np.random.multinomial(1,normal_roulette) == 1)[0])
                    chosen = wheel[bucket]
                except TypeError:
                    #BEWARE, THIS CHANGES IF YOU ADJUST THE EXPONENT OF RES
                    # Fallback to row 1: the centre of a three-wide window,
                    # or the upper candidate of a clipped two-wide window.
                    chosen = 1
                    #print("INVARIANT BROKEN")
                # Later nodes see the rescaled leaf vector.
                vec = mat_vec[chosen,:]
            # Reached only via the else branch above (both skips use
            # continue), so chosen and lbound are always set here. The first
            # assignment is immediately overwritten.
            ctree[i,j] = 0
            ctree[i,j] = float(chosen+lbound)/res
    end_pb = time.time()
    lapse = end_pb-start_pb
    tree = update((ctree,ptree))
    return tree,lapse
예제 #13
0
def sample_pb(Z, tree, res):
    """Resample the node probabilities of the paintbox tree given ``Z``.

    Nodes are visited layer by layer (layer ``i`` has ``2**i`` nodes). Each
    node owns a contiguous slice of the leaf vector ``vec``, split into a
    "zero" half and a "one" half. Its probability is the share of the
    slice's mass in the "one" half. Every grid value ``k/res`` for
    ``k = 0..res`` is tried: the two halves are rescaled accordingly, the
    candidate vector is scored with ``excise`` (a log value), and one
    candidate is drawn in proportion to ``exp(score)``.

    Args:
        Z: assignment matrix forwarded to ``excise``.
        tree: ``(ctree, ptree)`` pair; ``ctree`` is updated in place.
        res: number of grid steps for a node probability.

    Returns:
        The tree rebuilt by ``update((ctree, ptree))``.
    """
    F, D = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    #iterate over features (row of paintbox)
    for i in range(F):
        #iterate over nodes j in tree layer i
        for j in range(2**i):
            # Leaf slice owned by node (i, j): [start_zero, end_one], first
            # half "zero" branch, second half "one" branch.
            start_zero = j * 2**(F - i)
            end_zero = j * 2**(F - i) + 2**(F - i - 1) - 1
            start_one = j * 2**(F - i) + 2**(F - i - 1)
            end_one = (j + 1) * 2**(F - i) - 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                # A node with no mass cannot be rescaled.
                continue

            # Zero-padded binary expansion of the first leaf of the slice.
            # The original called len() on a bare map object, which raises
            # TypeError on Python 3; materialise it as a list first.
            binary = list(map(int, "{0:b}".format(int(start_zero))))
            start = np.concatenate((np.zeros(F - len(binary)), binary))

            # One copy of vec per candidate k; built only for nodes that
            # are actually resampled.
            mat_vec = np.tile(vec, (res + 1, 1))
            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            # Mass corresponding to one grid step of 1/res.
            unit = float(tot) / res

            # Log score per candidate; None marks an unusable one.
            log_scores = []
            for k in range(res + 1):
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # "One" half is empty, so it cannot be scaled: shrink
                        # the "zero" half and put the new mass on one leaf.
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                        mat_vec[k, start_one] = unit * k
                    elif old_prob == 1:
                        # Mirror case: "zero" half is empty.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, end_zero] = unit * (res - k)
                    else:
                        # General case: rescale both halves proportionally.
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                #bottleneck line
                val = excise(Z, mat_vec[k, :], start, i)
                # As in the original, inf, nan and exactly 0 are unusable.
                if math.isinf(val) or math.isnan(val) or val == 0:
                    log_scores.append(None)
                else:
                    log_scores.append(val)

            usable = [s for s in log_scores if s is not None]
            if usable:
                # Subtract the maximum before exponentiating. Raw exp(val)
                # underflows to 0 for very negative scores (silently turning
                # the draw uniform) and overflows for large ones.
                shift = max(usable)
                roulette = [0.0 if s is None else np.exp(s - shift)
                            for s in log_scores]
            else:
                # No usable candidate: fall back to equal weights.
                roulette = 1. / res * np.ones(res + 1)
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]

            draw = np.random.multinomial(1, normal_roulette)
            hits = np.flatnonzero(draw == 1)
            if hits.size == 1:
                chosen = int(hits[0])
            else:
                # Same fallback the original reached through TypeError when
                # the draw did not contain exactly one 1: keep the grid
                # value closest to the current probability.
                chosen = int(round(res * old_prob))

            # Later nodes see the rescaled leaf vector.
            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen) / res
    tree = update((ctree, ptree))
    return tree