def pred_ll_paintbox(held, W, tree, sig):
    """Return the summed predictive log likelihood of the held-out rows.

    For every row of ``held`` the joint ``log_data_zw(row, z, W, sig) +
    log(Z_paintbox(z, vec))`` is marginalised over all ``2**K`` binary
    vectors ``z`` and the per-row log marginals are added up.

    Args:
        held: 2-D array, one held-out observation per row.
        W: 2-D array; its first dimension ``K`` fixes the length of ``z``.
        tree: paintbox tree, only passed to ``get_vec``.
        sig: forwarded unchanged to ``log_data_zw``.

    Returns:
        Sum over rows of ``log(sum_z exp(log joint))``.
    """
    #should you be comparing the predictive log likelihood? I think you should
    R, _ = held.shape
    K, _ = W.shape
    vec = get_vec(tree)
    log_pred = 0
    for i in range(R):
        # Accumulate in log space: summing exp(log-likelihood) directly
        # underflows to 0 for strongly negative values and yields log(0).
        log_row = -np.inf
        for j in range(2**K):
            # K-bit binary expansion of j, most significant bit first.
            bits = [int(c) for c in "{0:b}".format(j).zfill(K)]
            log_z_post = np.log(Z_paintbox(bits, vec))
            log_joint = log_data_zw(held[i, :], np.array(bits), W, sig) + log_z_post
            log_row = np.logaddexp(log_row, log_joint)
        log_pred = log_pred + log_row
    return log_pred
def print_paintbox(tree, W, data_dim, flag='four'):
    """Reconstruct every paintbox assignment whose probability exceeds 0.01.

    All printing/display calls are currently commented out, so the
    reconstructions are computed and discarded.

    Args:
        tree: paintbox tree, passed to ``get_vec`` and ``get_FD``.
        W: weight matrix multiplied by each binary assignment.
        data_dim: sequence of exactly four values (unpacked, otherwise unused).
        flag: unused while the display call is disabled.

    Returns:
        Always 0.
    """
    small_x, small_y, big_x, big_y = data_dim
    vec = get_vec(tree)
    F, D = get_FD(tree)
    K = int(math.log(D, 2))
    #print("outputting paintbox")
    for idx in range(D):
        # K-bit binary expansion of idx, most significant bit first.
        bits = [int(ch) for ch in "{0:b}".format(idx).zfill(K)]
        prob = Z_paintbox(bits, vec)
        if not prob > 0.01:
            continue
        assignment = np.array(bits)
        reconstruct = np.dot(assignment, W)
        #print("pad binary, reconstruct, probability")
        #print(pad_binary)
        #print(prob)
        #display_W(reconstruct,data_dim,flag)
    return 0
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop=False,
                max_features=12):
    """Drop features via ``drop_feature`` and optionally append new ones.

    After ``drop_feature`` runs, nothing is added when the tree already has
    ``max_features`` or more features, or when ``drop`` is true.  Otherwise
    ``K - F`` features are added if ``F + ext < K`` and ``ext`` features
    otherwise; new rows of ``W`` are drawn from ``Normal(0, sig_w)``.

    Args:
        Y: unused here (kept for interface compatibility).
        Z: feature-assignment matrix.
        W: weight matrix; new rows match its trailing dimension.
        tree: paintbox tree as a ``(ctree, ptree)`` pair.
        ext: number of features to add when ``F + ext >= K``.
        K: feature count compared against ``F + ext``.
        res: resolution forwarded to ``add`` and ``draw_feature``.
        sig: unused here.
        sig_w: standard deviation for the new rows of ``W``.
        drop: when true, only the drop step is performed.
        max_features: cap on the number of features (default 12).

    Returns:
        Tuple ``(Z, W, tree)``.
    """
    #debugging invariant
    # NOTE(review): the result is unused; the call is kept in case get_vec
    # validates the tree -- confirm.
    get_vec(tree)
    Z, W, tree = drop_feature(Z, W, tree)
    F, _ = get_FD(tree)
    if F >= max_features or drop:
        return (Z, W, tree)
    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The column count was previously read from an unbound name ``T``;
    # take it from W so the stacked rows always line up.
    T = np.shape(W)[-1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, T))))
    #W = sample_W(Y,Z,sig,sig_w)
    return (Z, W, tree)
def draw_Z_tree(tree, N):
    """Draw an ``N x F`` binary matrix from the paintbox described by ``tree``.

    A single multinomial draw over the normalised ``get_vec(tree)`` decides
    how many rows get each of the ``D`` bit patterns; rows are laid out in
    increasing pattern order (no shuffling).

    Args:
        tree: paintbox tree as a ``(ctree, ptree)`` pair.
        N: number of rows to draw.

    Returns:
        Float array of shape ``(N, F)`` containing zeros and ones.
    """
    F, D = get_FD(tree)
    vec = get_vec(tree)
    weights = 1. / np.sum(vec) * vec
    counts = np.random.multinomial(N, weights)
    ctree, ptree = tree
    Z = np.zeros((N, F))
    stops = np.cumsum(counts)
    #generate Z
    for idx in range(D):
        # F-bit binary expansion of idx, most significant bit first.
        bits = [int(ch) for ch in "{0:b}".format(idx).zfill(F)]
        block = np.tile(bits, (counts[idx], 1))
        begin = 0 if idx == 0 else stops[idx - 1]
        Z[begin:stops[idx], :] = block
    #np.random.shuffle(Z)
    return Z
def add_feature(i, Y, Z, W, tree, vec, prior, sig, sig_w):
    """Propose one new feature owned by row ``i`` and accept it at random.

    The proposal appends a column to ``Z`` (1 in row ``i``, 0 elsewhere) and
    a ``Normal(0, sig_w)`` row to ``W``.  It is accepted with probability
    proportional to ``exp(new - old) * prior[1]`` against ``prior[0]``, where
    ``old``/``new`` are ``log_data_zw`` before and after the proposal.

    Args:
        i: row index that receives the new feature.
        Y: 2-D data array.
        Z: 2-D feature-assignment array.
        W: weight matrix.
        tree: paintbox tree, extended with ``add`` on acceptance.
        vec: current paintbox vector, refreshed on acceptance.
        prior: two weights, ``prior[0]`` for keeping, ``prior[1]`` for adding.
        sig: forwarded to ``log_data_zw``.
        sig_w: standard deviation of the proposed row of ``W``.

    Returns:
        Tuple ``(Z, W, tree, vec)`` (unchanged inputs if rejected).
    """
    T = Y.shape[1]
    N = Z.shape[0]
    old = log_data_zw(Y, Z, W, sig)
    col = np.zeros((N, 1))
    col[i, 0] = 1
    Z_new = np.hstack((Z, col))
    W_new = np.vstack((W, np.random.normal(0, sig_w, (1, T))))
    #W_new = sample_W(Y,Z_new,sig,sig_w)
    log_ratio = log_data_zw(Y, Z_new, W_new, sig) - old
    # Shift by the larger log weight so exp() cannot overflow to inf and
    # turn the normalised probabilities into NaN.
    shift = max(log_ratio, 0)
    roulette = [np.exp(-shift) * prior[0], np.exp(log_ratio - shift) * prior[1]]
    total = np.sum(roulette)
    normal_roulette = [float(r) / total for r in roulette]
    draw = np.random.multinomial(1, normal_roulette)
    # Index the hit explicitly rather than int()-converting a 1-D array.
    chosen = int(np.flatnonzero(draw == 1)[0])
    if chosen:
        Z = Z_new
        W = W_new
        # NOTE(review): ``res`` is not a parameter here; it must exist at
        # module scope -- confirm.
        tree = add(tree, res)
        vec = get_vec(tree)
    return (Z, W, tree, vec)
def recover_paintbox(held, observe, W, tree, sig):
    """Return the summed log ratio of full to observed marginal likelihoods.

    For each row, both the full row of ``held`` and the matching row of
    ``observe`` are marginalised over all ``2**K`` binary vectors ``z``
    weighted by ``Z_paintbox(z, vec)``; the observed part is scored against
    the first ``half`` columns of ``W``.

    Args:
        held: 2-D array of complete rows.
        observe: 2-D array; its second dimension is ``half``.
        W: 2-D weight array; its first dimension is ``K``.
        tree: paintbox tree, only passed to ``get_vec``.
        sig: forwarded unchanged to ``log_data_zw``.

    Returns:
        Sum over rows of ``log p(full row) - log p(observed row)``.
    """
    _, half = observe.shape
    R, _ = held.shape
    K, _ = W.shape
    vec = get_vec(tree)
    W_observed = W[:, :half]
    log_recover = 0
    for i in range(R):
        # Log-space accumulation: summing exp(log-likelihood) directly can
        # underflow both sums to 0 and give log(0) - log(0) = nan.
        log_full = -np.inf
        log_observed = -np.inf
        for j in range(2**K):
            # K-bit binary expansion of j, most significant bit first.
            bits = [int(c) for c in "{0:b}".format(j).zfill(K)]
            log_z_post = np.log(Z_paintbox(bits, vec))
            total_z = np.array(bits)
            log_full = np.logaddexp(
                log_full,
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
            log_observed = np.logaddexp(
                log_observed,
                log_data_zw(observe[i, :], total_z, W_observed, sig) + log_z_post)
        log_recover = log_recover + log_full - log_observed
    return log_recover
def sample_Z(Y, Z, W, sig, tree):
    """Resample every entry of ``Z`` in place, one entry at a time.

    For entry ``(i, j)`` the row is scored with the bit set to 1 and to 0
    using ``Z_paintbox`` and ``log_uncollapsed``.  If either setting has
    paintbox probability 0 the entry is forced to the other value;
    otherwise it is drawn from a Bernoulli with the resulting probability.

    Args:
        Y: data array; row ``i`` is scored against row ``i`` of ``Z``.
        Z: 2-D NumPy array of assignments, modified in place.
        W: weight matrix forwarded to ``log_uncollapsed``.
        sig: forwarded to ``log_uncollapsed``.
        tree: paintbox tree, only passed to ``get_vec``.

    Returns:
        Tuple ``(Z, prob_matrix)`` where ``prob_matrix[i, j]`` is the
        probability used for ``Z[i, j] = 1``.
    """
    N, K = Z.shape
    prob_matrix = np.zeros([N, K])
    vec = get_vec(tree)
    for i in range(N):
        for j in range(K):
            # Only row i is scored, so copy the row rather than all of Z.
            z_one = np.copy(Z[i, :])
            z_zero = np.copy(Z[i, :])
            z_one[j] = 1
            z_zero[j] = 0
            zp_one = Z_paintbox(z_one, vec)
            zp_zero = Z_paintbox(z_zero, vec)
            if zp_one == 0 or zp_zero == 0:
                # These were comparisons (``==``) and never updated Z.
                # If both settings are impossible the second branch wins,
                # matching the probability that is recorded.
                if zp_one == 0:
                    Z[i, j] = 0
                    prob_matrix[i, j] = 0
                if zp_zero == 0:
                    #if np.sum(Z,axis=0)[j] == 0:
                    #print("Feature Kick: Undesirable Behavior")
                    Z[i, j] = 1
                    prob_matrix[i, j] = 1
                continue
            #numerical adjustment
            # Work with the log-likelihood difference so only one exp() is
            # needed; the "zero" side becomes exp(0) == 1.
            log_ratio = (log_uncollapsed(Y[i, :], z_one, W, sig)
                         - log_uncollapsed(Y[i, :], z_zero, W, sig))
            odds = np.exp(log_ratio)
            if math.isinf(odds):
                p_one = 1
            else:
                p_one = float(odds * zp_one) / (odds * zp_one + zp_zero)
            Z[i, j] = np.random.binomial(1, p_one)
            prob_matrix[i, j] = p_one
    return (Z, prob_matrix)
def sample_Z(Y, Z, W, sig, sig_w, tree):
    """Resample every entry of ``Z`` in place and return ``Z``.

    For entry ``(i, j)`` the row is scored with the bit set to 1 and to 0
    using ``Z_paintbox`` and ``log_uncollapsed``.  If either setting has
    paintbox probability 0 the entry is forced to the other value;
    otherwise it is drawn from a Bernoulli with the resulting probability.

    Args:
        Y: data array; row ``i`` is scored against row ``i`` of ``Z``.
        Z: 2-D NumPy array of assignments, modified in place.
        W: weight matrix forwarded to ``log_uncollapsed``.
        sig: forwarded to ``Z_paintbox`` and ``log_uncollapsed``.
        sig_w: unused here.
        tree: paintbox tree, only passed to ``get_vec``.

    Returns:
        The updated ``Z``.
    """
    N, K = Z.shape
    vec = get_vec(tree)
    for i in range(N):
        for j in range(K):
            # Only row i is scored, so copy the row rather than all of Z.
            z_one = np.copy(Z[i, :])
            z_zero = np.copy(Z[i, :])
            z_one[j] = 1
            z_zero[j] = 0
            # NOTE(review): ``D`` is not bound inside this function; it has
            # to come from module scope -- confirm.
            zp_one = Z_paintbox(z_one, vec, sig, D)
            zp_zero = Z_paintbox(z_zero, vec, sig, D)
            if zp_one == 0 or zp_zero == 0:
                # These were comparisons (``==``) and never updated Z.
                if zp_one == 0:
                    Z[i, j] = 0
                if zp_zero == 0:
                    if np.sum(Z, axis=0)[j] == 0:
                        print("Feature Kick: Undesirable Behavior")
                    Z[i, j] = 1
                continue
            #numerical adjustment
            # Work with the log-likelihood difference so only one exp() is
            # needed; the "zero" side becomes exp(0) == 1.
            log_ratio = (log_uncollapsed(Y[i, :], z_one, W, sig)
                         - log_uncollapsed(Y[i, :], z_zero, W, sig))
            odds = np.exp(log_ratio)
            if math.isinf(odds):
                p_one = 1
            else:
                p_one = float(odds * zp_one) / (odds * zp_one + zp_zero)
            #if math.isnan(p_one):
            #    print("P IS NAN")
            Z[i, j] = np.random.binomial(1, p_one)
            #if math.isinf(Z_vec(Z,vec)):
            #    print("TOTALLY ILLEGAL")
    return Z
def sample_recover(held, observe, W, tree, sig, obs_indices):
    """Sample ``Z`` from the observed columns, then score the hidden ones.

    ``Z`` starts from ``draw_Z_tree`` and is refreshed by 100 sweeps of
    ``sample_Z`` using only the columns in ``obs_indices``.  The result is
    the sum of ``log(Z_paintbox(row, vec))`` over the rows of the final
    ``Z`` plus ``log_data_zw`` on the remaining (hidden) columns.

    Args:
        held: 2-D array of complete rows.
        observe: 2-D array (only its shape is read).
        W: 2-D weight array.
        tree: paintbox tree.
        sig: forwarded to ``sample_Z`` and ``log_data_zw``.
        obs_indices: column indices treated as observed.

    Returns:
        The accumulated log score described above.
    """
    N, obs = observe.shape
    R, T = held.shape
    K, T = W.shape
    vec = get_vec(tree)
    W_obs = W[:, obs_indices]
    #initialize Z
    Z = draw_Z_tree(tree, R)
    hidden = [col for col in range(T) if col not in obs_indices]
    #print(hidden)
    sweeps = 100
    for _ in range(sweeps):
        #sample Z
        Z, prob_matrix = sample_Z(held[:, obs_indices], Z, W_obs, sig, tree)
        #print("sampled Z")
        #print(Z)
    # NOTE(review): scoring is done once, after the final sweep -- confirm
    # this is the intended placement relative to the loop.
    log_recover = sum(np.log(Z_paintbox(row, vec)) for row in Z)
    log_recover += log_data_zw(held[:, hidden], Z, W[:, hidden], sig)
    return log_recover
def ugibbs_sample(Y, ext, sig, sig_w, iterate, K, data_run):
    """Run the uncollapsed Gibbs sampler for ``iterate`` sweeps.

    Each sweep resamples ``Z``, the paintbox tree and ``W`` in that order,
    records ``log_data_zw`` and the feature count, and then calls
    ``new_feature``.  Progress is printed every 10th sweep.

    Args:
        Y: 2-D data array of shape ``(N, T)``.
        ext: forwarded to ``new_feature``.
        sig: forwarded to the sampling and likelihood helpers.
        sig_w: standard deviation used to initialise ``W``; also forwarded.
        iterate: number of sweeps.
        K: initial number of features.
        data_run: trial label, printed once at the start.

    Returns:
        Tuple ``(ll_list, iter_time, f_count, Z, W)`` where ``iter_time``
        is the cumulative wall-clock time per sweep.
    """
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    # NOTE(review): ``res`` is not a parameter; it must exist at module
    # scope -- confirm.
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    #W = sample_W(Y,Z,sig,sig_w)
    W = np.random.normal(0, sig_w, K * T).reshape((K, T))
    log_liks = []
    durations = []
    feature_counts = []
    for sweep in range(iterate):
        tick = time.time()
        # K now tracks the current number of columns of Z.
        N, K = Z.shape
        #sample Z
        Z = sample_Z(Y, Z, W, sig, sig_w, tree)
        if not sweep % 10:
            print("iteration: " + str(sweep))
            print("Sparsity: " + str(np.sum(Z, axis=0)))
        #sample paintbox
        tree = sample_pb(Z, tree, res)
        #ctree,ptree = tree
        #sample W
        W = sample_W(Y, Z, sig, sig_w)
        #add new features
        vec = get_vec(tree)
        #ll_list.append(log_data_zw(Y,Z,W,sig) + Z_vec(Z,vec) + log_w_sig(W,sig))
        log_liks.append(log_data_zw(Y, Z, W, sig))
        F, D = get_FD(tree)
        feature_counts.append(F)
        # NOTE(review): no ``drop`` argument is passed here -- confirm the
        # new_feature signature in use accepts nine positional arguments.
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w)
        durations.append(time.time() - tick)
    return (log_liks, np.cumsum(durations), feature_counts, Z, W)
def recover_paintbox(held, observe, W, tree, sig, obs_indices):
    """Return a max-based estimate of the recovery log ratio.

    For each row, the best joint log score over all binary vectors with
    non-zero paintbox probability is found for the full row and for the
    observed columns; the estimate is the sum over rows of their
    difference.  Upper/lower bounds are tracked alongside but are not
    returned (their printing is commented out).

    Args:
        held: 2-D array of complete rows.
        observe: 2-D array of the observed part of each row.
        W: 2-D weight array; its first dimension is ``K``.
        tree: paintbox tree, only passed to ``get_vec``.
        sig: forwarded to ``log_data_zw``.
        obs_indices: columns of ``W`` matching ``observe``.

    Returns:
        Sum over rows of ``max full score - max observed score``.
    """
    N, obs = observe.shape
    R, T = held.shape
    K, T = W.shape
    vec = get_vec(tree)

    def _upper(best, runner_up):
        # A runner-up of exactly 0 is the "never updated" sentinel; a gap
        # above 10 makes the remaining terms negligible.
        if runner_up == 0 or best - runner_up > 10:
            return best
        return np.log(np.exp(best - runner_up) + (2**K - 1)) + runner_up

    log_recover = 0
    upper_bound = 0
    lower_bound = 0
    for i in range(R):
        best_full = best_obs = 0
        second_full = second_obs = 0
        seen_any = False
        for j in range(2**K):
            # K-bit binary expansion of j, most significant bit first.
            bits = [int(ch) for ch in "{0:b}".format(j).zfill(K)]
            log_z_post = np.log(Z_paintbox(bits, vec))
            if math.isinf(log_z_post):
                continue
            total_z = np.array(bits)
            fll = log_data_zw(held[i, :], total_z, W, sig) + log_z_post
            oll = log_data_zw(observe[i, :], total_z, W[:, obs_indices], sig) + log_z_post
            if not seen_any:
                best_full, best_obs = fll, oll
                seen_any = True
                continue
            if fll > best_full:
                second_full, best_full = best_full, fll
            if oll > best_obs:
                second_obs, best_obs = best_obs, oll
        log_recover = log_recover + best_full - best_obs
        # Diagnostic bounds only; they do not affect the return value.
        numu = _upper(best_full, second_full)
        numl = best_full
        denu = _upper(best_obs, second_obs)
        denl = best_obs
        upper_bound = upper_bound + numu - denl
        lower_bound = lower_bound + numl - denu
    #if math.isinf(lower_bound):
    #print("lower bound isinf")
    #print("estimate")
    #print(log_recover)
    #print("lower bound")
    #print(lower_bound)
    #print("upper bound")
    #print(upper_bound)
    return log_recover
def sample_pb(Z, tree, res):
    """Resample the paintbox split probabilities node by node.

    Nodes are visited layer by layer (``i`` = layer, ``j`` = node).  A node
    is skipped when ``Z_compact(Z)`` has no mass in its span or when the
    paintbox vector has none.  Otherwise the split probability is moved on
    a grid of step ``1/res`` to one of the (at most three) values around
    the current one, chosen with probability proportional to
    ``exp(excise2(...))``.  ``ctree`` is updated in place.

    Args:
        Z: feature-assignment matrix, summarised by ``Z_compact``.
        tree: paintbox tree as a ``(ctree, ptree)`` pair.
        res: grid resolution for the split probabilities.

    Returns:
        Tuple ``(tree, lapse)``: the result of ``update((ctree, ptree))``
        and the wall-clock seconds spent in the node loop.

    Raises:
        SystemExit: if no candidate for some node gets a usable score.
    """
    F, D = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    compact = Z_compact(Z)
    start_pb = time.time()
    #iterate over features (row of paintbox)
    for i in range(F):
        span = 2**(F - i)
        #iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j * span
            end_one = (j + 1) * span - 1
            if np.sum(compact[start_zero:end_one + 1]) == 0:
                continue
            end_zero = start_zero + span // 2 - 1
            start_one = end_zero + 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                continue
            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            unit = float(tot) / res

            # Candidate grid values: the nearest grid point and its
            # neighbours, clipped to [0, res].
            center = int(round(res * old_prob))
            if center == res:
                lbound, ubound = res - 1, res
            elif center == 0:
                lbound, ubound = 0, 1
            else:
                lbound, ubound = center - 1, center + 1

            mat_vec = np.tile(vec, (ubound - lbound + 1, 1))
            wheel = []          # candidate rows with a usable score
            log_roulette = []   # their scores, same order as ``wheel``
            for k in range(lbound, ubound + 1):
                mat_pos = k - lbound
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # The "one" half is empty: shrink the zero half
                        # and put the new mass on its first slot.
                        ratio_zero = float(1 - new_prob) / (1 - old_prob)
                        mat_vec[mat_pos, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[mat_pos, start_zero:end_zero + 1])
                        mat_vec[mat_pos, start_one] = unit * k
                    elif old_prob == 1:
                        # The "zero" half is empty: mirror of the above.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[mat_pos, start_one:end_one + 1] = (
                            ratio_one * mat_vec[mat_pos, start_one:end_one + 1])
                        mat_vec[mat_pos, end_zero] = unit * (res - k)
                    else:
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float(1 - new_prob) / (1 - old_prob)
                        mat_vec[mat_pos, start_one:end_one + 1] = (
                            ratio_one * mat_vec[mat_pos, start_one:end_one + 1])
                        mat_vec[mat_pos, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[mat_pos, start_zero:end_zero + 1])
                #bottleneck line
                val = excise2(compact, mat_vec[mat_pos, :], start_zero, end_one)
                if math.isinf(val) or math.isnan(val) or val == -1:
                    continue
                wheel.append(mat_pos)
                log_roulette.append(val)

            if not log_roulette:
                # Exit with a message and non-zero status instead of a
                # silent, successful-looking exit.
                sys.exit("sample_pb: paintbox update broken at node "
                         "(%d, %d)" % (i, j))

            shift = max(log_roulette)
            roulette = [np.exp(lr - shift) for lr in log_roulette]
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]
            draw = np.random.multinomial(1, normal_roulette)
            # Explicit single-hit check rather than relying on int() of an
            # array raising TypeError.
            hits = np.flatnonzero(draw == 1)
            if hits.size == 1:
                chosen = wheel[int(hits[0])]
            else:
                #Hacked Solution Beware
                #BEWARE, THIS CHANGES IF YOU ADJUST THE EXPONENT OF RES
                chosen = 1
            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen + lbound) / res
    lapse = time.time() - start_pb
    tree = update((ctree, ptree))
    return tree, lapse
def sample_pb(Z, tree, res):
    """Resample the paintbox split probabilities node by node.

    Nodes are visited layer by layer (``i`` = layer, ``j`` = node).  For
    every node whose span has non-zero mass in the paintbox vector, all
    ``res + 1`` grid values ``k / res`` are scored with ``excise`` and one
    is drawn with probability proportional to ``exp(score)``.  ``ctree`` is
    updated in place.

    Args:
        Z: feature-assignment matrix, forwarded to ``excise``.
        tree: paintbox tree as a ``(ctree, ptree)`` pair.
        res: grid resolution for the split probabilities.

    Returns:
        The result of ``update((ctree, ptree))``.
    """
    F, D = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    #iterate over features (row of paintbox)
    for i in range(F):
        span = 2**(F - i)
        #iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j * span
            end_zero = start_zero + span // 2 - 1
            start_one = end_zero + 1
            end_one = start_zero + span - 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                continue
            # F-bit binary expansion of the span's first index.  Built as
            # a list because a bare map() object has no len() and cannot
            # be concatenated on Python 3.
            bits = [int(c) for c in "{0:b}".format(int(start_zero)).zfill(F)]
            start = np.array(bits, dtype=float)

            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            unit = float(tot) / res
            mat_vec = np.tile(vec, (res + 1, 1))
            roulette = []
            for k in range(res + 1):
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # The "one" half is empty: shrink the zero half
                        # and put the new mass on its first slot.
                        ratio_zero = float(1 - new_prob) / (1 - old_prob)
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                        mat_vec[k, start_one] = unit * k
                    elif old_prob == 1:
                        # The "zero" half is empty: mirror of the above.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, end_zero] = unit * (res - k)
                    else:
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float(1 - new_prob) / (1 - old_prob)
                        mat_vec[k, start_one:end_one + 1] = (
                            ratio_one * mat_vec[k, start_one:end_one + 1])
                        mat_vec[k, start_zero:end_zero + 1] = (
                            ratio_zero * mat_vec[k, start_zero:end_zero + 1])
                #bottleneck line
                val = excise(Z, mat_vec[k, :], start, i)
                #val = Z_vec(Z,mat_vec[k,:])
                if math.isinf(val) or math.isnan(val) or val == 0:
                    roulette.append(0.0)
                else:
                    roulette.append(np.exp(val))

            if np.sum(roulette) == 0:
                # No candidate got any weight: fall back to equal weights.
                roulette = 1. / res * np.ones(res + 1)
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]
            draw = np.random.multinomial(1, normal_roulette)
            # Explicit single-hit check rather than relying on int() of an
            # array raising TypeError.
            hits = np.flatnonzero(draw == 1)
            if hits.size == 1:
                chosen = int(hits[0])
            else:
                #Hacked Solution Beware
                chosen = int(round(res * old_prob))
                #print("INVARIANT BROKEN")
            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen) / res
    tree = update((ctree, ptree))
    return tree