import tensorflow as tf
import numpy as np

#Helpers log_normal2, log_normal3, and split_mean_logvar come from elsewhere
#in the repo; hedged sketches of their assumed behaviour appear below.

#Mean-field Gaussian q(W): sample weights and return them with the
#summed prior and posterior log-probs.
def sample_weights(self):

    Ws = []
    log_p_W_sum = 0
    log_q_W_sum = 0

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Get vars [I,O]
        W_means = self.W_means[layer_i]
        W_logvars = self.W_logvars[layer_i]

        #Sample weights via reparameterization [IS,OS]*[IS,OS]=[IS,OS]
        eps = tf.random_normal((input_size_i, output_size_i), 0, 1, seed=self.rs)
        W = tf.add(W_means, tf.multiply(tf.sqrt(tf.exp(W_logvars)), eps))

        #Compute probs of samples [1]
        flat_w = tf.reshape(W, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_means = tf.reshape(W_means, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_logvars = tf.reshape(W_logvars, [input_size_i*output_size_i]) #[IS*OS]

        #Standard normal prior: zero mean, unit variance
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]), tf.log(tf.ones([input_size_i*output_size_i])))
        log_q_W_sum += log_normal3(flat_w, flat_W_means, flat_W_logvars)

        Ws.append(W)

    return Ws, log_p_W_sum, log_q_W_sum
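#log_normal3 is not defined in this section. A minimal sketch of what it is
#assumed to compute, judging from the call sites: the log-density of x under
#a diagonal Gaussian with the given mean and log-variance, summed over all
#elements. The exact reduction is an assumption, not the repo's confirmed
#implementation.
def log_normal3(x, mean, log_var):
    #x, mean, log_var broadcast to the same shape. Returns a scalar.
    D = tf.cast(tf.size(x), tf.float32)
    return -0.5 * (D * float(np.log(2*np.pi))
                   + tf.reduce_sum(log_var)
                   + tf.reduce_sum(tf.square(x - mean) / tf.exp(log_var)))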
#Variant with a configurable prior variance (self.prior_var) and an option
#to average the log-prob sums over the total number of weight dimensions.
def sample_weights(self, scale_log_probs=False):

    Ws = []
    log_p_W_sum = 0
    log_q_W_sum = 0
    W_dim_count = 0.

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Get vars [I,O]
        W_means = self.W_means[layer_i]
        W_logvars = self.W_logvars[layer_i]

        #Sample weights [IS,OS]*[IS,OS]=[IS,OS]
        eps = tf.random_normal((input_size_i, output_size_i), 0, 1, seed=self.rs)
        W = tf.add(W_means, tf.multiply(tf.sqrt(tf.exp(W_logvars)), eps))

        #Compute probs of samples [1]
        flat_w = tf.reshape(W, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_means = tf.reshape(W_means, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_logvars = tf.reshape(W_logvars, [input_size_i*output_size_i]) #[IS*OS]

        #Zero-mean prior with variance self.prior_var
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]),
                                   tf.log(tf.ones([input_size_i*output_size_i])*self.prior_var))
        log_q_W_sum += log_normal3(flat_w, flat_W_means, flat_W_logvars)

        W_dim_count += tf.cast(tf.shape(flat_w)[0], tf.float32)
        Ws.append(W)

    if scale_log_probs:
        #Per-dimension averages rather than sums
        return Ws, log_p_W_sum/W_dim_count, log_q_W_sum/W_dim_count
    else:
        return Ws, log_p_W_sum, log_q_W_sum
#Same as above but with a fixed standard normal prior on the weights.
def sample_weights(self, scale_log_probs=False):

    Ws = []
    log_p_W_sum = 0
    log_q_W_sum = 0
    W_dim_count = 0.

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Get vars [I,O]
        W_means = self.W_means[layer_i]
        W_logvars = self.W_logvars[layer_i]

        #Sample weights [IS,OS]*[IS,OS]=[IS,OS]
        eps = tf.random_normal((input_size_i, output_size_i), 0, 1, seed=self.rs)
        W = tf.add(W_means, tf.multiply(tf.sqrt(tf.exp(W_logvars)), eps))

        #Compute probs of samples [1]
        flat_w = tf.reshape(W, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_means = tf.reshape(W_means, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_logvars = tf.reshape(W_logvars, [input_size_i*output_size_i]) #[IS*OS]

        #Standard normal prior: zero mean, unit variance
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]), tf.log(tf.ones([input_size_i*output_size_i])))
        log_q_W_sum += log_normal3(flat_w, flat_W_means, flat_W_logvars)

        W_dim_count += tf.cast(tf.shape(flat_w)[0], tf.float32)
        Ws.append(W)

    if scale_log_probs:
        return Ws, log_p_W_sum/W_dim_count, log_q_W_sum/W_dim_count
    else:
        return Ws, log_p_W_sum, log_q_W_sum
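#Hedged usage sketch: how the returned sums typically enter a Bayesian
#neural network ELBO. `model`, `log_likelihood`, and `n_batches` are
#hypothetical names for illustration, not identifiers from this repo.
Ws_sample, log_p_W, log_q_W = model.sample_weights(scale_log_probs=False)
#Single-sample ELBO estimate: E_q[log p(D|W)] + log p(W) - log q(W), with
#the KL term down-weighted when the likelihood is a minibatch estimate.
elbo = log_likelihood(Ws_sample) + (log_p_W - log_q_W) / n_batches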
#Deterministic variant: use the weight means directly (no sampling) and
#score them under the standard normal prior.
def sample_weight_means(self):

    Ws = []
    log_p_W_sum = 0

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Get vars [I,O]
        W_means = self.W_means[layer_i]

        flat_w = tf.reshape(W_means, [input_size_i*output_size_i]) #[IS*OS]
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]), tf.log(tf.ones([input_size_i*output_size_i])))

        Ws.append(W_means)

    return Ws, log_p_W_sum
#Multiplicative-normalizing-flow (MNF) style variant: sample an auxiliary
#per-input scale z, push it through a masked flow, and use r(z|W) to
#tighten the bound.
def sample_weight_means(self):

    Ws = []
    zs = []
    log_p_W_sum = 0
    log_q_W_sum = 0
    log_q_z_sum = 0
    log_r_z_sum = 0

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Sample z [I]
        eps = tf.random_normal([input_size_i], 0, 1, seed=self.rs)
        z0 = tf.add(self.z_means[layer_i], tf.multiply(tf.sqrt(tf.exp(self.z_logvars[layer_i])), eps))
        z0 = tf.reshape(z0, [1,input_size_i]) #[1,I]
        log_q_z_sum += log_normal2(z0, self.z_means[layer_i], self.z_logvars[layer_i]) #[1]

        #Transform z0
        z = z0

        #Flows z0 -> zT. Right now it's only a single flow; should allow
        #more steps, using similar code to the original MNF.
        mask = self.random_bernoulli(tf.shape(z), p=0.5)
        h = tf.matmul((mask * z), self.fgk[layer_i][0]) #[1,30]
        h = tf.tanh(h)
        mew_ = tf.matmul(h, self.fgk[layer_i][1]) #[1,I]
        sig_ = tf.nn.sigmoid(tf.matmul(h, self.fgk[layer_i][2])) #[1,I]

        #zT
        zT = (mask * z) + (1-mask)*(z*sig_ + (1-sig_)*mew_)
        zT = tf.reshape(zT, [input_size_i,1]) #[I,1]
        logdet = tf.reduce_sum((1-mask)*tf.log(sig_), axis=1)
        log_q_z_sum -= logdet

        #Multiply mean by zT [I,O]
        W_means = self.W_means[layer_i] * zT #broadcast [I,O]*[I,1] = [I,O]

        #Sample weights [IS,OS]*[IS,OS]=[IS,OS]
        eps = tf.random_normal((input_size_i, output_size_i), 0, 1, seed=self.rs)
        W = tf.add(W_means, tf.multiply(tf.sqrt(tf.exp(self.W_logvars[layer_i])), eps))

        #r(zT|W)
        cW = tf.tanh(tf.matmul(self.cbb[layer_i][0], W)) #[1,I]*[I,O]=[1,O]
        b1cW = tf.matmul(self.cbb[layer_i][1], cW) #[I,1]*[1,O]=[I,O]
        ones = tf.ones([output_size_i, 1]) / tf.to_float(output_size_i) #[O,1]
        b1cW = tf.matmul(b1cW, ones) #[I,O]*[O,1]=[I,1]
        b1cW = tf.reshape(b1cW, [input_size_i]) #[I]
        b2cW = tf.matmul(self.cbb[layer_i][2], cW) #[I,1]*[1,O]=[I,O]
        b2cW = tf.matmul(b2cW, ones) #[I,O]*[O,1]=[I,1]
        b2cW = tf.reshape(b2cW, [input_size_i]) #[I]

        #Flows zT -> zB
        zT = tf.reshape(zT, [1,input_size_i]) #[1,I]
        mask = self.random_bernoulli(tf.shape(zT), p=0.5)
        h = tf.matmul((mask * zT), self.fgk2[layer_i][0]) #[1,30]
        h = tf.tanh(h)
        mew_ = tf.matmul(h, self.fgk2[layer_i][1]) #[1,I]
        sig_ = tf.nn.sigmoid(tf.matmul(h, self.fgk2[layer_i][2])) #[1,I]
        zB = (mask * zT) + (1-mask)*(zT*sig_ + (1-sig_)*mew_)
        logdet = tf.reduce_sum((1-mask)*tf.log(sig_), axis=1)
        log_r_z_sum += log_normal2(zB, b1cW, b2cW) #[1]
        log_r_z_sum += logdet

        #Compute probs of samples [1]
        flat_w = tf.reshape(W, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_means = tf.reshape(W_means, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_logvars = tf.reshape(self.W_logvars[layer_i], [input_size_i*output_size_i]) #[IS*OS]
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]), tf.log(tf.ones([input_size_i*output_size_i])))
        log_q_W_sum += log_normal3(flat_w, flat_W_means, flat_W_logvars)

        Ws.append(W)
        zs.append(zT)

    log_p_W_sum = log_p_W_sum + log_r_z_sum
    log_q_W_sum = log_q_W_sum + log_q_z_sum

    #The log-prob sums are computed above but this variant returns only
    #the sampled weights.
    return Ws
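#Two helpers used above are not defined in this section. Minimal sketches
#under stated assumptions; neither is the repo's confirmed implementation.

#Assumed: a model method that samples a 0/1 mask of the given shape,
#1 with probability p.
def random_bernoulli(self, shape, p=0.5):
    return tf.cast(tf.random_uniform(shape) < p, tf.float32)

#Assumed: the row-wise counterpart of log_normal3, a diagonal-Gaussian
#log-density reduced over the last axis, returning shape [1] here.
def log_normal2(x, mean, log_var):
    D = tf.cast(tf.shape(x)[-1], tf.float32)
    return -0.5 * (D * float(np.log(2*np.pi))
                   + tf.reduce_sum(log_var + tf.square(x - mean)/tf.exp(log_var), axis=-1))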
#Hypernetwork variant of sample_weight_means: despite the name, it draws
#a fresh z, maps it deterministically through q_Wz_weights to a flattened
#weight vector, and returns only the per-layer weight matrices.
def sample_weight_means(self):

    #Sample aux var z
    eps = tf.random_normal((1,2), 0, 1, seed=self.rs)
    z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_logvar)), eps))
    log_qz = log_normal3(z, self.z_mean, self.z_logvar) #computed but unused here

    #Predict weights q(W|z)
    B = 1
    net = self.q_Wz_weights
    cur_val = tf.reshape(z, [1,2]) #[B,X]
    for layer_i in range(len(net)): #every layer, matching sample_weights
        w_layer = net[layer_i] #[X,X']
        #Concat 1 to input for biases [B,X]->[B,X+1]
        cur_val = tf.concat([cur_val, tf.ones([B, 1])], axis=1)
        cur_val = tf.matmul(cur_val, w_layer)
        if layer_i != len(net)-1: #if not last layer
            cur_val = tf.nn.softplus(cur_val)
    W = cur_val
    log_qW = tf.zeros([1]) #W is deterministic given z

    #Predict r(z|W)
    net = self.r_zW_weights
    cur_val = W
    for layer_i in range(len(net)):
        w_layer = net[layer_i] #[X,X']
        cur_val = tf.concat([cur_val, tf.ones([B, 1])], axis=1)
        cur_val = tf.matmul(cur_val, w_layer)
        if layer_i != len(net)-1: #if not last layer
            cur_val = tf.nn.softplus(cur_val)
    rz_mean, rz_logvar = split_mean_logvar(cur_val)
    log_rz = log_normal3(z, rz_mean, rz_logvar) #computed but unused here

    #Split W vector into a list of per-layer matrices
    Ws = []
    prev_spot = 0
    for i in range(len(self.net)-1):
        size_of_W_layer = (self.net[i]+1) * self.net[i+1]
        this_layer = tf.slice(W, [0,prev_spot], [1,size_of_W_layer])
        this_layer = tf.reshape(this_layer, [self.net[i]+1, self.net[i+1]])
        Ws.append(this_layer)
        prev_spot = size_of_W_layer + prev_spot

    return Ws
#Hypernetwork variant of sample_weights: returns the per-layer weights
#plus log r(z|W) and log q(z) for the auxiliary-variable bound.
def sample_weights(self):

    #Sample aux var z
    eps = tf.random_normal((1,2), 0, 1, seed=self.rs)
    self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_logvar)), eps))
    log_qz = log_normal3(self.z, self.z_mean, self.z_logvar)

    #Predict weights q(W|z)
    B = 1
    net = self.q_Wz_weights
    cur_val = tf.reshape(self.z, [1,2]) #[B,X]
    for layer_i in range(len(net)):
        w_layer = net[layer_i] #[X,X']
        #Concat 1 to input for biases [B,X]->[B,X+1]
        cur_val = tf.concat([cur_val, tf.ones([B, 1])], axis=1)
        cur_val = tf.matmul(cur_val, w_layer)
        if layer_i != len(net)-1: #if not last layer
            cur_val = tf.nn.softplus(cur_val)
    W = cur_val
    log_qW = tf.zeros([1]) #W is deterministic given z

    #Predict r(z|W)
    net = self.r_zW_weights
    cur_val = W
    for layer_i in range(len(net)):
        w_layer = net[layer_i] #[X,X']
        cur_val = tf.concat([cur_val, tf.ones([B, 1])], axis=1)
        cur_val = tf.matmul(cur_val, w_layer)
        if layer_i != len(net)-1: #if not last layer
            cur_val = tf.nn.softplus(cur_val)
    rz_mean, rz_logvar = split_mean_logvar(cur_val)
    log_rz = log_normal3(self.z, rz_mean, rz_logvar)

    #Split W vector into a list of per-layer matrices
    Ws = []
    prev_spot = 0
    for i in range(len(self.net)-1):
        size_of_W_layer = (self.net[i]+1) * self.net[i+1]
        this_layer = tf.slice(W, [0,prev_spot], [1,size_of_W_layer])
        this_layer = tf.reshape(this_layer, [self.net[i]+1, self.net[i+1]])
        Ws.append(this_layer)
        prev_spot = size_of_W_layer + prev_spot

    return Ws, log_rz, log_qz
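#split_mean_logvar is not defined in this section. Assumed behaviour from
#the call site: split the final [1, 2*D] network output into its mean and
#log-variance halves. A minimal sketch under that assumption:
def split_mean_logvar(x):
    D = x.get_shape().as_list()[1] // 2
    return x[:, :D], x[:, D:]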
#Scale-mixture variant: a per-input-row scale s multiplies each weight
#row's mean and standard deviation.
def sample_weights(self, scale_log_probs):

    Ws = []
    log_p_W_sum = 0.
    log_q_W_sum = 0.
    log_p_s_sum = 0.
    log_q_s_sum = 0.
    W_dim_count = 0.
    s_dim_count = 0.

    for layer_i in range(len(self.net)-1):

        input_size_i = self.net[layer_i]+1 #plus 1 for bias
        output_size_i = self.net[layer_i+1] #size of layer i+1

        #Get vars [I]
        s_means = self.s_means[layer_i]
        s_logvars = self.s_logvars[layer_i]

        #Sample scales [I]*[I]=[I]
        eps = tf.random_normal([input_size_i], 0, 1, seed=self.rs)
        s = tf.add(s_means, tf.multiply(tf.sqrt(tf.exp(s_logvars)), eps))

        #Compute probs of s samples [1]
        #Laplace(0,1) prior on the scales, up to an additive constant
        #(note the minus sign: log p(s) = -|s| - log 2 per dimension)
        log_p_s_sum -= tf.reduce_sum(tf.abs(s))
        log_q_s_sum += log_normal3(s, s_means, s_logvars)

        #Get vars [I,O]
        W_means = self.W_means[layer_i]
        W_logvars = self.W_logvars[layer_i]

        s = tf.reshape(s, [-1,1])
        W_means_s = W_means*s #[I,O]
        W_vars_s = tf.exp(W_logvars)*tf.square(s) #[I,O]

        #Sample weights [IS,OS]*[IS,OS]=[IS,OS]
        eps = tf.random_normal((input_size_i, output_size_i), 0, 1, seed=self.rs)
        W = tf.add(W_means_s, tf.multiply(tf.sqrt(W_vars_s), eps))

        #Compute probs of samples [1]
        flat_w = tf.reshape(W, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_means = tf.reshape(W_means_s, [input_size_i*output_size_i]) #[IS*OS]
        flat_W_logvars = tf.log(tf.reshape(W_vars_s, [input_size_i*output_size_i])) #[IS*OS]

        #Prior on W given s: zero mean with variance s^2 per input row
        log_squared_s = tf.reshape(tf.log(tf.square(s)), [-1,1])
        log_squared_s = tf.tile(log_squared_s, [1,output_size_i])
        flat_log_squared_s = tf.reshape(log_squared_s, [input_size_i*output_size_i])

        log_q_W_sum += log_normal3(flat_w, flat_W_means, flat_W_logvars)
        log_p_W_sum += log_normal3(flat_w, tf.zeros([input_size_i*output_size_i]), flat_log_squared_s)

        W_dim_count += tf.cast(tf.shape(flat_w)[0], tf.float32)
        s_dim_count += tf.cast(tf.shape(s)[0], tf.float32)
        Ws.append(W)

    if scale_log_probs:
        return Ws, (log_p_W_sum+log_p_s_sum)/(W_dim_count+s_dim_count), (log_q_W_sum+log_q_s_sum)/(W_dim_count+s_dim_count)
    else:
        return Ws, (log_p_W_sum+log_p_s_sum), (log_q_W_sum+log_q_s_sum)
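#Why the variance is scaled by s^2 above: if w = s*(mu + sigma*eps) with
#eps ~ N(0,1), then w ~ N(s*mu, s^2*sigma^2), which is exactly the
#W_means_s / W_vars_s parameterization. A tiny self-contained NumPy check
#of that identity (illustration only, not repo code):
rng = np.random.RandomState(0)
mu, sigma, s_val = 0.3, 0.5, 2.0
w = s_val * (mu + sigma * rng.randn(100000))
print(np.mean(w), np.var(w)) #approx s*mu = 0.6 and s^2*sigma^2 = 1.0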