def __init__(self, name, n_x, n_y, N_h1, NF_h, N_h2, St1, St2, sz_f, sz_im):
    '''
    Store the network configuration, derive the layer sizes and strides,
    and create the weights.

    INPUTS:
        name - name to assign to the network
        n_x - channels of the input
        n_y - channels of output
        N_h1 - stored as self.N_h1 (presumably the channel counts of the
               first group of hidden layers - confirm against the
               compute/_create_weights methods of this class)
        NF_h - stored as self.NF_h; one stride of 1 is inserted in self.St
               for each of its entries
        N_h2 - stored as self.N_h2 (presumably the channel counts of the
               last group of hidden layers - confirm)
        St1 - strides of the first group; also used with sz_im to compute
              self.Sz1
        St2 - strides of the last group; also used with the last entry of
              self.Sz1 to compute self.Sz2
        sz_f - filters sizes in the format [H,W]
        sz_im - image size handed to NN_utils.compute_size
    '''
    # Values that are kept exactly as given.
    self.name = name
    self.n_x = n_x
    self.n_y = n_y
    self.N_h1 = N_h1
    self.NF_h = NF_h
    self.N_h2 = N_h2
    self.sz_f = sz_f
    self.sz_im = sz_im
    self.bias_start = 0.0

    # Sizes for the first group start from the input image; the second
    # group starts from the last size produced by the first group.
    self.Sz1 = NN_utils.compute_size(sz_im, St1)
    self.Sz2 = NN_utils.compute_size(self.Sz1[-1], St2)

    # Full stride list: St1, then a stride of 1 per entry of NF_h, then St2.
    n_middle = np.shape(NF_h)[0]
    self.St = np.concatenate((St1, np.ones(n_middle), St2), axis=0)

    # Weights are created before the non-linearity is assigned, so
    # _create_weights sees every attribute above but not self.nonlinearity.
    self.weights = self._create_weights()

    # Choice of non-linearity (tf.nn.relu/tf.nn.leaky_relu/tf.nn.elu)
    self.nonlinearity = tf.nn.leaky_relu
def compute_py(self, xl):
    '''
    compute the normalised output vector py

    The input goes through the convolutional layers, is flattened, goes
    through the fully connected layers and a final linear map, and is
    squashed with a sigmoid. The result (plus 1e-6) is divided by its sum
    over axis 1, so each row of the output sums to one.

    INPUTS:
        xl - input batch; it is split by NN_utils.reshape_and_extract using
             self.sz_im and only the first returned value is used here

    OUTPUTS:
        py - row-normalised output (presumably [batch, n_y] - the
             normaliser is tiled n_y times along axis 1)
    '''
    x, _ = NN_utils.reshape_and_extract(xl, self.sz_im)

    hidden_post = layers.tf_conv_layer(
        x, self.weights['W_x_to_h1'], self.weights['b_x_to_h1'],
        self.St[0], self.nonlinearity)

    # ni is the index of the most recently computed hidden layer. It is
    # initialised here (h1 has just been computed) so that it is defined
    # even when N_h1 has a single entry and the loop below does not run.
    ni = 1

    # Remaining convolutional layers.
    num_layers_1 = np.shape(self.N_h1)[0] - 1
    for _ in range(num_layers_1):
        ni += 1
        hidden_post = layers.tf_conv_layer(
            hidden_post,
            self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)],
            self.St[ni - 1], self.nonlinearity)

    hidden_post = NN_utils.flatten(hidden_post)

    # Fully connected layers; the layer numbering continues from the
    # convolutional part.
    num_layers_F = np.shape(self.NF_h)[0]
    for _ in range(num_layers_F):
        ni += 1
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    # Output layer: the small offset keeps every entry, and therefore the
    # row sum used as normaliser, strictly positive.
    p_un = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_py'.format(ni)]),
        self.weights['b_h{}_to_py'.format(ni)])
    p_un = tf.nn.sigmoid(p_un) + 1e-6

    py = tfm.divide(
        p_un,
        tf.tile(tf.expand_dims(tfm.reduce_sum(p_un, axis=1), axis=1),
                [1, self.n_y]))

    return py
def _create_weights(self):
    '''
    Initialise weights

    Creates the variables of a fully connected network
    n_z -> N_h[0] -> ... -> N_h[-1], followed by two output maps from the
    last hidden layer to n_x (keys ending in "mux" and "sx").

    OUTPUTS:
        all_weights - ordered dictionary of tf.Variable objects, keyed
                      'W_<from>_to_<to>' / 'b_<from>_to_<to>'
    '''
    # NOTE(review): every bias is built as tf.zeros(...) * self.bias_start,
    # which is zero for any finite bias_start, so bias_start currently has
    # no effect on the initial values. Kept as is to preserve behaviour.
    all_weights = collections.OrderedDict()

    # Input layer: z -> h1.
    all_weights['W_z_to_h1'] = tf.Variable(
        vae_utils.xavier_init(self.n_z, self.N_h[0]), dtype=tf.float32)
    all_weights['b_z_to_h1'] = tf.Variable(
        tf.zeros([self.N_h[0]], dtype=tf.float32) * self.bias_start)

    # ni is the index of the last hidden layer created so far. It is
    # initialised before the loop so that the output keys below are valid
    # even when N_h has a single entry and the loop does not run.
    ni = 1

    # Hidden layers: h(ni-1) -> h(ni).
    num_layers_middle = np.shape(self.N_h)[0] - 1
    for i in range(num_layers_middle):
        ni = i + 2
        all_weights['W_h{}_to_h{}'.format(ni - 1, ni)] = tf.Variable(
            vae_utils.xavier_init(self.N_h[ni - 2], self.N_h[ni - 1]),
            dtype=tf.float32)
        all_weights['b_h{}_to_h{}'.format(ni - 1, ni)] = tf.Variable(
            tf.zeros([self.N_h[ni - 1]], dtype=tf.float32) * self.bias_start)

    # Output maps from the last hidden layer to n_x.
    all_weights['W_h{}_to_mux'.format(ni)] = tf.Variable(
        vae_utils.xavier_init(self.N_h[ni - 1], self.n_x), dtype=tf.float32)
    all_weights['b_h{}_to_mux'.format(ni)] = tf.Variable(
        tf.zeros([self.n_x], dtype=tf.float32) * self.bias_start)

    all_weights['W_h{}_to_sx'.format(ni)] = tf.Variable(
        vae_utils.xavier_init(self.N_h[ni - 1], self.n_x), dtype=tf.float32)
    all_weights['b_h{}_to_sx'.format(ni)] = tf.Variable(
        tf.zeros([self.n_x], dtype=tf.float32) * self.bias_start)

    return all_weights
def _create_weights(self):
    '''
    Initialise weights.

    Every network builder imported from "networks" and used in the compute
    function has a "make_weights" counterpart; those counterparts are
    called here, each with its own ID, and all of them add to the same
    ordered dictionary.

    OUTPUTS:
        the ordered dictionary returned by the last make_weights call
    '''
    weights = collections.OrderedDict()

    # ID=0: convolutional network taking x.
    weights = networks.conv_2D_make_weights(
        weights, self.n_ch_x, self.N_x, self.fs_x, ID=0)

    # ID=1: fully connected network taking z.
    weights = networks.fc_make_weights(weights, self.n_z, self.N_z, ID=1)

    # The combined network takes the flattened output of the convolutional
    # network together with the output of the z network, so its input size
    # has to be worked out first. NN_utils.compute_size gives a
    # length(st_x) x 2 array whose rows are the image dimensions at each
    # hidden layer; the last row belongs to the last convolutional layer.
    conv_sizes = NN_utils.compute_size(self.x_siz, self.st_x)
    last_height, last_width = conv_sizes[-1, 0], conv_sizes[-1, 1]

    # height * width * channels of the last convolutional layer
    flat_x_dim = last_height * last_width * self.N_x[-1]
    # plus the width of the last fully connected layer that took z
    combined_dim = flat_x_dim + self.N_z[-1]

    # ID=2: fully connected network taking the concatenated layers.
    weights = networks.fc_make_weights(
        weights, combined_dim, self.N_comb, ID=2)

    # ID=3 and ID=4: two single matrices (no bias) mapping the last
    # combined layer to mu and log_sigma_square respectively.
    for head_id in (3, 4):
        weights = networks.fc_make_weights(
            weights, self.N_comb[-1], [self.n_y], add_b=False, ID=head_id)

    return weights
def compute_moments(self, xl):
    '''
    compute moments of output Gaussian distribution

    INPUTS:
        xl - input batch; NN_utils.reshape_and_extract splits it, using
             self.sz_im, into the part fed to the convolutional layers and
             a second part that is concatenated (axis 1) to the flattened
             convolutional features (presumably image pixels followed by
             extra columns in each row - confirm against NN_utils)

    OUTPUTS:
        mu_y - mean of output Gaussian distribution, flattened, with one
               column of zeros appended
        log_sig_sq_y - log variance of output Gaussian distribution,
                       flattened, with one column of zeros appended
    '''
    x, extra = NN_utils.reshape_and_extract(xl, self.sz_im)

    hidden_post = layers.tf_conv_layer(
        x, self.weights['W_x_to_h1'], self.weights['b_x_to_h1'],
        self.St[0], self.nonlinearity)

    # ni is the index of the most recently computed hidden layer. It is
    # initialised here (h1 has just been computed) so that it is defined
    # even when N_h1 has a single entry and the loop below does not run.
    ni = 1

    # Remaining layers of the first convolutional group.
    num_layers_1 = np.shape(self.N_h1)[0] - 1
    for _ in range(num_layers_1):
        ni += 1
        hidden_post = layers.tf_conv_layer(
            hidden_post,
            self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)],
            self.St[ni - 1], self.nonlinearity)

    # Flatten and append the extracted extra part of the input.
    hidden_post = NN_utils.flatten(hidden_post)
    hidden_post = tf.concat([hidden_post, extra], axis=1)

    # Fully connected layers; the layer numbering continues.
    num_layers_F = np.shape(self.NF_h)[0]
    for _ in range(num_layers_F):
        ni += 1
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    # Back to image layout, using the first size of the second group.
    hidden_post = NN_utils.reshape_to_images(hidden_post, self.Sz2[0, :])

    # Second convolutional group.
    num_layers_2 = np.shape(self.N_h2)[0]
    for _ in range(num_layers_2):
        ni += 1
        hidden_post = layers.tf_conv_layer(
            hidden_post,
            self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)],
            self.St[ni - 1], self.nonlinearity)

    # Mean: stride-1 convolution followed by a sigmoid.
    mu_y = layers.tf_conv_layer(
        hidden_post,
        self.weights['W_h{}_to_muy'.format(ni)],
        self.weights['b_h{}_to_muy'.format(ni)],
        1, self.nonlinearity)
    mu_y = tf.nn.sigmoid(mu_y)

    # Log variance: stride-1 convolution, then a scaled sigmoid that
    # bounds the value between -50 and 50.
    log_sig_sq_y = layers.tf_conv_layer(
        hidden_post,
        self.weights['W_h{}_to_sy'.format(ni)],
        self.weights['b_h{}_to_sy'.format(ni)],
        1, self.nonlinearity)
    log_sig_sq_y = 100 * (tf.nn.sigmoid(log_sig_sq_y / 100) - 0.5)

    # Flatten both moments and append one column of zeros to each.
    mu_y = NN_utils.flatten(mu_y)
    mu_y = tf.concat([mu_y, tf.zeros([tf.shape(mu_y)[0], 1])], axis=1)

    log_sig_sq_y = NN_utils.flatten(log_sig_sq_y)
    log_sig_sq_y = tf.concat(
        [log_sig_sq_y, tf.zeros([tf.shape(log_sig_sq_y)[0], 1])], axis=1)

    return mu_y, log_sig_sq_y
def _create_weights(self):
    '''
    Initialise weights

    Creates, in this order:
      - a fully connected stack for y:  n_y  -> N_hy[0]  -> ... -> N_hy[-1]
      - a fully connected stack for x:  n_x  -> N_hx[0]  -> ... -> N_hx[-1]
      - a fully connected stack for x2: n_x2 -> N_hx2[0] -> ... -> N_hx2[-1]
      - a combined stack whose input size is
        N_hy[-1] + N_hx[-1] + N_hx2[-1], with hidden sizes N_h
      - two output maps from the last combined layer to n_z
        (keys ending in "muz" and "sz")

    OUTPUTS:
        all_weights - ordered dictionary of tf.Variable objects, keyed
                      'W_<from>_to_<to>' / 'b_<from>_to_<to>'
    '''
    all_weights = collections.OrderedDict()

    def add_layer(key, n_in, n_out):
        # Adds the weight matrix and then the bias of one dense layer.
        # NOTE(review): tf.zeros(...) * self.bias_start is zero for any
        # finite bias_start, so bias_start has no effect on the initial
        # values. Kept as is to preserve behaviour.
        all_weights['W_' + key] = tf.Variable(
            vae_utils.xavier_init(n_in, n_out), dtype=tf.float32)
        all_weights['b_' + key] = tf.Variable(
            tf.zeros([n_out], dtype=tf.float32) * self.bias_start)

    def add_stack(source, n_in, sizes, suffix):
        # Adds source -> h1<suffix> and then h(k)<suffix> -> h(k+1)<suffix>
        # for every further entry of sizes.
        add_layer('{}_to_h1{}'.format(source, suffix), n_in, sizes[0])
        for k in range(1, np.shape(sizes)[0]):
            add_layer('h{}{}_to_h{}{}'.format(k, suffix, k + 1, suffix),
                      sizes[k - 1], sizes[k])

    # Separate stacks for each of the three inputs.
    add_stack('y', self.n_y, self.N_hy, 'y')
    add_stack('x', self.n_x, self.N_hx, 'x')
    add_stack('x2', self.n_x2, self.N_hx2, 'x2')

    # Combined stack fed by the last layer of each of the three stacks.
    add_stack('h0',
              self.N_hy[-1] + self.N_hx[-1] + self.N_hx2[-1],
              self.N_h, '')

    # Index of the last combined hidden layer. It is taken from N_h itself
    # rather than from a loop variable, so the output keys are right even
    # when N_h has a single entry (no h1 -> h2 layer is created then).
    n_last = np.shape(self.N_h)[0]

    # Output maps from the last combined layer to n_z.
    add_layer('h{}_to_muz'.format(n_last), self.N_h[n_last - 1], self.n_z)
    add_layer('h{}_to_sz'.format(n_last), self.N_h[n_last - 1], self.n_z)

    return all_weights