def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    Scores are ``exp(dot_product(tanh(dot_product(x, W) [+ b]), u))``,
    optionally masked, then normalized along axis 1.

    Args:
        x: input tensor (presumably (batch, steps, features) -- TODO confirm).
        mask: optional mask; when given it is cast to ``K.floatx()`` and
            multiplied into the un-normalized scores.

    Returns:
        ``K.sum(x * weights, axis=1)``.
    """
    hidden = dot_product(x, self.W)
    if self.bias:
        hidden = hidden + self.b
    hidden = K.tanh(hidden)

    scores = K.exp(dot_product(hidden, self.u))

    # Masking happens after the exp; the division below re-normalizes.
    if mask is not None:
        # Cast to floatX so Theano does not upcast to float64.
        scores = scores * K.cast(mask, K.floatx())

    # Early in training the sum can be almost zero, which would yield NaNs;
    # a small epsilon in the denominator guards against that.
    denominator = K.cast(
        K.sum(scores, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    attention = K.expand_dims(scores / denominator)

    return K.sum(x * attention, axis=1)
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    Scores are ``exp(tanh(K.dot(x, W)))`` normalized along axis 1. Only
    Keras backend functions are used, so the layer no longer depends on
    Theano-specific tensor methods (``dimshuffle`` / ``.sum``).

    Args:
        x: input tensor (presumably (batch, steps, features) -- TODO confirm).
        mask: accepted but not used by this implementation.

    Returns:
        ``K.sum(x * weights, axis=1)``.
    """
    eij = K.tanh(K.dot(x, self.W))
    ai = K.exp(eij)

    # keepdims=True is the backend-agnostic form of .dimshuffle(0, 'x')
    # applied to the axis-1 sum.
    weights = ai / K.sum(ai, axis=1, keepdims=True)

    # expand_dims (trailing axis by default) replaces .dimshuffle(0, 1, 'x').
    weighted_input = x * K.expand_dims(weights)
    return K.sum(weighted_input, axis=1)
def sampling(args):
    """Sample ``mean + exp(log_sigma) * eps`` with ``eps`` from K.random_normal.

    Args:
        args: indexable whose element 0 is the mean tensor and element 1 the
            log-sigma tensor.

    Returns:
        The sampled tensor.
    """
    mean, log_sigma = args[0], args[1]

    # Dynamic batch size, static second dimension.
    noise_shape = (K.shape(mean)[0], K.int_shape(mean)[1])

    noise = K.random_normal(shape=noise_shape, dtype=tf.float32)
    noise = tf.reshape(noise, noise_shape)

    return mean + K.exp(log_sigma) * noise
def sampling(args):
    """Reparameterization trick: sample from an isotropic unit Gaussian.

    # Arguments
        args (tensor): mean and log of variance of Q(z|X)

    # Returns
        z (tensor): sampled latent vector
    """
    mean, log_var = args
    n_rows = K.shape(mean)[0]
    n_cols = K.int_shape(mean)[1]

    # random_normal is called with its defaults (original note: mean=0, std=1.0)
    noise = K.random_normal(shape=(n_rows, n_cols))

    # exp(0.5 * log_var) converts the log-variance into a standard deviation.
    sigma = K.exp(0.5 * log_var)
    return mean + sigma * noise
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    Scores are ``K.dot(x, W)`` (plus bias when ``self.use_bias``), passed
    through tanh or relu when ``self.activation`` is ``'tanh'`` / ``'relu'``
    and left untouched otherwise, then exponentiated and normalized along
    axis 1.

    Args:
        x: input tensor.
        mask: accepted but not used by this implementation.

    Returns:
        ``K.sum(x * weights, axis=1)``.
    """
    scores = K.dot(x, self.W)
    if self.use_bias:
        scores = K.bias_add(scores, self.bias)

    if self.activation == 'tanh':
        scores = K.tanh(scores)
    elif self.activation == 'relu':
        scores = K.relu(scores)
    # Any other activation value leaves the scores linear.

    exp_scores = K.exp(scores)
    attention = exp_scores / K.sum(exp_scores, axis=1, keepdims=True)
    return K.sum(x * attention, axis=1)
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    The original author's notes give ``x`` as
    [batch_size, seq_len, attention_dim]; the hidden representation is
    ``tanh(xW + b)`` and the scores are its projection onto ``self.u``.

    Args:
        x: input tensor.
        mask: optional mask; when given it is cast to ``K.floatx()`` and
            multiplied into the un-normalized scores.

    Returns:
        ``K.sum(x * weights, axis=1)``.
    """
    # Replicate W once per batch element so tf.matmul can do a batched product.
    kernel = K.expand_dims(self.W, axis=0)
    batch = K.shape(x)[0]
    batched_kernel = K.tile(kernel, (batch, 1, 1))

    hidden = tf.matmul(x, batched_kernel)
    hidden = K.tanh(K.bias_add(hidden, self.b))

    scores = K.squeeze(K.dot(hidden, self.u), -1)
    scores = K.exp(scores)

    if mask is not None:
        # Cast to floatX so Theano does not upcast to float64.
        scores = scores * K.cast(mask, K.floatx())

    denominator = K.cast(
        K.sum(scores, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    attention = K.expand_dims(scores / denominator)

    return K.sum(x * attention, axis=1)
def call(self, x, mask=None):
    """Return the attention-weighted sum of ``x`` over axis 1.

    ``x`` is flattened to (-1, features_dim), projected with ``W`` reshaped
    to (features_dim, 1), and the result reshaped to (-1, step_dim) to give
    one score per step.

    Args:
        x: input tensor.
        mask: optional mask; when given it is cast to ``K.floatx()`` and
            multiplied into the un-normalized scores.

    Returns:
        ``K.sum(x * weights, axis=1)``.
    """
    n_features = self.features_dim
    n_steps = self.step_dim

    flat_x = K.reshape(x, (-1, n_features))
    column_w = K.reshape(self.W, (n_features, 1))
    scores = K.reshape(K.dot(flat_x, column_w), (-1, n_steps))

    if self.bias:
        scores = scores + self.b

    scores = K.exp(K.tanh(scores))

    if mask is not None:
        scores = scores * K.cast(mask, K.floatx())

    # Epsilon keeps the division finite when the scores sum to ~0.
    denominator = K.cast(
        K.sum(scores, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    attention = K.expand_dims(scores / denominator)

    return K.sum(x * attention, axis=1)
def sampleLoss(true_y, pred_y):
    """Return ``-0.5 * mean(1 + s - m**2 - exp(s))`` over the last axis.

    ``m`` is ``pred_y[:, :, 0]`` and ``s`` is ``pred_y[:, :, 1]``.

    Args:
        true_y: not used by this function.
        pred_y: tensor indexed as ``pred_y[:, :, k]`` for k in {0, 1}.

    Returns:
        The loss tensor reduced over axis -1.
    """
    mean = pred_y[:, :, 0]
    log_sigma = pred_y[:, :, 1]
    kl_terms = 1 + log_sigma - K.square(mean) - K.exp(log_sigma)
    return -0.5 * K.mean(kl_terms, axis=-1)
def train(self, epochs, batch_size=32, sample_interval=50, save_interval=1500):
    """Attach the VAE loss to ``self.vae``, compile it and fit it in rounds.

    After every round the weights are saved to ``vae_mlp_fruit.h5`` and
    ``self.sample_images`` is called twice (with ``noise=False`` and with
    its default). When training ends, or is interrupted with Ctrl-C, the
    last ``fit`` history is pickled under ``histories/`` and the collected
    loss curve is saved under ``images/plots/`` and shown.

    Args:
        epochs: training budget; ``int(epochs / sample_interval)`` rounds
            are run.
        batch_size: batch size forwarded to ``self.vae.fit``.
        sample_interval: used to derive the number of rounds and the
            "True Epoch" label passed to ``self.sample_images``.
        save_interval: not used by this method.
    """
    loss = []
    # Bound up front so an interrupt before the first fit() returns cannot
    # leave the name undefined.
    history = None

    # Load the images, scale to [0, 1] and flatten each one to a vector.
    X_train = self.load_images()
    X_train = X_train / 255
    X_train = X_train.reshape((len(X_train), np.prod(X_train.shape[1:])))

    # VAE loss = mse_loss or xent_loss + kl_loss
    print(self.inputs)
    print(self.outputs)
    reconstruction_loss = mse(self.inputs, self.outputs)
    reconstruction_loss *= self.img_rows * self.img_cols
    kl_loss = (1 + self.z_log_var - K.square(self.z_mean)
               - K.exp(self.z_log_var))
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    print(reconstruction_loss.shape, kl_loss.shape)
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    self.vae.add_loss(vae_loss)
    self.vae.compile(optimizer='adam')
    self.vae.summary()

    rounds = int(epochs / sample_interval)
    try:
        for i in range(1, rounds + 1):
            print("True Epoch: " + str(i * sample_interval))
            # train the autoencoder
            # NOTE(review): each fit() runs `rounds` epochs, not
            # `sample_interval`, so the "True Epoch" label only matches the
            # real epoch count when the two are equal -- confirm intent.
            history = self.vae.fit(
                X_train,
                shuffle=True,
                epochs=rounds,
                batch_size=batch_size,
                validation_data=(X_train, None))
            # TODO: make test
            self.vae.save_weights('vae_mlp_fruit.h5')
            self.sample_images(X_train, i * sample_interval, noise=False)
            self.sample_images(X_train, i * sample_interval)
            loss.append(history.history['loss'])
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training but still saves what we have.
        pass

    if not loss:
        # No round completed: np.stack([]) would raise and there is no
        # history to pickle, so there is nothing to persist or plot.
        print("No completed training round; skipping history and plot.")
        return

    loss = np.stack(loss).flatten()

    # Context manager guarantees the pickle file is flushed and closed.
    with open("histories/%d-history.pkl" % time.time(), "wb") as history_file:
        pickle.dump(history, history_file)

    plt.clf()
    plt.plot(loss, label="loss")
    plt.legend()
    plt.title(label='VAE-GAN Loss')
    plt.savefig("images/plots/%d-vae-gan_loss.png" % time.time())
    plt.show()
def sample_z(args):
    """Sample ``mean + exp(log_var / 2) * eps`` with ``eps`` ~ N(0, 1).

    ``batch_size`` and ``self`` are free variables taken from the enclosing
    scope.

    Args:
        args: pair ``(z_m, z_l_s)`` of tensors.

    Returns:
        The sampled tensor.
    """
    mean, log_var = args
    noise = K.random_normal(
        shape=(batch_size, self.latent_shape), mean=0., std=1.)
    scale = K.exp(log_var / 2)
    return mean + scale * noise
def build_variational_architecture(self):
    """Assemble the convolutional VAE graph and its loss tensors.

    Reads ``self.autoencoder_input``, ``self.decoder_input`` and
    ``self.latent_shape``. Sets ``self.z_mean``, ``self.z_log_sigma``,
    ``self.decoder_output``, ``self.autoencoder_output``,
    ``self.encoder_output``, ``self.emulator_reconstruction_loss`` and
    ``self.autoencoder_loss``.

    The same decoder layer objects are applied to both the sampled latent
    and ``self.decoder_input``, so the two paths share weights.
    """
    # ---- Encoder: three stride-2 convolutions, then a dense layer.
    strided_relu = dict(subsample=(2, 2), activation='relu')
    features = Convolution2D(64, 6, 6, border_mode='valid', name='e1',
                             **strided_relu)(self.autoencoder_input)
    for layer_name in ('e3', 'e4'):
        features = Convolution2D(64, 6, 6, border_mode='same',
                                 name=layer_name, **strided_relu)(features)
    encoded = Dense(512, activation='relu')(flatten(features))

    self.z_mean = Dense(self.latent_shape, activation='linear')(encoded)
    self.z_log_sigma = Dense(self.latent_shape, activation='linear')(encoded)

    # Dynamic batch size, captured by the sampling closure below.
    batch_size = tf.shape(self.autoencoder_input)[0]

    def sample_z(args):
        mean, log_var = args
        noise = K.random_normal(shape=(batch_size, self.latent_shape),
                                mean=0., std=1.)
        return mean + K.exp(log_var / 2) * noise

    # Sample z
    latent = Lambda(sample_z)([self.z_mean, self.z_log_sigma])

    # Decoder layers (built once, reused by both paths)
    decoder_stack = [
        Dense(6400, activation='relu', name='d1'),
        Reshape((10, 10, 64), name='d2'),
        Deconvolution2D(64, 6, 6, output_shape=(None, 20, 20, 64),
                        subsample=(2, 2), activation='relu',
                        border_mode='same', name='d3'),
        Deconvolution2D(64, 6, 6, output_shape=(None, 40, 40, 64),
                        subsample=(2, 2), activation='relu',
                        border_mode='same', name='d4'),
        Deconvolution2D(1, 6, 6, output_shape=(None, 84, 84, 1),
                        subsample=(2, 2), activation='sigmoid',
                        border_mode='valid', name='d5'),
    ]

    def run_decoder(tensor):
        # Apply the shared stack, then flatten with a fresh Reshape layer.
        for layer in decoder_stack:
            tensor = layer(tensor)
        return Reshape((7056, ))(tensor)

    # Full autoencoder first, decoder-only path second (order preserved).
    full_output = run_decoder(latent)
    decoder_only_output = run_decoder(self.decoder_input)

    self.decoder_output = decoder_only_output
    self.autoencoder_output = full_output
    self.encoder_output = self.z_mean

    # Loss = summed binary cross-entropy + KL term.
    self.emulator_reconstruction_loss = K.sum(
        K.binary_crossentropy(self.autoencoder_output,
                              flatten(self.autoencoder_input)),
        axis=1)
    kl_terms = (1 + self.z_log_sigma - K.square(self.z_mean)
                - K.exp(self.z_log_sigma))
    kl_loss = -0.5 * K.sum(kl_terms, axis=-1)
    self.autoencoder_loss = tf.add(self.emulator_reconstruction_loss, kl_loss)
def step(self, inputs, states):
    """Run one recurrent step of an LSTM-style cell with soft attention.

    Args:
        inputs: the current step's input tensor.
        states: indexable with five entries, read as
            [h_tm1, c_tm1, dp_mask, rec_dp_mask, x_input].

    Returns:
        ``(context_sequence, [h, c])`` when ``self.return_attention`` is
        truthy, otherwise ``(h, [h, c])``.

    Raises:
        ValueError: if ``self.implementation`` is not 0, 1 or 2.
    """
    h_tm1 = states[0]
    c_tm1 = states[1]
    dp_mask = states[2]
    rec_dp_mask = states[3]
    x_input = states[4]

    # alignment model
    # Repeat the previous hidden state `timestep_dim` times so it can be
    # combined with the projected x_input below.
    h_att = K.repeat(h_tm1, self.timestep_dim)
    att = _time_distributed_dense(x_input, self.attention_weights,
                                  self.attention_bias,
                                  output_dim=K.int_shape(
                                      self.attention_weights)[1])
    attention_ = self.attention_activation(
        K.dot(h_att, self.attention_recurrent_weights) + att)
    # NOTE(review): `attention_recurrent_bias` is used as the right operand
    # of K.dot here (i.e. as a projection), despite its name.
    attention_ = K.squeeze(
        K.dot(attention_, self.attention_recurrent_bias), 2)

    # Un-normalized attention weights.
    alpha = K.exp(attention_)

    # NOTE(review): this is the same dp_mask[0] that is applied to `inputs`
    # further down -- confirm masking the attention weights is intended.
    if dp_mask is not None:
        alpha *= dp_mask[0]

    # Normalize along axis 1 (no epsilon is added to the denominator).
    alpha /= K.sum(alpha, axis=1, keepdims=True)
    # Repeat and permute so alpha can be multiplied with x_input.
    alpha_r = K.repeat(alpha, self.input_dim)
    alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

    # make context vector (soft attention after Bahdanau et al.)
    z_hat = x_input * alpha_r
    # Keep the weighted (un-summed) sequence; returned when
    # self.return_attention is set.
    context_sequence = z_hat
    z_hat = K.sum(z_hat, axis=1)

    if self.implementation == 2:
        # Fused path: one matmul per source, then slice the four gates.
        z = K.dot(inputs * dp_mask[0], self.kernel)
        z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel)
        z += K.dot(z_hat, self.attention_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        z2 = z[:, 2 * self.units:3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)
    else:
        if self.implementation == 0:
            # `inputs` is sliced into four `units`-wide pieces, one per gate
            # (presumably already projected upstream -- TODO confirm).
            x_i = inputs[:, :self.units]
            x_f = inputs[:, self.units:2 * self.units]
            x_c = inputs[:, 2 * self.units:3 * self.units]
            x_o = inputs[:, 3 * self.units:]
        elif self.implementation == 1:
            # Per-gate kernels, each with its own dropout mask entry.
            x_i = K.dot(inputs * dp_mask[0], self.kernel_i) + self.bias_i
            x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f
            x_c = K.dot(inputs * dp_mask[2], self.kernel_c) + self.bias_c
            x_o = K.dot(inputs * dp_mask[3], self.kernel_o) + self.bias_o
        else:
            raise ValueError('Unknown `implementation` mode.')

        # Each gate adds its recurrent term and its attention-context term.
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_i) +
            K.dot(z_hat, self.attention_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_f) +
            K.dot(z_hat, self.attention_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c) +
            K.dot(z_hat, self.attention_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1 * rec_dp_mask[3], self.recurrent_kernel_o) +
            K.dot(z_hat, self.attention_o))
    h = o * self.activation(c)
    # Flag the output as learning-phase dependent when any dropout is set.
    if 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    if self.return_attention:
        return context_sequence, [h, c]
    else:
        return h, [h, c]
def sampleLoss(true_y, pred_y):
    """Return ``-0.5 * mean(1 + s - m**2 - exp(s))`` over the last axis.

    ``m`` and ``s`` are ``pred_y[:, :, :, :, 0]`` and
    ``pred_y[:, :, :, :, 1]``, each given a trailing axis with
    ``tf.expand_dims`` before the reduction.

    Args:
        true_y: not used by this function.
        pred_y: tensor indexed as ``pred_y[:, :, :, :, k]`` for k in {0, 1}.

    Returns:
        The loss tensor reduced over axis -1.
    """
    mean = tf.expand_dims(pred_y[:, :, :, :, 0], -1)
    log_sigma = tf.expand_dims(pred_y[:, :, :, :, 1], -1)
    kl_terms = 1 + log_sigma - K.square(mean) - K.exp(log_sigma)
    return -0.5 * K.mean(kl_terms, axis=-1)
def vae_loss(x, x_decoded_mean):
    """Return the scaled binary cross-entropy plus the KL term.

    ``input_dim``, ``z_mean`` and ``z_log_var`` are free variables taken
    from the enclosing scope.

    Args:
        x: first argument passed to ``objectives.binary_crossentropy``.
        x_decoded_mean: second argument passed to
            ``objectives.binary_crossentropy``.

    Returns:
        ``input_dim * binary_crossentropy + kl``.
    """
    reconstruction = input_dim * objectives.binary_crossentropy(
        x, x_decoded_mean)
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    divergence = -0.5 * K.sum(kl_terms, axis=-1)
    return reconstruction + divergence
def sampling(args):
    """Sample ``z_mean + exp(z_log_var / 2) * eps`` with per-sample noise.

    ``latent_dim`` and ``epsilon_std`` are free variables taken from the
    enclosing scope.

    Args:
        args: pair ``(z_mean, z_log_var)`` of tensors (assumed to be
            (batch, latent_dim) -- TODO confirm against the caller).

    Returns:
        The sampled latent tensor, same shape as ``z_mean``.
    """
    z_mean, z_log_var = args
    # Draw one noise vector per batch row. With shape=(latent_dim,) a single
    # epsilon was broadcast to every sample, making the noise identical
    # across the whole batch.
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., std=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon