def call(self, x):
    """Sample a latent vector from a ``[mean, stddev]`` pair of tensors.

    Depending on ``self.reg`` a KL-divergence regulariser is registered with
    ``self.add_loss`` before sampling:

    * ``'bvae'`` -- ``self.beta * |KL - self.capacity / latent_size|``
    * ``'vae'``  -- the plain KL term
    * any other value -- no loss is added

    Args:
        x: sequence of exactly two rank-2 tensors ``[mean, stddev]``.  The KL
            term uses ``stddev`` as a log-variance
            (``1 + s - mean**2 - exp(s)``), so the sampler does the same.

    Returns:
        ``mean + exp(stddev / 2) * epsilon`` when ``self.random`` is truthy,
        otherwise ``mean + 0 * stddev``.

    Raises:
        ValueError: if ``x`` does not hold exactly two entries, or either
            entry is not rank 2.
    """
    if len(x) != 2:
        raise ValueError('input layers must be a list: mean and stddev')
    if len(x[0].shape) != 2 or len(x[1].shape) != 2:
        raise ValueError('input shape is not a vector [batchSize, latentSize]')

    mean = x[0]
    stddev = x[1]

    if self.reg in ('bvae', 'vae'):
        # kl divergence against a unit Gaussian, averaged over the last axis
        latent_loss = -0.5 * K.mean(
            1 + stddev - K.square(mean) - K.exp(stddev), axis=-1)
        if self.reg == 'bvae':
            # use beta to force less usage of vector space, while also
            # trying to use <capacity> dimensions of the space
            latent_loss = self.beta * K.abs(
                latent_loss - self.capacity / self.shape.as_list()[1])
        self.add_loss(latent_loss, x)

    if self.random:
        # 'reparameterization trick'.  The KL term above treats `stddev` as
        # a log-variance, so the matching standard deviation is
        # exp(stddev / 2); exp(stddev) would scale the noise by the variance.
        epsilon = K.random_normal(shape=self.shape, mean=0., stddev=1.)
        return mean + K.exp(stddev / 2) * epsilon

    # do not perform random sampling, simply grab the impulse value
    return mean + 0 * stddev  # Keras needs the *0 so the gradient is not None
def manhattan_distance(left, right):
    """Return ``exp(-d)`` where ``d`` is the L1 distance between the inputs.

    The absolute differences are summed along axis 1 and that axis is kept
    with size 1 (``keepdims=True``).
    """
    l1_distance = K.sum(K.abs(left - right), axis=1, keepdims=True)
    return K.exp(-l1_distance)
def gaussian_log_nll(y_true, y_pred):
    """Gaussian negative log-likelihood of ``y_true`` around ``exp(y_pred)``.

    ``variance`` is not a parameter; it is read from the enclosing scope.
    The per-element terms are summed over the last axis.
    """
    squared_error = K.square(y_true - K.exp(y_pred))
    per_element = (
        0.5 * K.log(2 * np.pi)
        + 0.5 * K.log(variance)
        + (0.5 / variance) * squared_error
    )
    return K.sum(per_element, axis=-1)
def vae_kl_loss(y_true, y_pred):
    """KL term of a VAE, written with the Keras ``(y_true, y_pred)`` signature.

    Both arguments are ignored; the value is computed from ``vae_z_mean`` and
    ``vae_z_log_var`` taken from the enclosing scope, averaged over the last
    axis.
    """
    kl_terms = (
        1 + vae_z_log_var - K.square(vae_z_mean) - K.exp(vae_z_log_var)
    )
    return -0.5 * K.mean(kl_terms, axis=-1)
# parser.add_argument("-w", "--weights", help=help_) # help_ = "Use mse loss instead of binary cross entropy (default)" # parser.add_argument("-m", "--mse", help=help_, action='store_true') # args = parser.parse_args() models = (encoder, decoder) #data = (x_test, y_test) # VAE loss = mse_loss or xent_loss + kl_loss # if args.mse: # reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs)) # else: reconstruction_loss = binary_crossentropy(K.flatten(inputs), K.flatten(outputs)) reconstruction_loss *= 120 * 160 * 3 kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='rmsprop') vae.summary() plot_model(vae, to_file='vae_cnn.png', show_shapes=True) train_gen = generator(opts, gen_records, cfg.BATCH_SIZE, True) val_gen = generator(opts, gen_records, cfg.BATCH_SIZE, False) # if args.weights: # vae.load_weights(args.weights) # else: # train the autoencoder
def sampling(args):
    """Draw a random latent sample using the reparameterization trick.

    Args:
        args: pair ``(z_mean_, z_log_var_)`` of tensors holding the mean and
            the log-variance of the latent Gaussian.

    Returns:
        ``z_mean_ + exp(z_log_var_ / 2) * epsilon`` with ``epsilon`` drawn
        from a standard normal.
    """
    z_mean_, z_log_var_ = args
    # Size the noise from the input at run time.  A fixed
    # (BATCH_SIZE, LATENT_DIM) shape fails on any batch of a different size,
    # e.g. a final partial batch or single-sample inference.
    epsilon = K.random_normal(shape=K.shape(z_mean_), mean=0.)
    return z_mean_ + K.exp(z_log_var_ / 2) * epsilon
def reparameterization_trick():
    """Return ``mean + exp(logvar / 2) * noise`` with standard-normal noise.

    Takes no arguments: ``mean`` and ``logvar`` come from the enclosing
    scope, and the noise is shaped like ``logvar.shape``.
    """
    noise = K.random_normal(shape=logvar.shape, mean=0., stddev=1.)
    scale = K.exp(logvar * 0.5)
    return mean + scale * noise
def sampling(z_mean, z_log_sigma, batch_size, latent_dim):
    """Sample ``z_mean + exp(z_log_sigma / 2) * noise``.

    The noise is standard normal with shape ``(batch_size, latent_dim)``.
    """
    noise_shape = (batch_size, latent_dim)
    noise = K.random_normal(shape=noise_shape, mean=0., stddev=1.)
    scale = K.exp(z_log_sigma / 2)
    # equivalent to e * std + mean
    return z_mean + scale * noise
def sample2(self, eps):
    """Shift and scale the given noise: ``self.mean + exp(self.logsd) * eps``."""
    scaled_noise = K.exp(self.logsd) * eps
    return self.mean + scaled_noise
def kl_loss(x, x_decoded_mean):
    """Batch-mean KL term in Keras loss-function form.

    Both arguments are ignored; ``z_mean`` and ``z_log_var`` are read from
    the enclosing scope.  The term is summed over the last axis and then
    averaged over everything that remains.
    """
    per_dimension = 1. + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    per_sample = - 0.5 * K.sum(per_dimension, axis=-1)
    return K.mean(per_sample)
def sampling(args):
    """Sample ``mu + exp(sigma / 2) * noise`` for ``args == (mu, sigma)``.

    The standard-normal noise takes the run-time shape of ``mu``.
    """
    mu, sigma = args
    noise = K.random_normal(shape=K.shape(mu))
    scale = K.exp(sigma / 2)
    return mu + scale * noise
def sampling(args):
    """Draw a latent sample using the reparameterization trick.

    Args:
        args: pair ``(z_mean, z_log_var)``: mean and log-variance tensors of
            the latent Gaussian.  The noise shape is taken from the dynamic
            first dimension and the static second dimension of ``z_mean``.

    Returns:
        ``z_mean + exp(z_log_var / 2) * epsilon`` with standard-normal
        ``epsilon``.
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim), mean=0.)
    # The standard deviation is exp(log_var / 2).  exp(log_var) is the
    # variance and would scale the noise by sigma**2.
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
def call(self, inputs, **kwargs):
    """Return ``exp(-gamma * sum((x - kernel)**2))`` for ``inputs == [x, kernel]``.

    ``inputs[0]`` gets a trailing axis added before the subtraction, and the
    squared differences are summed over axis -2.
    """
    x = K.expand_dims(inputs[0], axis=-1)
    kernel = inputs[1]
    squared_distance = K.sum(K.square(x - kernel), axis=-2)
    return K.exp(-self.gamma * squared_distance)
def softmax(x, axis=-1):
    """Softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating, which
    leaves the result unchanged but keeps the exponent non-positive.
    """
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exponentials = K.exp(shifted)
    normaliser = K.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normaliser
def call(self, x, **kwargs):
    """Compute ``exp(-L1(x[0], x[1]))``, store it on ``self.result`` and return it.

    The absolute differences are summed along axis 1, keeping that axis.
    """
    first, second = x[0], x[1]
    l1_distance = backend.sum(backend.abs(first - second), axis=1, keepdims=True)
    self.result = backend.exp(-l1_distance)
    return self.result
def call(self, x):
    """Attention-weighted sum of five 768-wide slices of ``x``'s last axis.

    For each slice an energy is computed from ``self.W`` and ``self.b``; the
    five energies are concatenated on axis 2, passed through ``tanh`` and
    ``exp``, and normalised by their sum.  The result is the sum of each
    slice multiplied by its weight.  Intermediate tensors are printed.
    """
    chunk_width = 768
    chunk_count = 5

    energies = []
    for idx in range(chunk_count):
        start, stop = idx * chunk_width, (idx + 1) * chunk_width
        flat_chunk = K.reshape(x[:, :, start:stop], (-1, self.features_dim))
        weight_column = K.reshape(self.W, (self.features_dim, 1))
        energy = K.reshape(K.dot(flat_chunk, weight_column), (-1, self.step_dim))
        energy += self.b
        energies.append(K.expand_dims(energy))

    eij = keras.layers.concatenate(energies, axis=2)
    print(eij)

    a = K.exp(K.tanh(eij))
    a /= K.cast(K.sum(a, axis=2, keepdims=True) + K.epsilon(), K.floatx())
    print(a)

    temp = None
    for idx in range(chunk_count):
        start, stop = idx * chunk_width, (idx + 1) * chunk_width
        weighted = a[:, :, idx:idx + 1] * x[:, :, start:stop]
        temp = weighted if temp is None else temp + weighted
    print(temp)
    return temp
def logps(self, x):
    """Element-wise log-density of ``x`` under a Gaussian.

    The Gaussian has mean ``self.mean`` and log standard deviation
    ``self.logsd``.
    """
    log_two_pi = np.log(2 * np.pi)
    squared_deviation = (x - self.mean)**2
    variance = K.exp(2. * self.logsd)
    return -0.5 * (log_two_pi + 2. * self.logsd + squared_deviation / variance)
def call(self, inputs):
    """Sample ``noise * exp(log_var / 2) + mean`` for ``inputs == (mean, log_var)``.

    The standard-normal noise takes the run-time shape of ``log_var``.
    """
    mean, log_var = inputs
    noise = K.random_normal(tf.shape(log_var), mean=0., stddev=1.)
    std = K.exp(log_var / 2)
    # equivalent to e * std + mean
    return noise * std + mean
def call(self, x, **kwargs):
    """Compute ``exp(-mean(a * b))`` of the L2-normalised inputs.

    ``x[0]`` and ``x[1]`` are each L2-normalised along the last axis, and the
    element-wise product is averaged over that axis (kept with size 1).  The
    result is stored on ``self.result`` and returned.
    """
    unit_first = K.l2_normalize(x[0], axis=-1)
    unit_second = K.l2_normalize(x[1], axis=-1)
    mean_product = K.mean(unit_first * unit_second, axis=-1, keepdims=True)
    self.result = K.exp(-mean_product)
    return self.result
def expontial(x):
    """Apply the backend exponential ``K.exp`` to ``x`` and return the result."""
    exponentiated = K.exp(x)
    return exponentiated
def perplexity(y_true, y_pred):
    """Return ``exp`` of the mean sparse categorical cross-entropy."""
    mean_cross_entropy = K.mean(K.sparse_categorical_crossentropy(y_true, y_pred))
    return K.exp(mean_cross_entropy)
def calculate_kl_loss(mu, sigma):
    """
    Compute the KL loss term ``-0.5 * mean(1 + sigma - mu**2 - exp(sigma))``.

    The mean is taken over all elements.  No tolerance or early-stopping
    threshold is applied inside this function.
    """
    # Kullback-Leibler loss between normal distributions
    kl_terms = 1.0 + sigma - k.square(mu) - k.exp(sigma)
    return -0.5 * k.mean(kl_terms)
def build(self, input_shape):
    """Create the RBM parameters and the backend functions built on them.

    Creates ``rbm_weight`` (``input_shape[1]`` x ``self.output_dim``),
    ``hidden_bias`` and ``visible_bias``.  Depending on ``self.mode`` it then
    wires up ``transform_func`` (visible input -> sampled hidden units) and
    ``inv_transform_func`` (hidden input -> sampled visible units), and
    finally ``free_energy_func``.

    Args:
        input_shape: shape of the layer input; only ``input_shape[1]`` (the
            number of visible units) is read here.
    """
    visible_dim = input_shape[1]
    batch_size = self.hps['batch_size']

    self.rbm_weight = self.add_weight(
        name='rbm_weight',
        shape=(visible_dim, self.output_dim),
        initializer='uniform',  # Which initializer is optimal?
        trainable=True)
    self.hidden_bias = self.add_weight(name='rbm_hidden_bias',
                                       shape=(self.output_dim, ),
                                       initializer='uniform',
                                       trainable=True)
    # Created with K.variable rather than add_weight.
    self.visible_bias = K.variable(initializers.get('uniform')((visible_dim, )),
                                   dtype=K.floatx(),
                                   name='rbm_visible_bias')

    # Make symbolic computation objects.
    if self.mode == MODE_VISIBLE_BERNOULLI:
        # Transform visible units.
        self.input_visible = K.placeholder(shape=(None, visible_dim),
                                           name='input_visible')
        # The activation has one column per hidden unit, so the uniform draw
        # it is compared with must be (batch, output_dim).  K.cast needs an
        # explicit dtype to turn the boolean comparison into 0/1 samples.
        self.transform = K.cast(
            K.less(
                K.random_uniform(shape=(batch_size, self.output_dim)),
                K.sigmoid(
                    K.dot(self.input_visible, self.rbm_weight) +
                    self.hidden_bias)),
            K.floatx())
        self.transform_func = K.function([self.input_visible],
                                         [self.transform])

        # Transform hidden units.
        self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                          name='input_hidden')
        self.inv_transform = K.cast(
            K.less(
                K.random_uniform(shape=(batch_size, visible_dim)),
                K.sigmoid(
                    K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                    self.visible_bias)),
            K.floatx())
        self.inv_transform_func = K.function([self.input_hidden],
                                             [self.inv_transform])
    elif self.mode == MODE_VISIBLE_GAUSSIAN:
        # Transform visible units.
        self.input_visible = K.placeholder(shape=(None, visible_dim),
                                           name='input_visible')
        # Same shape and dtype reasoning as in the Bernoulli branch.
        self.transform = K.cast(
            K.less(
                K.random_uniform(shape=(batch_size, self.output_dim)),
                K.relu(
                    K.dot(self.input_visible, self.rbm_weight) +
                    self.hidden_bias)),
            K.floatx())  #?
        self.transform_func = K.function([self.input_visible],
                                         [self.transform])

        # Transform hidden units.
        self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                          name='input_hidden')
        self.inv_transform = Ke.multivariate_normal_diag(
            loc=(K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                 self.visible_bias),
            scale_diag=np.ones(shape=(batch_size, visible_dim))).sample()
        self.inv_transform_func = K.function([self.input_hidden],
                                             [self.inv_transform])
    else:
        # TODO
        # NOTE(review): the free-energy code below reads self.input_visible,
        # which within this method is only assigned in the two branches above.
        pass

    # Calculate free energy. #?
    visible_term = K.squeeze(
        K.dot(self.input_visible, K.expand_dims(self.visible_bias, axis=-1)),
        -1)
    hidden_term = K.sum(
        K.log(1 + K.exp(K.dot(self.input_visible, self.rbm_weight) +
                        self.hidden_bias)),
        axis=-1)
    self.free_energy = -1 * (visible_term + hidden_term)
    self.free_energy_func = K.function([self.input_visible],
                                       [self.free_energy])

    super(RBM, self).build(input_shape)
def call(self, inputs, states, training=None):
    """Run one step of an LSTM-style cell with soft attention over ``inputs``.

    Attention weights are computed from the previous hidden state and
    ``inputs``, turned into a context vector ``z_hat``, and that context is
    fed into every gate alongside the usual input and recurrent terms.

    Args:
        inputs: input tensor.  NOTE(review): it is multiplied by a
            (batch, timesteps, input_dim)-style attention tensor and summed
            over axis 1, so it looks like a full sequence -- confirm.
        states: list whose first two entries are the previous hidden state
            and the previous carry state.
        training: only checked for ``None`` at the end of the method.

    Returns:
        ``(h, [h, c])``: the new hidden state and the new state list.
    """
    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # alignment model
    # Repeat the previous hidden state `timestep_dim` times so it can be
    # combined with the per-timestep projection of the inputs.
    h_att = K.repeat(h_tm1, self.timestep_dim)
    att = _time_distributed_dense(inputs,
                                  self.attention_weights,
                                  self.attention_bias,
                                  input_dim=self.input_dim,
                                  output_dim=self.units,
                                  timesteps=self.timestep_dim)
    attention_ = self.attention_activation(
        K.dot(h_att, self.attention_recurrent_weights) + att)  # energy
    # NOTE(review): `attention_recurrent_bias` is used as the right operand
    # of a dot product here, not added as a bias -- confirm naming/intent.
    attention_ = K.squeeze(K.dot(attention_, self.attention_recurrent_bias),
                           2)  # energy

    # Softmax over axis 1, written out by hand: exp, then divide by the sum.
    alpha = K.exp(attention_)

    # The first input-dropout mask is applied to the unnormalised weights
    # whenever a mask exists.
    if dp_mask is not None:
        alpha *= dp_mask[0]

    alpha /= K.sum(alpha, axis=1, keepdims=True)
    # Repeat the weights `input_dim` times and swap the last two axes so they
    # can be multiplied element-wise with `inputs`.
    alpha_r = K.repeat(alpha, self.input_dim)
    alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

    # make context vector (soft attention after Bahdanau et al.)
    z_hat = inputs * alpha_r
    # `context_sequence` is assigned but not read again in this method.
    context_sequence = z_hat
    z_hat = K.sum(z_hat, axis=1)

    if self.implementation == 1:
        # Implementation 1: separate kernels and separate dropout masks for
        # each of the four gates.
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        # Each gate sums its input term, its recurrent term and the attention
        # context projected by a gate-specific matrix.
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1_i, self.recurrent_kernel_i) +
            K.dot(z_hat, self.attention_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f) +
            K.dot(z_hat, self.attention_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c) +
            K.dot(z_hat, self.attention_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1_o, self.recurrent_kernel_o) +
            K.dot(z_hat, self.attention_o))
    else:
        # Other implementations: one fused kernel for all four gates and a
        # single dropout mask each for the inputs and the recurrent state.
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        z += K.dot(z_hat, self.attention_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        # Split the fused pre-activation into four `units`-wide slices, in
        # the order input gate, forget gate, candidate, output gate.
        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        z2 = z[:, 2 * self.units:3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    # Flag the output as learning-phase dependent when any dropout is
    # configured and no explicit `training` value was passed.
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def predict(input_x):
    """Collapse per-class scores into one value: ``1 - 1 / (sum(exp(x)) + 1)``.

    The sum runs over the last axis, which is kept with size 1.
    """
    # Transform distribution over real classes into binary real-vs-fake probability
    exp_total = K.sum(K.exp(input_x), axis=-1, keepdims=True)
    return 1. - (1. / (exp_total + 1.))
def sampling(self, args):
    """Sample ``z_mean + exp(z_log_var / 2) * noise`` for ``args == (z_mean, z_log_var)``.

    The standard-normal noise has shape ``(run-time batch size, self.z_dim)``.
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    noise = K.random_normal(shape=(batch, self.z_dim), mean=0., stddev=1.)
    scale = K.exp(z_log_var / 2.)
    return z_mean + scale * noise
def sampling(self, args):
    """Draw a latent sample using the reparameterization trick.

    Args:
        args: pair ``(z_mean, z_logvar)``: mean and log-variance tensors of
            the latent Gaussian.

    Returns:
        ``z_mean + exp(z_logvar / 2) * epsilon`` where ``epsilon`` is
        standard-normal noise with the same run-time shape as ``z_mean``.
    """
    z_mean, z_logvar = args
    # Draw independent noise for every element of z_mean.  A single
    # (latent_dim,) vector would be broadcast across the batch, so every
    # sample in the batch would share the same epsilon.
    epsilon = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(z_logvar * 0.5) * epsilon
def call(self, x, **kwargs):
    """Compute ``exp(-L1(x[0], x[1]))``, store it on ``self.result`` and return it.

    The absolute differences are summed along axis 1, keeping that axis.
    """
    left, right = x[0], x[1]
    l1_distance = K.sum(K.abs(left - right), axis=1, keepdims=True)
    self.result = K.exp(-l1_distance)
    return self.result
def sampling(args):
    """Sample ``noise * exp(z_log_var / 2) + z_mean`` for ``args == (z_mean, z_log_var)``.

    The noise is normal with mean 0 and standard deviation ``noise_std``
    (read from the enclosing scope), shaped like ``z_log_var`` at run time.
    """
    z_mean, z_log_var = args
    noise = K.random_normal(shape=K.shape(z_log_var), mean=0., stddev=noise_std)
    scale = K.exp(.5 * z_log_var)
    return noise * scale + z_mean
def my_kl(self, inputs, outputs):
    """Per-sample KL term from ``self.z_mean`` and ``self.z_log_var``.

    ``inputs`` and ``outputs`` are ignored.  The term is summed over the last
    axis and scaled by -0.5.
    """
    kl_terms = (
        1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var)
    )
    return K.sum(kl_terms, axis=-1) * -0.5
def kl_unit_normal(_mean, _logvar):
    """KL divergence from a unit Gaussian, summed over the last two axes."""
    # KL divergence has a closed form solution for unit gaussian
    # See: https://stats.stackexchange.com/questions/318184/kl-loss-with-a-unit-gaussian
    kl_terms = 1.0 + _logvar - K.square(_mean) - K.exp(_logvar)
    return -0.5 * K.sum(kl_terms, axis=[-1, -2])