def build_optimizer(logits, y, learning_rate=0.0001):
    """Build the cross-entropy loss and the AMSGrad op that minimises it.

    Args:
        logits: Tensor passed as ``logits`` to
            ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
        y: Tensor passed as ``labels`` to the same op.
        learning_rate: Step size handed to ``AMSGrad``. Defaults to
            ``0.0001``, the value that used to be hard-coded here.

    Returns:
        A ``(loss, training_op)`` tuple: the mean cross-entropy tensor and
        the op returned by ``optimizer.minimize(loss)``.
    """
    with tf.name_scope('loss'):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits)
        loss = tf.reduce_mean(xentropy, name='loss')

    with tf.name_scope('train'):
        lprint('learning_rate =', learning_rate)
        optimizer = AMSGrad(learning_rate=learning_rate, beta1=0.9,
                            beta2=0.99, epsilon=1e-8)
        lprint('optimizer = ', optimizer)
        training_op = optimizer.minimize(loss)

    return loss, training_op
def __init__(self, input_size_a, input_size):
    """Build the graph, the squared-error loss and the AMSGrad train op.

    Also opens a session, runs the global variable initialiser in it and
    creates a ``tf.train.Saver``.

    Args:
        input_size_a: Width of the ``x_a`` placeholder.
        input_size: Width of the ``x`` placeholder.
    """
    super(Extra_Attention_MLP_tf, self).__init__()
    print("init NN model...")

    self.saver = None  # replaced by a real Saver at the end of this method
    self.sess = tf.Session()
    self.input_size, self.input_size_a = input_size, input_size_a

    # Graph inputs; the target placeholder has three columns.
    self.X = tf.placeholder(tf.float32, shape=(None, input_size), name='x')
    self.X_A = tf.placeholder(
        tf.float32, shape=(None, input_size_a), name='x_a')
    self.Y = tf.placeholder(tf.float32, shape=(None, 3), name='y')

    # NOTE(review): only the network build is placed inside the 'NN' scope
    # here -- confirm against the variable names in existing checkpoints.
    with tf.variable_scope('NN'):
        self.y_pred = self._build_net(
            self.X_A, self.X, scope='eval_net', trainable=True)

    # Mean squared error between target and prediction.
    residual = self.Y - self.y_pred
    self.supervised_loss = tf.reduce_mean(tf.square(residual))

    # Exposed as an attribute, but the train op below is built with AMSGrad.
    self.supervised_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    amsgrad = AMSGrad(learning_rate=0.01, beta1=0.9, beta2=0.99,
                      epsilon=1e-8)
    self.supervised_train_op = amsgrad.minimize(self.supervised_loss)

    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    self.graph = tf.get_default_graph()
class OptimizerVGC(object):
    """Loss and AMSGrad update ops built from reconstruction, KL and category terms."""

    def __init__(self, preds, labels, preds_X, labels_X, model, num_nodes,
                 pos_weight, norm):
        """Assemble ``self.cost`` and the ops that minimise it.

        Args:
            preds: Logits for the structure reconstruction.
            labels: Targets for the structure reconstruction.
            preds_X: Logits for the attribute (content) reconstruction.
            labels_X: Targets for the attribute (content) reconstruction.
            model: Object exposing ``z_mean``, ``z_log_std``, ``y`` and
                ``z_prior_mean``.
            num_nodes: Divisor applied to the KL and category terms.
            pos_weight: ``pos_weight`` for both weighted cross-entropies.
            norm: Scale factor applied to both reconstruction terms.
        """
        # Structure recovery loss.
        structure_xent = tf.nn.weighted_cross_entropy_with_logits(
            logits=preds, targets=labels, pos_weight=pos_weight)
        self.cost = norm * tf.reduce_mean(structure_xent)

        # Attribute (content) recovery loss, added on top.
        attribute_xent = tf.nn.weighted_cross_entropy_with_logits(
            logits=preds_X, targets=labels_X, pos_weight=pos_weight)
        self.cost += norm * tf.reduce_mean(attribute_xent)

        # cat_loss is the second item in our paper, kl_loss the third.
        z_mean = K.expand_dims(model.z_mean, 1)  # built but not read below
        z_log_var = K.expand_dims(model.z_log_std, 1)
        y = model.y
        z_prior_mean = model.z_prior_mean

        kl_loss = -0.5 * (z_log_var - K.square(z_prior_mean))
        kl_weighted = K.batch_dot(K.expand_dims(y, 1), kl_loss)
        kl_loss = K.mean(kl_weighted, 0) / num_nodes

        cat_loss = K.mean(y * K.log(y + K.epsilon()), 0) / num_nodes

        # Final loss: scalar sum of the three parts, clipped from below.
        total = K.sum(self.cost) + K.sum(kl_loss) + K.sum(cat_loss)
        self.cost = tf.clip_by_value(total, 1e-10, 1e100)

        self.optimizer = AMSGrad(learning_rate=FLAGS.learning_rate,
                                 beta1=0.9, beta2=0.99, epsilon=1e-8)
        self.opt_op = self.optimizer.minimize(self.cost)
        self.grads_vars = self.optimizer.compute_gradients(self.cost)
tf.nn.sigmoid_cross_entropy_with_logits(labels=masked_y, logits=logits)) # Total Cost Function with tf.name_scope('Total_Cost'): cost = ms_cost #cost = 0.1*ms_cost + entropy_cost #cost = entropy_cost #cost = ms_cost - 10.0*ssim_cost cost_2 = ms_cost + 10.0 * entropy_cost cost_3 = ms_cost + 10.0 * entropy_cost - 10.0 * ssim_cost # Run Adam Optimizer to minimize cost with tf.name_scope('Optimizer'): if use_AMSGrad: optimizer = AMSGrad(learning_rate=learning_rate, epsilon=1e-06).minimize(cost) optimizer_2 = AMSGrad(learning_rate=learning_rate, epsilon=1e-06).minimize(cost_2) optimizer_3 = AMSGrad(learning_rate=learning_rate, epsilon=1e-06).minimize(cost_3) else: optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-06).minimize(cost) optimizer_2 = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-06).minimize(cost_2) optimizer_3 = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-06).minimize(cost_3) # Define summary of cost for log file tf.summary.scalar("MS_Loss", ms_cost) #tf.summary.scalar("SSIM_Loss", ssim_cost)
history = get_history_layer(history_input, keep_prob, ht=state_fw) context = tf.concat([I, history], axis=1) print 'ss', context pred = tf.matmul(context, weights['out']) + biases['out'] preds = tf.nn.softmax(pred) #optimizer latent_loss = 0.5 * tf.reduce_sum( tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean), 1) latent_loss = tf.reduce_mean(latent_loss) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=next_x, logits=pred)) # T_COST = cost + 0.01 * latent_loss optimizer_class = AMSGrad(learning_rate=it_learning_rate, beta1=0.9, beta2=0.99, epsilon=1e-8).minimize(T_COST) pred_mask = tf.arg_max(pred, 1) # a list of result def eos_sentence_batch(sentence_batch, eos_in): return [sentence + [eos_in] for sentence in sentence_batch] initial = tf.global_variables_initializer() def compute(): print 'start train' all_vars = tf.trainable_variables() for v in all_vars[9:17]:
def g_optimizer(self, *args, **kwargs):
    """Return a new ``AMSGrad`` built from the given arguments, unchanged."""
    optimizer = AMSGrad(*args, **kwargs)
    return optimizer
def build_faae_harness(image_input: tf.Tensor,
                       noise: tf.Tensor,
                       generator: tf.keras.Model,
                       discriminator: tf.keras.Model,
                       encoder: tf.keras.Model,
                       generator_learning_rate=1e-4,
                       discriminator_learning_rate=2e-4,
                       reconstruction_learning_rate=5e-5,
                       noise_format: str = 'SPHERE',
                       adversarial_training: str = 'WASSERSTEIN',
                       no_trainer: bool = False,
                       summarize_activations: bool = False):
    """Wire generator, discriminator and encoder into an AEGAN model with losses.

    Args:
        image_input: Real-image tensor; its second dimension is reported
            as the image size.
        noise: Tensor fed to the generator.
        generator: Called as ``generator([z], training=True)``.
        discriminator: Called as ``discriminator([x, z], training=True)``.
        encoder: Called as ``encoder([x], training=True)``.
        generator_learning_rate: AMSGrad step size for the generator.
        discriminator_learning_rate: AMSGrad step size for the discriminator.
        reconstruction_learning_rate: AMSGrad step size for the
            reconstruction objective.
        noise_format: Accepted but not read inside this function.
        adversarial_training: Loss name forwarded to ``gan_loss_by_name``.
        no_trainer: When true no train ops are built.
        summarize_activations: When true, extra tf.contrib summaries are
            added.

    Returns:
        ``(gan_model, gan_loss, rec_loss, train_ops)``; ``train_ops`` is
        ``None`` when ``no_trainer`` is set.
    """
    side = image_input.shape.as_list()[1]
    print("Flipped Adversarial Auto-Encoder: {}x{} images".format(side, side))

    def _generator_fn(z):
        return generator([z], training=True)

    def _encoder_fn(x):
        return encoder([x], training=True)

    def _discriminator_fn(x, z):
        return discriminator([x, z], training=True)

    gan_model = aegan_model(
        _generator_fn,
        _discriminator_fn,
        _encoder_fn,
        image_input,
        noise,
        generator_scope='Generator',
        discriminator_scope='Discriminator',
        encoder_scope='Encoder',
        # set to False for 2-level architectures
        check_shapes=True)

    image_grid_summary(gan_model.generated_data, grid_size=3,
                       name='generated_data')

    # NOTE(review): both summary calls are placed under the flag here --
    # confirm the second one was not meant to run unconditionally.
    if summarize_activations:
        tf.contrib.layers.summarize_activations()
        tf.contrib.layers.summarize_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)

    # Histogram of the encoded Z, recorded under the encoder's scope.
    with tf.variable_scope(gan_model.encoder_scope):
        tf.summary.histogram('encoded_z', gan_model.encoder_gen_outputs)

    gan_loss = gan_loss_by_name(gan_model, adversarial_training,
                                add_summaries=True)

    # Auto-encoder reconstruction loss between generator input and its
    # re-encoding.
    rec_loss = code_autoencoder_mse_cosine(gan_model.generator_inputs,
                                           gan_model.encoder_gen_outputs,
                                           1e-3,
                                           add_summary=True)

    train_ops = None
    if not no_trainer:
        def _make_optimizer(step_size):
            return AMSGrad(step_size, beta1=0.5, beta2=0.999)

        train_ops = aegan_train_ops(
            gan_model,
            gan_loss,
            rec_loss,
            generator_optimizer=_make_optimizer(generator_learning_rate),
            discriminator_optimizer=_make_optimizer(
                discriminator_learning_rate),
            reconstruction_optimizer=_make_optimizer(
                reconstruction_learning_rate),
            summarize_gradients=True)

    return (gan_model, gan_loss, rec_loss, train_ops)
def _select_leading_classes(images, labels, mean_img, num_classes):
    """Keep the samples whose one-hot label is among the first ``num_classes``.

    Args:
        images: 2-D array with one sample per row.
        labels: 2-D one-hot array with one row per sample.
        mean_img: Array subtracted from every kept image.
        num_classes: Number of leading label columns to keep.

    Returns:
        ``(centred_images, truncated_labels)``; the labels are cut down to
        their first ``num_classes`` columns.
    """
    keep = np.any(labels[:, :num_classes] == 1, axis=1)
    return images[keep] - mean_img, labels[keep][:, :num_classes]


def nn(xae_model):
    """Train a small classifier on ``xae_model['z']`` and return its F1 score.

    MNIST samples of the first three classes are mean-centred, encoded by
    evaluating ``xae_model['z']`` with the images fed to ``xae_model['x']``,
    and used to train a three-layer network with AMSGrad. The score is
    computed on a batch drawn from the MNIST test split.

    NOTE(review): this function opens its own session and runs the global
    variable initialiser in it, so any variables behind ``xae_model['z']``
    are freshly initialised here; values trained in another session are not
    visible. Confirm that this is intended.

    Args:
        xae_model: Mapping with at least the tensors ``'x'`` (input
            placeholder) and ``'z'`` (code fed to the classifier).

    Returns:
        The weighted F1 score of the classifier on the evaluation batch.
    """
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)

    # Parameters
    learning_rate = 0.01
    num_steps = 10000
    batch_size = 1280   # raw samples drawn per step, before class filtering
    train_size = 128    # filtered samples used for each update
    test_size = 1000
    display_step = 1000

    # Network parameters
    n_hidden_1 = 64     # 1st layer number of neurons
    n_hidden_2 = 64     # 2nd layer number of neurons
    num_input = 144     # must match the width of xae_model['z']
    num_classes = 3     # only the first three MNIST classes are used

    # tf Graph input
    X = tf.placeholder("float", [None, num_input])
    Y = tf.placeholder("float", [None, num_classes])

    # Layer weights and biases
    weights = {
        'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes])),
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([num_classes])),
    }

    def neural_net(x):
        # Two hidden fully connected layers followed by the output layer;
        # no activation is applied between them.
        layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
        layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
        return tf.matmul(layer_2, weights['out']) + biases['out']

    logits = neural_net(X)

    # Loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=Y))
    optimizer = AMSGrad(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Evaluation ops
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    prediction = tf.argmax(logits, 1)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for step in range(1, num_steps + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x, batch_y = _select_leading_classes(
                batch_x, batch_y, mean_img, num_classes)
            batch_x = sess.run(xae_model['z'],
                               feed_dict={xae_model['x']: batch_x})

            # Run optimization op (backprop) on the head of the batch.
            sess.run(train_op, feed_dict={X: batch_x[0:train_size],
                                          Y: batch_y[0:train_size]})

            if step % display_step == 0 or step == 1:
                # Loss and accuracy over the whole filtered batch.
                loss, acc = sess.run([loss_op, accuracy],
                                     feed_dict={X: batch_x, Y: batch_y})
                print("Step " + str(step) + ", Minibatch Loss= " +
                      "{:.4f}".format(loss) + ", Training Accuracy= " +
                      "{:.3f}".format(acc))

        print("Optimization Finished!")

        # Evaluate on held-out data: draw from the test split, not from the
        # training split the classifier was fitted on.
        test_x, test_y = mnist.test.next_batch(test_size)
        test_x, test_y = _select_leading_classes(
            test_x, test_y, mean_img, num_classes)
        test_x = sess.run(xae_model['z'], feed_dict={xae_model['x']: test_x})

        print("Testing Accuracy:",
              sess.run(accuracy, feed_dict={X: test_x, Y: test_y}))

        pred_y = sess.run(prediction, feed_dict={X: test_x})
        true_y = np.argmax(test_y, axis=1)
        score = f1_score(true_y, pred_y, average='weighted')
        print("f1 score:", score)

    return score
def _centred_digits(images, labels, mean_img, digits):
    '''Return the mean-centred images whose one-hot label is one of ``digits``.'''
    keep = np.any(labels[:, list(digits)] == 1, axis=1)
    return images[keep] - mean_img


def test_mnist():
    '''Train the eXclusiveAutoencoder on mixed MNIST digits and plot results.

    Two training inputs are fed at every step: the average of digit-0 and
    digit-1 images, and the average of digit-1 and digit-2 images. The
    layer-wise objective is optimised first, then the full objective. After
    each stage a reconstruction of 144 test images and the encoder weights
    are written out with ``display_network``.

    Returns:
        The object returned by ``eXclusiveAutoencoder(...)``.
    '''
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)

    ae = eXclusiveAutoencoder(
        input_dimensions=784,
        layers=[
            {
                'n_channels': 144,
                'reconstructive_regularizer': 1.0,
                'weight_decay': 1.0,
                'sparse_regularizer': 1.0,
                'sparsity_level': 0.05,
                'exclusive_regularizer': 1.0,
                'exclusive_type': 'logcosh',
                'exclusive_logcosh_scale': 10.0,
                'corrupt_prob': 1.0,
                'tied_weight': True,
                'encode': 'sigmoid',
                'decode': 'linear',
                'pathways': [
                    range(0, 96),
                    range(48, 144),
                ],
            },
        ],
        init_encoder_weight=None,
        init_decoder_weight=None,
        init_encoder_bias=None,
        init_decoder_bias=None,
    )

    learning_rate = 0.01
    n_reload_per_epochs = 10
    n_display_per_epochs = 10000
    batch_size = 2000
    n_epochs = 100000
    n_examples = 5120
    cost_names = ('total', 'reconstruction_error', 'sparsity',
                  'exclusivity', 'weight_decay')

    # One optimizer per layer (encoder variables only), then one for the
    # full objective.
    optimizer_list = []
    for layer_i in range(1):
        optimizer_list.append(
            AMSGrad(learning_rate).minimize(
                ae['layerwise_cost'][layer_i]['total'],
                var_list=[
                    ae['encoder_weight'][layer_i],
                    ae['encoder_bias'][layer_i],
                ]))
    optimizer_list.append(
        AMSGrad(learning_rate).minimize(ae['cost']['total']))

    sess = tf.Session()
    writer = tf.summary.FileWriter('logs', sess.graph)
    sess.run(tf.global_variables_initializer())

    for optimizer_i, optimizer in enumerate(optimizer_list):
        is_full_model = optimizer is optimizer_list[-1]

        for epoch_i in range(n_epochs):
            if epoch_i % n_reload_per_epochs == 0:
                # Draw a fresh batch and rebuild the two mixed inputs.
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                batch_x0 = _centred_digits(batch_xs, batch_ys, mean_img, (0,))
                batch_x1 = _centred_digits(batch_xs, batch_ys, mean_img, (1,))
                batch_x2 = _centred_digits(batch_xs, batch_ys, mean_img, (2,))

                # Truncate each pair to the shorter member before averaging.
                n01 = min(batch_x0.shape[0], batch_x1.shape[0])
                batch_x01 = 0.5 * (batch_x0[:n01] + batch_x1[:n01])
                n12 = min(batch_x1.shape[0], batch_x2.shape[0])
                batch_x12 = 0.5 * (batch_x1[:n12] + batch_x2[:n12])

                train = [batch_x01, batch_x12]
                feed = {ae['training_x'][0]: train[0],
                        ae['training_x'][1]: train[1]}

            sess.run(optimizer, feed_dict=feed)

            if (epoch_i + 1) % n_display_per_epochs == 0:
                if is_full_model:
                    cost_terms = ae['cost']
                    prefix = ('layer: full,', 'epoch:', epoch_i + 1)
                else:
                    cost_terms = ae['layerwise_cost'][optimizer_i]
                    prefix = ('layer:', optimizer_i + 1,
                              ', epoch:', epoch_i + 1)

                # Fetch every term in a single run so that all reported
                # values come from the same evaluation of the graph.
                (cost_total, cost_reconstruction_error, cost_sparsity,
                 cost_exclusivity, cost_weight_decay) = sess.run(
                    [cost_terms[name] for name in cost_names],
                    feed_dict=feed)

                print(*(prefix + (
                    ', total cost:', cost_total,
                    ', recon error:', cost_reconstruction_error,
                    ', sparsity:', cost_sparsity,
                    ', weight decay:', cost_weight_decay,
                    ', exclusivity: ', cost_exclusivity)))

        # NOTE(review): this evaluation is placed inside the per-optimizer
        # loop because it branches on the current optimizer -- confirm.
        test_xs, test_ys = mnist.test.next_batch(n_examples)
        test_xs = _centred_digits(
            test_xs, test_ys, mean_img, (0, 1, 2))[:144]

        if is_full_model:
            recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})
        else:
            # Index by the stage being trained rather than by the stale
            # loop variable left over from building the optimizers.
            recon = sess.run(ae['layerwise_y'][optimizer_i],
                             feed_dict={ae['x']: test_xs})
        weights = sess.run(ae['encoder_weight'][0])

        display_network(batch_x01[:144].transpose(),
                        filename='mnist_batch_01.png')
        display_network(batch_x12[:144].transpose(),
                        filename='mnist_batch_12.png')
        display_network(test_xs.transpose(), filename='mnist_test.png')
        display_network(recon.reshape((144, 784)).transpose(),
                        filename='mnist_results.png')
        display_network(weights, filename='mnist_weights.png')

    writer.close()
    sess.close()  # the session is local to this function; release it
    return ae
def __init__(self, is_training, config, input_):
    """Build the language-model graph and, when training, its update ops.

    Args:
        is_training: When false the method returns after the loss,
            regulariser and final state are set; no train op is built.
        config: Object providing ``hidden_size``, ``vocab_size``,
            ``keep_prob`` and ``max_grad_norm``.
        input_: Object providing ``batch_size``, ``num_steps``,
            ``input_data`` and ``targets``.
    """
    self._is_training = is_training
    self._input = input_
    self._rnn_params = None
    self._cell = None
    self.batch_size = input_.batch_size
    self.num_steps = input_.num_steps
    hidden_size = config.hidden_size
    vocab_size = config.vocab_size

    # The embedding table and its lookup are pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [vocab_size, hidden_size], dtype=data_type())
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
    print(inputs)

    use_dropout = is_training and config.keep_prob < 1
    if use_dropout:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    output, state = self._build_rnn_graph(inputs, config, is_training)

    softmax_w = tf.get_variable(
        "softmax_w", [hidden_size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable(
        "softmax_b", [vocab_size], dtype=data_type())
    flat_logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    # Sequence loss expects 3-D logits.
    logits = tf.reshape(
        flat_logits, [self.batch_size, self.num_steps, vocab_size])

    # Contrib sequence loss, averaged over the batch but not over time.
    unit_weights = tf.ones(
        [self.batch_size, self.num_steps], dtype=data_type())
    loss = tf.contrib.seq2seq.sequence_loss(
        logits,
        input_.targets,
        unit_weights,
        average_across_timesteps=False,
        average_across_batch=True)
    self._cost = tf.reduce_sum(loss)

    # The regulariser picks global variables by position, so it depends on
    # the order in which the variables above were created.
    # NOTE(review): presumably indices 1 and 3 are recurrent weights --
    # confirm against _build_rnn_graph.
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self._reg = (0.5 * new_rnn.get_co_loss(global_vars[1]) +
                 0.5 * new_rnn.get_co_loss(global_vars[3]))
    self._final_state = state

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self._cost + self._reg, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)

    # Only plain gradient descent reads self._lr; the other two optimizers
    # are constructed with their default arguments.
    if optimizerType == "AMS":
        optimizer = AMSGrad()
    elif optimizerType == "ADAM":
        optimizer = tf.train.AdamOptimizer()
    else:
        optimizer = tf.train.GradientDescentOptimizer(self._lr)

    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.train.get_or_create_global_step())

    self._new_lr = tf.placeholder(
        tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def __init__(self, preds, labels, preds_X, labels_X, model, num_nodes,
             pos_weight, norm, d_real, d_fake):
    """Build discriminator, generator and reconstruction losses with their ops.

    Args:
        preds: Logits for the structure reconstruction.
        labels: Targets for the structure reconstruction.
        preds_X: Logits for the attribute (content) reconstruction.
        labels_X: Targets for the attribute (content) reconstruction.
        model: Object exposing ``z_mean``, ``z_log_std``, ``y`` and
            ``z_prior_mean``.
        num_nodes: Divisor applied to the KL and category terms.
        pos_weight: ``pos_weight`` for both weighted cross-entropies.
        norm: Scale factor applied to both reconstruction terms.
        d_real: Discriminator logits scored against all-ones labels.
        d_fake: Discriminator logits scored against all-zeros labels.
    """
    # Discriminator loss: real logits towards 1, fake logits towards 0.
    real_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(d_real), logits=d_real)
    dc_loss_real = tf.reduce_mean(real_xent)
    fake_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(d_fake), logits=d_fake)
    dc_loss_fake = tf.reduce_mean(fake_xent)
    self.dc_loss = dc_loss_fake + dc_loss_real

    # Generator loss: fake logits towards 1.
    fooling_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(d_fake), logits=d_fake)
    self.generator_loss = tf.reduce_mean(fooling_xent)

    # Structure recovery loss.
    structure_xent = tf.nn.weighted_cross_entropy_with_logits(
        logits=preds, targets=labels, pos_weight=pos_weight)
    self.cost = norm * tf.reduce_mean(structure_xent)

    # Attribute (content) recovery loss, added on top.
    attribute_xent = tf.nn.weighted_cross_entropy_with_logits(
        logits=preds_X, targets=labels_X, pos_weight=pos_weight)
    self.cost += norm * tf.reduce_mean(attribute_xent)

    # cat_loss is the second item in our paper, kl_loss the third.
    z_mean = K.expand_dims(model.z_mean, 1)  # built but not read below
    z_log_var = K.expand_dims(model.z_log_std, 1)
    y = model.y
    z_prior_mean = model.z_prior_mean

    kl_loss = -0.5 * (z_log_var - K.square(z_prior_mean))
    kl_weighted = K.batch_dot(K.expand_dims(y, 1), kl_loss)
    kl_loss = K.mean(kl_weighted, 0) / num_nodes

    cat_loss = K.mean(y * K.log(y + K.epsilon()), 0) / num_nodes

    # Final loss; the generator objective uses it before clipping.
    self.cost = K.sum(self.cost) + K.sum(kl_loss) + K.sum(cat_loss)
    self.generator_loss = self.generator_loss + self.cost

    # Variables are split by substring match on their op names.
    trainable = tf.trainable_variables()
    dc_var = [v for v in trainable if 'dc_' in v.op.name]
    en_var = [v for v in trainable if 'e_' in v.op.name]

    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        discriminator_adam = tf.train.AdamOptimizer(
            learning_rate=FLAGS.discriminator_learning_rate,
            beta1=0.9,
            name='adam1')
        self.discriminator_optimizer = discriminator_adam.minimize(
            self.dc_loss, var_list=dc_var)

        # NOTE(review): the generator also reads
        # FLAGS.discriminator_learning_rate -- confirm this is intended.
        generator_adam = tf.train.AdamOptimizer(
            learning_rate=FLAGS.discriminator_learning_rate,
            beta1=0.9,
            name='adam2')
        self.generator_optimizer = generator_adam.minimize(
            self.generator_loss, var_list=en_var)

    self.cost = tf.clip_by_value(self.cost, 1e-10, 1e100)
    self.optimizer = AMSGrad(learning_rate=FLAGS.learning_rate,
                             beta1=0.9, beta2=0.99, epsilon=1e-8)
    self.opt_op = self.optimizer.minimize(self.cost)
    self.grads_vars = self.optimizer.compute_gradients(self.cost)
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
#from keras.optimizers import adam
import tensorflow as tf

# In[5]:

from AMSGrad import AMSGrad

# NOTE(review): `loss` is not defined anywhere above this statement in the
# visible code, so this line raises NameError when the script runs from the
# top -- confirm where the loss tensor is supposed to come from.
train_op = AMSGrad(learning_rate=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8).minimize(loss)

# In[6]:

# Training configuration.
batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten every image to a 784-element row.
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Scale the training pixels by 1/255.
x_train /= 255