def build_optimizer(loss, update_ops=None, scope=None, reuse=None):
    with tf.variable_scope(scope, 'gradients', reuse=reuse):
        print("Building optimizer")
        optimizer = AdamaxOptimizer(args.lr) if args.adamax else AdamOptimizer(args.lr)
        # max clip and max norm hyperparameters from Sonderby's LVAE code
        clipped, grad_norm = clip_gradients(optimizer, loss, max_clip=0.9, max_norm=4)
        with tf.control_dependencies(update_ops or []):
            train_step = optimizer.apply_gradients(clipped)
        return train_step
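# `clip_gradients` is not defined in this snippet. Below is a minimal sketch of
# what it likely does, assuming element-wise value clipping followed by
# global-norm clipping (the scheme the max_clip/max_norm arguments suggest).
# The name and signature come from the call above; the body is an assumption.
def clip_gradients(optimizer, loss, max_clip=0.9, max_norm=4):
    grads_and_vars = optimizer.compute_gradients(loss)
    # (variables with None gradients would need to be filtered out first)
    grads, tvars = zip(*grads_and_vars)
    # Clip each gradient element-wise to [-max_clip, max_clip]
    grads = [tf.clip_by_value(g, -max_clip, max_clip) for g in grads]
    # Then rescale so the global norm does not exceed max_norm;
    # tf.clip_by_global_norm also returns the pre-clipping global norm
    grads, grad_norm = tf.clip_by_global_norm(grads, max_norm)
    return list(zip(grads, tvars)), grad_norm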
def add_training_op(self):
    """Sets up the training Ops.

    Creates an optimizer and applies the gradients to all trainable
    variables. The Op returned by this function is what must be passed to
    the `sess.run()` call to cause the model to train.

    For more information, see:
    https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

    Examples:
    https://github.com/pbhatnagar3/cs224s-tensorflow-tutorial/blob/master/tensorflow%20MNIST.ipynb
    https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/multilayer_perceptron.ipynb
    """
    # logits [16, 50, 2] -> grab the last timestep so that logits [16, 2]
    # can be compared against targets [16]
    logits_shape = tf.shape(self.logits)
    reshaped_logits = tf.slice(self.logits, [0, logits_shape[1] - 1, 0],
                               [-1, 1, -1])
    reshaped_logits = tf.reshape(reshaped_logits,
                                 shape=[logits_shape[0], logits_shape[2]])
    # reshaped_logits = tf.reshape(
    #     self.logits, shape=[logits_shape[0], logits_shape[1] * logits_shape[2]])

    self.cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=reshaped_logits, labels=self.targets_placeholder))
    optimizer = AdamaxOptimizer(Config.lr).minimize(self.cost)

    # TODO: confirm whether logits[0] or logits[1] corresponds to laughter
    self.pred = tf.argmax(reshaped_logits, 1)
    correct_pred = tf.equal(tf.argmax(reshaped_logits, 1),
                            tf.cast(self.targets_placeholder, tf.int64))
    self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    self.optimizer = optimizer
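# Note: the tf.slice / tf.reshape pair above only extracts the last timestep.
# An equivalent, simpler strided slice (assuming the same [batch, time,
# classes] layout) would be:
#
#   reshaped_logits = self.logits[:, -1, :]  # [batch, classes]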
def build_magan(self):
    # Generator
    self.g = self.generator(self.z)

    # Discriminator
    _, d_real = self.discriminator(self.x)
    _, d_fake = self.discriminator(self.g, reuse=True)

    self.d_real_loss = t.mse_loss(self.x, d_real, self.batch_size)
    self.d_fake_loss = t.mse_loss(self.g, d_fake, self.batch_size)
    self.d_loss = self.d_real_loss + tf.maximum(0., self.m - self.d_fake_loss)
    self.g_loss = self.d_fake_loss

    # Summary
    tf.summary.scalar("loss/d_loss", self.d_loss)
    tf.summary.scalar("loss/d_real_loss", self.d_real_loss)
    tf.summary.scalar("loss/d_fake_loss", self.d_fake_loss)
    tf.summary.scalar("loss/g_loss", self.g_loss)

    # Optimizer
    t_vars = tf.trainable_variables()
    d_params = [v for v in t_vars if v.name.startswith('d')]
    g_params = [v for v in t_vars if v.name.startswith('g')]

    self.d_op = AdamaxOptimizer(learning_rate=self.lr,
                                beta1=self.beta1).minimize(self.d_loss,
                                                           var_list=d_params)
    self.g_op = AdamaxOptimizer(learning_rate=self.lr,
                                beta1=self.beta1).minimize(self.g_loss,
                                                           var_list=g_params)

    # Merge summary
    self.merged = tf.summary.merge_all()

    # Model saver
    self.saver = tf.train.Saver(max_to_keep=1)
    self.writer = tf.summary.FileWriter('./model/', self.s.graph)
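# The discriminator loss above is a hinge on the fake reconstruction error
# with adaptive margin self.m. A hypothetical single training iteration
# against this graph (assuming self.x and self.z are placeholders defined
# elsewhere in the class, and x_batch is a real-image minibatch):
#
#   z = np.random.uniform(-1., 1., size=[self.batch_size, self.z_dim])
#   _, d_loss = self.s.run([self.d_op, self.d_loss],
#                          feed_dict={self.x: x_batch, self.z: z})
#   _, g_loss = self.s.run([self.g_op, self.g_loss], feed_dict={self.z: z})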
def createGraph(self):
    """Creates graph for training"""
    self.base_cost = 0.0
    self.accuracy = 0
    num_sizes = len(self.bins)
    self.cost_list = []
    sum_weight = 0
    self.bin_losses = []
    saturation_loss = []

    # Create all bins and calculate losses for them
    with vs.variable_scope("var_lengths"):
        for seqLength, itemCount, ind in zip(self.bins, self.count_list,
                                             range(num_sizes)):
            x_in = tf.placeholder("int32", [itemCount, seqLength])
            y_in = tf.placeholder("int64", [itemCount, seqLength])
            self.x_input.append(x_in)
            self.y_input.append(y_in)
            self.saturation_costs = []
            c, a, _, _, perItemCost, _ = self.createLoss(x_in, y_in, seqLength)

            weight = 1.0  # alternatively 1.0 / seqLength
            sat_cost = tf.add_n(self.saturation_costs) / (
                (seqLength ** 2) * itemCount)
            saturation_loss.append(sat_cost * weight)
            self.bin_losses.append(perItemCost)
            self.base_cost += c * weight
            sum_weight += weight
            self.accuracy += a
            self.cost_list.append(c)
            tf.get_variable_scope().reuse_variables()

    # calculate the total loss
    self.base_cost /= sum_weight
    self.accuracy /= num_sizes
    self.sat_loss = tf.reduce_sum(
        tf.stack(saturation_loss)) * self.saturation_weight / sum_weight
    cost = self.base_cost + self.sat_loss

    # add gradient noise proportional to learning rate
    tvars = tf.trainable_variables()
    grads_0 = tf.gradients(cost, tvars)
    grads = []
    for grad in grads_0:
        grad1 = grad + tf.truncated_normal(
            tf.shape(grad)) * self.learning_rate * 1e-4
        grads.append(grad1)

    # optimizer
    optimizer = AdamaxOptimizer(self.learning_rate, beta1=0.9,
                                beta2=1.0 - self.beta2_rate, epsilon=1e-8)
    self.optimizer = optimizer.apply_gradients(zip(grads, tvars),
                                               global_step=self.global_step)

    # some values for printout
    max_vals = []
    for var in tvars:
        varV = optimizer.get_slot(var, "m")
        max_vals.append(varV)
    self.gnorm = tf.global_norm(max_vals)

    self.cost_list = tf.stack(self.cost_list)
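# Caveat: tf.gradients returns None for any variable that does not influence
# `cost`, and tf.shape(None) in the noise loop above would then fail. If that
# can happen in this model (an assumption), a guard like this sketch helps:
#
#   grads = [g if g is None
#            else g + tf.truncated_normal(tf.shape(g)) * self.learning_rate * 1e-4
#            for g in grads_0]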
def __init__(self, encoder, decoder, embeddings, vocab, rev_vocab):
    """
    Initializes your System
    :param encoder: an encoder that you constructed in train.py
    :param decoder: a decoder that you constructed in train.py
    :param embeddings: pretrained embedding matrix
    :param vocab: vocabulary mapping
    :param rev_vocab: reverse vocabulary mapping
    """
    self.tuple_to_ind = {}
    self.ind_to_tuple = {}
    tmp = []
    for i in range(0, Config.paraLen):
        for j in range(i, Config.paraLen):
            if (j - i) >= Config.max_length:
                continue
            tmp.append((i, j))
    for i in range(len(tmp)):
        self.tuple_to_ind[tmp[i]] = i
        self.ind_to_tuple[i] = tmp[i]

    self.vocab = vocab
    self.rev_vocab = rev_vocab

    # Define loss parameters here
    startTuples = tf.constant(
        np.array([[0, i] for i in range(Config.paraLen)
                  for j in range(Config.paraLen - i
                                 if i >= Config.paraLen - Config.max_length
                                 else Config.max_length)]))
    endTuples = tf.constant(
        np.array([[j, j + i] for i in range(Config.paraLen)
                  for j in range(Config.paraLen - i
                                 if i >= Config.paraLen - Config.max_length
                                 else Config.max_length)]))
    self.startTuples = tf.cast(startTuples, tf.int32)
    self.endTuples = tf.cast(endTuples, tf.int32)

    # ==== set up placeholder tokens ========
    self.paragraph_placeholder = tf.placeholder(tf.int32,
                                                [None, Config.paraLen])
    self.para_lens = tf.placeholder(tf.int32, [None])
    self.q_placeholder = tf.placeholder(tf.int32, [None, Config.qLen])
    self.q_lens = tf.placeholder(tf.int32, [None])
    self.labels_placeholder = tf.placeholder(
        tf.int32, [None, Config.labels_one_hot_size])

    # ==== assemble pieces ====
    with tf.variable_scope(
            "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
        self.encoder = encoder
        self.decoder = decoder
        self.embeddings = embeddings
        self.setup_embeddings()
        self.setup_system()
        self.setup_loss()

    # ==== set up training/updating procedure ====
    optimizer = AdamaxOptimizer()
    # optimizer = tf.train.AdamOptimizer(Config.lr)
    # optimizer = tf.train.GradientDescentOptimizer(Config.lr)
    train_op = optimizer.minimize(self.loss)
    self.train_op = train_op
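# The two dicts built above form a bijection between answer spans (i, j) with
# j - i < Config.max_length and flat indices. A quick round-trip sanity check
# (hypothetical; `qa` is an instance of this class, and (3, 7) is a valid key
# only when Config.max_length > 4):
#
#   idx = qa.tuple_to_ind[(3, 7)]           # span -> flat index
#   assert qa.ind_to_tuple[idx] == (3, 7)   # flat index -> span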
def run(args):
    print('\nSettings: \n', args, '\n')

    args.model_signature = str(dt.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    ########## Find GPUs
    (gpu_config, n_gpu_used) = set_gpus(args.n_gpu)

    ########## Data, model, and optimizer setup
    mnist = MNIST(args)
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    if args.model == 'hvae':
        if not args.K:
            raise ValueError('Must set number of flow steps when using HVAE')
        elif not args.temp_method:
            raise ValueError('Must set tempering method when using HVAE')
        model = HVAE(args, mnist.avg_logit)
    elif args.model == 'cnn':
        model = VAE(args, mnist.avg_logit)
    else:
        raise ValueError('Invalid model choice')

    elbo = model.get_elbo(x, args)
    nll = model.get_nll(x, args)

    optimizer = AdamaxOptimizer(learning_rate=args.learn_rate,
                                eps=args.adamax_eps)
    opt_step = optimizer.minimize(-elbo)

    ########## Tensorflow and saver setup
    sess = tf.Session(config=gpu_config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    savepath = os.path.join(args.checkpoint_dir, args.model_signature,
                            'model.ckpt')

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    ########## Test that GPU memory is sufficient
    if n_gpu_used > 0:
        try:
            x_test = mnist.next_test_batch()
            (t_e, t_n) = sess.run((elbo, nll), {x: x_test})
            mnist.batch_idx_test = 0  # Reset batch counter if it works
        except Exception:
            raise MemoryError("""
            Likely insufficient GPU memory
            Reduce test batch by lowering the -tbs parameter
            """)

    ########## Training Loop
    train_elbo_hist = []
    val_elbo_hist = []

    # For early stopping
    best_elbo = -np.inf
    es_epochs = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):
        t0 = time.time()
        train_elbo = train(epoch, mnist, opt_step, elbo, x, args, sess)
        train_elbo_hist.append(train_elbo)
        train_times.append(time.time() - t0)
        print('One epoch took {:.2f} seconds'.format(time.time() - t0))

        val_elbo = validate(mnist, elbo, x, sess)
        val_elbo_hist.append(val_elbo)

        if val_elbo > best_elbo:
            # Save the model that currently generalizes best
            es_epochs = 0
            best_elbo = val_elbo
            saver.save(sess, savepath)
            best_model_epoch = epoch
        elif args.early_stopping_epochs > 0:
            es_epochs += 1
            if es_epochs >= args.early_stopping_epochs:
                print('***** STOPPING EARLY ON EPOCH {} of {} *****'.format(
                    epoch, args.epochs))
                break

        print('--> Early stopping: {}/{} (Best ELBO: {:.4f})'.format(
            es_epochs, args.early_stopping_epochs, best_elbo))
        print('\t Current val ELBO: {:.4f}\n'.format(val_elbo))

        if np.isnan(val_elbo):
            raise ValueError('NaN encountered!')

    train_times = np.array(train_times)
    mean_time = np.mean(train_times)
    std_time = np.std(train_times)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_time, std_time))

    ########## Evaluation
    # Restore the best-performing model
    saver.restore(sess, savepath)

    test_elbos = np.zeros(args.n_nll_runs)
    test_nlls = np.zeros(args.n_nll_runs)

    for i in range(args.n_nll_runs):
        print('\n---- Test run {} of {} ----\n'.format(i + 1, args.n_nll_runs))
        (test_elbos[i], test_nlls[i]) = evaluate(mnist, elbo, nll, x, args,
                                                 sess)

    mean_elbo = np.mean(test_elbos)
    std_elbo = np.std(test_elbos)
    mean_nll = np.mean(test_nlls)
    std_nll = np.std(test_nlls)

    print('\nTest ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo))
    print('Test NLL: {:.2f} +/- {:.2f}'.format(mean_nll, std_nll))

    ########## Logging, Saving, and Plotting
    with open(args.logfile, 'a') as ff:
        print('----------------- Test ID {} -----------------'.format(
            args.model_signature), file=ff)
        print(args, file=ff)
        print('Stopped after {} epochs'.format(epoch), file=ff)
        print('Best model from epoch {}'.format(best_model_epoch), file=ff)
        print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
            mean_time, std_time), file=ff)
        print('FINAL VALIDATION ELBO: {:.2f}'.format(val_elbo_hist[-1]),
              file=ff)
        print('Test ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo),
              file=ff)
        print('Test NLL: {:.2f} +/- {:.2f}\n'.format(mean_nll, std_nll),
              file=ff)

    if not os.path.exists(args.pickle_dir):
        os.makedirs(args.pickle_dir)

    train_dict = {
        'train_elbo': train_elbo_hist,
        'val_elbo': val_elbo_hist,
        'args': args
    }
    pickle.dump(
        train_dict,
        open(os.path.join(args.pickle_dir, args.model_signature + '.p'),
             'wb'))

    if not os.path.exists(args.plot_dir):
        os.makedirs(args.plot_dir)

    tf_gen_samples = model.get_samples(args)
    np_gen_samples = sess.run(tf_gen_samples)
    plot_digit_samples(np_gen_samples, args)
    plot_training_curve(train_elbo_hist, val_elbo_hist, args)

    ########## Email notification upon test completion
    try:
        msg_text = """Test completed for ID {0}.

        Parameters: {1}

        Test ELBO: {2:.2f} +/- {3:.2f}
        Test NLL: {4:.2f} +/- {5:.2f}
        """.format(args.model_signature, args, mean_elbo, std_elbo, mean_nll,
                   std_nll)
        msg = MIMEText(msg_text)
        msg['Subject'] = 'Test ID {0} Complete'.format(args.model_signature)
        msg['To'] = args.receiver
        msg['From'] = args.sender

        s = smtplib.SMTP('localhost')
        s.sendmail(args.sender, [args.receiver], msg.as_string())
        s.quit()
    except Exception:
        print('Unable to send email from sender {0} to receiver {1}'.format(
            args.sender, args.receiver))
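# Every snippet above imports a custom AdamaxOptimizer rather than a TF
# built-in. Below is a minimal sketch of such an optimizer in the TF1
# custom-optimizer style, following the Adamax update rule from Kingma & Ba
# (2015). It is an illustration, not the exact class these snippets use:
# keyword names vary across them (eps vs. epsilon), and bias correction of the
# first moment and sparse-gradient support are omitted here for brevity.
import tensorflow as tf
from tensorflow.python.training import optimizer


class AdamaxOptimizer(optimizer.Optimizer):
    """Adamax: Adam with an exponentially weighted infinity norm."""

    def __init__(self, learning_rate=0.002, beta1=0.9, beta2=0.999,
                 epsilon=1e-8, use_locking=False, name="Adamax"):
        super(AdamaxOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._beta1 = beta1
        self._beta2 = beta2
        self._epsilon = epsilon

    def _create_slots(self, var_list):
        # First moment m and infinity-norm accumulator u, one pair per
        # variable (slot "m" is what the createGraph snippet above reads
        # back for its printout values)
        for v in var_list:
            self._zeros_slot(v, "m", self._name)
            self._zeros_slot(v, "u", self._name)

    def _apply_dense(self, grad, var):
        m = self.get_slot(var, "m")
        u = self.get_slot(var, "u")
        # m_t = beta1 * m + (1 - beta1) * g
        m_t = m.assign(self._beta1 * m + (1. - self._beta1) * grad)
        # u_t = max(beta2 * u, |g|)
        u_t = u.assign(tf.maximum(self._beta2 * u, tf.abs(grad)))
        # theta_t = theta - lr * m_t / (u_t + eps)
        var_update = var.assign_sub(self._lr * m_t / (u_t + self._epsilon))
        return tf.group(var_update, m_t, u_t)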