def __init__(self, encoder, decoder, embeddings, vocab, rev_vocab):
    """
    Build the QA system: span lookup tables, span index constants, input
    placeholders, the model graph, the loss and the training op.

    :param encoder: an encoder that you constructed in train.py
    :param decoder: a decoder that you constructed in train.py
    :param embeddings: kept on ``self.embeddings`` before
        ``setup_embeddings()`` is called
    :param vocab: kept on ``self.vocab``
    :param rev_vocab: kept on ``self.rev_vocab``
    """
    para_len = Config.paraLen
    max_len = Config.max_length

    # Every (start, end) pair with start <= end whose width (end - start)
    # stays below max_len, enumerated start-major.
    spans = [(lo, hi)
             for lo in range(para_len)
             for hi in range(lo, para_len)
             if (hi - lo) < max_len]

    # Bidirectional mapping between a span and its position in `spans`.
    self.ind_to_tuple = dict(enumerate(spans))
    self.tuple_to_ind = {span: idx for idx, span in enumerate(spans)}

    self.vocab = vocab
    self.rev_vocab = rev_vocab

    # Define loss parameters here
    def rows_for(offset):
        # Number of rows emitted for a given offset: max_len, except for
        # the trailing offsets where only (para_len - offset) rows remain.
        if offset >= para_len - max_len:
            return para_len - offset
        return max_len

    start_pairs = [[0, offset]
                   for offset in range(para_len)
                   for _ in range(rows_for(offset))]
    end_pairs = [[pos, pos + offset]
                 for offset in range(para_len)
                 for pos in range(rows_for(offset))]

    # Both constants are created before either cast, so TF op creation
    # order is unchanged.
    start_const = tf.constant(np.array(start_pairs))
    end_const = tf.constant(np.array(end_pairs))
    self.startTuples = tf.cast(start_const, tf.int32)
    self.endTuples = tf.cast(end_const, tf.int32)

    # ==== set up placeholder tokens ========
    self.paragraph_placeholder = tf.placeholder(
        tf.int32, [None, Config.paraLen])
    self.para_lens = tf.placeholder(tf.int32, [None])
    self.q_placeholder = tf.placeholder(tf.int32, [None, Config.qLen])
    self.q_lens = tf.placeholder(tf.int32, [None])
    self.labels_placeholder = tf.placeholder(
        tf.int32, [None, Config.labels_one_hot_size])

    # ==== assemble pieces ====
    scope_init = tf.uniform_unit_scaling_initializer(1.0)
    with tf.variable_scope("qa", initializer=scope_init):
        self.encoder = encoder
        self.decoder = decoder
        self.embeddings = embeddings
        self.setup_embeddings()
        self.setup_system()
        self.setup_loss()

    # ==== set up training/updating procedure ====
    # NOTE(review): placed outside the "qa" variable scope, following the
    # section header; the original indentation was lost -- confirm.
    # Alternatives previously tried here: tf.train.AdamOptimizer(Config.lr)
    # and tf.train.GradientDescentOptimizer(Config.lr).
    optimizer = AdamaxOptimizer()
    self.train_op = optimizer.minimize(self.loss)
def run(args):
    """Train, evaluate and report on a VAE/HVAE model for one experiment.

    The steps, in order: stamp ``args.model_signature`` with the current
    time, build the data/model/optimizer, optionally smoke-test one test
    batch on the GPU, train with validation-based checkpointing and early
    stopping, restore the best checkpoint, evaluate ``args.n_nll_runs``
    times, append a summary to ``args.logfile``, pickle the training
    history, plot samples and curves, and finally try to send a
    notification email (best effort).

    Args:
        args: Experiment settings object. It is mutated: the attribute
            ``model_signature`` is set here. Attributes read include
            ``n_gpu``, ``model``, ``K``, ``temp_method``, ``learn_rate``,
            ``adamax_eps``, ``checkpoint_dir``, ``epochs``,
            ``early_stopping_epochs``, ``n_nll_runs``, ``logfile``,
            ``pickle_dir``, ``plot_dir``, ``sender`` and ``receiver``.

    Raises:
        ValueError: If ``args.model`` is not ``'hvae'`` or ``'cnn'``, if
            HVAE is selected without ``K`` or ``temp_method``, or if the
            validation ELBO becomes NaN.
        MemoryError: If running one test batch fails when a GPU is used.
        RuntimeError: If training ends without any checkpoint having been
            saved, so there is no best model to restore.
    """
    print('\nSettings: \n', args, '\n')

    # Timestamp such as 2018-01-31_12_34_56, usable in file names.
    args.model_signature = str(dt.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    ########## Find GPUs
    (gpu_config, n_gpu_used) = set_gpus(args.n_gpu)

    ########## Data, model, and optimizer setup
    mnist = MNIST(args)

    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    if args.model == 'hvae':
        if not args.K:
            raise ValueError('Must set number of flow steps when using HVAE')
        elif not args.temp_method:
            raise ValueError('Must set tempering method when using HVAE')
        model = HVAE(args, mnist.avg_logit)
    elif args.model == 'cnn':
        model = VAE(args, mnist.avg_logit)
    else:
        raise ValueError('Invalid model choice')

    elbo = model.get_elbo(x, args)
    nll = model.get_nll(x, args)

    optimizer = AdamaxOptimizer(learning_rate=args.learn_rate,
                                eps=args.adamax_eps)
    # Maximise the ELBO by minimising its negation.
    opt_step = optimizer.minimize(-elbo)

    ########## Tensorflow and saver setup
    sess = tf.Session(config=gpu_config)
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    savepath = os.path.join(args.checkpoint_dir, args.model_signature,
                            'model.ckpt')

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    ########## Test that GPU memory is sufficient
    if n_gpu_used > 0:
        try:
            x_test = mnist.next_test_batch()
            sess.run((elbo, nll), {x: x_test})
            mnist.batch_idx_test = 0  # Reset batch counter if it works
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C and
            # SystemExit are not misreported as a memory problem.
            raise MemoryError("""
                Likely insufficient GPU memory
                Reduce test batch by lowering the -tbs parameter
                """)

    ########## Training Loop
    train_elbo_hist = []
    val_elbo_hist = []

    # For early stopping
    best_elbo = -np.inf
    best_model_epoch = None  # Stays None until a checkpoint is saved
    es_epochs = 0
    epoch = 0  # Reported in the log even if the loop body never runs

    train_times = []

    for epoch in range(1, args.epochs + 1):

        t0 = time.time()
        train_elbo = train(epoch, mnist, opt_step, elbo, x, args, sess)
        train_elbo_hist.append(train_elbo)
        epoch_time = time.time() - t0
        train_times.append(epoch_time)
        print('One epoch took {:.2f} seconds'.format(epoch_time))

        val_elbo = validate(mnist, elbo, x, sess)
        val_elbo_hist.append(val_elbo)

        if val_elbo > best_elbo:
            # Save the model that currently generalizes best
            es_epochs = 0
            best_elbo = val_elbo
            saver.save(sess, savepath)
            best_model_epoch = epoch

        elif args.early_stopping_epochs > 0:
            es_epochs += 1

            if es_epochs >= args.early_stopping_epochs:
                print('***** STOPPING EARLY ON EPOCH {} of {} *****'.format(
                    epoch, args.epochs))
                break

        print('--> Early stopping: {}/{} (Best ELBO: {:.4f})'.format(
            es_epochs, args.early_stopping_epochs, best_elbo))
        print('\t Current val ELBO: {:.4f}\n'.format(val_elbo))

        if np.isnan(val_elbo):
            raise ValueError('NaN encountered!')

    train_times = np.array(train_times)
    mean_time = np.mean(train_times)
    std_time = np.std(train_times)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_time, std_time))

    ########## Evaluation
    if best_model_epoch is None:
        # Without this guard the failure would surface as an obscure
        # checkpoint-restore error (or an unbound-name error further down).
        raise RuntimeError(
            'No checkpoint was saved during training; nothing to restore '
            'from {}'.format(savepath))

    # Restore the best-performing model
    saver.restore(sess, savepath)

    test_elbos = np.zeros(args.n_nll_runs)
    test_nlls = np.zeros(args.n_nll_runs)

    for i in range(args.n_nll_runs):
        print('\n---- Test run {} of {} ----\n'.format(i + 1,
                                                       args.n_nll_runs))
        (test_elbos[i], test_nlls[i]) = evaluate(mnist, elbo, nll, x, args,
                                                 sess)

    mean_elbo = np.mean(test_elbos)
    std_elbo = np.std(test_elbos)

    mean_nll = np.mean(test_nlls)
    std_nll = np.std(test_nlls)

    print('\nTest ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo))
    print('Test NLL: {:.2f} +/- {:.2f}'.format(mean_nll, std_nll))

    ########## Logging, Saving, and Plotting
    with open(args.logfile, 'a') as ff:
        print('----------------- Test ID {} -----------------'.format(
            args.model_signature), file=ff)
        print(args, file=ff)
        print('Stopped after {} epochs'.format(epoch), file=ff)
        print('Best model from epoch {}'.format(best_model_epoch), file=ff)
        print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
            mean_time, std_time), file=ff)

        print('FINAL VALIDATION ELBO: {:.2f}'.format(val_elbo_hist[-1]),
              file=ff)
        print('Test ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo),
              file=ff)
        print('Test NLL: {:.2f} +/- {:.2f}\n'.format(mean_nll, std_nll),
              file=ff)

    if not os.path.exists(args.pickle_dir):
        os.makedirs(args.pickle_dir)

    train_dict = {
        'train_elbo': train_elbo_hist,
        'val_elbo': val_elbo_hist,
        'args': args
    }
    pickle_path = os.path.join(args.pickle_dir, args.model_signature + '.p')
    # Context manager so the pickle file is flushed and closed promptly.
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(train_dict, pickle_file)

    if not os.path.exists(args.plot_dir):
        os.makedirs(args.plot_dir)

    tf_gen_samples = model.get_samples(args)
    np_gen_samples = sess.run(tf_gen_samples)
    plot_digit_samples(np_gen_samples, args)

    plot_training_curve(train_elbo_hist, val_elbo_hist, args)

    ########## Email notification upon test completion
    # Best effort: a failure to send must not fail the finished experiment.
    try:
        msg_text = """Test completed for ID {0}.

            Parameters: {1}

            Test ELBO: {2:.2f} +/- {3:.2f}
            Test NLL: {4:.2f} +/- {5:.2f}
            """.format(args.model_signature, args, mean_elbo, std_elbo,
                       mean_nll, std_nll)

        msg = MIMEText(msg_text)
        msg['Subject'] = 'Test ID {0} Complete'.format(args.model_signature)
        msg['To'] = args.receiver
        msg['From'] = args.sender

        s = smtplib.SMTP('localhost')
        try:
            s.sendmail(args.sender, [args.receiver], msg.as_string())
        finally:
            # Close the connection even when sending fails.
            s.quit()

    except Exception:
        print('Unable to send email from sender {0} to receiver {1}'.format(
            args.sender, args.receiver))