def main():
    args = setup_args()
    outfile = args.out_dir + args.comment
    f_out = open(outfile, 'w')

    # Write meta-info about the particular run into the master file before each run
    timestr = time.strftime("%Y%m%d-%H%M%S")
    f = open(master_meta_info_file, 'a+')
    f.write(timestr + " #### " + args.comment + " ##### " + str(args) + "\n")
    f.close()

    hole_feature_filename = args.out_dir + "hole_features_" + args.comment

    dataset = getData(args.hole_window_size,
                      args.num_files * args.num_of_holes_per_file,
                      args.dataset_type, args.sup_window_size,
                      args.num_sup_tokens, args.num_of_holes_per_file,
                      args.sup_def, args.method)

    # Get the size of the vocabulary
    vocab_size, encoder = get_vocab_size()

    model = Seq2SeqModel(vocab_size, bias_init=None)

    if args.load_model:
        # Run one dummy forward pass to build the variables before restoring
        y = tf.reshape(tf.Variable(1, dtype=tf.int32), (1, 1))
        model(y, y, False)
        model.load_weights(args.model_load_dir).expect_partial()  # to suppress warnings
        print("Loaded Weights from: ", args.model_load_dir)

    # Required for tqdm bar progress
    size = args.num_files * args.num_of_holes_per_file
    bar = tqdm(total=size)

    print("Evaluating " + args.dataset_type + " Data.......")
    subword_loss, token_loss, error, hole_features = evaluate(
        model, dataset, args.method, bar, args.inner_learning_rate,
        args.sup_batch_size, args.num_of_updates)
    bar.close()

    print(args.dataset_type + " Statistics..........")
    f_out.write(args.dataset_type + " Statistics.........." + "\n")
    print("Token Cross-Entropy = {:.4f} ".format(token_loss))
    print("{:.4f} confidence error over mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, error))
    f_out.write("Token Cross-Entropy = {:.4f} ".format(token_loss) + "\n")
    f_out.write("{:.4f} confidence error over mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, error) + "\n")
    f_out.flush()

    with open(hole_feature_filename, 'wb') as f:
        pickle.dump(hole_features, f)

    f_out.close()
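# A minimal, self-contained sketch of how a confidence error over the mean
# cross-entropy could be computed. This is an assumption about a typical
# implementation: the actual computation lives inside evaluate(), and the
# names `confidence_error` and `per_hole_losses` are hypothetical.
#
# import numpy as np
#
# def confidence_error(per_hole_losses, z=1.96):
#     # z = 1.96 gives the half-width of a two-sided 95% normal interval
#     losses = np.asarray(per_hole_losses, dtype=np.float64)
#     sem = losses.std(ddof=1) / np.sqrt(len(losses))  # standard error of the mean
#     return z * sem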
def main():
    args = setup_args()
    outfile = args.out_dir + args.comment
    f_out = open(outfile, 'w')

    # Write meta-info about the particular run into the master file before each run
    timestr = time.strftime("%Y%m%d-%H%M%S")
    f = open(master_meta_info_file, 'a+')
    f.write(timestr + " #### " + args.comment + " ##### " + str(args) + "\n")
    f.close()

    dataset_train = getData(args.hole_window_size,
                            args.num_train_files * args.num_of_holes_per_file,
                            'train', args.sup_window_size, args.num_sup_tokens,
                            args.num_of_holes_per_file, args.sup_def,
                            is_eval=False, data_type='hole_and_sup')
    dataset_val = getData(args.hole_window_size,
                          args.num_val_files * args.num_of_holes_per_file,
                          'val', args.sup_window_size, args.num_sup_tokens,
                          args.num_of_holes_per_file, args.sup_def,
                          is_eval=True, data_type='hole_and_sup')

    # Get the size of the vocabulary
    vocab_size, encoder = get_vocab_size()

    # Define the bias initializer based on log(prob in vocab): each output
    # logit starts at the log unigram probability of its subword
    subword_dict = pickle.load(open(subword_vocab_filename, 'rb'))
    total = sum(subword_dict.values())
    subword_dict = {k: v / total for k, v in subword_dict.items()}
    lowest_entry = min(subword_dict, key=subword_dict.get)
    subword_vocab = np.zeros(vocab_size)
    for i in range(vocab_size):
        if i in subword_dict:
            subword_vocab[i] = np.log(subword_dict[i])
        else:
            # Unseen subwords fall back to the rarest observed probability
            subword_vocab[i] = np.log(subword_dict[lowest_entry])

    def bias_init(shape, dtype=None, partition_info=None):
        return tf.convert_to_tensor(subword_vocab, dtype=tf.float32)

    # Define the optimizers and initialize the seq2seq model
    optimizer_outer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=args.outer_learning_rate)
    optimizer_inner = tf.compat.v1.train.AdamOptimizer(
        learning_rate=args.inner_learning_rate)
    model = Seq2SeqModel(vocab_size, bias_init)

    # The model save directory is named based on the comment
    # (so that it is easy to restore it if needed)
    model_save_dir = args.checkpoint_dir + args.comment + "/"
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    if args.load_model:
        # Run one dummy forward pass to build the variables before restoring
        y = tf.reshape(tf.Variable(1, dtype=tf.int32), (1, 1))
        model(y, y, False)
        model.load_weights(args.model_load_dir)
        print("Loaded Weights from: ", args.model_load_dir)

    # Required for tqdm bar progress
    train_size = args.num_train_files * args.num_of_holes_per_file

    # Calculate initial cross-entropy over the training data (without training)
    tqdm.write('Calculating Initial Train Cross-Entropy')
    bar = tqdm(total=train_size)
    init_subword_loss, init_token_loss, init_train_error, hole_features = evaluate(
        model, dataset_train, bar, args.inner_learning_rate,
        args.batch_size_sup, args.num_of_updates)
    bar.close()
    print("Init Token Train Cross-Entropy = {:.4f} ".format(init_token_loss))
    print("{:.4f} confidence error over init train mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, init_train_error))
    f_out.write("Init Token Train Cross-Entropy = {:.4f} ".format(init_token_loss) + "\n")
    f_out.write("{:.4f} confidence error over init train mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, init_train_error) + "\n\n")

    # Required for tqdm bar progress
    val_size = args.num_val_files * args.num_of_holes_per_file

    best_token_loss = None
    val_losses = []
    for epoch in range(args.num_epochs):
        hole_feature_filename = (args.out_dir + "epoch_" + str(epoch + 1) +
                                 "_hole_features_" + args.comment)

        # Train one epoch and calculate the epoch-wise train cross-entropy
        bar = tqdm(total=train_size)
        train_subword_loss, train_token_loss, error = train(
            model, optimizer_inner, optimizer_outer, dataset_train,
            args.train_method, bar, args.epsilon_reptile,
            args.batch_size_sup, args.num_of_updates)
        bar.close()

        # Evaluate one epoch and calculate the val cross-entropy
        bar = tqdm(total=val_size)
        val_subword_loss, val_token_loss, val_error, hole_features = evaluate(
            model, dataset_val, bar, args.inner_learning_rate,
            args.batch_size_sup, args.num_of_updates)
        val_losses.append(val_token_loss)
        bar.close()

        with open(hole_feature_filename, 'wb') as f:
            pickle.dump(hole_features, f)

        # Checkpoint if the val loss decreases
        if best_token_loss is None or val_token_loss < best_token_loss:
            if args.save_model:
                model.save_weights(model_save_dir, save_format='tf')
                print("\nSaved Weights to: ", model_save_dir)
            best_subword_loss = val_subword_loss
            best_token_loss = val_token_loss

        # Early stopping if the val loss doesn't improve over the monitored window
        if len(val_losses) >= args.val_monitor_interval:
            if np.all(np.array(val_losses) > best_token_loss):
                print("\nEarly Stopping because val loss didn't improve after",
                      args.val_monitor_interval, "intervals")
                break
            else:
                val_losses.pop(0)

        # Print epoch-wise statistics
        print('\nEpoch {}: Train Token Cross-Entropy = {:.4f}, Val Token Cross-Entropy = {:.4f}'.format(
            epoch + 1, train_token_loss, val_token_loss))
        print('\n{:.4f} confidence error over mean train cross-entropy = {:.4f}, mean val cross-entropy = {:.4f}'.format(
            CONFIDENCE_INTERVAL, error, val_error) + "\n")
        f_out.write('Epoch {}: Train Token Cross-Entropy = {:.4f}, Val Token Cross-Entropy = {:.4f}'.format(
            epoch + 1, train_token_loss, val_token_loss) + "\n")
        f_out.write('{:.4f} confidence error over mean train cross-entropy = {:.4f}, mean val cross-entropy = {:.4f}'.format(
            CONFIDENCE_INTERVAL, error, val_error) + '\n\n')
        f_out.flush()

    # Print the best performance on the val data over all epochs
    print("\nBest Token Val Cross-Entropy = {:.4f} ".format(best_token_loss))
    f_out.write("Best Token Val Cross-Entropy = {:.4f} ".format(best_token_loss) + "\n")
    f_out.close()
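# A minimal, self-contained sketch of the log-frequency bias initializer used
# in main() above: the final-layer logits start at the log unigram probability
# of each subword, and ids missing from the counts fall back to the rarest
# observed probability. The toy counts and the name `log_unigram_bias` are
# hypothetical; only the fallback scheme mirrors the code above.
#
# import numpy as np
#
# def log_unigram_bias(counts, vocab_size):
#     total = sum(counts.values())
#     probs = {k: v / total for k, v in counts.items()}
#     floor = min(probs.values())                # rarest observed probability
#     bias = np.full(vocab_size, np.log(floor))  # default for unseen ids
#     for i, p in probs.items():
#         bias[i] = np.log(p)
#     return bias
#
# # Example: ids 0-2 observed, ids 3-4 fall back to log(0.01)
# print(log_unigram_bias({0: 90, 1: 9, 2: 1}, vocab_size=5))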