def create_sub_class_test_data(related, config):
    '''
    Build a test set restricted by the related/unrelated mask, for training a
    sub-classifier on the "related" examples only.

    Args:
        related: a numpy array of booleans that are true if related, false if unrelated
        config: configuration object; only its embedding/truncation fields are
            read here (training_size, random_split, truncate_headlines,
            truncate_articles, max_headline_length, max_article_length)

    Returns:
        (glove_matrix, related_h_glove_index_matrix, related_a_glove_index_matrix,
         related_h_seq_lengths, related_a_seq_lengths, max_input_lengths,
         related_labels, unrelated_labels)

    NOTE(review): `related` must be the same length as the test split produced
    by create_data_sets_for_model — confirm against the caller.
    '''
    unrelated = np.logical_not(related)  # includes unrelated
    # Embeddings are always built with classification_problem=1 here,
    # regardless of config.classification_problem.
    # NOTE(review): the comment below says "problem 2" but classification_problem=1
    # is passed — confirm which is intended.
    X, y, glove_matrix, max_input_lengths, word_to_glove_index = create_embeddings(
        training_size=config.training_size,
        random_split=config.random_split,
        truncate_headlines=config.truncate_headlines,
        truncate_articles=config.truncate_articles,
        classification_problem=1,
        max_headline_length=config.max_headline_length,
        max_article_length=config.max_article_length,
        glove_set=None,
        debug=False)

    # isolate test data for classification problem 2
    # Only the test split (third element) is kept; train/dev are discarded.
    _, _, (h_glove_index_matrix, a_glove_index_matrix, h_seq_lengths,
           a_seq_lengths, labels) = create_data_sets_for_model(X, y)

    # Labels are split by the mask; inputs are kept only for related examples.
    unrelated_labels = labels[unrelated]
    related_labels = labels[related]
    related_h_glove_index_matrix = h_glove_index_matrix[related]
    related_a_glove_index_matrix = a_glove_index_matrix[related]
    # Sequence lengths appear to be stored with examples on the second axis,
    # hence the transpose-mask-transpose dance — presumably shape
    # (1, num_examples); verify against create_data_sets_for_model.
    related_h_seq_lengths = np.transpose(np.transpose(h_seq_lengths)[related])
    related_a_seq_lengths = np.transpose(np.transpose(a_seq_lengths)[related])
    return glove_matrix, related_h_glove_index_matrix, related_a_glove_index_matrix, related_h_seq_lengths, related_a_seq_lengths, max_input_lengths, related_labels, unrelated_labels
def main(debug=True): # Parse Arguments parser = argparse.ArgumentParser() parser.add_argument('--epoch', type=int, default=5) parser.add_argument('--restore', action='store_true') args = parser.parse_args() # Create Config config = Config() if args.epoch: config.n_epochs = args.epoch X, y, glove_matrix, max_input_lengths, word_to_glove_index = create_embeddings( training_size=config.training_size, random_split=config.random_split, truncate_headlines=config.truncate_headlines, truncate_articles=config.truncate_articles, classification_problem=config.classification_problem, max_headline_length=config.max_headline_length, max_article_length=config.max_article_length, glove_set=None, debug=debug) if config.uniform_data_split: X, y = produce_uniform_data_split(X, y) # Each set is of the form: # [headline_glove_index_matrix, article_glove_index_matrix, h_seq_lengths, a_seq_lengths, labels] train_examples, dev_set, test_set = create_data_sets_for_model(X, y) print "Distribution of Train {}".format(np.sum(train_examples[4], axis=0)) print "Distribtion of Dev {}".format(np.sum(dev_set[4], axis=0)) print "Distribution of Test{}".format(np.sum(test_set[4], axis=0)) with tf.Graph().as_default(): print 80 * "=" print "INITIALIZING" print 80 * "=" # Create and configure model print "Building model...", start = time.time() model = Attention_Conditonal_Encoding_LSTM_Model( config, report_score, max_input_lengths, glove_matrix, debug) model.print_params() print "took {:.2f} seconds\n".format(time.time() - start) # Initialize variables init = tf.global_variables_initializer() saver = None if debug else tf.train.Saver() with tf.Session() as session: # Load weights if necessary session.run(init) saver = create_tensorflow_saver(model.exclude_names) if args.restore: saver.restore(session, model.curr_weights_fn) # Finalize graph session.graph.finalize() # Train Model print 80 * "=" print "TRAINING" print 80 * "=" model.fit(session, saver, train_examples, dev_set) if not debug: print 80 
* "=" print "TESTING" print 80 * "=" print "Restoring the best model weights found on the dev set" saver.restore(session, model.best_weights_fn) print "Final evaluation on test set", test_score, _, test_confusion_matrix_str = model.predict( session, test_set, save_preds=True) with open(model.test_confusion_matrix_fn, 'w') as file: file.write(test_confusion_matrix_str)
def main(debug=True): # Parse Arguments arg_epoch, arg_restore, arg_test = parse_args() # Create Config config = Config() if arg_epoch: config.n_epochs = arg_epoch X, y, glove_matrix, max_input_lengths, word_to_glove_index = create_embeddings( training_size=config.training_size, random_split=config.random_split, truncate_headlines=config.truncate_headlines, truncate_articles=config.truncate_articles, classification_problem=config.classification_problem, max_headline_length=config.max_headline_length, max_article_length=config.max_article_length, glove_set=None, debug=debug) if config.uniform_data_split: X, y = produce_uniform_data_split(X, y) # Each set is of the form: # [headline_glove_index_matrix, article_glove_index_matrix, h_seq_lengths, a_seq_lengths, labels] train_examples, dev_set, test_set = create_data_sets_for_model(X, y) print "Distribution of Train {}".format(np.sum(train_examples[4], axis=0)) print "Distribtion of Dev {}".format(np.sum(dev_set[4], axis=0)) print "Distribution of Test{}".format(np.sum(test_set[4], axis=0)) with tf.Graph().as_default(): print 80 * "=" print "INITIALIZING" print 80 * "=" # Create and configure model print "Building model...", start = time.time() model = Bimpmp(config, report_score, max_input_lengths, glove_matrix, debug) model.print_params() print "took {:.2f} seconds\n".format(time.time() - start) # Initialize variables init = tf.global_variables_initializer() saver = None if debug else tf.train.Saver() with tf.Session(config=tf.ConfigProto( log_device_placement=True)) as session: # Load weights if necessary session.run(init) saver = create_tensorflow_saver(model.exclude_names) if arg_restore != None: weights_path = './data/{}/{}/weights'.format( model.get_model_name(), arg_restore) restore_path = '{}/{}'.format(weights_path, model.get_fn_names()[1]) saver.restore(session, model.curr_weights_fn) # Finalize graph session.graph.finalize() # train_vars = [var for var in tf.global_variables()] # for var in train_vars: # 
print (var.name, var.get_shape()) # Train Model print 80 * "=" print "TRAINING" print 80 * "=" model.fit(session, saver, train_examples, dev_set) if not debug: print 80 * "=" print "TESTING" print 80 * "=" print "Restoring the best model weights found on the dev set" saver.restore(session, model.best_weights_fn) print "Final evaluation on test set", test_score, _, test_confusion_matrix_str = model.predict( session, test_set, save_preds=True) with open(model.test_confusion_matrix_fn, 'w') as file: file.write(test_confusion_matrix_str)
def main(debug=False): # Parse Arguments parser = argparse.ArgumentParser() parser.add_argument('--restore', action='store_true') args = parser.parse_args() # Load Data config = Config() X, y, glove_matrix, max_input_lengths, word_to_glove_index = create_embeddings( training_size=config.training_size, random_split=config.random_split, truncate_headlines=config.truncate_headlines, truncate_articles=config.truncate_articles, classification_problem=config.classification_problem, max_headline_length=config.max_headline_length, max_article_length=config.max_article_length, glove_set=None, debug=debug) train_examples, dev_set, test_set = create_data_sets_for_model(X, y, debug) print "Distribution of Train {}".format(np.sum(train_examples[4], axis=0)) print "Distribtion of Dev {}".format(np.sum(dev_set[4], axis=0)) print "Distribution of Test{}".format(np.sum(test_set[4], axis=0)) # Define hyperparameters hyperparameters = { 'lr': [0.001, .0001], 'dropout_rate': [.8, .9, 1], 'beta': [.01, .001] } # Run model over all these hyper parameters pp = pprint.PrettyPrinter(indent=4) best_test_score = -1 best_config = None best_test_confusion_matrix = '' for lr in hyperparameters['lr']: config = Config() config.lr = lr if debug: config.embed_size = 2 for dropout_rate in hyperparameters['dropout_rate']: config.dropout_rate = dropout_rate for beta in hyperparameters['beta']: config.beta = beta print "-" * 80 print "Using Configs:" pp.pprint(config.__dict__) print "-" * 80 test_score, test_confusion_matrix = run_model( config, max_input_lengths, glove_matrix, args, train_examples, dev_set, test_set) if test_score > best_test_score: best_test_score = test_score best_config = config best_test_confusion_matrix = test_confusion_matrix print '-' * 80 print "Best Config:" pp.pprint(best_config.__dict__) print "Best Test Score:" print best_test_score print "Confusion Matrix" print best_test_confusion_matrix print '-' * 80