def run1(models=[treeRNN, treeRNN_batch],
         batch_sizes=[2**i for i in range(1, 10)],
         configs=[tf.ConfigProto(device_count={'GPU': 0}), None]):
    epochs = 3
    make_experiment_folder()

    _data_util = data_util.DataUtil()
    data = _data_util.get_data()

    run_times_list = []
    epoch_times_list = []
    for model in models:
        for config in configs:
            avg_run_times = []
            avg_epoch_times = []
            for batch_size in batch_sizes:
                run_times = []
                epoch_times = []
                with tf.Graph().as_default():
                    trainer.train(model(data, "test/"), load=False,
                                  config=config, batch_size=batch_size,
                                  epochs=epochs, run_times=run_times,
                                  epoch_times=epoch_times)
                avg_run_times.append(np.average(run_times))
                avg_epoch_times.append(np.average(epoch_times))
            run_times_list.append(avg_run_times)
            epoch_times_list.append(avg_epoch_times)
    np.savez("../experiments/run1.npz",
             run_times_list=run_times_list,
             epoch_times_list=epoch_times_list)
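# run1 and the other runners in this file call make_experiment_folder() before
# writing their .npz results. The helper itself is not shown here; a minimal
# sketch, assuming results land in the "../experiments" directory that run1
# and run2 write to:
import os

def make_experiment_folder():
    # Create the experiments output directory if it does not already exist.
    os.makedirs("../experiments", exist_ok=True)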
def run_speed_test(placement):
    make_experiment_folder()
    epochs = 4
    batch_sizes = [4, 16, 32, 64, 128]
    config_CPU = False
    config_GPU = True
    to_be_tested = [
        (treeRNN_neerbek, config_GPU),
        (treeRNN_batch, config_GPU),
        # (treeLSTM, config_GPU),
        # (deepRNN, config_GPU),
        # (treeLSTM_tracker, config_GPU),
        (treeRNN_neerbek, config_CPU),
        (treeRNN_batch, config_CPU),
        # (treeLSTM, config_CPU),
        # (deepRNN, config_CPU),
        # (treeLSTM_tracker, config_CPU)
    ]
    labels = [
        "TreeRNN - GPU",
        "MTreeRNN - GPU",
        # "TreeLSTM - GPU",
        # "DeepRNN - GPU",
        # "TreeLSTM w. Tracker - GPU",
        "TreeRNN - CPU",
        "MTreeRNN - CPU",
        # "TreeLSTM - CPU",
        # "DeepRNN - CPU",
        # "TreeLSTM w. Tracker - CPU",
    ]

    _data_util = data_util.DataUtil()
    data = _data_util.get_data()
    word_embed = GloVe(mode=constants.PRETRAINED_MODE,
                       dimensions=FLAGS.word_embedding_size)

    run_times_list = []
    epoch_times_list = []
    for model, config in to_be_tested:
        avg_run_times = []
        avg_epoch_times = []
        for batch_size in batch_sizes:
            run_times = []
            epoch_times = []
            with tf.Graph().as_default():
                trainer.train(model(data, word_embed, FLAGS.model_name),
                              load=False, gpu=config, batch_size=batch_size,
                              epochs=epochs, run_times=run_times,
                              epoch_times=epoch_times,
                              compute_performance=False)
            avg_run_times.append(np.average(run_times))
            avg_epoch_times.append(np.average(epoch_times))
        run_times_list.append(avg_run_times)
        epoch_times_list.append(avg_epoch_times)
    np.savez(placement, run_times_list=run_times_list,
             epoch_times_list=epoch_times_list, labels=labels,
             batch_sizes=batch_sizes)
def run2():
    make_experiment_folder()
    epochs = 4
    batch_sizes = [2**i for i in range(1, 10)]
    config_CPU = False
    config_GPU = True
    placement = "../experiments/run2.npz"
    to_be_tested = [(treeRNN, config_CPU),
                    (treeRNN_neerbek, config_GPU),
                    (treeRNN_batch, config_GPU),
                    (treeLSTM, config_GPU),
                    (deepRNN, config_GPU),
                    (treeRNN_tracker, config_GPU)]
    labels = [
        "TreeRNN Neerbek - CPU",
        "TreeRNN Neerbek - GPU",
        "TreeRNN Our - GPU",
        "TreeLSTM - GPU",
        "DeepRNN - GPU",
        "TreeRNN tracker - GPU"
    ]

    _data_util = data_util.DataUtil()
    data = _data_util.get_data()
    word_embed = GloVe(mode=constants.PRETRAINED_MODE,
                       dimensions=FLAGS.word_embedding_size)

    run_times_list = []
    epoch_times_list = []
    for model, config in to_be_tested:
        avg_run_times = []
        avg_epoch_times = []
        for batch_size in batch_sizes:
            run_times = []
            epoch_times = []
            with tf.Graph().as_default():
                model_placement = directories.TRAINED_MODELS_DIR + FLAGS.model_name + "model.ckpt"
                trainer.train(model(data, word_embed, model_placement),
                              load=False, gpu=config, batch_size=batch_size,
                              epochs=epochs, run_times=run_times,
                              epoch_times=epoch_times)
            # Skip the first measurement (graph build / warm-up) when averaging.
            avg_run_times.append(np.average(run_times[1:]))
            avg_epoch_times.append(np.average(epoch_times[1:]))
        run_times_list.append(avg_run_times)
        epoch_times_list.append(avg_epoch_times)
    np.savez(placement, run_times_list=run_times_list,
             epoch_times_list=epoch_times_list, labels=labels,
             batch_sizes=batch_sizes)
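# The .npz files written by these runners bundle the timing curves with their
# labels and x-axis values. A minimal sketch of reading one back, assuming
# matplotlib is used for plotting (the project's actual plotting code is not
# shown here):
import numpy as np
import matplotlib.pyplot as plt

def plot_epoch_times(placement="../experiments/run2.npz"):
    results = np.load(placement)
    # One curve per (model, device) configuration.
    for label, times in zip(results["labels"], results["epoch_times_list"]):
        plt.plot(results["batch_sizes"], times, label=str(label))
    plt.xlabel("batch size")
    plt.ylabel("avg. epoch time (s)")
    plt.legend()
    plt.show()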
def run3():
    make_experiment_folder()
    epochs = 4
    num_threads_list = [i for i in range(0, 8)]
    placement = "../experiments/runThreads.npz"
    to_be_tested = [(treeRNN_neerbek, 32),
                    (treeRNN_neerbek, 64),
                    (treeRNN_neerbek, 128)]
    labels = [
        "Batch size 32",
        "Batch size 64",
        "Batch size 128",
    ]

    _data_util = data_util.DataUtil()
    data = _data_util.get_data()
    word_embed = GloVe(mode=constants.PRETRAINED_MODE,
                       dimensions=FLAGS.word_embedding_size)

    run_times_list = []
    epoch_times_list = []
    for model, batch_size in to_be_tested:
        avg_run_times = []
        avg_epoch_times = []
        for num_threads in num_threads_list:
            run_times = []
            epoch_times = []
            with tf.Graph().as_default():
                trainer.train(model(data, word_embed, batch_size=batch_size),
                              load=False, gpu=True, batch_size=batch_size,
                              epochs=epochs, run_times=run_times,
                              epoch_times=epoch_times,
                              num_threads=num_threads)
            # Skip the first measurement (graph build / warm-up) when averaging.
            avg_run_times.append(np.average(run_times[1:]))
            avg_epoch_times.append(np.average(epoch_times[1:]))
        run_times_list.append(avg_run_times)
        epoch_times_list.append(avg_epoch_times)
    np.savez(placement, run_times_list=run_times_list,
             epoch_times_list=epoch_times_list, labels=labels,
             num_threads_list=num_threads_list)
from utils import data_util
from models.trees.treeRNN_batch import treeRNN
from utils.flags import FLAGS
import trainers.TreeTrainer as trainer

_data_util = data_util.DataUtil()
data = _data_util.get_data()

model = treeRNN(data, FLAGS.models_dir + FLAGS.model_name + "model.ckpt")
trainer.train(model, load=False)
def get_data():
    if not os.path.exists(directories.CLASSIFIER_DATA_DIR):
        os.mkdir(directories.CLASSIFIER_DATA_DIR)
    data_dir = directories.CLASSIFIER_DATA(FLAGS.model_name)
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)

    if os.path.exists(data_dir + 'x_train.npy'):
        # Representations were already extracted for this model; reuse them.
        x_train = np.load(data_dir + 'x_train.npy')
        y_train = np.load(data_dir + 'y_train.npy')
        x_val = np.load(data_dir + 'x_val.npy')
        y_val = np.load(data_dir + 'y_val.npy')
        x_test = np.load(data_dir + 'x_test.npy')
        y_test = np.load(data_dir + 'y_test.npy')
    else:
        _data_util = data_util.DataUtil()
        data = _data_util.get_data()

        # Sort each split by tree size so batches contain similarly sized trees.
        roots_size = [tree_util.size_of_tree(root) for root in data.train_trees]
        data.train_trees = helper.sort_by(data.train_trees, roots_size)
        roots_size = [tree_util.size_of_tree(root) for root in data.val_trees]
        data.val_trees = helper.sort_by(data.val_trees, roots_size)
        roots_size = [tree_util.size_of_tree(root) for root in data.test_trees]
        data.test_trees = helper.sort_by(data.test_trees, roots_size)

        if FLAGS.word_embed_model == constants.WORD2VEC:
            word_embeddings = Word2Vec(mode=FLAGS.word_embed_mode,
                                       dimensions=FLAGS.word_embedding_size)
        elif FLAGS.word_embed_model == constants.FASTTEXT:
            word_embeddings = FastText(mode=FLAGS.word_embed_mode,
                                       dimensions=FLAGS.word_embedding_size)
        else:  # FLAGS.word_embed_model == constants.GLOVE
            word_embeddings = GloVe(mode=FLAGS.word_embed_mode,
                                    dimensions=FLAGS.word_embedding_size)

        g_tree = tf.Graph()
        with g_tree.as_default():
            model = None
            if FLAGS.model == constants.DEEP_RNN:
                model = deepRNN(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.BATCH_TREE_RNN:
                model = treeRNN_batch(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.NEERBEK_TREE_RNN:
                model = treeRNN_neerbek(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.TREE_LSTM:
                model = treeLSTM(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.TRACKER_TREE_RNN:
                model = treeRNN_tracker(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.TRACKER_TREE_LSTM:
                model = treeLSTM_tracker(data, word_embeddings, FLAGS.model_name)
            elif FLAGS.model == constants.LSTM:
                model = LSTM(data, word_embeddings, FLAGS.model_name)

            # Load the best checkpoint (by validation score) and encode every
            # tree into a fixed-size representation; extraction runs on the CPU.
            with tf.Session(config=tf.ConfigProto(device_count={'GPU': 0})) as sess:
                saver = tf.train.Saver()
                model.load_best(sess, saver, "validation")
                x_train = np.array(model.get_representation(data.train_trees, sess))
                y_train = np.array(get_labels(data.train_trees))
                x_val = np.array(model.get_representation(data.val_trees, sess))
                y_val = np.array(get_labels(data.val_trees))
                x_test = np.array(model.get_representation(data.test_trees, sess))
                y_test = np.array(get_labels(data.test_trees))

        # Cache the extracted representations for later runs.
        np.save(data_dir + 'x_train', x_train)
        np.save(data_dir + 'y_train', y_train)
        np.save(data_dir + 'x_val', x_val)
        np.save(data_dir + 'y_val', y_val)
        np.save(data_dir + 'x_test', x_test)
        np.save(data_dir + 'y_test', y_test)

    return {
        'train': (x_train, y_train),
        'val': (x_val, y_val),
        'test': (x_test, y_test)
    }
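# get_data() returns fixed-size representations per split, so any off-the-shelf
# classifier can consume them. A minimal sketch of the downstream step, assuming
# scikit-learn is available and the y arrays hold integer class labels (one-hot
# labels would need an argmax first); the project's actual classifier may differ:
from sklearn.linear_model import LogisticRegression

def fit_downstream_classifier():
    splits = get_data()
    x_train, y_train = splits['train']
    x_val, y_val = splits['val']
    # Fit on the tree representations and check generalisation on validation.
    clf = LogisticRegression(max_iter=1000)
    clf.fit(x_train, y_train)
    print("validation accuracy:", clf.score(x_val, y_val))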
def main():
    _data_util = data_util.DataUtil()
    data = _data_util.get_data()

    model_name = FLAGS.model_name
    if FLAGS.model_name == "":
        # Derive a descriptive model name from the hyper-parameters.
        model_name = FLAGS.model + \
            "_BatchSize" + str(FLAGS.batch_size) + \
            "_LrStart" + str(FLAGS.learning_rate) + \
            "_LrEnd" + str(FLAGS.learning_rate_end) + \
            "_ExpDecay" + str(FLAGS.lr_decay) + \
            "_ConvCond" + str(FLAGS.conv_cond) + \
            "_WordEmbed" + str(FLAGS.word_embed_model) + '-' + str(FLAGS.word_embed_mode) + \
            "_WordEmbedDim" + str(FLAGS.word_embedding_size)

    if FLAGS.word_embed_model == constants.WORD2VEC:
        word_embeddings = Word2Vec(mode=FLAGS.word_embed_mode,
                                   dimensions=FLAGS.word_embedding_size)
    elif FLAGS.word_embed_model == constants.FASTTEXT:
        word_embeddings = FastText(mode=FLAGS.word_embed_mode,
                                   dimensions=FLAGS.word_embedding_size)
    else:  # FLAGS.word_embed_model == constants.GLOVE
        word_embeddings = GloVe(mode=FLAGS.word_embed_mode,
                                dimensions=FLAGS.word_embedding_size)

    for r in range(FLAGS.repeat_num):
        # Suffix repeated runs with a version tag (_V2, _V3, ...).
        version = ""
        if r >= 1:
            version = "_V" + str(r + 1)
        model_name_version = model_name + version

        model = None
        if FLAGS.model == constants.DEEP_RNN:
            model = deepRNN(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.BATCH_TREE_RNN:
            model = treeRNN_batch(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.NEERBEK_TREE_RNN:
            model = treeRNN_neerbek(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.TREE_LSTM:
            model = treeLSTM(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.TRACKER_TREE_RNN:
            model = treeRNN_tracker(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.TRACKER_TREE_LSTM:
            model = treeLSTM_tracker(data, word_embeddings, model_name_version)
        elif FLAGS.model == constants.LSTM:
            model = LSTM(data, word_embeddings, model_name_version)

        # TODO: Check if MODEL_DIR is made prematurely
        load = FLAGS.load_model and os.path.exists(
            directories.TMP_MODEL_DIR(model_name_version))

        if FLAGS.evaluate:
            trainer.evaluate(model, gpu=FLAGS.use_gpu)
        elif FLAGS.use_selective_training:
            trainer.selective_train(model, load=load, gpu=FLAGS.use_gpu)
        else:
            trainer.train(model, load=load, gpu=FLAGS.use_gpu)
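# A conventional entry point, assuming the script is launched directly; the
# project's actual invocation (e.g. a shell wrapper or tf.app.run) is not shown:
if __name__ == "__main__":
    main()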