def read_and_split(dirn='../data', one_hot=True):
    """Load the four idx-ubyte archives under ``dirn`` and split the training set.

    The training images and labels are cut at a fixed index of 30000: the
    leading part becomes ``train1`` and the remainder ``train2``. The
    ``t10k`` archives become ``test``.

    Args:
        dirn: Directory that holds the ``*.gz`` archives; ``'/'`` is appended
            before each file name.
        one_hot: Forwarded to ``extract_labels`` for both label archives.

    Returns:
        A plain attribute container exposing ``train1``, ``train2`` and
        ``test``, each wrapped in a ``DataSet``.
    """
    first_split_size = 30000
    prefix = dirn + '/'

    class DataSets(object):
        pass

    # Read everything first (same order as before: train, then test).
    all_train_images = extract_images(prefix + 'train-images-idx3-ubyte.gz')
    all_train_labels = extract_labels(
        (prefix + 'train-labels-idx1-ubyte.gz'), one_hot=one_hot)
    held_out_images = extract_images(prefix + 't10k-images-idx3-ubyte.gz')
    held_out_labels = extract_labels(
        (prefix + 't10k-labels-idx1-ubyte.gz'), one_hot=one_hot)

    bundle = DataSets()
    bundle.train1 = DataSet(all_train_images[:first_split_size],
                            all_train_labels[:first_split_size])
    bundle.train2 = DataSet(all_train_images[first_split_size:],
                            all_train_labels[first_split_size:])
    bundle.test = DataSet(held_out_images, held_out_labels)
    return bundle
def main(_):
    """Combine the predictions of several saved models and report metrics.

    For every model listed in ``FLAGS.model_names`` the validation and test
    predictions are collected (one row per model). Each ensemble strategy is
    then fitted on the validation predictions, applied to the test
    predictions, and its RMSE / AAE / R are logged. The predictions of the
    last strategy in the list are written to
    ``<output_dir>/ensemble_test_prediction.csv``.

    Args:
        _: Unused positional argument supplied by the application runner.
    """
    logger.info('Loading Models From {:}'.format(FLAGS.output_dir))

    logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

    test_dataset = DataSet(csv_file_path=FLAGS.test_file,
                           smile_col_name=FLAGS.smile_col,
                           target_col_name=FLAGS.target_col,
                           logp_col_name=logp_col_name,
                           contract_rings=FLAGS.contract_rings)

    validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                 smile_col_name=FLAGS.smile_col,
                                 target_col_name=FLAGS.target_col,
                                 logp_col_name=logp_col_name,
                                 contract_rings=FLAGS.contract_rings)

    # One row of predictions per model.
    validation_predictions = np.empty(
        (len(FLAGS.model_names), validation_dataset.num_examples))
    test_predictions_ = np.empty(
        (len(FLAGS.model_names), test_dataset.num_examples))

    # enumerate() replaces the Python-2-only xrange() index loop, so this
    # runs unchanged on Python 2 and Python 3.
    for i, model_name in enumerate(FLAGS.model_names):
        params = FLAGS.model_params[i]
        predictions = get_prediction_from_model(model_name,
                                                params[0],
                                                params[1],
                                                params[2],
                                                test_dataset,
                                                validation_dataset)
        # predictions[0] -> validation set, predictions[1] -> test set.
        validation_predictions[i, :] = predictions[0]
        test_predictions_[i, :] = predictions[1]

    ensemble_predictor = [
        ensemble_prediction_rf_regression,
        ensemble_prediction_top_k,
        ensemble_prediction_greedy,
    ]
    predictor_names = ["Random forest regression", "Top 10", "Greedy"]

    for fun, name in zip(ensemble_predictor, predictor_names):
        emsemble_preditions = fun(validation_dataset, validation_predictions,
                                  test_predictions_)
        prediction_metric = get_metric(emsemble_preditions,
                                       test_dataset.labels)
        logger.info("Method {:} RMSE: {:}, AAE: {:}, R: {:}".format(
            name, prediction_metric[0], prediction_metric[1],
            prediction_metric[2]))

    # Only the predictions of the last strategy evaluated above are saved.
    final_prediction_path = os.path.join(FLAGS.output_dir,
                                         "ensemble_test_prediction.csv")
    save_results(final_prediction_path, test_dataset.labels,
                 emsemble_preditions)
    logging.info(
        "------------------------------DONE------------------------------")
    logging.info("")
    logging.info("")
def main(*args):
    """Train a UGRNN model as configured by ``FLAGS`` and save it.

    Any existing ``<output_dir>/<model_name>`` directory is deleted and
    recreated, the training and validation CSV files are loaded, the model is
    trained for ``FLAGS.max_epochs`` epochs and finally saved into the model
    directory.

    Args:
        *args: Ignored; accepted so the function can be used as an
            application entry point.
    """
    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)

    # Always start from an empty model directory.
    if tf.gfile.Exists(model_dir):
        tf.gfile.DeleteRecursively(model_dir)
    tf.gfile.MakeDirs(model_dir)

    # The session is opened as a context manager so that its resources are
    # released when training finishes or raises (it was never closed before).
    with tf.Graph().as_default(), tf.Session() as sess:
        logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

        logger.info('Loading Training dataset from {:}'.format(
            FLAGS.training_file))
        train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                smile_col_name=FLAGS.smile_col,
                                target_col_name=FLAGS.target_col,
                                logp_col_name=logp_col_name,
                                contract_rings=FLAGS.contract_rings)

        logger.info('Loading validation dataset from {:}'.format(
            FLAGS.validation_file))
        validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                     smile_col_name=FLAGS.smile_col,
                                     target_col_name=FLAGS.target_col,
                                     logp_col_name=logp_col_name,
                                     contract_rings=FLAGS.contract_rings)

        logger.info("Creating Graph.")
        ugrnn_model = UGRNN(FLAGS.model_name,
                            encoding_nn_hidden_size=FLAGS.model_params[0],
                            encoding_nn_output_size=FLAGS.model_params[1],
                            output_nn_hidden_size=FLAGS.model_params[2],
                            batch_size=FLAGS.batch_size,
                            learning_rate=0.001,
                            add_logp=FLAGS.add_logp,
                            clip_gradients=FLAGS.clip_gradient)
        logger.info("Succesfully created graph.")

        init = tf.global_variables_initializer()
        sess.run(init)
        logger.info('Run the Op to initialize the variables')

        ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                          validation_dataset, model_dir)
        ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)
def partition_data(self):
    """Split the training set into one ``DataSet`` per machine.

    A random permutation of the training examples is drawn. The first
    ``common_examples_fraction`` of it is shared by every machine; the rest
    is cut into ``n_machines`` equally sized, disjoint slices (any remainder
    is dropped). Each machine's data set is the shared part followed by its
    own slice. The result is stored in ``self.training_data_sets``.

    When ``self.sync_iterations`` is set, one permutation list obtained from
    ``self.get_permutations`` is attached to every data set as ``perm_list``.
    """
    total = self.mnist_train.images.shape[0]
    shuffled = np.random.permutation(total)

    shared_count = int(self.common_examples_fraction * total)
    private_count = int((total - shared_count) / self.n_machines)

    shared_idx = shuffled[0:shared_count]
    shared_images = self.mnist_train.images[shared_idx, :]
    shared_labels = self.mnist_train.labels[shared_idx, :]

    if self.sync_iterations:
        per_machine = shared_count + private_count
        epochs_needed = int(
            np.ceil(self.n_iterations * self.minibatch_size / per_machine))
        perm_list = self.get_permutations(epochs_needed, per_machine,
                                          shared_count, private_count)

    self.training_data_sets = []
    for machine in range(self.n_machines):
        # Slice of the permutation that belongs to this machine only.
        lo = shared_count + private_count * machine
        hi = shared_count + private_count * (machine + 1)
        private_idx = shuffled[lo:hi]

        machine_images = np.concatenate(
            [shared_images, self.mnist_train.images[private_idx, :]], axis=0)
        machine_labels = np.concatenate(
            [shared_labels, self.mnist_train.labels[private_idx, :]], axis=0)

        # dtype=uint8 keeps the DataSet class from rescaling the features
        # by 1/255.
        machine_set = DataSet(machine_images, machine_labels, reshape=False,
                              dtype=dtypes.uint8)
        if self.sync_iterations:
            machine_set.perm_list = perm_list
        self.training_data_sets.append(machine_set)
def __init__(self, train=True, common_params=None, solver_params=None,
             net_params=None, dataset_params=None):
    """Read the configuration and create the network, data set and input.

    Args:
        train: Stored as ``self.train`` and forwarded to ``Net``.
        common_params: Mapping with ``'gpus'``, ``'image_size'`` and
            ``'batch_size'``; also forwarded to ``Net`` and ``DataSet``.
        solver_params: Mapping with ``'learning_rate'``, ``'moment'``,
            ``'max_iterators'``, ``'train_dir'``, ``'lr_decay'`` and
            ``'decay_steps'``.
        net_params: Forwarded to ``Net``.
        dataset_params: Forwarded to ``DataSet``.
    """
    if common_params:
        self.device_id = int(common_params['gpus'])
        self.image_size = int(common_params['image_size'])
        self.height = self.image_size
        self.width = self.image_size
        self.batch_size = int(common_params['batch_size'])
        self.num_gpus = 1

    # The input placeholder is shaped by batch_size/height/width, so it must
    # be created after the common parameters have been parsed. Building it
    # first read attributes that had not been assigned yet.
    self.data_l = tf.placeholder(
        tf.float32, (self.batch_size, self.height, self.width, 1))

    if solver_params:
        self.learning_rate = float(solver_params['learning_rate'])
        self.moment = float(solver_params['moment'])
        self.max_steps = int(solver_params['max_iterators'])
        self.train_dir = str(solver_params['train_dir'])
        self.lr_decay = float(solver_params['lr_decay'])
        self.decay_steps = int(solver_params['decay_steps'])

    self.train = train
    self.net = Net(train=train, common_params=common_params,
                   net_params=net_params)
    self.dataset = DataSet(common_params=common_params,
                           dataset_params=dataset_params)
def run_once(session, output_dir, train_data, valid_data, logp_col_name, experiment_name = ''):
    """Build a UGRNN from ``FLAGS``, train it once and save the result.

    Args:
        session: TensorFlow session used for initialisation, training and
            saving.
        output_dir: Directory handed to ``train`` and ``save_model``.
        train_data: Indexable whose item 0 holds SMILES and item 1 labels.
        valid_data: Same layout as ``train_data``, used for validation.
        logp_col_name: Not used by this function.
        experiment_name: Not used by this function.
    """

    def _as_dataset(pair):
        # pair[0] -> SMILES, pair[1] -> labels
        return DataSet(smiles=pair[0], labels=pair[1],
                       contract_rings=FLAGS.contract_rings)

    train_dataset = _as_dataset(train_data)
    validation_dataset = _as_dataset(valid_data)

    logger.info("Creating Graph.")
    model_options = dict(
        encoding_nn_hidden_size=FLAGS.model_params[0],
        encoding_nn_output_size=FLAGS.model_params[1],
        output_nn_hidden_size=FLAGS.model_params[2],
        batch_size=FLAGS.batch_size,
        learning_rate=0.001,
        add_logp=FLAGS.add_logp,
        clip_gradients=FLAGS.clip_gradient,
    )
    ugrnn_model = UGRNN(FLAGS.model_name, **model_options)
    logger.info("Succesfully created graph.")

    session.run(tf.global_variables_initializer())
    logger.info('Run the Op to initialize the variables')

    print('FLAGS.enable_plotting',FLAGS.enable_plotting)
    plotting_flag = int(FLAGS.enable_plotting)
    ugrnn_model.train(session, FLAGS.max_epochs, train_dataset,
                      validation_dataset, output_dir,
                      enable_plotting=plotting_flag)
    ugrnn_model.save_model(session, output_dir, FLAGS.max_epochs)
def test_model(model_name, data_dir, output_dir, batch_size):
    """Restore a saved model, run inference and dump the resulting poses.

    The meta graph ``<model_name>.meta`` is imported and its variables are
    restored from ``model_name``. Relative poses are inferred for the data
    in ``data_dir`` and the following text files are written to
    ``output_dir``: ``frames_vs_abs_distance.txt``,
    ``relative_poses_prediction.txt``, ``relative_poses_target.txt``,
    ``absolute_poses_prediction.txt`` and ``absolute_poses_target.txt``
    (pose files hold 12 values per row).

    Args:
        model_name: Checkpoint path prefix (without the ``.meta`` suffix).
        data_dir: Directory passed to ``read_data_sets``.
        output_dir: Directory that receives the text files.
        batch_size: Batch size forwarded to ``infer_relative_poses``.
    """
    # The session is only needed for restoring and inference; the context
    # manager closes it afterwards (it used to stay open).
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(model_name + ".meta")
        saver.restore(sess, model_name)

        graph = tf.get_default_graph()
        outputs = graph.get_tensor_by_name("outputs:0")
        targets_placeholder = graph.get_tensor_by_name(
            "targets_placeholder:0")
        images_placeholder = graph.get_tensor_by_name("images_placeholder:0")
        # FIXME: this lookup may raise an exception
        train_mode = graph.get_tensor_by_name("train_mode:0")

        start_loading_time = time.time()
        images, targets, _, groups, _ = read_data_sets(data_dir)
        start_infer_time = time.time()
        dataset = DataSet(images, targets, groups, fake_data=False)
        relative_poses_prediction, relative_poses_target = \
            infer_relative_poses(sess, dataset, batch_size,
                                 images_placeholder, outputs,
                                 targets_placeholder, train_mode)
        end_time = time.time()

    print("Inference time: {}".format(end_time - start_infer_time))
    print("Load Images + Inference Time: {}".format(
        end_time - start_loading_time))
    print("Images in the seq: {}".format(relative_poses_prediction.shape[0]))

    frames, abs_distance = plot_frames_vs_abs_distance(
        relative_poses_prediction, relative_poses_target, dataset,
        output_dir, save_txt=True, plot=True)
    # zip() is an iterator on Python 3; np.array(zip(...)) would wrap the
    # iterator object itself and np.savetxt would fail. Materialise the
    # pairs first so a two-column array is produced on both Python 2 and 3.
    points = np.array(list(zip(frames, abs_distance)))
    np.savetxt(os.path.join(output_dir, "frames_vs_abs_distance.txt"),
               points)

    np.savetxt(os.path.join(output_dir, "relative_poses_prediction.txt"),
               relative_poses_prediction.reshape(-1, 12), delimiter=' ')
    np.savetxt(os.path.join(output_dir, "relative_poses_target.txt"),
               relative_poses_target.reshape(-1, 12), delimiter=' ')

    absolute_poses_prediction = get_absolute_poses(relative_poses_prediction)
    absolute_poses_target = get_absolute_poses(relative_poses_target)
    np.savetxt(os.path.join(output_dir, "absolute_poses_prediction.txt"),
               absolute_poses_prediction.reshape(-1, 12), delimiter=' ')
    np.savetxt(os.path.join(output_dir, "absolute_poses_target.txt"),
               absolute_poses_target.reshape(-1, 12), delimiter=' ')
def build_and_train(logger, session, output_dir, train_data, valid_data, experiment_name='', regression=True, binary_classification=False, model_name='ugrnn_1', batch_size=10, clip_gradient=False, model_params=None, contract_rings=False, learning_rate=1e-3, max_epochs=150, enable_plotting=False, Targets_UnNormalization_fn=lambda x: x, weight_decay_factor=0, *args, **kwargs):
    """Create a UGRNN for the given data, train it and save it.

    Args:
        logger: Logger used for progress messages.
        session: TensorFlow session used for initialisation, training and
            saving.
        output_dir: Directory handed to ``train`` and ``save_model``.
        train_data: Indexable whose item 0 holds SMILES and item 1 labels.
        valid_data: Same layout as ``train_data``, used for validation.
        experiment_name: Not used by this function.
        regression: Forwarded to the model; must not be combined with
            ``binary_classification``.
        binary_classification: Enables the multitask setting when
            ``regression`` is false.
        model_name: Name given to the model.
        batch_size: Forwarded to the model.
        clip_gradient: Forwarded to the model as ``clip_gradients``.
        model_params: Indexable with three entries: encoder hidden size,
            encoder output size, output-network hidden size.
        contract_rings: Forwarded to both ``DataSet`` objects.
        learning_rate: Forwarded to the model.
        max_epochs: Number of epochs for ``train``; also passed to
            ``save_model``.
        enable_plotting: Converted to ``bool`` and forwarded to ``train``.
        Targets_UnNormalization_fn: Forwarded to ``train``.
        weight_decay_factor: Forwarded to the model.
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        The pair ``(training_scores_dict, validation_scores_dict)`` returned
        by the model's ``train`` method.
    """
    # TODO: find out which internal TensorFlow bug makes this length filter
    # ('remove_SMILES_longer_than') necessary -- is it caused by a newer
    # ("improved") TensorFlow version?
    train_data = utils.remove_SMILES_longer_than(train_data,
                                                 config.max_seq_len)
    valid_data = utils.remove_SMILES_longer_than(valid_data,
                                                 config.max_seq_len)

    train_labels, train_has_mask = utils.create_labels_NaN_mask(train_data[1])
    valid_labels, valid_has_mask = utils.create_labels_NaN_mask(valid_data[1])

    # Settings derived from the data and the task flags.
    is_masked = train_has_mask or valid_has_mask
    multitask = binary_classification if not regression else False
    if train_labels.ndim > 1:
        num_tasks = train_labels.shape[-1]
    else:
        num_tasks = 1

    assert not (
        regression and binary_classification
    ), 'ERROR: arguments <regression>==True and <binary_classification>==True are mutually exclusive.'

    if is_masked:
        # If only one of the two splits is masked, force the mask format on
        # the other one as well.
        if not train_has_mask:
            train_labels, train_has_mask = utils.create_labels_NaN_mask(
                train_data[1], force_masked=1)
        if not valid_has_mask:
            valid_labels, valid_has_mask = utils.create_labels_NaN_mask(
                valid_data[1], force_masked=1)

    train_dataset = DataSet(smiles=train_data[0],
                            labels=train_labels,
                            contract_rings=contract_rings)
    validation_dataset = DataSet(smiles=valid_data[0],
                                 labels=valid_labels,
                                 contract_rings=contract_rings)

    logger.info("Creating Graph.")
    model_options = dict(
        encoding_nn_hidden_size=model_params[0],
        encoding_nn_output_size=model_params[1],
        output_nn_hidden_size=model_params[2],
        batch_size=batch_size,
        learning_rate=learning_rate,
        add_logp=False,
        clip_gradients=clip_gradient,
        regression=regression,
        weight_decay_factor=weight_decay_factor,
        num_tasks=num_tasks,
        multitask=multitask,
        weighted_loss=is_masked,
    )
    ugrnn_model = UGRNN(model_name, **model_options)
    logger.info("Succesfully created graph.")

    session.run(tf.global_variables_initializer())

    scores = ugrnn_model.train(
        session, max_epochs, train_dataset, validation_dataset, output_dir,
        enable_plotting=bool(enable_plotting),
        Targets_UnNormalization_fn=Targets_UnNormalization_fn)
    training_scores_dict, validation_scores_dict = scores

    ugrnn_model.save_model(session, output_dir, max_epochs)
    return training_scores_dict, validation_scores_dict
class Solver(object):
    """Wires a ``Net`` and a ``DataSet`` together and runs the training loop."""

    def __init__(self, train=True, common_params=None, solver_params=None,
                 net_params=None, dataset_params=None):
        """Read the configuration and create the network, data set and input.

        Args:
            train: Stored as ``self.train`` and forwarded to ``Net``.
            common_params: Mapping with ``'gpus'``, ``'image_size'`` and
                ``'batch_size'``; also forwarded to ``Net`` and ``DataSet``.
            solver_params: Mapping with ``'learning_rate'``, ``'moment'``,
                ``'max_iterators'``, ``'train_dir'``, ``'lr_decay'`` and
                ``'decay_steps'``.
            net_params: Forwarded to ``Net``.
            dataset_params: Forwarded to ``DataSet``.
        """
        if common_params:
            self.device_id = int(common_params['gpus'])
            self.image_size = int(common_params['image_size'])
            self.height = self.image_size
            self.width = self.image_size
            self.batch_size = int(common_params['batch_size'])
            self.num_gpus = 1

        # The input placeholder is shaped by batch_size/height/width, so it
        # must be created after the common parameters have been parsed.
        # Building it first raised AttributeError because those attributes
        # did not exist yet.
        self.data_l = tf.placeholder(
            tf.float32, (self.batch_size, self.height, self.width, 1))

        if solver_params:
            self.learning_rate = float(solver_params['learning_rate'])
            self.moment = float(solver_params['moment'])
            self.max_steps = int(solver_params['max_iterators'])
            self.train_dir = str(solver_params['train_dir'])
            self.lr_decay = float(solver_params['lr_decay'])
            self.decay_steps = int(solver_params['decay_steps'])

        self.train = train
        self.net = Net(train=train, common_params=common_params,
                       net_params=net_params)
        self.dataset = DataSet(common_params=common_params,
                               dataset_params=dataset_params)

    def construct_graph(self, scope):
        """Create the target placeholders and the loss ops.

        Args:
            scope: Name scope forwarded to ``self.net.loss``.

        Returns:
            The pair ``(new_loss, g_loss)`` returned by ``self.net.loss``.
        """
        with tf.device('/gpu:' + str(self.device_id)):
            # Targets are fed at a quarter of the input height and width.
            quarter_h = int(self.height / 4)
            quarter_w = int(self.width / 4)
            self.gt_ab_313 = tf.placeholder(
                tf.float32, (self.batch_size, quarter_h, quarter_w, 313))
            self.prior_boost_nongray = tf.placeholder(
                tf.float32, (self.batch_size, quarter_h, quarter_w, 1))

            self.conv8_313 = self.net.inference(self.data_l)
            new_loss, g_loss = self.net.loss(
                scope, self.conv8_313, self.prior_boost_nongray,
                self.gt_ab_313)
            tf.summary.scalar('new_loss', new_loss)
            tf.summary.scalar('total_loss', g_loss)
        return new_loss, g_loss

    def train_model(self):
        """Build the optimisation graph and run ``self.max_steps`` steps.

        Every step prints I/O and compute timings and the current loss.
        Summaries are written to ``self.train_dir`` every 10 steps and a
        checkpoint ``model.ckpt`` is saved there every 1000 steps.

        Raises:
            AssertionError: If the loss becomes NaN.
        """
        with tf.device('/gpu:' + str(self.device_id)):
            self.global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0), trainable=False)
            learning_rate = tf.train.exponential_decay(
                self.learning_rate, self.global_step, self.decay_steps,
                self.lr_decay, staircase=True)
            opt = tf.train.AdamOptimizer(
                learning_rate=learning_rate, beta2=0.99)

            with tf.name_scope('gpu') as scope:
                new_loss, self.total_loss = self.construct_graph(scope)
                self.summaries = tf.get_collection(
                    tf.GraphKeys.SUMMARIES, scope)

            grads = opt.compute_gradients(new_loss)

            self.summaries.append(
                tf.summary.scalar('learning_rate', learning_rate))
            for grad, var in grads:
                if grad is not None:
                    self.summaries.append(
                        tf.summary.histogram(var.op.name + '/gradients',
                                             grad))

            apply_gradient_op = opt.apply_gradients(
                grads, global_step=self.global_step)

            for var in tf.trainable_variables():
                self.summaries.append(tf.summary.histogram(var.op.name, var))

            variable_averages = tf.train.ExponentialMovingAverage(
                0.999, self.global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())

            train_op = tf.group(apply_gradient_op, variables_averages_op)

            saver = tf.train.Saver(write_version=1)
            # saver1 is only needed by the disabled restore call below.
            saver1 = tf.train.Saver()
            summary_op = tf.summary.merge(self.summaries)
            init = tf.global_variables_initializer()
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            sess.run(init)
            # saver1.restore(sess, './models/model.ckpt')
            summary_writer = tf.summary.FileWriter(self.train_dir,
                                                   sess.graph)

            for step in range(self.max_steps):
                start_time = time.time()
                t1 = time.time()
                data_l, gt_ab_313, prior_boost_nongray = self.dataset.batch()
                t2 = time.time()
                feed = {self.data_l: data_l,
                        self.gt_ab_313: gt_ab_313,
                        self.prior_boost_nongray: prior_boost_nongray}
                _, loss_value = sess.run([train_op, self.total_loss],
                                         feed_dict=feed)
                duration = time.time() - start_time
                t3 = time.time()
                print('io: ' + str(t2 - t1) + '; compute: ' + str(t3 - t2))
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # The modulus of 1 is always true; it is kept as the knob
                # for the progress-report interval.
                if step % 1 == 0:
                    num_examples_per_step = self.batch_size * self.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / self.num_gpus

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

                if step % 10 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % 1000 == 0:
                    checkpoint_path = os.path.join(
                        self.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
import numpy as np
import pandas as pd
from input_data import DataSet
from sklearn.metrics import confusion_matrix
import tensorflow as tf

# Image size handed to DataSet through its new_size argument.
input_size = (28, 28, 1)
# Two alternative data locations; the second assignment overrides the first,
# so only the /home/ubuntu path is actually used.
images_folder = '/Users/roms/Documents/Kaggle/StateFarm/Data/imgs'
images_folder = '/home/ubuntu/data/kaggle_statefarm'
# Test listing with explicitly named columns; every image name is turned
# into a path below <images_folder>/test/.
test = pd.read_csv('test_labels.csv', names=['image', 'label'])
test.image = test.image.apply(lambda x: images_folder + '/test/' + x)
# NOTE(review): the variable is called `mnist` although the folder names
# point at State Farm data -- confirm the name is only a leftover.
mnist = DataSet(folder=images_folder, new_size=input_size,
                substract_mean=False, subsample_size=None, test=test)
# Hyper-parameters (presumably learning rate, dropout keep probability and
# regularisation weight -- confirm at their use sites).
lr = 1e-3
keep_prob_ = 0.5
lambda_ = 0.


def weight_variable(shape):
    """Return a ``tf.Variable`` of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a ``tf.Variable`` of ``shape`` filled with the constant 0.01."""
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
def main(*args):
    """Train a UGRNN and export its encodings for the external test set.

    The training, validation and test CSV files named in ``FLAGS`` are
    loaded, the model is trained for ``FLAGS.max_epochs`` epochs and saved
    to ``<output_dir>/<model_name>``. The model's hidden representation of
    the test set is then written, together with the ``'Canonical SMILES'``
    column of the filtered external test set, to
    ``./data/DILI/Final_data/Predictions/UGRNN Encoddings.csv``.

    Args:
        *args: Ignored; accepted so the function can be used as an
            application entry point.
    """
    # Workaround for an error that can occur in the UGRNN code: if the
    # external test set does not have exactly three columns it is re-saved
    # in place (to_csv also writes the index, presumably to supply the
    # missing column -- TODO confirm).
    external_csv = "../../External Test Set/External_Test_Set.csv"
    df_ext = pd.read_csv(external_csv)
    if df_ext.shape[1] == 3:
        print("Moving Forward")
    else:
        df_ext.to_csv(external_csv)

    # NOTE: the model directory is neither cleared nor created here.
    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)

    # The session is opened as a context manager so that its resources are
    # released when the run finishes or raises (it was never closed before).
    with tf.Graph().as_default(), tf.Session() as sess:
        logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

        logger.info('Loading Training dataset from {:}'.format(
            FLAGS.training_file))
        train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                smile_col_name=FLAGS.smile_col,
                                target_col_name=FLAGS.target_col,
                                logp_col_name=logp_col_name,
                                contract_rings=FLAGS.contract_rings)

        logger.info('Loading validation dataset from {:}'.format(
            FLAGS.validation_file))
        validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                     smile_col_name=FLAGS.smile_col,
                                     target_col_name=FLAGS.target_col,
                                     logp_col_name=logp_col_name,
                                     contract_rings=FLAGS.contract_rings)

        logger.info('Loading test dataset from {:}'.format(FLAGS.test_file))
        test_dataset = DataSet(csv_file_path=FLAGS.test_file,
                               smile_col_name=FLAGS.smile_col,
                               target_col_name=FLAGS.target_col,
                               logp_col_name=logp_col_name,
                               contract_rings=FLAGS.contract_rings)

        logger.info("Creating Graph.")
        ugrnn_model = UGRNN(FLAGS.model_name,
                            encoding_nn_hidden_size=FLAGS.model_params[0],
                            encoding_nn_output_size=FLAGS.model_params[1],
                            output_nn_hidden_size=FLAGS.model_params[2],
                            batch_size=FLAGS.batch_size,
                            learning_rate=0.001,
                            add_logp=FLAGS.add_logp,
                            clip_gradients=FLAGS.clip_gradient)
        logger.info("Succesfully created graph.")

        init = tf.global_variables_initializer()
        sess.run(init)
        logger.info('Run the Op to initialize the variables')

        ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                          validation_dataset, model_dir)
        print('Saving model...')
        ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)

        # Needs the live session, so it is computed before the block ends.
        hidden_test = pd.DataFrame(ugrnn_model.Hidden(sess, test_dataset))

    Raw_Test_filtered = pd.read_csv(
        "../../External Test Set/External_Test_Set_filtered.csv")
    hidden_test['Canonical SMILES'] = Raw_Test_filtered['Canonical SMILES']
    print('Hidden_test created!')
    hidden_test.to_csv(
        "./data/DILI/Final_data/Predictions/UGRNN Encoddings.csv")
def concate_dataset(dataset1, dataset2):
    """Return a new ``DataSet`` holding ``dataset1`` followed by ``dataset2``.

    Images and labels are concatenated along the first axis; the third
    positional argument passed to ``DataSet`` is ``True``.
    """
    merged_images = np.concatenate((dataset1.images, dataset2.images))
    merged_labels = np.concatenate((dataset1.labels, dataset2.labels))
    return DataSet(merged_images, merged_labels, True)
def main(_):
    """Train the CopyAttention model and checkpoint it after every epoch.

    A new numbered experiment directory and a matching checkpoint directory
    are created and the flag values are dumped to ``params.json`` in both.
    During every epoch the loss is printed every ``FLAGS.print_every`` steps
    and one sample sentence is appended to ``sample.txt`` every
    ``FLAGS.sample_every`` steps. After every epoch a checkpoint is saved and
    the validation loss is printed and appended to ``valid_loss.txt``.

    Args:
        _: Unused positional argument supplied by the application runner.
    """
    pprint(flags.FLAGS.__flags)

    # Pick the next free experiment number. Only purely numeric entries are
    # considered, so an empty directory or stray files no longer crash the
    # max()/int() computation.
    if not os.path.exists(FLAGS.experiment_dir):
        os.makedirs(FLAGS.experiment_dir)
        expt_num = "1"
    else:
        previous = [int(entry)
                    for entry in os.listdir(FLAGS.experiment_dir)
                    if entry.isdigit()]
        expt_num = str(max(previous) + 1) if previous else "1"
    expt_result_path = os.path.join(FLAGS.experiment_dir, expt_num)
    os.makedirs(expt_result_path)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    chkpt_result_path = os.path.join(FLAGS.checkpoint_dir, expt_num)
    os.makedirs(chkpt_result_path)

    # Record the flag values next to both the results and the checkpoints.
    params_e_path = os.path.join(expt_result_path, "params.json")
    params_c_path = os.path.join(chkpt_result_path, "params.json")
    with open(params_e_path, 'w') as params_e, \
            open(params_c_path, 'w') as params_c:
        json.dump(flags.FLAGS.__flags, params_e)
        json.dump(flags.FLAGS.__flags, params_c)

    # Generate the indexes
    word2idx, field2idx, qword2idx, nF, max_words_in_table, word_set = \
        setup(FLAGS.data_dir, '../embeddings', FLAGS.n, FLAGS.batch_size,
              FLAGS.nW, FLAGS.min_field_freq, FLAGS.nQ)

    # Create the dataset objects
    train_dataset = DataSet(FLAGS.data_dir, 'train', FLAGS.n, FLAGS.nW, nF,
                            FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                            field2idx, qword2idx, FLAGS.max_fields,
                            FLAGS.word_max_fields, max_words_in_table,
                            word_set)
    num_train_examples = train_dataset.num_examples()

    valid_dataset = DataSet(FLAGS.data_dir, 'valid', FLAGS.n, FLAGS.nW, nF,
                            FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                            field2idx, qword2idx, FLAGS.max_fields,
                            FLAGS.word_max_fields, max_words_in_table,
                            word_set)

    test_dataset = DataSet(FLAGS.data_dir, 'test', FLAGS.n, FLAGS.nW, nF,
                           FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                           field2idx, qword2idx, FLAGS.max_fields,
                           FLAGS.word_max_fields, max_words_in_table,
                           word_set)

    # The sizes of respective conditioning variables
    # for placeholder generation
    context_size = (FLAGS.n - 1)
    zp_size = context_size * FLAGS.word_max_fields
    zm_size = context_size * FLAGS.word_max_fields
    gf_size = FLAGS.max_fields
    gw_size = max_words_in_table
    copy_size = FLAGS.word_max_fields
    proj_size = FLAGS.nW + max_words_in_table

    # Generate the TensorFlow graph
    with tf.Graph().as_default():
        # Set the random seed for reproducibility
        tf.set_random_seed(1234)

        # Create the CopyAttention model
        model = CopyAttention(FLAGS.n, FLAGS.d, FLAGS.g, FLAGS.nhu,
                              FLAGS.nW, nF, FLAGS.nQ, FLAGS.l,
                              FLAGS.learning_rate, max_words_in_table,
                              FLAGS.max_fields, FLAGS.word_max_fields,
                              FLAGS.xavier)

        # Placeholders for train and validation
        context_pl, zp_pl, zm_pl, gf_pl, gw_pl, next_pl, copy_pl, proj_pl = \
            placeholder_inputs(FLAGS.batch_size, context_size, zp_size,
                               zm_size, gf_size, gw_size, copy_size,
                               proj_size)

        # Placeholders for test
        context_plt, zp_plt, zm_plt, gf_plt, gw_plt, copy_plt, proj_plt, next_plt = \
            placeholder_inputs_single(context_size, zp_size, zm_size,
                                      gf_size, gw_size, copy_size, proj_size)

        # Train and validation part of the model
        predict = model.inference(FLAGS.batch_size, context_pl, zp_pl, zm_pl,
                                  gf_pl, gw_pl, copy_pl, proj_pl)
        loss = model.loss(predict, next_pl)
        train_op = model.training(loss)

        # Test component of the model
        # The batch_size parameter is replaced with 1.
        pred_single = model.inference(1, context_plt, zp_plt, zm_plt, gf_plt,
                                      gw_plt, copy_plt, proj_plt)
        predicted_label = model.predict(pred_single)

        # Initialize the variables and start the session
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(init)

        for epoch in range(1, FLAGS.num_epochs + 1):
            train_dataset.generate_permutation()
            start_e = time.time()
            for i in range(num_train_examples):
                feed_dict = fill_feed_dict(train_dataset, i, context_pl,
                                           zp_pl, zm_pl, gf_pl, gw_pl,
                                           next_pl, copy_pl, proj_pl)
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict=feed_dict)

                if i % FLAGS.print_every == 0:
                    print("Epoch : %d\tStep : %d\tLoss : %0.3f"
                          % (epoch, i, loss_value))

                # NOTE(review): i is never -1, so this in-epoch validation
                # never runs; it looks deliberately disabled. Left as is
                # because enabling it would change the training run time.
                if i == -1 and i % FLAGS.valid_every == 0:
                    print("Validation starting")
                    valid_loss = do_eval(sess, train_op, loss, valid_dataset,
                                         context_pl, zp_pl, zm_pl, gf_pl,
                                         gw_pl, next_pl, copy_pl, proj_pl)
                    print("Epoch : %d\tValidation loss: %0.5f"
                          % (i, valid_loss))

                if i != 0 and i % FLAGS.sample_every == 0:
                    # Generate one sentence for the first test infobox,
                    # feeding each predicted word back in as the next input.
                    test_dataset.reset_context()
                    pos = 0
                    len_sent = 0
                    prev_predict = word2idx['<start>']
                    res_path = os.path.join(expt_result_path, 'sample.txt')
                    with open(res_path, 'a') as exp:
                        while pos != 1:
                            feed_dict_t, idx2wq = fill_feed_dict_single(
                                test_dataset, prev_predict, 0, context_plt,
                                zp_plt, zm_plt, gf_plt, gw_plt, next_plt,
                                copy_plt, proj_plt)
                            prev_predict = sess.run([predicted_label],
                                                    feed_dict=feed_dict_t)
                            prev = prev_predict[0][0][0]
                            if prev in idx2wq:
                                exp.write(idx2wq[prev] + ' ')
                            else:
                                exp.write('<unk> ')
                            len_sent = len_sent + 1
                            # A full stop ends the sentence.
                            if prev == word2idx['.']:
                                pos = 1
                                exp.write('\n')
                            # Hard cap on the sample length.
                            if len_sent == 50:
                                break
                            prev_predict = prev

            duration_e = time.time() - start_e
            print("Time taken for epoch : %d is %0.3f minutes"
                  % (epoch, duration_e / 60))

            print("Saving checkpoint for epoch %d" % (epoch))
            checkpoint_file = os.path.join(chkpt_result_path,
                                           str(epoch) + '.ckpt')
            saver.save(sess, checkpoint_file)

            print("Validation starting")
            start = time.time()
            valid_loss = do_eval(sess, train_op, loss, valid_dataset,
                                 context_pl, zp_pl, zm_pl, gf_pl, gw_pl,
                                 next_pl, copy_pl, proj_pl)
            duration = time.time() - start
            print("Epoch : %d\tValidation loss: %0.5f" % (epoch, valid_loss))
            print("Time taken for validating epoch %d : %0.3f"
                  % (epoch, duration))
            valid_res = os.path.join(expt_result_path, 'valid_loss.txt')
            with open(valid_res, 'a') as vloss_f:
                # One record per line; without the newline all epochs ran
                # together on a single line of the file.
                vloss_f.write("Epoch : %d\tValidation loss: %0.5f\n"
                              % (epoch, valid_loss))
def main(_):
    """Generate one sentence per test infobox from a saved CopyAttention model.

    A new numbered experiment directory and a matching checkpoint directory
    are created and the flag values are dumped to ``params.json`` in both.
    The model graph is rebuilt, its variables are restored from the fixed
    checkpoint ``../checkpoint/15/16.ckpt`` and the generated sentences are
    appended to ``../experiment/15/generated.txt``.

    Args:
        _: Unused positional argument supplied by the application runner.
    """
    pprint(flags.FLAGS.__flags)

    # Pick the next free experiment number. Only purely numeric entries are
    # considered, so an empty directory or stray files no longer crash the
    # max()/int() computation.
    if not os.path.exists(FLAGS.experiment_dir):
        os.makedirs(FLAGS.experiment_dir)
        expt_num = "1"
    else:
        previous = [int(entry)
                    for entry in os.listdir(FLAGS.experiment_dir)
                    if entry.isdigit()]
        expt_num = str(max(previous) + 1) if previous else "1"
    expt_result_path = os.path.join(FLAGS.experiment_dir, expt_num)
    os.makedirs(expt_result_path)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    chkpt_result_path = os.path.join(FLAGS.checkpoint_dir, expt_num)
    os.makedirs(chkpt_result_path)

    # Record the flag values next to both the results and the checkpoints.
    params_e_path = os.path.join(expt_result_path, "params.json")
    params_c_path = os.path.join(chkpt_result_path, "params.json")
    with open(params_e_path, 'w') as params_e, \
            open(params_c_path, 'w') as params_c:
        json.dump(flags.FLAGS.__flags, params_e)
        json.dump(flags.FLAGS.__flags, params_c)

    # Generate the indexes
    word2idx, field2idx, qword2idx, nF, max_words_in_table, word_set = \
        setup(FLAGS.data_dir, '../embeddings', FLAGS.n, FLAGS.batch_size,
              FLAGS.nW, FLAGS.min_field_freq, FLAGS.nQ)

    # Create the dataset objects
    train_dataset = DataSet(FLAGS.data_dir, 'train', FLAGS.n, FLAGS.nW, nF,
                            FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                            field2idx, qword2idx, FLAGS.max_fields,
                            FLAGS.word_max_fields, max_words_in_table,
                            word_set)
    num_train_examples = train_dataset.num_examples()

    valid_dataset = DataSet(FLAGS.data_dir, 'valid', FLAGS.n, FLAGS.nW, nF,
                            FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                            field2idx, qword2idx, FLAGS.max_fields,
                            FLAGS.word_max_fields, max_words_in_table,
                            word_set)

    test_dataset = DataSet(FLAGS.data_dir, 'test', FLAGS.n, FLAGS.nW, nF,
                           FLAGS.nQ, FLAGS.l, FLAGS.batch_size, word2idx,
                           field2idx, qword2idx, FLAGS.max_fields,
                           FLAGS.word_max_fields, max_words_in_table,
                           word_set)

    # The sizes of respective conditioning variables
    # for placeholder generation
    context_size = (FLAGS.n - 1)
    zp_size = context_size * FLAGS.word_max_fields
    zm_size = context_size * FLAGS.word_max_fields
    gf_size = FLAGS.max_fields
    gw_size = max_words_in_table
    copy_size = FLAGS.word_max_fields
    proj_size = FLAGS.nW + max_words_in_table

    # Generate the TensorFlow graph
    with tf.Graph().as_default():
        # Create the CopyAttention model
        model = CopyAttention(FLAGS.n, FLAGS.d, FLAGS.g, FLAGS.nhu,
                              FLAGS.nW, nF, FLAGS.nQ, FLAGS.l,
                              FLAGS.learning_rate, max_words_in_table,
                              FLAGS.max_fields, FLAGS.word_max_fields)

        # Placeholders for train and validation
        context_pl, zp_pl, zm_pl, gf_pl, gw_pl, next_pl, copy_pl, proj_pl = \
            placeholder_inputs(FLAGS.batch_size, context_size, zp_size,
                               zm_size, gf_size, gw_size, copy_size,
                               proj_size)

        # Placeholders for test
        context_plt, zp_plt, zm_plt, gf_plt, gw_plt, copy_plt, proj_plt, next_plt = \
            placeholder_inputs_single(context_size, zp_size, zm_size,
                                      gf_size, gw_size, copy_size, proj_size)

        # Train and validation part of the model
        predict = model.inference(FLAGS.batch_size, context_pl, zp_pl, zm_pl,
                                  gf_pl, gw_pl, copy_pl, proj_pl)
        loss = model.loss(predict, next_pl)
        train_op = model.training(loss)

        # Test component of the model
        # The batch_size parameter is replaced with 1.
        pred_single = model.inference(1, context_plt, zp_plt, zm_plt, gf_plt,
                                      gw_plt, copy_plt, proj_plt)
        predicted_label = model.predict(pred_single)

        # Variables come from the checkpoint, so the init op is created but
        # intentionally not run.
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        sess = tf.Session()

        ckpt_file = os.path.join('../checkpoint', '15', '16.ckpt')
        saver.restore(sess, ckpt_file)
        # sess.run(init)

        start_g = time.time()
        num_test_boxes = test_dataset.num_infoboxes()
        res_path = os.path.join('../experiment/', '15', 'generated.txt')
        with open(res_path, 'a') as exp:
            for k in range(num_test_boxes):
                # Generate one sentence for infobox k, feeding each
                # predicted word back in as the next input.
                test_dataset.reset_context()
                pos = 0
                len_sent = 0
                prev_predict = word2idx['<start>']
                while pos != 1:
                    feed_dict_t, idx2wq = fill_feed_dict_single(
                        test_dataset, prev_predict, k, context_plt, zp_plt,
                        zm_plt, gf_plt, gw_plt, next_plt, copy_plt, proj_plt)
                    prev_predict = sess.run([predicted_label],
                                            feed_dict=feed_dict_t)
                    prev = prev_predict[0][0][0]
                    if prev in idx2wq:
                        exp.write(idx2wq[prev] + ' ')
                    else:
                        exp.write('<unk> ')
                    len_sent = len_sent + 1
                    # A full stop ends the sentence.
                    if prev == word2idx['.']:
                        pos = 1
                        exp.write('\n')
                    # Hard cap on the sentence length.
                    if len_sent == 50:
                        break
                    prev_predict = prev

        duration_g = time.time() - start_g
        print("Time taken for generating sentences : %0.3f minutes" % (
            duration_g / 60))