def test_dataset_API_with_pygenerator():
    data = 'dataset/cone/train_dev'
    image_dir = os.path.join(data, 'Images')
    label_dir = os.path.join(data, 'Labels')
    train_inputs = input_fn(True, image_dir, label_dir, params)
    eval_inputs = input_fn(False, image_dir, label_dir, params)

    with tf.Session() as sess:
        sess.run(train_inputs['iterator_init_op'])
        images = sess.run(train_inputs['images'])
        labels = sess.run(train_inputs['labels'])
        # for img in images:
        #     plt.imshow(img)
        #     plt.show()

        # Check the training data
        assert images.shape == (params.batch_size, 160, 160, 3), "Unexpected training batch image dimension"
        assert labels.shape == (params.batch_size, 8540, 7), "Unexpected training label batch dimension"

        sess.run(eval_inputs['iterator_init_op'])
        images = sess.run(eval_inputs['images'])
        labels = sess.run(eval_inputs['labels'])

        # Check the validation data
        assert images.shape == (params.batch_size, 160, 160, 3), "Unexpected val batch image dimension"
        assert labels.shape == (params.batch_size, 8540, 7), "Unexpected val label batch dimension"
def main(unused_argv):
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    json_path = os.path.join(FLAGS.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Load the dataset parameters (vocabulary size etc.) into params
    json_path = os.path.join(FLAGS.data_dir, 'dataset_params.json')
    assert os.path.isfile(json_path), "No json file found at {}, run build_vocab.py".format(json_path)
    params.update(json_path)

    path_words = os.path.join(FLAGS.data_dir, 'words.txt')
    path_train = os.path.join(FLAGS.data_dir, 'train.csv')
    path_eval = os.path.join(FLAGS.data_dir, 'valid.csv')
    path_test = os.path.join(FLAGS.data_dir, 'testa.csv')
    print("train set:", path_train)
    print("valid set:", path_eval)
    print("test set:", path_test)

    config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    if params.model.startswith("TextCNN"):
        estimator = TextCNN(
            params,
            model_dir=FLAGS.model_dir,
            config=config,
            optimizer=params.optimizer if "optimizer" in params else None)

    if FLAGS.train:
        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: input_fn(path_train, path_words, params, params.shuffle_buffer_size),
            max_steps=params.train_steps)
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(path_eval, path_words, params, 0),
            throttle_secs=FLAGS.throttle_secs)
        print("before train and evaluate")
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
        print("after train and evaluate")
        # inputs = {"content": tf.placeholder(shape=[None, params.sentence_max_len], dtype=tf.int32),
        #           "id": tf.placeholder(shape=[None, 1], dtype=tf.int32)}
        # estimator.export_savedmodel(
        #     export_dir_base=FLAGS.model_dir,
        #     serving_input_receiver_fn=tf.estimator.export.build_raw_serving_input_receiver_fn(inputs))

    if FLAGS.predict:
        params.batch_size = 1
        test_input_fn = lambda: input_fn(path_test, path_words, params, 0)
        predictions = estimator.predict(test_input_fn)
        result = pd.DataFrame(predictions)
        output_path = os.path.join(FLAGS.model_dir, params.model + '_result.csv')
        result.to_csv(output_path, index_label="id", columns=OUTPUT_CSV_COLUMNS)
def eval_input_fn():
    return input_fn(eval_feature, eval_label,
                    batch_size=params.batch_size,
                    is_training=False,
                    repeat_count=1,
                    prefetch=params.prefetch)
def train_input_fn():
    return input_fn(train_feature, train_label,
                    batch_size=params.batch_size,
                    is_training=True,
                    repeat_count=None,
                    prefetch=params.prefetch)
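# Added sketch (not from the original snippets): how wrappers like train_input_fn and
# eval_input_fn above are typically consumed by a tf.estimator.Estimator. The names
# model_fn, params, params.train_steps and the model_dir value are assumptions used
# only for illustration here.
def run_estimator_sketch():
    estimator = tf.estimator.Estimator(model_fn, params=params, model_dir='experiments/base_model')
    # repeat_count=None in train_input_fn means the dataset repeats, so bound training by max_steps
    estimator.train(train_input_fn, max_steps=params.train_steps)
    # repeat_count=1 in eval_input_fn gives a single pass over the evaluation set
    metrics = estimator.evaluate(eval_input_fn)
    print(metrics)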
def main():
    # Load the parameters from the json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info('Creating the dataset...')
    data_dir = args.data_dir
    valid_data_dir = os.path.join(data_dir, 'valid')

    # Get the filenames and labels from the validation set
    valid_filenames, valid_labels = get_filenames_and_labels(valid_data_dir, params)
    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(valid_labels))

    # Create the iterator over the validation set
    valid_inputs = input_fn(False, valid_filenames, valid_labels, params)

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', valid_inputs, params, reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
def test_input_fn():
    return input_fn(test_feature, test_label,
                    batch_size=1,
                    is_training=False,
                    is_test=True,
                    repeat_count=1)
def main():
    # Load the parameters from the json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), 'No json configuration file found at {}'.format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    if not os.path.exists(args.restore_from):
        os.makedirs(args.restore_from)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    data_dir = args.data_dir
    train_data_dir = os.path.join(data_dir, 'train')
    valid_data_dir = os.path.join(data_dir, 'valid')

    # Get the filenames and labels from the train and valid sets
    train_filenames, train_labels = get_filenames_and_labels(train_data_dir, params)
    valid_filenames, valid_labels = get_filenames_and_labels(valid_data_dir, params)
    params.train_size = len(train_filenames)
    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(train_labels))

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames, train_labels, params)
    valid_inputs = input_fn(False, valid_filenames, valid_labels, params)

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs, params)
    valid_model_spec = model_fn('eval', valid_inputs, params, reuse=True)

    # Train the model
    logging.info('Starting training for {} epoch(s)'.format(params.num_epochs))
    train_and_evaluate(train_model_spec, valid_model_spec, args.model_dir, params, args.restore_from)
def train():
    # Set the logger
    set_logger(os.path.join(params['model_dir'], 'train.log'))

    # Log the parameters
    logging.info(params)

    # Load the vocabulary
    vocab = tf.contrib.lookup.index_table_from_file(vocab_path, num_oov_buckets=1)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    train_input_words = load_dataset_from_text(data_dir, train_input_filename, vocab)
    train_context_words = load_dataset_from_text(data_dir, train_context_filename, vocab)

    # Create the iterators over the dataset
    train_inputs = input_fn('train', train_input_words, train_context_words, params)
    eval_inputs = input_fn('eval', train_input_words, train_context_words, params)
    logging.info("- done")

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs, params, reuse=tf.AUTO_REUSE)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info('- done.')

    # Train the model
    logging.info('Starting training for {} epochs'.format(params['num_epochs']))
    normalized_embedding_matrix = train_and_evaluate(train_model_spec, eval_model_spec, params)

    save_dict_to_json(params, params['model_dir'] + '/params.json')
    pd.DataFrame(normalized_embedding_matrix).to_csv(
        os.path.join(params['model_dir'], 'normalized_embedding_matrix.tsv'),
        index=False, header=None, sep='\t')
def funct(x):
    # Set the random seed for the whole graph
    tf.set_random_seed(230)

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.data_dir, 'predict.log'))

    # Create the input data pipeline
    data_dir = args.data_dir
    test_data_dir = os.path.join(data_dir)

    # Get the filenames from the test set
    test_filenames = [os.path.join(test_data_dir, 'predict.jpg')]
    test_labels = [x]
    # print(test_labels)

    # Specify the size of the evaluation set
    params.eval_size = len(test_filenames)

    # Create the iterator over the dataset
    test_inputs = input_fn(False, test_filenames, test_labels, params)

    # Define the model
    model_spec = model_fn('eval', test_inputs, params, reuse=tf.AUTO_REUSE)

    evaluate(model_spec, args.model_dir, params, args.restore_from)
# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
eras = tf.contrib.lookup.index_table_from_file(path_era_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_eras = load_dataset_from_text(path_eras, eras, isLabels=True)

# Specify other parameters for the dataset and the model
params_era.id_pad_word = words.lookup(tf.constant(params_era.pad_word))
params_era.id_pad_tag = words.lookup(tf.constant(params_era.pad_tag))

# Create the iterator over the test set
inputs_era = input_fn('eval', reviews, review_eras, params_era)

# Define the model
print('Creating era models...')
model_spec_era = model_fn('eval', inputs_era, params_era, reuse=False)
print('Done')

print(era_model_path)
print(path_words)
print(path_era_tags)
print(path_reviews)
print(path_eras)
print(os.path.join(args.model_dir, args.restore_from))

# Evaluate the model...
# evaluate(model_spec_era, args.model_dir, params_era, args.restore_from)
# Load the parameters
args = parser.parse_args()
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Create the input data pipeline
tf.logging.info("Creating the datasets...")
data = tf.contrib.learn.datasets.mnist.load_mnist(args.data_dir)

# Specify the size of the dataset we evaluate on
params.eval_size = data.test.num_examples

# Create the test input function
test_input_fn = lambda: input_fn(False, data.test.images, data.test.labels, params)

# Define the model
tf.logging.info("Creating the model...")
estimator = tf.estimator.Estimator(model_fn, params=params, model_dir=args.model_dir)

# Evaluate the model on the test set
tf.logging.info("Evaluation on the test set.")
res = estimator.evaluate(test_input_fn)
for key in res:
    print("{}: {}".format(key, res[key]))
eval_masks_filenames = [
    os.path.join(dev_masks_dir, f) for f in os.listdir(dev_masks_dir)
    if f.endswith('.png')
]

# Get the image ids
# assert os.path.isfile(args.label_dir), "Couldn't find the label file in {}".format(args.label_dir)
# train_labels, eval_labels = get_label(args.label_dir, train_data_dir, dev_data_dir)

# Specify the sizes of the datasets we train on and evaluate on
params.train_size = len(train_filenames)
params.eval_size = len(eval_filenames)

# PH2Dataset: 0.25 scale, size 191, 143

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_filenames, train_masks_filenames, params)
eval_inputs = input_fn(False, eval_filenames, eval_masks_filenames, params)

# Define the model
logging.info("Creating the model from {}".format(args.model_dir))
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Creating the datasets...")
train_tfrecord = args.train_tf
eval_tfrecord = args.eval_tf

# Specify the sizes of the datasets we train on and evaluate on
# (counted by iterating over each TFRecord file once)
params.train_size = len([x for x in tf.python_io.tf_record_iterator(train_tfrecord)])
params.eval_size = len([x for x in tf.python_io.tf_record_iterator(eval_tfrecord)])

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_tfrecord, params)
eval_inputs = input_fn(False, eval_tfrecord, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
def eval_input_fn():
    path_test_sentences = os.path.join(args.data_dir, 'dev/sentences.txt')
    path_test_labels = os.path.join(args.data_dir, 'dev/labels.txt')
    return input_fn('eval', path_test_sentences, path_test_labels, params)
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
test_data_dir = os.path.join(data_dir, "test_signs")

# Get the filenames from the test set
test_filenames = os.listdir(test_data_dir)
test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.jpg')]
test_labels = [int(f.split('/')[-1][0]) for f in test_filenames]

# Specify the size of the evaluation set
params.eval_size = len(test_filenames)

# Create the iterator over the dataset
test_inputs = input_fn(False, test_filenames, test_labels, params)

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', test_inputs, params, reuse=False)

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
for cv, (train_data, test_data) in enumerate(data_generator):
    logging.info('Starting fold {}'.format(cv + 1))
    train_size = train_data[0].shape[0]
    eval_size = test_data[0].shape[0]

    if params['train_ae']:
        tf.reset_default_graph()
        logging.info('Training autoencoder to compute embeddings')
        ae_params = params['ae']
        ae_params['train_size'] = train_size
        ae_params['eval_size'] = eval_size

        logging.info('Creating the inputs for the model')
        train_inputs = input_fn(True, train_data, ae_params)
        eval_inputs = input_fn(False, test_data, ae_params)

        logging.info('Building the model')
        train_model = ae_model_fn(True, train_inputs, ae_params)
        eval_model = ae_model_fn(False, eval_inputs, ae_params)

        logging.info('Start training {} epochs'.format(params['ae']['num_epochs']))
        model_dir = os.path.join(args.model_dir, 'cv_' + str(cv + 1), 'ae')
        train_and_save(train_model, eval_model, model_dir, ae_params,
                       restore_weights=args.restore_ae_from)

        # Update spectra data with embeddings computed from the model
        logging.info('Compute embeddings of the spectra data')
        emb_params = {'restore_path': os.path.join(model_dir, 'best_weights'),
                      'params': ae_params,
                      'layer_name': 'embeddings',
                      'evaluate_model': False}
json_path = os.path.join(args.data_dir, 'dataset_params.json')
assert os.path.isfile(json_path), "No json file found at {}, run build.py".format(json_path)
params.update(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Get paths for the dataset
path_eval = os.path.join(args.data_dir, 'test/')

# Create the input data pipeline
logging.info("Creating the dataset...")
test_X = load_Xdataset_from_text(os.path.join(path_eval, 'X.csv'))
test_labels = load_Ydataset_from_text(os.path.join(path_eval, 'Y.csv'))

# Specify other parameters for the dataset and the model
params.eval_size = params.test_size

# Create the iterator over the test set
inputs = input_fn('eval', test_X, test_labels, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', inputs, params, reuse=False)
logging.info("- done.")

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
# Load the parameters
args = parser.parse_args()
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Get paths for the dataset
path_test_prices = os.path.join(args.data_dir, 'test_inputs.pkl')
path_test_deltas = os.path.join(args.data_dir, 'test_labels.pkl')

# Create the input data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
test_prices, test_deltas = load_prices_and_deltas(path_test_prices, path_test_deltas, params)

# Create the iterator over the test set
test_inputs = input_fn('test', test_prices, test_deltas, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('test', test_inputs, params, reuse=False)

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
sentiments = tf.contrib.lookup.index_table_from_file(path_sentiment_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_sentiments = load_dataset_from_text(path_sentiments, sentiments, isLabels=True)

# Specify other parameters for the dataset and the model
params_sentiment.id_pad_word = words.lookup(tf.constant(params_sentiment.pad_word))
params_sentiment.id_pad_tag = words.lookup(tf.constant(params_sentiment.pad_tag))

# Create the iterator over the test set
inputs_sentiment = input_fn('eval', reviews, review_sentiments, params_sentiment)

# Define the model
print('Creating sentiment and era models...')
model_spec_sentiment = model_fn('eval', inputs_sentiment, params_sentiment, reuse=False)
print('Done')

# Evaluate the model...
# evaluate(model_spec_sentiment, args.model_dir, params_sentiment, args.restore_from)

# Initialize a saver to restore the model
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize lookup tables for both models
    sess.run(model_spec_sentiment['variable_init_op'])
from train import import_names_and_labels

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", default="experiments/07_full_images")
parser.add_argument("--data_dir", default="data/kaggle")
parser.add_argument("--restore_from", default="best_weights")
parser.add_argument("--set", default="test")

if __name__ == "__main__":
    tf.set_random_seed(230)
    args = parser.parse_args()

    json_path = os.path.join(args.model_dir, "params.json")
    params = Params(json_path)
    params.evaluate()

    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    image_dir = os.path.join(data_dir, "images")
    names, labels = import_names_and_labels(data_dir, "test", params.num_labels)
    params.eval_size = len(names)
    inputs = input_fn("test", image_dir, names, labels, params)

    logging.info("Creating the model...")
    model_spec = model_fn("eval", inputs, params)

    logging.info("Evaluating...")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
assert os.path.isfile(params_file), "No .json file found"
with open(params_file) as json_file:
    params = json.load(json_file)
print("Parameters used:\n{}".format(params))

# Load the dataset
print("Loading dataset from " + args.data_dir + args.data_set)
test_dir = os.path.join(args.data_dir, args.data_set)
assert os.path.isdir(test_dir), "No test directory found"

# Test data
test_pathlist = Path(test_dir).glob("*.jpg")
test_filenames = [str(path) for path in test_pathlist]
test_filenames = [
    s for s in test_filenames
    if int(s.split("_")[1].split('/')[2]) < params["num_classes"]
]
test_labels = [int(s.split("_")[1].split('/')[2]) for s in test_filenames]
print("Done loading data")
print("Test set size {}\n".format(len(test_filenames)))

# Create the estimator
cnn_classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir, params=params)

print("Evaluating model")
test_results = cnn_classifier.evaluate(input_fn=lambda: input_fn(
    is_training=False, filenames=test_filenames, labels=test_labels))
print("Results:\n{}".format(test_results))
# Get paths for the datasets
path_train = os.path.join(args.data_dir, 'train/')
path_eval = os.path.join(args.data_dir, 'dev/')

# Create the input data pipeline
logging.info("Creating the datasets...")
train_X = load_Xdataset_from_text(os.path.join(path_train, 'X.csv'))
train_labels = load_Ydataset_from_text(os.path.join(path_train, 'Y.csv'))
eval_X = load_Xdataset_from_text(os.path.join(path_eval, 'X.csv'))
eval_labels = load_Ydataset_from_text(os.path.join(path_eval, 'Y.csv'))

# Specify other parameters for the dataset and the model
params.eval_size = params.dev_size
params.buffer_size = params.train_size  # buffer size for shuffling

# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_X, train_labels, params)
eval_inputs = input_fn('eval', eval_X, eval_labels, params)
logging.info("- done.")

# Define the models (two sets of nodes that share weights for train and eval)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
logging.info("- done.")

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_dir)
# TODO: check for and load the best weights saved so far
# model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))

# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Train/test split
train_fpaths, test_fpaths, train_targets, test_targets = \
    get_train_test_split(args.json_path, args.data_dir, train_size=args.train_size)
params.train_size = len(train_fpaths)
params.test_size = len(test_fpaths)

logging.info("Creating the dataset...")
train_inputs = input_fn(True, train_fpaths, train_targets, params)
test_inputs = input_fn(False, test_fpaths, test_targets, params)

logging.info("Creating the model...")
train_model_spec = model_fn(True, train_inputs, params)
test_model_spec = model_fn(False, test_inputs, params, reuse=True)

logging.info("train set predict...")
predict(train_model_spec, args.model_save_dir, params, args.restore_from)

logging.info("test set predict...")
predict(test_model_spec, args.model_save_dir, params, args.restore_from)

end_time = time.time()
logging.info("Elapsed training time is {:.2f} secs".format(end_time - start_time))
# Load the parameters from the json file
args = parser.parse_args()
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Create the input data pipeline
tf.logging.info("Creating the datasets...")
data = tf.contrib.learn.datasets.mnist.load_mnist(args.data_dir)

# Specify the sizes of the datasets we train on and evaluate on
params.train_size = data.train.num_examples
params.eval_size = data.test.num_examples

# Create the two input functions over the two datasets
train_input_fn = lambda: input_fn(True, data.train.images, data.train.labels, params)
test_input_fn = lambda: input_fn(False, data.test.images, data.test.labels, params)

# Define the model
tf.logging.info("Creating the model...")
config = tf.estimator.RunConfig(tf_random_seed=230,
                                model_dir=args.model_dir,
                                save_summary_steps=params.save_summary_steps)
estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

# Train the model
tf.logging.info("Starting training for {} epoch(s).".format(params.num_epochs))
estimator.train(train_input_fn)

# Evaluate the model on the test set
tf.logging.info("Evaluation on test set.")
    if f.endswith('.jpg')
]
eval_filenames = [
    os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
    if f.endswith('.jpg')
]

# Labels will be between 0 and 5 included (6 classes in total)
train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]
# print("train_labels = " + str(train_labels))
# print("eval_labels = " + str(eval_labels))

# Specify the sizes of the datasets we train on and evaluate on
params.train_size = len(train_filenames)
params.eval_size = len(eval_filenames)

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_filenames, train_labels, params)
eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
def predict(inp, target, params, restore_from, config=None,
            model_dir='./ie590_project/experiments/ex1',
            model_save_dir='./ie590_project/experiments/ex1/model_save/1'):
    """Predict target values given input file paths.

    Args:
        inp: (list) a string list of image file paths; 2D -> [sample_size, number_of_channels]
        target: (list) a float list of target values
        params: (Params or str) Params object or params.json path
        restore_from: (str) ckpt or directory name where ckpts are located for restoring
        ...
    Returns:
        out: (list) a list of predicted target values; has exactly the same dimension as target
    """
    assert len(inp) == len(target)

    if type(params) is str:
        assert os.path.isfile(params), "params.json does not exist at {}".format(params)
        params = Params(params)  # load parameters

    params.inp_size = len(inp)

    set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Creating the dataset...")
    inputs = input_fn(False, inp, target, params)

    logging.info("Creating the model...")
    model_spec = model_fn(False, inputs, params)

    iterator_init_op = model_spec['iterator_init_op']
    update_metrics_op = model_spec['update_metrics_op']
    metrics = model_spec['metrics']
    metrics_init_op = model_spec['metrics_init_op']
    predictions = model_spec['predictions']

    saver = tf.compat.v1.train.Saver()

    logging.info("Calculating predictions...")
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(model_spec['variable_init_op'])

        save_path = os.path.join(model_save_dir, restore_from)
        if os.path.isdir(save_path):
            # If restore_from is a directory, get the latest ckpt
            save_path = tf.train.latest_checkpoint(save_path)
        saver.restore(sess, save_path)

        # One pass over the data: ceiling of inp_size / batch_size
        num_steps = (params.inp_size + params.batch_size - 1) // params.batch_size
        sess.run([iterator_init_op, metrics_init_op])

        # Allocate the output buffer with the same leading dimension as target
        if len(np.shape(target)) == 1:
            out = np.empty(np.shape(target))[:, np.newaxis]
        else:
            out = np.empty(np.shape(target))

        for i in tqdm(range(num_steps)):
            _, predictions_eval = sess.run([update_metrics_op, predictions])
            if i < num_steps - 1:
                out[i * params.batch_size:(i + 1) * params.batch_size, :] = predictions_eval
            else:
                out[i * params.batch_size:, :] = predictions_eval

    return out
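# Added sketch (not part of the original code): a hypothetical call to the predict()
# helper above. The file paths, targets, and restore_from value are placeholders only.
def predict_usage_sketch():
    image_paths = ['data/img_0001.jpg', 'data/img_0002.jpg']  # placeholder inputs
    targets = [0.0, 0.0]                                      # placeholder targets, same length as inputs
    outputs = predict(image_paths, targets,
                      params='./ie590_project/experiments/ex1/params.json',
                      restore_from='best_weights')
    print(outputs)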
# Get paths for vocabularies and dataset
path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
params.vocab_path = path_vocab
path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')

# Load the vocabulary
vocab = tf.contrib.lookup.index_table_from_file(path_vocab,
                                                num_oov_buckets=num_oov_buckets,
                                                key_column_index=0)

# Create the input data pipeline
logging.info("Creating the dataset...")
test_queries = load_dataset_from_text(path_test_queries, vocab, params)
test_articles = load_dataset_from_text(path_test_articles, vocab, params)

# Specify other parameters for the dataset and the model
params.eval_size = params.test_size
params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

# Create the iterator over the test set
inputs = input_fn('eval', test_queries, test_articles, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', inputs, params, reuse=False)
logging.info("- done.")

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
path_words = os.path.join(args.data_dir, 'words.txt')
path_tags = os.path.join(args.data_dir, 'tags.txt')
path_eval_sentences = os.path.join(args.data_dir, 'dev/sentences.txt')
path_eval_labels = os.path.join(args.data_dir, 'dev/labels.txt')

# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
tags = tf.contrib.lookup.index_table_from_file(path_tags)

# Create the input data pipeline
logging.info("Creating the dataset...")
test_sentences = load_dataset_from_text(path_eval_sentences, words)
test_labels = load_dataset_from_text(path_eval_labels, tags)

# Specify other parameters for the dataset and the model
params.eval_size = params.test_size
params.id_pad_word = words.lookup(tf.constant(params.pad_word))
params.id_pad_tag = tags.lookup(tf.constant(params.pad_tag))

# Create the iterator over the test set
inputs = input_fn('eval', test_sentences, test_labels, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', inputs, params, reuse=False)
logging.info("- done.")

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
    file_name = 'train.csv'
else:
    file_name = 'eval.csv'

if args.sample:
    dp = os.path.join(args.data_dir, 'sample', file_name)
    if os.path.exists(dp):
        data_path = dp
    else:
        data_path = os.path.join(args.data_dir, file_name)
else:
    data_path = os.path.join(args.data_dir, file_name)

tf.logging.info("Predicting the data...")
train_predictions = estimator.predict(
    lambda: input_fn(data_path, params, is_training=False))

preds = []
for i, p in tqdm(enumerate(train_predictions)):
    if args.sample and i > 100:
        break
    probs = p['preds']
    gen_line = decode_preds(probs, vocab)
    preds.append(gen_line)

print('Predictions: \n\n{}'.format('\n'.join(preds)))

with open(os.path.join(args.model_dir, 'results.txt'), 'w') as file:
    for line in preds:
        file.write(f'{line}\n')
def train_input_fn():
    path_train_sentences = os.path.join(args.data_dir, 'train/sentences.txt')
    path_train_labels = os.path.join(args.data_dir, 'train/labels.txt')
    return input_fn('train', path_train_sentences, path_train_labels, params)