def funct(x):
    # Set the random seed for the whole graph
    tf.set_random_seed(230)

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.data_dir, 'predict.log'))

    # Create the input data pipeline
    data_dir = args.data_dir
    test_data_dir = os.path.join(data_dir)

    # Get the filenames from the test set
    test_filenames = [os.path.join(test_data_dir, 'predict.jpg')]
    test_labels = [x]
    # print(test_labels)

    # specify the size of the evaluation set
    params.eval_size = len(test_filenames)

    # create the iterator over the dataset
    test_inputs = input_fn(False, test_filenames, test_labels, params)

    # Define the model
    model_spec = model_fn('eval', test_inputs, params, reuse=tf.AUTO_REUSE)

    evaluate(model_spec, args.model_dir, params, args.restore_from)
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
test_data_dir = os.path.join(data_dir, "test_signs")

# Get the filenames from the test set
test_filenames = os.listdir(test_data_dir)
test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.jpg')]

test_labels = [int(f.split('/')[-1][0]) for f in test_filenames]

# specify the size of the evaluation set
params.eval_size = len(test_filenames)

# create the iterator over the dataset
test_inputs = input_fn(False, test_filenames, test_labels, params)

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', test_inputs, params, reuse=False)

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
# Check that we are not overwriting some previous experiment
# Comment these lines if you are developing your model and don't care about overwriting
# model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))
# overwriting = model_dir_has_best_weights and args.restore_from is None
# assert not overwriting, "Weights found in model_dir, aborting to avoid overwrite"

# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Creating the datasets...")

# Specify the sizes of the dataset we train on
# train_ratio and dataset_size are specified in params
params.train_size = int(params.train_ratio * params.dataset_size)
params.eval_size = params.dataset_size - params.train_size
logging.info("Train size: {} Eval size: {} Total: {}".format(
    params.train_size, params.eval_size, params.dataset_size))

# Create the two iterators over the two datasets
train_inputs, eval_inputs = input_tf_records_all_fn(args.data_dir, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(
    params.dict['num_epochs']))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                   params, args.restore_from)
from train import import_names_and_labels

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", default="experiments/07_full_images")
parser.add_argument("--data_dir", default="data/kaggle")
parser.add_argument("--restore_from", default="best_weights")
parser.add_argument("--set", default="test")


if __name__ == "__main__":
    tf.set_random_seed(230)

    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, "params.json")
    params = Params(json_path)
    params.evaluate()

    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    image_dir = os.path.join(data_dir, "images")
    names, labels = import_names_and_labels(data_dir, "test", params.num_labels)
    params.eval_size = len(names)
    inputs = input_fn("test", image_dir, names, labels, params)

    logging.info("Creating the model...")
    model_spec = model_fn("eval", inputs, params)

    logging.info("Evaluating...")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
if __name__ == '__main__':
    # Set the random seed for the whole graph
    tf.set_random_seed(230)

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    data_dir = args.data_dir

    # create the iterator over the dataset
    data = load_data_malaria(data_dir, params)
    test_inputs = get_iter_from_raw(data, params, numsuper=params.numsuper,
                                    intensity_scale=1 / params.numsuper, training=False)

    # specify the size of the evaluation set
    params.eval_size = len(data[5]) + len(data[7])

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', test_inputs, params, reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
# Get the filenames from the train and dev sets
# train_filenames = [os.path.join(train_data_dir, f) for f in os.listdir(train_data_dir)
#                    if f.endswith('.jpg')]
# eval_filenames = [os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
#                   if f.endswith('.jpg')]

# Labels will be between 0 and 5 included (6 classes in total)
# train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
# eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]

# Get the data from the train and dev sets
train_data, train_labels, eval_data, eval_labels = readDataFromCsv(use_this_data_dir)

# Specify the sizes of the dataset we train on and evaluate on
params.train_size = train_data.shape[0]
params.eval_size = eval_data.shape[0]

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_data, train_labels, params)
eval_inputs = input_fn(False, eval_data, eval_labels, params)
# train_inputs = {'data': train_data, 'labels': train_labels}
# eval_inputs = {'data': eval_data, 'labels': eval_labels}

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
tf.reset_default_graph()
tf.logging.set_verbosity(tf.logging.INFO)

# Load the parameters from json file
args = parser.parse_args()
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Create the input data pipeline
tf.logging.info("Creating the datasets...")
data = tf.contrib.learn.datasets.mnist.load_mnist(args.data_dir)

# Specify the sizes of the dataset we train on and evaluate on
params.train_size = data.train.num_examples
params.eval_size = data.test.num_examples

# Define the model
tf.logging.info("Creating the model...")
config = tf.estimator.RunConfig(tf_random_seed=230,
                                model_dir=args.model_dir,
                                save_summary_steps=params.save_summary_steps)
estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

# EMBEDDINGS VISUALIZATION
# Compute embeddings on the test set
tf.logging.info("Predicting")
test_images = data.test.images.reshape((-1, params.image_size, params.image_size, 1))
path_train_sentences = os.path.join(args.data_dir, 'train.pkl')
path_eval_sentences = os.path.join(args.data_dir, 'dev.pkl')

# Load Vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=1)

# Create the input data pipeline
logging.info("Creating the datasets...")
train_sentences = load_dataset_from_text(path_train_sentences)
eval_sentences = load_dataset_from_text(path_eval_sentences)

# Specify other parameters for the dataset and the model
params.eval_size = params.dev_size
params.buffer_size = params.train_size  # buffer size for shuffling
params.id_pad_word = words.lookup(tf.constant(params.pad_word))

# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_sentences, words, params)
eval_inputs = input_fn('eval', eval_sentences, words, params)
logging.info("- done.")

# Define the models (2 different sets of nodes that share weights for train and eval)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
logging.info("- done.")

# Train the model
if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Specify the sizes of the dataset we train on and evaluate on
    # TODO: this should be in the parameters file or somewhere
    params.train_size = 50000
    params.eval_size = 10000

    # Define the model
    tf.logging.info("Creating the model...")
    config = tf.estimator.RunConfig(tf_random_seed=230,
                                    model_dir=args.model_dir,
                                    save_summary_steps=params.save_summary_steps)
    estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

    # Train the model
    tf.logging.info("Starting training for {} epoch(s).".format(params.num_epochs))
    estimator.train(lambda: train_input_fn(args.data_dir, params))

    # Evaluate the model on the test set
json_path), "No json configuration file found at {}".format(json_path) params = Params(json_path) # # Set the logger set_logger(os.path.join(args.model_dir, 'evaluate.log')) # Create the input data pipeline logging.info("Creating the dataset...") data_dir = args.data_dir test_data_dir = os.path.join(data_dir, 'test') test_filenames = os.listdir(test_data_dir) test_features_data = [ os.path.join(test_data_dir, f) for f in test_filenames if f == 'X.npy' ][0] test_labels_data = [ os.path.join(test_data_dir, f) for f in test_filenames if f == "Y.npy" ][0] # specify the size of the evaluation set test_data_loaded = np.load(test_features_data, mmap_mode='r') params.eval_size = test_data_loaded.shape[0] test_inputs = input_fn(test_features_data, test_labels_data, params) # Define the model logging.info("Creating the model...") model_spec = model_fn('eval', test_inputs, params, reuse=False) logging.info("Starting evaluation") evaluate(model_spec, args.model_dir, params, args.restore_from)
# Create the data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
train_data_dir = os.path.join(data_dir, 'train_signs')
dev_data_dir = os.path.join(data_dir, "dev_signs")

# Get the filenames from the train and dev sets
train_filenames = [os.path.join(train_data_dir, f) for f in os.listdir(train_data_dir)
                   if f.endswith('.jpg')]
eval_filenames = [os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
                  if f.endswith('.jpg')]

train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]

params.train_size = len(train_filenames)
params.eval_size = len(eval_labels)

train_inputs = input_fn(True, train_filenames, train_labels, params)
eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params)

logging.info('Starting training for {} epochs'.format(params.num_epochs))
train_and_evaluation(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Get paths for datasets
# train_filenames = ['train-%.2d-of-%.2d.tfrecords' % (
#     shard, params.num_train_shards) for shard in range(params.num_train_shards)]
# dev_filenames = ['dev-%.2d-of-%.2d.tfrecords' % (
#     shard, params.num_dev_shards) for shard in range(params.num_dev_shards)]
# test_filenames = ['test-%.2d-of-%.2d.tfrecords' % (
#     shard, params.num_test_shards) for shard in range(params.num_test_shards)]

if params.choose_target:
    params.train_size = np.sum(
        np.array(params.len_train_by_target)[params.target])
    params.eval_size = 10 * params.num_classes
else:
    params.target = range(params.num_classes)

train_filenames = [
    'train{}.tfrecords'.format(targ + 1) for targ in params.target
]
dev_filenames = [
    'dev{}.tfrecords'.format(targ + 1) for targ in params.target
]
# test_filenames = ['test{}.tfrecords'.format(targ+1) for targ in params.target]

train_files = [
    os.path.join(args.data_dir, data_file) for data_file in train_filenames
]
][0]
dev_filenames = os.listdir(dev_data_dir)
dev_features_data = [
    os.path.join(dev_data_dir, f) for f in dev_filenames if f == 'X.npy'
][0]
dev_labels_data = [
    os.path.join(dev_data_dir, f) for f in dev_filenames if f == "Y.npy"
][0]

# Specify the sizes of the dataset we train on and evaluate on
train_data_loaded = np.load(train_features_data, mmap_mode='r')
params.train_size = train_data_loaded.shape[0]
dev_data_loaded = np.load(dev_features_data, mmap_mode='r')
params.eval_size = dev_data_loaded.shape[0]

# Create the two iterators over the two datasets
train_inputs = input_fn(train_features_data, train_labels_data, params)
dev_inputs = input_fn(dev_features_data, dev_labels_data, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
dev_model_spec = model_fn('eval', dev_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, dev_model_spec, args.model_dir, params, args.restore_from)
assert not overwritting, "Weights found in model_dir, aborting to avoid overwrite"

# Set the logger
set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Creating the datasets...")
train_tfrecord = args.train_tf
eval_tfrecord = args.eval_tf

# Get the filenames from the train and dev sets
# Specify the sizes of the dataset we train on and evaluate on
params.train_size = len(
    [x for x in tf.python_io.tf_record_iterator(train_tfrecord)])
params.eval_size = len(
    [x for x in tf.python_io.tf_record_iterator(eval_tfrecord)])

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_tfrecord, params)
eval_inputs = input_fn(False, eval_tfrecord, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
size_y = NC + NPERMODE * MMODE
data_set = load_data(validation_size_p=VAL_SIZE_P, dirname=dirname,
                     data_size_x=DATA_SIZE, data_size_y=size_y)
(train_x, train_y_c, train_y_w, train_y_d,
 dev_x, dev_y_c, dev_y_w, dev_y_d) = data_set
# print(train_y[0:3, 0:3, -1])

(m_train, n_x, n_channel_x) = train_x.shape
m_dev = dev_x.shape[0]
print(train_x.shape)
print(m_dev)
params.train_size = m_train
params.eval_size = m_dev

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_x, train_y_c, train_y_w, train_y_d, params)
eval_inputs = input_fn(False, dev_x, dev_y_c, dev_y_w, dev_y_d, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
params = Params(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
test_data_dir = os.path.join(data_dir, "test_forest")

# Get the filenames from the test set
test_filenames = [os.path.join(test_data_dir, f) for f in os.listdir(test_data_dir)
                  if f.endswith(ext)]
test_images = [f for f in test_filenames if f.endswith('_image' + ext)]
test_labels = [f for f in test_filenames if f.endswith('_label' + ext)]
test_images.sort()
test_labels.sort()

# specify the size of the evaluation set
params.eval_size = len(test_images)

# create the iterator over the dataset
test_inputs = input_forest(False, test_images, test_labels, params)

# Define the model
logging.info("Creating the model...")
model_spec = model_unet('eval', test_inputs, params, reuse=False)

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Set the logger
set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")
data_dir = args.data_dir
test_data_dir = os.path.join(data_dir, "test_signs")

# Get the filenames from the test set
test_filenames = os.listdir(test_data_dir)
test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.jpg')]

test_labels = [int(f.split('/')[-1][0]) for f in test_filenames]

# specify the size of the evaluation set
params.eval_size = 967

# create the iterator over the dataset
test_inputs = input_fn(False, test_filenames, test_labels, params)

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', test_inputs, params, reuse=False)

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
    if f.endswith('.jpg')
]
eval_filenames = [
    os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
    if f.endswith('.jpg')
]

# Labels will be between 0 and 5 included (6 classes in total)
train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]
# print("train_labels = " + str(train_labels))
# print("eval_labels = " + str(eval_labels))

# Specify the sizes of the dataset we train on and evaluate on
params.train_size = len(train_filenames)
params.eval_size = len(eval_filenames)

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_filenames, train_labels, params)
eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
# Create the input data pipeline
logging.info("Creating the datasets...")
data_dir = args.data_dir

# Create the two iterators over the two datasets
data = load_data_malaria(data_dir, params)
train_inputs = get_iter_from_raw(data, params, numsuper=params.numsuper,
                                 intensity_scale=1 / params.numsuper, training=True)
eval_inputs = get_iter_from_raw(data, params, numsuper=params.numsuper,
                                intensity_scale=1 / params.numsuper, training=False)

#%%
# Specify the sizes of the dataset we train on and evaluate on
params.train_size = int(params.number * params.split)
params.eval_size = int((1 - params.split) * params.number)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

#%%
# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)
# Get paths for vocabularies and dataset
path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
params.vocab_path = path_vocab
path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')

# Load Vocabularies
vocab = tf.contrib.lookup.index_table_from_file(
    path_vocab, num_oov_buckets=num_oov_buckets, key_column_index=0)

# Create the input data pipeline
logging.info("Creating the dataset...")
test_queries = load_dataset_from_text(path_test_queries, vocab, params)
test_articles = load_dataset_from_text(path_test_articles, vocab, params)

# Specify other parameters for the dataset and the model
params.eval_size = params.test_size
params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

# Create iterator over the test set
inputs = input_fn('eval', test_queries, test_articles, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', inputs, params, reuse=False)
logging.info("- done.")

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
train_npy = np.load(os.path.join(data_dir, "train.npy"))
train_npy = np.nan_to_num(train_npy)
eval_npy = np.load(os.path.join(data_dir, "test.npy"))
eval_npy = np.nan_to_num(eval_npy)

# Labels will be between 0 and 5 included (6 classes in total)
# train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
# eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]
paths = loadPaths(train_set_number=1, test_set_number=1)
train_labels = paths["train"]["labels"]
eval_labels = paths["test"]["labels"]

# Specify the sizes of the dataset we train on and evaluate on
# params.train_size = len(train_filenames)
# params.eval_size = len(eval_filenames)
params.train_size = len(train_npy)
params.eval_size = len(eval_npy)

# Create the two iterators over the two datasets
train_inputs = input_fn(True, train_npy, train_labels, params)
eval_inputs = input_fn(False, eval_npy, eval_labels, params)

# Define the model
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, args.restore_from)