Example #1
def test_dataset_API_with_pygenerator():
    data = 'dataset/cone/train_dev'
    image_dir = os.path.join(data, 'Images')
    label_dir = os.path.join(data, 'Labels')
    train_inputs = input_fn(True, image_dir, label_dir, params)
    eval_inputs = input_fn(False, image_dir, label_dir, params)

    with tf.Session() as sess:
        sess.run(train_inputs['iterator_init_op'])
        images = sess.run(train_inputs['images'])
        labels = sess.run(train_inputs['labels'])

        # for img in images:
        #     plt.imshow(img)
        #     plt.show()

        # Check for training data
        assert images.shape == (params.batch_size, 160, 160,
                                3), "Unexpected training batch image dimension"
        assert labels.shape == (params.batch_size, 8540,
                                7), "Unexpected training label batch dimension"

        sess.run(eval_inputs['iterator_init_op'])
        images = sess.run(eval_inputs['images'])
        labels = sess.run(eval_inputs['labels'])

        # Check for validation data
        assert images.shape == (params.batch_size, 160, 160,
                                3), "Unexpected val batch image dimension"
        assert labels.shape == (params.batch_size, 8540,
                                7), "Unexpected val label batch dimension"
Example #2
def main(unused_argv):
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu
    json_path = os.path.join(FLAGS.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)
    # Load the parameters from the dataset, that gives the size etc. into params
    json_path = os.path.join(FLAGS.data_dir, 'dataset_params.json')
    assert os.path.isfile(
        json_path), "No json file found at {}, run build_vocab.py".format(
            json_path)
    params.update(json_path)

    path_words = os.path.join(FLAGS.data_dir, 'words.txt')
    path_train = os.path.join(FLAGS.data_dir, 'train.csv')
    path_eval = os.path.join(FLAGS.data_dir, 'valid.csv')
    path_test = os.path.join(FLAGS.data_dir, 'testa.csv')
    print("train set:", path_train)
    print("valid set:", path_eval)
    print("test set:", path_test)

    config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)
    if params.model.startswith("TextCNN"):
        estimator = TextCNN(
            params,
            model_dir=FLAGS.model_dir,
            config=config,
            optimizer=params.optimizer if "optimizer" in params else None)
    else:
        raise ValueError("Unsupported model: {}".format(params.model))
    if FLAGS.train:
        train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(
            path_train, path_words, params, params.shuffle_buffer_size),
                                            max_steps=params.train_steps)
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(path_eval, path_words, params, 0),
            throttle_secs=FLAGS.throttle_secs)
        print("before train and evaluate")
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
        print("after train and evaluate")
        #inputs = {"content": tf.placeholder(shape=[None, params.sentence_max_len], dtype=tf.int32),
        #  "id": tf.placeholder(shape=[None, 1], dtype=tf.int32)}
        #estimator.export_savedmodel(
        #  export_dir_base=FLAGS.model_dir, serving_input_receiver_fn=tf.estimator.export.build_raw_serving_input_receiver_fn(inputs))
    if FLAGS.predict:
        params.batch_size = 1
        test_input_fn = lambda: input_fn(path_test, path_words, params, 0)
        predictions = estimator.predict(test_input_fn)
        result = pd.DataFrame(predictions)
        output_path = os.path.join(FLAGS.model_dir,
                                   params.model + '_result.csv')
        result.to_csv(output_path,
                      index_label="id",
                      columns=OUTPUT_CSV_COLUMNS)
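
Note that the input_fn consumed by TrainSpec and EvalSpec here follows a different contract from Example #1: it must be a zero-argument callable (hence the lambdas) returning a tf.data.Dataset of (features, labels) pairs. A rough sketch under an invented CSV schema of (id, content, label); the real function would also tokenize the text against path_words:

import tensorflow as tf

def input_fn(path_csv, path_words, params, shuffle_buffer_size):
    # Sketch of an Estimator-style input_fn returning (features, labels).
    dataset = tf.data.TextLineDataset(path_csv).skip(1)  # skip a header row

    def parse_line(line):
        # Hypothetical schema: id, content, label.
        fields = tf.decode_csv(line, record_defaults=[[0], [''], [0]])
        return {'id': fields[0], 'content': fields[1]}, fields[2]

    dataset = dataset.map(parse_line)
    if shuffle_buffer_size > 0:
        dataset = dataset.shuffle(shuffle_buffer_size).repeat()
    return dataset.batch(params.batch_size)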
Example #3
def eval_input_fn():
    return input_fn(eval_feature,
                    eval_label,
                    batch_size=params.batch_size,
                    is_training=False,
                    repeat_count=1,
                    prefetch=params.prefetch)
Example #4
def train_input_fn():
    return input_fn(train_feature,
                    train_label,
                    batch_size=params.batch_size,
                    is_training=True,
                    repeat_count=None,
                    prefetch=params.prefetch)
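
Examples #3 and #4 (and Example #6 below) wrap input_fn in zero-argument closures because tf.estimator.Estimator calls the function itself with no arguments, so data and hyperparameters must be captured rather than passed. A hypothetical wiring, assuming model_fn and the closures come from the surrounding project:

import tensorflow as tf

# Hypothetical wiring (not from the source): model_fn, train_input_fn and
# eval_input_fn are assumed to be defined by the surrounding project.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='experiments/base')
estimator.train(input_fn=train_input_fn, max_steps=1000)
metrics = estimator.evaluate(input_fn=eval_input_fn)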
Example #5
def main():

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    params = Params(json_path)

    # Set the logger 
    set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info('Creating the dataset...')
    data_dir = args.data_dir
    valid_data_dir = os.path.join(data_dir, 'valid')
    
    # Get the filenames and labels from the validation set
    valid_filenames, valid_labels = get_filenames_and_labels(
        valid_data_dir, params)

    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(valid_labels))

    # Create the two iterators over the two datasets
    valid_inputs = input_fn(False, valid_filenames,
                            valid_labels, params)

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', valid_inputs, params,
                          reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params,
             args.restore_from)
Example #6
def test_input_fn():
    return input_fn(test_feature,
                    test_label,
                    batch_size=1,
                    is_training=False,
                    is_test=True,
                    repeat_count=1)
Example #7
def main():

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), 'No json configuration file found at {}'.format(json_path)
    params = Params(json_path)

    # Set the logger 
    set_logger(os.path.join(args.model_dir, 'train.log'))
    
    if not os.path.exists(args.restore_from):
        os.makedirs(args.restore_from)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    data_dir = args.data_dir
    train_data_dir = os.path.join(data_dir, 'train')
    valid_data_dir = os.path.join(data_dir, 'valid')

    # Get the filenames and labels from the train and valid sets
    train_filenames, train_labels = get_filenames_and_labels(
        train_data_dir, params)
    valid_filenames, valid_labels = get_filenames_and_labels(
        valid_data_dir, params)

    params.train_size = len(train_filenames)
    params.valid_size = len(valid_filenames)
    params.num_labels = len(set(train_labels))

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames,
                            train_labels, params)
    valid_inputs = input_fn(False, valid_filenames,
                            valid_labels, params)

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train', train_inputs,
                                params)
    valid_model_spec = model_fn('eval', valid_inputs,
                                params, reuse=True)
    # Train the model
    logging.info('Starting training for {} epoch(s)'.format(
        params.num_epochs))
    train_and_evaluate(train_model_spec, valid_model_spec,
                       args.model_dir, params, args.restore_from)
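
A pattern that recurs throughout these examples is building the eval graph with reuse=True so the train and eval models share one set of weights. A minimal illustration of how that works with TF1 variable scopes (a toy, not the project's model_fn):

import tensorflow as tf

def toy_model_fn(mode, inputs, reuse=False):
    # Variables created under the same scope name are shared when reuse=True.
    with tf.variable_scope('model', reuse=reuse):
        logits = tf.layers.dense(inputs, 6, name='fc')
    return logits

train_logits = toy_model_fn('train', tf.zeros([32, 128]))
eval_logits = toy_model_fn('eval', tf.zeros([8, 128]), reuse=True)  # same weights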
Example #8
def train():
    # Set the logger
    set_logger(os.path.join(params['model_dir'], 'train.log'))
    # log params
    logging.info(params)

    # Load vocabulary
    vocab = tf.contrib.lookup.index_table_from_file(vocab_path,
                                                    num_oov_buckets=1)

    # Create the input data pipeline
    logging.info('Creating the datasets...')
    train_input_words = load_dataset_from_text(data_dir, train_input_filename,
                                               vocab)
    train_context_words = load_dataset_from_text(data_dir,
                                                 train_context_filename, vocab)

    # Create the iterator over the dataset
    train_inputs = input_fn('train', train_input_words, train_context_words,
                            params)
    eval_inputs = input_fn('eval', train_input_words, train_context_words,
                           params)
    logging.info("- done")

    # Define the model
    logging.info('Creating the model...')
    train_model_spec = model_fn('train',
                                train_inputs,
                                params,
                                reuse=tf.AUTO_REUSE)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info('- done.')

    # Train the model
    logging.info('Starting training for {} epochs'.format(
        params['num_epochs']))
    normalized_embedding_matrix = train_and_evaluate(train_model_spec,
                                                     eval_model_spec, params)

    save_dict_to_json(params, os.path.join(params['model_dir'], 'params.json'))
    pd.DataFrame(normalized_embedding_matrix).to_csv(os.path.join(
        params['model_dir'], 'normalized_embedding_matrix.tsv'),
                                                     index=False,
                                                     header=None,
                                                     sep='\t')
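
For reference, tf.contrib.lookup.index_table_from_file (used above to load the vocabulary) maps each token to its line number in the file, and num_oov_buckets=1 routes every unseen token to one extra id instead of raising an error. A small self-contained check with a toy vocabulary (not the project's):

import tensorflow as tf

# Toy vocabulary file: one token per line, id = line number.
with open('toy_vocab.txt', 'w') as f:
    f.write('the\ncat\nsat\n')

vocab = tf.contrib.lookup.index_table_from_file('toy_vocab.txt', num_oov_buckets=1)
ids = vocab.lookup(tf.constant(['cat', 'dog']))  # 'dog' is out of vocabulary

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(ids))  # [1 3]: 'dog' falls into the single OOV bucket (id = vocab size)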
Example #9
def funct(x):
    # Set the random seed for the whole graph

    tf.set_random_seed(230)

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.data_dir, 'predict.log'))

    # Create the input data pipeline
    data_dir = args.data_dir
    test_data_dir = data_dir

    # Get the filenames from the test set
    test_filenames = [os.path.join(test_data_dir, 'predict.jpg')]

    test_labels = [x]
    # print(test_labels)

    # specify the size of the evaluation set
    params.eval_size = len(test_filenames)

    # create the iterator over the dataset
    test_inputs = input_fn(False, test_filenames, test_labels, params)

    # Define the model
    model_spec = model_fn('eval', test_inputs, params, reuse=tf.AUTO_REUSE)

    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #10
# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(
    path_words, num_oov_buckets=num_oov_buckets)
eras = tf.contrib.lookup.index_table_from_file(path_era_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_eras = load_dataset_from_text(path_eras, eras, isLabels=True)

# Specify other parameters for the dataset and the model
params_era.id_pad_word = words.lookup(tf.constant(params_era.pad_word))
params_era.id_pad_tag = eras.lookup(tf.constant(params_era.pad_tag))

# Create the iterator over the test set
inputs_era = input_fn('eval', reviews, review_eras, params_era)

# Define the model
print('Creating era models...')
model_spec_era = model_fn('eval', inputs_era, params_era, reuse=False)
print('Done')

print(era_model_path)
print(path_words)
print(path_era_tags)
print(path_reviews)
print(path_eras)
print(os.path.join(args.model_dir, args.restore_from))

# Evaluate the model...
# evaluate(model_spec_era, args.model_dir, params_era, args.restore_from)
Example #11
    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Create the input data pipeline
    tf.logging.info("Creating the datasets...")
    data = tf.contrib.learn.datasets.mnist.load_mnist(args.data_dir)

    # Specify the sizes of the dataset we evaluate on
    params.eval_size = data.test.num_examples

    # Create the test input function
    test_input_fn = lambda: input_fn(False, data.test.images, data.test.labels,
                                     params)

    # Define the model
    tf.logging.info("Creating the model...")
    estimator = tf.estimator.Estimator(model_fn,
                                       params=params,
                                       model_dir=args.model_dir)

    # Evaluate the model on the test set
    tf.logging.info("Evaluation on the test set.")
    res = estimator.evaluate(test_input_fn)
    for key in res:
        print("{}: {}".format(key, res[key]))
Example #12
    eval_masks_filenames = [
        os.path.join(dev_masks_dir, f) for f in os.listdir(dev_masks_dir)
        if f.endswith('.png')
    ]

    # Get the images id
    # assert os.path.isfile(args.label_dir), "Couldn't find the label file in {}".format(args.label_dir)

    # train_labels, eval_labels = get_label(args.label_dir, train_data_dir, dev_data_dir)

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = len(train_filenames)
    params.eval_size = len(eval_filenames)

    # PH2 dataset at 0.25 scale: size 191 x 143
    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames, train_masks_filenames,
                            params)
    eval_inputs = input_fn(False, eval_filenames, eval_masks_filenames, params)

    # Define the model
    logging.info("Creating the model from {}".format(args.model_dir))
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))

    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)
Example #13
    # Set the logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_tfrecord = args.train_tf
    eval_tfrecord = args.eval_tf

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = len(
        [x for x in tf.python_io.tf_record_iterator(train_tfrecord)])
    params.eval_size = len(
        [x for x in tf.python_io.tf_record_iterator(eval_tfrecord)])

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_tfrecord, params)
    eval_inputs = input_fn(False, eval_tfrecord, params)

    # Define the model
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)
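
Counting records by materializing the whole TFRecord file into a list, as above, holds every serialized example in memory at once. A generator expression gives the same count without that cost; a small alternative sketch:

import tensorflow as tf

def count_tfrecords(path):
    # Count records in a TFRecord file without building a list in memory.
    return sum(1 for _ in tf.python_io.tf_record_iterator(path))

# e.g. params.train_size = count_tfrecords(train_tfrecord)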
Example #14
def eval_input_fn():
    path_test_sentences = os.path.join(args.data_dir, 'dev/sentences.txt')
    path_test_labels = os.path.join(args.data_dir, 'dev/labels.txt')

    return input_fn('eval', path_test_sentences, path_test_labels, params)
Example #15
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    test_data_dir = os.path.join(data_dir, "test_signs")

    # Get the filenames from the test set
    test_filenames = os.listdir(test_data_dir)
    test_filenames = [os.path.join(test_data_dir, f) for f in test_filenames if f.endswith('.jpg')]

    test_labels = [int(f.split('/')[-1][0]) for f in test_filenames]

    # specify the size of the evaluation set
    params.eval_size = len(test_filenames)

    # create the iterator over the dataset
    test_inputs = input_fn(False, test_filenames, test_labels, params)

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', test_inputs, params, reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #16
for cv, (train_data, test_data) in enumerate(data_generator):
    logging.info('Starting fold {}'.format(cv+1))
    train_size = train_data[0].shape[0]
    eval_size = test_data[0].shape[0]
    
    if params['train_ae']:
        tf.reset_default_graph()
        logging.info('Training autoencoder to compute embeddings')

        ae_params = params['ae']
        ae_params['train_size'] = train_size
        ae_params['eval_size'] = eval_size

        logging.info('Creating the inputs for the model')
        train_inputs = input_fn(True, train_data, ae_params)
        eval_inputs = input_fn(False, test_data, ae_params)

        logging.info('Building the model')
        train_model = ae_model_fn(True, train_inputs, ae_params)
        eval_model = ae_model_fn(False, eval_inputs, ae_params)

        logging.info('Start training {} epochs'.format(
            params['ae']['num_epochs']))
        model_dir = os.path.join(args.model_dir, 'cv_' + str(cv + 1), 'ae')
        train_and_save(train_model, eval_model, model_dir, ae_params,
                       restore_weights=args.restore_ae_from)

        # Update spectra data with embeddings computed from the model
        logging.info('Compute embeddings of the spectra data')
        emb_params = {'restore_path': os.path.join(model_dir, 'best_weights'),
                      'params': ae_params,
                      'layer_name': 'embeddings',
                      'evaluate_model': False}
Example #17
    json_path = os.path.join(args.data_dir, 'dataset_params.json')
    assert os.path.isfile(
        json_path), "No json file found at {}, run build.py".format(json_path)
    params.update(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Get paths for vocabularies and dataset
    path_eval = os.path.join(args.data_dir, 'test/')

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_X = load_Xdataset_from_text(os.path.join(path_eval, 'X.csv'))
    test_labels = load_Ydataset_from_text(os.path.join(path_eval, 'Y.csv'))

    # Specify other parameters for the dataset and the model
    params.eval_size = params.test_size

    # Create iterator over the test set
    inputs = input_fn('eval', test_X, test_labels, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', inputs, params, reuse=False)
    logging.info("- done.")

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #18
    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Get paths for dataset
    path_test_prices = os.path.join(args.data_dir, 'test_inputs.pkl')
    path_test_deltas = os.path.join(args.data_dir, 'test_labels.pkl')

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    test_prices, test_deltas = load_prices_and_deltas(path_test_prices,
                                                      path_test_deltas, params)

    # Create the two iterators over the two datasets
    test_inputs = input_fn('test', test_prices, test_deltas, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('test', test_inputs, params, reuse=False)

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #19
# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
sentiments = tf.contrib.lookup.index_table_from_file(path_sentiment_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_sentiments = load_dataset_from_text(path_sentiments, sentiments, isLabels=True)

# Specify other parameters for the dataset and the model
params_sentiment.id_pad_word = words.lookup(tf.constant(params_sentiment.pad_word))
params_sentiment.id_pad_tag = sentiments.lookup(tf.constant(params_sentiment.pad_tag))


# Create the iterator over the test set
inputs_sentiment = input_fn('eval', reviews, review_sentiments, params_sentiment)

# Define the model
print('Creating sentiment and era models...')
model_spec_sentiment = model_fn('eval', inputs_sentiment, params_sentiment, reuse=False)
print('Done')

# Evaluate the model...
# evaluate(model_spec_sentiment, args.model_dir, params_sentiment, args.restore_from)

# initialize saver to restore model
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize lookup tables for both models
    sess.run(model_spec_sentiment['variable_init_op'])
Example #20
import argparse
import logging
import os

import tensorflow as tf

# Params, set_logger, input_fn, model_fn and evaluate are assumed to be
# imported from the surrounding project, as in the other examples.
from train import import_names_and_labels

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", default="experiments/07_full_images")
parser.add_argument("--data_dir", default="data/kaggle")
parser.add_argument("--restore_from", default="best_weights")
parser.add_argument("--set", default="test")

if __name__ == "__main__":
    tf.set_random_seed(230)
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, "params.json")
    params = Params(json_path)
    params.evaluate()

    set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    logging.info("Creating the dataset...")
    data_dir = args.data_dir
    image_dir = os.path.join(data_dir, "images")
    names, labels = import_names_and_labels(data_dir, "test",
                                            params.num_labels)
    params.eval_size = len(names)
    inputs = input_fn("test", image_dir, names, labels, params)

    logging.info("Creating the model...")
    model_spec = model_fn("eval", inputs, params)

    logging.info("Evaluating...")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #21
    assert os.path.isfile(params_file), "No .json file found at {}".format(params_file)
    with open(params_file) as json_file:
        params = json.load(json_file)
    print("Parameters used :\n{}".format(params))

    # Load the dataset
    print("Loading dataset from " + args.data_dir + args.data_set)
    test_dir = os.path.join(args.data_dir, args.data_set)
    assert os.path.isdir(test_dir), "No test directory found"
    # Test data
    test_pathlist = Path(test_dir).glob("*.jpg")
    test_filenames = [str(path) for path in test_pathlist]
    test_filenames = [
        s for s in test_filenames
        if int(s.split("_")[1].split('/')[2]) < params["num_classes"]
    ]
    test_labels = [int(s.split("_")[1].split('/')[2]) for s in test_filenames]

    print("Done loading data")
    print("Test set size {}\n".format(len(test_filenames)))

    # Create the estimator
    cnn_classifier = tf.estimator.Estimator(model_fn=model_fn,
                                            model_dir=model_dir,
                                            params=params)

    print("Evaluating model")
    test_results = cnn_classifier.evaluate(input_fn=lambda: input_fn(
        is_training=False, filenames=test_filenames, labels=test_labels))
    print("Results : \n{}".format(test_results))
Example #22
    # Get paths for dataset
    path_train = os.path.join(args.data_dir, 'train/')
    path_eval = os.path.join(args.data_dir, 'dev/')

    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_X = load_Xdataset_from_text(os.path.join(path_train, 'X.csv'))
    train_labels = load_Ydataset_from_text(os.path.join(path_train, 'Y.csv'))
    eval_X = load_Xdataset_from_text(os.path.join(path_eval, 'X.csv'))
    eval_labels = load_Ydataset_from_text(os.path.join(path_eval, 'Y.csv'))

    # Specify other parameters for the dataset and the model
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size  # buffer size for shuffling

    # Create the two iterators over the two datasets
    train_inputs = input_fn('train', train_X, train_labels, params)
    eval_inputs = input_fn('eval', eval_X, eval_labels, params)
    logging.info("- done.")

    # Define the models (two different sets of nodes that share weights for train and eval)
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info("- done.")

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)
Example #23
    #TODO: check and load if there's the best weights so far
    #     model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))

    # Set the logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    #train/test split
    train_fpaths, test_fpaths, train_targets, test_targets = \
        get_train_test_split(args.json_path, args.data_dir, train_size=args.train_size)

    params.train_size = len(train_fpaths)
    params.test_size = len(test_fpaths)

    logging.info("Creating the dataset...")
    train_inputs = input_fn(True, train_fpaths, train_targets, params)
    test_inputs = input_fn(False, test_fpaths, test_targets, params)

    logging.info("Creating the model...")
    train_model_spec = model_fn(True, train_inputs, params)
    test_model_spec = model_fn(False, test_inputs, params, reuse=True)

    logging.info("train set predict...")
    predict(train_model_spec, args.model_save_dir, params, args.restore_from)

    logging.info("test set predict...")
    predict(test_model_spec, args.model_save_dir, params, args.restore_from)

    end_time = time.time()
    logging.info("Elapsed training time is {:.2f} secs".format(end_time -
                                                               start_time))
Example #24
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # Create the input data pipeline
    tf.logging.info("Creating the datasets...")
    data = tf.contrib.learn.datasets.mnist.load_mnist(args.data_dir)

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = data.train.num_examples
    params.eval_size = data.test.num_examples

    # Create the two input functions over the two datasets
    train_input_fn = lambda: input_fn(True, data.train.images, data.train.labels, params)
    test_input_fn = lambda: input_fn(False, data.test.images, data.test.labels, params)

    # Define the model
    tf.logging.info("Creating the model...")
    config = tf.estimator.RunConfig(tf_random_seed=230,
                                    model_dir=args.model_dir,
                                    save_summary_steps=params.save_summary_steps)
    estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

    # Train the model
    tf.logging.info("Starting training for {} epoch(s).".format(params.num_epochs))
    estimator.train(train_input_fn)

    # Evaluate the model on the test set
    tf.logging.info("Evaluation on test set.")
    res = estimator.evaluate(test_input_fn)
Example #25
    train_filenames = [
        os.path.join(train_data_dir, f) for f in os.listdir(train_data_dir)
        if f.endswith('.jpg')
    ]
    eval_filenames = [
        os.path.join(dev_data_dir, f) for f in os.listdir(dev_data_dir)
        if f.endswith('.jpg')
    ]

    # Labels will be between 0 and 5 inclusive (6 classes in total)
    train_labels = [int(f.split('/')[-1][0]) for f in train_filenames]
    eval_labels = [int(f.split('/')[-1][0]) for f in eval_filenames]
    # print("train_labels = "+str(train_labels))
    # print("eval_labels ="+str(eval_labels))

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = len(train_filenames)
    params.eval_size = len(eval_filenames)

    # Create the two iterators over the two datasets
    train_inputs = input_fn(True, train_filenames, train_labels, params)
    eval_inputs = input_fn(False, eval_filenames, eval_labels, params)

    # Define the model
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)
Example #26
import logging
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm

# Params, set_logger, input_fn and model_fn are assumed to come from the
# surrounding project, as in the other examples.


def predict(inp, target, params, restore_from, config=None,
            model_dir='./ie590_project/experiments/ex1',
            model_save_dir='./ie590_project/experiments/ex1/model_save/1'):
    """predict target values given input file paths
    Args:
        inp: (list) a string list of image files paths; 2D -> [sample_size, number_of_channels]
        model_spec: (dict) model specifications of tf Ops
        params: (Params or str) Params object or params.joson path
        tar: (list) a float list of target values
        restore_from: (str) ckpt or directory name where ckpts are located for restoring
        ...
    Return:
        out: (list) a list of precicted target values; have exactly same dimension as target
    """

    assert len(inp) == len(target)

    if isinstance(params, str):
        assert os.path.isfile(
            params), "params.json does not exist at {}".format(params)
        params = Params(params)  # load the parameters from the json file
    params.inp_size = len(inp)

    set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Creating the dataset...")
    inputs = input_fn(False, inp, target, params)

    logging.info("Creating the model...")
    model_spec = model_fn(False, inputs, params)

    logging.info("Calculating predictions...")
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(model_spec['variable_init_op'])

        save_path = os.path.join(model_save_dir, restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(
                save_path
            )  # If restore_from is a directory, get the latest ckpt
        saver.restore(sess, save_path)

        num_steps = (params.inp_size + params.batch_size -
                     1) // params.batch_size

        sess.run([iterator_init_op, metrics_init_op])

        if len(np.shape(target)) == 1:
            out = np.empty(np.shape(target))[:, np.newaxis]
        else:
            out = np.empty(np.shape(target))
        for i in tqdm(range(num_steps)):
            _, predictions_eval = sess.run([update_metrics_op, predictions])
            if i < num_steps - 1:
                out[i * params.batch_size:(i + 1) *
                    params.batch_size, :] = predictions_eval
            else:
                out[i * params.batch_size:, :] = predictions_eval

    return out
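
The step count above uses ceiling division: (params.inp_size + params.batch_size - 1) // params.batch_size rounds up so that a final short batch is not dropped, which is also why the last iteration writes the open-ended slice out[i * params.batch_size:, :]. A quick check of the arithmetic:

# With 10 inputs and batches of 4: two full batches plus a short batch of 2.
inp_size, batch_size = 10, 4
num_steps = (inp_size + batch_size - 1) // batch_size
assert num_steps == 3
assert inp_size - (num_steps - 1) * batch_size == 2  # size of the final batch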
Example #27
    # Get paths for vocabularies and dataset
    path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
    params.vocab_path = path_vocab
    path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
    path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')
    # Load Vocabularies
    vocab = tf.contrib.lookup.index_table_from_file(
        path_vocab, num_oov_buckets=num_oov_buckets, key_column_index=0)

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_queries = load_dataset_from_text(path_test_queries, vocab, params)
    test_articles = load_dataset_from_text(path_test_articles, vocab, params)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.test_size
    params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

    # Create iterator over the test set
    inputs = input_fn('eval', test_queries, test_articles, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', inputs, params, reuse=False)
    logging.info("- done.")

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
Example #28
    path_words = os.path.join(args.data_dir, 'words.txt')
    path_tags = os.path.join(args.data_dir, 'tags.txt')
    path_eval_sentences = os.path.join(args.data_dir, 'dev/sentences.txt')
    path_eval_labels = os.path.join(args.data_dir, 'dev/labels.txt')

    # Load Vocabularies
    words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
    tags = tf.contrib.lookup.index_table_from_file(path_tags)

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_sentences = load_dataset_from_text(path_eval_sentences, words)
    test_labels = load_dataset_from_text(path_eval_labels, tags)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.test_size
    params.id_pad_word = words.lookup(tf.constant(params.pad_word))
    params.id_pad_tag = tags.lookup(tf.constant(params.pad_tag))

    # Create iterator over the test set
    inputs = input_fn('eval', test_sentences, test_labels, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', inputs, params, reuse=False)
    logging.info("- done.")

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
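
The id_pad_word and id_pad_tag values computed above are the padding ids used when batching variable-length sentences; in the CS230-style pipeline these examples follow, they would typically be passed as padding_values to Dataset.padded_batch. A self-contained toy sketch (the token ids are made up):

import tensorflow as tf

def gen():
    yield [4, 7, 1]  # hypothetical token ids for a three-word sentence
    yield [2, 9]     # a shorter sentence

dataset = tf.data.Dataset.from_generator(gen, tf.int64, tf.TensorShape([None]))
dataset = dataset.padded_batch(2, padded_shapes=tf.TensorShape([None]),
                               padding_values=tf.constant(0, tf.int64))
batch = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    print(sess.run(batch))  # [[4 7 1] [2 9 0]]: 0 plays the role of id_pad_word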
Example #29
        file_name = 'train.csv'
    else:
        file_name = 'eval.csv'

    if args.sample:
        dp = os.path.join(args.data_dir, 'sample', file_name)
        if os.path.exists(dp):
            data_path = dp
        else:
            data_path = os.path.join(args.data_dir, file_name)
    else:
        data_path = os.path.join(args.data_dir, file_name)

    tf.logging.info("Predicting the data...")
    train_predictions = estimator.predict(
        lambda: input_fn(data_path, params, is_training=False))

    preds = []
    for i, p in tqdm(enumerate(train_predictions)):
        if args.sample and i > 100:
            break
        probs = p['preds']
        gen_line = decode_preds(probs, vocab)
        preds.append(gen_line)

    print('Predictions: \n\n{}'.format('\n'.join(preds)))

    with open(os.path.join(args.model_dir, 'results.txt'), 'w') as file:
        for line in preds:
            file.write(f'{line}\n')
Example #30
def train_input_fn():
    path_train_sentences = os.path.join(args.data_dir, 'train/sentences.txt')
    path_train_labels = os.path.join(args.data_dir, 'train/labels.txt')

    return input_fn('train', path_train_sentences, path_train_labels, params)