Example #1
def main():
    tf.set_random_seed(230)
    args = parser.parse_args()
    param_path = os.path.join(args.data_dir, 'params.json')
    assert os.path.isfile(
        param_path), 'No <params.json> configuration file found in path: {}'.format(
            args.data_dir)
    # load parameters
    params = Params(param_path)
    params.buffer_size = params.train_size
    params.hidden_layers = [int(x) for x in params.hidden_layers.split(',')]

    set_logger(os.path.join(args.model_dir, 'train.log'))
    params.print()

    # Step2, create tf.dataset
    logging.info('Create train and eval dataset...')
    train_file = os.path.join(args.data_dir, 'train1.tsv')
    eval_file = os.path.join(args.data_dir, 'dev1.tsv')
    train_dataset = load_dataset_from_text(train_file, params)
    eval_dataset = load_dataset_from_text(eval_file, params)

    # Step3, create train and eval iterators over the two datasets
    train_inputs = input_fn(train_dataset, params, 'train')
    eval_inputs = input_fn(eval_dataset, params, 'eval')
    logging.info('Input pipeline created.')

    # Step4, define model
    logging.info('Create model...')
    model = DeepFM()

    # Step5, build model specification
    train_model_spec = build_model_spec('train',
                                        model,
                                        train_inputs,
                                        params,
                                        reuse=False)
    # If you run evaluation on its own (without the train spec above), set reuse=False.
    eval_model_spec = build_model_spec('eval',
                                       model,
                                       eval_inputs,
                                       params,
                                       reuse=True)
    logging.info('Train and eval model specifications created.')

    # Step6, train and evaluate model
    logging.info('Start training for {} epochs'.format(params.epochs))
    train_evaluate(train_model_spec, eval_model_spec, args.model_dir, params,
                   args.restore_from)

    # Step7, save model
    with open(args.model_dir + '/model.pkl', 'wb') as f:
        pickle.dump(model, f)
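
Note: both this example and Example #4 below assume a small Params helper that loads a JSON file into object attributes. A minimal sketch of such a helper (hypothetical, modeled on the Stanford CS230 starter code; the project's actual class may differ):

import json
import logging


class Params:
    '''Loads hyperparameters from a JSON file into attributes.'''

    def __init__(self, json_path):
        self.update(json_path)

    def update(self, json_path):
        # Merge the keys of a JSON file into this object's attributes.
        with open(json_path) as f:
            self.__dict__.update(json.load(f))

    def print(self):
        # Log every parameter as a key/value pair.
        for key, value in sorted(self.__dict__.items()):
            logging.info('%s: %s', key, value)
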
def main(args):

    # TODO: Should these variables still be capitalized?
    MODEL_DIR = args.model_dir
    WORKING_DIR = args.data_dir

    NUM_EPOCHS = args.num_epochs
    EPOCHS_BETWEEN_EVALS = args.epochs_between_evals
    LEARNING_RATE = args.learning_rate
    TRAIN_BATCH_SIZE = args.train_batch_size
    EVAL_BATCH_SIZE = args.eval_batch_size

    NUM_FOLDS = args.num_folds
    FOLDS_TO_TRAIN_AGAINST = args.folds

    # Assumes default Carvana data folder structure...
    IMAGE_DIR = os.path.join(WORKING_DIR, 'train_hq')
    MASK_DIR = os.path.join(WORKING_DIR, 'train_masks')

    IMAGE_FILENAMES = sorted(glob(os.path.join(IMAGE_DIR, '*.jpg')))
    MASK_FILENAMES = sorted(glob(os.path.join(MASK_DIR, '*.gif')))

    NUM_OUTPUT_CLASSES = 2
    # Pixels are classified as either "foreground" or "background"

    # Prefer channels_first when TensorFlow was built with CUDA; note this
    # checks the build, not whether a GPU is actually present.
    data_format = ('channels_first' if tf.test.is_built_with_cuda()
                   else 'channels_last')

    params = {
        'data_format': data_format,
        'num_output_classes': NUM_OUTPUT_CLASSES,
        'learning_rate': LEARNING_RATE
    }

    # Mirror the model across all available GPUs using the mirrored distribution strategy.
    # TODO: Is there a good way to check if multiple GPUs are available?
    distribution = (tf.contrib.distribute.MirroredStrategy()
                    if args.distribute else None)
    config = tf.estimator.RunConfig(train_distribute=distribution,
                                    keep_checkpoint_max=2,
                                    log_step_count_steps=5)

    folds = KFolds(IMAGE_FILENAMES,
                   MASK_FILENAMES,
                   num_folds=NUM_FOLDS,
                   sort=False,
                   yield_dict=False)

    # Train separate models on each requested fold.
    for fold_num in FOLDS_TO_TRAIN_AGAINST:
        (train_images, train_masks), (eval_images,
                                      eval_masks) = folds.get_fold(fold_num)

        # Initialize the Estimator
        image_segmentor = tf.estimator.Estimator(
            model_dir='-'.join([MODEL_DIR, str(fold_num)]),
            model_fn=model_fn,
            params=params,
            config=config)

        # Train and evaluate
        for i in range(NUM_EPOCHS // EPOCHS_BETWEEN_EVALS):
            print('\nEntering training epoch %d.\n' %
                  (i * EPOCHS_BETWEEN_EVALS))
            image_segmentor.train(
                # input_fn is expected to take no arguments
                input_fn=lambda: input_fn(train_images,
                                          train_masks,
                                          training=True,
                                          data_format=params['data_format'],
                                          num_repeats=EPOCHS_BETWEEN_EVALS,
                                          batch_size=TRAIN_BATCH_SIZE))

            results = image_segmentor.evaluate(
                input_fn=lambda: input_fn(eval_images,
                                          eval_masks,
                                          training=False,
                                          data_format=params['data_format'],
                                          batch_size=EVAL_BATCH_SIZE))

            # TODO: Look into writing example images to a tf.summary?
            print('\nEvaluation results:\n%s\n' % results)
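
Note: Estimator.train and Estimator.evaluate expect a zero-argument callable, which is why the calls above wrap input_fn in a lambda that closes over the fold data. A minimal sketch of the same pattern with a factory function (all names here are illustrative, not the project's own):

import tensorflow as tf


def make_input_fn(filenames, labels, batch_size, training):
    # Return the zero-argument input_fn that tf.estimator expects.
    def input_fn():
        ds = tf.data.Dataset.from_tensor_slices((filenames, labels))
        if training:
            ds = ds.shuffle(buffer_size=len(filenames)).repeat()
        return ds.batch(batch_size)
    return input_fn
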
X = tf.placeholder(name='ip', dtype=tf.float32, shape=(None, 64, 64, 1))
Y = tf.placeholder(tf.int32, [None, 1])
# network = model.model(X_train)
network = model.cnn_model(X)
[optimizer, cost] = training.trainer(network, Y)
print(optimizer)

# Initialization
sess = tf.Session()
num_points = len(filenames)
# Build the input pipeline and initialize variables once, before the loops
inputs = input_fn.input_fn(filenames, labels, batch_size)
sess.run(tf.global_variables_initializer())

# Run training
for epoch in range(100):
    # Re-initialize the iterator once per epoch, not once per batch
    sess.run(inputs['iterator_init_op'])
    for jj in range(int(math.floor((num_points // batch_size) - 1))):
        # Fetch images and labels in one run so they come from the same batch
        train_X, train_Y = sess.run([inputs['images'], inputs['labels']])
        train_Y = np.array(train_Y).reshape((batch_size, 1))

        sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
        # print('Done with batch {} and epoch {}'.format(jj, epoch))

    # Evaluate the model on the next batch
    train_X, train_Y = sess.run([inputs['images'], inputs['labels']])
    train_Y = np.array(train_Y)
Example #4
def main():
    # Set the random seed for the whole graph for reproducible experiments
    tf.set_random_seed(230)

    # Load model parameters from params.json file in model_dir
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path
    ), "No <params.json> json configuration file found at {}".format(
        args.model_dir)
    # load params
    params = Params(json_path)

    # Load dataset parameters from dataset_params.json file in data_dir
    json_path = os.path.join(args.data_dir, 'dataset_params.json')
    assert os.path.isfile(
        json_path
    ), "No <dataset_params.json> json configuration file found at {}".format(
        args.data_dir)
    params.update(json_path)
    num_oov_buckets = params.num_oov_buckets  # number of buckets for unknown words

    # Check that we are not overwriting previous results
    # (left commented out; uncomment to guard against overwriting)
    # model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, 'best_weights'))
    # overwriting = model_dir_has_best_weights and args.restore_dir is None
    # assert not overwriting, "Weights found in model dir, aborting to avoid overwrite"

    # Set logger
    set_logger(os.path.join(args.model_dir, 'train.log'))
    # print parameters
    params.print()

    # get vocabulary and label filename
    vocab_path = os.path.join(args.data_dir, 'words.txt')
    label_path = os.path.join(args.data_dir, 'tags.txt')

    train_sentences_path = os.path.join(args.data_dir,
                                        'train/sentences.txt.list')
    train_labels_path = os.path.join(args.data_dir, 'train/labels.txt')

    eval_sentences_path = os.path.join(args.data_dir, 'dev/sentences.txt.list')
    eval_labels_path = os.path.join(args.data_dir, 'dev/labels.txt')

    # for batch predict
    # test_sentences_path  = os.path.join(args.data_dir, 'test/sentences.txt.list')
    # test_labels_path = os.path.join(args.data_dir, 'test/labels.txt')

    # Create word lookup table and label lookup table
    words_table = tf.contrib.lookup.index_table_from_file(
        vocab_path, num_oov_buckets=num_oov_buckets)
    tags_table = tf.contrib.lookup.index_table_from_file(label_path)

    # Create data input pipeline
    logging.info('Create dataset...')
    train_sentences = load_dataset_from_text(train_sentences_path, words_table)
    train_labels = load_dataset_from_text(train_labels_path, tags_table)

    eval_sentences = load_dataset_from_text(eval_sentences_path, words_table)
    eval_labels = load_dataset_from_text(eval_labels_path, tags_table)

    # Specify other parameters for the dataset and model
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size  # shuffle buffer size; this loads the whole dataset into memory
    params.id_pad_word = words_table.lookup(tf.constant(params.pad_word))
    params.id_pad_label = tags_table.lookup(tf.constant(params.pad_tag))

    # Create train and eval iterators over the two datasets
    train_inputs = input_fn('train', train_sentences, train_labels, params)
    eval_inputs = input_fn('eval', eval_sentences, eval_labels, params)
    logging.info("- Done")

    # Define the models (two different sets of nodes that share weights for train and eval)
    logging.info("Creating the model...")
    model = MyModel()

    train_model_spec = build_model_spec('train', model, train_inputs, params)
    # If you run evaluation on its own, set reuse=False; during training the eval spec should reuse the train variables
    # eval_model_spec = model_fn('eval', model, eval_inputs, params, reuse=True)
    eval_model_spec = build_model_spec('eval',
                                       model,
                                       eval_inputs,
                                       params,
                                       reuse=True)
    logging.info('- Done.')

    # Train the model
    logging.info("Starting training for {} epochs".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)

    # write model as pickle for inference
    with open(args.model_dir + "/mymodel.pkl", 'wb') as fout:
        pickle.dump(model, fout)
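
Note: the word/tag tables above use the stock TF 1.x tf.contrib.lookup API. A tiny self-contained demonstration (the vocabulary file name is an assumption):

import tensorflow as tf

# Map word strings to integer ids; unknown words hash into one OOV bucket.
words = tf.constant(['the', 'cat', 'zzz_unknown'])
table = tf.contrib.lookup.index_table_from_file('words.txt',
                                                num_oov_buckets=1)
ids = table.lookup(words)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    # In-vocabulary words map to their line number in the file;
    # the unknown word maps to vocab_size (the single OOV bucket).
    print(sess.run(ids))
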
if data_dir is None:
    data_dir = params.data_dir

# locate the train/eval TFRecord files and count their records
print("[INFO] loading images...")
train_tf = os.path.join(data_dir, "train.tfrecord")
eval_tf = os.path.join(data_dir, "test.tfrecord")

train_size = sum(1 for _ in tf.python_io.tf_record_iterator(train_tf))
eval_size = sum(1 for _ in tf.python_io.tf_record_iterator(eval_tf))

x_train_batch, y_train_batch = input_fn(
    train_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=[BS, IMAGE_DIMS[1], IMAGE_DIMS[1], 3],
    parallelism=4)
x_test_batch, y_test_batch = input_fn(
    eval_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,  # note: the eval pipeline also uses the training settings here
    batch_shape=[BS, IMAGE_DIMS[1], IMAGE_DIMS[1], 3],
    parallelism=4)

x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()

x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
        y_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='y_ph')

        y_predictions, train_op, loss_op = bert_ner_core(
            input_ids_ph, input_masks_ph, y_masks_ph, y_ph)

        # init_new_vars_op = tf.initialize_variables([])
        # sess.run(init_new_vars_op)
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(var_list=[
            v for v in tf.trainable_variables()
            if not v.name.startswith('Optimizer')
        ])
        saver.restore(sess, BERT_MODEL_PATH)

        ds_iterator = input_fn().make_one_shot_iterator()
        next_element = ds_iterator.get_next()

        try:
            while True:
                features, labels = sess.run(next_element)

                # print("****" * 30)
                # print("feature:")
                # print(features.keys())
                # print(features['input_ids'])
                # print(features['input_ids'].shape)
                # print(features['input_masks'])
                # print(features['input_masks'].shape)
                # print('y_masks')
                # print(features['y_masks'])
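
Note: the try/while loop above is the canonical TF 1.x way to drain a one-shot iterator; the snippet is cut off before the matching except clause. A self-contained sketch of the complete pattern:

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices([1, 2, 3])
next_element = ds.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    try:
        while True:
            print(sess.run(next_element))  # prints 1, 2, 3
    except tf.errors.OutOfRangeError:
        pass  # raised once the dataset is exhausted
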
Example #7
    timestring = strftime("%Y-%m-%d_%H-%M", gmtime())

    data_dir = os.path.join(data_dir, args.dataset)
    model_dir = os.path.join(model_dir,
                             (args.latent_model + '_' + args.dataset + '_' +
                              args.cluster_model + '_' + timestring))

    # Create a directory for this model combination if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Copy the params.json file to the model directory for reproducible results
    copyfile(json_path, os.path.join(model_dir, 'params.json'))

    # Creates an iterator and a dataset
    train_inputs = input_fn(data_dir, 'train', params)
    cluster_inputs = input_fn(data_dir, 'test', params)

    # Define the models (two different sets of nodes that share weights for train and eval)
    if args.latent_model == 'AE':
        if args.cluster_model == 'IDEC':
            train_model_spec = ae_model_fn('cluster', cluster_inputs, params)
        else:
            train_model_spec = ae_model_fn('train', train_inputs, params)
        cluster_model_spec = ae_model_fn('cluster',
                                         cluster_inputs,
                                         params,
                                         reuse=True)
    elif args.latent_model == 'b_AE':
        train_model_spec = b_ae_model_fn('train', train_inputs, params)
        cluster_model_spec = b_ae_model_fn('cluster',
                                           cluster_inputs,
                                           params,
                                           reuse=True)
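
Note: the reuse=True argument used here (and in Examples #1 and #4) is ordinary TF 1.x variable sharing: the second model_fn call binds to the variables the first call created instead of allocating new ones. A minimal self-contained demonstration:

import tensorflow as tf


def dense_layer(x, reuse):
    # With reuse=True this returns the weights created on the first call.
    with tf.variable_scope('model', reuse=reuse):
        return tf.layers.dense(x, 10, name='fc')


train_out = dense_layer(tf.zeros([4, 8]), reuse=False)  # creates model/fc
eval_out = dense_layer(tf.zeros([4, 8]), reuse=True)    # reuses model/fc
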
Example #8
use_pretrained = False


# locate the train/eval TFRecord files and count their records
print("[INFO] loading images...")
train_tf = args["train_tf"]
eval_tf = args["eval_tf"]

train_size = sum(1 for _ in tf.python_io.tf_record_iterator(train_tf))
eval_size = sum(1 for _ in tf.python_io.tf_record_iterator(eval_tf))
print(train_size)
x_train_batch, y_train_batch = input_fn(
    train_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=BATCH_SHAPE,
    parallelism=PARALLELISM)
x_test_batch, y_test_batch = input_fn(
    eval_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,  # note: the eval pipeline also uses the training settings here
    batch_shape=BATCH_SHAPE,
    parallelism=PARALLELISM)

x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()

x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
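
Note: Input(tensor=...) is the TF 1.x Keras idiom for wiring a graph tensor (here a TFRecord batch) directly into a model instead of creating a placeholder. A minimal sketch (the tensor and the Dense head are stand-ins, not this example's real network):

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

x_batch = tf.ones([32, 8])       # stand-in for x_train_batch
inp = Input(tensor=x_batch)      # no placeholder is created
out = Dense(2, activation='softmax')(inp)
model = Model(inputs=inp, outputs=out)
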
Example #9
def main():
  train_tf_record = os.path.join(FLAGS.data_dir, 'ocr-train-*.tfrecord')
  eval_tf_record = os.path.join(FLAGS.data_dir, 'ocr-validation-*.tfrecord')

  char_map_dict = load_char_map()
  train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
  model_name = 'crnn_ctc_ocr_{:s}.ckpt'.format(str(train_start_time))
  model_save_path = os.path.join(FLAGS.model_dir, model_name)

  config = Config()
  config.batch_size = FLAGS.batch_size
  config.num_classes = len(char_map_dict) + 1
  train_input_fn = input_fn.input_fn(train_tf_record, FLAGS.batch_size, channel_size=FLAGS.channel_size)

  crnn_model = model.CRNN(config)
  saver = tf.train.Saver()
  if not os.path.exists(FLAGS.model_dir):
    os.makedirs(FLAGS.model_dir)

  global_step = tf.train.get_or_create_global_step()
  learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                             global_step,
                                             FLAGS.decay_steps,
                                             FLAGS.decay_rate,
                                             staircase=True)
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    # Use the decayed learning rate defined above. The control dependency
    # already runs update_ops, so no extra tf.group is needed.
    train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(crnn_model.loss,
                                              global_step=global_step)
  decoded, log_prob = tf.nn.ctc_greedy_decoder(crnn_model.logits, crnn_model.sequence_length)
  pred_str_labels = tf.as_string(decoded[0].values)
  pred_tensor = tf.SparseTensor(indices=decoded[0].indices, values=pred_str_labels, dense_shape=decoded[0].dense_shape)
  true_str_labels = tf.as_string(crnn_model.labels.values)
  true_tensor = tf.SparseTensor(indices=crnn_model.labels.indices, values=true_str_labels, dense_shape=crnn_model.labels.dense_shape)
  edit_distance = tf.reduce_mean(tf.edit_distance(pred_tensor, true_tensor, normalize=True), name='distance')
  tf.summary.scalar(name='edit_distance', tensor=edit_distance)
  tf.summary.scalar(name='ctc_loss', tensor=crnn_model.loss)
  #tf.summary.scalar(name='learning_rate', tensor=learning_rate)
  merge_summary_op = tf.summary.merge_all()
  # separate name so it does not shadow the model Config above
  sess_config = tf.ConfigProto()
  sess_config.gpu_options.allow_growth = True
  with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())
    summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
    summary_writer.add_graph(sess.graph)
    train_next_batch = train_input_fn.get_next()

    save_path = tf.train.latest_checkpoint(FLAGS.model_dir)
    if save_path:
      saver.restore(sess=sess, save_path=save_path)
      print("restore from %s"%(save_path) )
      st = int(save_path.split("-")[-1])
      sess.run(global_step.assign(st))

    for s in range(FLAGS.max_train_steps):
      batch = sess.run(train_next_batch)
      images = batch['images']
      labels = batch['labels']
      sequence_length = batch['sequence_length']
      _, loss, lr, summary, step, logits, dis = sess.run(
          [train_op, crnn_model.loss, learning_rate, merge_summary_op,
           global_step, crnn_model.logits, edit_distance],
          feed_dict={
              crnn_model.images: images,
              crnn_model.labels: labels,
              crnn_model.sequence_length: sequence_length,
              crnn_model.keep_prob: 0.5,
              crnn_model.is_training: True})

      print("step: {step} lr: {lr} loss: {loss} acc: {dis}".format(
          step=step, lr=lr, loss=loss, dis=(1 - dis)))
      if step % FLAGS.step_per_save == 0:
        summary_writer.add_summary(summary=summary, global_step=step)
        saver.save(sess=sess, save_path=model_save_path, global_step=step)

      if False and step % FLAGS.step_per_eval == 0:  # evaluation is disabled; remove `False and` to enable
        eval_input_fn = input_fn.input_fn(eval_tf_record, FLAGS.batch_size, False, channel_size=FLAGS.channel_size)
        eval_next_batch = eval_input_fn.get_next()
        all_distance = []
        while True:
          try:
            # use the eval batch, not the last training batch
            eval_batch = sess.run(eval_next_batch)
            images = eval_batch['images']
            labels = eval_batch['labels']
            sequence_length = eval_batch['sequence_length']
            eval_distance = sess.run([edit_distance],
                    feed_dict={
                      crnn_model.images: images,
                      crnn_model.labels: labels,
                      crnn_model.keep_prob: 1.0,
                      crnn_model.is_training: False,
                      crnn_model.sequence_length: sequence_length})
            all_distance.append(eval_distance[0])
          except tf.errors.OutOfRangeError:
            print("eval acc: ", 1 - np.mean(np.array(all_distance)))
            break
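
Note: the accuracy printed above is 1 minus a normalized edit distance between two string SparseTensors. A tiny self-contained example of tf.edit_distance with normalize=True:

import tensorflow as tf

# One sequence of length 2; the hypothesis gets one of two symbols wrong.
pred = tf.SparseTensor(indices=[[0, 0], [0, 1]], values=['a', 'b'],
                       dense_shape=[1, 2])
truth = tf.SparseTensor(indices=[[0, 0], [0, 1]], values=['a', 'c'],
                        dense_shape=[1, 2])
dist = tf.edit_distance(pred, truth, normalize=True)

with tf.Session() as sess:
    print(sess.run(dist))  # [0.5] -> an "accuracy" of 1 - 0.5
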
    def __init__(
        self,
        FLAGS,
        full_summary=False,
    ):
        '''FLAGS.img_shape is expected to be [H, W, C].'''
        tf.reset_default_graph()

        self.size_glimpse_out = FLAGS.size_glimpse_out
        num_glimpses = FLAGS.num_glimpses
        self.num_scales = len(FLAGS.scale_sizes)
        self.patch_shape = [
            self.num_scales, FLAGS.scale_sizes[0], FLAGS.scale_sizes[0],
            FLAGS.img_shape[-1]
        ]
        self.patch_shape_flat = np.prod(self.patch_shape)
        self.FLAGS = FLAGS
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        with tf.name_scope('Placeholders'):
            self.is_training = tf.placeholder(tf.bool,
                                              shape=(),
                                              name='is_training')

        with tf.device('/device:CPU:*'):
            with tf.name_scope('Dataset'):
                inputs = input_fn(FLAGS)
                self.features_ph_train = inputs['features_ph_train']
                self.labels_ph_train = inputs['labels_ph_train']
                self.features_ph_valid = inputs['features_ph_valid']
                self.labels_ph_valid = inputs['labels_ph_valid']
                self.features_ph_test = inputs['features_ph_test']
                self.labels_ph_test = inputs['labels_ph_test']
                self.handle = inputs['handle']
                self.train_init_op = inputs['train_init_op']
                self.valid_init_op = inputs['valid_init_op']
                self.test_init_op = inputs['test_init_op']

                self.x, self.y = (inputs['images'], inputs['labels'])
                (x, y) = (tf.tile(self.x, [FLAGS.MC_samples, 1, 1, 1]),
                          tf.tile(self.y, [FLAGS.MC_samples]))

                batch_sz = tf.shape(x)[0]  # potentially variable batch_size

                img_NHWC = tf.reshape(x, [batch_sz] + FLAGS.img_shape)

        with tf.name_scope('learning_rate'):
            self.learning_rate = tf.maximum(
                tf.train.exponential_decay(FLAGS.learning_rate,
                                           self.global_step,
                                           FLAGS.learning_rate_decay_steps,
                                           FLAGS.learning_rate_decay_factor,
                                           staircase=True),
                FLAGS.min_learning_rate)

        location_network = LocationNetwork(img_NHWC, FLAGS)
        retina_sensor = RetinaSensor(FLAGS)
        if FLAGS.ConvGlimpse:
            glimpse_network = GlimpseNetwork_DRAM(FLAGS, self.patch_shape,
                                                  self.size_glimpse_out)
        else:
            glimpse_network = GlimpseNetwork(FLAGS, self.patch_shape,
                                             self.size_glimpse_out)

        with tf.name_scope('CoreNetwork'):
            if FLAGS.cell == 'RNN':
                cell = _rnn_cell_RAM(FLAGS.size_rnn_state,
                                     activation=tf.nn.relu)
            elif FLAGS.cell == 'LSTM':
                cell = tf.nn.rnn_cell.LSTMCell(FLAGS.size_rnn_state,
                                               activation=tf.nn.relu)
            # cell = tf.contrib.cudnn_rnn.CudnnLSTM(num_units=FLAGS.size_rnn_state, num_layers=1)
            locs_ta = tf.TensorArray(tf.float32,
                                     size=num_glimpses,
                                     name='locs_ta')
            loc_means_ta = tf.TensorArray(tf.float32,
                                          size=num_glimpses,
                                          name='loc_means_ta')
            glimpses_ta = tf.TensorArray(
                tf.float32, size=num_glimpses,
                name='glimpses_ta')  # for visualization
            action_ta = tf.TensorArray(tf.float32,
                                       size=num_glimpses,
                                       name='action_ta')
            output_ta = (locs_ta, loc_means_ta, glimpses_ta)

            def loop_fn(time, cell_output, cell_state, loop_state):
                emit_output = cell_output

                if cell_output is None:  # time == 0
                    loc, loc_mean = location_network.initial_loc()
                    next_cell_state = cell.zero_state(batch_sz, tf.float32)
                    loop_state = output_ta
                else:  # time == 1+
                    loc, loc_mean = location_network(cell_output,
                                                     self.is_training)
                    next_cell_state = cell_state

                img_patch_flat = retina_sensor(img_NHWC,
                                               tf.clip_by_value(loc, -1, 1))
                # tf automatically reparametrizes the normal dist., but we don't want to propagate the supervised loss into location
                glimpse = glimpse_network(img_patch_flat,
                                          tf.stop_gradient(loc))

                with tf.name_scope('write_or_finished'):
                    elements_finished = (time >= num_glimpses)
                    finished = tf.reduce_all(elements_finished)

                    def _write():
                        return (loop_state[0].write(time, loc),
                                loop_state[1].write(time, loc_mean),
                                loop_state[2].write(time, img_patch_flat))

                    next_loop_state = tf.cond(finished, lambda: loop_state,
                                              lambda: _write())

                return (elements_finished, glimpse, next_cell_state,
                        emit_output, next_loop_state)

            outputs_ta, final_state, loop_state_ta = tf.nn.raw_rnn(
                cell, loop_fn)
            rnn_outputs = outputs_ta.stack(
                name='stack_rnn_outputs')  # [time, batch_sz, num_cell]

        with tf.name_scope('stack_outputs'):
            self.locs = tf.transpose(
                loop_state_ta[0].stack(name='stack_locs'),
                [1, 0, 2])  # [batch_sz, timesteps, loc_dims]
            loc_means = tf.transpose(
                loop_state_ta[1].stack(name='stack_loc_means'), [1, 0, 2])
            self.glimpses = loop_state_ta[2].stack(name='stack_glimpses')

        with tf.variable_scope('Baseline'):
            self.b_W = weight_variable([FLAGS.size_rnn_state, 1], name='b_W')
            self.b_b = bias_variable([1], name='b_b')
            baselines = [
                tf.squeeze(
                    tf.matmul(tf.stop_gradient(rnn_outputs[i]), self.b_W) +
                    self.b_b) for i in range(num_glimpses - 1)
            ]
            baselines = tf.stack(baselines, axis=1)  # [batch_sz, timesteps]

        # classification after last time-step
        with tf.variable_scope('CoreNetwork_preds'):
            fc_pred = tf.layers.Dense(FLAGS.num_classes,
                                      kernel_initializer=xavier_initializer(),
                                      name='fc_logits')
            logits = fc_pred(rnn_outputs[-1])
            self.probabilities = tf.nn.softmax(logits)
            self.prediction = tf.argmax(logits, 1)

            # store prediction at each step. Tuple of most likely (class, probability) for each step
            self.intermed_preds = []
            for i in range(num_glimpses):
                p = tf.nn.softmax(fc_pred(tf.stop_gradient(rnn_outputs[i])))
                p_class = tf.argmax(p, 1)
                idx = tf.transpose(
                    [tf.cast(tf.range(batch_sz), dtype=tf.int64), p_class])
                self.intermed_preds.append((p_class, tf.gather_nd(p, idx)))

        with tf.name_scope('Cross-entropy_loss'):
            self.xent = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                               logits=logits))

        with tf.name_scope('Rewards'):
            self.Rewards = Rewards(FLAGS)
            self.returns, reward, self.unknown_accuracy = self.Rewards(
                self.prediction, y)

            self.advantages = self.returns - baselines

            with tf.name_scope('loglikelihood'):
                # only want gradients flow through the suggested mean
                # gaussian = tf.distributions.Normal(tmp_mean[:,1:], scale=FLAGS.loc_std)
                # loglik = gaussian._log_prob(tf.stop_gradient(tmp_loc[:,1:]))
                # loglik = tf.reduce_sum(loglik, axis=2)
                z = (tf.stop_gradient(self.locs[:, 1:]) - loc_means[:, 1:]
                     ) / FLAGS.loc_std  # [batch_sz, timesteps, loc_dims]
                loglik = -0.5 * tf.reduce_sum(tf.square(z), axis=2)

            with tf.name_scope('RL_loss'):
                # do not propagate back through advantages
                self.RL_loss = tf.reduce_mean(
                    loglik * tf.stop_gradient(self.advantages))

        with tf.name_scope('Baseline_loss'):
            self.baselines_mse = tf.reduce_mean(
                tf.square(tf.stop_gradient(self.returns) - baselines))

        with tf.name_scope('Hybrid_loss'):
            self.loss = -FLAGS.learning_rate_RL * self.RL_loss + self.xent + self.baselines_mse

        with tf.variable_scope('Adam'):
            train_op = tf.train.AdamOptimizer(self.learning_rate)
            grads_and_vars = train_op.compute_gradients(self.loss)

            # look at selected gradients
            self.gradient_check = {
                v: tf.reduce_mean(g)
                for g, v in grads_and_vars
            }

            clipped_grads_and_vars = [
                (tf.clip_by_norm(grad, FLAGS.max_gradient_norm), var)
                for grad, var in grads_and_vars
            ]
            self.train_op = train_op.apply_gradients(
                clipped_grads_and_vars, global_step=self.global_step)

        with tf.name_scope('Summaries'):
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(self.prediction, y), tf.float32))
            probs = tf.reshape(self.probabilities,
                               [FLAGS.MC_samples, -1, FLAGS.num_classes])
            avg_pred = tf.reduce_mean(probs, axis=0)
            avg_pred = tf.cast(tf.equal(tf.argmax(avg_pred, 1), self.y),
                               tf.float32)
            self.accuracy_MC = tf.reduce_mean(avg_pred, name='accuracy')
            self.reward = tf.reduce_mean(reward, name='avg_reward')
            tf.summary.scalar("loss", self.loss)
            tf.summary.scalar("cross_entropy", self.xent)
            tf.summary.scalar("baseline_mse", self.baselines_mse)
            tf.summary.scalar("RL_loss", self.RL_loss)
            tf.summary.histogram("loglikelihood", tf.reduce_mean(
                loglik, axis=0))  # zero if not sampling!
            tf.summary.histogram("softmax_predictions", self.probabilities)
            tf.summary.scalar("accuracy", self.accuracy)
            tf.summary.scalar("accuracy_MC", self.accuracy_MC)
            tf.summary.scalar("reward", self.reward)
            tf.summary.scalar("advantages", tf.reduce_mean(self.advantages))
            tf.summary.scalar("baseline", tf.reduce_mean(baselines))
            tf.summary.scalar("learning_rate", self.learning_rate)

        if full_summary:
            with tf.name_scope('Summ_RNN'):
                tf.summary.image(
                    'rnn_outputs',
                    tf.reshape(
                        tf.transpose(rnn_outputs,
                                     [1, 0, 2]),  # [batch_sz, cells, time]
                        [-1, FLAGS.size_rnn_state, num_glimpses, 1]),
                    max_outputs=3)
            with tf.name_scope('Summ_Locations'):
                sparse_label = y  # y already holds sparse (integer) class labels
                for gl in range(num_glimpses):
                    tf.summary.histogram("loc_means_x" + str(gl + 1),
                                         loc_means[:, gl, 0])
                    tf.summary.histogram("loc_means_y" + str(gl + 1),
                                         loc_means[:, gl, 1])

                    # visualize for certain digits
                    if gl != 0:  # pass on initial
                        tf.summary.histogram(
                            "num0_loc_means_x" + str(gl + 1),
                            tf.boolean_mask(loc_means[:, gl, 0],
                                            tf.equal(sparse_label, 0)))
                        tf.summary.histogram(
                            "num1_loc_means_x" + str(gl + 1),
                            tf.boolean_mask(loc_means[:, gl, 1],
                                            tf.equal(sparse_label, 1)))
                        tf.summary.histogram(
                            "num6_loc_means_x" + str(gl + 1),
                            tf.boolean_mask(loc_means[:, gl, 0],
                                            tf.equal(sparse_label, 6)))
                        tf.summary.histogram(
                            "num9_loc_means_x" + str(gl + 1),
                            tf.boolean_mask(loc_means[:, gl, 1],
                                            tf.equal(sparse_label, 9)))

            with tf.name_scope('Summ_Trainable'):
                for var in tf.trainable_variables():
                    tf.summary.histogram(var.name, var)
            with tf.name_scope('Summ_Gradients'):
                for grad, var in grads_and_vars:
                    tf.summary.histogram(var.name + '/gradient', grad)

        self.summary = tf.summary.merge_all()

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=2,
                                    name='Saver')
        self.saver_best = tf.train.Saver(tf.global_variables(),
                                         max_to_keep=1,
                                         name='Saver_best')

        # put glimpses back together in a visualizable format
        with tf.variable_scope('Visualization'):
            self.glimpses_composed = []
            self.downscaled_scales = []
            num_scales = len(FLAGS.scale_sizes)
            scale0 = FLAGS.scale_sizes[0]
            out_sz = FLAGS.scale_sizes[-1]
            channel = FLAGS.img_shape[-1]

            masks, paddings = [], []
            for idx in range(num_scales):
                pad_size = (out_sz - FLAGS.scale_sizes[idx]) // 2
                padding = tf.constant(
                    [[0, 0],
                     [pad_size, out_sz - FLAGS.scale_sizes[idx] - pad_size],
                     [pad_size, out_sz - FLAGS.scale_sizes[idx] - pad_size],
                     [0, 0]])

                mask = tf.ones([
                    batch_sz * num_glimpses, FLAGS.scale_sizes[idx],
                    FLAGS.scale_sizes[idx], channel
                ])
                mask = tf.pad(mask,
                              padding,
                              mode='CONSTANT',
                              constant_values=0)

                masks.append(mask)
                paddings.append(padding)

            self.glimpses_reshaped = tf.reshape(self.glimpses,
                                                [batch_sz * num_glimpses, -1])
            glimpse_composed = tf.zeros(
                [batch_sz * num_glimpses, out_sz, out_sz, channel], tf.float32)
            scales = tf.split(self.glimpses_reshaped, num_scales, axis=1)
            last_mask = tf.zeros(
                [batch_sz * num_glimpses, out_sz, out_sz, channel])

            # to check actual model input. Nesting from out to in: scales, glimpses, batch
            for idx in range(num_scales):
                self.downscaled_scales.append(
                    tf.split(tf.reshape(
                        scales[idx],
                        [batch_sz * num_glimpses, scale0, scale0, channel]),
                             num_glimpses,
                             axis=0))

            # Start with smallest scale, pad up to largest, multiply by (mask - last_mask) indicating area not covered by smaller masks
            for idx in range(num_scales):
                # TODO: DO THIS TRANSFORMATION ONCE OUTSIDE THE LOOP TO GET INDICES, THEN USE tf.gather()
                scales[idx] = tf.reshape(scales[idx], [
                    batch_sz * num_glimpses, scale0, scale0, channel
                ])  # resize_images expects [B,H,W,C] -> add channel for MNIST

                # repeat and tile glimpse to scale size (unfortunately there is no tf.repeat)
                repeats = FLAGS.scale_sizes[idx] // scale0
                scales[idx] = tf.transpose(
                    scales[idx], [0, 3, 1, 2])  # put channels in front

                scales[idx] = tf.reshape(
                    tf.tile(
                        tf.reshape(
                            scales[idx],
                            [batch_sz * num_glimpses, channel, scale0**2, 1]),
                        [1, 1, 1, repeats]), [
                            batch_sz * num_glimpses, channel, scale0,
                            repeats * scale0
                        ])
                scales[idx] = tf.reshape(
                    tf.tile(
                        tf.reshape(tf.transpose(scales[idx], [0, 1, 3, 2]), [
                            batch_sz * num_glimpses, channel,
                            repeats * scale0**2, 1
                        ]), [1, 1, 1, repeats]), [
                            batch_sz * num_glimpses, channel, repeats * scale0,
                            repeats * scale0
                        ])

                scales[idx] = tf.transpose(scales[idx],
                                           [0, 3, 2, 1])  # put channels back

                # alternative, but not identical to what model actually sees:
                # scales[idx] = tf.image.resize_images(scales[idx], 2*[FLAGS.scale_sizes[idx]], method=tf.image.ResizeMethod.BILINEAR)

                glimpse_composed += (masks[idx] - last_mask) * tf.pad(
                    scales[idx],
                    paddings[idx],
                    mode='CONSTANT',
                    constant_values=0.)
                last_mask = masks[idx]

            self.glimpses_composed = tf.split(glimpse_composed,
                                              num_glimpses,
                                              axis=0)
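
Note: the class above leans on tf.stop_gradient to keep the REINFORCE terms out of the supervised gradients. A minimal demonstration of what stop_gradient does:

import tensorflow as tf

x = tf.constant(2.0)
y = x * tf.stop_gradient(x)  # the second factor is treated as a constant
g = tf.gradients(y, x)[0]    # d/dx (x * const) = const

with tf.Session() as sess:
    print(sess.run(g))  # 2.0, not 4.0
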
Example #11
    train_label = FLAGS.train_label
    valid_data_dir = FLAGS.valid_data_dir
    valid_label = FLAGS.valid_label

    # Define global variables.
    hidden_size = FLAGS.hidden_size
    num_classes = FLAGS.num_classes
    learning_rate = FLAGS.learning_rate
    num_train_steps = FLAGS.num_train_steps
    num_train_per_eval = FLAGS.num_train_per_eval
    num_epoch = FLAGS.num_epoch

    train_input_fn = lambda: input_fn.input_fn(
        train_data_dir,
        train_label,
        repeat=True,
        batch_size=batch_size,
        num_threads=num_threads,
    )

    valid_input_fn = lambda: input_fn.input_fn(
        valid_data_dir,
        valid_label,
        repeat=False,
        batch_size=batch_size,
        num_threads=num_threads,
    )

    test_input_fn = lambda: input_fn.input_fn(
        FLAGS.test_data_dir,
        None,  # no labels for the test set
        repeat=False,
        batch_size=batch_size,
        num_threads=num_threads,
    )

print(dftrain)

# Feature columns describe how to use the input.
my_feature_columns = []
for key in dftrain.keys():  # iterate over the feature column names
    my_feature_columns.append(
        tf.feature_column.numeric_column(key=key, dtype=tf.float32))

classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # Two hidden layers of 30 and 10 nodes respectively.
    hidden_units=[30, 10],
    # The model must choose among 9 classes of trees.
    n_classes=9)

classifier.train(input_fn=lambda: input_fn(dftrain, train_y, training=True),
                 steps=5000)

features = ['Girth', 'Height', 'Volume']
predict = {}
print("Please enter numeric values")
for feature in features:
    # Keep prompting until the user enters a numeric value
    while True:
        val = input(feature + ": ")
        if val.isdigit():
            break

    predict[feature] = [float(val)]

predictions = classifier.predict(input_fn=lambda: input_fn_for_user(predict))
for pred_dict in predictions: