def main():
    """Train and evaluate the DeepFM model, then pickle the model object.

    Steps:
      1. Parse CLI arguments and load ``params.json`` from ``args.data_dir``.
      2. Build the train / eval datasets from ``train1.tsv`` / ``dev1.tsv``.
      3. Wrap both datasets with ``input_fn``.
      4. Instantiate ``DeepFM`` and build the train / eval model specs
         (the eval spec reuses the training variables).
      5. Run ``train_evaluate``.
      6. Pickle the model object to ``<model_dir>/model.pkl``.

    Raises:
        FileNotFoundError: if ``params.json`` does not exist in
            ``args.data_dir``.
    """
    tf.set_random_seed(230)
    args = parser.parse_args()

    param_path = os.path.join(args.data_dir, 'params.json')
    # Raise explicitly instead of asserting: asserts are stripped under
    # ``python -O``. The message names the file that is actually checked.
    if not os.path.isfile(param_path):
        raise FileNotFoundError(
            'No <params.json> found in path: {}'.format(args.data_dir))

    # Load parameters
    params = Params(param_path)
    params.buffer_size = params.train_size
    # hidden_layers is stored as a comma-separated string, e.g. "64,32"
    params.hidden_layers = [int(x) for x in params.hidden_layers.split(',')]
    set_logger(os.path.join(args.model_dir, 'train.log'))
    params.print()

    # Step2, create tf.dataset
    logging.info('Create train and eval dataset...')
    train_file = os.path.join(args.data_dir, 'train1.tsv')
    eval_file = os.path.join(args.data_dir, 'dev1.tsv')
    train_dataset = load_dataset_from_text(train_file, params)
    eval_dataset = load_dataset_from_text(eval_file, params)

    # Step3, create train and eval iterators over the two datasets
    train_inputs = input_fn(train_dataset, params, 'train')
    eval_inputs = input_fn(eval_dataset, params, 'eval')
    logging.info('Completed create input pipeline!')

    # Step4, define model
    logging.info('Create model...')
    model = DeepFM()

    # Step5, build model specification
    train_model_spec = build_model_spec('train', model, train_inputs, params,
                                        reuse=False)
    # If you want to only run evaluate you should set reuse=False.
    eval_model_spec = build_model_spec('eval', model, eval_inputs, params,
                                       reuse=True)
    logging.info('Create train and eval model specification completed!')

    # Step6, train and evaluate model
    logging.info('Start training for {} epochs'.format(params.epochs))
    train_evaluate(train_model_spec, eval_model_spec, args.model_dir, params,
                   args.restore_from)

    # Step7, save model
    with open(os.path.join(args.model_dir, 'model.pkl'), 'wb') as f:
        pickle.dump(model, f)
def main(args):
    """Train and evaluate one segmentation Estimator per requested fold.

    Reads all settings from ``args`` (model/data directories, epoch counts,
    learning rate, batch sizes, number of folds and the folds to train on),
    builds the k-fold split over the image/mask file lists and, for each
    requested fold, alternates ``train`` and ``evaluate`` on a fresh
    ``tf.estimator.Estimator`` whose model directory is
    ``<model_dir>-<fold_num>``.
    """
    model_dir = args.model_dir
    working_dir = args.data_dir
    num_epochs = args.num_epochs
    epochs_between_evals = args.epochs_between_evals
    train_batch_size = args.train_batch_size
    eval_batch_size = args.eval_batch_size

    # Assumes default Carvana data folder structure...
    image_filenames = sorted(
        glob(os.path.join(os.path.join(working_dir, 'train_hq'), '*.jpg')))
    mask_filenames = sorted(
        glob(os.path.join(os.path.join(working_dir, 'train_masks'), '*.gif')))

    # Check if the system's version of TensorFlow was built with CUDA
    # (i.e. uses a GPU)
    if tf.test.is_built_with_cuda():
        data_format = 'channels_first'
    else:
        data_format = 'channels_last'

    params = {
        'data_format': data_format,
        # Pixels are classified as either "foreground" or "background"
        'num_output_classes': 2,
        'learning_rate': args.learning_rate,
    }

    # Mirror the model across all available GPUs using the mirrored
    # distribution strategy.
    # TODO: Is there a good way to check if multiple GPUs are available?
    distribution = None
    if args.distribute:
        distribution = tf.contrib.distribute.MirroredStrategy()

    config = tf.estimator.RunConfig(train_distribute=distribution,
                                    keep_checkpoint_max=2,
                                    log_step_count_steps=5)

    folds = KFolds(image_filenames, mask_filenames,
                   num_folds=args.num_folds, sort=False, yield_dict=False)

    # Train separate models on each requested fold.
    for fold_num in args.folds:
        train_split, eval_split = folds.get_fold(fold_num)
        train_images, train_masks = train_split
        eval_images, eval_masks = eval_split

        # Estimator input functions are expected to take no arguments, so the
        # fold's data is captured by closure.
        def train_input_fn():
            return input_fn(train_images, train_masks,
                            training=True,
                            data_format=params['data_format'],
                            num_repeats=epochs_between_evals,
                            batch_size=train_batch_size)

        def eval_input_fn():
            return input_fn(eval_images, eval_masks,
                            training=False,
                            data_format=params['data_format'],
                            batch_size=eval_batch_size)

        # Initialize the Estimator
        image_segmentor = tf.estimator.Estimator(
            model_dir=model_dir + '-' + str(fold_num),
            model_fn=model_fn,
            params=params,
            config=config)

        # Train and evaluate
        num_rounds = num_epochs // epochs_between_evals
        for round_idx in range(num_rounds):
            print('\nEntering training epoch %d.\n'
                  % (round_idx * epochs_between_evals))
            image_segmentor.train(input_fn=train_input_fn)
            results = image_segmentor.evaluate(input_fn=eval_input_fn)
            # TODO: Look into writing example images to a tf.summary?
            print('\nEvaluation results:\n%s\n' % results)
# Graph inputs: a batch of 64x64 single-channel images and one integer label
# per image.
X = tf.placeholder(name='ip', dtype=tf.float32, shape=(None, 64, 64, 1))
Y = tf.placeholder(tf.int32, [None, 1])
# network = model.model(X_train)
network = model.cnn_model(X)
[optimizer, cost] = training.trainer(network, Y)
print(optimizer)

# Build the input pipeline once. Building it inside the batch loop adds new
# ops to the graph on every step and restarts the data from the beginning.
inputs = input_fn.input_fn(filenames, labels, batch_size)

# Initialization: variables must be initialised exactly once, before
# training. Re-running the initializer inside the loop resets the weights on
# every batch and throws away everything learned so far.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

num_points = len(filenames)
# One batch is held back per pass; it is consumed by the evaluation fetch
# after the training loops.
batches_per_epoch = int(num_points // batch_size) - 1

# Run training
for epoch in range(100):
    # Restart the iterator at the beginning of every epoch.
    sess.run(inputs['iterator_init_op'])
    for jj in range(batches_per_epoch):
        # Fetch images and labels in ONE run call so they belong to the same
        # batch (assumes both tensors come from the same iterator, in which
        # case separate run calls would each advance it).
        train_X, train_Y = sess.run([inputs['images'], inputs['labels']])
        # -1 keeps this valid for a final batch smaller than batch_size.
        train_Y = np.array(train_Y).reshape((-1, 1))
        sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
        # print('Done with batch {} and epoch {}'.format(jj,epoch))

# Evaluate the model
train_X, train_Y = sess.run([inputs['images'], inputs['labels']])
train_Y = np.array(train_Y)
def main():
    """Train and evaluate the sequence-tagging model, then pickle it.

    Loads ``params.json`` from ``args.model_dir`` and merges in
    ``dataset_params.json`` from ``args.data_dir``, builds vocabulary / tag
    lookup tables and the train / dev input pipelines, builds the train and
    eval model specs (eval reuses the training variables), runs
    ``train_and_evaluate`` and finally pickles the model object to
    ``<model_dir>/mymodel.pkl``.

    Raises:
        FileNotFoundError: if either JSON configuration file is missing.
    """
    # Set the random seed for the whole graph for reproducible experiments
    tf.set_random_seed(230)

    # Load model parameters from params.json file in model_dir
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    # Explicit raise instead of assert: asserts are stripped under -O.
    if not os.path.isfile(json_path):
        raise FileNotFoundError(
            "No <params.json> json configuration file found at {}".format(
                args.model_dir))
    params = Params(json_path)

    # Load dataset parameters from dataset_params.json file in data_dir
    json_path = os.path.join(args.data_dir, 'dataset_params.json')
    if not os.path.isfile(json_path):
        raise FileNotFoundError(
            "No <dataset_params.json> json configuration file found at {}"
            .format(args.data_dir))
    params.update(json_path)
    # number of buckets for unknown words
    num_oov_buckets = params.num_oov_buckets

    # Check we are not overwriting some previous results.
    # Uncomment this block if you want to guard against overwriting:
    # model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, 'best_weights'))
    # overwritting = model_dir_has_best_weights and args.restore_dir is None
    # assert not overwritting, "Weights found in model dir, aborting to avoid overwrite"

    # Set logger
    set_logger(os.path.join(args.model_dir, 'train.log'))
    # print parameters
    params.print()

    # Get vocabulary, label and dataset file names
    vocab_path = os.path.join(args.data_dir, 'words.txt')
    label_path = os.path.join(args.data_dir, 'tags.txt')
    train_sentences_path = os.path.join(args.data_dir,
                                        'train/sentences.txt.list')
    train_labels_path = os.path.join(args.data_dir, 'train/labels.txt')
    eval_sentences_path = os.path.join(args.data_dir,
                                       'dev/sentences.txt.list')
    eval_labels_path = os.path.join(args.data_dir, 'dev/labels.txt')
    # for batch predict
    # test_sentences_path = os.path.join(args.data_dir, 'test/sentences.txt.list')
    # test_labels_path = os.path.join(args.data_dir, 'test/labels.txt')

    # Create word lookup table and label lookup table
    words_table = tf.contrib.lookup.index_table_from_file(
        vocab_path, num_oov_buckets=num_oov_buckets)
    tags_table = tf.contrib.lookup.index_table_from_file(label_path)

    # Create data input pipeline
    logging.info('Create dataset...')
    train_sentences = load_dataset_from_text(train_sentences_path,
                                             words_table)
    train_labels = load_dataset_from_text(train_labels_path, tags_table)
    eval_sentences = load_dataset_from_text(eval_sentences_path, words_table)
    eval_labels = load_dataset_from_text(eval_labels_path, tags_table)

    # Specify other parameters for the dataset and model
    params.eval_size = params.dev_size
    # Buffer size for shuffling; this will load the whole dataset into memory
    params.buffer_size = params.train_size
    params.id_pad_word = words_table.lookup(tf.constant(params.pad_word))
    # NOTE(review): attribute is spelled ``ld_pad_label`` (not ``id_...``);
    # kept as-is because consumers outside this function may read that name.
    params.ld_pad_label = tags_table.lookup(tf.constant(params.pad_tag))

    # Create train and eval iterators over the two datasets
    train_inputs = input_fn('train', train_sentences, train_labels, params)
    eval_inputs = input_fn('eval', eval_sentences, eval_labels, params)
    logging.info("- Done")

    # Define the models (two different sets of nodes that share weights for
    # train and eval)
    logging.info("Creating the model...")
    model = MyModel()
    train_model_spec = build_model_spec('train', model, train_inputs, params)
    # If you only want to run evaluation, set reuse=False; when training,
    # the eval graph must reuse the training variables.
    eval_model_spec = build_model_spec('eval', model, eval_inputs, params,
                                       reuse=True)
    logging.info('- Done.')

    # Train the model
    logging.info("Starting training for {} epochs".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)

    # Write model as pickle for inference
    with open(os.path.join(args.model_dir, "mymodel.pkl"), 'wb') as fout:
        pickle.dump(model, fout)
if data_dir is None:
    data_dir = params.data_dir

# Locate the train / eval TFRecord files inside data_dir.
print("[INFO] loading images...")
train_tf = os.path.join(data_dir, "train.tfrecord")
eval_tf = os.path.join(data_dir, "test.tfrecord")

# Count records by streaming through each file. The previous version built a
# list holding every serialized record just to take its length.
train_size = sum(1 for _ in tf.python_io.tf_record_iterator(train_tf))
eval_size = sum(1 for _ in tf.python_io.tf_record_iterator(eval_tf))

# NOTE(review): IMAGE_DIMS[1] is used for both spatial dimensions — confirm
# the images are square.
x_train_batch, y_train_batch = input_fn(
    train_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=[BS, IMAGE_DIMS[1], IMAGE_DIMS[1], 3],
    parallelism=4)

# NOTE(review): is_training=True is also passed for the eval pipeline —
# confirm this is intended.
x_test_batch, y_test_batch = input_fn(
    eval_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=[BS, IMAGE_DIMS[1], IMAGE_DIMS[1], 3],
    parallelism=4)

# Static shapes of the training batch tensors.
x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()

# Model input layer wrapping the training image batch tensor.
x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
y_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='y_ph') y_predictions, train_op, loss_op = bert_ner_core( input_ids_ph, input_masks_ph, y_masks_ph, y_ph) # init_new_vars_op = tf.initialize_variables([]) # sess.run(init_new_vars_op) sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(var_list=[ v for v in tf.trainable_variables() if not v.name.startswith('Optimizer') ]) saver.restore(sess, BERT_MODEL_PATH) ds_iterator = input_fn().make_one_shot_iterator() next_element = ds_iterator.get_next() try: while True: features, labels = sess.run(next_element) # print("****" * 30) # print("feature:") # print(features.keys()) # print(features['input_ids']) # print(features['input_ids'].shape) # print(features['input_masks']) # print(features['input_masks'].shape) # print('y_masks') # print(features['y_masks'])
timestring = strftime("%Y-%m-%d_%H-%M", gmtime()) data_dir = os.path.join(data_dir, args.dataset) model_dir = os.path.join(model_dir, (args.latent_model + '_' + args.dataset + '_' + args.cluster_model + '_' + timestring)) # Create directory for model combination if not existens if not os.path.exists(model_dir): os.makedirs(model_dir) # copy params.json file to the model direction for reproducible results copyfile(json_path, os.path.join(model_dir, 'params.json')) # Creates an iterator and a dataset train_inputs = input_fn(data_dir, 'train', params) cluster_inputs = input_fn(data_dir, 'test', params) # Define the models (2 different set of nodes that share weights for train and eval) if args.latent_model == 'AE': if args.cluster_model == 'IDEC': train_model_spec = ae_model_fn('cluster', cluster_inputs, params) else: train_model_spec = ae_model_fn('train', train_inputs, params) cluster_model_spec = ae_model_fn('cluster', cluster_inputs, params, reuse=True) elif args.latent_model == 'b_AE': train_model_spec = b_ae_model_fn('train', train_inputs, params) cluster_model_spec = b_ae_model_fn('cluster',
use_pretrained = False

# TFRecord locations come straight from the parsed arguments.
print("[INFO] loading images...")
train_tf = args["train_tf"]
eval_tf = args["eval_tf"]

# Count records by streaming through each file. The previous version built a
# list holding every serialized record just to take its length.
train_size = sum(1 for _ in tf.python_io.tf_record_iterator(train_tf))
eval_size = sum(1 for _ in tf.python_io.tf_record_iterator(eval_tf))
print(train_size)

x_train_batch, y_train_batch = input_fn(
    train_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=BATCH_SHAPE,
    parallelism=PARALLELISM)

# NOTE(review): is_training=True is also passed for the eval pipeline —
# confirm this is intended.
x_test_batch, y_test_batch = input_fn(
    eval_tf,
    one_hot=True,
    classes=CLASSES,
    is_training=True,
    batch_shape=BATCH_SHAPE,
    parallelism=PARALLELISM)

# Static shapes of the training batch tensors.
x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()

# Model input layer wrapping the training image batch tensor.
x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
def main():
    """Train the CRNN-CTC OCR model and periodically checkpoint it.

    Builds the training input pipeline and the CRNN model, sets up an Adam
    optimizer driven by an exponentially decayed learning rate, defines the
    normalized edit-distance metric and summaries, optionally restores the
    latest checkpoint found in ``FLAGS.model_dir``, then runs
    ``FLAGS.max_train_steps`` training steps, writing summaries and a
    checkpoint every ``FLAGS.step_per_save`` steps.
    """
    train_tf_record = os.path.join(FLAGS.data_dir, 'ocr-train-*.tfrecord')
    eval_tf_record = os.path.join(FLAGS.data_dir,
                                  'ocr-validation-*.tfrecord')
    char_map_dict = load_char_map()

    # Checkpoints are named after the wall-clock start time of this run.
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'crnn_ctc_ocr_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = os.path.join(FLAGS.model_dir, model_name)

    config = Config()
    config.batch_size = FLAGS.batch_size
    # One extra class on top of the character map.
    config.num_classes = len(char_map_dict) + 1

    train_input_fn = input_fn.input_fn(train_tf_record,
                                       FLAGS.batch_size,
                                       channel_size=FLAGS.channel_size)
    crnn_model = model.CRNN(config)

    # NOTE(review): the Saver is created before the global step and the
    # optimizer, so it presumably only tracks variables that exist at this
    # point; the step counter is recovered from the checkpoint file name
    # below. Left in place so existing checkpoints stay restorable.
    saver = tf.train.Saver()

    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               FLAGS.decay_steps,
                                               FLAGS.decay_rate,
                                               staircase=True)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Use the decayed learning-rate tensor. Passing the constant
        # FLAGS.learning_rate here meant the schedule above was logged but
        # never applied to the optimizer.
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(crnn_model.loss,
                                                  global_step=global_step)
    train_op = tf.group([train_op, update_ops])

    decoded, log_prob = tf.nn.ctc_greedy_decoder(crnn_model.logits,
                                                 crnn_model.sequence_length)

    # tf.edit_distance is fed string-valued sparse tensors, so both the
    # decoded predictions and the labels are converted with tf.as_string.
    pred_str_labels = tf.as_string(decoded[0].values)
    pred_tensor = tf.SparseTensor(indices=decoded[0].indices,
                                  values=pred_str_labels,
                                  dense_shape=decoded[0].dense_shape)
    true_str_labels = tf.as_string(crnn_model.labels.values)
    true_tensor = tf.SparseTensor(indices=crnn_model.labels.indices,
                                  values=true_str_labels,
                                  dense_shape=crnn_model.labels.dense_shape)
    edit_distance = tf.reduce_mean(tf.edit_distance(pred_tensor,
                                                    true_tensor,
                                                    normalize=True),
                                   name='distance')

    tf.summary.scalar(name='edit_distance', tensor=edit_distance)
    tf.summary.scalar(name='ctc_loss', tensor=crnn_model.loss)
    # tf.summary.scalar(name='learning_rate', tensor=learning_rate)
    merge_summary_op = tf.summary.merge_all()

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())

        summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
        summary_writer.add_graph(sess.graph)

        train_next_batch = train_input_fn.get_next()

        save_path = tf.train.latest_checkpoint(FLAGS.model_dir)
        if save_path:
            saver.restore(sess=sess, save_path=save_path)
            print("restore from %s" % (save_path))
            # Checkpoint paths end in "-<step>"; resume the step counter
            # from that suffix.
            st = int(save_path.split("-")[-1])
            sess.run(global_step.assign(st))

        for _ in range(FLAGS.max_train_steps):
            batch = sess.run(train_next_batch)
            images = batch['images']
            labels = batch['labels']
            sequence_length = batch['sequence_length']

            _, loss, lr, summary, step, logits, dis = sess.run(
                [train_op, crnn_model.loss, learning_rate, merge_summary_op,
                 global_step, crnn_model.logits, edit_distance],
                feed_dict={crnn_model.images: images,
                           crnn_model.labels: labels,
                           crnn_model.sequence_length: sequence_length,
                           crnn_model.keep_prob: 0.5,
                           crnn_model.is_training: True})

            print("step: {step} lr: {lr} loss: {loss} acc: {dis} ".format(
                step=step, lr=lr, loss=loss, dis=(1 - dis)))

            if step % FLAGS.step_per_save == 0:
                summary_writer.add_summary(summary=summary, global_step=step)
                saver.save(sess=sess, save_path=model_save_path,
                           global_step=step)

            # Periodic evaluation is deliberately disabled ("if False"); the
            # branch is kept for reference. It now reads the evaluation
            # batch (it previously re-used the training ``batch``) and runs
            # the model in inference mode.
            if False and step % FLAGS.step_per_eval == 0:
                eval_input_fn = input_fn.input_fn(
                    eval_tf_record, FLAGS.batch_size, False,
                    channel_size=FLAGS.channel_size)
                eval_next_batch = eval_input_fn.get_next()
                all_distance = []
                while True:
                    try:
                        eval_batch = sess.run(eval_next_batch)
                        images = eval_batch['images']
                        labels = eval_batch['labels']
                        sequence_length = eval_batch['sequence_length']
                        train_distance = sess.run(
                            [edit_distance],
                            feed_dict={
                                crnn_model.images: images,
                                crnn_model.labels: labels,
                                crnn_model.keep_prob: 1.0,
                                crnn_model.is_training: False,
                                crnn_model.sequence_length: sequence_length})
                        all_distance.append(train_distance[0])
                    except tf.errors.OutOfRangeError as e:
                        print("eval acc: ",
                              1 - np.mean(np.array(all_distance)))
                        break
def __init__(self, FLAGS, full_summary=False):
    """Build the complete recurrent-attention graph in a fresh default graph.

    The constructor wires together the input pipeline, the location /
    retina / glimpse networks, a ``tf.nn.raw_rnn`` core, a per-step
    baseline, the classification head, the hybrid (REINFORCE +
    cross-entropy + baseline MSE) loss, the clipped Adam train op,
    summaries, savers and tensors that recompose glimpses for visualization.

    Args:
        FLAGS: configuration object. Attributes read here include
            ``size_glimpse_out``, ``num_glimpses``, ``scale_sizes``,
            ``img_shape`` ([H, W, C]), ``MC_samples``, ``cell``,
            ``ConvGlimpse``, ``size_rnn_state``, ``num_classes``,
            ``loc_std``, the learning-rate settings and
            ``max_gradient_norm``.
        full_summary: if True, additionally record image / histogram
            summaries of RNN outputs, location means, trainable variables
            and gradients.

    NOTE(review): scope boundaries in this method were reconstructed from
    whitespace-collapsed source — confirm against version control before
    relying on exact op / summary names.
    """
    tf.reset_default_graph()

    self.size_glimpse_out = FLAGS.size_glimpse_out
    num_glimpses = FLAGS.num_glimpses
    self.num_scales = len(FLAGS.scale_sizes)
    # Every scale is stored at the size of the first scale.
    self.patch_shape = [
        self.num_scales, FLAGS.scale_sizes[0], FLAGS.scale_sizes[0],
        FLAGS.img_shape[-1]
    ]
    self.patch_shape_flat = np.prod(self.patch_shape)
    self.FLAGS = FLAGS

    self.global_step = tf.Variable(0, trainable=False, name='global_step')

    with tf.name_scope('Placeholders'):
        self.is_training = tf.placeholder(tf.bool, shape=(),
                                          name='is_training')

    with tf.device('/device:CPU:*'):
        with tf.name_scope('Dataset'):
            inputs = input_fn(FLAGS)
            self.features_ph_train = inputs['features_ph_train']
            self.labels_ph_train = inputs['labels_ph_train']
            self.features_ph_valid = inputs['features_ph_valid']
            self.labels_ph_valid = inputs['labels_ph_valid']
            self.features_ph_test = inputs['features_ph_test']
            self.labels_ph_test = inputs['labels_ph_test']
            self.handle = inputs['handle']
            self.train_init_op = inputs['train_init_op']
            self.valid_init_op = inputs['valid_init_op']
            self.test_init_op = inputs['test_init_op']
            self.x, self.y = (inputs['images'], inputs['labels'])

    # Replicate every example MC_samples times along the batch axis.
    (x, y) = (tf.tile(self.x, [FLAGS.MC_samples, 1, 1, 1]),
              tf.tile(self.y, [FLAGS.MC_samples]))
    batch_sz = tf.shape(x)[0]  # potentially variable batch_size
    img_NHWC = tf.reshape(x, [batch_sz] + FLAGS.img_shape)

    with tf.name_scope('learning_rate'):
        # Staircase exponential decay, floored at min_learning_rate.
        self.learning_rate = tf.maximum(
            tf.train.exponential_decay(FLAGS.learning_rate,
                                       self.global_step,
                                       FLAGS.learning_rate_decay_steps,
                                       FLAGS.learning_rate_decay_factor,
                                       staircase=True),
            FLAGS.min_learning_rate)

    location_network = LocationNetwork(img_NHWC, FLAGS)
    retina_sensor = RetinaSensor(FLAGS)
    if FLAGS.ConvGlimpse:
        glimpse_network = GlimpseNetwork_DRAM(FLAGS, self.patch_shape,
                                              self.size_glimpse_out)
    else:
        glimpse_network = GlimpseNetwork(FLAGS, self.patch_shape,
                                         self.size_glimpse_out)

    with tf.name_scope('CoreNetwork'):
        if FLAGS.cell == 'RNN':
            cell = _rnn_cell_RAM(FLAGS.size_rnn_state,
                                 activation=tf.nn.relu)
        elif FLAGS.cell == 'LSTM':
            cell = tf.nn.rnn_cell.LSTMCell(FLAGS.size_rnn_state,
                                           activation=tf.nn.relu)
            # cell = tf.contrib.cudnn_rnn.CudnnLSTM(num_units=FLAGS.size_rnn_state, num_layers=1)

        locs_ta = tf.TensorArray(tf.float32, size=num_glimpses,
                                 name='locs_ta')
        loc_means_ta = tf.TensorArray(tf.float32, size=num_glimpses,
                                      name='loc_means_ta')
        glimpses_ta = tf.TensorArray(tf.float32, size=num_glimpses,
                                     name='glimpses_ta')  # for visualization
        action_ta = tf.TensorArray(tf.float32, size=num_glimpses,
                                   name='action_ta')
        output_ta = (locs_ta, loc_means_ta, glimpses_ta)

        def loop_fn(time, cell_output, cell_state, loop_state):
            """raw_rnn step: pick a location, take a glimpse, log both."""
            emit_output = cell_output
            if cell_output is None:  # time == 0
                loc, loc_mean = location_network.initial_loc()
                next_cell_state = cell.zero_state(batch_sz, tf.float32)
                loop_state = output_ta
            else:  # time == 1+
                loc, loc_mean = location_network(cell_output,
                                                 self.is_training)
                next_cell_state = cell_state

            img_patch_flat = retina_sensor(img_NHWC,
                                           tf.clip_by_value(loc, -1, 1))
            # tf automatically reparametrizes the normal dist., but we don't
            # want to propagate the supervised loss into location
            glimpse = glimpse_network(img_patch_flat, tf.stop_gradient(loc))

            with tf.name_scope('write_or_finished'):
                elements_finished = (time >= num_glimpses)
                finished = tf.reduce_all(elements_finished)

                def _write():
                    return (loop_state[0].write(time, loc),
                            loop_state[1].write(time, loc_mean),
                            loop_state[2].write(time, img_patch_flat))

                # Only write while the loop is still running.
                next_loop_state = tf.cond(finished,
                                          lambda: loop_state,
                                          lambda: _write())

            return (elements_finished, glimpse, next_cell_state,
                    emit_output, next_loop_state)

        outputs_ta, final_state, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn)
        rnn_outputs = outputs_ta.stack(
            name='stack_rnn_outputs')  # [time, batch_sz, num_cell]

    with tf.name_scope('stack_outputs'):
        self.locs = tf.transpose(
            loop_state_ta[0].stack(name='stack_locs'),
            [1, 0, 2])  # [batch_sz, timesteps, loc_dims]
        loc_means = tf.transpose(
            loop_state_ta[1].stack(name='stack_loc_means'), [1, 0, 2])
        self.glimpses = loop_state_ta[2].stack(name='stack_glimpses')

    with tf.variable_scope('Baseline'):
        self.b_W = weight_variable([FLAGS.size_rnn_state, 1], name='b_W')
        self.b_b = bias_variable([1], name='b_b')
        # Baseline for every step but the last; gradients are stopped so the
        # baseline does not train the core network.
        baselines = [
            tf.squeeze(
                tf.matmul(tf.stop_gradient(rnn_outputs[i]), self.b_W) +
                self.b_b) for i in range(num_glimpses - 1)
        ]
        baselines = tf.stack(baselines, axis=1)  # [batch_sz, timesteps]

    # classification after last time-step
    with tf.variable_scope('CoreNetwork_preds'):
        fc_pred = tf.layers.Dense(FLAGS.num_classes,
                                  kernel_initializer=xavier_initializer(),
                                  name='fc_logits')
        logits = fc_pred(rnn_outputs[-1])
        self.probabilities = tf.nn.softmax(logits)
        self.prediction = tf.argmax(logits, 1)

        # store prediction at each step. Tuple of most likely
        # (class, probability) for each step
        self.intermed_preds = []
        for i in range(num_glimpses):
            p = tf.nn.softmax(fc_pred(tf.stop_gradient(rnn_outputs[i])))
            p_class = tf.argmax(p, 1)
            idx = tf.transpose(
                [tf.cast(tf.range(batch_sz), dtype=tf.int64), p_class])
            self.intermed_preds.append((p_class, tf.gather_nd(p, idx)))

    with tf.name_scope('Cross-entropy_loss'):
        self.xent = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                           logits=logits))

    with tf.name_scope('Rewards'):
        self.Rewards = Rewards(FLAGS)
        self.returns, reward, self.unknown_accuracy = self.Rewards(
            self.prediction, y)
        self.advantages = self.returns - baselines

    with tf.name_scope('loglikelihood'):
        # only want gradients flow through the suggested mean
        # gaussian = tf.distributions.Normal(tmp_mean[:,1:], scale=FLAGS.loc_std)
        # loglik = gaussian._log_prob(tf.stop_gradient(tmp_loc[:,1:]))
        # loglik = tf.reduce_sum(loglik, axis=2)
        z = (tf.stop_gradient(self.locs[:, 1:]) - loc_means[:, 1:]
             ) / FLAGS.loc_std  # [batch_sz, timesteps, loc_dims]
        loglik = -0.5 * tf.reduce_sum(tf.square(z), axis=2)

    with tf.name_scope('RL_loss'):
        # do not propagate back through advantages
        self.RL_loss = tf.reduce_mean(
            loglik * tf.stop_gradient(self.advantages))

    with tf.name_scope('Baseline_loss'):
        self.baselines_mse = tf.reduce_mean(
            tf.square(tf.stop_gradient(self.returns) - baselines))

    with tf.name_scope('Hybrid_loss'):
        self.loss = (-FLAGS.learning_rate_RL * self.RL_loss + self.xent +
                     self.baselines_mse)

    with tf.variable_scope('Adam'):
        train_op = tf.train.AdamOptimizer(self.learning_rate)
        grads_and_vars = train_op.compute_gradients(self.loss)
        # look at selected gradients
        self.gradient_check = {
            v: tf.reduce_mean(g) for g, v in grads_and_vars
        }
        # Each gradient is clipped individually by its own norm.
        clipped_grads_and_vars = [
            (tf.clip_by_norm(grad, FLAGS.max_gradient_norm), var)
            for grad, var in grads_and_vars
        ]
        self.train_op = train_op.apply_gradients(
            clipped_grads_and_vars, global_step=self.global_step)

    with tf.name_scope('Summaries'):
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.prediction, y), tf.float32))

        # Average the class probabilities over the MC samples of each
        # example before taking the argmax.
        probs = tf.reshape(self.probabilities,
                           [FLAGS.MC_samples, -1, FLAGS.num_classes])
        avg_pred = tf.reduce_mean(probs, axis=0)
        avg_pred = tf.cast(tf.equal(tf.argmax(avg_pred, 1), self.y),
                           tf.float32)
        self.accuracy_MC = tf.reduce_mean(avg_pred, name='accuracy')
        self.reward = tf.reduce_mean(reward, name='avg_reward')

        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("cross_entropy", self.xent)
        tf.summary.scalar("baseline_mse", self.baselines_mse)
        tf.summary.scalar("RL_loss", self.RL_loss)
        tf.summary.histogram("loglikelihood",
                             tf.reduce_mean(loglik,
                                            axis=0))  # zero if not sampling!
        tf.summary.histogram("softmax_predictions", self.probabilities)
        tf.summary.scalar("accuracy", self.accuracy)
        tf.summary.scalar("accuracy_MC", self.accuracy_MC)
        tf.summary.scalar("reward", self.reward)
        tf.summary.scalar("advantages", tf.reduce_mean(self.advantages))
        tf.summary.scalar("baseline", tf.reduce_mean(baselines))
        tf.summary.scalar("learning_rate", self.learning_rate)

    if full_summary:
        with tf.name_scope('Summ_RNN'):
            tf.summary.image(
                'rnn_outputs',
                tf.reshape(
                    tf.transpose(rnn_outputs,
                                 [1, 0, 2]),  # [batch_sz, cells, time]
                    [-1, FLAGS.size_rnn_state, num_glimpses, 1]),
                max_outputs=3)

        with tf.name_scope('Summ_Locations'):
            # ``y`` already holds class indices: it is tiled with a
            # one-element multiples list (so it is rank 1) and is fed to the
            # sparse cross-entropy above. Taking argmax over axis 1 of a
            # rank-1 tensor fails when the graph is built.
            sparse_label = y
            for gl in range(num_glimpses):
                tf.summary.histogram("loc_means_x" + str(gl + 1),
                                     loc_means[:, gl, 0])
                tf.summary.histogram("loc_means_y" + str(gl + 1),
                                     loc_means[:, gl, 1])
                # visualize for certain digits
                if gl != 0:  # pass on initial
                    # NOTE(review): the num1 / num9 histograms are tagged
                    # "_x" but read coordinate index 1 — confirm intent.
                    tf.summary.histogram(
                        "num0_loc_means_x" + str(gl + 1),
                        tf.boolean_mask(loc_means[:, gl, 0],
                                        tf.equal(sparse_label, 0)))
                    tf.summary.histogram(
                        "num1_loc_means_x" + str(gl + 1),
                        tf.boolean_mask(loc_means[:, gl, 1],
                                        tf.equal(sparse_label, 1)))
                    tf.summary.histogram(
                        "num6_loc_means_x" + str(gl + 1),
                        tf.boolean_mask(loc_means[:, gl, 0],
                                        tf.equal(sparse_label, 6)))
                    tf.summary.histogram(
                        "num9_loc_means_x" + str(gl + 1),
                        tf.boolean_mask(loc_means[:, gl, 1],
                                        tf.equal(sparse_label, 9)))

        with tf.name_scope('Summ_Trainable'):
            for var in tf.trainable_variables():
                tf.summary.histogram(var.name, var)

        with tf.name_scope('Summ_Gradients'):
            for grad, var in grads_and_vars:
                tf.summary.histogram(var.name + '/gradient', grad)

    self.summary = tf.summary.merge_all()

    self.saver = tf.train.Saver(tf.global_variables(),
                                max_to_keep=2,
                                name='Saver')
    self.saver_best = tf.train.Saver(tf.global_variables(),
                                     max_to_keep=1,
                                     name='Saver_best')

    # put glimpses back together in a visualizable format
    with tf.variable_scope('Visualization'):
        self.glimpses_composed = []
        self.downscaled_scales = []
        num_scales = len(FLAGS.scale_sizes)
        scale0 = FLAGS.scale_sizes[0]
        out_sz = FLAGS.scale_sizes[-1]
        channel = FLAGS.img_shape[-1]

        # For each scale: a padding spec that centres it inside the output
        # canvas, and a mask that is one over the area the scale covers.
        masks, paddings = [], []
        for idx in range(num_scales):
            pad_size = (out_sz - FLAGS.scale_sizes[idx]) // 2
            padding = tf.constant(
                [[0, 0],
                 [pad_size, out_sz - FLAGS.scale_sizes[idx] - pad_size],
                 [pad_size, out_sz - FLAGS.scale_sizes[idx] - pad_size],
                 [0, 0]])
            mask = tf.ones([
                batch_sz * num_glimpses, FLAGS.scale_sizes[idx],
                FLAGS.scale_sizes[idx], channel
            ])
            mask = tf.pad(mask, padding, mode='CONSTANT', constant_values=0)
            masks.append(mask)
            paddings.append(padding)

        self.glimpses_reshpd = tf.reshape(self.glimpses,
                                          [batch_sz * num_glimpses, -1])
        glimpse_composed = tf.zeros(
            [batch_sz * num_glimpses, out_sz, out_sz, channel], tf.float32)
        scales = tf.split(self.glimpses_reshpd, num_scales, axis=1)
        last_mask = tf.zeros(
            [batch_sz * num_glimpses, out_sz, out_sz, channel])

        # to check actual model input. Nesting from out to in:
        # scales, glimpses, batch
        for idx in range(num_scales):
            self.downscaled_scales.append(
                tf.split(tf.reshape(
                    scales[idx],
                    [batch_sz * num_glimpses, scale0, scale0, channel]),
                         num_glimpses,
                         axis=0))

        # Start with smallest scale, pad up to largest, multiply by
        # (mask - last_mask) indicating area not covered by smaller masks
        for idx in range(num_scales):
            # TODO: DO THIS TRANSFORMATION ONCE OUTSIDE THE LOOP TO GET INDICES, THEN USE tf.gather()
            scales[idx] = tf.reshape(scales[idx], [
                batch_sz * num_glimpses, scale0, scale0, channel
            ])  # resize_images expects [B,H,W,C] -> add channel for MNIST

            # repeat and tile glimpse to scale size (unfortunately there is
            # no tf.repeat)
            repeats = FLAGS.scale_sizes[idx] // scale0
            scales[idx] = tf.transpose(scales[idx],
                                       [0, 3, 1, 2])  # put channels in front
            # Repeat every pixel along the last spatial axis ...
            scales[idx] = tf.reshape(
                tf.tile(
                    tf.reshape(
                        scales[idx],
                        [batch_sz * num_glimpses, channel, scale0**2, 1]),
                    [1, 1, 1, repeats]),
                [batch_sz * num_glimpses, channel, scale0, repeats * scale0])
            # ... then swap the spatial axes and repeat along the other one.
            scales[idx] = tf.reshape(
                tf.tile(
                    tf.reshape(tf.transpose(scales[idx], [0, 1, 3, 2]), [
                        batch_sz * num_glimpses, channel,
                        repeats * scale0**2, 1
                    ]), [1, 1, 1, repeats]),
                [
                    batch_sz * num_glimpses, channel, repeats * scale0,
                    repeats * scale0
                ])
            scales[idx] = tf.transpose(scales[idx],
                                       [0, 3, 2, 1])  # put channels back

            # alternative, but not identical to what model actually sees:
            # scales[idx] = tf.image.resize_images(scales[idx], 2*[FLAGS.scale_sizes[idx]], method=tf.image.ResizeMethod.BILINEAR)
            glimpse_composed += (masks[idx] - last_mask) * tf.pad(
                scales[idx],
                paddings[idx],
                mode='CONSTANT',
                constant_values=0.)
            last_mask = masks[idx]

        self.glimpses_composed = tf.split(glimpse_composed,
                                          num_glimpses,
                                          axis=0)
train_label = FLAGS.train_label valid_data_dir = FLAGS.valid_data_dir valid_label = FLAGS.valid_label # Define global variables. hidden_size = FLAGS.hidden_size num_classes = FLAGS.num_classes learning_rate = FLAGS.learning_rate num_train_steps = FLAGS.num_train_steps num_train_per_eval = FLAGS.num_train_per_eval num_epoch = FLAGS.num_epoch train_input_fn = lambda: input_fn.input_fn( train_data_dir, train_label, repeat=True, batch_size=batch_size, num_threads=num_threads, ) valid_input_fn = lambda: input_fn.input_fn( valid_data_dir, valid_label, repeat=False, batch_size=batch_size, num_threads=num_threads, ) test_input_fn = lambda: input_fn.input_fn( FLAGS.test_data_dir, None,
print(dftrain) # Feature columns describe how to use the input. my_feature_columns = [] for key in dftrain.keys(): # returning our columns(train.keys()) my_feature_columns.append( tf.feature_column.numeric_column(key=key, dtype=tf.float32)) classifier = tf.estimator.DNNClassifier( feature_columns=my_feature_columns, # Two hidden layers of 30 and 10 nodes respectively. hidden_units=[30, 10], # The model must choose between 9 classes of TREES. n_classes=9) classifier.train(input_fn=lambda: input_fn(dftrain, train_y, training=True), steps=5000) features = ['Girth', 'Height', 'Volume'] predict = {} print("Please enter numeric values") for feature in features: valid = True while valid: val = input(feature + ": ") if not val.isdigit(): valid = False predict[feature] = [float(val)] predictions = classifier.predict(input_fn=lambda: input_fn_for_user(predict)) for pred_dict in predictions: