def _add_loss_summaries(total_loss):
  """Add summaries for losses.

  Generates moving average for all losses and associated summaries for
  visualizing the performance of the network.

  Args:
    total_loss: Total loss from loss().
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(
      mytf.MOVING_AVERAGE_DECAY_FOR_LOSS, name='avg')
  losses = tf.get_collection('losses')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summary to all individual losses and the total loss;
  # do the same for the averaged version of the losses.
  for i, l in enumerate(losses + [total_loss]):
    # Name each loss as '(raw)' and name the moving average version of the
    # loss as the original loss name.
    tf.scalar_summary(l.op.name + str(i) + ' (raw)', l)
    tf.scalar_summary(l.op.name + str(i), loss_averages.average(l))

  return loss_averages_op
def add_loss_summaries(total_loss, losses=None, decay=0.9):
  """Add summaries for losses in model.

  Generates moving average for all losses and associated summaries for
  visualizing the performance of the network.

  Args:
    total_loss: Total loss from loss().
    losses: List of individual loss tensors to track (instead of reading
      the 'losses' collection).
    decay: Decay rate for the exponential moving average.
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # Avoid a mutable default argument.
  losses = losses if losses is not None else []
  # Compute the moving average of all individual losses and the total loss.
  # shadow_variable = decay * shadow_variable + (1 - decay) * variable
  loss_averages = tf.train.ExponentialMovingAverage(decay, name='avg')
  # losses = tf.get_collection('losses')  # Instead pass in `losses` as an argument.
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    tf.scalar_summary(l.op.name + ' (raw)', l)
    tf.scalar_summary(l.op.name, loss_averages.average(l))

  return loss_averages_op
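# A minimal graph-construction sketch, not from the original code: the loss
# names below are hypothetical, and the TF 0.x summary API used throughout
# these snippets is assumed. It shows losses being passed in explicitly and
# the returned op used as a control dependency so the averages are refreshed
# on every training step.
import tensorflow as tf

mse_loss = tf.reduce_mean(
    tf.square(tf.constant([1.0, 2.0]) - tf.constant([1.5, 1.5])), name='mse')
reg_loss = tf.constant(0.1, name='l2_reg')
total = tf.add(mse_loss, reg_loss, name='total_loss')
loss_averages_op = add_loss_summaries(total, losses=[mse_loss, reg_loss], decay=0.9)
# Anything built under this context runs the averaging op first.
with tf.control_dependencies([loss_averages_op]):
    train_signal = tf.identity(total)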
def training(loss, learning_rate):
    """Sets up the training Ops.

    Creates a summarizer to track the loss over time in TensorBoard.
    Creates an optimizer and applies the gradients to all trainable variables.

    The Op returned by this function is what must be passed to the
    `sess.run()` call to cause the model to train.

    Args:
      loss: Loss tensor, from loss().
      learning_rate: The learning rate to use for gradient descent.

    Returns:
      train_op: The Op for training.
    """
    # Add a scalar summary for the snapshot loss.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = tf.train.exponential_decay(
        learning_rate,  # Base learning rate.
        global_step,    # Current index into the dataset.
        1000,           # Decay step.
        0.95,           # Decay rate.
        staircase=True)
    tf.scalar_summary(loss.op.name, loss)
    # Create the gradient descent optimizer with the given learning rate.
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = tf.train.MomentumOptimizer(lr, 0.9)
    # Momentum was .35; .7 works okay, gets to .79 by step 400 at 0.03
    # learning rate with ROI.
    # Use the optimizer to apply the gradients that minimize the loss
    # (and also increment the global step counter) as a single training step.
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
def _testGraphExtensionRestore(self):
    test_dir = os.path.join(self.get_temp_dir(), "graph_extension")
    filename = os.path.join(test_dir, "metafile")
    saver0_ckpt = os.path.join(test_dir, "saver0.ckpt")
    with self.test_session(graph=tf.Graph()) as sess:
        # Restores from MetaGraphDef.
        new_saver = tf.train.import_meta_graph(filename)
        # Generates a new MetaGraphDef.
        new_saver.export_meta_graph()
        # Restores from checkpoint.
        new_saver.restore(sess, saver0_ckpt)
        # Adds loss and train ops.
        labels = tf.constant(0, tf.int32, shape=[100], name="labels")
        batch_size = tf.size(labels)
        labels = tf.expand_dims(labels, 1)
        indices = tf.expand_dims(tf.range(0, batch_size), 1)
        concated = tf.concat(1, [indices, labels])
        onehot_labels = tf.sparse_to_dense(
            concated, tf.pack([batch_size, 10]), 1.0, 0.0)
        logits = tf.get_collection("logits")[0]
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits, onehot_labels, name="xentropy")
        loss = tf.reduce_mean(cross_entropy, name="xentropy_mean")
        tf.scalar_summary(loss.op.name, loss)
        # Creates the gradient descent optimizer with the given learning rate.
        optimizer = tf.train.GradientDescentOptimizer(0.01)
        # Runs train_op.
        train_op = optimizer.minimize(loss)
        sess.run(train_op)
def _activation_summary(x):
    """Create summaries for visualization."""
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
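# A minimal usage sketch, not from the original code: `TOWER_NAME` and the
# activation below are hypothetical. The regex above strips a per-tower
# prefix such as 'tower_0/' so multi-GPU towers share one summary name
# per layer.
import re
import tensorflow as tf

TOWER_NAME = 'tower'  # assumption; the original defines this elsewhere

with tf.name_scope('%s_0' % TOWER_NAME):
    conv1 = tf.nn.relu(tf.random_normal([8, 16]), name='conv1')
    # Logged as 'conv1/activations' and 'conv1/sparsity', without the tower prefix.
    _activation_summary(conv1)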
def summary(self):
    # Keep track of gradient values and sparsity (optional)
    grad_summaries = []
    for grad, var in self.grads_and_vars:
        if grad is not None:
            grad_hist_summary = tf.histogram_summary(var.op.name + '/gradients/hist', grad)
            sparsity_summary = tf.scalar_summary(var.op.name + '/gradients/sparsity',
                                                 tf.nn.zero_fraction(grad))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.merge_summary(grad_summaries)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    print("Writing to %s\n" % config.out_dir)

    # Summaries for loss and accuracy
    loss_summary = tf.scalar_summary("loss", self.loss)
    acc_summary = tf.scalar_summary("accuracy", self.accuracy)

    # Train Summaries
    self.train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged])
    train_summary_dir = os.path.join(config.out_dir, "summaries", "train")
    self.train_summary_writer = tf.train.SummaryWriter(train_summary_dir, self.sess.graph_def)

    # Dev summaries
    self.val_summary_op = tf.merge_summary([loss_summary, acc_summary])
    val_summary_dir = os.path.join(config.out_dir, "summaries", "val")
    self.val_summary_writer = tf.train.SummaryWriter(val_summary_dir, self.sess.graph_def)
def __init__(self, encoders, vocabulary, data_id, layers=None,
             activation=tf.tanh, dropout_keep_p=0.5, name='seq_classifier'):
    layers = layers if layers is not None else []  # avoid a mutable default
    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation = activation
    self.dropout_keep_p = dropout_keep_p
    self.name = name
    self.max_output_len = 1

    with tf.variable_scope(name):
        self.learning_step = tf.Variable(0, name="learning_step", trainable=False)
        self.dropout_placeholder = tf.placeholder(tf.float32, name="dropout_plc")
        self.gt_inputs = [tf.placeholder(tf.int32, shape=[None], name="targets")]
        mlp_input = tf.concat(1, [enc.encoded for enc in encoders])
        mlp = MultilayerPerceptron(mlp_input, layers,
                                   self.dropout_placeholder, len(vocabulary))

        self.loss_with_gt_ins = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(mlp.logits,
                                                           self.gt_inputs[0]))
        self.loss_with_decoded_ins = self.loss_with_gt_ins
        self.cost = self.loss_with_gt_ins

        self.decoded_seq = [mlp.classification]
        self.decoded_logits = [mlp.logits]

        tf.scalar_summary('val_optimization_cost', self.cost,
                          collections=["summary_val"])
        tf.scalar_summary('train_optimization_cost', self.cost,
                          collections=["summary_train"])
def training(loss, learning_rate):
    """Sets up the training Ops.

    Creates a summarizer to track the loss over time in TensorBoard.
    Creates an optimizer and applies the gradients to all trainable variables.

    The Op returned by this function is what must be passed to the
    `sess.run()` call to cause the model to train.

    Args:
      loss: Loss tensor, from loss().
      learning_rate: The learning rate to use for gradient descent.

    Returns:
      train_op: The Op for training.
    """
    # Add a scalar summary for the snapshot loss.
    tf.scalar_summary(loss.op.name, loss)
    # Create the gradient descent optimizer with the given learning rate.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Create a variable to track the global step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # Use the optimizer to apply the gradients that minimize the loss
    # (and also increment the global step counter) as a single training step.
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
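# A minimal run-loop sketch, not from the original code: the tiny linear
# model, the log directory, and the feed values are hypothetical, and the
# TF 0.x summary API is assumed. It shows how the train_op returned above
# is driven by sess.run() while the merged summaries are written out.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 2], name='x')
y = tf.placeholder(tf.float32, [None, 1], name='y')
w = tf.Variable(tf.zeros([2, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y), name='mse')
train_op = training(loss, learning_rate=0.01)

summary_op = tf.merge_all_summaries()
with tf.Session() as sess:
    writer = tf.train.SummaryWriter('/tmp/train_logs', sess.graph)
    sess.run(tf.initialize_all_variables())
    for step in range(100):
        feed = {x: [[1.0, 2.0]], y: [[3.0]]}
        _, summary = sess.run([train_op, summary_op], feed_dict=feed)
        writer.add_summary(summary, step)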
def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    dataset_train = FakeData([(227, 227, 3), tuple()], 10)
    dataset_train = BatchData(dataset_train, 10)
    step_per_epoch = 1

    sess_config = get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    lr = tf.train.exponential_decay(
        learning_rate=1e-8,
        global_step=get_global_step_var(),
        decay_steps=dataset_train.size() * 50,
        decay_rate=0.1, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    param_dict = np.load('alexnet.npy').item()

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            #ValidationError(dataset_test, prefix='test'),
        ]),
        session_config=sess_config,
        model=Model(),
        step_per_epoch=step_per_epoch,
        session_init=ParamRestore(param_dict),
        max_epoch=100,
    )
def get_config():
    # prepare dataset
    dataset_train = get_data('train')
    step_per_epoch = dataset_train.size()
    dataset_test = get_data('test')

    sess_config = get_default_sess_config(0.9)

    lr = tf.Variable(0.1, trainable=False, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.MomentumOptimizer(lr, 0.9),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test,
                            [ScalarStats('cost'), ClassificationError()]),
            ScheduledHyperParamSetter('learning_rate',
                                      [(1, 0.1), (20, 0.01), (33, 0.001), (60, 0.0001)])
        ]),
        session_config=sess_config,
        model=Model(n=18),
        step_per_epoch=step_per_epoch,
        max_epoch=500,
    )
def _add_loss_summaries(total_loss):
  """Add summaries for losses in CIFAR-10 model.

  Generates moving average for all losses and associated summaries for
  visualizing the performance of the network.

  Args:
    total_loss: Total loss from loss().
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  losses = tf.get_collection('losses')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    tf.scalar_summary(l.op.name + ' (raw)', l)
    tf.scalar_summary(l.op.name, loss_averages.average(l))

  return loss_averages_op
def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
        self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)

    # Properly initialize all variables.
    tf.initialize_all_variables().run()

    self.saver = tf.train.Saver()
def evaluate(accuracy_accumulator, val_loss_accumulator, validation_batches):
    accuracy = accuracy_accumulator / validation_batches
    loss = val_loss_accumulator / validation_batches

    accuracy_summary_op = tf.scalar_summary("accuracy", accuracy)
    val_loss_summary_op = tf.scalar_summary("val_cost", loss)

    return accuracy, accuracy_summary_op, val_loss_summary_op
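# A minimal usage sketch, not from the original code: the accumulator values
# below are hypothetical stand-ins for sums collected over the validation
# set before evaluate() averages them and emits the summaries.
import tensorflow as tf

acc_sum = tf.constant(45.0)    # e.g. summed per-batch accuracies
loss_sum = tf.constant(12.5)   # e.g. summed per-batch losses
n_batches = tf.constant(50.0)

accuracy, acc_summary, loss_summary = evaluate(acc_sum, loss_sum, n_batches)
with tf.Session() as sess:
    writer = tf.train.SummaryWriter('/tmp/val_logs', sess.graph)
    acc_val, acc_str, loss_str = sess.run([accuracy, acc_summary, loss_summary])
    writer.add_summary(acc_str, 0)   # accuracy = 45 / 50 = 0.9
    writer.add_summary(loss_str, 0)  # val_cost = 12.5 / 50 = 0.25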
def get_config():
    # prepare dataset
    dataset_train = get_data('train')
    step_per_epoch = dataset_train.size()
    dataset_test = get_data('test')

    sess_config = get_default_sess_config(0.9)

    # warm up with small LR for 1 epoch
    lr = tf.Variable(0.01, trainable=False, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.MomentumOptimizer(lr, 0.9),
        callbacks=Callbacks([
            StatPrinter(),
            PeriodicSaver(),
            ValidationError(dataset_test, prefix='test'),
            ScheduledHyperParamSetter('learning_rate',
                                      [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0001)])
        ]),
        session_config=sess_config,
        model=Model(n=18),
        step_per_epoch=step_per_epoch,
        max_epoch=500,
    )
def train(total_loss, global_step):
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary("learning_rate", lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Truth-testing a Tensor raises an error; compare against None explicitly.
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + "/gradients", grad)

    #variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    #variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")

    return train_op
def testSummariesAreFlushedToDiskWithoutGlobalStep(self):
    output_dir = os.path.join(self.get_temp_dir(), 'flush_test_no_global_step')
    if tf.gfile.Exists(output_dir):  # For running on jenkins.
        tf.gfile.DeleteRecursively(output_dir)

    names_to_metrics, names_to_updates = self._create_names_to_metrics(
        self._predictions, self._labels)

    for k in names_to_metrics:
        v = names_to_metrics[k]
        tf.scalar_summary(k, v)

    summary_writer = tf.train.SummaryWriter(output_dir)

    initial_op = tf.group(tf.initialize_all_variables(),
                          tf.initialize_local_variables())
    eval_op = tf.group(*names_to_updates.values())

    with self.test_session() as sess:
        slim.evaluation.evaluation(
            sess,
            initial_op=initial_op,
            eval_op=eval_op,
            summary_op=tf.merge_all_summaries(),
            summary_writer=summary_writer)

        names_to_values = {name: names_to_metrics[name].eval()
                           for name in names_to_metrics}
    self._verify_summaries(output_dir, names_to_values)
def add_evaluation_step(result_tensor, ground_truth_tensor):
    """Inserts the operations we need to evaluate the accuracy of our results.

    Args:
      result_tensor: The new final node that produces results.
      ground_truth_tensor: The node we feed ground truth data into.

    Returns:
      evaluation_step: Tensor computing the mean accuracy over all labels.
    """
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            # tf.argmax(result_tensor, 1) returns the index of the maximal
            # value (= 1 in a 1-of-N encoding vector) in each row (axis = 1).
            # But result_tensor has multiple ones per row (indicating multiple
            # labels) due to the multi-label classification, so the argmax
            # comparison does not apply:
            # correct_prediction = tf.equal(tf.argmax(result_tensor, 1),
            #                               tf.argmax(ground_truth_tensor, 1))
            # ground_truth is not a binary tensor; it contains the
            # probabilities of each label, so we need to tf.round() to acquire
            # a binary tensor allowing comparison by tf.equal().
            # See: http://stackoverflow.com/questions/39219414/in-tensorflow-how-can-i-get-nonzero-values-and-their-indices-from-a-tensor-with
            correct_prediction = tf.equal(tf.round(result_tensor), ground_truth_tensor)
        with tf.name_scope('accuracy'):
            # Mean accuracy over all labels:
            # http://stackoverflow.com/questions/37746670/tensorflow-multi-label-accuracy-calculation
            evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.scalar_summary('accuracy', evaluation_step)
    return evaluation_step
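# A small worked example, not from the original code, of the multi-label
# accuracy above: with rounding, every label slot counts toward accuracy,
# not just the argmax winner.
import tensorflow as tf

preds = tf.constant([[0.9, 0.2, 0.7],    # rounds to [1, 0, 1]
                     [0.1, 0.6, 0.4]])   # rounds to [0, 1, 0]
truth = tf.constant([[1.0, 0.0, 1.0],
                     [0.0, 1.0, 1.0]])
acc = add_evaluation_step(preds, truth)
with tf.Session() as sess:
    print(sess.run(acc))  # 5 of 6 label slots match -> ~0.8333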
def autoencoder(d, c=5, tied_weights=False):
    '''
    An autoencoder network with one hidden layer (containing the encoding),
    and sigmoid activation functions.

    Args:
      d: dimension of input.
      c: dimension of code.
      tied_weights: True if w1^T = w2.

    Returns:
      Dictionary containing input placeholder Tensor and loss Variable
    '''
    inputs = tf.placeholder(tf.float32, shape=[None, d], name='input')

    w1 = tf.Variable(tf.truncated_normal([d, c], stddev=1.0 / math.sqrt(d)))
    b1 = tf.Variable(tf.zeros([c]))
    w2 = tf.Variable(tf.truncated_normal([c, d], stddev=1.0 / math.sqrt(c)))  # TODO: Implement tied weights
    b2 = tf.Variable(tf.zeros([d]))

    code = tf.nn.sigmoid(tf.matmul(inputs, w1) + b1, name='encoding')
    reconstruction = tf.nn.sigmoid(tf.matmul(code, w2) + b2, name='reconstruction')

    loss = tf.reduce_mean(tf.square(reconstruction - inputs))
    tf.scalar_summary('loss', loss)

    return {'inputs': inputs, 'loss': loss}
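# A hedged sketch, not the author's implementation, of the tied-weights
# branch left as a TODO above: the decoder reuses the transposed encoder
# matrix (w2 = w1^T), so only w1 is learned.
import math
import tensorflow as tf

def autoencoder_tied(d, c=5):
    inputs = tf.placeholder(tf.float32, shape=[None, d], name='input')
    w1 = tf.Variable(tf.truncated_normal([d, c], stddev=1.0 / math.sqrt(d)))
    b1 = tf.Variable(tf.zeros([c]))
    b2 = tf.Variable(tf.zeros([d]))
    code = tf.nn.sigmoid(tf.matmul(inputs, w1) + b1, name='encoding')
    # Tied weights: the decoder matrix is the transpose of the encoder matrix.
    reconstruction = tf.nn.sigmoid(tf.matmul(code, tf.transpose(w1)) + b2,
                                   name='reconstruction')
    loss = tf.reduce_mean(tf.square(reconstruction - inputs))
    tf.scalar_summary('loss', loss)
    return {'inputs': inputs, 'loss': loss}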
def testStandardServicesWithoutGlobalStep(self):
    logdir = _test_dir("standard_services_without_global_step")
    # Create a checkpoint.
    with tf.Graph().as_default():
        v = tf.Variable([1.0], name="foo")
        tf.scalar_summary(["v"], v)
        sv = tf.train.Supervisor(logdir=logdir)
        sess = sv.prepare_or_wait_for_session("")
        save_path = sv.save_path
        self._wait_for_glob(save_path, 3.0)
        self._wait_for_glob(os.path.join(logdir, "*events*"), 3.0)
        # Wait to make sure everything is written to file before stopping.
        time.sleep(1)
        sv.stop()
    # There should be an event file with a version number.
    rr = _summary_iterator(logdir)
    ev = next(rr)
    self.assertEquals("brain.Event:2", ev.file_version)
    ev = next(rr)
    ev_graph = tf.GraphDef()
    ev_graph.ParseFromString(ev.graph_def)
    self.assertProtoEquals(sess.graph.as_graph_def(add_shapes=True), ev_graph)
    ev = next(rr)
    self.assertProtoEquals("value { tag: 'v' simple_value: 1.0 }", ev.summary)
    ev = next(rr)
    self.assertEquals(tf.SessionLog.STOP, ev.session_log.status)
    self.assertRaises(StopIteration, lambda: next(rr))
    # There should be a checkpoint file with the variable "foo".
    with tf.Graph().as_default(), self.test_session() as sess:
        v = tf.Variable([10.10], name="foo")
        sav = tf.train.Saver([v])
        sav.restore(sess, save_path)
        self.assertEqual(1.0, v.eval()[0])
def train(self, total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.scalar_summary(l.op.name + ' (raw)', l)

    # Apply gradients, and add histograms
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer()
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads)
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(Recognizer.MOVING_AVERAGE_DECAY)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def get_config(cifar_classnum):
    # prepare dataset
    dataset_train = get_data('train', cifar_classnum)
    step_per_epoch = dataset_train.size()
    dataset_test = get_data('test', cifar_classnum)

    sess_config = get_default_sess_config(0.5)

    nr_gpu = get_nr_gpu()
    lr = tf.train.exponential_decay(
        learning_rate=1e-2,
        global_step=get_global_step_var(),
        decay_steps=step_per_epoch * (30 if nr_gpu == 1 else 20),
        decay_rate=0.5, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test, ClassificationError())
        ]),
        session_config=sess_config,
        model=Model(cifar_classnum),
        step_per_epoch=step_per_epoch,
        max_epoch=250,
    )
def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    ds = CharRNNData(param.corpus, 100000)
    ds = BatchData(ds, param.batch_size)
    step_per_epoch = ds.size()

    lr = tf.Variable(2e-3, trainable=False, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=ds,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            #HumanHyperParamSetter('learning_rate', 'hyper.txt'),
            ScheduledHyperParamSetter('learning_rate', [(25, 2e-4)])
        ]),
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=50,
    )
def drawGraph(self, n_row, n_latent, n_col):
    with tf.name_scope('matDecomp'):
        self._p = tf.placeholder(tf.float32, shape=[None, n_col])
        self._c = tf.placeholder(tf.float32, shape=[None, n_col])
        self._lambda = tf.placeholder(tf.float32)
        self._index = tf.placeholder(tf.float32, shape=[None, n_row])
        self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
        self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
        self._h = tf.matmul(tf.matmul(self._index, self._A), self._B)

        weighted_loss = tf.reduce_mean(tf.mul(self._c, tf.squared_difference(self._p, self._h)))
        self._weighted_loss = weighted_loss
        l2_A = tf.reduce_sum(tf.square(self._A))
        l2_B = tf.reduce_sum(tf.square(self._B))
        n_w = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
        l2 = tf.truediv(tf.add(l2_A, l2_B), n_w)
        reg_term = tf.mul(self._lambda, l2)
        self._loss = tf.add(weighted_loss, reg_term)

        self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
        one = tf.constant(1, tf.float32)
        pred = tf.cast(tf.greater_equal(tf.matmul(self._A, self._B), one), tf.float32)
        cor = tf.mul(tf.cast(tf.equal(pred, self._p), tf.float32), self._c)
        self._vali_err = tf.reduce_sum(tf.mul(cor, self._mask))

        self._saver = tf.train.Saver([v for v in tf.all_variables()
                                      if v.name.find('matDecomp') != -1])
        tf.scalar_summary('training_weighted_loss_l2', self._loss)
        tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
        merged = tf.merge_all_summaries()
def build_eval_graph(self):
    # Keep track of the totals while running through the batch data
    self.total_loss = tf.Variable(0.0, trainable=False, collections=[])
    self.total_correct = tf.Variable(0.0, trainable=False, collections=[])
    self.example_count = tf.Variable(0.0, trainable=False, collections=[])

    # Calculate the means
    self.mean_loss = self.total_loss / self.example_count
    self.accuracy = self.total_correct / self.example_count

    # Operations to modify the stateful variables
    inc_total_loss = self.total_loss.assign_add(self.model.total_loss)
    inc_total_correct = self.total_correct.assign_add(
        tf.reduce_sum(tf.cast(self.model.correct_predictions, "float")))
    inc_example_count = self.example_count.assign_add(self.model.batch_size)

    # Operation to reset all the stateful vars. Should be called before
    # starting a data set evaluation.
    with tf.control_dependencies(
            [self.total_loss.initializer, self.total_correct.initializer,
             self.example_count.initializer]):
        self.eval_reset = tf.no_op()

    # Operation to modify the stateful variables with data from one batch.
    # Should be called for each batch in the evaluation set.
    with tf.control_dependencies([inc_total_loss, inc_total_correct,
                                  inc_example_count]):
        self.eval_step = tf.no_op()

    # Summaries
    summary_mean_loss = tf.scalar_summary("mean_loss", self.mean_loss)
    summary_acc = tf.scalar_summary("accuracy", self.accuracy)
    self.summaries = tf.merge_summary([summary_mean_loss, summary_acc])
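# A self-contained sketch, not from the original code, of the same
# reset/step accumulator pattern with plain variables. Note collections=[]
# keeps the accumulators out of GLOBAL_VARIABLES, which is why the reset op
# runs their initializers explicitly.
import tensorflow as tf

total = tf.Variable(0.0, trainable=False, collections=[])
count = tf.Variable(0.0, trainable=False, collections=[])
x = tf.placeholder(tf.float32)

with tf.control_dependencies([total.initializer, count.initializer]):
    reset = tf.no_op()  # zero the accumulators before an evaluation pass
with tf.control_dependencies([total.assign_add(x), count.assign_add(1.0)]):
    step = tf.no_op()   # fold one batch value into the accumulators
mean = total / count

with tf.Session() as sess:
    sess.run(reset)
    for v in [1.0, 2.0, 3.0]:
        sess.run(step, feed_dict={x: v})
    print(sess.run(mean))  # 2.0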
def training(cost, learning_rate_pl):
    """Set up the training operation.

    - generate a summary to track cost in TensorBoard
    - create an optimizer for all trainable variables

    The training op returned has to be called in sess.run().

    Args:
      cost: cost tensor from cost()
      learning_rate_pl: gradient descent learning rate, a PLACEHOLDER TO BE FED

    Returns:
      train_op: training op
    """
    with tf.name_scope('Training'):
        # Add a scalar summary for the cost.
        tf.scalar_summary('Mean cost', cost, name='Cost_summary')
        # Create the optimizer (Adam rather than plain gradient descent).
        optimizer = tf.train.AdamOptimizer(learning_rate_pl, name='Optimizer')
        # Create a global step variable to track training progress.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(cost, global_step=global_step, name='Train_OP')
    return train_op
def loss(self, predicts, labels, objects_num):
    """Add Loss to all the trainable variables.

    Args:
      predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell]
        ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell)
      labels: 3-D tensor of [batch_size, max_objects, 5]
      objects_num: 1-D tensor [batch_size]
    """
    class_loss = tf.constant(0, tf.float32)
    object_loss = tf.constant(0, tf.float32)
    noobject_loss = tf.constant(0, tf.float32)
    coord_loss = tf.constant(0, tf.float32)
    loss = [0, 0, 0, 0]
    for i in range(self.batch_size):
        predict = predicts[i, :, :, :]
        label = labels[i, :, :]
        object_num = objects_num[i]
        nilboy = tf.ones([7, 7, 2])
        tuple_results = tf.while_loop(
            self.cond1, self.body1,
            [tf.constant(0), object_num,
             [class_loss, object_loss, noobject_loss, coord_loss],
             predict, label, nilboy])
        for j in range(4):
            loss[j] = loss[j] + tuple_results[2][j]
        nilboy = tuple_results[5]

    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3]) / self.batch_size)

    tf.scalar_summary('class_loss', loss[0] / self.batch_size)
    tf.scalar_summary('object_loss', loss[1] / self.batch_size)
    tf.scalar_summary('noobject_loss', loss[2] / self.batch_size)
    tf.scalar_summary('coord_loss', loss[3] / self.batch_size)
    tf.scalar_summary('weight_loss',
                      tf.add_n(tf.get_collection('losses'))
                      - (loss[0] + loss[1] + loss[2] + loss[3]) / self.batch_size)

    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
def __init__(self, config):
    self.config = config
    self.input = tf.placeholder('int32', [self.config.batch_size, config.max_seq_len], name='input')
    self.labels = tf.placeholder('int64', [self.config.batch_size], name='labels')
    self.labels_one_hot = tf.one_hot(indices=self.labels,
                                     depth=config.output_dim,
                                     on_value=1.0, off_value=0.0, axis=-1)

    self.gru = GRUCell(config.hidden_state_dim)

    embeddings_we = tf.get_variable(
        'word_embeddings',
        initializer=tf.random_uniform([config.vocab_size, config.embedding_dim], -1.0, 1.0))
    self.emb = embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
    inputs = [tf.squeeze(i, squeeze_dims=[1])
              for i in tf.split(1, config.max_seq_len, embed_input)]

    outputs, last_slu_state = tf.nn.rnn(
        cell=self.gru,
        inputs=inputs,
        dtype=tf.float32,)

    w_project = tf.get_variable(
        'project2labels',
        initializer=tf.random_uniform([config.hidden_state_dim, config.output_dim], -1.0, 1.0))
    self.logits = logits_bo = tf.matmul(last_slu_state, w_project)
    tf.histogram_summary('logits', logits_bo)
    self.probabilities = tf.nn.softmax(logits_bo)
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits_bo, self.labels_one_hot))
    self.predict = tf.nn.softmax(logits_bo)

    # TensorBoard
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(self.predict, 1), self.labels), 'float32'),
        name='accuracy')
    tf.scalar_summary('CCE loss', self.loss)
    tf.scalar_summary('Accuracy', self.accuracy)
    self.tb_info = tf.merge_all_summaries()
def train(self, eval_on_test=False):
    """Train model and save it to file.

    Train model with given hidden layers. Training data is created by
    prepare_training_data(), which must be called before this function.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        feature_data = tf.placeholder("float", [None, self.num_predictors])
        labels = tf.placeholder("float", [None, self.num_classes])

        layers = [self.num_predictors] + self.hidden_layers + [self.num_classes]
        model = self.inference(feature_data, layers)
        cost, cost_summary_op = self.loss(model, labels)
        training_op = self.training(cost, learning_rate=0.0001)

        correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        # Merge all variable summaries and save the results to log file
        # summary_op = tf.merge_all_summaries()
        accuracy_op_train = tf.scalar_summary("Accuracy on Train", accuracy)
        summary_op_train = tf.merge_summary([cost_summary_op, accuracy_op_train])
        if eval_on_test:
            accuracy_op_test = tf.scalar_summary("Accuracy on Test", accuracy)
            summary_op_test = tf.merge_summary([accuracy_op_test])

        summary_writer = tf.train.SummaryWriter(self.log_dir + self.model_name, sess.graph)

        train_dict = {
            feature_data: self.training_predictors_tf.values,
            labels: self.training_classes_tf.values.reshape(
                len(self.training_classes_tf.values), self.num_classes)}

        if eval_on_test:
            test_dict = {
                feature_data: self.test_predictors_tf.values,
                labels: self.test_classes_tf.values.reshape(
                    len(self.test_classes_tf.values), self.num_classes)}

        init = tf.initialize_all_variables()
        sess.run(init)

        for i in range(1, self.max_iteration):
            sess.run(training_op, feed_dict=train_dict)

            # Write summary to log
            if i % 100 == 0:
                summary_str = sess.run(summary_op_train, feed_dict=train_dict)
                summary_writer.add_summary(summary_str, i)
                if eval_on_test:
                    summary_str = sess.run(summary_op_test, feed_dict=test_dict)
                    summary_writer.add_summary(summary_str, i)
                summary_writer.flush()

            # Print current accuracy to console
            if i % 5000 == 0:
                print(i, sess.run(accuracy, feed_dict=train_dict))

        # Save trained parameters
        saver = tf.train.Saver()
        saver.save(sess, self.model_filename)
def get_config():
    logger.auto_set_dir()

    data_train, data_test = get_data()
    step_per_epoch = data_train.size()

    lr = tf.train.exponential_decay(
        learning_rate=1e-3,
        global_step=get_global_step_var(),
        decay_steps=data_train.size() * 60,
        decay_rate=0.2, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=data_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(data_test,
                            [ScalarStats('cost'), ClassificationError()])
        ]),
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=350,
    )
def train(total_loss, global_step, batch_size=BATCH_SIZE):
    number_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size
    decay_steps = int(number_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)

    # with tf.control_dependencies([total_loss]):
    #     opt = tf.train.AdamOptimizer(lr)
    #     grads = opt.compute_gradients(total_loss)
    #     # apply the gradients
    #     apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    #     for grad, var in grads:
    #         if grad is not None:
    #             tf.histogram_summary(var.op.name + "/gradients", grad)
    #     with tf.control_dependencies([apply_gradient_op]):
    #         train_op = tf.no_op(name="train")

    opt = tf.train.GradientDescentOptimizer(lr).minimize(total_loss, global_step=global_step)
    # grads = opt.compute_gradients(total_loss)

    return opt
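# A small worked example, not from the original code, of the staircase decay
# schedule used above: with decay_steps=100 and decay_rate=0.1, the learning
# rate is lr * 0.1**floor(global_step / 100).
import tensorflow as tf

global_step = tf.Variable(250, trainable=False)
lr = tf.train.exponential_decay(0.1, global_step, 100, 0.1, staircase=True)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # floor(250 / 100) = 2 completed decay intervals -> 0.1 * 0.1**2
    print(sess.run(lr))  # 0.001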
def _activation_summary(x):
    tensor_name = re.sub('tower_[0-9]*/', '', x.op.name)
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None,
                 num_readers=1):
    """Construct batches of training or evaluation examples from the image dataset.

    Args:
      dataset: instance of Dataset class specifying the dataset.
        See dataset.py for details.
      batch_size: integer
      train: boolean
      num_preprocess_threads: integer, total number of preprocessing threads
      num_readers: integer, number of parallel readers

    Returns:
      images: 4-D float Tensor of a batch of images
      labels: 1-D integer Tensor of [batch_size].

    Raises:
      ValueError: if data is not found
    """
    with tf.name_scope('batch_processing'):
        data_files = dataset.data_files()
        if data_files is None:
            raise ValueError('No data files found for this dataset')

        # Create filename_queue
        if train:
            filename_queue = tf.train.string_input_producer(data_files,
                                                            shuffle=True,
                                                            capacity=64)
        else:
            filename_queue = tf.train.string_input_producer(data_files,
                                                            shuffle=False,
                                                            capacity=1)

        if num_preprocess_threads is None:
            num_preprocess_threads = FLAGS.num_preprocess_threads
        # To reduce the number of preprocessing threads, we no longer require this:
        # if num_preprocess_threads % 4:
        #     raise ValueError('Please make num_preprocess_threads a multiple '
        #                      'of 4 (%d %% 4 != 0).' % num_preprocess_threads)

        if num_readers is None:
            num_readers = FLAGS.num_readers
        if num_readers < 1:
            raise ValueError('Please make num_readers at least 1')

        # Approximate number of examples per shard.
        examples_per_shard = FLAGS.examples_per_shard
        # Size the random shuffle queue to balance between good global
        # mixing (more examples) and memory use (fewer examples).
        # 1 image uses 299*299*3*4 bytes = 1MB
        # The default input_queue_memory_factor is 16, implying a shuffling queue
        # size: examples_per_shard * 16 * 1MB = 17.6GB
        if train:
            min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor
            examples_queue = tf.RandomShuffleQueue(
                capacity=min_queue_examples + 3 * batch_size,
                min_after_dequeue=min_queue_examples,
                dtypes=[tf.string])
        else:
            examples_queue = tf.FIFOQueue(
                capacity=examples_per_shard + 3 * batch_size,
                dtypes=[tf.string])

        # Create multiple readers to populate the queue of examples.
        if num_readers > 1:
            enqueue_ops = []
            for _ in range(num_readers):
                reader = dataset.reader()
                _, value = reader.read(filename_queue)
                enqueue_ops.append(examples_queue.enqueue([value]))

            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
        else:
            reader = dataset.reader()
            _, example_serialized = reader.read(filename_queue)

        if FLAGS.use_MIMO_inputs_pipeline:
            # The new convention for images and labels is a generalized one:
            # images: all inputs; labels: all outputs that need to be predicted.
            datan = []
            for thread_id in range(num_preprocess_threads):
                # 1. This function returns multiple input data (could include both
                #    labels and images). This enables more complex models, such as
                #    LRCN with egomotion inputs, to run with this framework.
                # 2. The parse_example_proto function could return more than one
                #    input for one example_serialized.
                # 3. The returned format is a list of tensors
                #    [Tensor1, Tensor2, ..., Tensor_n]; each tensor denotes a small
                #    batch of one variable. The tensors for the video might be
                #    5-dim, [batch_size, nframes, H, W, C].
                # 4. We expect future data augmentation code to appear in
                #    parse_example_proto itself, since inherently the augmentation
                #    is highly dataset dependent.
                # 5. parse_example_proto returns net_input and net_output as two
                #    separate tensor lists.
                net_inputs, net_outputs = dataset.parse_example_proto(example_serialized)
                net_inputs, net_outputs = dataset.augmentation(train, net_inputs, net_outputs)
                datan.append(net_inputs + net_outputs)

            # The single-thread batch_join dequeue_many operation might be the bottleneck.
            if net_inputs[0].get_shape()[0].value == batch_size:
                print("output batch of parse_example_proto == required batchsize (%d), "
                      "no batching needed" % batch_size)
                print("Omitting the batch_join queue")
                joins = datan
                one_joined = datan[-1]
            else:
                # this is quite slow, avoid using this
                joins = []
                for i in range(FLAGS.num_batch_join):
                    reduced_factor = max(
                        math.ceil(1.0 * num_preprocess_threads / FLAGS.num_batch_join), 2)
                    one_joined = tf.train.batch_join(tensors_list=datan,
                                                     batch_size=batch_size,
                                                     capacity=reduced_factor * batch_size,
                                                     enqueue_many=True)
                    joins.append(one_joined)
                print(FLAGS.num_batch_join, " batch_joins, each of them capacity is, ",
                      reduced_factor * batch_size, " instances")

            # Add a buffering queue to remove the dequeue_many time.
            print("buffer queue capacity is: ", FLAGS.num_batch_join, " batches")
            capacity = FLAGS.num_batch_join
            buffer_queue = tf.FIFOQueue(
                capacity=capacity,
                dtypes=[x.dtype for x in one_joined],
                shapes=[x.get_shape() for x in one_joined],
                name="buffer_queue")

            tf.scalar_summary(
                "queue/%s/fraction_of_%d_full" % (buffer_queue.name, capacity),
                tf.cast(buffer_queue.size(), tf.float32) * (1. / capacity))

            buffer_ops = [buffer_queue.enqueue(join) for join in joins]
            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(buffer_queue, buffer_ops))
            data_joined = buffer_queue.dequeue()
            # End of buffer queue. The CPU-to-GPU memory transfer is still not resolved.

            # Recover the inputs and outputs.
            joined_inputs = data_joined[:len(net_inputs)]
            joined_outputs = data_joined[len(net_inputs):]

            # Dataset's visualizer: since only the dataset knows how to visualize
            # the data, let the dataset provide such a method.
            dataset.visualize(joined_inputs, joined_outputs)

            return joined_inputs, joined_outputs
        else:
            raise ValueError("have to use MIMO input pipeline")
def __init__(self, n_input, n_hidden, layer_names, learning_rate=0.01,
             momentum=0.5, keep_prob=1.0,
             transfer_function_enc=tf.nn.sigmoid,
             transfer_function_dec=tf.nn.sigmoid,
             corr_type='masking', opt='gradient_descent',
             xavier_init=1, loss_func='mean_squared', corr_frac=0,
             regtype='none', l2reg=5e-4):
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.transfer_enc = transfer_function_enc
    self.transfer_dec = transfer_function_dec
    self.layer_names = layer_names
    self.keep_prob_value = keep_prob
    self.opt = opt
    self.loss_func = loss_func
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.xavier_init = xavier_init
    self.corr_frac = corr_frac
    self.corr_type = corr_type

    assert 0. <= self.corr_frac <= 1.

    # placeholders
    # network_weights = self._initialize_weights()
    # self.weights = network_weights
    self.x = tf.placeholder(tf.float32, [None, self.n_input], name='x')
    self.x_corr = tf.placeholder(tf.float32, [None, self.n_input], name='x-corr-input')

    self.w = tf.Variable(
        self.xavier_init_func(self.n_input, self.n_hidden, self.xavier_init),
        name=self.layer_names[0])
    # tf.Variable(
    #     tf.truncated_normal(
    #         shape=[n_features, self.n_components], stddev=0.1),
    #     name='enc-w')
    self.vb = tf.Variable(tf.zeros([self.n_input]), name=self.layer_names[1])
    self.hb = tf.Variable(tf.zeros([self.n_hidden]), name=self.layer_names[2])

    self.o_w = np.random.normal(0.0, 0.01, [self.n_input, self.n_hidden])
    self.o_vb = np.zeros([self.n_input], np.float32)
    self.o_hb = np.zeros([self.n_hidden], np.float32)

    self.keep_prob = tf.placeholder(tf.float32)

    self.weights = {}
    self.weights['w'] = self.w
    self.weights['vb'] = self.vb
    self.weights['hb'] = self.hb

    # variables
    self.encode = self.transfer_enc(tf.matmul(self.x_corr, self.w) + self.hb)
    self.encode = tf.nn.dropout(self.encode, self.keep_prob)
    self.decode = self.transfer_dec(tf.matmul(self.encode, tf.transpose(self.w)) + self.vb)
    self.decode = tf.nn.dropout(self.decode, self.keep_prob)

    if self.loss_func == 'cross_entropy':
        self.cost = -tf.reduce_sum(self.x * tf.log(self.decode))
        _ = tf.scalar_summary("cross_entropy", self.cost)
    elif self.loss_func == 'mean_squared':
        self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.decode)))
        _ = tf.scalar_summary("mean_squared", self.cost)
    else:
        self.cost = None

    if self.opt == 'gradient_descent':
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.cost)
    elif self.opt == 'ada_grad':
        self.train_step = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.cost)
    elif self.opt == 'momentum':
        self.train_step = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.cost)
    else:
        self.train_step = None

    # cost
    # self.err_sum = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.v_sample)))
    # self.err_sum = tf.reduce_mean(tf.square(self.x - self.v_sample))

    init = tf.initialize_all_variables()
    self.sess = tf.Session()
    self.sess.run(init)
gen_optimizer = optimizers.Adam(alpha=args.gen_lr)
gen_optimizer.setup(generator)
gen_optimizer.add_hook(chainer.optimizer.GradientClipping(args.gen_grad_clip))
dis_optimizer = optimizers.Adam(alpha=args.dis_lr)
dis_optimizer.setup(discriminator)
dis_optimizer.add_hook(NamedWeightDecay(args.dis_l2_reg_lambda, '/out/'))

# summaries
sess = tf.Session()
sess.run(tf.initialize_all_variables())

summary_dir = os.path.join(out_dir, "summaries")

loss_ = tf.placeholder(tf.float32)
train_loss_summary = tf.scalar_summary('train_loss', loss_)
test_loss_summary = tf.scalar_summary('test_loss', loss_)
dis_loss_summary = tf.scalar_summary('dis_loss', loss_)
dis_acc_summary = tf.scalar_summary('dis_acc', loss_)

summary_writer = tf.train.SummaryWriter(summary_dir, sess.graph)

dis_train_count = 0
gen_train_count = 0
test_count = 0

with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'w') as f:
    f.write('')

with open(os.path.join(out_dir, "generated_sample.txt"), 'w') as f:
    f.write('')
## Add summary ops to collect data
#W1_hist = tf.histogram_summary("W1_hist", W1)
#W1_scalar_summary = tf.scalar_summary("W1_scalar", W1)
#W1_hist = tf.histogram_summary("W1", W1)

#S1_hist = tf.histogram_summary("S1_hist", S1)
#S1_scalar_summary = tf.scalar_summary("S1_scalar", S1)
#S1_scalar_summary = tf.scalar_summary("S1", S1)

#C1_hist = tf.histogram_summary("C1_hist", C1)
#C1_scalar_summary = tf.scalar_summary("C1_scalar", C1)
#C1_hist = tf.histogram_summary("C1", C1)

with tf.name_scope("l2_loss") as scope:
    ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss)

## TRAIN
if phase_train is not None:
    # Do BN
    feed_dict_train = {x: X_train, y_: Y_train, phase_train: False}
    feed_dict_test = {x: X_test, y_: Y_test, phase_train: False}
else:
    # Don't do BN
    feed_dict_train = {x: X_train, y_: Y_train}
    feed_dict_test = {x: X_test, y_: Y_test}

def get_batch_feed(X, Y, M, phase_train):
    mini_batch_indices = np.random.randint(M, size=M)
    Xminibatch = X[mini_batch_indices, :]  # ( M x D^(0) )
                                      initial_learning_rate,
                                      learning_rate_input,
                                      grad_applier, MAX_TIME_STEP,
                                      device=device)
    training_threads.append(training_thread)

# prepare session
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

init = tf.initialize_all_variables()
sess.run(init)

# summary for tensorboard
score_input = tf.placeholder(tf.int32)
tf.scalar_summary("score", score_input)

summary_op = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(LOG_FILE, sess.graph_def)

# init or load checkpoint with saver
saver = tf.train.Saver()
checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(sess, checkpoint.model_checkpoint_path)
    print("checkpoint loaded:", checkpoint.model_checkpoint_path)
    tokens = checkpoint.model_checkpoint_path.split("-")
    # set global step
    global_t = int(tokens[1])
    print(">>> global step set: ", global_t)
else:
def __init__(self, is_training, config):
    seq_width = config.seq_width
    n_steps = config.batch_size
    num_hidden = config.num_hidden
    num_layers = config.num_layers

    # tensors for input, target and sequence length placeholders
    self._seq_input = tf.placeholder(tf.float32, [n_steps, seq_width])
    self._seq_target = tf.placeholder(tf.float32, [n_steps, 1])
    self._early_stop = tf.placeholder(tf.int32)

    # inputs should be a list of tensors, one per timestep
    inputs = [tf.reshape(data, (1, seq_width))
              for data in tf.split(0, n_steps, self.seq_input)]

    initializer = tf.random_uniform_initializer(-.1, .1)

    cell = rnn_cell.LSTMCell(num_hidden, seq_width, initializer=initializer)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    # initial state
    self._initial_state = cell.zero_state(1, tf.float32)

    outputs, states = rnn(cell, inputs,
                          initial_state=self._initial_state,
                          sequence_length=self._early_stop)

    # save final state of the rnn
    self._final_state = states[-1]

    # outputs originally comes as a list of tensors, but we need a single
    # tensor for tf.matmul
    outputs = tf.reshape(tf.concat(1, outputs), [-1, num_hidden])

    # rnn outputs
    W = tf.get_variable('W', [num_hidden, 1])
    b = tf.get_variable('b', [1])
    _output = tf.matmul(outputs, W) + b
    self._output = _output

    # ops for least squares error computation
    error = tf.pow(
        tf.reduce_sum(tf.pow(tf.sub(_output, self._seq_target), 2)), .5)
    tf.scalar_summary("error", error)
    self._error = error

    self._merge_summaries_op = tf.merge_all_summaries()

    if not is_training:
        return

    # learning rate
    self._lr = tf.Variable(0., trainable=False, name='lr')

    # trainable variables for gradient computation
    tvars = tf.trainable_variables()
    # compute gradients
    grads, _ = tf.clip_by_global_norm(tf.gradients(self._error, tvars),
                                      config.max_grad_norm)

    # 2 options here: either use GradientDescentOptimizer (config.useGDO: True)
    # or AdamOptimizer (config.useGDO: False)
    if config.useGDO:
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
    else:
        optimizer = tf.train.AdamOptimizer(self._lr)

    # ops for training
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
tf.histogram_summary("w_h2_summ", w_h2) tf.histogram_summary("w_o_summ", w_o) p_keep_input = tf.placeholder("float", name="p_keep_input") p_keep_hidden = tf.placeholder("float", name="p_keep_hidden") py_x = model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden) # Step 3 - Add cost function into the events section of Tensorboard with # tf.name_scope and give it a specfic name with tf.name_scope("cost_function"): cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y)) train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost) # This section adds the cost_function to the summary section of Tensorboard tf.scalar_summary("cost_function", cost) # Step 4 - Add accuracy function into the events section of Tensorboard with # tf.name_scope and give it a specfic name with tf.name_scope("accuracy"): correct_pred = tf.equal(tf.argmax(Y, 1), tf.argmax(py_x, 1)) # Count correct predictions acc_op = tf.reduce_mean(tf.cast(correct_pred, "float")) # Cast boolean to float to average # This section adds the accuracy_function to the summary section of Tensorboard tf.scalar_summary("accuracy", acc_op) #Step 5 - Here we add define where the Tensorboard logs get stored and pass them to active session with tf.Session() as sess: # In this section we create a log writer. # if you use terminal and run this command to start: 'tensorboard --logdir=logs'
# h_fc2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
with tf.name_scope("Output") as scope:
    W_fcc2 = weight_variable([1, 1, num_fc, num_classes], 'FC_conv_2')
    b_fcc2 = bias_variable([num_classes], 'bias_for_FC_conv_2')
    #h_fcc2 = tf.nn.relu(conv2d(h_fcc1,W_fcc2)+b_fcc2)
    h_fcc2 = tf.nn.relu(
        tf.nn.conv2d(h_fcc1, W_fcc2, strides=[1, 1, 1, 1], padding='VALID')
        + b_fcc2)
    size3 = tf.shape(h_fcc2)
    h_fcc2_strip = tf.squeeze(h_fcc2)
    size4 = tf.shape(h_fcc2_strip)

with tf.name_scope("Softmax") as scope:
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(h_fcc2_strip, y_)
    cost = tf.reduce_sum(loss)
    loss_summ = tf.scalar_summary("cross entropy_loss", cost)

with tf.name_scope("train") as scope:
    tvars = tf.trainable_variables()
    # We clip the gradients to prevent explosion
    grads = tf.gradients(cost, tvars)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = zip(grads, tvars)
    train_step = optimizer.apply_gradients(gradients)

    # The following block plots for every trainable variable
    #  - Histogram of the entries of the Tensor
    #  - Histogram of the gradient over the Tensor
    #  - Histogram of the gradient-norm over the Tensor
    numel = tf.constant([[0]])
    for gradient, variable in gradients:
        if isinstance(gradient, ops.IndexedSlices):
    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
    num_filters=FLAGS.num_filters,
    l2_reg_lambda=FLAGS.l2_reg_lambda)

# Define Training procedure
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(cnn.loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

# Keep track of gradient values and sparsity (optional)
grad_summaries = []
for g, v in grads_and_vars:
    if g is not None:
        grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
        sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name),
                                             tf.nn.zero_fraction(g))
        grad_summaries.append(grad_hist_summary)
        grad_summaries.append(sparsity_summary)
grad_summaries_merged = tf.merge_summary(grad_summaries)

# Output directory for models and summaries
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))

# Summaries for loss and accuracy
loss_summary = tf.scalar_summary("loss", cnn.loss)
acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

# Train Summaries
train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged])
tf.summary.histogram("w_h2_summ", w_h2) tf.summary.histogram("w_o_summ", w_o) #Step 5 - Add dropout to input and hidden layers p_keep_input = tf.placeholder("float", name="p_keep_input") p_keep_hidden = tf.placeholder("float", name="p_keep_hidden") #Step 6 - Create Model py_x = model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden) #Step 7 Create cost function with tf.name_scope("cost"): cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(py_x, Y)) train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost) # Add scalar summary for cost tensor tf.scalar_summary("cost", cost) #Step 8 Measure accuracy with tf.name_scope("accuracy"): correct_pred = tf.equal(tf.argmax(Y, 1), tf.argmax(py_x, 1)) # Count correct predictions acc_op = tf.reduce_mean(tf.cast( correct_pred, "float")) # Cast boolean to float to average # Add scalar summary for accuracy tensor tf.scalar_summary("accuracy", acc_op) #Step 9 Create a session with tf.Session() as sess: # Step 10 create a log writer. run 'tensorboard --logdir=./logs/nn_logs' writer = tf.train.SummaryWriter("./logs/nn_logs", sess.graph) # for 0.8 merged = tf.merge_all_summaries()
    _h = tanh(W_p * r + W_x * outputs[-1])
    predict = tf.matmul(_h, weights['out']) + biases['out']
    return predict, outputs

#temp = RNN(x, weights, biases)
pred, outputs_2 = RNN(x, weights, biases)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

validation_accuracy = tf.placeholder("float")
tf.scalar_summary('validation_accuracy', validation_accuracy)
train_accuracy = tf.placeholder("float")
tf.scalar_summary('train_accuracy', train_accuracy)
train_loss = tf.placeholder("float")
tf.scalar_summary('train_loss', train_loss)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter('tensorboard/bilstm_attention', sess.graph)
    sess.run(init)
    step = 0
    step_all = 0
    # train
    while step_all * batch_size < training_iters:
    os.makedirs(params.model_ckpt_dir)

sess = tf.InteractiveSession()

# Use weighted cross entropy as the loss function
with tf.name_scope('cross_entropy'):
    num_positives = tf.maximum(tf.reduce_sum(tf.cast(model.y_, tf.int32)), 1)
    num_negatives = tf.sub(tf.size(model.y_), num_positives)
    class_ratio = tf.cast(num_negatives, tf.float32) / tf.cast(num_positives, tf.float32)
    diff = tf.nn.weighted_cross_entropy_with_logits(
        tf.clip_by_value(model.y, 1e-10, 1.0),
        tf.cast(model.y_, tf.float32),
        class_ratio)
    with tf.name_scope('total'):
        cross_entropy = tf.reduce_mean(diff)
    tf.scalar_summary('cross entropy', cross_entropy)

# Add optimizer to the graph to minimize cross entropy
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(params.learning_rate).minimize(cross_entropy)

with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(model.prediction, model.y_)
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.scalar_summary('accuracy', accuracy)

with tf.name_scope('prediction_stats') as scope:
    true_pos = tf.reduce_mean(
def build_fast_forward_pass(self, step=0.003):
    self.check_op = tf.add_check_numerics_ops()
    computations = []

    bob = 1
    if self.node_layers[0][0].t == 'b':
        bob = 2

    with tf.name_scope("input"):
        self.input = tf.placeholder(dtype=tf.float64,
                                    shape=(None, max(self.input_order) + 1, bob),
                                    name='Input')
        # self.input = tf.placeholder(dtype=tf.float64,
        #                             shape=(len(self.input_order)*2), name='Input')

    # the input to be appended to each layer
    input_splits = []
    self.conz = tf.placeholder(shape=[1], dtype=tf.float64)

    # compute the input
    weights = []
    with tf.name_scope("projection"):
        n = tf.constant(0.0001, dtype=tf.float64)
        for L in range(len(self.weights)):
            if L != 0:
                drop = tf.round(tf.random_uniform(self.weights[L].get_shape(),
                                                  self.conz, 1.0, dtype=tf.float64))
                weights.append(tf.add(tf.nn.relu(tf.sub(self.weights[L] * drop, n)), n))
            else:
                weights.append(tf.add(tf.nn.relu(tf.sub(self.weights[L], n)), n))

    with tf.name_scope('normalization'):
        self.sum_of_weights = [tf.segment_sum(x, y) if x.get_shape()[0] > 0 else None
                               for x, y in zip(weights, self.inds)]
        sum_of_weights = self.sum_of_weights
        self.norm_weights = [tf.div(x, tf.gather(y, z)) if x.get_shape()[0] > 0 else None
                             for x, y, z in zip(weights, self.sum_of_weights, self.inds)]

    with tf.name_scope('LEAFS_' + str(len(self.input_order))):
        input_gather = tf.reshape(
            tf.transpose(tf.gather(tf.transpose(self.input, (1, 0, 2)),
                                   self.input_swap), (1, 0, 2)),
            shape=(-1, len(self.input_order) * bob))
        self.counting.append(input_gather)
        if self.node_layers[0][0].t == 'b':
            # binary input ('b'); the else branch handles continuous input
            input_computation_w = tf.mul(input_gather, weights[0])
            input_computation_s = tf.transpose(
                tf.segment_sum(tf.transpose(input_computation_w), self.inds[0]))
            input_computation_n = tf.log(tf.div(input_computation_s, sum_of_weights[0]))
            computations.append(input_computation_n)
        else:
            pi = tf.constant(np.pi, tf.float64)
            mus = self.cont[0]
            # sigma can't be smaller than 0.01
            sigs = tf.nn.relu(self.cont[1] - 0.01) + 0.01
            # Gaussian formula
            input_computation_g = tf.div(
                tf.exp(tf.neg(tf.div(tf.square(input_gather - mus),
                                     2 * tf.mul(sigs, sigs)))),
                tf.sqrt(2 * pi) * sigs) + 0.000001
            input_computation_n = tf.log(input_computation_g)
            computations.append(input_computation_n)

    # split the input computation and figure out which one goes in each layer
    j = 0
    for i in range(len(self.input_layers)):
        a = tf.constant(j)
        b = self.input_layers[i]
        input_splits.append(tf.slice(input_computation_n, [0, a], [-1, b]))
        j += b

    current_computation = input_splits[0]

    for i in range(len(self.node_layers[1:])):
        L = i + 1  # the layer number
        if self.weights[L].get_shape()[0] == 0:
            # product layer
            with tf.name_scope("PRD" + str(self.inds[L].get_shape()[0])):
                # do a segment sum in the log domain
                current_computation = tf.transpose(
                    tf.segment_sum(tf.transpose(current_computation), self.inds[L]))
        else:
            with tf.name_scope("SUM" + str(self.inds[L].get_shape()[0])):
                # stats for counting and cccp
                self.counting.append(current_computation)
                # get the max at each node
                maxes = tf.transpose(tf.segment_max(tf.transpose(current_computation),
                                                    self.inds[L]))
                back_maxes = tf.transpose(tf.gather(tf.transpose(maxes), self.inds[L]))
                # subtract the max at each node
                current_computation = tf.sub(current_computation, back_maxes)
                # get out of log domain
                current_computation = tf.exp(current_computation)
                # multiply by weights
                current_computation = tf.mul(current_computation, weights[L])
                # compute sum node
                current_computation = tf.transpose(
                    tf.segment_sum(tf.transpose(current_computation), self.inds[L]))
                # normalize
                current_computation = tf.div(current_computation, sum_of_weights[L])
                # re-add the maxes that we took out after entering log domain
                current_computation = tf.add(tf.log(current_computation), maxes)
                # concatenate with inputs for the next layer
                current_computation = tf.concat(1, [current_computation, input_splits[L]])
                # shuffle so that next node is ready
                current_computation = tf.transpose(
                    tf.gather(tf.transpose(current_computation), self.shuffle[L]))
        computations.append(current_computation)

    with tf.name_scope('root_node'):
        self.output = current_computation

    with tf.name_scope('loss'):
        if self.multiclass:
            self.labels = tf.placeholder(shape=(None, len(self.node_layers[-1])),
                                         dtype=tf.float64)
            self.loss = -tf.reduce_mean(
                tf.mul(self.output, 0.1 * (self.labels - 1) + self.labels))
        else:
            self.loss = -tf.reduce_mean(self.output)
        self.loss_summary = tf.scalar_summary(self.summ, self.loss)

    self.opt_val = self.optimizer(0.001).minimize(self.loss)
    self.computations = computations
def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True,
                                      fake_data=FLAGS.fake_data)

    sess = tf.InteractiveSession()

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784], name='x-input')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    b = tf.Variable(tf.zeros([10]), name='bias')

    # Use a name scope to organize nodes in the graph visualizer
    with tf.name_scope('Wx_b'):
        y = tf.nn.softmax(tf.matmul(x, W) + b)

    # Add summary ops to collect data
    tf.histogram_summary('weights', W)
    tf.histogram_summary('biases', b)
    tf.histogram_summary('y', y)

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
    # More name scopes will clean up the graph representation
    with tf.name_scope('xent'):
        cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
        tf.scalar_summary('cross entropy', cross_entropy)
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy)

    with tf.name_scope('test'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.scalar_summary('accuracy', accuracy)

    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(FLAGS.summaries_dir, sess.graph_def)
    tf.initialize_all_variables().run()

    # Train the model, and feed in test data and record summaries every 10 steps
    for i in range(FLAGS.max_steps):
        if i % 10 == 0:  # Record summary data and the accuracy
            if FLAGS.fake_data:
                batch_xs, batch_ys = mnist.train.next_batch(
                    100, fake_data=FLAGS.fake_data)
                feed = {x: batch_xs, y_: batch_ys}
            else:
                feed = {x: mnist.test.images, y_: mnist.test.labels}
            result = sess.run([merged, accuracy], feed_dict=feed)
            summary_str = result[0]
            acc = result[1]
            writer.add_summary(summary_str, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:
            batch_xs, batch_ys = mnist.train.next_batch(
                100, fake_data=FLAGS.fake_data)
            feed = {x: batch_xs, y_: batch_ys}
            sess.run(train_step, feed_dict=feed)
def __init__(self, input_shape=[128, 96, 96, 1], n_filter=[32, 64, 128],
             n_hidden=[500, 500], n_y=30,
             receptive_field=[[3, 3], [2, 2], [2, 2]],
             pool_size=[[2, 2], [2, 2], [2, 2]],
             obj_fcn=mse, logdir='train'):
    self._sanity_check(input_shape, n_filter, receptive_field, pool_size)

    x_shape = input_shape[:]
    x_shape[0] = None
    x = tf.placeholder(shape=x_shape, dtype=tf.float32)
    y = tf.placeholder(shape=(None, n_y), dtype=tf.float32)
    self.x, self.y = x, y

    # ========= CNN layers =========
    n_channel = [input_shape[-1]] + n_filter
    for i in range(len(n_channel) - 1):
        filter_shape = receptive_field[i] + n_channel[i:i + 2]  # e.g. [5, 5, 32, 64]
        pool_shape = [1] + pool_size[i] + [1]
        print 'Filter shape (layer %d): %s' % (i, filter_shape)
        conv_and_filter = conv_layer(x, filter_shape, 'conv%d' % i,
                                     padding='VALID')
        print 'Shape after conv: %s' % conv_and_filter.get_shape().as_list()
        # norm1 = tf.nn.local_response_normalization(
        #     conv_and_filter, 4, bias=1.0, alpha=0.001 / 9.0,
        #     beta=0.75, name='norm%d' % i)
        pool1 = tf.nn.max_pool(
            conv_and_filter,  # norm1,
            ksize=pool_shape,
            strides=pool_shape,
            padding='SAME',
            name='pool%d' % i)
        print 'Shape after pooling: %s' % pool1.get_shape().as_list()
        x = pool1

    # ========= Fully-connected layers =========
    dim = np.prod(x.get_shape()[1:].as_list())
    x = tf.reshape(x, [-1, dim])
    print 'Total dim after CNN: %d' % dim
    for i, n in enumerate(n_hidden):
        x = full_layer(x, n, layer_name='full%d' % i)  # nonlinear=tf.nn.relu
    yhat = full_layer(x, n_y, layer_name='output', nonlinear=tf.identity)
    self.yhat = yhat

    self.batch_size = input_shape[0]
    self.lr = tf.placeholder(dtype=tf.float32)
    self.objective = obj_fcn(y, yhat)
    self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.objective)
    tf.scalar_summary(self.objective.op.name, self.objective)

    self.sess = tf.Session(config=config)
    self.logdir = logdir
def __init__(self, sess, config, data_feed, log_dir):
    vocab_size = len(data_feed.vocab)
    self.data_feed = data_feed

    with tf.name_scope("io"):
        self.inputs = tf.placeholder(dtype=tf.int32, shape=(None, None),
                                     name="input_seq")
        self.input_lens = tf.placeholder(dtype=tf.int32, shape=(None,),
                                         name="seq_len")
        self.da_labels = tf.placeholder(dtype=tf.int32, shape=(None,),
                                        name="dialog_acts")
        self.senti_labels = tf.placeholder(
            dtype=tf.float32,
            shape=(None, data_feed.feature_size[data_feed.SENTI_ID]),
            name="sentiments")

    self.learning_rate = tf.Variable(float(config.init_lr), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * config.lr_decay)

    max_sent_len = array_ops.shape(self.inputs)[1]
    batch_size = array_ops.shape(self.inputs)[0]

    with variable_scope.variable_scope("word-embedding"):
        embedding = tf.get_variable("embedding",
                                    [vocab_size, config.embed_size],
                                    dtype=tf.float32)
        input_embedding = embedding_ops.embedding_lookup(
            embedding,
            tf.squeeze(tf.reshape(self.inputs, [-1, 1]), squeeze_dims=[1]))
        input_embedding = tf.reshape(input_embedding,
                                     [-1, max_sent_len, config.embed_size])

    with variable_scope.variable_scope("rnn"):
        if config.cell_type == "gru":
            cell = rnn_cell.GRUCell(config.cell_size)
        elif config.cell_type == "lstm":
            cell = rnn_cell.LSTMCell(config.cell_size, use_peepholes=False,
                                     forget_bias=1.0)
        elif config.cell_type == "rnn":
            cell = rnn_cell.BasicRNNCell(config.cell_size)
        else:
            raise ValueError("unknown RNN type")

        if config.keep_prob < 1.0:
            cell = rnn_cell.DropoutWrapper(
                cell,
                output_keep_prob=config.keep_prob,
                input_keep_prob=config.keep_prob)

        if config.num_layer > 1:
            cell = rnn_cell.MultiRNNCell([cell] * config.num_layer,
                                         state_is_tuple=True)

        # with sequence_length given, outputs past each true length are zeroed
        # and enc_last_state will be the same as the true last state
        outputs, _ = tf.nn.dynamic_rnn(
            cell,
            input_embedding,
            dtype=tf.float32,
            sequence_length=self.input_lens,
        )

        # get the TRUE last outputs
        last_outputs = tf.reduce_sum(
            tf.mul(outputs,
                   tf.expand_dims(tf.one_hot(self.input_lens - 1,
                                             max_sent_len), -1)),
            1)

    self.dialog_acts = self.fnn(last_outputs,
                                data_feed.feature_size[data_feed.DA_ID],
                                [100], "dialog_act_fnn")
    self.sentiments = self.fnn(last_outputs,
                               data_feed.feature_size[data_feed.SENTI_ID],
                               [100], "sentiment_fnn")

    self.loss = tf.reduce_sum(
        nn_ops.sparse_softmax_cross_entropy_with_logits(self.dialog_acts,
                                                        self.da_labels)) \
        + tf.reduce_sum(
            nn_ops.softmax_cross_entropy_with_logits(self.sentiments,
                                                     self.senti_labels))
    self.loss /= tf.to_float(batch_size)
    tf.scalar_summary("entropy_loss", self.loss)
    self.summary_op = tf.merge_all_summaries()

    # weight decay
    tvars = tf.trainable_variables()
    for v in tvars:
        print("Trainable %s" % v.name)

    # optimization
    if config.op == "adam":
        print("Use Adam")
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
    elif config.op == "rmsprop":
        print("Use RMSProp")
        optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
    else:
        print("Use SGD")
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                      config.grad_clip)
    self.train_ops = optimizer.apply_gradients(zip(grads, tvars))
    self.saver = tf.train.Saver(tf.all_variables(),
                                write_version=tf.train.SaverDef.V2)

    if log_dir is not None:
        train_log_dir = os.path.join(log_dir, "train")
        print("Save summary to %s" % log_dir)
        self.train_summary_writer = tf.train.SummaryWriter(train_log_dir,
                                                           sess.graph)
def add_activation_summary(var):
    if var is not None:
        tf.histogram_summary(var.op.name + "/activation", var)
        tf.scalar_summary(var.op.name + "/sparsity", tf.nn.zero_fraction(var))
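# A minimal usage sketch for add_activation_summary; the layer names and
# shapes here are illustrative, not from any snippet above. Attaching the
# summaries to a post-ReLU activation surfaces its distribution and the
# fraction of dead (zero) units in TensorBoard.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784], name="x")
W1 = tf.Variable(tf.truncated_normal([784, 128], stddev=0.1), name="W1")
b1 = tf.Variable(tf.zeros([128]), name="b1")
h1 = tf.nn.relu(tf.matmul(x, W1) + b1, name="hidden1")

# Writes "hidden1/activation" (histogram) and "hidden1/sparsity" (scalar),
# a cheap way to spot dying ReLUs during training.
add_activation_summary(h1)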
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        with tf.variable_scope("model") as scope:
            global_step = tf.Variable(0, trainable=False)

            # Get images and labels for CIFAR-10.
            images, labels = cifar10.distorted_inputs()
            images_eval, labels_eval = cifar10.inputs(eval_data=True)

            # Build a Graph that computes the logits predictions from the
            # inference model.
            logits = cifar10.inference(images)
            scope.reuse_variables()
            logits_eval = cifar10.inference(images_eval)

            # Calculate loss.
            loss = cifar10.loss(logits, labels)

            # For evaluation
            top_k = tf.nn.in_top_k(logits, labels, 1)
            top_k_eval = tf.nn.in_top_k(logits_eval, labels_eval, 1)

            # Add precision summaries, fed from Python-side evaluation below.
            summary_train_prec = tf.placeholder(tf.float32)
            summary_eval_prec = tf.placeholder(tf.float32)
            tf.scalar_summary('precision/train', summary_train_prec)
            tf.scalar_summary('precision/eval', summary_eval_prec)

            # Build a Graph that trains the model with one batch of examples
            # and updates the model parameters.
            train_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                              '%.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            EVAL_STEP = 10
            EVAL_NUM_EXAMPLES = 1024
            if step % EVAL_STEP == 0:
                prec_train = evaluate_set(sess, top_k, EVAL_NUM_EXAMPLES)
                prec_eval = evaluate_set(sess, top_k_eval, EVAL_NUM_EXAMPLES)
                print('%s: precision train = %.3f' % (datetime.now(), prec_train))
                print('%s: precision eval = %.3f' % (datetime.now(), prec_eval))

            if step % 100 == 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={summary_train_prec: prec_train,
                                                  summary_eval_prec: prec_eval})
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
w3 = init_weights([3, 3, 64, 128])     # 3x3x64 conv, 128 outputs
w4 = init_weights([128 * 4 * 4, 625])  # FC 128 * 4 * 4 inputs, 625 outputs
w_o = init_weights([625, 10])          # FC 625 inputs, 10 outputs (labels)

p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)
prec = tf.reduce_mean(
    tf.cast(tf.equal(predict_op, tf.argmax(Y, 1)), tf.float32))

summary_cost = tf.scalar_summary('cost', cost)
summary_prec = tf.scalar_summary('prec', prec)

train_writer = tf.train.SummaryWriter("logs/" + net_name + "/train",
                                      flush_secs=5)
test_writer = tf.train.SummaryWriter("logs/" + net_name + "/test",
                                     flush_secs=5)

sav = tf.train.Saver()

# Launch the graph in a session
with tf.Session() as sess:
    # you need to initialize all variables
    tf.initialize_all_variables().run()
    k = 1
m = tf.Variable(tf.random_normal([1], dtype=tf.float32), name='Slope')

# Declare model
output = tf.mul(m, x_graph_input, name='Batch_Multiplication')

# Declare loss function (L1: mean absolute error)
residuals = output - y_graph_input
l1_loss = tf.reduce_mean(tf.abs(residuals), name="L1_Loss")

# Declare optimization function
my_optim = tf.train.GradientDescentOptimizer(0.01)
train_step = my_optim.minimize(l1_loss)

# Visualize a scalar
with tf.name_scope('Slope_Estimate'):
    tf.scalar_summary('Slope_Estimate', tf.squeeze(m))

# Visualize a histogram (errors)
with tf.name_scope('Loss_and_Residuals'):
    tf.histogram_summary('Histogram_Errors', l1_loss)
    tf.histogram_summary('Histogram_Residuals', residuals)

# Declare summary merging operation
summary_op = tf.merge_all_summaries()

# Initialize Variables
init = tf.initialize_all_variables()
sess.run(init)

for i in range(generations):
    batch_indices = np.random.choice(len(x_data_train), size=batch_size)
def train_net(train_dir='./train', val_dir=None, max_steps=100000,
              batch_size=128, max_n_images=1000, n_retrieved=60):
    # Load data
    sim_dir = './sim'
    similarity.create_dataset(train_dir, sim_dir, max_n_images,
                              k_retrieved=n_retrieved)
    data = data_input.read_data_sets(train_dir, val_dir, sim_dir, max_n_images)
    n_comp_im = data.train.training_sim[0].shape[0]

    # Check target f-score
    label_dict = data_input.get_label_dict(train_dir, val_dir)
    # target_f_score = check_score.check_f_score(data, label_dict)
    # print('Target f-score: %.3f' % target_f_score)

    # Prepare graph data
    with tf.name_scope('data'):
        x = tf.placeholder(tf.float32, [None, 2048], name="input")
        y_ = tf.placeholder(tf.float32, [None, n_comp_im], name="label")
        keep_prob = tf.placeholder(tf.float32, name="dropout_prob")

    # Add feature to summary
    tf.image_summary('input', tf.reshape(x, [-1, 64, 32, 1]), 10)

    # Compute output
    with tf.name_scope('fc'):
        x_drop = tf.nn.dropout(x, keep_prob)
        fc8W = tf.Variable(tf.truncated_normal([2048, n_comp_im], stddev=0.01),
                           name="fc")
        fc8b = tf.Variable(tf.zeros([n_comp_im]), name="bias")
        y_output = tf.matmul(x_drop, fc8W) + fc8b
        prob = tf.nn.sigmoid(y_output)
        tf.histogram_summary("weights", fc8W)
        tf.histogram_summary("biases", fc8b)
        tf.histogram_summary("y", y_output)

    # Loss
    with tf.name_scope("xent") as scope:
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(y_output, y_))
        tf.scalar_summary("cross-entropy", loss)

    with tf.name_scope("train") as scope:
        train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # Saver
    saver = tf.train.Saver(tf.all_variables())

    # Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)

    # Merge summaries
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(os.path.join(train_dir, 'logs'),
                                            sess.graph)

    # Parameters
    print('########################################')
    print('Epochs: %d' % ((max_steps * batch_size) // max_n_images))
    print('Learning rate:', 1e-4)
    print('Batch size:', batch_size)
    print('Number of training images:', max_n_images)
    print('Number of retrieved images:', n_retrieved)
    print('########################################')

    # Training
    tf.initialize_all_variables().run()
    if val_dir is not None:
        val_data = data.validation.next_batch(batch_size)
    start = time.time()
    for i in range(max_steps):
        batch = data.train.next_batch(batch_size)
        # Refresh labels every step (previously only set inside the logging
        # branch, which fed stale labels to the training op).
        true_labels = np.float32(batch[1])
        if i % 1000 == 0:
            train_loss, train_prob, summary = sess.run(
                [loss, prob, summary_op],
                feed_dict={x: batch[0], y_: true_labels, keep_prob: 1.0})
            f_score_train = 0
            for b_i in range(batch_size):
                sim_images_index = np.argpartition(train_prob[b_i],
                                                   -n_retrieved)[-n_retrieved:]
                sim_images_ids = list(
                    data.train.ref_order_ids[sim_images_index])
                f_score_train += data_input.score(label_dict,
                                                  target=batch[3][b_i],
                                                  selection=sim_images_ids,
                                                  n=50)
            f_score_train /= batch_size
            if val_dir is not None:
                f_score_val = 0
                val_prob = prob.eval(feed_dict={x: val_data[0],
                                                y_: true_labels,
                                                keep_prob: 1.0})
                for b_i in range(batch_size):
                    sim_images_index = np.argpartition(
                        val_prob[b_i], -n_retrieved)[-n_retrieved:]
                    sim_images_ids = list(
                        data.train.ref_order_ids[sim_images_index])
                    f_score_val += data_input.score(label_dict,
                                                    target=val_data[3][b_i],
                                                    selection=sim_images_ids,
                                                    n=50)
                f_score_val /= batch_size
            end = time.time()
            if val_dir is not None:
                print("[%d/%d] Training loss: %.3f || Scores: %.3f (train) / "
                      "%.3f (val) (%.0f sec)"
                      % (i, max_steps, train_loss, f_score_train, f_score_val,
                         (end - start)))
            else:
                print("[%d/%d] Training loss: %.3f || Scores: %.3f (train) "
                      "(%.0f sec)"
                      % (i, max_steps, train_loss, f_score_train,
                         (end - start)))
            start = time.time()
            summary_writer.add_summary(summary, i)
        train_op.run(feed_dict={x: batch[0], y_: true_labels, keep_prob: 0.5})
        if (i % 10000 == 0 or ((i + 1) == max_steps and i > 10000)) and i > 0:
            checkpoint_path = 'pretrained_model.ckpt'
            saver.save(sess, checkpoint_path, global_step=i)
            with open('ref_order.pickle', 'wb') as f:
                pickle.dump(data.train.ref_order_ids, f)

    # F-scores
    f_score_train = 0
    train_prob = prob.eval(feed_dict={x: data.train.images,
                                      y_: data.train.training_sim,
                                      keep_prob: 1.0})
    for b_i in range(data.train.images.shape[0]):
        sim_images_index = np.argpartition(train_prob[b_i],
                                           -n_retrieved)[-n_retrieved:]
        sim_images_ids = list(data.train.ref_order_ids[sim_images_index])
        f_score_train += data_input.score(label_dict,
                                          target=data.train.ids[b_i],
                                          selection=sim_images_ids, n=50)
    f_score_train /= data.train.images.shape[0]
    print('Training F-score: %.4f' % f_score_train)

    if val_dir is not None:
        f_score_val = 0
        l = data.validation.images.shape[0]
        if data.train.images.shape[0] > data.validation.images.shape[0]:
            val_x = data.validation.images
            val_y = data.train.training_sim[0:l]
        else:
            val_x = data.validation.images[0:l]
            val_y = data.train.training_sim
        train_prob = prob.eval(feed_dict={x: val_x, y_: val_y, keep_prob: 1.0})
        for b_i in range(data.validation.images.shape[0]):
            sim_images_index = np.argpartition(train_prob[b_i],
                                               -n_retrieved)[-n_retrieved:]
            sim_images_ids = list(data.train.ref_order_ids[sim_images_index])
            f_score_val += data_input.score(label_dict,
                                            target=data.validation.ids[b_i],
                                            selection=sim_images_ids, n=50)
        f_score_val /= data.validation.images.shape[0]
        print('Validation F-score: %.4f' % f_score_val)
def train(total_loss, global_step):
    """Train CIFAR-10 model.

    Create an optimizer and apply to all trainable variables. Add moving
    average for all trainable variables.

    Args:
      total_loss: Total loss from loss().
      global_step: Integer Variable counting the number of training steps
        processed.
    Returns:
      train_op: op for training.
    """
    # Variables that affect learning rate
    # (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 27165, batch_size = 32).
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    # Apply gradients.
    # def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    #   """Apply gradients to variables.
    #   This is the second part of `minimize()`. It returns an `Operation`
    #   that applies gradients.
    #   Args:
    #     grads_and_vars: List of (gradient, variable) pairs as returned by
    #       `compute_gradients()`.
    #     global_step: Optional `Variable` to increment by one after the
    #       variables have been updated.
    #     name: Optional name for the returned operation. Default to the
    #       name passed to the `Optimizer` constructor.
    #   Returns:
    #     An `Operation` that applies the specified gradients. If
    #     `global_step` was not None, that operation also increments
    #     `global_step`.
    #   Raises:
    #     TypeError: If `grads_and_vars` is malformed.
    #     ValueError: If none of the variables have gradients.
    #   """
    #
    # ### Processing gradients before applying them.
    # Calling `minimize()` takes care of both computing the gradients and
    # applying them to the variables. If you want to process the gradients
    # before applying them you can instead use the optimizer in three steps:
    # 1. Compute the gradients with `compute_gradients()`.
    # 2. Process the gradients as you wish.
    # 3. Apply the processed gradients with `apply_gradients()`.
    # Example:
    # ```python
    # # Create an optimizer.
    # opt = GradientDescentOptimizer(learning_rate=0.1)
    # # Compute the gradients for a list of variables.
    # grads_and_vars = opt.compute_gradients(loss, <list of variables>)
    # # grads_and_vars is a list of tuples (gradient, variable). Do whatever
    # # you need to the 'gradient' part, for example cap them, etc.
    # capped_grads_and_vars = [(MyCapper(gv[0]), gv[1]) for gv in grads_and_vars]
    # # Ask the optimizer to apply the capped gradients.
    # opt.apply_gradients(capped_grads_and_vars)
    # ```
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
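# A self-contained sketch of the three-step pattern described in the
# comments above (compute -> process -> apply), using value clipping as
# the processing step. The variable and loss here are illustrative toys,
# not part of the CIFAR-10 model.
import tensorflow as tf

w = tf.Variable(3.0, name="w")
toy_loss = tf.square(w - 1.0)
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)

grads_and_vars = opt.compute_gradients(toy_loss)        # 1. compute
capped = [(tf.clip_by_value(g, -1.0, 1.0), v)           # 2. process
          for g, v in grads_and_vars if g is not None]
toy_train_op = opt.apply_gradients(capped)              # 3. apply

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(5):
        sess.run(toy_train_op)
    # The raw gradient starts at 4.0 but each step moves w by at most 0.1.
    print(sess.run(w))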
biases["decoder_reconstruction"]) # calculate loss kl_divergence = -0.5 * tf.reduce_sum( 1 + logvar_encoder - tf.square(mu_encoder) - tf.exp(logvar_encoder), reduction_indices=1) bce = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(x_hat, x), reduction_indices=1) loss = tf.reduce_mean(tf.add(kl_divergence, bce)) regularized_loss = tf.add(loss, tf.mul(lam, l2_loss)) # optimization train_step = tf.train.AdamOptimizer(0.001).minimize(regularized_loss) # logging loss_summary = tf.scalar_summary("lower_bound", loss) summary_op = tf.merge_all_summaries() saver = tf.train.Saver() # training n_epoch = 500 batch_size = 50 display_step = 1 with tf.Session() as sess: summary_writer = tf.train.SummaryWriter('experiment', graph=sess.graph) sess.run(tf.initialize_all_variables()) for epoch in range(1, n_epoch + 1): batch = height.train.next_batch(batch_size)
        'cifarnet', is_training=False)
image = image_preprocessing_fn(image, 32, 32)
images, labels = tf.train.batch([image, label],
                                batch_size=BATCH_SIZE,
                                num_threads=2,
                                capacity=5 * BATCH_SIZE)

logits, end_points = squeezenet.inference(images)
predictions = tf.argmax(logits, 1)
accuracy, update_op = slim.metrics.streaming_accuracy(predictions, labels)
tf.scalar_summary('eval/accuracy', accuracy)
summary_op = tf.merge_all_summaries()

num_batches = math.ceil(dataset.num_samples / float(BATCH_SIZE))
sess_config = tf.ConfigProto(allow_soft_placement=True)
slim.evaluation.evaluation_loop(
    master='',
    checkpoint_dir=CHECKPOINT_DIR,
    logdir=EVAL_DIR,
    num_evals=num_batches,
    eval_op=update_op,
    eval_interval_secs=160,
    session_config=sess_config,
    variables_to_restore=slim.get_variables_to_restore())
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print 'train() called'
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size

    data_size = dataset_train.size()
    print 'training size:', data_size

    with tf.Graph().as_default():
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.merge_all_summaries()

        # must be after merge_all_summaries
        validation_loss = tf.placeholder('float32', shape=(),
                                         name='validation_loss')
        validation_summary = tf.scalar_summary('validation_loss',
                                               validation_loss)
        validation_acc = tf.placeholder('float32', shape=(),
                                        name='validation_accuracy')
        validation_acc_summary = tf.scalar_summary('validation_accuracy',
                                                   validation_acc)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if is_finetune:
            saver.restore(sess, ckptfile)
            print 'restore variables done'
        elif caffemodel:
            sess.run(init_op)
            model.load_alexnet(sess, caffemodel)
            print 'loaded pretrained caffemodel:', caffemodel
        else:
            # from scratch
            sess.run(init_op)
            print 'init_op done'

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph=sess.graph)

        step = startstep
        for epoch in xrange(100):
            print 'epoch:', epoch
            dataset_train.shuffle()
            # dataset_val.shuffle()

            for batch_x, batch_y in dataset_train.batches(batch_size):
                # print batch_x_v[0,0,:]
                # print batch_y
                if step >= FLAGS.max_steps:
                    break
                step += 1

                start_time = time.time()
                feed_dict = {
                    image_: batch_x,
                    y_: batch_y,
                    keep_prob_: 0.5,
                    phase_train_: True
                }

                _, loss_value, logitsyo, _ = sess.run(
                    [train_op, loss, logits, print_op],
                    feed_dict=feed_dict)
                # print batch_y
                # print logitsyo.max(), logitsyo.min()

                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0 or step < 30:
                    sec_per_batch = float(duration)
                    print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                        % (datetime.now(), step, loss_value,
                           FLAGS.batch_size/duration, sec_per_batch)

                # val
                if step % 100 == 0:  # and step > 0:
                    val_losses = []
                    val_logits = []
                    predictions = np.array([])
                    val_y = []

                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(
                                batch_size, g_.VAL_SAMPLE_SIZE)):
                            # enumerate(dataset_val.batches(batch_size)):
                        val_feed_dict = {
                            image_: val_batch_x,
                            y_: val_batch_y,
                            keep_prob_: 1.0,
                            phase_train_: False
                        }
                        val_loss, pred, val_logit, _ = sess.run(
                            [loss, prediction, logits, print_op],
                            feed_dict=val_feed_dict)
                        val_losses.append(val_loss)
                        val_logits.extend(val_logit.tolist())
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_logits = np.array(val_logits)
                    # print val_logits
                    # print val_y
                    # print predictions
                    # val_logits.dump('val_logits.npy')
                    # predictions.dump('predictions.npy')
                    # np.array(val_y).dump('val_y.npy')

                    val_loss = np.mean(val_losses)
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print '%s: step %d, validation loss=%.4f, acc=%f' % \
                        (datetime.now(), step, val_loss, acc*100.)

                    # validation summary
                    val_loss_summ = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(
                        validation_acc_summary,
                        feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                if step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                if step % 200 == 0 or (step+1) == FLAGS.max_steps \
                        and step > startstep:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
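# The validation summaries above follow a useful pattern: a placeholder
# whose scalar_summary is fed with a metric computed on the Python side,
# so values that are not graph tensors (sklearn accuracy, F-scores, ...)
# still land in TensorBoard. A minimal self-contained sketch, with an
# illustrative log directory and stand-in metric values:
import tensorflow as tf

val_acc_ph = tf.placeholder(tf.float32, shape=(), name='val_acc')
val_acc_summary = tf.scalar_summary('validation_accuracy', val_acc_ph)

with tf.Session() as sess:
    writer = tf.train.SummaryWriter('/tmp/demo_logs', graph=sess.graph)
    for step, acc in enumerate([0.5, 0.6, 0.7]):  # stand-in metrics
        summ = sess.run(val_acc_summary, feed_dict={val_acc_ph: acc})
        writer.add_summary(summ, step)
    writer.flush()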
# from vgg_m_net import VGG_M_2048_NET as Network
from berlin_net import BERLIN_NET as Network

config = yaml.load(file("config.yaml"))

# Create model
net = Network()
x = tf.placeholder(tf.types.float32, [None] + net.input_shape)
y = tf.placeholder(tf.types.float32, [None] + net.output_shape)

# Learning operation
logits = net.build_net(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
tf.scalar_summary("loss", cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.types.float32))
tf.scalar_summary("accuracy", accuracy)

# Datasets
init = tf.initialize_all_variables()
train_data = CSVInput(config["TRAINING_DATA"], config["BATCH_SIZE"],
                      net.input_shape, net.output_shape)
test_data = CSVInput(config["TEST_DATA"], config["BATCH_SIZE"],
                     net.input_shape, net.output_shape)

# Summary for Tensorboard
merged_summary_op = tf.merge_all_summaries()
# ... of activation. The input is multiplied by the connection weights W,
# then the bias B is added to the result, and the whole thing is passed
# through the activation function.
with tf.name_scope("perceptron") as scope:
    output = tf.sigmoid(tf.matmul(_input, W) + B)

# This part of the code defines the cost to be minimized during training.
# It computes the (squared) difference between the target and the network
# output. The result is a numeric value that serves as the yardstick for
# whether the network is learning or not: if training is working, the
# cost goes down.
with tf.name_scope("cost") as scope:
    cost = tf.reduce_mean(tf.nn.l2_loss(_target - output))
    tf.scalar_summary("cost", cost)

# Define the optimizer and its learning rate. Here we use the classic
# Gradient Descent optimizer. The learning rate is generally found by
# trial and error; in our case a value of 0.01 does the job.
with tf.name_scope("optimizer") as scope:
    optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

# Initialize all the tf variables we declared above.
init = tf.initialize_all_variables()

# Declare a "Session" object that we will use to manage our working
# session.
sess = tf.Session()
def scalar_summary(name, tensor):
    """Create a scalar summary, dispatching on the installed TF version."""
    if TF_VERSION == '0.11':
        return tf.scalar_summary(name, tensor)
    else:
        return tf.summary.scalar(name, tensor)
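# A companion wrapper in the same spirit, assuming the same TF_VERSION
# string is available in scope: releases up to 0.11 expose
# tf.histogram_summary, while 0.12+ uses tf.summary.histogram.
def histogram_summary(name, tensor):
    """Create a histogram summary across TF versions."""
    if TF_VERSION == '0.11':
        return tf.histogram_summary(name, tensor)
    else:
        return tf.summary.histogram(name, tensor)

# Call sites stay identical either way:
# loss_summ = scalar_summary('loss', loss)
# w_summ = histogram_summary('weights', W)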
L2NormConst = 0.001

train_vars = tf.trainable_variables()
for var in tf.trainable_variables():
    print(var.name)

loss = tf.reduce_mean(
    tf.square(tf.sub(model.y_, model.y, name='loss_subtract'))
)  # + tf.add_n([tf.nn.l2_loss(v) for v in train_vars]) * L2NormConst
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
sess.run(tf.global_variables_initializer())

logs_path = './logs'
summary_writer = tf.train.SummaryWriter(logs_path,
                                        graph=tf.get_default_graph())
tf.scalar_summary("loss", loss)
# Use the same (pre-1.0) summary API as tf.scalar_summary above rather
# than mixing in tf.summary.merge_all.
merged_summary_op = tf.merge_all_summaries()

# Training loop variables
epochs = 100
batch_size = 50
num_samples = data.num_examples
step_size = int(num_samples / batch_size)

saver = tf.train.Saver()

for epoch in range(epochs):
    for i in range(step_size):
        batch = data.next_batch(batch_size)
        train_step.run(feed_dict={
            model.x: batch[0],
            model.y_: batch[1],