def get_run_op():
    # Create an optimizer that performs gradient descent.
    #opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    slice_size = FLAGS.batch_size / FLAGS.num_cuts
    print('Slice size:{}'.format(slice_size))
    data = None
    label = None
    last_fc = [tf.no_op()]
    with tf.device('/gpu:0'):
        data = tf.get_variable(
            name='data',
            shape=[slice_size, FLAGS.hidden_size],
            trainable=False)
        '''
        label = tf.get_variable(
            name='label',
            shape=[slice_size, FLAGS.hidden_size],
            trainable=False)
        with tf.variable_scope('fc_in'):
            weight_in = tf.zeros([1000, FLAGS.hidden_size])
            for k in xrange(FLAGS.num_cuts):
                with tf.control_dependencies([last_fc[-1]]):
                    last_fc.append(tf.matmul(data[k+1], weight_in))
        '''
        for i in xrange(FLAGS.num_cuts):
            last_fc.append(data)
    for i in xrange(FLAGS.num_layers):
        dev = '/gpu:%d' % (i * FLAGS.num_gpus / FLAGS.num_layers)
        with tf.device(dev), scopes.arg_scope([variables.variable], device=dev):
            tmp_fc = [tf.no_op()]
            with tf.variable_scope('fc%d' % i):
                w = tf.get_variable(
                    name='w',
                    shape=[FLAGS.hidden_size, FLAGS.hidden_size],
                    trainable=True)
                for k in xrange(FLAGS.num_cuts):
                    with tf.control_dependencies([tmp_fc[-1]]):
                        tmp_fc.append(tf.matmul(last_fc[k+1], w))
            last_fc = tmp_fc
        if i == FLAGS.num_layers - 1:
            with tf.control_dependencies(last_fc):
                train_op = tf.no_op()
    '''
    with tf.device('/gpu:%d' % (FLAGS.num_gpus - 1)):
        tmp_fc = [tf.no_op()]
        with tf.variable_scope('fc_out'):
            weight_out = tf.zeros([FLAGS.hidden_size, 1000])
            for k in xrange(FLAGS.num_cuts):
                with tf.control_dependencies([tmp_fc[-1]]):
                    tmp_fc.append(tf.matmul(last_fc[k+1], weight_out))
        last_fc = tmp_fc
    loss = tf.nn.softmax_cross_entropy_with_logits(last_fc, labels, name='xentropy')
    grads = opt.compute_gradients(loss)
    apply_gradient_op = opt.apply_gradients(grads)
    train_op = tf.group(apply_gradient_op)
    '''
    init_op = tf.initialize_all_variables()
    return init_op, train_op
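# A minimal driver sketch for get_run_op() above, assuming FLAGS is already
# parsed and a GPU session is available; `_run_benchmark` and `num_iters`
# are hypothetical names, not part of the original code.
def _run_benchmark(num_iters=10):
    init_op, train_op = get_run_op()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(init_op)
        for _ in xrange(num_iters):
            sess.run(train_op)  # each run executes the chained matmuls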
def run_epoch(self, session, x, y=None, train_op=None, shuffle=True, verbose=10):
    dp = self.config.dropout
    predictions = self.predictions
    loss = self.loss
    if not train_op:
        train_op = tf.no_op()
        dp = 1
    if y is None:
        loss = tf.no_op()
    total_steps = sum(1 for x in data_iterator(x, y, self.config.batch_size))
    total_loss = []
    total_pred = []
    for step, (_x, _y) in enumerate(
            data_iterator(x, y, self.config.batch_size, shuffle)):
        feed = {self.input_placeholder: _x,
                self.dropout_placeholder: dp}
        if _y is not None:
            feed[self.labels_placeholder] = _y
        _pred, _loss, _ = session.run([predictions, loss, train_op],
                                      feed_dict=feed)
        total_pred.append(_pred)
        if y is not None:
            total_loss.append(_loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : loss = {}'.format(
                    step, total_steps, np.mean(total_loss)))
                sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
        sys.stdout.flush()
    assert np.vstack(total_pred).reshape([-1]).shape[0] == x.shape[0], \
        'pred and x not equal size'
    return np.vstack(total_pred).reshape([-1]), np.mean(total_loss)
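# Hedged usage sketch: the same run_epoch() serves training and inference.
# Passing train_op keeps dropout at config.dropout and updates parameters;
# omitting it swaps in tf.no_op() and disables dropout. `model`, `sess`,
# `X_train`, `y_train`, and `X_new` are assumed names for illustration.
#
#   preds, train_loss = model.run_epoch(sess, X_train, y_train,
#                                       train_op=model.train_op)
#   preds, _ = model.run_epoch(sess, X_new)  # inference: no labels, no updates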
def main(_):
    assert(FLAGS.num_gpus > 1)
    slice_size = int(FLAGS.hidden_size / FLAGS.num_gpus)
    feature_size = slice_size * FLAGS.num_gpus
    print("Slice size: {} Feature size: {}".format(slice_size, feature_size))
    weight_shape = [slice_size, feature_size]
    # create graph
    weights, grads = make_weights(weight_shape)
    ff_deps = [[tf.no_op() for j in range(FLAGS.num_gpus)]
               for i in range(FLAGS.num_layers)]
    bp_deps = [[tf.no_op() for j in range(FLAGS.num_gpus)]
               for i in range(FLAGS.num_layers)]
    for i in range(FLAGS.num_cuts):
        with tf.name_scope('data_cut%d' % i):
            data = make_data(FLAGS.batch_size / FLAGS.num_cuts, slice_size)
        with tf.name_scope('model_cut%d' % i):
            ff_deps, bp_deps = ff_bp(data, weights, grads, ff_deps, bp_deps)
    # create session
    sess = tf.Session()
    # init variables
    print('Initialize Variables')
    sess.run(tf.initialize_all_variables())
    print('Initialize Done')
    # run
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(log_dir, sess.graph)
    grads_flatten = sum(grads, [])
    with tf.control_dependencies(grads_flatten):
        train_op = tf.no_op()
    time_tensorflow_run(sess, train_op, 'Training')
def main(unused_args):
    config = get_config(FLAGS.model_size)
    eval_config = get_config(FLAGS.model_size)
    saved_model_path = FLAGS.model_path
    weights_dir = FLAGS.weights_dir
    verbose = FLAGS.verbose
    debug = FLAGS.debug

    if weights_dir is not None:
        if not os.path.exists(weights_dir):
            os.mkdir(weights_dir)

    # load up PTB data
    if not debug:
        raw_data = reader.ptb_raw_data(FLAGS.data_path, "ptb.train.txt",
                                       "ptb.valid.txt", "ptb.test.txt")
    else:
        raw_data = reader.ptb_raw_data(FLAGS.data_path, "emma.txt",
                                       "emma.val.txt", "emma.test.txt")
    train_data, val_data, test_data, vocab, word_to_id = raw_data

    with tf.Graph().as_default(), tf.Session() as session:
        initialiser = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope('model', reuse=None, initializer=initialiser):
            m = ACTModel(config, is_training=True)
            # if we have a saved/pre-trained model, load it.
            if saved_model_path is not None:
                saveload.main(saved_model_path, session)
        with tf.variable_scope("model", reuse=True):
            m_val = ACTModel(config, is_training=False)
            m_test = ACTModel(eval_config, is_training=False)

        tf.initialize_all_variables().run()
        print("starting training")
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            session.run(tf.assign(m.lr, config.learning_rate * lr_decay))
            train_loss = run_epoch(session, m, train_data, m.train_op,
                                   verbose=True)
            valid_loss = run_epoch(session, m_val, val_data, tf.no_op())
            if verbose:
                print("Epoch: {} Learning rate: {}".format(i + 1,
                                                           session.run(m.lr)))
                print("Epoch: {} Train Loss: {}".format(i + 1, train_loss))
                print("Epoch: %d Valid Loss: %.3f" % (i + 1, valid_loss))
            # save weights in a pickled dictionary format
            if weights_dir is not None:
                date = "{:%m.%d.%H.%M}".format(datetime.now())
                saveload.main(
                    weights_dir + "/Epoch_{:02}Train_{:0.3f}Val_{:0.3f}date{}.pkl"
                    .format(i + 1, train_loss, valid_loss, date), session)

        test_loss = run_epoch(session, m_test, test_data, tf.no_op())
        if verbose:
            print("Test Perplexity: %.3f" % test_loss)
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
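# Quick sanity check of the decay schedule above, with hypothetical values
# learning_rate=1.0, lr_decay=0.5, max_epoch=4: the rate stays flat until
# max_epoch, then decays geometrically.
#
#   [1.0 * 0.5 ** max(i - 4, 0.0) for i in range(8)]
#   # -> [1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.25, 0.125]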
def build_eval_graph(self):
    # Keep track of the totals while running through the batch data
    self.total_loss = tf.Variable(0.0, trainable=False, collections=[])
    self.total_correct = tf.Variable(0.0, trainable=False, collections=[])
    self.example_count = tf.Variable(0.0, trainable=False, collections=[])

    # Calculate the means
    self.mean_loss = self.total_loss / self.example_count
    self.accuracy = self.total_correct / self.example_count

    # Operations that modify the stateful variables
    inc_total_loss = self.total_loss.assign_add(self.model.total_loss)
    inc_total_correct = self.total_correct.assign_add(
        tf.reduce_sum(tf.cast(self.model.correct_predictions, "float")))
    inc_example_count = self.example_count.assign_add(self.model.batch_size)

    # Operation to reset all the stateful vars. Should be called before
    # starting a data set evaluation.
    with tf.control_dependencies(
            [self.total_loss.initializer, self.total_correct.initializer,
             self.example_count.initializer]):
        self.eval_reset = tf.no_op()

    # Operation to modify the stateful variables with data from one batch.
    # Should be called for each batch in the evaluation set.
    with tf.control_dependencies(
            [inc_total_loss, inc_total_correct, inc_example_count]):
        self.eval_step = tf.no_op()

    # Summaries
    summary_mean_loss = tf.scalar_summary("mean_loss", self.mean_loss)
    summary_acc = tf.scalar_summary("accuracy", self.accuracy)
    self.summaries = tf.merge_summary([summary_mean_loss, summary_acc])
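# A minimal sketch of the evaluation loop build_eval_graph() is designed
# for: reset the accumulators once, run eval_step per batch (the no-op
# forces the assign_add ops to fire), then read the means. `sess` and
# `batches` are assumptions for illustration.
#
#   sess.run(self.eval_reset)                      # zero the counters
#   for feed in batches:
#       sess.run(self.eval_step, feed_dict=feed)   # accumulate totals
#   loss, acc = sess.run([self.mean_loss, self.accuracy])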
def testQueueRunnerSerializationRoundTrip(self):
    graph = tf.Graph()
    with graph.as_default():
        queue = tf.FIFOQueue(10, tf.float32, name="queue")
        enqueue_op = tf.no_op(name="enqueue")
        close_op = tf.no_op(name="close")
        cancel_op = tf.no_op(name="cancel")
        qr0 = tf.train.QueueRunner(
            queue, [enqueue_op], close_op, cancel_op,
            queue_closed_exception_types=(tf.errors.OutOfRangeError,
                                          tf.errors.CancelledError))
        qr0_proto = tf.train.QueueRunner.to_proto(qr0)
        qr0_recon = tf.train.QueueRunner.from_proto(qr0_proto)
        self.assertEqual("queue", qr0_recon.queue.name)
        self.assertEqual(1, len(qr0_recon.enqueue_ops))
        self.assertEqual(enqueue_op, qr0_recon.enqueue_ops[0])
        self.assertEqual(close_op, qr0_recon.close_op)
        self.assertEqual(cancel_op, qr0_recon.cancel_op)
        self.assertEqual(
            (tf.errors.OutOfRangeError, tf.errors.CancelledError),
            qr0_recon.queue_closed_exception_types)

        # Assert we reconstruct an OutOfRangeError for QueueRunners
        # created before QueueRunnerDef had a queue_closed_exception_types field.
        del qr0_proto.queue_closed_exception_types[:]
        qr0_legacy_recon = tf.train.QueueRunner.from_proto(qr0_proto)
        self.assertEqual("queue", qr0_legacy_recon.queue.name)
        self.assertEqual(1, len(qr0_legacy_recon.enqueue_ops))
        self.assertEqual(enqueue_op, qr0_legacy_recon.enqueue_ops[0])
        self.assertEqual(close_op, qr0_legacy_recon.close_op)
        self.assertEqual(cancel_op, qr0_legacy_recon.cancel_op)
        self.assertEqual((tf.errors.OutOfRangeError,),
                         qr0_legacy_recon.queue_closed_exception_types)
def main(_):
    filename = "Data11-17.txt"
    vectors_data1, labels_data1 = read_data.read_data(filename)
    filename = "valid18-20.txt"
    vectors_data2, labels_data2 = read_data.read_data(filename)
    filename = "Data21-25.txt"
    vectors_data3, labels_data3 = read_data.read_data(filename)
    vectors_data = np.vstack((vectors_data1, vectors_data2, vectors_data3))
    print(vectors_data.shape)
    labels_data = np.vstack((np.reshape(labels_data1, (len(labels_data1), 1)),
                             np.reshape(labels_data2, (len(labels_data2), 1)),
                             np.reshape(labels_data3, (len(labels_data3), 1))))
    labels_data = np.reshape(labels_data, -1)
    print(labels_data.shape)
    filename = "Data4-10.txt"
    validation_data, vlabels_data = read_data.read_data(filename)
    filename = "Data26-29.txt"
    test_data, tlabels_data = read_data.read_data(filename)
    test_data = test_data[0:8000, ]
    tlabels_data = tlabels_data[0:8000, ]

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        summary_writer = tf.train.SummaryWriter("train/lstm3s", session.graph)

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, vectors_data, labels_data,
                                         m.train_op, summary_writer, verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, validation_data,
                                         vlabels_data, tf.no_op(), summary_writer)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tlabels_data,
                                    tf.no_op(), summary_writer)
        print("Test Perplexity: %.3f" % test_perplexity)
def train():
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to data directory")

    raw_data = reader.raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _, word_to_id = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, is_testing=False, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, is_testing=False, config=config)
            mtest = PTBModel(is_training=False, is_testing=True,
                             config=eval_config)

        # tf.initialize_all_variables().run()
        if not os.path.exists(FLAGS.train_path):
            os.makedirs(FLAGS.train_path)
        session.run(tf.initialize_all_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_path)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            m.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            session.run(tf.initialize_all_variables())

        valid_perplexity_old = 1000000000000000000
        for i in range(config.max_max_epoch):
            [train_data, valid_data, test_data] = reader.split_data(raw_data)
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            if valid_perplexity > valid_perplexity_old:
                break
            checkpoint_path = os.path.join(FLAGS.train_path, "translate.ckpt")
            m.saver.save(session, checkpoint_path, global_step=i)
            valid_perplexity_old = valid_perplexity

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
def main(unused_args):
    if not FLAGS.data_path:
        raise ValueError("Must specify --data_path to PTB data directory")
    if not FLAGS.save_path:
        raise ValueError("Must specify --save_path to model directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(FLAGS.save_path)
        if ckpt:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            tf.initialize_all_variables().run()

        if not FLAGS.testonly:
            for i in range(config.max_max_epoch):
                lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" % (i + 1,
                                                         session.run(m.lr)))
                train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                             verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" % (i + 1,
                                                            train_perplexity))
                save_path = saver.save(session, FLAGS.save_path + '/model.ckpt', i)
                print("Model saved in: %s" % save_path)
                valid_perplexity = run_epoch(session, mvalid, valid_data,
                                             tf.no_op())
                print("Epoch: %d Valid Perplexity: %.3f" % (i + 1,
                                                            valid_perplexity))
        else:
            print("Running only a perplexity test")

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op(),
                                    verbose=True)
        print("Test Perplexity: %.3f" % test_perplexity)
def main(config_size='small', num_epochs=10):

    def get_config(config_size):
        config_size = config_size.lower()
        if config_size == 'small':
            return c.SmallConfig()
        elif config_size == 'medium':
            return c.MediumConfig()
        elif config_size == 'large':
            return c.LargeConfig()
        else:
            raise ValueError('Unknown config size {} (small, medium, large)'
                             .format(config_size))

    def run_epoch(session, m, data, eval_op, verbose=False):
        """Runs the model on the given data."""
        epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
        print(epoch_size)
        start_time = time.time()
        costs = 0.0
        iters = 0
        state = m.initial_state.eval()
        for step, (x, y) in enumerate(seq_iterator(data, m.batch_size,
                                                   m.num_steps)):
            cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                         {m.input_data: x,
                                          m.targets: y,
                                          m.initial_state: state})
            costs += cost
            iters += m.num_steps
            print_interval = 20
            if verbose and epoch_size > print_interval \
                    and step % (epoch_size // print_interval) == print_interval:
                print("%.3f mse: %.8f speed: %.0f ips" %
                      (step * 1.0 / epoch_size, costs / iters,
                       iters * m.batch_size / (time.time() - start_time)))
        return costs / (iters if iters > 0 else 1)

    with tf.Graph().as_default(), tf.Session() as session:
        config = get_config(config_size)
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = StockLSTM(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mtest = StockLSTM(is_training=False, config=config)

        tf.initialize_all_variables().run()

        train_data, valid_data, test_data = get_data()
        for epoch in xrange(num_epochs):
            lr_decay = config.lr_decay ** max(epoch - num_epochs, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            cur_lr = session.run(m.lr)

            mse = run_epoch(session, m, train_data, m.train_op, verbose=True)
            vmse = run_epoch(session, mtest, valid_data, tf.no_op())
            print("Epoch: %d - learning rate: %.3f - train mse: %.3f - test mse: %.3f" %
                  (epoch, cur_lr, mse, vmse))

        tmse = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test mse: %.3f" % tmse)
def main(_):
    ##### Configure these based on current situation. #####
    preload_model = False  # Shall we preload preloaded_epoch or train from scratch?
    preloaded_epoch = 0    # The epoch to load (if required). Counting from 0.
    #######################################################
    if preload_model:
        load_model_file = "model{}.ckpt".format(preloaded_epoch)
        preloaded_epoch += 1
    else:
        preloaded_epoch = 0

    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        if preload_model:
            saver.restore(session, load_model_file)

        for i in range(preloaded_epoch, config.max_max_epoch):
            # Some simple learning rate scheduling. :-)
            if i > 3:
                config.learning_rate = 0.1
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            # Save the variables to disk.
            save_path = saver.save(session, "model{}.ckpt".format(i))
            print("Model saved in file: %s" % save_path)

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [v for _, v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise ValueError("Couldn't find var in g_vars or d_vars")

    w = [tf.Variable(self.config.start_at or 0.0),
         tf.Variable(self.config.start_at or 0.0)]
    Vidv = [self.gan.trainer.d_loss, self.gan.trainer.g_loss]
    #Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2. * self.gan.trainer.g_loss,
    #        -1/2. * self.gan.trainer.d_loss - 1/2. * self.gan.trainer.g_loss]
    Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2. * self.gan.trainer.g_loss,
            1/2. * self.gan.trainer.d_loss + 1/2. * self.gan.trainer.g_loss]

    wlr = self.config.w_learn_rate or 0.01
    wt1 = [w[0] + wlr * (Vidv[0] - Vsoc[0]),
           w[1] + wlr * (Vidv[1] - Vsoc[1])]

    def clamped(net):
        return tf.maximum(self.config.min or 0.,
                          tf.minimum(net, self.config.max or 1.))

    self._prepare()

    wt1 = [clamped(wt1[0]), clamped(wt1[1])]
    self.gan.add_metric('wt0', wt1[0])
    self.gan.add_metric('wt1', wt1[1])
    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(w, wt1)])  # store variables
    with tf.get_default_graph().control_dependencies([op1]):
        Vi = [(1. - w[0]) * Vidv[0] + w[0] * Vsoc[0],
              (1. - w[1]) * Vidv[1] + w[1] * Vsoc[1]]
        if self.config.reverse_w:
            Vi = [(w[0]) * Vidv[0] + (1.0 - w[0]) * Vsoc[0],
                  (w[1]) * Vidv[1] + (1.0 - w[1]) * Vsoc[1]]
        self.gan.add_metric('w0', w[0])
        self.gan.add_metric('w1', w[1])

        new_grads = tf.gradients(Vi[0], d_vars) + tf.gradients(Vi[1], g_vars)
        self.gan.trainer.d_loss = Vi[0]
        self.gan.trainer.g_loss = Vi[1]
        new_grads_and_vars = list(zip(new_grads, var_list)).copy()
        op3 = self.optimizer.apply_gradients(new_grads_and_vars.copy(),
                                             global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):
            if self.config.w_l1:
                # return to selfish state
                wt1 = [wt1[0] + self.config.w_l1 * ((self.config.l1_default or 0.0) - wt1[0]),
                       wt1[1] + self.config.w_l1 * ((self.config.l1_default or 0.0) - wt1[1])]
                op4 = tf.group(*[tf.assign(w, v) for w, v in zip(w, wt1)])  # store variables
                with tf.get_default_graph().control_dependencies([op4]):
                    self.gan.add_metric('l1w0', w[0])
                    self.gan.add_metric('l1w1', w[1])
                    return tf.no_op()
            else:
                return tf.no_op()
def main(_):
    if not FLAGS.data_path:
        #raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    accsTrain = []
    accsTest = []

    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        print("Starting")
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, accTrain = run_epoch(session, m, train_data,
                                                   m.train_op, id2word,
                                                   verbose=True)
            accsTrain.append(accTrain)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity, _ = run_epoch(session, mvalid, valid_data,
                                            tf.no_op(), id2word)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

            test_perplexity, accTest = run_epoch(session, mtest, test_data,
                                                 tf.no_op(), id2word)
            accsTest.append(accTest)
            print("Test Perplexity: %.3f" % test_perplexity)

    plt.figure()
    plt.plot(accsTrain, label="train")
    plt.plot(accsTest, label="test")
    plt.show()
    plt.close()
def _cached_copy(self, var, name, pass_through=False):
    """Helper function to create a worker cached copy of a Variable.

    This assigns the var (either a single Variable or a list of Variables)
    to local transient cache Variable(s). Note that if var is a list of
    Variables, the assignment is done sequentially to minimize the memory
    overhead. Also note that if pass_through is set to True, this does not
    create new Variables but simply returns the input back.

    Args:
      var: A Variable or a list of Variables to cache.
      name: name of cached Variable.
      pass_through: when set to True, this simply passes the var back
        through an identity operator and does not actually create a cache.

    Returns:
      Tuple consisting of the following three entries:
      cache: the new transient Variable or list of transient Variables
        corresponding one-to-one with var.
      cache_init: op to initialize the Variable or the list of Variables.
      cache_reset: op to reset the Variable or the list of Variables to some
        default value.
    """
    if var is None:
        return None, None, None
    elif pass_through:
        cache = var
        cache_init = tf.no_op()
        cache_reset = tf.no_op()
    elif isinstance(var, tf.Variable):
        cache = WALSModel._transient_var(name=name)
        with ops.colocate_with(cache):
            cache_init = tf.assign(cache, var, validate_shape=False)
            cache_reset = tf.assign(cache, 1.0, validate_shape=False)
    else:
        assert isinstance(var, list)
        assert var
        cache = [WALSModel._transient_var(name='%s_shard_%d' % (name, i))
                 for i in xrange(len(var))]
        reset_ops = []
        for i, c in enumerate(cache):
            with ops.colocate_with(c):
                if i == 0:
                    cache_init = tf.assign(c, var[i], validate_shape=False)
                else:
                    with ops.control_dependencies([cache_init]):
                        cache_init = tf.assign(c, var[i], validate_shape=False)
                reset_ops.append(tf.assign(c, 1.0, validate_shape=False))
        cache_reset = tf.group(*reset_ops)
    return cache, cache_init, cache_reset
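# Hedged usage sketch for _cached_copy(): cache_init must be run before the
# cache is read, and cache_reset restores the default value (1.0). `model`,
# `sess`, and `row_factors` are illustrative assumptions.
#
#   cache, cache_init, cache_reset = model._cached_copy(row_factors, 'rows')
#   sess.run(cache_init)    # populate the transient copy
#   ...                     # workers read `cache` instead of `row_factors`
#   sess.run(cache_reset)   # back to the default value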
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to data directory")

    config = get_config()
    eval_config = get_config()
    # eval_config.batch_size = 1
    # eval_config.num_steps = 1

    raw_data, vocab_size = reader.converted_data(FLAGS.data_path,
                                                 max_len=config.num_steps,
                                                 min_nwords=200)
    config.vocab_size = vocab_size
    eval_config.vocab_size = vocab_size
    train_data, valid_data, test_data = reader.split_rawdata(raw_data)

    sess = tf.InteractiveSession()
    if os.path.exists(FLAGS.log_dir):
        shutil.rmtree(FLAGS.log_dir)
    writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph_def)

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = GenderModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = GenderModel(is_training=False, config=config)
            mtest = GenderModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_error, train_acc, summary = run_epoch(session, m, train_data,
                                                        m.train_op, verbose=True)
            writer.add_summary(summary, i)
            print("Epoch: %d Train xentropy: %.3f" % (i + 1, train_error))
            print("Epoch: %d Train accuracy: %.3f" % (i + 1, train_acc))
            valid_error, valid_acc, summary = run_epoch(session, mvalid,
                                                        valid_data, tf.no_op())
            print("Epoch: %d Validation xentropy: %.3f" % (i + 1, valid_error))
            print("Epoch: %d Validation accuracy: %.3f" % (i + 1, valid_acc))

        test_err, test_acc, summary = run_epoch(session, mtest, test_data,
                                                tf.no_op())
        print("Test Accuracy %.3f" % test_acc)
def testReuseVars(self):
    height, width = 3, 3
    with self.test_session() as sess:
        image_shape = (10, height, width, 3)
        image_values = np.random.rand(*image_shape)
        expected_mean = np.mean(image_values, axis=(0, 1, 2))
        expected_var = np.var(image_values, axis=(0, 1, 2))
        images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
        output = ops.batch_norm(images, decay=0.1, is_training=False)
        update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
        with tf.control_dependencies(update_ops):
            barrier = tf.no_op(name='gradient_barrier')
            output = control_flow_ops.with_dependencies([barrier], output)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
        moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
        mean, variance = sess.run([moving_mean, moving_variance])
        # After initialization moving_mean == 0 and moving_variance == 1.
        self.assertAllClose(mean, [0] * 3)
        self.assertAllClose(variance, [1] * 3)
        # Simulate assignment from saver restore.
        init_assigns = [tf.assign(moving_mean, expected_mean),
                        tf.assign(moving_variance, expected_var)]
        sess.run(init_assigns)
        for _ in range(10):
            sess.run([output], {images: np.random.rand(*image_shape)})
        mean = moving_mean.eval()
        variance = moving_variance.eval()
        # Although we feed different images, the moving_mean and moving_variance
        # shouldn't change.
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(variance, expected_var)
def namignize(names, checkpoint_path, config):
    """Recognizes names and prints the perplexity of the model for each name
    in the list.

    Args:
        names: a list of names in the model format
        checkpoint_path: the path to restore the trained model from; should
            not include the model name, just the path to it
        config: one of the above configs that specify the model and how it
            should be run and trained

    Returns:
        None
    """
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        for name in names:
            x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps)

            cost, loss, _ = session.run(
                [m.cost, m.loss, tf.no_op()],
                {m.input_data: x,
                 m.targets: y,
                 m.weights: np.concatenate((
                     np.ones(len(name)),
                     np.zeros(m.batch_size * m.num_steps - len(name))))})

            print("Name {} gives us a perplexity of {}".format(
                name, np.exp(cost)))
def moving_average(value, window):
    value = tf.to_float(value)
    shape = value.get_shape()

    queue_init = tf.zeros(tf.TensorShape(window).concatenate(shape))
    total_init = tf.zeros(shape)
    num_init = tf.constant(0, dtype=tf.float32)

    queue = tf.FIFOQueue(window, [tf.float32], shapes=[shape])
    total = tf.Variable(total_init, trainable=False)
    num = tf.Variable(num_init, trainable=False)

    init = tf.cond(
        tf.equal(queue.size(), 0),
        lambda: tf.group(
            queue.enqueue_many(queue_init),
            total.assign(total_init),
            num.assign(num_init)),
        lambda: tf.no_op())

    with tf.control_dependencies([init]):
        total_ = total + value - queue.dequeue()
        num_ = num + 1
        value_averaged = total_ / (tf.minimum(num_, window) + EPSILON)

        with tf.control_dependencies([queue.enqueue([value]),
                                      total.assign(total_),
                                      num.assign(num_)]):
            return tf.identity(value_averaged)
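# Hedged example of wiring moving_average() into a graph: a window-3 running
# mean over a scalar placeholder. Assumes EPSILON is defined elsewhere
# (e.g. 1e-8); the names below are illustrative only.
#
#   value = tf.placeholder(tf.float32, shape=[])
#   avg = moving_average(value, window=3)
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       for v in [1., 2., 3.]:
#           print(sess.run(avg, feed_dict={value: v}))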
def testName(self):
    with tf.name_scope("scope"):
        queue = tf.FIFOQueue(10, tf.float32, name="queue")
    qr = tf.train.QueueRunner(queue, [tf.no_op()])
    self.assertEqual("scope/queue", qr.name)
    tf.train.add_queue_runner(qr)
    self.assertEqual(1, len(tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS,
                                              "scope")))
def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
        train_op = tf.no_op()
        dp = 1.0
    total_steps = sum(1 for x in ptb_iterator(data, config.batch_size,
                                              config.num_steps))
    total_loss = []
    state = self.initial_state.eval()
    for step, (x, y) in enumerate(
            ptb_iterator(data, config.batch_size, config.num_steps)):
        # We need to pass in the initial state and retrieve the final state
        # to give the RNN proper history
        feed = {self.input_placeholder: x,
                self.labels_placeholder: y,
                self.initial_state: state,
                self.dropout_placeholder: dp}
        loss, state, _ = session.run(
            [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
        total_loss.append(loss)
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : pp = {}'.format(
                step, total_steps, np.exp(np.mean(total_loss))))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
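# Note on the return value: perplexity is exp(mean per-token cross-entropy),
# so a mean loss of ln(10000) ~= 9.21 corresponds to perplexity 10000, i.e.
# no better than a uniform guess over a 10k-word vocabulary.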
def train(total_loss, global_step):
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step,
                                    decay_steps, LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary("learning_rate", lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
    for grad, var in grads:
        if grad is not None:  # `if grad:` raises on Tensors
            tf.histogram_summary(var.op.name + "/gradients", grad)

    #variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    #variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")

    return train_op
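# The tf.no_op()-under-control_dependencies idiom above is worth isolating:
# the returned op does nothing itself, but running it forces the gradient
# update to run first. A self-contained sketch of the same pattern (the
# helper name is hypothetical):
def _barrier_train_op(loss, learning_rate=0.1):
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    apply_op = opt.minimize(loss)
    with tf.control_dependencies([apply_op]):
        return tf.no_op(name='train')  # executing this runs apply_op first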
def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
        train_op = tf.no_op()
        dp = 1
    total_steps = sum(1 for x in data_iterator(data, config.batch_size))
    total_loss = []
    # for rnn
    #state = self.initial_state.eval()
    for step, (x, y) in enumerate(data_iterator(data, config.batch_size)):
        feed = {self.input_placeholder: x,
                self.labels_placeholder: y,
                #self.initial_state: state,  # for rnn
                self.dropout_placeholder: dp}
        loss, state, _ = session.run(
            [self.loss, self.final_state, train_op], feed_dict=feed)
        total_loss.append(loss)
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : loss = {}'.format(
                step, total_steps, np.mean(total_loss)))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.mean(total_loss)  # mean epoch loss, not just the last batch
def train(self, total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.scalar_summary(l.op.name + ' (raw)', l)

    # Apply gradients, and add histograms
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer()
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads)
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(
        Recognizer.MOVING_AVERAGE_DECAY)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def testComputeMovingVars(self):
    height, width = 3, 3
    with self.test_session() as sess:
        image_shape = (10, height, width, 3)
        image_values = np.random.rand(*image_shape)
        expected_mean = np.mean(image_values, axis=(0, 1, 2))
        expected_var = np.var(image_values, axis=(0, 1, 2))
        images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
        output = ops.batch_norm(images, decay=0.1)
        update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
        with tf.control_dependencies(update_ops):
            barrier = tf.no_op(name='gradient_barrier')
            output = control_flow_ops.with_dependencies([barrier], output)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
        moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
        mean, variance = sess.run([moving_mean, moving_variance])
        # After initialization moving_mean == 0 and moving_variance == 1.
        self.assertAllClose(mean, [0] * 3)
        self.assertAllClose(variance, [1] * 3)
        for _ in range(10):
            sess.run([output])
        mean = moving_mean.eval()
        variance = moving_variance.eval()
        # After 10 updates with decay 0.1 moving_mean == expected_mean and
        # moving_variance == expected_var.
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(variance, expected_var)
def model_fn(features, targets):
    # dummy variable:
    _ = tf.Variable([0.])
    _ = targets
    predictions = features["x"]
    loss = tf.constant([2.])
    return predictions, loss, tf.no_op()
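# model_fn above follows the old (features, targets) -> (predictions, loss,
# train_op) contract of tf.contrib.learn; pairing it with that Estimator is
# an assumption from the signature, not confirmed by the surrounding code.
#
#   estimator = tf.contrib.learn.Estimator(model_fn=model_fn)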
def build_model(x, y_, n_workers, is_chief):
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        60000 / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Synchronous updates via tf.train.SyncReplicasOptimizer.
    opt = tf.train.SyncReplicasOptimizer(
        tf.train.GradientDescentOptimizer(learning_rate),
        replicas_to_aggregate=n_workers,
        total_num_replicas=n_workers)
    train_op = opt.minimize(loss, global_step=global_step)

    if is_chief:
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([variables_averages_op, train_op]):
            train_op = tf.no_op()

    return global_step, loss, train_op, opt
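# With tf.train.SyncReplicasOptimizer, the chief worker usually also runs
# the optimizer's bookkeeping ops. A hedged sketch using the TF1-era API
# (whether this code base wires these up elsewhere is unknown):
#
#   if is_chief:
#       chief_queue_runner = opt.get_chief_queue_runner()
#       init_tokens_op = opt.get_init_tokens_op()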
def main(unused_args):
    with tf.Graph().as_default(), tf.Session() as session:
        config = BaseConfig()
        testConfig = TestConfig()
        init_scale = 1.0 / np.sqrt(config.n_hidden)  # was //, which floors to 0
        initializer = tf.random_uniform_initializer(-init_scale, init_scale)
        initializer = tf.random_normal_initializer(0.0, 1.0, None)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = RNNModel(True, config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            testModel = RNNModel(False, testConfig)

        tf.initialize_all_variables().run()

        x_data = generateTestPattern(200, 0.2, 2.0, 0.2)
        naiveInError = getNaiveError(x_data)
        test_data = generateTestPattern(200, 0.2, 2.0, 0.2)
        test_data = test_data[20:len(test_data)]
        naiveTestError = getNaiveError(test_data)

        for i in range(config.max_epoch):
            # lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            # model.assign_lr(session, config.learning_rate * lr_decay)
            cost = run_epoch(session, model, x_data, model.train_op) / naiveInError
            if i % 20 == 0:
                print("cost", cost)

        cost = run_epoch(session, model, x_data, model.train_op) / naiveInError
        test_cost = run_epoch(session, testModel, test_data, tf.no_op(),
                              True) / naiveTestError
        print("final cost", cost, "test_cost", test_cost)
def test_dequeue(self):
    p = plan.TrainPlan()
    p.compiler = block_compiler.Compiler().compile(blocks.Scalar())
    p.is_chief_trainer = True
    p.batch_size = 3
    p.batches_per_epoch = 2
    p.queue_capacity = 12
    p.num_dequeuers = 1
    p.ps_tasks = 1
    q = p._create_queue(0)
    p._setup_dequeuing([q])
    input_batch = list(p.compiler.build_loom_inputs([7])) * 3
    q_enqueue = q.enqueue_many([input_batch * 4])
    p.losses['foo'], = p.compiler.output_tensors
    p.train_op = tf.no_op()
    p.finalize_stats()
    p.logdir = self.get_temp_dir()
    p.epochs = 2
    p.print_file = six.StringIO()
    init_op = tf.global_variables_initializer()
    sv = p.create_supervisor()
    with self.test_session() as sess:
        sess.run(init_op)
        sess.run(q_enqueue)
        p.run(sv, sess)
    expected = '\n'.join(['running train',
                          'train_size: 6',
                          'epoch: 1 train[loss: 7.000e+00]',
                          'epoch: 2 train[loss: 7.000e+00]',
                          'final model saved in file: %s' % p.logdir])
    log_str = p.print_file.getvalue()
    self.assertIn(expected, log_str)
def train(mnist):
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE],
                       name="x-input")
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE],
                        name="y-input")

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                          global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training steps, loss is %g" % (i, loss_value))
                saver.save(sess, MODEL_SAVE_PATH, global_step=global_step)
def run_training():
    # Get the sets of images and labels for training, validation, and
    # tell TensorFlow that the model will be built into the default Graph.

    # Create model directory
    print('loading and init vgg16.........')
    vgg = vgg16.Vgg16()
    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        images_placeholder, sc_labels_placeholder, ac_labels_placeholder, \
            mc_labels_placeholder, keep_pro = placeholder_inputs(
                FLAGS.batch_size * gpu_num)

        tower_grads1 = []
        tower_grads2 = []
        tower_grads3 = []
        multi_logits = []
        learning_rate = tf.train.exponential_decay(
            1e-4,
            global_step,
            decay_steps=FLAGS.max_steps / 50,
            decay_rate=0.99,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        opt_multi = tf.train.AdamOptimizer(learning_rate)

        with tf.variable_scope('var_name') as var_scope:
            multi_fea_weights = {
                'w1': _variable_with_weight_decay('multi_w1', [4096, 2048], 0.005),
                'out': _variable_with_weight_decay('multi_feawout', [2048, 456], 0.005)
            }
            multi_fea_biases = {
                'b1': _variable_with_weight_decay('multi_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('multi_feabout', [456], 0.000),
            }
            sc_fea_weights = {
                'w1': _variable_with_weight_decay('sc_w1', [4096, 2048], 0.005),
                'out': _variable_with_weight_decay('sc_feawout', [2048, 100], 0.005)
            }
            sc_fea_biases = {
                'b1': _variable_with_weight_decay('sc_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('sc_feabout', [100], 0.000),
            }
            ac_fea_weights = {
                'w1': _variable_with_weight_decay('ac_w1', [4096, 2048], 0.005),
                'out': _variable_with_weight_decay('ac_feawout', [2048, 100], 0.005)
            }
            ac_fea_biases = {
                'b1': _variable_with_weight_decay('ac_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('ac_feabout', [100], 0.000),
            }
            mc_fea_weights = {
                'w1': _variable_with_weight_decay('mc_w1', [4096, 2048], 0.005),
                'out': _variable_with_weight_decay('mc_feawout', [2048, 256], 0.005)
            }
            mc_fea_biases = {
                'b1': _variable_with_weight_decay('mc_b1', [2048], 0.000),
                'out': _variable_with_weight_decay('mc_feabout', [256], 0.000),
            }

        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                # flat list of variables for compute_gradients (the original
                # wrapped the two .values() lists in an outer list)
                varlist1 = multi_fea_weights.values() + multi_fea_biases.values()
                vgg.build(images_placeholder[gpu_index * FLAGS.batch_size:
                                             (gpu_index + 1) * FLAGS.batch_size, :, :, :])
                train_features = vgg.fc7
                multi_logit = model.get_predict(train_features, keep_pro,
                                                FLAGS.batch_size,
                                                multi_fea_weights,
                                                multi_fea_biases)
                loss_name_scope = ('gpud_%d_loss' % gpu_index)
                multi_loss = tower_loss(
                    'multi', multi_logit,
                    sc_labels_placeholder[gpu_index * FLAGS.batch_size:
                                          (gpu_index + 1) * FLAGS.batch_size],
                    ac_labels_placeholder[gpu_index * FLAGS.batch_size:
                                          (gpu_index + 1) * FLAGS.batch_size],
                    mc_labels_placeholder[gpu_index * FLAGS.batch_size:
                                          (gpu_index + 1) * FLAGS.batch_size])
                grads1 = opt_multi.compute_gradients(multi_loss, varlist1)
                tower_grads1.append(grads1)
                multi_logits.append(multi_logit)

        multi_logits = tf.concat(multi_logits, 0)
        sc_logits = tf.slice(multi_logits, [0, 0], [6, 100])
        sc_predictions = tf.nn.top_k(tf.nn.softmax(sc_logits), 5)
        sc_accuracy = topk_acc(sc_logits, sc_labels_placeholder, 5)
        #sc_accuracy = tower_acc(sc_logits, sc_labels_placeholder)
        tf.summary.scalar('sc_accuracy', sc_accuracy)

        ac_logits = tf.slice(multi_logits, [0, 100], [6, 100])
        ac_predictions = tf.nn.top_k(tf.nn.softmax(ac_logits), 5)
        ac_accuracy = topk_acc(ac_logits, ac_labels_placeholder, 5)
        #ac_accuracy = tower_acc(ac_logits, ac_labels_placeholder)
        tf.summary.scalar('ac_accuracy', ac_accuracy)

        mc_logits = tf.slice(multi_logits, [0, 200], [6, 256])
        mc_predictions = tf.nn.top_k(tf.nn.softmax(mc_logits), 5)
        mc_accuracy = topk_acc(mc_logits, mc_labels_placeholder, 5)
        #mc_accuracy = tower_acc(mc_logits, mc_labels_placeholder)
        tf.summary.scalar('mc_accuracy', mc_accuracy)

        grads1 = average_gradients(tower_grads1)
        apply_gradient_multi = opt_multi.apply_gradients(
            grads1, global_step=global_step)
        train_multi = tf.group(apply_gradient_multi)
        null_op = tf.no_op()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(multi_fea_weights.values() +
                               multi_fea_biases.values())
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

        ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("loading checkpoint, waiting......")
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("load complete!")

        next_strat_pos = 0
        sc_predict_labels = []
        ac_predict_labels = []
        mc_predict_labels = []
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            print('TEST Data Eval:')
            val_actions, val_images, val_ac_labels, val_sc_labels, \
                val_mc_labels, next_strat_pos, _ = input_data(
                    filename='./list/test.list',
                    start_pos=next_strat_pos,
                    batch_size=FLAGS.batch_size * gpu_num,
                    shuffle=False)
            sc_predict, ac_predict, mc_predict, sc_acc, ac_acc, mc_acc = sess.run(
                [sc_predictions, ac_predictions, mc_predictions,
                 sc_accuracy, ac_accuracy, mc_accuracy],
                feed_dict={
                    images_placeholder: val_images,
                    ac_labels_placeholder: val_ac_labels,
                    sc_labels_placeholder: val_sc_labels,
                    mc_labels_placeholder: val_mc_labels,
                    keep_pro: 1
                })
            #print (ac_predict)
            for i in range(FLAGS.batch_size):
                sc_predict_labels.append(sc_predict[1][i])
                ac_predict_labels.append(ac_predict[1][i])
                mc_predict_labels.append(mc_predict[1][i])
            duration = time.time() - start_time
            print('Batchnum %d: %.3f sec' % (step + 1, duration))
            #print predict_labels
            #print val_mc_labels

    print("get_predict_label_done!")
    return sc_predict_labels, ac_predict_labels, mc_predict_labels
def train(logits, images_tensor, labels_tensor, is_training_tensor,
          iterator_num, summary_path='./log', restore=None):
    cross_id = 1
    roi_dir = '/home/give/Documents/dataset/MICCAI2018/Slices/crossvalidation'
    pre_load = True
    train_dataset = DataSet(os.path.join(roi_dir, str(cross_id), 'train'),
                            'train', pre_load=pre_load, rescale=True,
                            divied_liver=False)
    val_dataset = DataSet(os.path.join(roi_dir, str(cross_id), 'val'),
                          'val', pre_load=pre_load, rescale=True,
                          divied_liver=False)
    train_batchdata = train_dataset.get_next_batch(net_config.BATCH_SIZE)
    val_batchdata = val_dataset.get_next_batch(net_config.BATCH_SIZE)

    predicted_tensor = tf.argmax(logits, 1)
    global_step_tensor = tf.Variable(initial_value=0, trainable=False)
    softmax_loss = loss(logits, labels_tensor)
    loss_tensor = softmax_loss
    tf.summary.scalar('softmax loss', softmax_loss)
    tf.summary.scalar('loss', loss_tensor)
    train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        loss_tensor, global_step=global_step_tensor)
    with tf.control_dependencies([train_step]):
        train_op = tf.no_op('train')
    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.cast(tf.squeeze(labels_tensor), tf.int64))
    accuracy_tensor = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy_tensor)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if restore is not None:
            full_path = tf.train.latest_checkpoint(restore['path'])
            print 'load model from ', full_path
            saver.restore(sess, full_path)
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(summary_path, 'train'), graph=sess.graph)
        val_summary_writer = tf.summary.FileWriter(
            os.path.join(summary_path, 'val'), graph=sess.graph)
        merged_summary_op = tf.summary.merge_all()

        for i in range(iterator_num):
            step_value = sess.run(global_step_tensor)
            train_expand_roi_batch_images, train_labels = train_batchdata.next()
            # update the center value during back-propagation
            _, train_acc, train_prediction, loss_value, merged_summary_value, \
                softmax_loss_value = sess.run(
                    [train_op, accuracy_tensor, predicted_tensor, loss_tensor,
                     merged_summary_op, softmax_loss],
                    feed_dict={
                        images_tensor: train_expand_roi_batch_images,
                        labels_tensor: train_labels,
                        is_training_tensor: True,
                    })
            train_summary_writer.add_summary(merged_summary_value,
                                             global_step=step_value)
            if step_value % 1000 == 0:
                val_expand_roi_batch_images, val_labels = val_batchdata.next()
                validation_acc, loss_value, merged_summary_value = sess.run(
                    [accuracy_tensor, loss_tensor, merged_summary_op],
                    feed_dict={
                        images_tensor: val_expand_roi_batch_images,
                        labels_tensor: val_labels,
                        is_training_tensor: False,
                    })
                val_summary_writer.add_summary(merged_summary_value, step_value)
                print 'step: %d, validation accuracy: %.2f, validation loss: %.2f' % (
                    step_value, validation_acc, loss_value)
                save_model_path = os.path.join('./parameters/', str(cross_id))
                checkpoint_path = os.path.join(save_model_path, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step_tensor)
                save_dir = os.path.join(save_model_path, str(step_value))
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)
                filenames = glob(
                    os.path.join(save_model_path,
                                 '*-' + str(int(step_value + 1)) + '.*'))
                for filename in filenames:
                    shutil.copy(filename,
                                os.path.join(save_dir,
                                             os.path.basename(filename)))
            if step_value % 100 == 0:
                print 'step: %d, training accuracy: %.2f, training loss: %.2f, softmax_loss_value: %.2f' % (
                    step_value, train_acc, loss_value, softmax_loss_value)
                # print centers_value

        train_summary_writer.close()
        val_summary_writer.close()
def train_gan(train_set, indices: List, samples_per_N: int, repetition_n: int,
              identifier: str, experiment_name: str, batch_size: int = 256,
              desired_epochs: int = 2000, use_bot=False):
    """
    The GAN is trained for 1000 epochs. If a set of 60k samples is trained
    with a batch size of 256, then an epoch equals 226 iterations, so a
    budget of 100,000 iterations would equal 426 epochs.
    """
    assert train_set.shape[0] > len(indices)
    print(train_set.shape)
    print(len(indices))
    my_ds = DataSetManager(train_set[indices])

    # print("Set number of iterations to train\n")
    v5 = (desired_epochs * (train_set[indices].shape[0])) // batch_size + 1
    print("ITERS " + str(v5))
    print("SIZE " + str(train_set[indices].shape))
    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0  #int( input() )
    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) + '_' + str(repetition_n)

    storing_path = 'data/' + experiment_name + "/" + model_tag + '_data/'
    model_path = storing_path + model_tag + '.ckpt'

    # Recall that os.mkdir isn't recursive, so it only makes one directory at a time
    try:
        # Create target Directory
        os.mkdir(storing_path)
        print("Directory ", storing_path, " Created ")
    except FileExistsError:
        print("Directory ", storing_path, " already exists")

    # ===> Auxiliary functions <===
    """ ----------------8<-------------[ cut here ]------------------ """

    def save_history(files_prefix, gen_loss_record, disc_loss_record, jsd_error,
                     current_epoch, epoch_record, my_ds, iter_, epochs,
                     global_iters, BATCH_SIZE, low_lr, high_lr):
        # Save losses per epoch
        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix + '_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix + '_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix + '_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        # Save current iter and epochs
        training_history = {
            'epochs': [epochs + my_ds.epochs_completed],
            'iters': [global_iters + iter_],
            'Batch Size': [BATCH_SIZE],
            'low LR': [low_lr],
            'high LR': [high_lr]
        }
        df = pd.DataFrame(training_history)
        with open(files_prefix + '_training.csv', 'w+') as f:
            df.to_csv(f, index=False)  #, header=False, index=False
        with open(files_prefix + '_jsd_error.csv', 'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([current_epoch, jsd_error])

    def send_bot_message(bot, my_ds, iter_, ITERS, identifier):
        """
        Not quite straightforward, since the critic draws many more samples.
        """
        message = "\nEpochs [" + str(my_ds.epochs_completed) + "] Iter: " + \
            str(iter_) + ";\t" + str(np.round(100 * iter_ / ITERS, 2)) + "% "
        message = message + identifier
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)
        print("\n")

    def save_gen_samples(gen_op, disc_op, sess, path, k, n=4):
        """
        k: the number of epochs used to train the generator
        n: the number of batches to draw samples from
        """
        suffix = '_gen_samples_' + str(k) + '_epochs_' + '.csv'
        for k in range(n):
            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)
            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path + 'scores_' + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <===
    """ ----------------8<-------------[ cut here ]------------------ """

    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = batch_size  # batch size
    ITERS = v5  #100000  # how many generator iterations to train for
    FREQ = 250  # sample frequency
    print("==>>Using batch size of " + str(BATCH_SIZE))
    CRITIC_ITERS = 5  # how many critic iterations per generator iteration

    def Generator_Softmax(n_samples, name='gen'):
        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, 2 * DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')
            output02 = tf_utils.linear(output01, 2 * DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')
            output03 = tf_utils.linear(output02, 2 * DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')
            output04 = tf_utils.linear(output03, GEN_DIM, name='fc-4')
            # Reminder: a logit can be modeled as a linear function of the predictors
            output05 = tf.nn.softmax(output04, name='softmax-1')
            return output05

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, 2 * DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')
            output02 = tf_utils.linear(output01, 2 * DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')
            output03 = tf_utils.linear(output02, 2 * DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')
            output04 = tf_utils.linear(output03, DIS_DIM, name='fc-4')
            return output04

    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)
    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters
    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha * real_data + (1. - alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    # was tf.gradients(disc_interpolates, [interpolates][0]), which indexes
    # the var list instead of the returned gradient list
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1)**2)
    disc_cost += LAMBDA * gradient_penalty

    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')

    disc_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    gen_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    disc_train_op = tf.train.AdamOptimizer(
        learning_rate=disc_lr, beta1=0.5,
        beta2=0.9).minimize(disc_cost, var_list=disc_vars)
    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(
            learning_rate=gen_lr, beta1=0.5,
            beta2=0.9).minimize(gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()

    """ ----------------8<-------------[ cut here ]------------------ """
    # ===> Model Parameters <===

    session_saver = tf.train.Saver()
    # files_prefix = 'model/' + model_tag
    if decision_number == 0:
        pre_trained = False
        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]
        epochs = 0
        global_iters = 0
        df = pd.DataFrame(np.array(indices))
        with open(storing_path + 'training_indices.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
    else:
        pre_trained = True
        print(storing_path)
        print(storing_path + 'training_indices.csv')
        _indices = (pd.read_csv(storing_path + 'training_indices.csv',
                                header=None).values).tolist()
        print(len(_indices))
        print(train_set[indices].shape)
        print(train_set[_indices].squeeze().shape)
        assert train_set[_indices].squeeze().shape == train_set[indices].shape
        my_ds = DataSetManager(train_set[_indices].squeeze())
        temp = pd.read_csv(storing_path + '_training.csv', header=None).values
        epochs, global_iters = temp.flatten()
        my_ds.epochs_completed = epochs
        gen_loss_record = (pd.read_csv(storing_path + '_gen_loss.csv',
                                       header=None).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path + '_disc_loss.csv',
                                        header=None).values).tolist()
        epoch_record = (pd.read_csv(storing_path + '_epoch_record.csv',
                                    header=None).values).tolist()
        print("State has been restored")

    # Create a DLBot instance
    if use_bot:
        bot = DLBot(token=telegram_token, user_id=telegram_user_id)
        # Activate the bot
        bot.activate_bot()
        print("\nTelegram bot has been activated ")

    iters_per_epoch = my_ds.num_examples / BATCH_SIZE
    total_iters = int(np.ceil((desired_epochs * iters_per_epoch) / CRITIC_ITERS))
    critic_iters = np.round((5 / 6) * total_iters)
    gen_iters = np.round((1 / 6) * total_iters)
    ITERS = total_iters

    # Train loop
    with tf.Session() as sess:
        if pre_trained == False:  # false by default
            sess.run(tf.global_variables_initializer())
        if pre_trained == True:
            session_saver.restore(sess, model_path)

        # DUCK TAPE SOLUTION
        iter_ = 0
        """
        while my_ds.epochs_completed < desired_epochs:
            iter_ += 1
        """
        # r=10**-4.72, max_lr=10**-3.72, lr_multiplier: int = 1
        low_lr = 10**-5
        high_lr = 10**-4
        lr1 = low_lr  # lr_multiplier*low_lr
        lr2 = low_lr  # lr_multiplier*high_lr
        gen_lr_ = CyclicLR(base_lr=lr1, max_lr=lr2, step_size=gen_iters)
        disc_lr_ = CyclicLR(base_lr=lr1, max_lr=lr2, step_size=critic_iters)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None
            previous_epoch = my_ds.epochs_completed
            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data = my_ds.next_batch(BATCH_SIZE)  # data_gen.__next__()
                disc_cost_, _ = sess.run([disc_cost, disc_train_op], feed_dict={
real_data: batch_data, disc_lr: disc_lr_.clr() }) disc_lr_.on_batch_end() # train generator sess.run(gen_train_op, feed_dict={gen_lr: gen_lr_.clr()}) gen_lr_.on_batch_end() gen_cost2 = sess.run(gen_cost) current_epoch = my_ds.epochs_completed condition2 = current_epoch % 5 == 0 if current_epoch > previous_epoch and condition2: disc_loss_record.append(disc_cost_) gen_loss_record.append(gen_cost2) epoch_record.append(my_ds.epochs_completed) # print("Diff "+str(current_epoch - previous_epoch)) if (np.mod(iter_, FREQ) == 0) or (iter_ + 1 == ITERS): """ print("===> Debugging") print(disc_loss_record) print(gen_loss_record) """ if use_bot: bot.loss_hist.append(disc_cost_) fake_samples = sess.run( fake_data) # , feed_dict={real_data: batch_data} # print("\n==> Sum-Simplex condition: " +str(np.sum(fake_samples, axis=1))) fake_population = np.array([ sess.run(fake_data) for k in range(40) ]).reshape(40 * batch_size, train_set.shape[1]) print(fake_population.shape) jsd_error = gan_error_all_species(fake_population, k3_test_set) print("JSD Error " + str(jsd_error)) message = "\nEpochs [" + str( my_ds.epochs_completed) + "] Iter: " + str( iter_) + ";\t" + str(np.round(100 * iter_ / ITERS, 2)) + "% " message = message + identifier print(message) if use_bot: send_bot_message(bot, my_ds, iter_, ITERS, identifier) current_epoch = my_ds.epochs_completed session_saver.save(sess, model_path) save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error, current_epoch, epoch_record, my_ds, iter_, epochs, global_iters, BATCH_SIZE, low_lr, high_lr) # save_gen_samples(fake_data, disc_fake ,sess, storing_path, k) # fake_data = Generator_Softmax(BATCH_SIZE) utils.tick() # _iter[0] += 1 if iter_ == ITERS: session_saver.save(sess, model_path) # Create gan samples n_samples = len(indices) k_iter = n_samples // BATCH_SIZE + 1 gan_samples_path = storing_path + "gan_samples_" + model_tag + '.csv' for k in range(k_iter): fake_samples = sess.run(fake_data) df = pd.DataFrame(fake_samples) with open(gan_samples_path, 'a') as f: df.to_csv(f, header=False, index=False) # Clear variables valuies tf.reset_default_graph() current_epoch = my_ds.epochs_completed save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error, current_epoch, epoch_record, my_ds, iter_, epochs, global_iters, BATCH_SIZE, low_lr, high_lr) if use_bot: bot.stop_bot() print("Training is done") # Duct tapping the size of gan sample set to avoid changing the TF Graph temp1 = pd.read_csv(gan_samples_path, header=None).values temp1 = temp1[0:n_samples] df = pd.DataFrame(temp1) with open(gan_samples_path, 'w+') as f: df.to_csv(f, header=False, index=False) print("Training is done")
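# CyclicLR is instantiated in train_gan but defined elsewhere. A minimal
# triangular-policy sketch that satisfies the clr()/on_batch_end() interface
# assumed by the loop above (an illustrative stand-in, not the source class):
class MinimalCyclicLR(object):
    def __init__(self, base_lr, max_lr, step_size):
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.iteration = 0

    def clr(self):
        # Triangular schedule: rise from base_lr to max_lr and back, once per cycle.
        cycle = np.floor(1 + self.iteration / (2 * self.step_size))
        x = abs(self.iteration / self.step_size - 2 * cycle + 1)
        return self.base_lr + (self.max_lr - self.base_lr) * max(0.0, 1.0 - x)

    def on_batch_end(self):
        self.iteration += 1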
def optimize(loss, global_step, max_grad_norm, lr, lr_decay, sync_replicas=False, replicas_to_aggregate=1, task_id=0): """Builds optimization graph. * Creates an optimizer, and optionally wraps with SyncReplicasOptimizer * Computes, clips, and applies gradients * Maintains moving averages for all trainable variables * Summarizes variables and gradients Args: loss: scalar loss to minimize. global_step: integer scalar Variable. max_grad_norm: float scalar. Grads will be clipped to this value. lr: float scalar, learning rate. lr_decay: float scalar, learning rate decay rate. sync_replicas: bool, whether to use SyncReplicasOptimizer. replicas_to_aggregate: int, number of replicas to aggregate when using SyncReplicasOptimizer. task_id: int, id of the current task; used to ensure proper initialization of SyncReplicasOptimizer. Returns: train_op """ with tf.name_scope('optimization'): # Compute gradients. tvars = tf.trainable_variables() grads = tf.gradients( loss, tvars, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N) # Clip non-embedding grads non_embedding_grads_and_vars = [(g, v) for (g, v) in zip(grads, tvars) if 'embedding' not in v.op.name] embedding_grads_and_vars = [(g, v) for (g, v) in zip(grads, tvars) if 'embedding' in v.op.name] ne_grads, ne_vars = zip(*non_embedding_grads_and_vars) ne_grads, _ = tf.clip_by_global_norm(ne_grads, max_grad_norm) non_embedding_grads_and_vars = zip(ne_grads, ne_vars) grads_and_vars = embedding_grads_and_vars + list( non_embedding_grads_and_vars) # Summarize _summarize_vars_and_grads(grads_and_vars) # Decaying learning rate lr = tf.train.exponential_decay(lr, global_step, 1, lr_decay, staircase=True) tf.summary.scalar('learning_rate', lr) opt = tf.train.AdamOptimizer(lr) # Track the moving averages of all trainable variables. variable_averages = tf.train.ExponentialMovingAverage( 0.999, global_step) # Apply gradients if sync_replicas: opt = tf.train.SyncReplicasOptimizer( opt, replicas_to_aggregate, variable_averages=variable_averages, variables_to_average=tvars, total_num_replicas=replicas_to_aggregate) apply_gradient_op = opt.apply_gradients(grads_and_vars, global_step=global_step) with tf.control_dependencies([apply_gradient_op]): train_op = tf.no_op(name='train_op') # Initialization ops tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, opt.get_chief_queue_runner()) if task_id == 0: # Chief task local_init_op = opt.chief_init_op tf.add_to_collection('chief_init_op', opt.get_init_tokens_op()) else: local_init_op = opt.local_step_init_op tf.add_to_collection('local_init_op', local_init_op) tf.add_to_collection('ready_for_local_init_op', opt.ready_for_local_init_op) else: # Non-sync optimizer apply_gradient_op = opt.apply_gradients(grads_and_vars, global_step) with tf.control_dependencies([apply_gradient_op]): train_op = variable_averages.apply(tvars) return train_op
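# A compact sketch of the embedding/non-embedding split performed inside
# optimize() before clipping. `grads_and_vars` is assumed to come from
# zip(tf.gradients(loss, tvars), tvars), as above.
def clip_non_embedding_grads(grads_and_vars, max_grad_norm):
    emb = [(g, v) for g, v in grads_and_vars if 'embedding' in v.op.name]
    rest = [(g, v) for g, v in grads_and_vars if 'embedding' not in v.op.name]
    ne_grads, ne_vars = zip(*rest)
    # Clip everything except embeddings by global norm; embeddings pass through.
    ne_grads, _ = tf.clip_by_global_norm(list(ne_grads), max_grad_norm)
    return emb + list(zip(ne_grads, ne_vars))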
def run_epoch(self, session, data, num_epoch=0, train_writer=None, train_op=None, verbose=2, train=False):
    '''NEED TO ADD BATCH_NORM OR LAYER NORM'''
    config = self.config
    dp = config.dropout
    if train_op is None:
        train_op = tf.no_op()
        dp = 1
    total_steps = len(data[0]) // config.batch_size
    total_loss = []
    accuracy = 0
    # shuffle data
    p = np.random.permutation(len(data[0]))
    tp, ip, tl, il, im = data  # targets[:config.num_train], inputs[:config.num_train], t_lens[:config.num_train], input_lens[:config.num_train], input_masks[:config.num_train]
    tp, ip, tl, il, im = tp[p], ip[p], tl[p], il[p], im[p]
    print(total_steps)
    print(list(range(total_steps)))
    for step in range(total_steps):
        index = range(step * config.batch_size, (step + 1) * config.batch_size)
        feed = {
            self.target_placeholder: tp[index],
            self.input_placeholder: ip[index],
            self.target_len_placeholder: tl[index],
            self.input_len_placeholder: il[index],
            self.dropout_placeholder: dp
        }
        loss, pred, summary, _ = session.run(
            [self.calculate_loss, self.pred_seq, self.merged, train_op],
            feed_dict=feed)
        if train_writer is not None:
            train_writer.add_summary(summary, num_epoch * total_steps + step)
        # answers = a[step*config.batch_size:(step+1)*config.batch_size]
        '''IS ACCURACY RIGHT, DOES IT STILL WORK NOW THAT YOU'VE SWITCHED FROM TOKEN TO SEQUENCE'''
        targets = tp[step * config.batch_size:(step + 1) * config.batch_size]
        """
        '''this is just the first element in the batch printed as a sample of how generations are changing'''
        print "description"
        print ip[index][0]
        for i in ip[index][0]:
            '''
            ss=''
            for j in i:
                ss+=str(self.source_id_to_vocab[int(j)]+' ')
            print ss
            '''
            #print [" ".join(str(self.source_id_to_vocab[int(j)])) for j in i]
            print [self.source_id_to_vocab[int(j)] for j in i if int(j) is not 0]
        print "pred"
        pred_seq = []
        #print('len(pred)')
        print(len(pred))
        for i in range(len(pred)):
            pred_seq.append(pred[i][0])
        print ["".join(str(self.target_id_to_vocab[int(pred_i)])) for pred_i in pred_seq]
        #print "".join([(str(self.target_id_to_vocab[int(pred_i)])) for pred_i in pred[0]])
        print "".join([(str(self.target_id_to_vocab_w_new_line(int(pred_i)))) for pred_i in pred_seq])
        print "target"
        print targets[0]
        #print "".join([(str(self.target_id_to_vocab[int(target)])) for target in targets[0] if int(target) is not 0])
        print "".join([(str(self.target_id_to_vocab_w_new_line(int(target)))) for target in targets[0] if int(target) is not 0])
        '''
        #stop after one iter for only quick check run
        stop_it
        #'''
        """
        accuracy += np.sum(pred == targets) / float(len(targets))
        total_loss.append(loss)
        if verbose and step % verbose == 0:
            sys.stdout.write('\n{} / {} : loss = {}\n\n'.format(
                step, total_steps, np.mean(total_loss)))
            # sys.stdout.flush()
    if verbose:
        sys.stdout.write('\n')
    return np.mean(total_loss), accuracy / float(total_steps)
def train(): OUTPUT_NODE = 10 # 10 classes LAYER1_NODE = 500 BATCH_SIZE = 100 LEARNING_RATE_BASE = 0.8 LEARNING_RATE_DECAY = 0.99 REGULARIZATION_RATE = 0.0001 TRAINING_STEPS = 3000 MOVING_AVERAGE_DECAY = 0.99 (x_train, y_train_orig), (x_test, y_test_orig) = load_data() print("Original Train data X shape: ", x_train.shape, "Training data Y shape: ", y_train_orig.shape) print("Original Test data X shape: ", x_test.shape, "Test data Y shape: ", y_test_orig.shape) #print("before convert y_train_orig :", y_train_orig.shape, "y[0]: ", y_train_orig[0], "y[1]: ", y_train_orig[1],"y[2]: ", y_train_orig[2],) y_train = convert_to_one_hot(y_train_orig, 10) #print("after convert y_train_one_hot :",y_train.shape) y_test = convert_to_one_hot(y_test_orig, 10) print("----- Reshape Original Trains Dataset Shape ------") x_reshape_train = tf.reshape(x_train, [x_train.shape[0], -1]) y_reshape_train = tf.transpose(y_train) print("Reshape Train data X as: ", x_reshape_train.shape, "Training data Y shape: ", y_reshape_train.shape) x_test_reshape = tf.reshape(x_test, [x_test.shape[0], -1]) y_test_reshape = tf.transpose(y_test) print("Reshape Test data X as: ", x_test_reshape.shape, "Test data Y shape: ", y_test_reshape.shape) input_x_flatten_size = x_reshape_train.shape[1] input_x_size = tf.convert_to_tensor(input_x_flatten_size, dtype=tf.int32) input_x_number_examples = tf.convert_to_tensor(x_reshape_train.shape[0], dtype=tf.int32) x = tf.placeholder(tf.float32, shape=(None, input_x_flatten_size), name='x-input') y_ = tf.placeholder(tf.float32, shape=(None, OUTPUT_NODE), name='y-input') weights1 = tf.Variable(tf.truncated_normal([input_x_size, LAYER1_NODE], stddev=0.1), name="weights1") biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]), name="biases1") weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1), name="weights2") biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]), name="biases2") # Forward propagation result y = inference(x, None, weights1, biases1, weights2, biases2) # Step of training number global_step = tf.Variable(0, trainable=False) variabl_averages = tf.train.ExponentialMovingAverage( MOVING_AVERAGE_DECAY, global_step) variabl_averages_op = variabl_averages.apply(tf.trainable_variables()) #print(tf.trainable_variables()) # Forward propagation using sliding average average_y = inference(x, variabl_averages, weights1, biases1, weights2, biases2) # loss function cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=y, labels=tf.argmax(y_, 1)) cross_entropy_mean = tf.reduce_mean(cross_entropy) regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE) regularization = regularizer(weights1) + regularizer(weights2) loss = cross_entropy_mean + regularization # learning rate decay learning_rate = tf.train.exponential_decay( LEARNING_RATE_BASE, global_step, input_x_number_examples / BATCH_SIZE, LEARNING_RATE_DECAY) # Note as from https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer # global_step: Optional Variable to increment by one after the variables have been updated. 
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    with tf.control_dependencies([train_step, variabl_averages_op]):
        train_op = tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # validation feed (built from the test split)
        validates_x = x_test_reshape.eval()
        validates_y = y_test_reshape.eval()
        validates_feed = {x: validates_x, y_: validates_y}
        # "test" feed (note: built from the training split above)
        test_feed = {x: x_reshape_train.eval(), y_: y_reshape_train.eval()}
        seed = 3
        mini_batches = random_mini_batches(tf.transpose(x_reshape_train),
                                           tf.transpose(y_reshape_train),
                                           BATCH_SIZE, seed)
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validates_feed)
                print(
                    "After %d training step(s), validation accuracy using average model is %g"
                    % (i, validate_acc))
            k = i % len(mini_batches)
            if k == 0:
                seed = seed + 1
                mini_batches = random_mini_batches(
                    tf.transpose(x_reshape_train),
                    tf.transpose(y_reshape_train), BATCH_SIZE, seed)
            mini_x_batches, mini_y_batches = mini_batches[k]
            sess.run(train_op, feed_dict={
                x: mini_x_batches,
                y_: mini_y_batches
            })
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        # report the test accuracy (the original printed validate_acc here)
        print(
            "After %d training step(s), testing accuracy using average model is %g"
            % (TRAINING_STEPS, test_acc))
        writer = tf.summary.FileWriter("./log", tf.get_default_graph())
        writer.close()
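# The tf.control_dependencies + tf.no_op idiom used above, in isolation: one
# fetch runs both the optimizer step and the moving-average update. A minimal
# sketch with a toy loss (all names here are illustrative):
w = tf.Variable(1.0)
loss = tf.square(w)
step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
ema = tf.train.ExponentialMovingAverage(0.99)
ema_op = ema.apply([w])
with tf.control_dependencies([step, ema_op]):
    train_op = tf.no_op(name='train')  # sess.run(train_op) triggers both deps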
def _train(self, experience, weights, train_step_counter): # Get individual tensors from transitions. (time_steps, policy_steps_, next_time_steps) = trajectory.to_transition(experience) actions = policy_steps_.action if self._debug_summaries: tf.contrib.summary.histogram('actions', actions) action_distribution_parameters = policy_steps_.info # Reconstruct per-timestep policy distribution from stored distribution # parameters. old_actions_distribution = ( distribution_spec.nested_distributions_from_specs( self._action_distribution_spec, action_distribution_parameters)) # Compute log probability of actions taken during data collection, using the # collect policy distribution. act_log_probs = common_utils.log_probability(old_actions_distribution, actions, self._action_spec) # Compute the value predictions for states using the current value function. # To be used for return & advantage computation. batch_size = nest_utils.get_outer_shape(time_steps, self._time_step_spec)[0] policy_state = self._collect_policy.get_initial_state( batch_size=batch_size) value_preds, unused_policy_state = self._collect_policy.apply_value_network( experience.observation, experience.step_type, policy_state=policy_state) value_preds = tf.stop_gradient(value_preds) valid_mask = ppo_utils.make_timestep_mask(next_time_steps) if weights is None: weights = valid_mask else: weights *= valid_mask returns, normalized_advantages = self.compute_return_and_advantage( next_time_steps, value_preds) # Loss tensors across batches will be aggregated for summaries. policy_gradient_losses = [] value_estimation_losses = [] l2_regularization_losses = [] entropy_regularization_losses = [] kl_penalty_losses = [] # For each epoch, create its own train op that depends on the previous one. loss_info = tf.no_op() for i_epoch in range(self._num_epochs): with tf.name_scope('epoch_%d' % i_epoch): with tf.control_dependencies(tf.nest.flatten(loss_info)): # Only save debug summaries for first and last epochs. debug_summaries = (self._debug_summaries and (i_epoch == 0 or i_epoch == self._num_epochs - 1)) # Build one epoch train op. loss_info = self.build_train_op( time_steps, actions, act_log_probs, returns, normalized_advantages, action_distribution_parameters, weights, train_step_counter, self._summarize_grads_and_vars, self._gradient_clipping, debug_summaries) policy_gradient_losses.append( loss_info.extra.policy_gradient_loss) value_estimation_losses.append( loss_info.extra.value_estimation_loss) l2_regularization_losses.append( loss_info.extra.l2_regularization_loss) entropy_regularization_losses.append( loss_info.extra.entropy_regularization_loss) kl_penalty_losses.append(loss_info.extra.kl_penalty_loss) # After update epochs, update adaptive kl beta, then update observation # normalizer and reward normalizer. with tf.control_dependencies(tf.nest.flatten(loss_info)): # Compute the mean kl from old. 
batch_size = nest_utils.get_outer_shape(time_steps, self._time_step_spec)[0] policy_state = self._collect_policy.get_initial_state(batch_size) kl_divergence = self._kl_divergence( time_steps, action_distribution_parameters, self._collect_policy.distribution(time_steps, policy_state).action) update_adaptive_kl_beta_op = self.update_adaptive_kl_beta( kl_divergence) with tf.control_dependencies([update_adaptive_kl_beta_op]): if self._observation_normalizer: update_obs_norm = (self._observation_normalizer.update( time_steps.observation, outer_dims=[0, 1])) else: update_obs_norm = tf.no_op() if self._reward_normalizer: update_reward_norm = self._reward_normalizer.update( next_time_steps.reward, outer_dims=[0, 1]) else: update_reward_norm = tf.no_op() with tf.control_dependencies([update_obs_norm, update_reward_norm]): loss_info = tf.nest.map_structure(tf.identity, loss_info) # Make summaries for total loss across all epochs. # The *_losses lists will have been populated by # calls to self.build_train_op. with tf.name_scope('Losses/'): total_policy_gradient_loss = tf.add_n(policy_gradient_losses) total_value_estimation_loss = tf.add_n(value_estimation_losses) total_l2_regularization_loss = tf.add_n(l2_regularization_losses) total_entropy_regularization_loss = tf.add_n( entropy_regularization_losses) total_kl_penalty_loss = tf.add_n(kl_penalty_losses) tf.contrib.summary.scalar('policy_gradient_loss', total_policy_gradient_loss) tf.contrib.summary.scalar('value_estimation_loss', total_value_estimation_loss) tf.contrib.summary.scalar('l2_regularization_loss', total_l2_regularization_loss) if self._entropy_regularization: tf.contrib.summary.scalar('entropy_regularization_loss', total_entropy_regularization_loss) tf.contrib.summary.scalar('kl_penalty_loss', total_kl_penalty_loss) total_abs_loss = (tf.abs(total_policy_gradient_loss) + tf.abs(total_value_estimation_loss) + tf.abs(total_entropy_regularization_loss) + tf.abs(total_l2_regularization_loss) + tf.abs(total_kl_penalty_loss)) tf.contrib.summary.scalar('total_abs_loss', total_abs_loss) if self._summarize_grads_and_vars: with tf.name_scope('Variables/'): all_vars = (self._actor_net.trainable_weights + self._value_net.trainable_weights) for var in all_vars: tf.contrib.summary.histogram(var.name.replace(':', '_'), var) return loss_info
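# The epoch-sequencing trick from _train in isolation: seed the chain with
# tf.no_op(), then give each epoch's op a control dependency on the previous
# one so the epochs execute in order within a single session.run.
# `build_epoch_op` is a hypothetical stand-in for self.build_train_op.
last_op = tf.no_op()
for i_epoch in range(num_epochs):  # num_epochs assumed defined
    with tf.control_dependencies(tf.nest.flatten(last_op)):
        last_op = build_epoch_op(i_epoch)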
def initialize(self): if tf.executing_eagerly(): return tf.no_op() else: return self._initializers
def stamp_parameter_in_graph(parameter_name, parameter_type, graph): """Stamps a parameter of a given type in the given tf.Graph instance. Tensors are stamped as placeholders, sequences are stamped as data sets constructed from string tensor handles, and named tuples are stamped by independently stamping their elements. Args: parameter_name: The suggested (string) name of the parameter to use in determining the names of the graph components to construct. The names that will actually appear in the graph are not guaranteed to be based on this suggested name, and may vary, e.g., due to existing naming conflicts, but a best-effort attempt will be made to make them similar for ease of debugging. parameter_type: The type of the parameter to stamp. Must be either an instance of computation_types.Type (or convertible to it), or None. graph: The instance of tf.Graph to stamp in. Returns: A tuple (val, binding), where 'val' is a Python object (such as a dataset, a placeholder, or a `structure.Struct` that represents a named tuple) that represents the stamped parameter for use in the body of a Python function that consumes this parameter, and the 'binding' is an instance of TensorFlow.Binding that indicates how parts of the type signature relate to the tensors and ops stamped into the graph. Raises: TypeError: If the arguments are of the wrong computation_types. ValueError: If the parameter type cannot be stamped in a TensorFlow graph. """ py_typecheck.check_type(parameter_name, str) py_typecheck.check_type(graph, tf.Graph) if parameter_type is None: return (None, None) parameter_type = computation_types.to_type(parameter_type) if parameter_type.is_tensor(): with graph.as_default(): placeholder = tf.compat.v1.placeholder(dtype=parameter_type.dtype, shape=parameter_type.shape, name=parameter_name) binding = pb.TensorFlow.Binding(tensor=pb.TensorFlow.TensorBinding( tensor_name=placeholder.name)) return (placeholder, binding) elif parameter_type.is_struct(): # The parameter_type could be a StructTypeWithPyContainer, however, we # ignore that for now. Instead, the proper containers will be inserted at # call time by function_utils.wrap_as_zero_or_one_arg_callable. if not parameter_type: # Stamps dummy element to "populate" graph, as TensorFlow does not support # empty graphs. dummy_tensor = tf.no_op() element_name_value_pairs = [] element_bindings = [] for e in structure.iter_elements(parameter_type): e_val, e_binding = stamp_parameter_in_graph( '{}_{}'.format(parameter_name, e[0]), e[1], graph) element_name_value_pairs.append((e[0], e_val)) element_bindings.append(e_binding) return (structure.Struct(element_name_value_pairs), pb.TensorFlow.Binding(struct=pb.TensorFlow.StructBinding( element=element_bindings))) elif parameter_type.is_sequence(): with graph.as_default(): variant_tensor = tf.compat.v1.placeholder(tf.variant, shape=[]) ds = make_dataset_from_variant_tensor(variant_tensor, parameter_type.element) return (ds, pb.TensorFlow.Binding(sequence=pb.TensorFlow.SequenceBinding( variant_tensor_name=variant_tensor.name))) else: raise ValueError( 'Parameter type component {!r} cannot be stamped into a TensorFlow ' 'graph.'.format(parameter_type))
def create_variables(self): self.target_actor = self.actor.copy(scope="target_actor") self.target_critic = self.critic.copy(scope="target_critic") # FOR REGULAR ACTION SCORE COMPUTATION with tf.name_scope("taking_action"): # self.observation = tf.placeholder(tf.float32, (None, self.observation_size), name="observation") # self.actor_val = tf.nn.sigmoid(self.actor(self.observation)) * 40 - 20; self.actor_val = self.actor(self.observation_for_act) self.actor_action = tf.identity(self.actor_val, name="actor_action") # tf.histogram_summary("actions", self.actor_action) # FOR PREDICTING TARGET FUTURE REWARDS with tf.name_scope("estimating_future_reward"): # self.next_observation = tf.placeholder(tf.float32, (None, self.observation_size), name="next_observation") # self.next_observation_mask = tf.placeholder(tf.float32, (None,), name="next_observation_mask") # self.next_action = self.target_actor(self.next_observation) # ST self.next_action = tf.stop_gradient( self.target_actor(self.next_observation)) # ST # print "next action: " + str(self.next_action) # tf.histogram_summary("target_actions", self.next_action) # self.next_value = self.target_critic([self.next_observation, self.next_action]) # ST self.next_value = tf.stop_gradient( tf.reshape( self.target_critic( [self.next_observation, self.next_action]), [-1])) # ST # self.rewards = tf.placeholder(tf.float32, (None,), name="rewards") self.future_reward = self.rewards + self.discount_rate * self.next_observation_mask * self.next_value with tf.name_scope("critic_update"): ##### ERROR FUNCTION ##### # self.given_action = tf.placeholder(tf.float32, (None, self.action_size), name="given_action") self.value_given_action = tf.reshape( self.critic([self.observation, self.given_action]), [-1]) # tf.scalar_summary("value_for_given_action", tf.reduce_mean(self.value_given_action)) temp_diff = self.value_given_action - self.future_reward self.critic_error = tf.identity(tf.reduce_mean( tf.square(temp_diff)), name='critic_error') ##### OPTIMIZATION ##### critic_gradients = self.optimizer.compute_gradients( self.critic_error, var_list=self.critic.variables()) # Add histograms for gradients. for grad, var in critic_gradients: # tf.histogram_summary('critic_update/' + var.name, var) if grad is not None: # tf.histogram_summary('critic_update/' + var.name + '/gradients', grad) pass self.critic_update = self.optimizer.apply_gradients( critic_gradients, name='critic_train_op') # tf.scalar_summary("critic_error", self.critic_error) with tf.name_scope("actor_update"): ##### ERROR FUNCTION ##### # self.actor_score = self.critic([self.observation, self.actor_action]) self.actor_score = self.critic( [self.observation, self.actor(self.observation)]) ##### OPTIMIZATION ##### # here we are maximizing actor score. # only optimize actor variables here, while keeping critic constant actor_gradients = self.optimizer.compute_gradients( tf.reduce_mean(-self.actor_score), var_list=self.actor.variables()) # Add histograms for gradients. 
for grad, var in actor_gradients: # tf.histogram_summary('actor_update/' + var.name, var) if grad is not None: # tf.histogram_summary('actor_update/' + var.name + '/gradients', grad) pass self.actor_update = self.optimizer.apply_gradients( actor_gradients, name='actor_train_op') # tf.scalar_summary("actor_score", tf.reduce_mean(self.actor_score)) # UPDATE TARGET NETWORK with tf.name_scope("target_network_update"): self.target_actor_update = ContinuousDeepQ.update_target_network( self.actor, self.target_actor, self.target_actor_update_rate) self.target_critic_update = ContinuousDeepQ.update_target_network( self.critic, self.target_critic, self.target_critic_update_rate) self.update_all_targets = tf.group(self.target_actor_update, self.target_critic_update, name='target_networks_update') # self.summarize = tf.merge_all_summaries() self.no_op1 = tf.no_op()
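# A plausible sketch of what ContinuousDeepQ.update_target_network returns
# (not the source implementation): a Polyak soft update that moves every
# target variable a small step toward its online counterpart.
def soft_update(online_vars, target_vars, rate):
    updates = [t.assign((1.0 - rate) * t + rate * s)
               for s, t in zip(online_vars, target_vars)]
    return tf.group(*updates, name='target_network_update')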
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, value_cost, l2_cost, combined_cost, step, est_mode=tf.estimator.ModeKeys.TRAIN): policy_entropy = -tf.reduce_mean( tf.reduce_sum(policy_output * tf.log(policy_output), axis=1)) # pi_tensor is one_hot when generated from sgfs (for supervised learning) # and soft-max when using self-play records. argmax normalizes the two. policy_target_top_1 = tf.argmax(pi_tensor, axis=1) policy_output_in_top1 = tf.to_float( tf.nn.in_top_k(policy_output, policy_target_top_1, k=1)) policy_output_in_top3 = tf.to_float( tf.nn.in_top_k(policy_output, policy_target_top_1, k=3)) policy_top_1_confidence = tf.reduce_max(policy_output, axis=1) policy_target_top_1_confidence = tf.boolean_mask( policy_output, tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) value_cost_normalized = value_cost / params['value_cost_weight'] with tf.variable_scope("metrics"): metric_ops = { 'policy_cost': tf.metrics.mean(policy_cost), 'value_cost': tf.metrics.mean(value_cost), 'value_cost_normalized': tf.metrics.mean(value_cost_normalized), 'l2_cost': tf.metrics.mean(l2_cost), 'policy_entropy': tf.metrics.mean(policy_entropy), 'combined_cost': tf.metrics.mean(combined_cost), 'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1), 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), 'policy_target_top_1_confidence': tf.metrics.mean(policy_target_top_1_confidence), 'value_confidence': tf.metrics.mean(tf.abs(value_output)), } if est_mode == tf.estimator.ModeKeys.EVAL: return metric_ops # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps. eval_step = tf.reduce_min(step) # Create summary ops so that they show up in SUMMARIES collection # That way, they get logged automatically during training summary_writer = summary.create_file_writer(FLAGS.work_dir) with summary_writer.as_default(), \ summary.record_summaries_every_n_global_steps( params['summary_steps'], eval_step): for metric_name, metric_op in metric_ops.items(): summary.scalar(metric_name, metric_op[1], step=eval_step) # Reset metrics occasionally so that they are mean of recent batches. reset_op = tf.variables_initializer(tf.local_variables("metrics")) cond_reset_op = tf.cond( tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)), lambda: reset_op, lambda: tf.no_op()) return summary.all_summary_ops() + [cond_reset_op]
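# The periodic reset above relies on tf.metrics accumulators living in local
# variables scoped under "metrics"; re-running their initializer clears the
# running means. A minimal sketch of that reset pattern (names assumed):
value = tf.placeholder(tf.float32)
with tf.variable_scope('metrics'):
    mean, update_mean = tf.metrics.mean(value)
reset = tf.variables_initializer(tf.local_variables('metrics'))
# sess.run(update_mean, {value: v}) accumulates; sess.run(reset) starts over.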
summary_writer = tf.summary.FileWriter('/tmp/lstm_logs', session.graph)
tf.initialize_all_variables().run()  # initialize the variables
for i in range(config.max_max_epoch):  # the full corpus is passed through the model several times
    # learning-rate decay:
    # while the epoch index is below max_epoch, lr_decay = 1;
    # above max_epoch, lr_decay = 0.5^(i - max_epoch)
    lr_decay = config.lr_decay**max(i - config.max_epoch, 0.0)
    m.assign_lr(session, config.learning_rate * lr_decay)  # set the learning rate
    print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
    train_perplexity = run_epoch(session, m, train_data, m.train_op,
                                 verbose=True)  # training perplexity
    print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
    valid_perplexity = run_epoch(session, mvalid, valid_data,
                                 tf.no_op())  # validation perplexity
    print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
test_perplexity = run_epoch(session, mtest, test_data,
                            tf.no_op())  # test perplexity
print("Test Perplexity: %.3f" % test_perplexity)

# if __name__ == "__main__":
#     tf.app.run()
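# The decay schedule above, evaluated numerically: with learning_rate = 1.0,
# lr_decay = 0.5 and max_epoch = 4, the rate stays flat for the first epochs
# and then halves once per epoch.
for i in range(7):
    lr_decay = 0.5 ** max(i - 4, 0.0)
    print(i + 1, 1.0 * lr_decay)  # epochs 1-5 -> 1.0, then 0.5, 0.25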
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # restore the original conditional (it had been hard-coded to `if True:`)
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            print(step)
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
input_geo_maps: data[3], input_training_masks: data[4]}) if np.isnan(tl): print('Loss diverged, stop training') break if step % 10 == 0: avg_time_per_step = (time.time() - start)/10 avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus))/(time.time() - start) start = time.time() print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format( step, ml, tl, avg_time_per_step, avg_examples_per_second)) if step % FLAGS.save_checkpoint_steps == 0: saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step) if step % FLAGS.save_summary_steps == 0: _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]}) summary_writer.add_summary(summary_str, global_step=step)
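# average_gradients is called in main() above but not shown; a standard
# multi-tower implementation looks roughly like this (a sketch, not the
# source code):
def average_gradients(tower_grads):
    averaged = []
    # zip(*tower_grads) yields, per variable, its (grad, var) pair from each tower.
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars if g is not None]
        mean_grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((mean_grad, grads_and_vars[0][1]))
    return averaged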
def __init__(self, config): self.config = config self.vfeat_path = config.vfeat_path self.tf_record_dir = config.tf_record_dir self.max_train_iter = config.max_train_iter dataset_str = 'd' dataset_str += '_' + '_'.join( config.tf_record_dir.replace('data/preprocessed/vqa_v2/', '').split('/')) dataset_str += '_' + config.vfeat_name.replace('.hdf5', '') hyper_parameter_str = 'bs{}_lr{}'.format(config.batch_size, config.learning_rate) if config.ft_vlmap: hyper_parameter_str += '_ft_vlmap' self.train_dir = './train_dir/vqa_{}_{}_{}_{}_seed{}_{}'.format( config.model_type, dataset_str, config.prefix, hyper_parameter_str, config.seed, time.strftime("%Y%m%d-%H%M%S")) if not os.path.exists(self.train_dir): os.makedirs(self.train_dir) log.infov("Train Dir: %s", self.train_dir) if config.vlmap_word_weight_dir is not None: self.vlmap_word_weight_dir = os.path.join( self.train_dir, config.vlmap_word_weight_dir.split('/')[-1]) shutil.copytree(config.vlmap_word_weight_dir, self.vlmap_word_weight_dir) config.vlmap_word_weight_dir = self.vlmap_word_weight_dir else: self.vlmap_word_weight_dir = config.vlmap_word_weight_dir # Input self.batch_size = config.batch_size with tf.name_scope('datasets'): self.target_split = tf.placeholder(tf.string) with tf.name_scope('datasets/batch'): vqa_batch = { 'train': input_ops_vqa.create(self.batch_size, self.tf_record_dir, 'train', is_train=True, scope='train_ops', shuffle=True), 'val': input_ops_vqa.create(self.batch_size, self.tf_record_dir, 'val', is_train=True, scope='val_ops', shuffle=False), 'testval': input_ops_vqa.create(self.batch_size, self.tf_record_dir, 'testval', is_train=True, scope='testval_ops', shuffle=False), 'test': input_ops_vqa.create(self.batch_size, self.tf_record_dir, 'test', is_train=True, scope='test_ops', shuffle=False) } batch_opt = { tf.equal(self.target_split, 'train'): lambda: vqa_batch['train'], tf.equal(self.target_split, 'val'): lambda: vqa_batch['val'], tf.equal(self.target_split, 'testval'): lambda: vqa_batch['testval'], tf.equal(self.target_split, 'test'): lambda: vqa_batch['test'], } self.batch = tf.case(batch_opt, default=lambda: vqa_batch['train'], exclusive=True) # Model Model = self.get_model_class(config.model_type) log.infov('using model class: {}'.format(Model)) self.model = Model(self.batch, config, is_train=True) # Optimizer self.global_step = tf.train.get_or_create_global_step(graph=None) self.learning_rate = config.learning_rate if config.lr_weight_decay: self.learning_rate = tf.train.exponential_decay( self.learning_rate, global_step=self.global_step, decay_steps=10000, decay_rate=0.5, staircase=True, name='decaying_learning_rate') # Checkpoint and monitoring trainable_vars = tf.trainable_variables() train_vars = self.model.filter_train_vars(trainable_vars) log.warn('Trainable variables:') tf.contrib.slim.model_analyzer.analyze_vars(trainable_vars, print_info=True) log.warn('Filtered train variables:') tf.contrib.slim.model_analyzer.analyze_vars(train_vars, print_info=True) self.optimizer = tf.contrib.layers.optimize_loss( loss=self.model.loss, global_step=self.global_step, learning_rate=self.learning_rate, optimizer=tf.train.AdamOptimizer, clip_gradients=20.0, variables=train_vars, increment_global_step=True, name='optimizer') self.avg_report = { 'train': {}, 'val': {}, 'testval': {}, } for split in ['train', 'val', 'testval']: for key in self.model.report.keys(): self.avg_report[split][key] = tf.placeholder(tf.float32) tf.summary.scalar('average_{}/{}'.format(split, key), self.avg_report[split][key], 
collections=['average_{}'.format(split)]) self.summary_ops = { 'train': tf.summary.merge_all(key='train'), 'val': tf.summary.merge_all(key='val'), 'testval': tf.summary.merge_all(key='testval'), 'heavy_train': tf.summary.merge_all(key='heavy_train'), 'heavy_val': tf.summary.merge_all(key='heavy_val'), 'heavy_testval': tf.summary.merge_all(key='heavy_testval'), 'average_train': tf.summary.merge_all(key='average_train'), 'average_val': tf.summary.merge_all(key='average_val'), 'average_testval': tf.summary.merge_all(key='average_testval'), 'no_op': tf.no_op(), } all_vars = tf.global_variables() transfer_vars = self.model.filter_transfer_vars(all_vars) self.saver = tf.train.Saver(max_to_keep=100) self.checkpoint_loader = tf.train.Saver(max_to_keep=1) self.pretrain_loader = tf.train.Saver(var_list=transfer_vars, max_to_keep=1) self.summary_writer = tf.summary.FileWriter(self.train_dir) self.train_average_iter = self.config.train_average_iter self.val_average_iter = self.config.val_average_iter self.heavy_summary_step = self.config.heavy_summary_step self.validation_step = self.config.validation_step self.checkpoint_step = self.config.checkpoint_step self.supervisor = tf.train.Supervisor( logdir=self.train_dir, is_chief=True, saver=None, summary_op=None, summary_writer=self.summary_writer, save_summaries_secs=300, save_model_secs=None, global_step=self.global_step, ) session_config = tf.ConfigProto( allow_soft_placement=True, gpu_options=tf.GPUOptions(allow_growth=True), device_count={'GPU': 1}) self.session = self.supervisor.prepare_or_wait_for_session( config=session_config) self.ckpt_path = config.checkpoint if self.ckpt_path is not None: log.info('Checkpoint path: {}'.format(self.ckpt_path)) self.checkpoint_loader.restore(self.session, self.ckpt_path) log.info('Loaded the checkpoint') self.pretrained_param_path = config.pretrained_param_path if self.pretrained_param_path is not None: log.warn('Filtered transfer_vars (loaded from pre-trained param):') tf.contrib.slim.model_analyzer.analyze_vars(transfer_vars, print_info=True) log.info('Pre-trained param path: {}'.format( self.pretrained_param_path)) self.pretrain_loader.restore(self.session, self.pretrained_param_path) log.info('Loaded the pre-trained parameters')
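# The split dispatch above in miniature: tf.case selects one branch based on
# a string placeholder fed at run time (the constants here are dummies):
split = tf.placeholder(tf.string)
batch = tf.case({tf.equal(split, 'train'): lambda: tf.constant(1),
                 tf.equal(split, 'val'): lambda: tf.constant(2)},
                default=lambda: tf.constant(1), exclusive=True)
# sess.run(batch, feed_dict={split: 'val'}) -> 2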
def build_graph(reader, model, train_data_pattern, label_loss_fn=losses.CrossEntropyLoss(), batch_size=1000, base_learning_rate=0.01, learning_rate_decay_examples=1000000, learning_rate_decay=0.95, optimizer_class=tf.train.AdamOptimizer, clip_gradient_norm=1.0, regularization_penalty=1, num_readers=1, num_epochs=None): """Creates the Tensorflow graph. This will only be called once in the life of a training model, because after the graph is created the model will be restored from a meta graph file rather than being recreated. Args: reader: The data file reader. It should inherit from BaseReader. model: The core model (e.g. logistic or neural net). It should inherit from BaseModel. train_data_pattern: glob path to the training data files. label_loss_fn: What kind of loss to apply to the model. It should inherit from BaseLoss. batch_size: How many examples to process at a time. base_learning_rate: What learning rate to initialize the optimizer with. optimizer_class: Which optimization algorithm to use. clip_gradient_norm: Magnitude of the gradient to clip to. regularization_penalty: How much weight to give the regularization loss compared to the label loss. num_readers: How many threads to use for I/O operations. num_epochs: How many passes to make over the data. 'None' means an unlimited number of passes. """ global_step = tf.Variable(0, trainable=False, name="global_step") learning_rate = tf.train.exponential_decay(base_learning_rate, global_step * batch_size, learning_rate_decay_examples, learning_rate_decay, staircase=True) tf.summary.scalar('learning_rate', learning_rate) optimizer = optimizer_class(learning_rate) unused_video_id, model_input_raw, labels_batch, num_frames = ( get_input_data_tensors(reader, train_data_pattern, batch_size=batch_size, num_readers=num_readers, num_epochs=num_epochs)) tf.summary.histogram("model/input_raw", model_input_raw) feature_dim = len(model_input_raw.get_shape()) - 1 model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) with tf.name_scope("model"): result = model.create_model(model_input, num_frames=num_frames, vocab_size=reader.num_classes, labels=labels_batch) for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) predictions = result["predictions"] if "loss" in result.keys(): label_loss = result["loss"] else: label_loss = label_loss_fn.calculate_loss(predictions, labels_batch) tf.summary.scalar("label_loss", label_loss) if "regularization_loss" in result.keys(): reg_loss = result["regularization_loss"] else: reg_loss = tf.constant(0.0) reg_losses = tf.losses.get_regularization_losses() if reg_losses: reg_loss += tf.add_n(reg_losses) if regularization_penalty != 0: tf.summary.scalar("reg_loss", reg_loss) # Adds update_ops (e.g., moving average updates in batch normalization) as # a dependency to the train_op. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if "update_ops" in result.keys(): update_ops += result["update_ops"] if update_ops: with tf.control_dependencies(update_ops): barrier = tf.no_op(name="gradient_barrier") with tf.control_dependencies([barrier]): label_loss = tf.identity(label_loss) # Incorporate the L2 weight penalties etc. 
final_loss = regularization_penalty * reg_loss + label_loss train_op = slim.learning.create_train_op( final_loss, optimizer, global_step=global_step, clip_gradient_norm=clip_gradient_norm) tf.add_to_collection("global_step", global_step) tf.add_to_collection("loss", label_loss) tf.add_to_collection("predictions", predictions) tf.add_to_collection("input_batch_raw", model_input_raw) tf.add_to_collection("input_batch", model_input) tf.add_to_collection("num_frames", num_frames) tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) tf.add_to_collection("train_op", train_op)
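# The barrier idiom used in build_graph, in isolation: routing the loss
# through tf.identity behind a no_op that depends on UPDATE_OPS guarantees
# batch-norm moving statistics are refreshed whenever the loss is evaluated
# for training (label_loss assumed defined):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
if update_ops:
    with tf.control_dependencies(update_ops):
        barrier = tf.no_op(name='gradient_barrier')
    with tf.control_dependencies([barrier]):
        label_loss = tf.identity(label_loss)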
def VGG16_run(): train_loss, train_acc = [], [] valid_loss, valid_acc = [], [] test_loss, test_acc = [], [] # load data #Dataset total_x, _, total_y=cifar_LoadData.load_training_data() test_x, _, test_y=cifar_LoadData.load_test_data() #total_y=total_y.astype(np.int) #test_y=test_y.astype(np.int) ## Shuffling & train/validation split shuffle_idx = np.arange(total_y.shape[0]) shuffle_rng = np.random.RandomState(123) shuffle_rng.shuffle(shuffle_idx) total_x, total_y = total_x[shuffle_idx], total_y[shuffle_idx] train_x, train_y = total_x[:int(num_images*(1-validation_ratio)), :, :, :], total_y[:int(num_images*(1-validation_ratio)), :] valid_x, valid_y = total_x[int(num_images*(1-validation_ratio)):, :, :, :], total_y[int(num_images*(1-validation_ratio)):, :] #reset graph tf.reset_default_graph() # Graph x = tf.placeholder(tf.float32, [None, img_size, img_size, img_channels], name='x-input') y_ = tf.placeholder(tf.float32, [None, num_classes], name='y-input') training_phase = tf.placeholder(tf.bool, None, name='training_phase') keep_prob =tf.placeholder(tf.float32, None, name='keep_prob') regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE) y=VGG16.VGG_16(x, keep_prob, regularizer) global_step = tf.Variable(0, trainable=False) variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step) variable_averages_op = variable_averages.apply(tf.trainable_variables()) # labels is the label index, not the values cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y) cross_entropy_mean = tf.reduce_mean(cross_entropy) #loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses')) loss = cross_entropy_mean learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, num_imges_train // BATCH_SIZE, LEARNING_RATE_DECAY) train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step) #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) #minimize is an combi-operation of compute gradients and apply gradients #grads = optimizer.compute_gradients(loss, var_list=tf.trainable_variables()) #train_step=optimizer.apply_gradients(grads, global_step=global_step) # Prediction correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy') with tf.control_dependencies([train_step, variable_averages_op]): train_op = tf.no_op(name="train") saver = tf.train.Saver() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: tf.global_variables_initializer().run() #start queue runner coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) for epoch in range(epochs): NumOfBatchTrain = int(num_imges_train) // BATCH_SIZE for i in range(NumOfBatchTrain): train_x_batch = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:] train_y_batch = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] #train_x_batch, train_y_batch = mnist.train.next_batch(BATCH_SIZE) #train_x_batch=np.reshape(train_x_batch, (-1, img_size, img_size, img_channels)) _, loss_train_batch, step, acc_train_batch = sess.run([train_op, loss, global_step, accuracy], feed_dict={x: train_x_batch, y_: train_y_batch, training_phase: True, keep_prob: 0.5}) train_loss.append(loss_train_batch) train_acc.append(acc_train_batch) if (step-1)%100==0: print("training steps: %d , training loss: %g, train accuracy: %g" % (step, loss_train_batch, acc_train_batch)) #validation in batch 
NumOfBatchValid= int(num_imges_valid) // BATCH_SIZE _valid_loss, _valid_acc = [], [] for i in range(NumOfBatchValid): #valid_x_batch, valid_y_batch = mnist.test.next_batch(BATCH_SIZE) #valid_x_batch=np.reshape(valid_x_batch, (-1, img_size, img_size, img_channels)) valid_x_batch = valid_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:] valid_y_batch = valid_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] loss_val_batch, accuracy_val_batch= sess.run([loss, accuracy], feed_dict={x: valid_x_batch, y_: valid_y_batch, training_phase: False, keep_prob: 1.0}) _valid_loss.append(loss_val_batch) _valid_acc.append(accuracy_val_batch) valid_loss.append(np.mean(_valid_loss)) valid_acc.append(np.mean(_valid_acc)) print("validation accuracy: %g" % (valid_acc[-1])) if valid_acc[-1]>0.5: saver.save(sess, os.path.join(save_dir, MODEL_NAME), global_step=global_step) # test NumOfBatchTest = int(num_imges_test) // BATCH_SIZE _test_loss, _test_acc = [], [] for i in range(NumOfBatchTest): test_x_batch = test_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :, :,:] test_y_batch = test_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] loss_val_batch, accuracy_val_batch= sess.run([loss, accuracy], feed_dict={x: test_x_batch, y_: test_y_batch, training_phase: False, keep_prob: 1.0}) _test_loss.append(loss_val_batch) _test_acc.append(accuracy_val_batch) test_loss.append(np.mean(_test_loss)) test_acc.append(np.mean(_test_acc)) print("test accuracy: %g" % (test_acc[-1])) coord.request_stop() coord.join(threads) #save loss and accuracy data Path(os.path.join(save_dir, 'accuracy_loss')).mkdir(parents=True, exist_ok=True) np.save(os.path.join(save_dir, 'accuracy_loss', 'train_loss'), train_loss) np.save(os.path.join(save_dir, 'accuracy_loss', 'train_acc'), train_acc) np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_loss'), valid_loss) np.save(os.path.join(save_dir, 'accuracy_loss', 'valid_acc'), valid_acc)
def _predict_proba_op(self, logits, **kwargs): return tf.no_op()
def train(mnist):
    with tf.name_scope('input'):  # everything that handles raw input lives under the 'input' scope
        x = tf.placeholder(tf.float32,
                           [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS],
                           name="X-input")
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    '''
    image_size = 224
    images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3],
                                          dtype=tf.float32, stddev=1e-1))
    '''
    # keep_prob = tf.placeholder(tf.float32)
    predictions, softmax, fc8, p = tf_vgg.inference_op(x, keep_prob=1.0)
    # print('y shape', y.shape)
    global_step = tf.Variable(0, trainable=False)
    with tf.name_scope("moving_average"):
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
    with tf.name_scope("loss_function"):
        print('softmax ', softmax.shape)
        print('label ', tf.argmax(y_, 1).shape)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=softmax, labels=tf.argmax(y_, 1))
        # print(cross_entropy)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))
        loss = cross_entropy_mean
    with tf.name_scope("train_step"):
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE, global_step,
            mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name="train")
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshaped_xs = np.reshape(
                xs, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={
                                               x: reshaped_xs,
                                               y_: ys
                                           })
            if i % 1000 == 0:
                print("after %d training steps, loss on training batch is %g" %
                      (step, loss_value))
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
    writer = tf.summary.FileWriter('/nfs/syzhou/github/project/path/to/log',
                                   tf.get_default_graph())
    writer.close()
def _decay_weights_op(self, var): if not self._decay_var_list or var.ref() in self._decay_var_list: return var.assign_sub( self._get_hyper("weight_decay", var.dtype) * var, self._use_locking) return tf.no_op()
def _decay_weights_sparse_op(self, var, indices): if not self._decay_var_list or var.ref() in self._decay_var_list: update = -self._get_hyper("weight_decay", var.dtype) * tf.gather( var, indices) return self._resource_scatter_add(var, indices, update) return tf.no_op()
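# The update the two methods above implement, written out: decoupled weight
# decay shrinks the variable directly, w <- w - wd * w, independently of the
# gradient-based step. A toy dense-case sketch:
w = tf.Variable([1.0, 2.0])
wd = 0.01  # stands in for the "weight_decay" hyperparameter
decay_op = w.assign_sub(wd * w)  # each run scales w by (1 - wd)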
def build_decoder(self, encoder_outputs, encoder_state): sos_id_2 = tf.cast(self.char2ind[self.sos], tf.int32) eos_id_2 = tf.cast(self.char2ind[self.eos], tf.int32) self.output_layer = Dense(self.vocab_size, name='output_projection') # Decoder. with tf.variable_scope("decoder") as decoder_scope: cell, decoder_initial_state = self.build_decoder_cell( encoder_outputs, encoder_state, self.audio_sequence_lengths) # Train if self.mode != 'INFER': helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper( inputs=self.char_embedding, sequence_length=self.char_sequence_lengths, embedding=self.embedding, sampling_probability=0.5, time_major=False) # Decoder my_decoder = tf.contrib.seq2seq.BasicDecoder(cell, helper, decoder_initial_state, output_layer=self.output_layer) # Dynamic decoding outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode( my_decoder, output_time_major=False, maximum_iterations=self.maximum_iterations, swap_memory=False, impute_finished=True, scope=decoder_scope ) sample_id = outputs.sample_id logits = outputs.rnn_output # Inference else: start_tokens = tf.fill([self.batch_size], sos_id_2) end_token = eos_id_2 # Beam search if self.beam_width > 0: my_decoder = tf.contrib.seq2seq.BeamSearchDecoder( cell=cell, embedding=self.embedding, start_tokens=start_tokens, end_token=end_token, initial_state=decoder_initial_state, beam_width=self.beam_width, output_layer=self.output_layer, ) # Greedy else: helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embedding, start_tokens, end_token) my_decoder = tf.contrib.seq2seq.BasicDecoder(cell, helper, decoder_initial_state, output_layer=self.output_layer) if self.inference_targets: maximum_iterations = self.maximum_iterations else: maximum_iterations = None # Dynamic decoding outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode( my_decoder, maximum_iterations=maximum_iterations, output_time_major=False, impute_finished=False, swap_memory=False, scope=decoder_scope) if self.beam_width > 0: logits = tf.no_op() sample_id = outputs.predicted_ids else: logits = tf.no_op() sample_id = outputs.sample_id return logits, sample_id, final_context_state
class IterationBuilderTest(parameterized.TestCase, tf.test.TestCase):

  # pylint: disable=g-long-lambda
  @parameterized.named_parameters(
      {
          "testcase_name": "single_subnetwork_fn",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "single_subnetwork_with_eval_metrics",
          "ensemble_builder": _FakeEnsembleBuilder(
              eval_metric_ops_fn=lambda:
              {"a": (tf.constant(1), tf.constant(2))}),
          "subnetwork_builders": [_FakeBuilder("training")],
          "mode": tf.estimator.ModeKeys.EVAL,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_eval_metric_ops": ["a"],
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "single_subnetwork_with_non_tensor_eval_metric_op",
          "ensemble_builder": _FakeEnsembleBuilder(
              eval_metric_ops_fn=lambda:
              {"a": (tf.constant(1), tf.no_op())}),
          "subnetwork_builders": [_FakeBuilder("training")],
          "mode": tf.estimator.ModeKeys.EVAL,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_eval_metric_ops": ["a"],
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "single_subnetwork_done_training_fn",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("done")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
          "want_is_over": True,
      }, {
          "testcase_name": "single_dict_predictions_subnetwork_fn",
          "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "previous_ensemble",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "previous_ensemble_spec": lambda: tu.dummy_ensemble_spec("old"),
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 1,
      }, {
          "testcase_name": "previous_ensemble_is_best",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "previous_ensemble_spec": lambda: tu.dummy_ensemble_spec(
              "old", random_seed=12),
          "want_loss": -.437,
          "want_predictions": .688,
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "previous_ensemble_spec_and_eval_metrics",
          "ensemble_builder": _FakeEnsembleBuilder(
              eval_metric_ops_fn=lambda:
              {"a": (tf.constant(1), tf.constant(2))}),
          "subnetwork_builders": [_FakeBuilder("training")],
          "mode": tf.estimator.ModeKeys.EVAL,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "previous_ensemble_spec": lambda: tu.dummy_ensemble_spec(
              "old", eval_metric_ops={"a": (tf.constant(1), tf.constant(2))}),
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_eval_metric_ops": ["a"],
          "want_best_candidate_index": 1,
      }, {
          "testcase_name": "two_subnetwork_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.40394,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "two_subnetwork_fns_other_best",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=12)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": -.437,
          "want_predictions": .688,
          "want_best_candidate_index": 1,
      }, {
          "testcase_name": "two_subnetwork_one_training_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("done", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "two_subnetwork_done_training_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("done"),
              _FakeBuilder("done1", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
          "want_is_over": True,
      }, {
          "testcase_name": "two_dict_predictions_subnetwork_fns",
          "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
      }, {
          "testcase_name": "two_dict_predictions_subnetwork_fns_predict_classes",
          "ensemble_builder": _FakeEnsembleBuilder(
              dict_predictions=True,
              export_output_key=tu.ExportOutputKeys.CLASSIFICATION_CLASSES),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode": tf.estimator.ModeKeys.PREDICT,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
          "want_export_outputs": {
              tu.ExportOutputKeys.CLASSIFICATION_CLASSES: [2.129],
              "serving_default": [2.129],
          },
      }, {
          "testcase_name": "two_dict_predictions_subnetwork_fns_predict_scores",
          "ensemble_builder": _FakeEnsembleBuilder(
              dict_predictions=True,
              export_output_key=tu.ExportOutputKeys.CLASSIFICATION_SCORES),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode": tf.estimator.ModeKeys.PREDICT,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
          "want_export_outputs": {
              tu.ExportOutputKeys.CLASSIFICATION_SCORES: [2.129],
              "serving_default": [2.129],
          },
      }, {
          "testcase_name": "two_dict_predictions_subnetwork_fns_predict_regression",
          "ensemble_builder": _FakeEnsembleBuilder(
              dict_predictions=True,
              export_output_key=tu.ExportOutputKeys.REGRESSION),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode": tf.estimator.ModeKeys.PREDICT,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
          "want_export_outputs": {
              tu.ExportOutputKeys.REGRESSION: 2.129,
              "serving_default": 2.129,
          },
      }, {
          "testcase_name": "two_dict_predictions_subnetwork_fns_predict_prediction",
          "ensemble_builder": _FakeEnsembleBuilder(
              dict_predictions=True,
              export_output_key=tu.ExportOutputKeys.PREDICTION),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode": tf.estimator.ModeKeys.PREDICT,
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
          "want_export_outputs": {
              tu.ExportOutputKeys.PREDICTION: {
                  "classes": 2,
                  "logits": 2.129
              },
              "serving_default": {
                  "classes": 2,
                  "logits": 2.129
              },
          },
      })
  def test_build_iteration(self,
                           ensemble_builder,
                           subnetwork_builders,
                           features,
                           labels,
                           want_predictions,
                           want_best_candidate_index,
                           want_eval_metric_ops=(),
                           want_is_over=False,
                           previous_ensemble_spec=lambda: None,
                           want_loss=None,
                           want_export_outputs=None,
                           mode=tf.estimator.ModeKeys.TRAIN):
    global_step = tf.train.create_global_step()
    builder = _IterationBuilder(_FakeCandidateBuilder(), ensemble_builder)
    iteration = builder.build_iteration(
        iteration_number=0,
        subnetwork_builders=subnetwork_builders,
        features=features(),
        labels=labels(),
        mode=mode,
        previous_ensemble_spec=previous_ensemble_spec())
    with self.test_session() as sess:
      init = tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer())
      sess.run(init)
      estimator_spec = iteration.estimator_spec
      self.assertAllClose(
          want_predictions, sess.run(estimator_spec.predictions), atol=1e-3)
      self.assertEqual(
          set(want_eval_metric_ops),
          set(estimator_spec.eval_metric_ops.keys()))
      self.assertEqual(want_best_candidate_index,
                       sess.run(iteration.best_candidate_index))
      self.assertEqual(want_is_over, sess.run(iteration.is_over))

      if mode == tf.estimator.ModeKeys.PREDICT:
        self.assertIsNotNone(estimator_spec.export_outputs)
        self.assertAllClose(
            want_export_outputs,
            sess.run(_export_output_tensors(estimator_spec.export_outputs)),
            atol=1e-3)
        self.assertIsNone(iteration.estimator_spec.train_op)
        self.assertIsNone(iteration.estimator_spec.loss)
        self.assertIsNotNone(want_export_outputs)
        return

      self.assertAlmostEqual(
          want_loss, sess.run(iteration.estimator_spec.loss), places=3)
      self.assertIsNone(iteration.estimator_spec.export_outputs)
      if mode == tf.estimator.ModeKeys.TRAIN:
        sess.run(iteration.estimator_spec.train_op)
        self.assertEqual(1, sess.run(global_step))
        self.assertEqual(1, sess.run(iteration.step))

  @parameterized.named_parameters(
      {
          "testcase_name": "empty_subnetwork_builders",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [],
          "want_raises": ValueError,
      }, {
          "testcase_name": "same_subnetwork_builder_names",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("same_name"),
              _FakeBuilder("same_name")
          ],
          "want_raises": ValueError,
      }, {
          "testcase_name": "same_name_as_previous_ensemble_spec",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "previous_ensemble_spec_fn": lambda: tu.dummy_ensemble_spec(
              "same_name"),
          "subnetwork_builders": [_FakeBuilder("same_name")],
          "want_raises": ValueError,
      }, {
          "testcase_name": "predict_invalid",
          "ensemble_builder": _FakeEnsembleBuilder(
              dict_predictions=True,
              export_output_key=tu.ExportOutputKeys.INVALID),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode": tf.estimator.ModeKeys.PREDICT,
          "want_raises": TypeError,
      })
  def test_build_iteration_error(self,
                                 ensemble_builder,
                                 subnetwork_builders,
                                 want_raises,
                                 previous_ensemble_spec_fn=lambda: None,
                                 mode=tf.estimator.ModeKeys.TRAIN):
    builder = _IterationBuilder(_FakeCandidateBuilder(), ensemble_builder)
    features = [[1., -1., 0.]]
    labels = [1]
    with self.test_session():
      with self.assertRaises(want_raises):
        builder.build_iteration(
            iteration_number=0,
            subnetwork_builders=subnetwork_builders,
            features=features,
            labels=labels,
            mode=mode,
            previous_ensemble_spec=previous_ensemble_spec_fn())
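The dictionary-style cases above come from absl's parameterized test helpers. If the mechanism is unfamiliar, here is a minimal, self-contained example of the same pattern; the `SquareTest` class and its values are illustrative, not part of the suite above.

from absl.testing import parameterized
import tensorflow as tf


class SquareTest(parameterized.TestCase, tf.test.TestCase):

  # Each dict becomes one generated test case, named by "testcase_name";
  # the remaining keys are passed to the method as keyword arguments.
  @parameterized.named_parameters(
      {"testcase_name": "two", "x": 2, "want": 4},
      {"testcase_name": "three", "x": 3, "want": 9},
  )
  def test_square(self, x, want):
    with self.test_session() as sess:
      self.assertEqual(want, sess.run(tf.square(x)))


if __name__ == "__main__":
  tf.test.main()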
def run_training():
    # Get the sets of images and labels for training, and tell TensorFlow
    # that the model will be built into the default Graph.
    # Create the model directory.
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "/home/project/I3D/I3D/checkpoints/rgb_imagenet"
    video_path_list = np.load('./data_list/train_data_list.npy')
    label_list = np.load('./data_list/train_label_list.npy')

    # learning_rate, gpu_num, model_save_dir, input_data, InceptionI3d,
    # tower_loss, tower_acc and average_gradients come from module scope.
    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        train_input_queue = tf.train.slice_input_producer(
            [video_path_list, label_list], shuffle=True)
        video_path = train_input_queue[0]
        train_label = train_input_queue[1]
        rgb_train_images, _, _ = tf.py_func(
            func=input_data.get_frames,
            inp=[video_path, -1, FLAGS.num_frame_per_clib, FLAGS.crop_size,
                 FLAGS.sample_rate, False],
            Tout=[tf.float32, tf.double, tf.int64],
        )
        batch_videos, batch_labels = tf.train.batch(
            [rgb_train_images, train_label],
            batch_size=FLAGS.batch_size * gpu_num,
            capacity=200,
            num_threads=20,
            # Integer division so the shape stays an int under Python 3.
            shapes=[(FLAGS.num_frame_per_clib // FLAGS.sample_rate,
                     FLAGS.crop_size, FLAGS.crop_size, 3), ()])
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        #opt_nonlocal = tf.train.AdamOptimizer(learning_rate * 10)
        #opt_rgb = tf.train.MomentumOptimizer(learning_rate, 0.9)
        #opt_rgb = tf.train.GradientDescentOptimizer(learning_rate)
        tower_grads = []
        logits = []
        loss = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_index in range(0, gpu_num):
                with tf.device('/gpu:%d' % gpu_index):
                    with tf.name_scope('GPU_%d' % gpu_index):
                        rgb_logit, _ = InceptionI3d(
                            num_classes=FLAGS.classics,
                            spatial_squeeze=True,
                            final_endpoint='Logits',
                            block_num=FLAGS.block_num)(
                                batch_videos[gpu_index * FLAGS.batch_size:
                                             (gpu_index + 1) * FLAGS.batch_size,
                                             :, :, :, :], True)
                        rgb_loss = tower_loss(
                            rgb_logit,
                            batch_labels[gpu_index * FLAGS.batch_size:
                                         (gpu_index + 1) * FLAGS.batch_size],
                            FLAGS.weight_decay)
                        tf.get_variable_scope().reuse_variables()
                        rgb_grads = opt_rgb.compute_gradients(rgb_loss)
                        tower_grads.append(rgb_grads)
                        logits.append(rgb_logit)
                        loss.append(rgb_loss)
        logits = tf.concat(logits, 0)
        accuracy = tower_acc(logits, batch_labels)
        grads = average_gradients(tower_grads)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        rgb_variable_map = {}
        i3d_map = {}
        nonlocal_map = {}
        for variable in tf.global_variables():
            if 'NonLocalBlock' in variable.name:
                nonlocal_map[variable.name] = variable
            else:
                i3d_map[variable.name] = variable
            if variable.name.split('/')[0] == 'RGB' and \
                    'Adam' not in variable.name.split('/')[-1] and \
                    'NonLocal' not in variable.name:
                #rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                rgb_variable_map[variable.name.replace(':0', '')] = variable

        with tf.control_dependencies(update_ops):
            apply_gradient_rgb = opt_rgb.apply_gradients(
                grads, global_step=global_step)
            if FLAGS.block_num >= 0:
                train_op = tf.group(apply_gradient_rgb)
            else:
                # Requires opt_nonlocal above to be uncommented.
                nonlocal_grads = opt_nonlocal.compute_gradients(
                    rgb_loss, var_list=nonlocal_map)
                apply_gradient_nonlocal = opt_nonlocal.apply_gradients(
                    nonlocal_grads, global_step=global_step)
                train_op = tf.group(apply_gradient_rgb,
                                    apply_gradient_nonlocal)
        null_op = tf.no_op()

        # The savers are used below but were not defined in the original
        # snippet; the var_list for rgb_saver is an assumption based on the
        # restore of the pretrained RGB checkpoint.
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Create the summary writer.
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', tf.reduce_mean(loss))
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()

        # Load the pretrained model.
        ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("loading checkpoint %s, waiting..." %
                  ckpt.model_checkpoint_path)
            rgb_saver.restore(sess, ckpt.model_checkpoint_path)
            print("load complete!")

        train_writer = tf.summary.FileWriter(
            './visual_logs/%dGPU_sgd%dblock_train_scratch_400000_8_64_0.0001_decay'
            % (gpu_num, FLAGS.block_num), sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            sess.run(train_op)
            duration = time.time() - start_time
            print('Step %d: %.3f sec, end time: after %.3f days' %
                  (step, duration,
                   (FLAGS.max_steps - step) * duration / 86400))
            if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
                print('Training Data Eval:')
                summary, acc, loss_rgb = sess.run([merged, accuracy, loss])
                print("accuracy: " + "{:.5f}".format(acc))
                print("rgb_loss: " + "{:.5f}".format(np.mean(loss_rgb)))
                train_writer.add_summary(summary, step)
            if (step + 1) % 2000 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, os.path.join(model_save_dir, 'model'),
                           global_step=step)
        coord.request_stop()
        coord.join(threads)
    print("done")
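`average_gradients` is called above but not defined in the snippet. A common implementation, patterned after TensorFlow's multi-GPU CIFAR-10 tutorial (a sketch, not necessarily this author's helper), averages each variable's gradient across towers:

import tensorflow as tf


def average_gradients(tower_grads):
    """Averages gradients across towers.

    Args:
        tower_grads: list (one entry per tower) of lists of (grad, var)
            pairs, as returned by Optimizer.compute_gradients(). Assumes
            every gradient is non-None.
    Returns:
        One list of (grad, var) pairs with gradients averaged over towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0, var), (grad1, var), ...) for one variable.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # All towers share the variable, so any tower's handle works.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads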
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    y = inference(x, None, weights1, biases1, weights2, biases2)
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2,
                          biases2)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Run the gradient step and the moving-average update as one op.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy "
                      "using the average model is %g" % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using the average "
              "model is %g" % (TRAINING_STEPS, test_acc))
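The `tf.control_dependencies([...])` plus `tf.no_op(name='train')` pairing above is the standard TF1 idiom for bundling several updates into one fetchable op. A minimal sketch with toy variables (all names illustrative):

import tensorflow as tf

counter = tf.Variable(0, name="counter")
shadow = tf.Variable(0.0, name="shadow")

inc = tf.assign_add(counter, 1)                  # stands in for the gradient step
track = tf.assign(shadow, 0.9 * shadow + 0.1)    # stands in for the EMA update

# The no-op computes nothing itself; fetching it runs both dependencies.
with tf.control_dependencies([inc, track]):
    train_op = tf.no_op(name="train")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run([counter, shadow]))  # [1, 0.1]

`tf.group(inc, track)` would be equivalent here; the no-op form reads better when the dependencies are accumulated in stages.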
def optimize(self, G_loss, D_Y_loss, F_loss, D_X_loss, histogram_loss):
    def make_optimizer(loss, variables, name='Adam'):
        """Adam optimizer with learning rate 0.0002 for the first 100k steps
        (~100 epochs) and a linearly decaying rate that goes to zero over
        the next 100k steps.
        """
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = self.learning_rate
        end_learning_rate = 0.0
        start_decay_step = 100000
        decay_steps = 100000
        beta1 = self.beta1
        learning_rate = (tf.where(
            tf.greater_equal(global_step, start_decay_step),
            tf.train.polynomial_decay(starter_learning_rate,
                                      global_step - start_decay_step,
                                      decay_steps,
                                      end_learning_rate,
                                      power=1.0),
            starter_learning_rate))
        tf.summary.scalar('learning_rate/{}'.format(name), learning_rate)
        learning_step = (tf.train.AdamOptimizer(
            learning_rate, beta1=beta1, name=name).minimize(
                loss, global_step=global_step, var_list=variables))
        return learning_step

    def make_optimizer_H(loss, variables, name='Adam_H'):
        """Adam optimizer for the histogram loss. The very large decay_steps
        effectively disables the decay. To drop the histogram loss entirely,
        set the learning rate to 0.
        """
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = self.learning_rate
        end_learning_rate = 0.0
        start_decay_step = 100000
        decay_steps = 100000000000
        beta1 = self.beta1
        learning_rate = (tf.where(
            tf.greater_equal(global_step, start_decay_step),
            tf.train.polynomial_decay(starter_learning_rate,
                                      global_step - start_decay_step,
                                      decay_steps,
                                      end_learning_rate,
                                      power=1.0),
            starter_learning_rate))
        learning_step = (tf.train.AdamOptimizer(
            learning_rate, beta1=beta1, name=name).minimize(
                loss, global_step=global_step, var_list=variables))
        return learning_step

    G_optimizer = make_optimizer(G_loss, self.G.variables, name='Adam_G')
    D_Y_optimizer = make_optimizer(D_Y_loss, self.D_Y.variables,
                                   name='Adam_D_Y')
    F_optimizer = make_optimizer(F_loss, self.F.variables, name='Adam_F')
    D_X_optimizer = make_optimizer(D_X_loss, self.D_X.variables,
                                   name='Adam_D_X')
    H_optimizer = make_optimizer_H(histogram_loss, self.G.variables,
                                   name='Adam_H')

    # Group all five update ops behind a single no-op.
    with tf.control_dependencies([
            G_optimizer, D_Y_optimizer, F_optimizer, D_X_optimizer,
            H_optimizer
    ]):
        return tf.no_op(name='optimizers')
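For intuition, the schedule built with `tf.where` and `tf.train.polynomial_decay(power=1.0)` above is simply "constant, then linear to zero". A plain-Python sketch of the same curve (constants copied from `make_optimizer`; `base` is illustrative):

def lr_at(step, base=2e-4, start_decay_step=100000, decay_steps=100000,
          end=0.0):
    # Constant before start_decay_step, then linear from base to end.
    if step < start_decay_step:
        return base
    frac = min(step - start_decay_step, decay_steps) / float(decay_steps)
    return base + (end - base) * frac

for s in (0, 100000, 150000, 200000):
    print(s, lr_at(s))  # 2e-4, 2e-4, 1e-4, 0.0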
def _build_train_op(self):
    """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
    batch_size = tf.shape(self._replay.rewards)[0]

    target_quantile_values = tf.stop_gradient(
        self._build_target_quantile_values_op())
    # Reshape to self.num_tau_prime_samples x batch_size x 1 since this is
    # the manner in which the target_quantile_values are tiled.
    target_quantile_values = tf.reshape(
        target_quantile_values, [self.num_tau_prime_samples, batch_size, 1])
    # Transpose dimensions so that the dimensionality is batch_size x
    # self.num_tau_prime_samples x 1 to prepare for computation of
    # Bellman errors.
    # Final shape of target_quantile_values:
    # batch_size x num_tau_prime_samples x 1.
    target_quantile_values = tf.transpose(target_quantile_values, [1, 0, 2])

    # Shape of indices: (num_tau_samples x batch_size) x 1.
    # Expand dimension by one so that it can be used to index into all the
    # quantiles when using the tf.gather_nd function (see below).
    indices = tf.range(self.num_tau_samples * batch_size)[:, None]
    # Expand the dimension by one so that it can be used to index into all
    # the quantiles when using the tf.gather_nd function (see below).
    reshaped_actions = self._replay.actions[:, None]
    reshaped_actions = tf.tile(reshaped_actions, [self.num_tau_samples, 1])
    # Shape of reshaped_actions: (num_tau_samples x batch_size) x 2.
    reshaped_actions = tf.concat([indices, reshaped_actions], axis=1)

    chosen_action_quantile_values = tf.gather_nd(
        self._replay_net_quantile_values, reshaped_actions)
    # Reshape to self.num_tau_samples x batch_size x 1 since this is the
    # manner in which the quantile values are tiled.
    chosen_action_quantile_values = tf.reshape(
        chosen_action_quantile_values, [self.num_tau_samples, batch_size, 1])
    # Transpose dimensions so that the dimensionality is batch_size x
    # self.num_tau_samples x 1 to prepare for computation of Bellman errors.
    # Final shape of chosen_action_quantile_values:
    # batch_size x num_tau_samples x 1.
    chosen_action_quantile_values = tf.transpose(
        chosen_action_quantile_values, [1, 0, 2])

    # Shape of bellman_errors and huber_loss:
    # batch_size x num_tau_prime_samples x num_tau_samples x 1.
    bellman_errors = (target_quantile_values[:, :, None, :] -
                      chosen_action_quantile_values[:, None, :, :])
    # The huber loss (see Section 2.3 of the paper) is defined via two cases:
    # case_one: |bellman_errors| <= kappa
    # case_two: |bellman_errors| > kappa
    huber_loss_case_one = tf.to_float(
        tf.abs(bellman_errors) <= self.kappa) * 0.5 * bellman_errors**2
    huber_loss_case_two = tf.to_float(
        tf.abs(bellman_errors) > self.kappa) * self.kappa * (
            tf.abs(bellman_errors) - 0.5 * self.kappa)
    huber_loss = huber_loss_case_one + huber_loss_case_two

    # Reshape replay_quantiles to batch_size x num_tau_samples x 1.
    replay_quantiles = tf.reshape(self._replay_net_quantiles,
                                  [self.num_tau_samples, batch_size, 1])
    replay_quantiles = tf.transpose(replay_quantiles, [1, 0, 2])

    # Tile by num_tau_prime_samples along a new dimension. Shape is now
    # batch_size x num_tau_prime_samples x num_tau_samples x 1.
    # These quantiles will be used for computation of the quantile huber
    # loss below (see section 2.3 of the paper).
    replay_quantiles = tf.to_float(
        tf.tile(replay_quantiles[:, None, :, :],
                [1, self.num_tau_prime_samples, 1, 1]))

    # Shape: batch_size x num_tau_prime_samples x num_tau_samples x 1.
    quantile_huber_loss = (
        tf.abs(tf.stop_gradient(replay_quantiles) -
               tf.stop_gradient(tf.to_float(bellman_errors < 0))) *
        huber_loss) / self.kappa
    # Sum over current quantile value (num_tau_samples) dimension,
    # average over target quantile value (num_tau_prime_samples) dimension.
    # Shape: batch_size x num_tau_prime_samples x 1.
    loss = tf.reduce_sum(quantile_huber_loss, axis=2)
    # Shape: batch_size x 1.
    loss = tf.reduce_mean(loss, axis=1)

    # TODO(kumasaurabh): Add prioritized replay functionality here.
    update_priorities_op = tf.no_op()
    with tf.control_dependencies([update_priorities_op]):
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('QuantileLoss', tf.reduce_mean(loss))
        return (self.optimizer.minimize(tf.reduce_mean(loss)),
                tf.squeeze(chosen_action_quantile_values),
                tf.squeeze(replay_quantiles[:, 0, :, :]))
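The loss assembled above is the quantile Huber loss from Section 2.3 of the IQN paper: rho_tau_kappa(u) = |tau - 1{u < 0}| * L_kappa(u) / kappa, where L_kappa is the Huber loss. A small NumPy sketch of the elementwise form, handy for sanity-checking the TF shapes and signs (values illustrative):

import numpy as np


def quantile_huber(u, tau, kappa=1.0):
    """u: Bellman errors; tau: quantile fractions in [0, 1] (broadcastable)."""
    abs_u = np.abs(u)
    huber = np.where(abs_u <= kappa,
                     0.5 * u ** 2,
                     kappa * (abs_u - 0.5 * kappa))
    return np.abs(tau - (u < 0).astype(np.float32)) * huber / kappa


# One negative Bellman error against quantiles tau = 0.25 and 0.75:
print(quantile_huber(np.array([-0.4]), np.array([0.25, 0.75])))
# [0.06 0.02]: low quantiles penalize under-estimation errors more.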
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Generate parameters for the hidden layer.
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Generate parameters for the output layer.
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Pass None so this forward pass does not use averaged parameters.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Define the global training step counter.
    global_step = tf.Variable(0, trainable=False)

    # Initialize the moving-average class.
    variable_avg = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                     global_step)
    variable_avg_op = variable_avg.apply(tf.trainable_variables())
    # Forward pass that uses the averaged parameters.
    avg_y = inference(x, variable_avg, weights1, biases1, weights2, biases2)

    # Cross entropy between the prediction (y) and the label (y_).
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Initialize and apply the regularizer.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    # Total loss: cross entropy plus the regularization term.
    loss = cross_entropy_mean + regularization

    # Define the learning rate and the train step.
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples,
                                               LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate) \
        .minimize(loss, global_step=global_step)

    # Update the parameters and their moving averages at the same time.
    with tf.control_dependencies([train_step, variable_avg_op]):
        train_op = tf.no_op(name='train')

    # Calculate the accuracy.
    correct_prediction = tf.equal(tf.argmax(avg_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Start the training process.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('After %d training steps, validation accuracy is %g' %
                      (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('After %d training steps, test accuracy is %g' %
              (TRAINING_STEPS, test_acc))
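As a quick reference, `tf.train.exponential_decay` computes `base * decay_rate ** (global_step / decay_steps)` (with the exponent floored when staircase=True). A plain-Python check (the constants are illustrative, not this module's real hyperparameters):

def exp_decay(base, step, decay_steps, decay_rate, staircase=False):
    p = step / float(decay_steps)
    if staircase:
        p = int(p)  # floor for non-negative p
    return base * decay_rate ** p

print(exp_decay(0.8, 1000, 550, 0.99))  # ~0.7855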
def build_network(d):
    # Hyperparameters
    learning_rate = 2e-5
    parameter_l2norm_scaling = 1e-10
    global_norm_gradient_clipping_ratio = 0.65

    # Define the GNN dictionary.
    GNN = {}

    # Placeholders for the result values (one per problem instance).
    instance_val = tf.placeholder(tf.float32, [None], name="instance_val")
    instance_m_list = tf.placeholder(tf.int32, [None],
                                     name="instance_edge_num")
    instance_target = tf.placeholder(tf.int32, [None], name="instance_target")

    # Define INV, a tf function to exchange positive and negative literal
    # embeddings.
    def INV(Lh):
        l = tf.shape(Lh)[0]
        n = tf.div(l, tf.constant(2))
        # Send messages from negated literals to positive ones, and vice versa.
        Lh_pos = tf.gather(Lh, tf.range(tf.constant(0), n))
        Lh_neg = tf.gather(Lh, tf.range(n, l))
        Lh_inverted = tf.concat([Lh_neg, Lh_pos], axis=0)
        return Lh_inverted
    #end

    # Define the graph neural network.
    gnn = GraphNN(
        {
            "N": d,  # Nodes
            "E": d   # Edges
        },
        {
            "Ms": ("N", "E"),  # Matrix pointing from nodes to the edges of which they are sources
            "Mt": ("N", "E"),  # Matrix pointing from nodes to the edges of which they are targets
            "Mw": ("E", "E"),  # Matrix indicating edge weights
            "S": ("N", "N"),   # Matrix indicating whether a node is the source
            "T": ("N", "N"),   # Matrix indicating whether a node is the target
        },
        {
            "NsmsgE": ("N", "E"),  # Message cast from node sources to edges
            "NtmsgE": ("N", "E"),  # Message cast from node targets to edges
            "EmsgNs": ("N", "E"),  # Message cast from edges to node sources
            "EmsgNt": ("N", "E")   # Message cast from edges to node targets
        },
        {
            "N": [{"mat": "Ms", "msg": "EmsgNs", "var": "E"},
                  {"mat": "Mt", "msg": "EmsgNt", "var": "E"},
                  {"mat": "S"},
                  {"mat": "T"}],
            "E": [{"mat": "Ms", "transpose?": True, "msg": "NsmsgE", "var": "N"},
                  {"mat": "Mt", "transpose?": True, "msg": "NtmsgE", "var": "N"},
                  {"mat": "Mw"}]
        },
        name="Dijkstra_Quiver",
        float_dtype=tf.float32)

    # Define the edge-voting MLP (E_vote).
    E_vote_MLP = Mlp(
        layer_sizes=[d for _ in range(2)],
        activations=[tf.nn.relu for _ in range(2)],
        output_size=1,
        name="E_vote",
        name_internal_layers=True,
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer())

    # Compute the number of edges.
    m = tf.shape(gnn.matrix_placeholders["Mw"])[0]
    # Compute the number of problem instances.
    p = tf.shape(instance_val)[0]

    # Get the last embeddings and compute one vote per edge.
    E_n = gnn.last_states["E"].h
    E_vote_logits = E_vote_MLP(E_n)
    E_vote = tf.nn.sigmoid(E_vote_logits)
    E_objective = tf.sparse_tensor_dense_matmul(
        gnn.matrix_placeholders["Mw"], E_vote)

    # Reorganize the votes to obtain one prediction per problem instance.
    def _vote_while_cond(i, m_acc, predicted_val):
        return tf.less(i, p)
    #end _vote_while_cond

    def _vote_while_body(i, m_acc, predicted_val):
        # Number of edges in this problem instance.
        i_m = instance_m_list[i]
        # Gather the edges of this problem and sum their objective values.
        obj_vals = tf.gather(E_objective, tf.range(m_acc, tf.add(m_acc, i_m)))
        problem_predicted_val = tf.reduce_sum(obj_vals)
        # Update the TensorArray.
        predicted_val = predicted_val.write(i, problem_predicted_val)
        return tf.add(i, tf.constant(1)), tf.add(m_acc, i_m), predicted_val
    #end _vote_while_body

    predicted_val = tf.TensorArray(size=p, dtype=tf.float32)
    _, _, predicted_val = tf.while_loop(_vote_while_cond, _vote_while_body, [
        tf.constant(0, dtype=tf.int32),
        tf.constant(0, dtype=tf.int32), predicted_val
    ])
    predicted_val = predicted_val.stack()

    # Define the loss and the (absolute) relative error.
    predict_costs = tf.losses.mean_squared_error(labels=instance_val,
                                                 predictions=predicted_val)
    predict_cost = tf.reduce_mean(predict_costs)
    abserror = tf.reduce_mean(
        tf.divide(tf.abs(tf.subtract(instance_val, predicted_val)),
                  instance_val))
    error = tf.reduce_mean(
        tf.divide(tf.subtract(instance_val, predicted_val), instance_val))

    # L2 penalty over all trainable variables.
    vars_cost = tf.zeros([])
    tvars = tf.trainable_variables()
    for var in tvars:
        vars_cost = tf.add(vars_cost, tf.nn.l2_loss(var))
    #end for
    loss = tf.add(predict_cost,
                  tf.multiply(vars_cost, parameter_l2norm_scaling))

    optimizer = tf.train.AdamOptimizer(name="Adam",
                                       learning_rate=learning_rate)
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                      global_norm_gradient_clipping_ratio)
    train_step = optimizer.apply_gradients(zip(grads, tvars))

    GNN["gnn"] = gnn
    GNN["instance_val"] = instance_val
    GNN["instance_target"] = instance_target
    GNN["instance_m"] = instance_m_list
    GNN["predicted_val"] = predicted_val
    GNN["loss"] = loss
    GNN["%error"] = error
    GNN["%abserror"] = abserror
    GNN["train_step"] = train_step
    GNN["nop"] = tf.no_op()
    return GNN
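The `tf.while_loop` above sums each instance's edge objectives sequentially. If each edge carried the index of its problem instance, the same aggregation would collapse to a single segment sum; a sketch of that equivalent (the tensors here are hypothetical stand-ins for `E_objective` and the offsets implied by `instance_m_list`):

import tensorflow as tf

# Two instances: the first owns edges 0-2, the second owns edges 3-4.
edge_objective = tf.constant([1., 2., 3., 4., 5.])
edge_problem_id = tf.constant([0, 0, 0, 1, 1])

# One summed prediction per instance, without an explicit loop.
predicted_val = tf.segment_sum(edge_objective, edge_problem_id)

with tf.Session() as sess:
    print(sess.run(predicted_val))  # [6. 9.]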
def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                              iteration_step, summary, previous_ensemble):
    # Returning a no-op leaves the subnetwork's variables untouched during
    # this AdaNet iteration.
    return tf.no_op()
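For contrast with the frozen version above, a trainable `build_subnetwork_train_op` would typically minimize the given loss over `var_list`; a sketch, with the optimizer choice and learning rate purely illustrative:

def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                              iteration_step, summary, previous_ensemble):
    # Illustrative optimizer; any tf.train.Optimizer works here.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    return optimizer.minimize(loss, var_list=var_list)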
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    order = []
    with open('imagenet_64x64_dogs_%s.txt' % FLAGS.order_file) as file_in:
        for line in file_in.readlines():
            order.append(int(line))
    order = np.array(order)

    assert FLAGS.mode == 'wgan-gp'

    NUM_CLASSES = 120
    NUM_TEST_SAMPLES_PER_CLASS = 50
    NUM_TRAIN_SAMPLES_PER_CLASS = 1300  # around 1300

    if not FLAGS.only_gen_no_cls:

        def build_cnn(inputs, is_training):
            train_or_test = {True: 'train', False: 'test'}
            if FLAGS.network_arch == 'resnet':
                logits, end_points = utils_resnet_64x64.ResNet(
                    inputs,
                    train_or_test[is_training],
                    num_outputs=NUM_CLASSES,
                    alpha=0.0,
                    scope=('ResNet-' + train_or_test[is_training]))
            else:
                raise Exception()
            return logits, end_points

        # Save all intermediate results in result_folder.
        method_name = '_'.join(
            os.path.basename(__file__).split('.')[0].split('_')[4:])
        method_name += '_gen_%d_and_select' % FLAGS.gen_how_many \
            if FLAGS.gen_more_and_select else ''
        method_name += '_auto-%.1f-%.1f' % (FLAGS.auto_param1, FLAGS.auto_param2) \
            if FLAGS.auto_choose_num_exemplars \
            else ('_%d' % FLAGS.num_exemplars_per_class
                  if not FLAGS.memory_constrained else '')
        method_name += '_%s' % FLAGS.exemplar_select_criterion
        method_name += '_%.1f-%.1f' % (FLAGS.proto_weight, FLAGS.gen_weight)
        method_name += '_icarl_%d' % FLAGS.memory_upperbound \
            if FLAGS.memory_constrained else ''
        method_name += '_reorder' if FLAGS.reorder_exemplars else ''
        method_name += '_smoothing_%.1f' % FLAGS.label_smoothing

        cls_func = '' if FLAGS.use_softmax else '_sigmoid'
        result_folder = os.path.join(
            FLAGS.result_dir,
            FLAGS.dataset + ('_flip' if FLAGS.flip else '') + '_' +
            FLAGS.order_file,
            'nb_cl_' + str(FLAGS.nb_cl),
            'non_truncated' if FLAGS.no_truncate else 'truncated',
            FLAGS.network_arch + cls_func + '_init_' + FLAGS.init_strategy,
            'weight_decay_' + str(FLAGS.weight_decay),
            'base_lr_' + str(FLAGS.base_lr),
            'adam_lr_' + str(FLAGS.adam_lr),
            method_name)
        if os.path.exists(result_folder):
            temp_i = 2
            while True:
                result_folder_mod = result_folder + '_run-' + str(temp_i)
                if not os.path.exists(result_folder_mod):
                    result_folder = result_folder_mod
                    break
                temp_i += 1
        os.makedirs(result_folder)
        print('Result folder: %s' % result_folder)

        graph_cls = tf.Graph()
        with graph_cls.as_default():
            '''
            Define variables
            '''
            batch_images = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
            batch = tf.Variable(0, trainable=False)
            learning_rate = tf.placeholder(tf.float32, shape=[])

            '''
            Network output mask
            '''
            mask_output = tf.placeholder(tf.bool, shape=[NUM_CLASSES])

            '''
            Old and new ground truth
            '''
            one_hot_labels_truncated = tf.placeholder(tf.float32,
                                                      shape=[None, None])

            '''
            Define the training network
            '''
            train_logits, _ = build_cnn(batch_images, True)
            # Masking operation.
            train_masked_logits = tf.gather(
                train_logits, tf.squeeze(tf.where(mask_output)), axis=1)
            # Convert to (N, 1) if the shape is (N,); otherwise softmax would
            # output wrong values.
            train_masked_logits = tf.cond(
                tf.equal(tf.rank(train_masked_logits), 1),
                lambda: tf.expand_dims(train_masked_logits, 1),
                lambda: train_masked_logits)
            # Train accuracy (since there is only one class, excluding the old
            # recorded responses, this accuracy is not very meaningful).
            train_pred = tf.argmax(train_masked_logits, 1)
            train_ground_truth = tf.argmax(one_hot_labels_truncated, 1)
            correct_prediction = tf.equal(train_pred, train_ground_truth)
            train_accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            train_batch_weights = tf.placeholder(tf.float32, shape=[None])

            reg_weights = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            regularization_loss = FLAGS.weight_decay * tf.add_n(reg_weights)

            '''
            More settings
            '''
            if FLAGS.use_softmax:
                empirical_loss = tf.losses.softmax_cross_entropy(
                    onehot_labels=one_hot_labels_truncated,
                    logits=train_masked_logits,
                    weights=train_batch_weights)
            else:
                empirical_loss = tf.losses.sigmoid_cross_entropy(
                    multi_class_labels=one_hot_labels_truncated,
                    logits=train_masked_logits,
                    weights=train_batch_weights)
            loss = empirical_loss + regularization_loss
            if FLAGS.use_momentum:
                opt = tf.train.MomentumOptimizer(
                    learning_rate, FLAGS.momentum).minimize(
                        loss, global_step=batch)
            else:
                opt = tf.train.GradientDescentOptimizer(
                    learning_rate).minimize(loss, global_step=batch)

            '''
            Define the testing network
            '''
            test_logits, _ = build_cnn(batch_images, False)
            test_masked_logits = tf.gather(
                test_logits, tf.squeeze(tf.where(mask_output)), axis=1)
            test_masked_logits = tf.cond(
                tf.equal(tf.rank(test_masked_logits), 1),
                lambda: tf.expand_dims(test_masked_logits, 1),
                lambda: test_masked_logits)
            test_masked_prob = tf.nn.softmax(test_masked_logits)
            test_pred = tf.argmax(test_masked_logits, 1)
            test_accuracy = tf.placeholder(tf.float32)

            '''
            Copy network (define the copying op)
            '''
            if FLAGS.network_arch == 'resnet':
                all_variables = tf.get_collection(tf.GraphKeys.WEIGHTS)
            else:
                raise Exception('Invalid network architecture')
            copy_ops = [
                all_variables[ix + len(all_variables) // 2].assign(var.value())
                for ix, var in enumerate(
                    all_variables[0:len(all_variables) // 2])
            ]

            '''
            Init certain layers when new classes are added
            '''
            init_ops = tf.no_op()
            if FLAGS.init_strategy == 'all':
                init_ops = tf.global_variables_initializer()
            elif FLAGS.init_strategy == 'last':
                if FLAGS.network_arch == 'resnet':
                    init_vars = [
                        var for var in tf.global_variables()
                        if 'fc' in var.name and 'train' in var.name
                    ]
                init_ops = tf.initialize_variables(init_vars)

            '''
            Create session
            '''
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config, graph=graph_cls)
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver()

            '''
            Summary
            '''
            train_loss_summary = tf.summary.scalar('train_loss', loss)
            train_acc_summary = tf.summary.scalar('train_accuracy',
                                                  train_accuracy)
            test_acc_summary = tf.summary.scalar('test_accuracy',
                                                 test_accuracy)
            summary_dir = os.path.join(result_folder, 'summary')
            if not os.path.exists(summary_dir):
                os.makedirs(summary_dir)
            train_summary_writer = tf.summary.FileWriter(
                os.path.join(summary_dir, 'train'), sess.graph)
            test_summary_writer = tf.summary.FileWriter(
                os.path.join(summary_dir, 'test'))

        iteration = 0

        '''
        Declaration of other variables
        '''
        # Average accuracy on seen classes.
        aver_acc_over_time = dict()
        aver_acc_per_class_over_time = dict()
        conf_mat_over_time = dict()

        # Network masks.
        mask_output_val = np.zeros([NUM_CLASSES], dtype=bool)
        mask_output_test = np.zeros([NUM_CLASSES], dtype=bool)

        '''
        Exemplars (for ablation study and other purposes)
        '''
        exemplars_dir = os.path.join(result_folder, 'exemplars')
        if not os.path.exists(exemplars_dir):
            os.makedirs(exemplars_dir)

    '''
    Train generative model (DC-GAN / WGAN)
    '''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    run_config = tf.ConfigProto(gpu_options=gpu_options,
                                allow_soft_placement=True)
    run_config.gpu_options.allow_growth = True
    graph_gen = tf.Graph()
    sess_wgan = tf.Session(config=run_config, graph=graph_gen)

    acwgan_obj = WGAN64x64(sess_wgan,
                           graph_gen,
                           dataset_name=FLAGS.dataset + '_' + FLAGS.order_file,
                           mode=FLAGS.mode,
                           batch_size=FLAGS.batch_size,
                           dim=FLAGS.dim,
                           output_dim=FLAGS.output_dim,
                           lambda_param=FLAGS.lambda_param,
                           critic_iters=FLAGS.critic_iters,
                           iters=FLAGS.iters,
                           result_dir=FLAGS.result_dir_cwgan,
                           checkpoint_interval=FLAGS.gan_save_interval,
                           adam_lr=FLAGS.adam_lr,
                           use_decay=FLAGS.use_decay,
                           conditional=FLAGS.conditional,
                           acgan=FLAGS.acgan,
                           acgan_scale=FLAGS.acgan_scale,
                           acgan_scale_g=FLAGS.acgan_scale_g,
                           normalization_g=FLAGS.normalization_g,
                           normalization_d=FLAGS.normalization_d,
                           gen_bs_multiple=FLAGS.gen_bs_multiple,
                           nb_cl=FLAGS.nb_cl,
                           n_gpus=FLAGS.n_gpus)

    exemplars = []

    test_images, test_labels, test_one_hot_labels, raw_images_test = \
        imagenet_64x64.load_test_data()

    if not FLAGS.only_gen_no_cls:
        # Train and test data of seen classes.
        test_x = np.zeros([0, 64, 64, 3], dtype=np.float32)
        test_y = np.zeros([0], dtype=np.float32)

    '''
    Class-incremental learning
    '''
    print('Starting from category ' + str(FLAGS.from_class_idx + 1) + ' to ' +
          str(FLAGS.to_class_idx + 1))
    print('Adding %d categories every time' % FLAGS.nb_cl)
    assert (FLAGS.from_class_idx % FLAGS.nb_cl == 0)
    for category_idx in range(FLAGS.from_class_idx, FLAGS.to_class_idx + 1,
                              FLAGS.nb_cl):
        to_category_idx = category_idx + FLAGS.nb_cl - 1
        if FLAGS.nb_cl == 1:
            print('Adding Category ' + str(category_idx + 1))
        else:
            print('Adding Category %d-%d' %
                  (category_idx + 1, to_category_idx + 1))

        train_x_gan = np.zeros([0, FLAGS.output_dim], dtype=np.uint8)
        train_y_gan = np.zeros([0], dtype=float)
        test_x_gan = np.zeros([0, FLAGS.output_dim], dtype=np.uint8)
        test_y_gan = np.zeros([0], dtype=float)
        if not FLAGS.only_gen_no_cls:
            # Train and test data of seen classes.
            train_y_one_hot = np.zeros([0, NUM_CLASSES], dtype=np.float32)

        for category_idx_in_group in range(category_idx, to_category_idx + 1):
            real_category_idx = order[category_idx_in_group]
            real_images_train_cur_cls, raw_images_train_cur_cls = \
                imagenet_64x64.load_train_data(real_category_idx,
                                               flip=FLAGS.flip)

            # GAN data.
            train_x_gan = np.concatenate(
                (train_x_gan, raw_images_train_cur_cls))
            train_y_gan_cur_cls = np.ones(
                [len(raw_images_train_cur_cls)]) * (
                    category_idx_in_group % FLAGS.nb_cl)
            train_y_gan = np.concatenate((train_y_gan, train_y_gan_cur_cls))
            test_indices_cur_cls = [
                idx for idx in range(len(test_labels))
                if test_labels[idx] == real_category_idx
            ]
            test_x_gan_cur_cls = raw_images_test[test_indices_cur_cls, :]
            test_y_gan_cur_cls = np.ones([len(test_indices_cur_cls)]) * (
                category_idx_in_group % FLAGS.nb_cl)
            test_x_gan = np.concatenate((test_x_gan, test_x_gan_cur_cls))
            test_y_gan = np.concatenate((test_y_gan, test_y_gan_cur_cls))

            # Classification network data.
            if not FLAGS.only_gen_no_cls:
                train_y_one_hot_cur_cls = np.zeros(
                    [len(raw_images_train_cur_cls), NUM_CLASSES])
                train_y_one_hot_cur_cls[:, category_idx_in_group] = np.ones(
                    len(raw_images_train_cur_cls))
                test_indices_cur_cls = [
                    idx for idx in range(len(test_labels))
                    if test_labels[idx] == real_category_idx
                ]
                test_x_cur_cls = test_images[test_indices_cur_cls, :]
                test_y_cur_cls = np.ones(
                    [len(test_indices_cur_cls)]) * category_idx_in_group
                test_x = np.concatenate((test_x, test_x_cur_cls))
                test_y = np.concatenate((test_y, test_y_cur_cls))
                train_y_one_hot = np.concatenate(
                    (train_y_one_hot, train_y_one_hot_cur_cls))

        '''
        Train the classification model
        '''
        # No need to train the classifier if there is only one class.
        if (to_category_idx > 0 and not FLAGS.only_gen_no_cls) \
                or not FLAGS.use_softmax:
            # Init certain layers.
            sess.run(init_ops)

            if FLAGS.no_truncate:
                mask_output_val[:] = True
            else:
                mask_output_val[:to_category_idx + 1] = True
            # Test on all seen classes.
            mask_output_test[:to_category_idx + 1] = True

            '''
            Generate samples of old classes
            '''
            train_x = np.copy(train_x_gan)
            if FLAGS.no_truncate:
                train_y_truncated = train_y_one_hot[:, :]
            else:
                train_y_truncated = train_y_one_hot[:, :to_category_idx + 1]
            train_weights_val = np.ones(len(train_x))

            for old_category_idx in range(0, category_idx):
                if old_category_idx % FLAGS.nb_cl == 0:
                    # Load the old class model (integer division so the
                    # checkpoint index stays an int under Python 3).
                    if not acwgan_obj.load(
                            (old_category_idx // FLAGS.nb_cl + 1) *
                            FLAGS.nb_cl - 1)[0]:
                        raise Exception(
                            "[!] Train a model first, then run test mode")

                num_gen_samples_x_needed = NUM_TRAIN_SAMPLES_PER_CLASS - len(
                    exemplars[old_category_idx])
                if num_gen_samples_x_needed > 0:
                    if FLAGS.gen_more_and_select:
                        gen_samples_x_more, _, _ = acwgan_obj.test(
                            FLAGS.gen_how_many,
                            old_category_idx % FLAGS.nb_cl)
                        gen_samples_x_more_real = \
                            imagenet_64x64.convert_images(gen_samples_x_more)
                        gen_samples_prob = sess.run(
                            test_masked_prob,
                            feed_dict={
                                batch_images: gen_samples_x_more_real,
                                mask_output: mask_output_val
                            })
                        gen_samples_scores_cur_cls = \
                            gen_samples_prob[:, old_category_idx]
                        top_k_indices = np.argsort(
                            -gen_samples_scores_cur_cls
                        )[:num_gen_samples_x_needed]
                        gen_samples_x = gen_samples_x_more[top_k_indices]
                    else:
                        gen_samples_x, _, _ = acwgan_obj.test(
                            num_gen_samples_x_needed,
                            old_category_idx % FLAGS.nb_cl)
                    # import wgan.tflib.save_images
                    # wgan.tflib.save_images.save_images(
                    #     gen_samples_x[:128].reshape((128, 3, 64, 64)),
                    #     'test.jpg')
                    train_x = np.concatenate(
                        (train_x, gen_samples_x,
                         exemplars[old_category_idx]))
                    train_weights_val = np.concatenate(
                        (train_weights_val,
                         np.ones(len(gen_samples_x)) * FLAGS.gen_weight,
                         np.ones(len(exemplars[old_category_idx])) *
                         FLAGS.proto_weight))
                    gen_samples_y = np.ones(
                        (len(gen_samples_x), to_category_idx + 1)) * (
                            (1 - FLAGS.label_smoothing) / to_category_idx)
                    gen_samples_y[:, old_category_idx] = np.ones(
                        (len(gen_samples_x))) * FLAGS.label_smoothing
                    exemplars_y = np.zeros(
                        (len(exemplars[old_category_idx]),
                         to_category_idx + 1))
                    exemplars_y[:, old_category_idx] = np.ones(
                        (len(exemplars[old_category_idx])))
                    train_y_truncated = np.concatenate(
                        (train_y_truncated, gen_samples_y, exemplars_y))
                elif num_gen_samples_x_needed == 0:
                    train_x = np.concatenate(
                        (train_x, exemplars[old_category_idx]))
                    train_weights_val = np.concatenate(
                        (train_weights_val,
                         np.ones(len(exemplars[old_category_idx])) *
                         FLAGS.proto_weight))
                    exemplars_y = np.zeros(
                        (len(exemplars[old_category_idx]),
                         to_category_idx + 1))
                    exemplars_y[:, old_category_idx] = np.ones(
                        (len(exemplars[old_category_idx])))
                    train_y_truncated = np.concatenate(
                        (train_y_truncated, exemplars_y))

            # # DEBUG:
            # train_indices = [idx for idx in range(NUM_SAMPLES_TOTAL)
            #                  if train_labels[idx] <= category_idx]
            # train_x = raw_images_train[train_indices, :]
            # # Record the response of the new data using the old model
            # # (category_idx is consistent with the number of True values
            # # in mask_output_val_prev).
            # train_y_truncated = train_one_hot_labels[
            #     train_indices, :category_idx + 1]

            # Training set: convert the raw images from the data files to
            # floating point.
            train_x = imagenet_64x64.convert_images(train_x)

            # Shuffle the indices and create mini-batches.
            batch_indices_perm = []

            epoch_idx = 0
            lr = FLAGS.base_lr

            '''
            Training with mixed data
            '''
            while True:
                # Generate a mini-batch.
                if len(batch_indices_perm) == 0:
                    if epoch_idx >= FLAGS.epochs_per_category:
                        break
                    if epoch_idx in lr_strat:
                        lr /= FLAGS.lr_factor
                        print("NEW LEARNING RATE: %f" % lr)
                    epoch_idx = epoch_idx + 1

                    # list(...) so the indices can be shuffled in place under
                    # Python 3, where range() is not a list.
                    shuffled_indices = list(range(train_x.shape[0]))
                    np.random.shuffle(shuffled_indices)
                    for i in range(0, len(shuffled_indices),
                                   FLAGS.train_batch_size):
                        batch_indices_perm.append(
                            shuffled_indices[i:i + FLAGS.train_batch_size])
                    batch_indices_perm.reverse()

                popped_batch_idx = batch_indices_perm.pop()

                # Use the random indices to select random images and labels.
                train_weights_batch_val = train_weights_val[popped_batch_idx]
                train_x_batch = train_x[popped_batch_idx, :, :, :]
                train_y_batch = [
                    train_y_truncated[k] for k in popped_batch_idx
                ]

                # Train.
                train_loss_summary_str, train_acc_summary_str, \
                    train_accuracy_val, train_loss_val, \
                    train_empirical_loss_val, train_reg_loss_val, _ = \
                    sess.run(
                        [train_loss_summary, train_acc_summary,
                         train_accuracy, loss, empirical_loss,
                         regularization_loss, opt],
                        feed_dict={
                            batch_images: train_x_batch,
                            one_hot_labels_truncated: train_y_batch,
                            mask_output: mask_output_val,
                            learning_rate: lr,
                            train_batch_weights: train_weights_batch_val
                        })

                # Test.
                if iteration % FLAGS.test_interval == 0:
                    sess.run(copy_ops)
                    # Divide and conquer to avoid allocating too much GPU
                    # memory.
                    test_pred_val = []
                    for i in range(0, len(test_x), FLAGS.test_batch_size):
                        test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                        test_pred_val_batch = sess.run(
                            test_pred,
                            feed_dict={
                                batch_images: test_x_batch,
                                mask_output: mask_output_test
                            })
                        test_pred_val.extend(test_pred_val_batch)

                    test_accuracy_val = 1. * np.sum(
                        np.equal(test_pred_val, test_y)) / len(test_pred_val)
                    # Multiplying the correct predictions by 2 gives per-class
                    # accuracy in percent, since there are 50 samples per
                    # class in the test set.
                    test_per_class_accuracy_val = np.diag(
                        confusion_matrix(test_y, test_pred_val)) * 2

                    test_acc_summary_str = sess.run(
                        test_acc_summary,
                        feed_dict={test_accuracy: test_accuracy_val})
                    test_summary_writer.add_summary(test_acc_summary_str,
                                                    iteration)

                    print("TEST: step %d, lr %.4f, accuracy %g" %
                          (iteration, lr, test_accuracy_val))
                    print("PER CLASS ACCURACY: " + " | ".join(
                        str(o) + '%' for o in test_per_class_accuracy_val))

                # Print the training logs.
                if iteration % FLAGS.display_interval == 0:
                    train_summary_writer.add_summary(train_loss_summary_str,
                                                     iteration)
                    train_summary_writer.add_summary(train_acc_summary_str,
                                                     iteration)
                    print("TRAIN: epoch %d, step %d, lr %.4f, accuracy %g, "
                          "loss %g, empirical %g, reg %g" %
                          (epoch_idx, iteration, lr, train_accuracy_val,
                           train_loss_val, train_empirical_loss_val,
                           train_reg_loss_val))

                iteration = iteration + 1

            '''
            Final test (before the next class is added)
            '''
            sess.run(copy_ops)
            # Divide and conquer to avoid allocating too much GPU memory.
            test_pred_val = []
            for i in range(0, len(test_x), FLAGS.test_batch_size):
                test_x_batch = test_x[i:i + FLAGS.test_batch_size]
                test_pred_val_batch = sess.run(
                    test_pred,
                    feed_dict={
                        batch_images: test_x_batch,
                        mask_output: mask_output_test
                    })
                test_pred_val.extend(test_pred_val_batch)

            test_accuracy_val = 1. * np.sum(
                np.equal(test_pred_val, test_y)) / len(test_pred_val)
            conf_mat = confusion_matrix(test_y, test_pred_val)
            test_per_class_accuracy_val = np.diag(conf_mat)

            # Record and save the cumulative accuracy.
            aver_acc_over_time[to_category_idx] = test_accuracy_val
            aver_acc_per_class_over_time[to_category_idx] = \
                test_per_class_accuracy_val
            conf_mat_over_time[to_category_idx] = conf_mat

            dump_obj = dict()
            dump_obj['flags'] = flags.FLAGS.__flags
            dump_obj['aver_acc_over_time'] = aver_acc_over_time
            dump_obj['aver_acc_per_class_over_time'] = \
                aver_acc_per_class_over_time
            dump_obj['conf_mat_over_time'] = conf_mat_over_time

            np_file_result = os.path.join(result_folder, 'acc_over_time.pkl')
            with open(np_file_result, 'wb') as file:
                pickle.dump(dump_obj, file)

            visualize_result.vis(np_file_result, 'ImageNetDogs')

            # Reorder the exemplars.
            if FLAGS.reorder_exemplars:
                for old_category_idx in range(category_idx):
                    sess.run(copy_ops)
                    # Divide and conquer to avoid allocating too much GPU
                    # memory.
                    train_prob_cur_cls_exemplars_val = sess.run(
                        test_masked_prob,
                        feed_dict={
                            batch_images: imagenet_64x64.convert_images(
                                exemplars[old_category_idx]),
                            mask_output: mask_output_val
                        })
                    train_prob_cur_cls_exemplars_val = \
                        train_prob_cur_cls_exemplars_val[:, old_category_idx]
                    reorder_indices = np.argsort(
                        -train_prob_cur_cls_exemplars_val)
                    exemplars[old_category_idx] = \
                        exemplars[old_category_idx][reorder_indices]

        # Select the exemplars. (The original snippet's indentation is lost;
        # this block is placed so selection also runs for the first group,
        # which later iterations rely on.)
        if not FLAGS.only_gen_no_cls:
            for category_idx_in_group in range(category_idx,
                                               to_category_idx + 1):
                train_indices_cur_cls = [
                    idx for idx in range(len(train_y_gan))
                    if train_y_gan[idx] ==
                    category_idx_in_group % FLAGS.nb_cl
                ]
                train_x_cur_cls = train_x_gan[train_indices_cur_cls]
                train_x_cur_cls_normalized = imagenet_64x64.convert_images(
                    train_x_cur_cls)
                sess.run(copy_ops)
                # Divide and conquer to avoid allocating too much GPU memory.
                train_prob_cur_cls_val = sess.run(
                    test_masked_prob,
                    feed_dict={
                        batch_images: train_x_cur_cls_normalized,
                        mask_output: mask_output_val
                    })
                train_prob_cur_cls_val = \
                    train_prob_cur_cls_val[:, category_idx_in_group]

                # Use an iCaRL-like memory mechanism to save exemplars or not.
                if FLAGS.memory_constrained:
                    # Auto or fixed number of exemplars.
                    if FLAGS.auto_choose_num_exemplars:
                        # Check whether all new samples can be saved as
                        # exemplars.
                        if NUM_TRAIN_SAMPLES_PER_CLASS > \
                                FLAGS.memory_upperbound - sum(
                                    [len(exemplars[i])
                                     for i in range(len(exemplars))]):
                            # Load the inception scores of all classes.
                            save_exemplars_ratios = []
                            for i in range(category_idx_in_group + 1):
                                inception_score = \
                                    acwgan_obj.load_inception_score(i)
                                save_exemplars_ratio = FLAGS.auto_param1 - \
                                    FLAGS.auto_param2 * inception_score
                                save_exemplars_ratios.append(
                                    save_exemplars_ratio)
                            save_exemplars_ratios = np.array(
                                save_exemplars_ratios)
                            keep_exemplars_num = np.floor(
                                save_exemplars_ratios *
                                FLAGS.memory_upperbound /
                                sum(save_exemplars_ratios)).astype(int)
                            for old_category_idx in range(
                                    category_idx_in_group):
                                exemplars[old_category_idx] = exemplars[
                                    old_category_idx][:keep_exemplars_num[
                                        old_category_idx]]
                            num_exemplars_cur_cls = keep_exemplars_num[-1]
                        else:
                            num_exemplars_cur_cls = \
                                NUM_TRAIN_SAMPLES_PER_CLASS
                    else:
                        num_exemplars_per_cls = int(
                            FLAGS.memory_upperbound //
                            (category_idx_in_group + 1))
                        num_exemplars_per_cls = min(
                            num_exemplars_per_cls,
                            NUM_TRAIN_SAMPLES_PER_CLASS)
                        # Remove redundant elements in the memory for
                        # previous classes.
                        if category_idx_in_group > 0 and \
                                len(exemplars[0]) > num_exemplars_per_cls:
                            for old_category_idx in range(
                                    category_idx_in_group):
                                exemplars[old_category_idx] = exemplars[
                                    old_category_idx][:num_exemplars_per_cls]
                        # How many new elements to add to the memory for the
                        # current class.
                        num_exemplars_cur_cls = num_exemplars_per_cls
                        print(' [*] Store %d exemplars for each class' %
                              num_exemplars_cur_cls)
                else:
                    # Auto or fixed number of exemplars.
                    if FLAGS.auto_choose_num_exemplars:
                        inception_score = acwgan_obj.load_inception_score(
                            category_idx_in_group)
                        num_exemplars_cur_cls = int(
                            np.floor(FLAGS.auto_param1 -
                                     FLAGS.auto_param2 * inception_score))
                        print(' [*] Inception score %f, store %d exemplars' %
                              (inception_score, num_exemplars_cur_cls))
                    else:
                        num_exemplars_cur_cls = \
                            FLAGS.num_exemplars_per_class

                selected_indices = np.array(
                    range(len(train_prob_cur_cls_val)))
                if FLAGS.exemplar_select_criterion == 'high':
                    # Select the num_exemplars_cur_cls highest-probability
                    # samples.
                    selected_indices = train_prob_cur_cls_val.argsort()[:-(
                        num_exemplars_cur_cls + 1):-1]
                elif FLAGS.exemplar_select_criterion == 'low':
                    # Select the num_exemplars_cur_cls lowest-probability
                    # samples.
                    selected_indices = train_prob_cur_cls_val.argsort(
                    )[:num_exemplars_cur_cls]
                elif FLAGS.exemplar_select_criterion == 'random':
                    # list(...) so the indices can be shuffled in place
                    # under Python 3.
                    random_idx = list(range(len(train_prob_cur_cls_val)))
                    np.random.shuffle(random_idx)
                    selected_indices = random_idx[:num_exemplars_cur_cls]

                exemplars.append(train_x_cur_cls[selected_indices])

                np_file_exemplars = os.path.join(
                    exemplars_dir,
                    'exemplars_%d' % (category_idx_in_group + 1))
                np.save(np_file_exemplars, exemplars)

        '''
        Train the generative model (W-GAN)
        '''
        if acwgan_obj.check_model(to_category_idx):
            print(" [*] Model of Class %d-%d exists. "
                  "Skip the training process" %
                  (category_idx + 1, to_category_idx + 1))
        else:
            print(" [*] Model of Class %d-%d does not exist. "
                  "Start the training process" %
                  (category_idx + 1, to_category_idx + 1))
            acwgan_obj.train(train_x_gan, train_y_gan, test_x_gan,
                             test_y_gan, to_category_idx)

    # Save the final model.
    if not FLAGS.only_gen_no_cls:
        checkpoint_dir = os.path.join(result_folder, 'checkpoints')
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
        sess.close()
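The exemplar selection above relies on `argsort` slicing; a small standalone check of the 'high' branch (arrays illustrative):

import numpy as np

probs = np.array([0.1, 0.9, 0.4, 0.7])
k = 2
top_k = probs.argsort()[:-(k + 1):-1]   # indices of the k highest values
print(top_k, probs[top_k])              # [1 3] [0.9 0.7]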