def _build(self):
    """Build the Q-network graph with a quadratic advantage term.

    Creates the placeholders ``x`` (state), ``u`` (action) and ``y``
    (target), two hidden ReLU layers and three heads: ``V``, ``mu`` and the
    entries ``l`` of a lower-triangular matrix ``L``.  With ``P = L L^T``
    the advantage is ``A = -(u - mu)^T P (u - mu)`` and ``Q = A + V``.
    Also defines the squared-error loss against ``y``, an Adam training op
    that increments the global step, and the merged summaries.
    """
    n_actions = self.cfg.action_dim[0]

    # Placeholders: x and u carry a leading batch axis of unknown size.
    state_shape = (None, ) + tuple(self.cfg.status_dim)
    action_shape = (None, ) + tuple(self.cfg.action_dim)
    self.x = tf.placeholder(shape=state_shape, dtype=tf.float32, name='x')
    self.u = tf.placeholder(shape=action_shape, dtype=tf.float32, name='u')
    # The original passed ``(None)``, which is plain ``None`` (no static
    # shape), not the 1-tuple ``(None,)``.
    self.y = tf.placeholder(shape=None, dtype=tf.float32, name='y')

    # Shared trunk: two ReLU layers of width cfg.hidden_dim.
    hidden = self.x
    for _ in range(2):
        hidden = self.fc(hidden, self.cfg.hidden_dim,
                         activation_fn=tf.nn.relu)

    # Output heads (created in this order).
    self.V = tf.squeeze(self.fc(hidden, 1))
    self.mu = self.fc(hidden, n_actions)
    n_tril_entries = n_actions * (n_actions + 1) // 2
    self.l = self.fc(hidden, n_tril_entries)

    self.L = self.to_lower_triangle(self.l, n_actions)
    L_transposed = tf.transpose(self.L, (0, 2, 1))
    self.P = tf.matmul(self.L, L_transposed, name="P")

    # Quadratic form -(u - mu)^T P (u - mu), one value per batch row.
    delta = tf.expand_dims(self.u - self.mu, 1)
    delta_transposed = tf.transpose(delta, (0, 2, 1))
    quadratic = tf.matmul(delta, tf.matmul(self.P, delta_transposed))
    advantage = -quadratic
    self.A = tf.squeeze(tf.reshape(advantage, (-1, 1)), name="A")

    self.Q = self.A + self.V

    squared_error = tf.squared_difference(self.y, self.Q)
    self.loss = tf.reduce_mean(squared_error)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.cfg.learning_rate)
    self.train_op = optimizer.minimize(
        self.loss, global_step=get_or_create_global_step())

    summary_ops = [
        tf.summary.scalar("loss", self.loss),
        tf.summary.histogram("mu", self.mu),
        tf.summary.histogram("Q", self.Q),
    ]
    self.summaries = tf.summary.merge(summary_ops)
def __init__(self, hidden_size, batch_size, learning_rate):
    """Build the VAE graph, its training op, and start a session.

    Args:
        hidden_size: width of the latent vector; the encoder is asked for
            ``2 * hidden_size`` outputs (mean half and log-variance half).
        batch_size: number of prior samples decoded into
            ``self.sampled_tensor``.
        learning_rate: learning rate handed to ``layers.optimize_loss``.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])

    conv_defaults = {
        'activation_fn': tf.nn.elu,
        'normalizer_fn': layers.batch_norm,
        'normalizer_params': {'scale': True},
    }
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   **conv_defaults):
        with tf.variable_scope('model'):
            encoded = encoder(self.input_tensor, hidden_size * 2)

            # First half of the encoding is the mean; the second half is
            # exponentiated and square-rooted to get the std deviation.
            mean = encoded[:, :hidden_size]
            log_variance = encoded[:, hidden_size:]
            stddev = tf.sqrt(tf.exp(log_variance))

            # Reparameterised sample: mean + eps * stddev.
            rows = tf.shape(mean)[0]
            epsilon = tf.random_normal([rows, hidden_size])
            latent_sample = mean + epsilon * stddev

            reconstruction = decoder(latent_sample)

        # Second decoder call shares weights and decodes prior samples.
        with tf.variable_scope('model', reuse=True):
            prior_sample = tf.random_normal([batch_size, hidden_size])
            self.sampled_tensor = decoder(prior_sample)

    vae_loss = self.__get_vae_cost(mean, stddev)
    rec_loss = self.__get_reconstruction_cost(reconstruction,
                                              self.input_tensor)
    total_loss = vae_loss + rec_loss

    self.train = layers.optimize_loss(total_loss,
                                      get_or_create_global_step(),
                                      learning_rate=learning_rate,
                                      optimizer='Adam',
                                      update_ops=[])

    self.sess = tf.Session()
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)
def _build_model(self):
    """Build the convolutional Q-network, its loss, train op and summaries.

    Placeholders:
        self.X: uint8 frames of shape [batch, 84, 84, 4].
        self.y: float32 regression targets, shape [batch].
        self.a: int32 indices of the actions taken, shape [batch].

    Outputs:
        self.predictions: Q-values for all ``self.nA`` actions.
        self.action_predictions: Q-value of the taken action per row.
        self.loss / self.train_op / self.summaries.
    """
    self.X = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8,
                            name="X")
    self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
    self.a = tf.placeholder(shape=[None], dtype=tf.int32, name="a")

    # Scale the uint8 pixels into [0, 1].
    X = tf.to_float(self.X) / 255.0

    conv1 = tfl.conv2d(X, 32, 8, 4)
    conv2 = tfl.conv2d(conv1, 64, 4, 2)
    conv3 = tfl.conv2d(conv2, 64, 3, 1)
    flattened = tfl.flatten(conv3)
    fc1 = tfl.fully_connected(flattened, 512)
    self.predictions = tfl.fully_connected(fc1, self.nA,
                                           activation_fn=None)

    # Pick the Q-value of the action actually taken in each row by
    # gathering with (row, action) index pairs.
    # tf.stack replaces tf.pack, which was removed in TensorFlow 1.0.
    batch_size = tf.shape(self.a)[0]
    ind = tf.stack([tf.range(batch_size), self.a], axis=1)
    self.action_predictions = tf.gather_nd(self.predictions, ind)

    self.network_params = get_variables(self.scope)

    self.loss = tf.reduce_mean(
        tf.squared_difference(self.y, self.action_predictions))
    self.train_op = tf.train.RMSPropOptimizer(
        0.0025, 0.99, 0.0, 1e-6).minimize(
            self.loss, global_step=get_or_create_global_step())

    self.summaries = tf.summary.merge([
        tf.summary.scalar("loss", self.loss),
        tf.summary.histogram("a", tf.argmax(self.predictions, axis=1)),
        # Reduce over the action axis only, so the histogram shows the
        # per-sample maximum Q-value instead of a single batch-wide scalar.
        tf.summary.histogram("max_q",
                             tf.reduce_max(self.predictions, axis=1)),
    ])
def _build(self):
    """Build the critic graph: Q(x, a), its loss, train op and dQ/da.

    The state goes through a 400-unit layer, is concatenated with the
    action, then through a 300-unit layer and a linear scalar output.
    ``self.action_gradient`` is the gradient of ``q`` with respect to the
    action placeholder divided by the batch size.
    """
    state_shape = (None, self.cfg.status_size)
    action_shape = (None, self.cfg.action_size)
    self.x = tf.placeholder(shape=state_shape, dtype=tf.float32, name='x')
    self.a = tf.placeholder(shape=action_shape, dtype=tf.float32, name='a')
    self.y = tf.placeholder(shape=(None, ), dtype=tf.float32, name='y')

    state_features = tfl.fully_connected(self.x, 400)
    # The action is injected after the first hidden layer.
    state_and_action = tf.concat_v2([state_features, self.a], 1)
    hidden = tfl.fully_connected(state_and_action, 300)

    # NOTE(review): the L2 regulariser is registered on the layer but no
    # regularisation loss is added to self.loss below - confirm intent.
    l2 = tfl.l2_regularizer(1e-2)
    q_out = tfl.fully_connected(hidden, 1, activation_fn=None,
                                weights_regularizer=l2)
    self.q = tf.squeeze(q_out)

    self.network_params = get_variables(self.scope)

    self.loss = tf.reduce_mean(tf.squared_difference(self.y, self.q))
    optimizer = tf.train.AdamOptimizer(self.cfg.learning_rate)
    self.train_op = optimizer.minimize(
        self.loss, global_step=get_or_create_global_step())

    n_rows = tf.cast(tf.shape(self.a)[0], tf.float32)
    q_wrt_action = tf.gradients(self.q, self.a)
    self.action_gradient = tf.div(q_wrt_action, n_rows)

    summary_ops = [
        tf.summary.scalar("loss", self.loss),
        tf.summary.histogram("q", self.q),
    ]
    self.summaries = tf.summary.merge(summary_ops)
def __init__(self, metric=None, log_dir='/tmp/tflearn_logs/',
             global_step=None, session=None, graph=None, name=None):
    """Set up the estimator's graph, session, global step and log directory.

    Args:
        metric: metric specification, normalised through ``validate_func``.
        log_dir: directory for logs; created if it does not exist.
        global_step: existing global-step tensor to reuse; when ``None``
            one is fetched or created in the estimator's graph.
        session: existing ``tf.Session`` to reuse; when ``None`` a new one
            bound to the estimator's graph is created.
        graph: existing ``tf.Graph`` to reuse; when ``None`` the session's
            graph is used if a session was given, else a new graph.
        name: optional estimator name.
    """
    self.name = name

    # Estimator Graph and Session.
    # The original wrote ``tf.Graph() if None else graph``, which always
    # evaluates to ``graph`` and left self.graph as None by default,
    # crashing on ``self.graph.as_default()`` below.
    if graph is None:
        graph = session.graph if session is not None else tf.Graph()
    self.graph = graph
    self.session = (tf.Session(graph=self.graph)
                    if session is None else session)

    if global_step is None:
        with self.graph.as_default():
            self.global_step = framework.get_or_create_global_step()
    else:
        # Previously a caller-supplied global step was silently dropped.
        self.global_step = global_step

    self.metric = validate_func(metric)

    # Estimator Graph Branches
    self._train = GraphBranch()
    self._pred = GraphBranch()
    self._eval = GraphBranch()

    # Tensor Utils
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    self.log_dir = log_dir
    self._is_initialized = False
    self._to_be_restored = False

    # Ops
    self.train_op = None
    self.loss_op = None
def build_graph(self):
    """Create the global step, build the model and, in train mode, the train op."""
    from tensorflow.contrib.framework import get_or_create_global_step

    self.global_step = get_or_create_global_step()
    self._build_model()

    # The training op is only needed when self.mode is 'train'.
    if self.mode != 'train':
        return
    self._build_train_op()
def update(self, x, a, y):
    """Run one training step on (x, a, y) and return the loss value.

    Summaries are written to ``self.summary_writer`` (when it is truthy),
    tagged with the global step fetched in the same ``sess.run`` call.
    """
    fetches = [
        self.train_op,
        self.loss,
        self.summaries,
        get_or_create_global_step(),
    ]
    feeds = {self.x: x, self.a: a, self.y: y}
    _, loss_value, summary_proto, step = self.sess.run(fetches,
                                                       feed_dict=feeds)

    if self.summary_writer:
        self.summary_writer.add_summary(summary_proto, step)
    return loss_value
def train():
    """Build the CIFAR-10 training graph and run it until a hook stops it.

    Training runs in a ``MonitoredTrainingSession`` that checkpoints to
    ``FLAGS.train_dir``, stops at ``FLAGS.max_steps``, aborts on a NaN
    loss, and prints the loss and throughput every 10 steps.
    """
    with tf.Graph().as_default():
        global_step = contrib_framework.get_or_create_global_step()

        # Input pipeline.
        images, labels = cifar10.distorted_inputs()

        # Compression object configured from the hyperparameter flag.
        compression_obj = cifar10.create_compressor(
            FLAGS.compression_hparams, global_step=global_step)

        # Inference, loss and training op.
        logits = cifar10.inference(images, compression_obj)
        loss = cifar10.loss(logits, labels)
        train_op = cifar10.train(loss, global_step, compression_obj)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints loss and timing for every tenth run call."""

            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                # Request the loss tensor alongside the training op.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 != 0:
                    return

                # Throughput is computed from a fixed 128 examples/step.
                num_examples_per_step = 128
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                template = ('%s: step %d, loss = %.2f '
                            '(%.1f examples/sec; %.3f sec/batch)')
                values = (datetime.datetime.now(), self._step, loss_value,
                          examples_per_sec, sec_per_batch)
                print(template % values)

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def exponential_decay(batch_size, num_epochs, initial_rate, decay_rate,
                      dataset, staircase=True, name=None):
    """Return an exponentially decaying learning rate tied to the global step.

    The decay period is ``int(num_epochs * dataset.num_samples /
    batch_size)`` steps; all other arguments are forwarded to
    ``tf.train.exponential_decay``.
    """
    step = framework.get_or_create_global_step()

    samples_per_period = num_epochs * dataset.num_samples
    steps_per_period = int(samples_per_period / batch_size)

    return tf.train.exponential_decay(initial_rate,
                                      step,
                                      steps_per_period,
                                      decay_rate,
                                      staircase=staircase,
                                      name=name)
def optimizer_exp_decay():
    """Build an Adagrad optimizer with an exponentially decaying rate.

    The rate starts at 0.1 and is decayed by ``tf.train.exponential_decay``
    with ``decay_steps=100`` and ``decay_rate=0.001``, driven by the
    global step.

    Returns:
        The ``tf.train.AdagradOptimizer`` instance.
    """
    step = framework.get_or_create_global_step()
    decayed_rate = tf.train.exponential_decay(learning_rate=0.1,
                                              global_step=step,
                                              decay_steps=100,
                                              decay_rate=0.001)
    optimizer = tf.train.AdagradOptimizer(learning_rate=decayed_rate)
    return optimizer
def __init__(self, model_conf, is_training, images, labels):
    """Store the model configuration and inputs and pick the layer function.

    ``images`` is reshaped to ``[-1, HEIGHT, WIDTH, 1]`` using the sizes
    in ``model_conf``.  The per-block layer count is
    ``int((DEPTH - 4) / 3)``.
    """
    self._depth = model_conf.DEPTH
    self._growth_rate = model_conf.GROWTH_RATE
    self.model_conf = model_conf
    self._num_layer_per_block = int((self._depth - 4) / 3)

    image_shape = [-1, self.model_conf.HEIGHT, self.model_conf.WIDTH, 1]
    self._images = tf.reshape(images, shape=image_shape)
    self._labels = labels
    self._is_training = is_training

    # Bottleneck layers are used when the configuration asks for them.
    if self.model_conf.BOTTLENECK:
        self._layer_func = self.bottleneck_layer
    else:
        self._layer_func = self.add_layer

    self.global_step = get_or_create_global_step()

    message = "In %s phase, using %s as layer function" % (
        self.model_conf.MODE, self._layer_func.__name__)
    logger.info(message)
def update(self, s, a, y):
    """Run one training step and return the loss.

    ([?, 84, 84, 4], [?], [?]) -> loss

    Summaries are written to ``self.summary_writer`` (when it is truthy),
    tagged with the global step fetched in the same ``sess.run`` call.
    """
    fetches = [
        self.train_op,
        self.loss,
        self.summaries,
        get_or_create_global_step(),
    ]
    feeds = {self.X: s, self.a: a, self.y: y}
    _, loss_value, summary_proto, step = self.sess.run(fetches,
                                                       feed_dict=feeds)

    if self.summary_writer:
        self.summary_writer.add_summary(summary_proto, step)
    return loss_value
def main():
    """Train the actor-critic agent on CliffWalking with one-step TD updates.

    Runs forever: each environment step performs one policy update and one
    value update and writes a summary.  Episodes end when the environment
    reports ``done`` or after more than 1000 steps, at which point the
    episode statistics are printed and the environment is reset.
    """
    env = CliffWalkingEnv()
    sess = tf.Session()
    ac = ActorCritic(env.nA, env.nS)
    sess.run(tf.global_variables_initializer())

    date_str = datetime.now().strftime("%m%d_%H%M%S")
    summaries_dir = os.path.abspath("./summary/ac/" + date_str)
    if not os.path.exists(summaries_dir):
        os.makedirs(summaries_dir)
    summary_writer = tf.summary.FileWriter(summaries_dir,
                                           graph=tf.get_default_graph())

    state = env.reset()
    episode_cnt = 0
    episode_step = 0
    episode_reward = 0.
    while True:
        probs, value = sess.run([ac.probs, ac.value],
                                feed_dict={ac.state: state})
        action = np.random.choice(env.nA, p=probs)
        next_state, reward, done, _ = env.step(action)
        episode_step += 1
        episode_reward += reward

        if done:
            # A terminal state has no future return, so do not bootstrap
            # from the (never trained) value estimate of next_state.
            td_target = reward
        else:
            value_next = sess.run(ac.value,
                                  feed_dict={ac.state: next_state})
            td_target = reward + 0.99 * value_next
        td_adv = td_target - value

        summary, global_step, _, _ = sess.run(
            [ac.summary, get_or_create_global_step(),
             ac.train_p, ac.train_v],
            feed_dict={ac.state: state,
                       ac.action: action,
                       ac.adv: td_adv,
                       ac.target: td_target})
        summary_writer.add_summary(summary, global_step)

        if done or episode_step > 1000:
            print('episode cnt:', episode_cnt,
                  'episode step:', episode_step,
                  'reward:', episode_reward)
            # Keep the step counter an int; it was reset to a float, so
            # later episodes printed e.g. "17.0" instead of "17".
            episode_step = 0
            episode_reward = 0.
            episode_cnt += 1
            state = env.reset()
        else:
            state = next_state
def main(args):
    """Train the model online on webcam frames and display the results.

    Each frame is scaled to [0, 1]; the target is the same frame multiplied
    by ``sin(pi * step / 40) ** 2``.  The loop shows the input, the model
    output and the target, and stops when 'q' is pressed or the camera
    stops delivering frames.

    Args:
        args: parsed arguments; only ``args.learning_rate`` is read here.
    """
    import math  # hoisted: the original re-imported it on every frame

    cam = cv2.VideoCapture(0)
    try:
        # Create global step
        global_step = framework.get_or_create_global_step()

        # Tensor that holds raw camera frame
        image_input = tf.placeholder(tf.float32, shape=[None, None, 3],
                                     name='input_placeholder')
        input_size = tf.shape(image_input)[:2]
        batched_input = tf.expand_dims(image_input, 0)
        resized_im = tf.image.resize_images(batched_input, IMAGE_SIZE)

        # Placeholder for target
        label_placeholder = tf.placeholder(tf.float32,
                                           shape=[None, None, 3],
                                           name='label_placeholder')
        batched_label = tf.expand_dims(label_placeholder, 0)
        resized_label = tf.image.resize_images(batched_label, IMAGE_SIZE)

        # Create model; the output is resized back to the camera frame size.
        output = build_model(resized_im, is_training=True)
        resized_output = tf.squeeze(
            tf.image.resize_images(output, input_size))

        # Create optimizer
        train_op, loss_op = build_train_op(resized_label, output,
                                           args.learning_rate,
                                           global_step=global_step)

        with tf.Session() as sess:
            # Initialize
            init = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
            sess.run(init)

            step = 0
            while True:
                ret, im = cam.read()
                if not ret:
                    # No frame delivered (camera missing or disconnected);
                    # previously this crashed on ``None.astype``.
                    break
                im = im.astype('float') / 255.0

                gain = math.sin(math.pi * step / 40)
                target = gain ** 2 * im

                step, frame_out, loss, _ = sess.run(
                    [global_step, resized_output, loss_op, train_op],
                    feed_dict={image_input: im,
                               label_placeholder: target})

                # Parenthesised form works on both Python 2 and Python 3.
                print("%i: %0.5f" % (step, loss))

                cv2.imshow('input', im)
                cv2.imshow('output', frame_out)
                cv2.imshow('target', target)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the capture device and close the preview windows.
        cam.release()
        cv2.destroyAllWindows()
def build_model(self):
    """Build the sequence autoencoder graph, loss, train op and saver.

    Creates the input/target placeholders, a shared embedding matrix and a
    single GRU cell used by the encoder, the training decoder and the
    inference decoder.  Requires a default session, because the variables
    are initialised here via ``tf.global_variables_initializer().run()``.
    """
    config = self.config
    data_generator = self.data_generator

    logging.info('Building the model...')

    # Placeholders
    self.inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                 name='inputs')
    self.inputs_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                        name='inputs_length')
    self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                  name='targets')
    self.targets_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                         name='targets_length')

    vocab_size = len(data_generator.vocab)
    embeddings = tf.get_variable(name='embeddings',
                                 shape=[vocab_size, config.word_dim],
                                 dtype=tf.float32)

    with tf.variable_scope('decoder'):
        with tf.variable_scope('output') as output_scope:
            # This variable-scope-trick is used to ensure that
            # output_fn has a proper scope regardless of a caller's
            # scope.
            def output_fn(cell_outputs):
                return layers.fully_connected(inputs=cell_outputs,
                                              num_outputs=vocab_size,
                                              activation_fn=None,
                                              scope=output_scope)

    self.rnn_cell = rnn.GRUBlockCell(config.sentence_dim)

    # The original passed the bare names ``inputs`` / ``inputs_length``,
    # which are not defined in this function; the placeholders live on
    # ``self``.
    self.encoder_state = self.encode(cell=self.rnn_cell,
                                     embeddings=embeddings,
                                     inputs=self.inputs,
                                     inputs_length=self.inputs_length,
                                     scope='encoder')

    # Teacher forcing: the decoder is fed targets[:, :-1] and scored
    # against targets[:, 1:] below.
    self.decoder_outputs = self.decode_train(
        cell=self.rnn_cell,
        embeddings=embeddings,
        encoder_state=self.encoder_state,
        targets=self.targets[:, :-1],
        targets_length=self.targets_length - 1,
        scope='decoder')

    # NOTE(review): bos_id and eos_id are both the '<EOS>' id - confirm
    # that this is intended.
    self.generated = self.decode_inference(
        cell=self.rnn_cell,
        embeddings=embeddings,
        encoder_state=self.encoder_state,
        output_fn=output_fn,
        vocab_size=vocab_size,
        bos_id=data_generator.vocab['<EOS>'],
        eos_id=data_generator.vocab['<EOS>'],
        max_length=config.max_length,
        scope='decoder',
        reuse=True)

    # After this assignment the instance attribute ``loss`` holds the loss
    # tensor and shadows the ``loss`` method.  ``targets`` was another
    # undefined bare name; it is the ``self.targets`` placeholder.
    self.loss = self.loss(decoder_outputs=self.decoder_outputs,
                          output_fn=output_fn,
                          targets=self.targets[:, 1:],
                          targets_length=self.targets_length - 1)

    self.global_step = get_or_create_global_step()
    self.train_op = slim.optimize_loss(loss=self.loss,
                                       global_step=self.global_step,
                                       learning_rate=None,
                                       optimizer=tf.train.AdamOptimizer(),
                                       clip_gradients=5.0)

    self.summary_writer = tf.summary.FileWriter(
        logdir=os.path.join(config.save_dir, 'log'))
    self.summary = tf.summary.merge_all()

    # NOTE(review): this default initializer is set after the variables
    # above were created, so it presumably only affects variables created
    # later in this scope - confirm whether it should come first.
    tf.get_variable_scope().set_initializer(
        tf.random_normal_initializer(mean=0.0, stddev=0.01))
    tf.global_variables_initializer().run()

    self.saver = tf.train.Saver(max_to_keep=20)
def __init__(self, images, labels, model_conf, is_training):
    """Store inputs and configuration and set the per-stage hyperparameters.

    In training mode variable reuse is off; otherwise it is on.
    """
    self._images = images
    self._labels = labels
    self.model_conf = model_conf
    self._is_training = is_training
    self.global_step = get_or_create_global_step()

    # One entry per stage: number of filters, kernel size and stride.
    self._filters = [64, 64, 128, 256, 512]
    self._kernels = [7, 3, 3, 3, 3]
    self._stride = [2, 1, 2, 2, 2]

    self._mode = "TRAIN" if self._is_training else "Not TRAIN"
    # Reuse is False exactly when training.
    self._reuse = False if self._is_training else True

    logger.info("In %s phase" % (self._mode))
def __init__(self, nA, nS):
    """Build a single-state actor-critic graph over one-hot states.

    Args:
        nA: number of actions (size of the softmax policy output).
        nS: number of states (depth of the one-hot state encoding).
    """
    self.state = tf.placeholder(shape=(), dtype=tf.uint8, name='state')
    self.action = tf.placeholder(dtype=tf.int32, name='action')
    self.target = tf.placeholder(dtype=tf.float32, name='target')
    self.adv = tf.placeholder(dtype=tf.float32, name='advantage')

    # One-hot encode the scalar state and add a batch axis of size 1.
    encoded_state = tf.one_hot(self.state, nS, dtype=tf.float32)
    features = tf.expand_dims(encoded_state, 0)

    # Policy head: bias-free linear layer followed by softmax.
    policy_out = tfl.fully_connected(features, nA,
                                     activation_fn=tf.nn.softmax,
                                     biases_initializer=None)
    self.probs = tf.squeeze(policy_out)

    # Value head: bias-free linear layer with a single output.
    value_out = tfl.fully_connected(features, 1,
                                    activation_fn=None,
                                    biases_initializer=None)
    self.value = tf.squeeze(value_out)

    chosen_prob = tf.gather(self.probs, self.action)
    self.policy_loss = -tf.log(chosen_prob) * self.adv
    self.value_loss = tf.squared_difference(self.value, self.target)
    self.loss = self.policy_loss + self.value_loss

    learning_rate = 0.01
    step = get_or_create_global_step()
    # NOTE(review): both ops are given the global step, so running them
    # together advances it by two per update - confirm this is intended.
    policy_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_p = policy_optimizer.minimize(self.policy_loss,
                                             global_step=step)
    value_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_v = value_optimizer.minimize(self.value_loss,
                                            global_step=step)

    summary_ops = [
        tf.summary.scalar("target", self.target),
        tf.summary.scalar("adv", self.adv),
        tf.summary.histogram("probs", self.probs),
        tf.summary.scalar("value", self.value),
        tf.summary.scalar("policy_loss", self.policy_loss),
        tf.summary.scalar("value_loss", self.value_loss),
    ]
    self.summary = tf.summary.merge(summary_ops)
def __init__(self, model_conf, is_training, images, labels):
    """Store configuration and inputs and pick the layer function.

    The per-block layer count is ``int((DEPTH - 5) / 4)``.  ``images`` is
    stored as given (no reshape).  In training mode variable reuse is off;
    otherwise it is on.
    """
    self._depth = model_conf.DEPTH
    self._growth_rate = model_conf.GROWTH_RATE
    self._compression_rate = model_conf.COMPRESSION_TARE
    self.model_conf = model_conf
    self._num_layer_per_block = int((self._depth - 5) / 4)

    self._images = images
    self._labels = labels
    self._is_training = is_training

    # Bottleneck layers are used when the configuration asks for them.
    if self.model_conf.BOTTLENECK:
        self._layer_func = self.bottleneck_layer
    else:
        self._layer_func = self.add_layer

    self.global_step = get_or_create_global_step()

    self._mode = "TRAIN" if self._is_training else "Not TRAIN"
    # Reuse is False exactly when training.
    self._reuse = False if self._is_training else True

    message = "In %s phase, using %s as layer function" % (
        self._mode, self._layer_func.__name__)
    logger.info(message)
def __init__(self, hidden_size, batch_size, learning_rate):
    """Build the GAN graph, both training ops, and start a session.

    Args:
        hidden_size: width of the noise vector fed to the generator.
        batch_size: number of noise vectors sampled per step.
        learning_rate: generator learning rate; the discriminator uses
            ``learning_rate / 10``.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])

    layer_defaults = {
        'activation_fn': concat_elu,
        'normalizer_fn': layers.batch_norm,
        'normalizer_params': {'scale': True},
    }
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   **layer_defaults):
        with tf.variable_scope('model'):
            # Discriminator on real inputs (positive examples).
            real_score = discriminator(self.input_tensor)
            # Only discriminator variables exist at this point; the count
            # is used below to split the trainable variables.
            n_disc_vars = len(tf.trainable_variables())

            noise = tf.random_normal([batch_size, hidden_size])
            generated = decoder(noise)
            self.sampled_tensor = generated

        # Discriminator on generated samples, sharing weights.
        with tf.variable_scope('model', reuse=True):
            fake_score = discriminator(generated)

    d_loss = self.__get_discrinator_loss(real_score, fake_score)
    g_loss = self.__get_generator_loss(fake_score)

    trainables = tf.trainable_variables()
    d_vars = trainables[:n_disc_vars]
    g_vars = trainables[n_disc_vars:]

    step = get_or_create_global_step()
    self.train_discrimator = layers.optimize_loss(d_loss,
                                                  step,
                                                  learning_rate / 10,
                                                  'Adam',
                                                  variables=d_vars,
                                                  update_ops=[])
    self.train_generator = layers.optimize_loss(g_loss,
                                                step,
                                                learning_rate,
                                                'Adam',
                                                variables=g_vars,
                                                update_ops=[])

    self.sess = tf.Session()
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)
def __init__(self, input_producer, embed_mat, config, is_train):
    """Build the sentence VAE graph: encoder, latent sample, decoder, losses.

    Args:
        input_producer: provides the tensors ``x_enc``, ``x_dec``,
            ``y_dec``, ``len_enc``, ``len_dec`` and the int ``vocab_num``.
        embed_mat: initial embedding matrix, or ``None`` to use the default
            initializer.
        config: hyperparameters (``batch_size``, ``hidden_size``,
            ``embed_dim``, ``is_GRU``, ``is_argmax_sampling``,
            ``word_dropout_keep_prob``, ``max_grad_norm``,
            ``learning_rate``).
        is_train: not read by this constructor.
    """
    with tf.variable_scope("VAE") as var_scope:
        x_enc = input_producer.x_enc
        x_dec = input_producer.x_dec
        y_dec = input_producer.y_dec
        len_enc = input_producer.len_enc
        len_dec = input_producer.len_dec
        vocab_num = input_producer.vocab_num

        batch_size = config.batch_size
        hidden_size = config.hidden_size
        embed_dim = config.embed_dim
        is_GRU = config.is_GRU
        is_argmax_sampling = config.is_argmax_sampling
        word_keep_prob = config.word_dropout_keep_prob
        max_grad_norm = config.max_grad_norm
        learning_rate = config.learning_rate

        # The original call was ``tf.Variable(0.0, "KL_weight")``; the
        # second positional parameter of tf.Variable is ``trainable``, so
        # the string made the KL weight a trainable variable that ended up
        # in ``self.vars`` and was updated by Adam.  It is a schedule value
        # set through ``KL_weight_update`` and must not be optimised.  The
        # variable name is left at its default so existing checkpoint keys
        # stay valid.
        self.KL_weight = tf.Variable(0.0, trainable=False)
        self.input_ids = y_dec

        def _lstm_cell():
            return BasicLSTMCell(num_units=hidden_size,
                                 forget_bias=1.0,
                                 state_is_tuple=True,
                                 reuse=tf.get_variable_scope().reuse)

        def _gru_cell():
            return GRUCell(num_units=hidden_size,
                           reuse=tf.get_variable_scope().reuse)

        cell = _gru_cell if is_GRU else _lstm_cell
        self.initial_state = cell().zero_state(batch_size, tf.float32)

        # encoder
        with tf.device("/cpu:0"):
            embed_init = (tf.constant_initializer(embed_mat)
                          if (embed_mat is not None) else None)
            embedding = tf.get_variable("embedding",
                                        [vocab_num, embed_dim],
                                        initializer=embed_init,
                                        trainable=True)
            in_enc = embedding_lookup(embedding, x_enc)

        with tf.variable_scope("encoder"):
            out_tuple = dynamic_rnn(cell=cell(),
                                    inputs=in_enc,
                                    sequence_length=len_enc,
                                    initial_state=self.initial_state)
        (_, encoder_hidden) = out_tuple

        # linear layers for mu and log(var)
        latent_dim = hidden_size  # may have to change this later
        W_mu = tf.get_variable("W_mu", [hidden_size, latent_dim])
        b_mu = tf.get_variable("b_mu", [latent_dim])
        W_logvar = tf.get_variable("W_logvar", [hidden_size, latent_dim])
        b_logvar = tf.get_variable("b_logvar", [latent_dim])

        mu = tf.matmul(encoder_hidden, W_mu) + b_mu
        logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

        # sample epsilon
        epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

        # sample latent variable: z = mu + stddev * epsilon
        stddev = tf.exp(0.5 * logvar)  # standard deviation
        self.z = mu + tf.multiply(stddev, epsilon)

        # decoder
        with tf.device("/cpu:0"):
            in_dec = embedding_lookup(embedding, x_dec)

        with tf.variable_scope("decoder"):
            helper = WordDropoutTrainingHelper(
                inputs=in_dec,
                sequence_length=len_dec,
                embedding=embedding,
                dropout_keep_prob=word_keep_prob,
                drop_token_id=UNK_ID,
                is_argmax_sampling=is_argmax_sampling)

            # projection layer
            output_layer = Dense(units=vocab_num,
                                 activation=None,
                                 use_bias=True,
                                 trainable=True)

            # decoder, started from the sampled latent vector
            decoder = BasicDecoder(cell=cell(),
                                   helper=helper,
                                   initial_state=self.z,
                                   output_layer=output_layer)

            # dynamic_decode
            out_tuple = dynamic_decode(decoder=decoder,
                                       output_time_major=False,  # speed
                                       impute_finished=True)

        # get all the variables in this scope
        self.vars = tf.contrib.framework.get_variables(var_scope)

        # (outputs, state, sequence_length)
        (self.outputs, _, self.cell_outputs_len) = out_tuple  # final

        # (cell_outputs, sample_ids)
        (self.cell_outputs, self.sampled_ids) = self.outputs

        # compute softmax loss (reconstruction)
        len_out = tf.reduce_max(len_dec)
        targets = y_dec[:, :len_out]
        weights = tf.sequence_mask(self.cell_outputs_len,
                                   dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.cell_outputs,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        self.AE_loss = self.AE_loss_mean = softmax_loss

        # compute KL loss (regularization)
        KL_term = 1 + logvar - tf.pow(mu, 2) - tf.exp(logvar)
        self.KL_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.KL_loss_mean = tf.reduce_mean(self.KL_loss)

        # total loss
        self.loss = self.AE_loss + self.KL_weight * self.KL_loss_mean

        # optimization
        self.lr = tf.Variable(learning_rate, trainable=False, name="lr")

        # ``self.vars`` holds every variable in the scope, including the
        # KL weight; optimise everything except that schedule variable.
        train_vars = [v for v in self.vars if v is not self.KL_weight]
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, train_vars), max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.global_step = get_or_create_global_step()
        self.train_op = optimizer.apply_gradients(
            zip(grads, train_vars), global_step=self.global_step)

        # learning_rate update
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
        self.lr_update = tf.assign(self.lr, self.new_lr)

        # KL weight update
        self.new_KL_weight = tf.placeholder(tf.float32, shape=[],
                                            name="new_kl")
        self.KL_weight_update = tf.assign(self.KL_weight,
                                          self.new_KL_weight)

        # summaries
        tf.summary.scalar("Loss/AE_mean", self.AE_loss_mean)
        tf.summary.scalar("Loss/KL_mean", self.KL_loss_mean)
        # The logged total is the unweighted sum AE + KL.
        tf.summary.scalar("Loss/Total",
                          self.AE_loss_mean + self.KL_loss_mean)
        tf.summary.scalar("Misc/KL_weight", self.KL_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(mu))
        tf.summary.scalar("Misc/sigma_mean", tf.reduce_mean(stddev))
        tf.summary.scalar("Misc/learning_rate", self.lr)
        self.summary_op = tf.summary.merge_all()
def __init__(self, input_producer, embed_mat, config, is_train):
    """Build the controlled-VAE graph: VAE, generator and discriminator.

    Three training ops are created: ``vae_train`` (encoder + decoder on
    the VAE loss), ``gen_train`` (decoder on the code losses) and
    ``dis_train`` (discriminator; the only op that advances the global
    step).

    Args:
        input_producer: provides ``x_enc``, ``x_dec``, ``y_dec``,
            ``len_enc``, ``len_dec``, ``answ_disc``, ``seq_max_length``
            and ``vocab_num``.
        embed_mat: embedding matrix handed to ``CtrlVAEModelingHelper``.
        config: hyperparameters; updated in place with ``max_len`` and
            ``vocab_num``.
        is_train: not read by this constructor.
    """
    x_enc = input_producer.x_enc
    x_dec = input_producer.x_dec
    y_dec = input_producer.y_dec
    len_enc = input_producer.len_enc
    len_dec = input_producer.len_dec
    self.answer = input_producer.answ_disc
    max_len = input_producer.seq_max_length
    vocab_num = input_producer.vocab_num

    config.update(**dict(max_len=max_len, vocab_num=vocab_num))

    # The original call was ``tf.Variable(0.0, "KL_weight")``; the second
    # positional parameter of tf.Variable is ``trainable``, so the string
    # marked this schedule value as trainable.  It is only ever set via
    # ``kl_weight_update``.  The variable name is left at its default so
    # existing checkpoint keys stay valid.
    self.kl_weight = tf.Variable(0.0, trainable=False)
    self.input_ids = y_dec

    modeler = CtrlVAEModelingHelper(config, embed_mat)

    with tf.variable_scope("CtrlVAE"):

        ### VAE ############################################################

        # encoder
        x_enc_onehot = tf.one_hot(x_enc, vocab_num)
        out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                    len_enc=len_enc)
        (vae_z, vae_mu, vae_logvar) = out_tuple

        # holistic representation: latent z concatenated with the
        # embedded answer code c
        with tf.device("/cpu:0"):
            vae_c = embedding_lookup(modeler.embed, self.answer)
        vae_c = tf.reshape(vae_c, [config.batch_size, -1])
        vae_represent = tf.concat([vae_z, vae_c], axis=1)

        # decoder
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=vae_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True)
        (vae_outputs, vae_state, vae_outputs_len) = out_tuple  # final
        (self.vae_output, self.vae_sample) = vae_outputs

        ### Generator ######################################################

        # random z from the prior; c is reused from the VAE branch
        self.gen_z = tf.random_normal(
            [config.batch_size, config.hidden_size])
        self.gen_c = vae_c
        gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

        # generator (decoder), sharing the decoder weights
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=gen_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True,
                                    reuse=True)
        (gen_outputs, gen_state, gen_outputs_len) = out_tuple  # final
        (self.gen_output, self.gen_sample) = gen_outputs

        # Softmax over logits divided by ALMOST_ZERO.
        gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

        # discriminator (for c code)
        out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                          inputs_length=gen_outputs_len)
        (self.gen_c_output, self.gen_c_sample) = out_tuple

        # encoder again (for z code ; additional discriminator)
        out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                    len_enc=gen_outputs_len,
                                    reuse=True)
        (gen_z, dis_mu, dis_logvar) = out_tuple

        ### Discriminator ##################################################

        # discriminator (for training)
        # NOTE(review): the inputs are built from x_enc but the lengths
        # passed are gen_outputs_len, not len_enc - confirm.
        x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
        out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                          inputs_length=gen_outputs_len,
                                          reuse=True)
        (self.dis_outputs, self.dis_sample) = out_tuple

    ########################################################################
    # get all the variables in this scope
    self.vars = get_variables("CtrlVAE")
    self.enc_vars = get_variables("CtrlVAE/encoder")
    self.gen_vars = get_variables("CtrlVAE/decoder")
    self.dis_vars = get_variables("CtrlVAE/discriminator")
    self.vae_vars = self.enc_vars + self.gen_vars

    ########################################################################
    # compute AE loss (reconstruction)
    len_out = tf.reduce_max(vae_outputs_len)
    targets = y_dec[:, :len_out]
    weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)

    softmax_loss = sequence_loss(logits=self.vae_output,
                                 targets=targets,
                                 weights=weights,
                                 average_across_timesteps=False,
                                 average_across_batch=False)

    # NOTE: fix later!  (summed over time, then averaged over the batch)
    loss_sum = tf.reduce_sum(softmax_loss, axis=1)
    self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)

    # compute KL loss (regularization)
    KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
    self.kl_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
    self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

    # VAE total loss
    self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean

    ########################################################################
    # c code loss
    answer_labels = tf.one_hot(self.answer, config.vocab_num)
    c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                               logits=self.gen_c_output)
    self.c_loss = tf.reduce_mean(c_loss)

    # z code loss
    mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
    logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
    self.z_loss = (mu_loss + logvar_loss) / 2

    # generator total loss
    self.gen_loss = self.c_loss + self.z_loss

    ########################################################################
    # discriminator training loss
    dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                 logits=self.dis_outputs)
    self.dis_loss = tf.reduce_mean(dis_loss)

    ########################################################################
    # optimization
    lr = config.learning_rate
    self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
    # The generator rate starts at 0.0 and is set via gen_lr_update.
    self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
    self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

    vae_optim = tf.train.AdamOptimizer(self.vae_lr)
    gen_optim = tf.train.AdamOptimizer(self.gen_lr)
    dis_optim = tf.train.AdamOptimizer(self.dis_lr)

    vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
    gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
    dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

    vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
    gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
    dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

    self.global_step = get_or_create_global_step()
    self.vae_train = vae_optim.apply_gradients(
        zip(vae_grads, self.vae_vars))
    self.gen_train = gen_optim.apply_gradients(
        zip(gen_grads, self.gen_vars))
    # Only the discriminator update advances the global step.
    self.dis_train = dis_optim.apply_gradients(
        zip(dis_grads, self.dis_vars), self.global_step)

    # learning_rate update
    self.new_gen_lr = tf.placeholder(tf.float32, shape=[],
                                     name="new_gen_lr")
    self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

    # KL weight update
    self.new_kl_weight = tf.placeholder(tf.float32, shape=[],
                                        name="new_kl")
    self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

    # summaries
    tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
    tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
    # The logged total is the unweighted sum AE + KL.
    tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
    tf.summary.scalar("Misc/kl_weight", self.kl_weight)
    tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
    tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
    tf.summary.scalar("Misc/gen_lr", self.gen_lr)
    self.summary_op = tf.summary.merge_all()
def run():
    """Evaluate a checkpoint on the validation split with streaming accuracy.

    Builds a fresh graph, restores ``checkpoint_file`` through a
    ``tf.train.Supervisor`` init function, runs
    ``int(num_steps_per_epoch * num_epochs)`` evaluation steps, writes a
    'Validation_Accuracy' summary every 10 steps, and finally prints the
    predicted and ground-truth label names for the first 10 items of one
    more batch.  Relies on the module-level names ``log_eval``,
    ``dataset_dir``, ``batch_size``, ``num_epochs`` and
    ``checkpoint_file``.
    """
    # Create log_dir for evaluation information
    if not os.path.exists(log_eval):
        os.mkdir(log_eval)

    # Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)
        print("tf.logging")

        # Get the dataset first and load one batch of validation images and
        # labels tensors. Set is_training as False so as to use the
        # evaluation preprocessing
        dataset = get_split('validation', dataset_dir)
        print("dataset")
        images, raw_images, labels = load_batch(dataset, batch_size=batch_size, is_training=False)
        print(labels)

        # Create some information about the training steps
        # NOTE(review): under Python 3 this is true division and yields a
        # float, which is then used in the modulo test and the epoch log
        # below - confirm the intended Python version.
        num_batches_per_epoch = dataset.num_samples / batch_size
        num_steps_per_epoch = num_batches_per_epoch
        print("num_batches_per_epoch,num_steps_per_epoch", num_batches_per_epoch, num_steps_per_epoch)

        # Now create the inference model but set is_training=False
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                images, num_classes=dataset.num_classes, is_training=False)
        print("logists")

        # Get all the variables to restore from the checkpoint file and
        # create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        print("finished variables_to_restore")
        saver = tf.train.Saver(variables_to_restore)
        print("finished tf.train.Saver(variables_to_restore)")

        def restore_fn(sess):
            # Passed to the Supervisor below as init_fn; restores the
            # checkpoint into the given session.
            print(checkpoint_file)
            print("saver.restore(sess, checkpoint_file)")
            return saver.restore(sess, checkpoint_file)

        # Just define the metrics to track without the loss or whatsoever
        predictions = tf.argmax(end_points['Predictions'], 1)
        print("predictions", predictions)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        print("accuracy, accuracy_update\n")
        metrics_op = tf.group(accuracy_update)
        print(" metrics_op\n")

        # Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        print("global_step\n")
        global_step_op = tf.assign(
            global_step, global_step + 1
        )  # no apply_gradient method so manually increasing the global_step
        print("global_step_op\n")

        # Create an evaluation step function
        def eval_step(sess, metrics_op, global_step):
            """Run the metrics op once, log the result, return the accuracy.

            The ``global_step`` argument is not read; the step count comes
            from ``global_step_op`` in the enclosing scope.
            """
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()
        print("finished Define some scalar quantities to monitor")

        # Get your supervisor; summaries and saving are handled manually
        # (summary_op=None, saver=None).
        sv = tf.train.Supervisor(logdir=log_eval,
                                 summary_op=None,
                                 saver=None,
                                 init_fn=restore_fn)
        print("finished tf.train.Supervisor")

        # Now we are ready to run in one session
        with sv.managed_session() as sess:
            print("begin sv.managed_session()")
            print(int(num_steps_per_epoch * num_epochs))
            for step in range(int(num_steps_per_epoch * num_epochs)):
                print(step)
                sess.run(sv.global_step)
                print(
                    "print vital information every start of the epoch as always"
                )
                # Print vital information every start of the epoch as always
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s',
                                 step / num_batches_per_epoch + 1,
                                 num_epochs)
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                # Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                # Otherwise just run as per normal
                else:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)

            # At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))
            print(
                "Now we want to visualize the last batch's images just to see what our model has predicted"
            )

            # Fetch one more batch and print predictions next to the
            # ground truth.  From here on the three names hold the fetched
            # values, not tensors.
            raw_images, labels, predictions = sess.run(
                [raw_images, labels, predictions])
            # NOTE(review): indexes the first 10 items, so this assumes
            # batch_size >= 10.
            for i in range(10):
                image, label, prediction = raw_images[i], labels[
                    i], predictions[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name,
                                                               label_name)
                print(text)

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
def main():
    """Build the GAN graph and run the alternating training loop.

    Each outer iteration runs ``num_k`` discriminator updates followed by one
    generator update. Every 100 iterations the latest statistics are printed;
    whenever ``i % 1000 == 1`` a checkpoint and two image summaries (generated
    samples and resized ground truth) are written.

    NOTE(review): the loop nesting was reconstructed from a
    whitespace-collapsed listing -- confirm it against the original layout.
    """
    global_step = get_or_create_global_step()

    z = tf.placeholder(tf.float32, [batch_size, noise_size])
    x = tf.placeholder(tf.float32, [batch_size, height, width, channel])
    x_reshaped = tf.reshape(x, [batch_size, height, width, 3])
    x_resized = tf.image.resize_images(x_reshaped, [32, 32])

    G = generator(z)
    D_real = discriminator(x_resized)
    D_fake = discriminator(G)

    var_generator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='generator')
    var_discriminator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='discriminator')

    loss_discriminator = tf.reduce_mean(
        -tf.log(D_real + epsilon) - tf.log(1 - D_fake + epsilon),
        axis=0) + get_reg_loss()
    loss_generator = tf.reduce_mean(-tf.log(D_fake), axis=0)

    # Both minimize() calls receive the same global_step tensor.
    optimize_discriminator = tf.train.AdamOptimizer(
        learning_rate * 0.1, beta1=0.5).minimize(
            loss=loss_discriminator,
            var_list=var_discriminator,
            global_step=global_step)
    optimize_generator = tf.train.AdamOptimizer(
        learning_rate, beta1=0.5).minimize(
            loss=loss_generator,
            var_list=var_generator,
            global_step=global_step)

    # Build the image summaries ONCE. Creating them inside the training loop
    # adds new ops to the graph on every checkpoint and makes TensorFlow
    # uniquify the tags ('generated_1', 'generated_2', ...).
    save_image = tf.summary.image('generated',
                                  tf.multiply(tf.add(G, 1), 127.5),
                                  max_outputs=batch_size)
    save_gt = tf.summary.image('ground_truth',
                               tf.multiply(tf.add(x_resized, 1), 127.5),
                               max_outputs=30)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    print('g', [item.name for item in var_generator])
    print('d', [item.name for item in var_discriminator])

    saver = tf.train.Saver()
    sess = tf.Session()
    writer = None
    try:
        # Resume from the latest checkpoint if there is one, else initialize.
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt:
            print('load_model', ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('initialize_model')
            sess.run(init_op)

        writer = tf.summary.FileWriter(tb_dir, sess.graph)

        for i in range(num_epoch):
            # Discriminator phase: num_k updates on fresh real/noise batches.
            for _ in range(num_k):
                real = get_next_batch(batch_size)
                noise = generate_random_normal_vector()
                _D_real, _loss_d, _ = sess.run(
                    [D_real, loss_discriminator, optimize_discriminator],
                    feed_dict={x: real, z: noise})

            # Generator phase: a single update on a fresh noise batch.
            noise = generate_random_normal_vector()
            _D_fake, _loss_g, _ = sess.run(
                [D_fake, loss_generator, optimize_generator],
                feed_dict={z: noise})

            if i % 100 == 0:
                print('%g th step' % i)
                print('D_real : %g' % np.mean(_D_real))
                print('D_fake : %g' % np.mean(_D_fake))
                print('loss_d', _loss_d)
                print('loss_g', _loss_g)
                print('G', np.mean(sess.run(G, feed_dict={z: noise})))

            if i % 1000 == 1:
                _global_step = sess.run(global_step)
                saver.save(sess,
                           checkpoint_dir + 'model.ckpt',
                           global_step=_global_step)

                real = get_next_batch(batch_size)
                noise = generate_random_normal_vector()

                # The tag is now constant, so the step is what tells
                # successive image dumps apart in TensorBoard.
                image_summary = sess.run(save_image, feed_dict={z: noise})
                writer.add_summary(image_summary, global_step=_global_step)
                print('write generated samples')

                image_summary = sess.run(save_gt, feed_dict={x: real})
                writer.add_summary(image_summary, global_step=_global_step)
                print('write ground truth')
    finally:
        # Flush buffered events and release the session even on failure.
        if writer is not None:
            writer.close()
        sess.close()
def train():
    """Run pruning-aware CIFAR-10 training until the step limit is reached."""
    with tf.Graph().as_default():
        step_tensor = contrib_framework.get_or_create_global_step()

        # Input pipeline -> logits -> loss.
        images, labels = cifar10.distorted_inputs()
        logits = cifar10.inference(images)
        loss = cifar10.loss(logits, labels)

        # One optimisation step over a single batch.
        train_op = cifar10.train(loss, step_tensor)

        # Pruning is configured from the flag string and tied to the same
        # step tensor as the optimiser.
        hparams = pruning.get_pruning_hparams().parse(FLAGS.pruning_hparams)
        pruner = pruning.Pruning(hparams, global_step=step_tensor)

        # Op that refreshes the masks, plus the pruning summaries.
        mask_update_op = pruner.conditional_mask_update_op()
        pruner.add_pruning_summaries()

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints loss and throughput on every tenth hooked run."""

            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 != 0:
                    return

                num_examples_per_step = 128
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                template = ('%s: step %d, loss = %.2f '
                            '(%.1f examples/sec; %.3f sec/batch)')
                print(template % (datetime.datetime.now(), self._step,
                                  loss_value, examples_per_sec,
                                  sec_per_batch))

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
                # Follow every training step with a mask update.
                mon_sess.run(mask_update_op)
# Unrolled RNN forward pass followed by the clipped-gradient training ops.
# This fragment relies on names defined outside it (embedding, input_,
# is_training, config, _initial_state, cell, num_steps, hidden_size, cost).

# Look up the embedding of every input token, with optional dropout.
inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
if is_training and config.keep_prob < 1:
    inputs = tf.nn.dropout(inputs, keep_prob=config.keep_prob)

outputs = []
state = _initial_state  # The initial state is reset after every batch.
with tf.variable_scope("RNN"):
    for time_step in range(num_steps):
        if time_step > 0:
            # Share the cell weights across all unrolled steps.
            tf.get_variable_scope().reuse_variables()
        # Feed exactly one time step to the cell. The previous slice
        # `inputs[:, time_step:]` kept every remaining step on axis 1.
        # assumes inputs is (batch, num_steps, embedding_dim) -- TODO confirm
        (cell_output, state) = cell(inputs[:, time_step, :], state)
        outputs.append(cell_output)
output = tf.reshape(tf.concat(outputs, axis=1), [-1, hidden_size])

# Learning-rate and gradient control.
_lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                  config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(_lr)
train_op = optimizer.apply_gradients(
    zip(grads, tvars),
    global_step=framework.get_or_create_global_step())

# Assigning a new value to the learning rate.
_new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
_lr_update = tf.assign(_lr, _new_lr)


def assign_lr(sess, lr_value):
    """Set the learning-rate variable to ``lr_value`` by running the assign op in ``sess``."""
    sess.run(_lr_update, feed_dict={_new_lr: lr_value})
def run():
    """Fine-tune Inception-ResNet-v2 on the 'train' split and save a checkpoint.

    Builds the input pipeline, model, loss, learning-rate schedule, train op
    and streaming-accuracy metric in a fresh graph. All variables except the
    excluded logits scopes are restored from ``checkpoint_file``. The loop
    then runs ``num_steps_per_epoch * num_epochs`` steps under a
    ``tf.train.Supervisor``, writes summaries every 100 steps and saves the
    model at the end.
    """
    # Create the log directory here rather than at import time. makedirs
    # also copes with a nested log_dir whose parents do not exist yet.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # ======================= TRAINING PROCESS =========================
    with tf.Graph().as_default():
        # Create the dataset and load one batch.
        dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
        images, _, labels = load_batch(dataset, batch_size=batch_size)

        # One step processes one batch, so steps per epoch equals batches
        # per epoch. The decay interval is expressed in steps.
        print("dataset.num_samples", dataset.num_samples)
        num_batches_per_epoch = int(dataset.num_samples / batch_size)
        print("num_batches_per_epoch", num_batches_per_epoch)
        num_steps_per_epoch = num_batches_per_epoch
        decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
        print("decay_steps", decay_steps)

        # Model inference.
        print("dataset.num_classes", dataset.num_classes)
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                images, num_classes=dataset.num_classes, is_training=True)

        # Scopes that must NOT be restored from the checkpoint.
        exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                               logits=logits)
        # Total loss also includes the regularization losses.
        total_loss = tf.losses.get_total_loss()

        # Global step drives the learning-rate schedule and training.
        global_step = get_or_create_global_step()

        lr = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=learning_rate_decay_factor,
            staircase=True)

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # Streaming accuracy over the predicted classes.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # Summaries.
        print("total_loss", total_loss)
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        def train_step(sess, train_op, global_step):
            """Run one step of train_op, global_step and metrics_op together.

            Prints the step count, loss and elapsed time, and returns
            ``(total_loss, global_step_count)``.
            """
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            print('global step %s: loss: %.4f (%.2f sec/step)' %
                  (global_step_count, total_loss, time_elapsed))
            return total_loss, global_step_count

        # Saver restricted to the variables restored from the checkpoint.
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            """Restore the selected variables from ``checkpoint_file``."""
            return saver.restore(sess, checkpoint_file)

        # The supervisor must not run the summary op automatically (the
        # original author notes it would consume too much memory).
        sv = tf.train.Supervisor(logdir=log_dir,
                                 summary_op=None,
                                 init_fn=restore_fn)
        print("restore checkpoint success")

        with sv.managed_session() as sess:
            total_steps = num_steps_per_epoch * num_epochs
            print("num_steps_per_epoch * num_epochs:", total_steps)
            for step in range(total_steps):
                print("step:%s/%s" % (step, total_steps))

                # At the start of every epoch, show the vital information.
                if step % num_batches_per_epoch == 0:
                    # Integer division keeps the epoch an int ('Epoch 1/..'
                    # rather than 'Epoch 1.0/..' under Python 3).
                    print('Epoch %s/%s' %
                          (step // num_batches_per_epoch + 1, num_epochs))
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])
                    print('Current Learning Rate: %s' % learning_rate_value)
                    print('Current Streaming Accuracy: %s' % accuracy_value)

                    # Sanity check: print predictions next to the labels.
                    _, _, predictions_value, labels_value = sess.run(
                        [logits, probabilities, predictions, labels])
                    print('predictions: \n', predictions_value)
                    print('Labels:\n', labels_value)

                # Every step trains; every 100th step also logs summaries.
                loss, _ = train_step(sess, train_op, sv.global_step)
                if step % 100 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # Final training loss and accuracy.
            print('Final Loss: %s' % loss)
            sess_accuracy = sess.run(accuracy)
            print('Final Accuracy: %s' % sess_accuracy)

            # Save the log files and the checkpoint.
            print('Finished training! Saving model to disk now.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
            print('Finished training! Saved model to disk now.')
def train_model(config):
    """Train the model using the passed in config.

    Builds the input pipeline, creates one model tower per training device,
    sets up the Adam optimiser with a floor-limited decaying learning rate,
    collects summaries and hands the resulting train op to ``learning.train``.

    Args:
        config: configuration object; the attributes read here include
            datadir, dataset, fold_count, fold, batch_size, num_epochs,
            num_readers, num_preprocessing_threads, the learning_rate*
            settings, clip, checkpoint_frequency, summary_frequency and
            trace_frequency.

    Raises:
        AssertionError: if no training device or no tower is available.

    NOTE(review): the device/name-scope nesting below was reconstructed from
    a whitespace-collapsed listing -- confirm it against the original layout.
    """
    training_devices = [
        graph_utils.device_fn(device)
        for device in graph_utils.collect_devices({'GPU': FLAGS.num_gpus})]
    assert training_devices, 'Found no training devices!'

    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.device('/cpu:0'), tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir, config.dataset, 'train',
            num_folds=config.fold_count, fold=config.fold, holdout=False)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset, dataset, config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        # Batches are produced for as many devices as there are towers.
        inputs_queue = input_utils.batch_images(
            image, config.batch_size,
            num_threads=config.num_preprocessing_threads,
            num_devices=len(training_devices))

    ###########################################################
    # Generate the model
    ###########################################################
    towers = graph_utils.create_towers(
        create_training_model, training_devices, config, inputs_queue, dataset)
    assert towers, 'No training towers were created!'

    ###########################################################
    # Setup the training objectives
    ###########################################################
    with tf.name_scope('training'):
        with tf.device('/cpu:0'):
            # The configured decay step is divided by the number of towers.
            learning_rate_decay_step = config.learning_rate_decay_step / len(towers)
            # The decayed rate is clamped from below at learning_rate_min.
            learning_rate = tf.maximum(
                exponential_decay(
                    config.batch_size, learning_rate_decay_step,
                    config.learning_rate, config.learning_rate_decay, dataset),
                config.learning_rate_min, name='learning_rate')

            tf.add_to_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS, learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

        # Calculate gradients and total loss
        tower_klds, tower_losses, grads_and_vars = graph_utils.optimize_towers(
            optimizer, towers, clip_norm=config.clip)
        # total_kld stays None when the towers report no KLD terms.
        total_kld = tf.add_n(tower_klds, name='total_kld') if tower_klds else None
        total_loss = tf.add_n(tower_losses, name='total_loss')

        # Gather update ops from the first tower (for updating batch_norm for example)
        global_step = framework.get_or_create_global_step()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, towers[0].scope)
        update_ops.append(optimizer.apply_gradients(grads_and_vars, global_step=global_step))

        # train_op yields total_loss, but only after every update op has run.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(total_loss, name='train_op')

    ###########################################################
    # Collect summaries
    ###########################################################
    with tf.device('/cpu:0'):
        summaries = []
        summaries.extend(learning.add_gradients_summaries(grads_and_vars))
        summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.METRICS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

        # NOTE(review): which of the loops below sit inside the 'losses'
        # name scope is an assumption -- it changes the summary tag prefix.
        with tf.name_scope('losses'):
            if total_kld is not None:
                summaries.append(tf.summary.scalar('total_kld', total_kld))
            summaries.append(tf.summary.scalar('total_loss', total_loss))

            for loss in tower_losses:
                summaries.append(tf.summary.scalar(loss.op.name, loss))

            for loss in tf.losses.get_losses():
                summaries.append(tf.summary.scalar(loss.op.name, loss))

        summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin training
    ###########################################################
    global_init_op = tf.global_variables_initializer()
    # Combine with the input pipeline's init op when it provides one.
    init_op = global_init_op if init_op is None else tf.group(global_init_op, init_op)
    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        log_device_placement=FLAGS.log_device_placement)

    # Steps per epoch per device, times epochs, minus a buffer of two
    # steps per device.
    prefetch_queue_buffer = 2 * len(training_devices)
    number_of_steps = int(int(dataset.num_samples / config.batch_size) / len(training_devices))
    number_of_steps = number_of_steps * config.num_epochs - prefetch_queue_buffer

    tf.logging.info('Running %s steps', number_of_steps)
    learning.train(
        train_op, FLAGS.log_dir, session_config=session_config,
        global_step=global_step, number_of_steps=number_of_steps,
        init_op=init_op, init_feed_dict=init_feed_dict,
        save_interval_secs=config.checkpoint_frequency,
        summary_op=summary_op,
        save_summaries_secs=config.summary_frequency,
        # A non-positive trace_frequency is passed on as None.
        trace_every_n_steps=config.trace_frequency if config.trace_frequency > 0 else None)
def main():
    """Train the sequence autoencoder described by the command-line options.

    Reads its settings from the module-level ``args``, loads or creates (and
    saves) the model config, builds the encoder/decoder graph and iterates
    over ``DataGenerator`` batches. A summary is written on every step, two
    sample reconstructions are logged every 100 global steps and a
    checkpoint is saved every 500 global steps.

    Raises:
        AssertionError: if hyperparameter options are combined with
            ``config``, or are incomplete when ``config`` is absent.
    """
    data_path = args.data
    vocab_path = args.vocab
    save_dir = args.save_dir
    word_dim = args.word_dim
    sentence_dim = args.sentence_dim
    omit_prob = args.omit_prob
    swap_prob = args.swap_prob
    config_path = args.config
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    max_length = args.max_length

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Check whether all needed options are given
    if config_path is not None:
        assert (word_dim is None and sentence_dim is None
                and omit_prob is None and swap_prob is None), (
            'Model hyperparameter options must not be provided when '
            'the "config" option is given.')
        config = ModelConfig.load(config_path)
    else:
        assert not (
            word_dim is None or sentence_dim is None
            or omit_prob is None or swap_prob is None), (
            'All model hyperparameter options must be provided when '
            'the "config" option is not given.')
        config = ModelConfig(word_dim=word_dim, sentence_dim=sentence_dim,
                             omit_prob=omit_prob, swap_prob=swap_prob)
        # Persist the freshly built config next to the checkpoints.
        config_path = os.path.join(save_dir, 'config.ini')
        config.save(config_path)

    logging.info('Initializing the data generator...')
    data_generator = DataGenerator(
        data_path=data_path, vocab_path=vocab_path,
        eos_symbol='<EOS>', unk_symbol='<UNK>',
        omit_prob=config.omit_prob, swap_prob=config.swap_prob,
        batch_size=batch_size, max_length=max_length, max_epoch=max_epoch)

    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            logging.info('Building the model...')

            # Placeholders
            inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                    name='inputs')
            inputs_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                           name='inputs_length')
            targets = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                     name='targets')
            targets_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                            name='targets_length')

            vocab_size = len(data_generator.vocab)
            embeddings = tf.get_variable(name='embeddings',
                                         shape=[vocab_size, config.word_dim],
                                         dtype=tf.float32)

            with tf.variable_scope('decoder'):
                with tf.variable_scope('output') as output_scope:
                    # This variable-scope-trick is used to ensure that
                    # output_fn has a proper scope regardless of a caller's
                    # scope.
                    def output_fn(cell_outputs):
                        return layers.fully_connected(
                            inputs=cell_outputs, num_outputs=vocab_size,
                            activation_fn=None, scope=output_scope)

            # The same cell object is handed to encoder and decoder.
            rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
            encoder_state = sae.encode(
                cell=rnn_cell, embeddings=embeddings, inputs=inputs,
                inputs_length=inputs_length, scope='encoder')
            # Teacher forcing: decoder input drops the last target token,
            # while the loss below compares against targets[:, 1:].
            decoder_outputs = sae.decode_train(
                cell=rnn_cell, embeddings=embeddings,
                encoder_state=encoder_state,
                targets=targets[:, :-1],
                targets_length=targets_length - 1,
                scope='decoder')
            # '<EOS>' is passed as both the begin and end symbol id.
            generated = sae.decode_inference(
                cell=rnn_cell, embeddings=embeddings,
                encoder_state=encoder_state,
                output_fn=output_fn,
                vocab_size=vocab_size,
                bos_id=data_generator.vocab['<EOS>'],
                eos_id=data_generator.vocab['<EOS>'],
                max_length=max_length,
                scope='decoder', reuse=True)
            loss = sae.loss(decoder_outputs=decoder_outputs,
                            output_fn=output_fn,
                            targets=targets[:, 1:],
                            targets_length=targets_length - 1)

            global_step = get_or_create_global_step()
            train_op = slim.optimize_loss(
                loss=loss, global_step=global_step, learning_rate=None,
                optimizer=tf.train.AdamOptimizer(), clip_gradients=5.0)

            summary_writer = tf.summary.FileWriter(
                logdir=os.path.join(save_dir, 'log'), graph=graph)
            try:
                summary = tf.summary.merge_all()

                # NOTE(review): this runs after every variable above was
                # created, so it presumably does not affect how they are
                # initialised -- confirm the intent before moving it.
                tf.get_variable_scope().set_initializer(
                    tf.random_normal_initializer(mean=0.0, stddev=0.01))
                tf.global_variables_initializer().run()

                saver = tf.train.Saver(max_to_keep=20)

                logging.info('Training starts!')
                for data_batch in data_generator:
                    (inputs_v, inputs_length_v,
                     targets_v, targets_length_v) = data_batch
                    summary_v, global_step_v, _ = sess.run(
                        fetches=[summary, global_step, train_op],
                        feed_dict={inputs: inputs_v,
                                   inputs_length: inputs_length_v,
                                   targets: targets_v,
                                   targets_length: targets_length_v})
                    summary_writer.add_summary(summary=summary_v,
                                               global_step=global_step_v)

                    if global_step_v % 100 == 0:
                        logging.info('{} Iter #{}, Epoch {:.2f}'.format(
                            datetime.now(), global_step_v,
                            data_generator.progress))
                        num_samples = 2
                        (inputs_sample_v, inputs_length_sample_v,
                         targets_sample_v, targets_length_sample_v) = (
                            data_generator.sample(num_samples))
                        generated_v = sess.run(
                            fetches=generated,
                            feed_dict={
                                inputs: inputs_sample_v,
                                inputs_length: inputs_length_sample_v})
                        for i in range(num_samples):
                            logging.info('-' * 60)
                            logging.info('Sample #{}'.format(i))
                            inputs_sample_words = data_generator.ids_to_words(
                                inputs_sample_v[i][:inputs_length_sample_v[i]])
                            # Skip the leading symbol of the target.
                            targets_sample_words = data_generator.ids_to_words(
                                targets_sample_v[i][1:targets_length_sample_v[i]])
                            generated_words = data_generator.ids_to_words(
                                generated_v[i])
                            # Cut the generation after its first '<EOS>'.
                            if '<EOS>' in generated_words:
                                eos_index = generated_words.index('<EOS>')
                                generated_words = generated_words[:eos_index + 1]
                            logging.info('Input: {}'.format(
                                ' '.join(inputs_sample_words)))
                            logging.info('Target: {}'.format(
                                ' '.join(targets_sample_words)))
                            logging.info('Generated: {}'.format(
                                ' '.join(generated_words)))
                        logging.info('-' * 60)

                    if global_step_v % 500 == 0:
                        save_path = os.path.join(save_dir, 'model.ckpt')
                        real_save_path = saver.save(
                            sess=sess, save_path=save_path,
                            global_step=global_step_v)
                        logging.info(
                            'Saved the checkpoint to: {}'.format(real_save_path))
            finally:
                # Flush buffered summary events even if training aborts.
                summary_writer.close()
def get_train_global_op(self, global_network):
    """Return the op that applies this object's gradients to ``global_network``.

    The gradients held in ``self.gradients`` are paired positionally with
    ``global_network.vars`` and applied through ``global_network.optimizer``,
    using the graph's global step.

    Raises:
        AssertionError: if the number of gradients and variables differ.
    """
    local_grads = self.gradients
    shared_vars = global_network.vars
    assert len(local_grads) == len(shared_vars)

    apply_fn = global_network.optimizer.apply_gradients
    grads_and_vars = zip(local_grads, shared_vars)
    return apply_fn(grads_and_vars, global_step=get_or_create_global_step())
def __init__(self, is_training, config, input_):
    """Assemble the unrolled LSTM language-model graph.

    Always builds the forward pass and stores the cost and final state. The
    learning-rate variable, clipped-gradient train op and learning-rate
    update op are only created when ``is_training`` is true.
    """
    self._input = input_

    batch_size = input_.batch_size
    num_steps = input_.num_steps
    hidden = config.hidden_size
    vocab_size = config.vocab_size
    use_dropout = is_training and config.keep_prob < 1

    def lstm_cell():
        return rnn.BasicLSTMCell(hidden, forget_bias=0.0, state_is_tuple=True)

    def dropout_cell():
        return rnn.DropoutWrapper(lstm_cell(),
                                  output_keep_prob=config.keep_prob)

    # Output dropout is applied to each layer only while training.
    make_cell = dropout_cell if use_dropout else lstm_cell
    stacked = [make_cell() for _ in range(config.num_layers)]
    cell = rnn.MultiRNNCell(stacked, state_is_tuple=True)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, hidden],
                                    dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

    if use_dropout:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Unroll the network by hand, sharing weights after the first step.
    step_outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for t in range(num_steps):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            step_out, state = cell(inputs[:, t, :], state)
            step_outputs.append(step_out)

    flat_output = tf.reshape(tf.concat(step_outputs, 1), [-1, hidden])

    # Softmax projection onto the vocabulary.
    softmax_w = tf.get_variable("softmax_w", [hidden, vocab_size],
                                dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
    logits = tf.matmul(flat_output, softmax_w) + softmax_b

    flat_targets = tf.reshape(input_.targets, [-1])
    unit_weights = tf.ones([batch_size * num_steps], dtype=tf.float32)
    loss = seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [unit_weights])

    cost = tf.reduce_sum(loss) / batch_size
    self._cost = cost
    self._final_state = state

    # Evaluation-only models need nothing beyond the forward pass.
    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    trainable = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(tf.gradients(cost, trainable),
                                        config.max_grad_norm)
    sgd = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = sgd.apply_gradients(
        zip(clipped, trainable),
        global_step=framework.get_or_create_global_step())

    # Placeholder/assign pair used to change the learning rate from outside.
    self._new_lr = tf.placeholder(tf.float32, shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)