def train(model, optimizer, train_data, train_target, mask):
    """Apply one optimization step of the masked loss on a single batch."""

    def model_loss(inputs, targets):
        # Loss of `model` on (inputs, targets) under the given mask.
        return loss_fn(model, inputs, targets, mask)

    # implicit_gradients builds a function returning (gradient, variable)
    # pairs for every variable used while computing model_loss.
    compute_grads = tfe.implicit_gradients(model_loss)
    optimizer.apply_gradients(compute_grads(train_data, train_target))
def train(model, optimizer, train_data, sequence_length, clip_ratio):
    """Run one training pass over `train_data`; return the mean sampled loss."""

    def model_loss(inputs, targets):
        return loss_fn(model, inputs, targets, training=True)

    grad_fn = tfe.implicit_gradients(model_loss)
    losses = []
    # Split the original sequence into chunks of `sequence_length` and
    # train on each chunk separately.
    chunk_starts = range(0, train_data.shape[1] - sequence_length,
                         sequence_length)
    for batch, start in enumerate(chunk_starts):
        # Fetch the data: targets are the inputs shifted ahead by one step.
        train_seq = tf.convert_to_tensor(
            train_data[:, start:start + sequence_length])
        train_target = tf.convert_to_tensor(
            train_data[:, start + 1:start + 1 + sequence_length])
        assert train_seq.shape[1] == train_target.shape[1] == sequence_length
        optimizer.apply_gradients(
            clip_gradients(grad_fn(train_seq, train_target), clip_ratio))
        if batch % 10 == 0:
            loss = model_loss(train_seq, train_target).numpy()
            print(loss, end=",", flush=True)
            losses.append(loss)
    return np.mean(losses)
def _benchmark_train(self, label, model):
    """Time 100 training steps of `model` and report through self._report."""
    with tf.device(device()):
        optimizer = tf.train.GradientDescentOptimizer(1.)

        def model_loss(inputs, targets):
            return rnn_ptb.loss_fn(model, inputs, targets, training=True)

        grad_fn = tfe.implicit_gradients(model_loss)
        # All-ones dummy batch: the benchmark only measures throughput, so
        # the token values are irrelevant.
        sequence_batch = tf.ones(
            [PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE], dtype=tf.int64)

        def step():
            clipped = rnn_ptb.clip_gradients(
                grad_fn(sequence_batch, sequence_batch), 0.25)
            optimizer.apply_gradients(clipped)

        # Warmup steps are excluded from the measurement.
        for _ in range(10):
            step()
        force_gpu_sync()
        gc.collect()

        start = time.time()
        iters = 100
        for _ in range(iters):
            step()
        force_gpu_sync()
        self._report(label, start, iters, device(),
                     int(sequence_batch.shape[1]))
def _benchmark_train(self, label, model):
    """Benchmark: run and time 100 optimizer steps, then report the rate."""
    with tf.device(device()):
        sgd = tf.train.GradientDescentOptimizer(1.)

        def model_loss(inputs, targets):
            return rnn_ptb.loss_fn(model, inputs, targets, training=True)

        grads = tfe.implicit_gradients(model_loss)
        # Constant dummy input; only step throughput matters here.
        sequence_batch = tf.ones(
            [PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE], dtype=tf.int64)

        def step():
            sgd.apply_gradients(
                rnn_ptb.clip_gradients(
                    grads(sequence_batch, sequence_batch), 0.25))

        warmup_iters = 10
        for _ in range(warmup_iters):  # Warmup
            step()
        force_gpu_sync()
        gc.collect()

        iters = 100
        start = time.time()
        for _ in range(iters):
            step()
        force_gpu_sync()
        self._report(label, start, iters, device(),
                     int(sequence_batch.shape[1]))
def main(_):
    """Collect a few CartPole observations and apply one policy-gradient step."""
    policy = Policy()
    optimizer = tf.train.GradientDescentOptimizer(5e-1)
    policy_gradient = tfe.implicit_gradients(loss_fn)

    env = gym.make('CartPole-v0')
    # Seed the batch with the reset observation, then take 5 random actions.
    observs = [env.reset()]
    for _ in range(5):
        next_observ = env.step(env.action_space.sample())[0]
        observs.append(next_observ)
    observs = tf.constant(tf.stack(observs))

    optimizer.apply_gradients(policy_gradient(policy, observs))
    print(loss_fn(policy, observs))
def train(model, optimizer, train_data, sequence_length, clip_ratio):
    """Trains `model` for one epoch over `train_data`.

    Walks the data in windows of `sequence_length`, shifting the window
    start by `j` (in steps of 2) across outer passes.  Each step applies
    clipped gradients, bumps the global step, and periodically logs loss
    and running accuracy to tf.contrib.summary and stderr.

    Fix: removed `input_list = tf.unstack(...)` — it was computed on every
    iteration and never used (pure wasted work) — along with dead
    commented-out summary-writer code.
    """

    def model_loss(inputs, targets, total):
        # loss_fn returns (loss, accuracy); accumulate accuracy in total[1]
        # and the evaluation count in total[0] as a side effect.
        lossfn, accuracy = loss_fn(model, inputs, targets, training=True)
        total[1] += accuracy
        total[0] += 1.0
        return lossfn

    grads = tfe.implicit_gradients(model_loss)
    total = [0.0] * 2  # [num evaluations, accumulated accuracy]
    total_time = 0
    global_step = tf.train.get_or_create_global_step()
    for j in range(0, sequence_length, 2):
        for batch, i in enumerate(
                range(0, train_data.shape[0] - 1, sequence_length)):
            train_seq, train_target = _get_batch(train_data, i + j,
                                                 sequence_length)
            # Skip windows that got truncated to less than half a sequence
            # at the end of the data.
            if train_seq.shape[0] < sequence_length / 2:
                continue
            start = time.time()
            optimizer.apply_gradients(
                clip_gradients(grads(train_seq, train_target, total),
                               clip_ratio))
            total_time += (time.time() - start)
            global_step.assign_add(1)
            with tf.contrib.summary.record_summaries_every_n_global_steps(100):
                if total[0] > 0.0:
                    tf.contrib.summary.scalar('train_acc',
                                              total[1] / total[0])
                tf.contrib.summary.scalar(
                    'train_loss',
                    model_loss(train_seq, train_target, total).numpy())
            if batch % 100 == 0:
                time_in_ms = (total_time * 1000) / (batch + 1)
                sys.stderr.write(
                    "batch %d,i:%d: training loss %.6f, avg step time %d ms\n"
                    % (batch, i,
                       model_loss(train_seq, train_target, total).numpy(),
                       time_in_ms))
                if total[0] > 0.0:
                    sys.stderr.write("batch %d: training accuracy: %.8f\n" %
                                     (batch, total[1] / total[0]))
                # Reset the running-accuracy accumulator after each report.
                total[0] = 0.0
                total[1] = 0
def train(model, optimizer, train_data, sequence_length, clip_ratio):
    """Train for one epoch, logging loss and average step time every 10 batches."""

    def model_loss(inputs, targets):
        return loss_fn(model, inputs, targets, training=True)

    grad_fn = tfe.implicit_gradients(model_loss)
    elapsed = 0
    offsets = range(0, train_data.shape[0] - 1, sequence_length)
    for batch, offset in enumerate(offsets):
        train_seq, train_target = _get_batch(train_data, offset,
                                             sequence_length)
        tic = time.time()
        grads_and_vars = clip_gradients(grad_fn(train_seq, train_target),
                                        clip_ratio)
        optimizer.apply_gradients(grads_and_vars)
        elapsed += (time.time() - tic)
        if batch % 10 == 0:
            time_in_ms = (elapsed * 1000) / (batch + 1)
            sys.stderr.write(
                "batch %d: training loss %.2f, avg step time %d ms\n" %
                (batch, model_loss(train_seq, train_target).numpy(),
                 time_in_ms))
def train_one_epoch(model, optimizer, train_data, log_interval=None):
    """Trains model on train_data using optimizer."""

    def model_loss(labels, chars, sequence_length):
        # Forward pass in training mode, then score against the labels.
        predictions = model(chars, sequence_length, training=True)
        return loss(labels, predictions)

    # grad_fn yields the gradients of model_loss with respect to every
    # variable that contributes to its output.
    grad_fn = tfe.implicit_gradients(model_loss)

    for batch, (labels, chars, sequence_length) in enumerate(
            tfe.Iterator(train_data)):
        optimizer.apply_gradients(grad_fn(labels, chars, sequence_length))
        if log_interval and batch % log_interval == 0:
            batch_loss = model_loss(labels, chars, sequence_length)
            print("train/batch #{}\tloss: {:.6f}".format(
                batch, float(batch_loss)))
            if model.summary_writer is not None:
                model.summary_writer.scalar("train/loss", batch_loss)
                model.summary_writer.step()
# Optimal parameter set 3 of the Branin function (one of its global minima).
initial = [9.42478, 2.475]
params = [
    tf.get_variable('%d' % i, initializer=xi) for i, xi in enumerate(initial)
]
# Bug fix: the original code referenced `x` (the plain Python list) and an
# undefined `y` below; the optimizer must act on the tf variables instead.
x, y = params

loss = evaluation_branin(*params).numpy()
assert np.allclose(loss, 0.397887)
check_branin_impl()

print("Initial loss : ", evaluation_branin(x, y).numpy())

# Get the gradients and variables to be optimized.
grad_fn = tfe.implicit_gradients(evaluation_branin)
grad_vars = grad_fn(x, y)

# Prepare the optimizer. Since this is a very simple problem, we don't need
# many optimization steps.
optimizer = tf.train.AdamOptimizer(0.01)

for i in range(200):
    # Update the variables and print the loss value.
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
          (i + 1, evaluation_branin(x, y).numpy(), x.numpy(), y.numpy()))

print()
print("Final Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
      (evaluation_branin(x, y).numpy(), x.numpy(), y.numpy()))
# Known global optimum of the 6-D Hartmann function.
x = [0.20169, 0.15001, 0.476874, 0.275332, 0.311652, 0.6573]
params = [
    tf.get_variable('x%d' % i, initializer=xi) for i, xi in enumerate(x)
]

loss = evaluation_hartmann6(params).numpy()
assert np.allclose(loss, -3.32237)
check_hartmann6_impl()

# Bug fix: the original referenced an undefined name `variables` everywhere
# below; the tf variables live in `params`.
print("Initial loss : ", evaluation_hartmann6(params).numpy())

# Get the gradients and variables to be optimized.
grad_fn = tfe.implicit_gradients(evaluation_hartmann6)
grad_vars = grad_fn(params)

# Prepare the optimizer. Since this is a very simple problem, we don't need
# many optimization steps.
optimizer = tf.train.AdamOptimizer(0.001)

for i in range(500):
    # Update the variables and print the loss value.
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f" % (i + 1, evaluation_hartmann6(params).numpy()))

print()
print("Final Loss = %0.6f" % (evaluation_hartmann6(params).numpy()))
# NOTE(review): the original snippet ended with a dangling `"""` which is an
# unterminated string literal (syntax error); removed here — confirm it was
# not closing a docstring opened outside this chunk.
# Variables to be optimized, randomly initialized over the search domain.
x = tf.get_variable('x', initializer=tf.random_uniform([], -5.0, 5.0))
y = tf.get_variable('y', initializer=tf.random_uniform([], -0.0, 15.0))


def loss(x, y):
    """Squared error between the prediction 2*x - y and the target 4.0."""
    prediction = 2 * x - y
    target = 4.0
    return tf.square(prediction - target)


print("Initial loss : ", loss(x, y).numpy())

# Build the (gradient, variable) pairs to be optimized.
grad_fn = tfe.implicit_gradients(loss)
grad_vars = grad_fn(x, y)

# Prepare the optimizer; this toy problem needs only a handful of steps.
optimizer = tf.train.GradientDescentOptimizer(0.01)

for i in range(10):
    # Apply one update, then report the new loss and parameter values.
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
          (i + 1, loss(x, y).numpy(), x.numpy(), y.numpy()))

print()
print("Final Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
      (loss(x, y).numpy(), x.numpy(), y.numpy()))
import tensorflow as tf
from tensorflow.contrib.eager.python import tfe

tf.enable_eager_execution()

# Single scalar parameter, uniformly initialized in [-1, 1).
x = tfe.Variable(initial_value=tf.random_uniform([1], -1., 1.), name='x')


def loss(value):
    """Objective to minimize: the sigmoid of the input."""
    return tf.sigmoid(value)


# Function mapping inputs to (gradient, variable) pairs for `loss`.
grad_fn = tfe.implicit_gradients(loss)
opt = tf.train.GradientDescentOptimizer(learning_rate=1)

for i in range(1000):
    # Take 50 gradient steps between progress reports.
    for _ in range(50):
        opt.apply_gradients(grad_fn(x))
    if i % 50 == 0:
        loss_val = loss(x)
        print(i, "Optimal Value : ", loss_val.numpy(),
              "Val (X) : ", x.numpy())