def testTrain(self):
  batch_size = 20
  sequence_length = 35
  with tf.Graph().as_default(), tf.device(tf.test.gpu_device_name()):
    inputs_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                               "inputs")
    labels_ph = tf.placeholder(tf.int64, [sequence_length, batch_size],
                               "labels")
    inputs = np.ones(inputs_ph.shape.as_list(), dtype=np.int64)
    labels = np.ones(labels_ph.shape.as_list(), dtype=np.int64)
    # Use the CuDNN-backed model only when a GPU is actually available.
    model = rnn_ptb.test_model(tf.test.is_gpu_available())
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
    loss = rnn_ptb.loss_fn(model, inputs_ph, labels_ph, training=True)
    grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss), 0.25)
    train_op = optimizer.apply_gradients(grads)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Smoke test: one step fetching only the train op, then one step
      # that also fetches the loss.
      sess.run(train_op, feed_dict={inputs_ph: inputs, labels_ph: labels})
      sess.run([train_op, loss],
               feed_dict={inputs_ph: inputs, labels_ph: labels})
def _benchmark_train(self, label, model):
  num_iters = 100
  num_warmup = 10
  dataset = tf.data.Dataset.from_tensors(
      tf.ones([PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
              dtype=tf.int64)).repeat(num_iters + num_warmup)
  # inputs and labels have the same shape
  dataset = tf.data.Dataset.zip((dataset, dataset))
  (inputs, labels) = dataset.make_one_shot_iterator().get_next()
  with tf.device(tf.test.gpu_device_name()):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
    loss = rnn_ptb.loss_fn(model, inputs, labels, training=True)
    grads = rnn_ptb.clip_gradients(optimizer.compute_gradients(loss), 0.25)
    train_op = optimizer.apply_gradients(grads)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Warm up so that one-time costs (e.g. kernel compilation, memory
      # allocation) are excluded from the timed runs.
      for _ in range(num_warmup):
        sess.run(train_op)
      gc.collect()

      start = time.time()
      for _ in range(num_iters):
        sess.run(train_op)
      self._report(label, start, num_iters, tf.test.gpu_device_name(),
                   PTBBenchmark.BATCH_SIZE)
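# NOTE: `_report` is not shown in this excerpt. It is assumed to be a thin
# wrapper around `tf.test.Benchmark.report_benchmark`; the sketch below
# (including the `_report_sketch` name and the name-mangling scheme) is an
# illustration under that assumption, not the canonical implementation.
def _report_sketch(self, label, start, num_iters, device, batch_size):
  # Average wall time per iteration over the timed runs.
  avg_time = (time.time() - start) / num_iters
  dev = "cpu" if "cpu" in device.lower() else "gpu"
  name = "%s_%s_batch_%d" % (label, dev, batch_size)
  self.report_benchmark(iters=num_iters, wall_time=avg_time, name=name)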
def step():
  # `optimizer`, `grads`, and `sequence_batch` are free variables captured
  # from the enclosing benchmark scope.
  optimizer.apply_gradients(
      rnn_ptb.clip_gradients(grads(sequence_batch, sequence_batch), 0.25))
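# Context sketch (assumption): for `step` to run, the enclosing scope would
# need definitions along these lines, with `tfe` being `tf.contrib.eager`.
# Here `grads` is an implicit-gradients function and `sequence_batch` serves
# as both inputs and labels, mirroring the graph benchmark above.
#
#   def model_loss(inputs, targets):
#     return rnn_ptb.loss_fn(model, inputs, targets, training=True)
#
#   grads = tfe.implicit_gradients(model_loss)
#   sequence_batch = tf.ones(
#       [PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE], dtype=tf.int64)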