def testTrain(self):
  model = rnn_ptb.test_model(tfe.num_gpus() > 0)
  sequence_length = 35
  data = np.ones([4 * sequence_length, 20], dtype=np.int64)
  with tf.device(device()):
    optimizer = tf.train.GradientDescentOptimizer(1.0)
    # Train two epochs
    rnn_ptb.train(model, optimizer, data, sequence_length, 0.25)
    rnn_ptb.train(model, optimizer, data, sequence_length, 0.25)
def testTrain(self):
  model = rnn_ptb.small_model(tfe.num_gpus() > 0)
  sequence_length = 35
  data = np.ones([4 * sequence_length, 20], dtype=np.int64)
  with tf.device(device()):
    optimizer = tf.train.GradientDescentOptimizer(1.0)
    # Train two epochs
    rnn_ptb.train(model, optimizer, data, sequence_length, 0.25)
    rnn_ptb.train(model, optimizer, data, sequence_length, 0.25)
def main():
  import common_gd
  args = common_gd.args
  args.cuda = not args.no_cuda and (tfe.num_gpus() > 0)
  print(
      benchmark(
          batch_size=args.batch_size,
          iters=args.iters,
          seed=args.seed,
          cuda=args.cuda,
          verbose=True))
def __init__(self, units, num_classes, merge_mode='concat', num_layers=1):
  super(BiRNN, self).__init__()
  self.impl = 1 if tfe.num_gpus() == 0 else 2
  self.cells = [
      tf.keras.layers.LSTMCell(units, implementation=self.impl)
      for _ in range(num_layers)
  ]
  # Slower if not unrolled, probably because it uses K.rnn() internally.
  self.rnn = tf.keras.layers.RNN(self.cells, unroll=True)
  self.bidirectional = tf.keras.layers.Bidirectional(
      self.rnn, merge_mode=merge_mode)
  self.classifier = tf.keras.layers.Dense(num_classes)
def testGPU(self):
  if tfe.num_gpus() <= 0:
    self.skipTest('No GPUs available')
  # tf.Tensor.gpu() moves a tensor to GPU.
  x = constant_op.constant([[1., 2.], [3., 4.]]).gpu()
  # Alternatively, tf.device() as a context manager places tensors and
  # operations.
  with ops.device('gpu:0'):
    x += 1.
  # Without a device context, heuristics are used to place ops.
  # In this case, math_ops.reduce_mean runs on the GPU.
  reduction_indices = range(x.shape.ndims)
  m = math_ops.reduce_mean(x, reduction_indices)
  # m is on GPU, bring it back to CPU and compare.
  self.assertEqual(3.5, m.cpu().numpy())
def testGPU(self):
  if tfe.num_gpus() <= 0:
    self.skipTest('No GPUs available')
  # tf.Tensor.as_gpu_tensor() moves a tensor to GPU.
  x = constant_op.constant([[1., 2.], [3., 4.]]).as_gpu_tensor()
  # Alternatively, tf.device() as a context manager places tensors and
  # operations.
  with ops.device('gpu:0'):
    x += 1.
  # Without a device context, heuristics are used to place ops.
  # In this case, math_ops.reduce_mean runs on the GPU.
  reduction_indices = range(x.shape.ndims)
  m = math_ops.reduce_mean(x, reduction_indices)
  # m is on GPU, bring it back to CPU and compare.
  self.assertEqual(3.5, m.as_cpu_tensor().numpy())
def main(_):
  tf.enable_eager_execution()

  if not FLAGS.data_path:
    raise ValueError("Must specify --data-path")
  corpus = Datasets(FLAGS.data_path)
  train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
  eval_data = _divide_into_batches(corpus.valid, 10)

  have_gpu = tfe.num_gpus() > 0
  use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

  with tf.device("/device:GPU:0" if have_gpu else None):
    # Make learning_rate a Variable so it can be included in the checkpoint
    # and we can resume training with the last saved learning_rate.
    learning_rate = tfe.Variable(20.0, name="learning_rate")
    model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                     FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                     use_cudnn_rnn)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    checkpoint = tfe.Checkpoint(
        learning_rate=learning_rate,
        model=model,
        # GradientDescentOptimizer has no state to checkpoint, but noting it
        # here lets us swap in an optimizer that does.
        optimizer=optimizer)
    # Restore existing variables now (learning_rate), and restore new variables
    # on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
    sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

    best_loss = None
    for _ in range(FLAGS.epoch):
      train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
      eval_loss = evaluate(model, eval_data)
      if not best_loss or eval_loss < best_loss:
        if FLAGS.logdir:
          checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
        best_loss = eval_loss
      else:
        learning_rate.assign(learning_rate / 4.0)
        sys.stderr.write("eval_loss did not reduce in this epoch, "
                         "changing learning rate to %f for the next epoch\n" %
                         learning_rate.numpy())
def main(_):
  tfe.enable_eager_execution()

  if not FLAGS.data_path:
    raise ValueError("Must specify --data_path")

  corpus = Corpus(FLAGS.data_path)
  # TODO(ashankar): Remove _batchify and _get_batch and use the Datasets API
  # instead.
  train_data = _batchify(corpus.train, FLAGS.batch_size)
  eval_data = _batchify(corpus.valid, 10)

  have_gpu = tfe.num_gpus() > 0
  use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

  with tfe.restore_variables_on_create(
      tf.train.latest_checkpoint(FLAGS.logdir)):
    with tf.device("/device:GPU:0" if have_gpu else None):
      # Make learning_rate a Variable so it can be included in the checkpoint
      # and we can resume training with the last saved learning_rate.
      learning_rate = tfe.Variable(20.0, name="learning_rate")
      sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())
      model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                       FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                       use_cudnn_rnn)
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)

      best_loss = None
      for _ in range(FLAGS.epoch):
        train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
        eval_loss = evaluate(model, eval_data)
        if not best_loss or eval_loss < best_loss:
          if FLAGS.logdir:
            tfe.Saver(model.trainable_weights + [learning_rate]).save(
                os.path.join(FLAGS.logdir, "ckpt"))
          best_loss = eval_loss
        else:
          learning_rate.assign(learning_rate / 4.0)
          sys.stderr.write("eval_loss did not reduce in this epoch, "
                           "changing learning rate to %f for the next epoch\n" %
                           learning_rate.numpy())
import numpy as np
import tensorflow as tf
from tensorflow.contrib.eager.python import tfe

tf.enable_eager_execution()
tf.set_random_seed(0)

device = '/gpu:0' if tfe.num_gpus() > 0 else '/cpu:0'

# Variables
x = tf.get_variable('x', dtype=tf.float32, initializer=1.0)
y = tf.get_variable('y', dtype=tf.float32, initializer=1.0)


# Function to optimize
def f(x, y):
    return x + y


# Constraint: solutions must satisfy x^2 + y^2 = 1.
# The Lagrange multiplier is kept non-negative via the clip constraint.
lambd = tf.get_variable('lambda', dtype=tf.float32, initializer=1.0,
                        constraint=lambda x: tf.clip_by_value(x, 0., np.infty))


def constraint(x, y):
    return x * x + y * y - 1


# Lagrangian: maximize f subject to the constraint by minimizing -f + l * g.
def L(x, y, l):
    return -f(x, y) + l * constraint(x, y)


optimizer = tf.train.GradientDescentOptimizer(0.05)
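The snippet above stops before the update step. The loop below is a hedged sketch of how the pieces might be wired together: plain gradient descent on x and y with gradient ascent on the multiplier. The step count and the manual re-clipping of the multiplier are assumptions for illustration, not taken from the original.

# Hedged sketch (assumed, not from the original source): primal-dual updates
# on the Lagrangian defined above.
for step in range(200):
    with tf.GradientTape() as tape:
        loss = L(x, y, lambd)
    dx, dy, dl = tape.gradient(loss, [x, y, lambd])
    # Descent on x and y, ascent on lambda (negate its gradient).
    optimizer.apply_gradients([(dx, x), (dy, y), (-dl, lambd)])
    # tf.train optimizers may not enforce the variable's constraint,
    # so re-apply the non-negativity clip manually.
    lambd.assign(tf.clip_by_value(lambd, 0., np.infty))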
def main(_):
  tf.enable_eager_execution()

  if not FLAGS.data_path:
    raise ValueError("Must specify --data-path")
  corpus = Datasets(FLAGS.data_path)
  train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
  eval_data = _divide_into_batches(corpus.valid, 10)

  have_gpu = tfe.num_gpus() > 0
  use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

  with tf.device("/device:GPU:0" if have_gpu else None):
    # Make learning_rate a Variable so it can be included in the checkpoint
    # and we can resume training with the last saved learning_rate.
    learning_rate = tf.contrib.eager.Variable(0.001, name="learning_rate")
    model = LSTMModel(corpus.vocab_size(), FLAGS.embedding_dim,
                      FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                      use_cudnn_rnn, 0.5)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(
        learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08,
        use_locking=False, name='Adam')

    checkpoint = tfe.Checkpoint(
        optimizer=optimizer,
        model=model,
        optimizer_step=tf.train.get_or_create_global_step())
    # Alternative kept from the original, commented out:
    # checkpoint = tf.train.Checkpoint(
    #     learning_rate=learning_rate,
    #     model=model,
    #     # GradientDescentOptimizer has no state to checkpoint, but noting it
    #     # here lets us swap in an optimizer that does.
    #     optimizer=optimizer)

    # Restore existing variables now (learning_rate), and restore new variables
    # on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
    sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

    best_loss = None
    best_accuracy = 0.0
    writer = tf.contrib.summary.create_file_writer(FLAGS.logdir)
    global_step = tf.train.get_or_create_global_step()  # global step variable
    writer.set_as_default()

    for _ in range(FLAGS.epoch):
      train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
      eval_loss, eval_accuracy = evaluate(model, eval_data)
      global_step.assign_add(1)
      with tf.contrib.summary.record_summaries_every_n_global_steps(1):
        tf.contrib.summary.scalar('epoch_acc', eval_accuracy)
        tf.contrib.summary.scalar('epoch_loss', eval_loss)
      if not best_loss or eval_loss < best_loss or eval_accuracy > best_accuracy:
        if FLAGS.logdir:
          checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
        best_loss = eval_loss
        best_accuracy = eval_accuracy
        # Dead graph-mode saving code kept from the original, commented out:
        # sys.stderr.write("model.variables:%s" % (model.variables))
        # sess = tf.Session()
        # saver = tf.train.Saver(tf.global_variables())
        # model_path = "/tmp/tf/"
        # save_path = saver.save(sess, model_path)
      else:
        learning_rate.assign(learning_rate * 0.95)
        sys.stderr.write("eval_loss did not reduce in this epoch, "
                         "changing learning rate to %f for the next epoch\n" %
                         learning_rate.numpy())
      sys.stderr.write("one epoch, best_loss: %f\n" % best_loss)
def benchmark_cudnn_train_large(self):
  if not tfe.num_gpus():
    return
  self._benchmark_train("eager_cudnn_train_large", rnn_ptb.large_model(True))
def force_gpu_sync():
  if tfe.num_gpus():
    tf.constant(1).gpu().cpu()
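The round trip through .gpu().cpu() cannot complete until queued GPU work has finished, which is why benchmarks call it before reading the clock. The helper below is a hedged usage sketch, not from the original source; time_on_gpu and step are hypothetical names standing in for whatever callable is being measured.

import time


# Hedged sketch: wrap a timed region with force_gpu_sync() so the measurement
# is not skewed by work still queued on the GPU.
def time_on_gpu(step, iters=100):
  force_gpu_sync()   # drain any previously queued GPU work
  start = time.time()
  for _ in range(iters):
    step()
  force_gpu_sync()   # ensure all timed work has actually completed
  return (time.time() - start) / iters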
def testNumGPUs(self):
  devices = tfe.list_devices()
  # list_devices() always includes the CPU, plus one entry per GPU.
  self.assertEqual(len(devices) - 1, tfe.num_gpus())
print('x test', x_test.shape, x_test.mean(), x_test.std())
print('y test', y_test.shape, y_test.mean(), y_test.std())


# Model definition (canonical way)
class Regressor(tf.keras.Model):

    def __init__(self):
        super(Regressor, self).__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        output = self.dense(inputs)
        return output


device = '/cpu:0' if tfe.num_gpus() == 0 else '/gpu:0'

with tf.device(device):
    # Build model and optimizer
    model = Regressor()
    model.compile(optimizer=tf.train.GradientDescentOptimizer(0.05), loss='mse')

    # Suggested fix for TF <= 1.9; could be incorporated inside
    # `_eager_set_inputs` or `_set_input`. The fix: use exactly one sample from
    # the provided input dataset to determine input/output shapes for the model.
    dummy_x = tf.zeros((1, 13))
    model.call(dummy_x)

    # Train
    model.fit(x_train, y_train,
def device():
  return "/device:GPU:0" if tfe.num_gpus() else "/device:CPU:0"
def train_model(clean_train, clean_test, noisy_test, num_batches=150,
                batch_size=32, show_loss_plot=False, verbose=False, seed=1337):
    """Trains a UNet to learn denoising by self-supervision (noise2self).

    Uses matplotlib to display the results.

    Args:
        clean_train (tensor): the clean training data
        clean_test (tensor): the clean testing data
        noisy_test (tensor): the noisy testing data
        num_batches (int): number of batches used for training
        batch_size (int): number of images in each batch
        show_loss_plot (bool): display a graph of loss after training
        verbose (bool): print extra information
        seed (int): random seed for tensorflow and numpy

    Returns:
        The trained tensorflow model.
    """
    if num_batches <= 0:
        raise ValueError('must have a positive number of batches')
    if batch_size <= 0:
        raise ValueError('must have a positive batch size')

    def verbose_print(s):
        if verbose:
            print(s)

    tf.enable_eager_execution()
    tf.set_random_seed(seed)
    np.random.seed(seed)

    device = '/gpu:0' if tfe.num_gpus() else '/cpu:0'
    with tf.device(device):
        verbose_print('building model (device={})'.format(device))
        model = BabyUnet()
        model.compile(optimizer=tf.train.AdamOptimizer(0.001),
                      loss=tf.keras.losses.mean_squared_error)
        model.build((1, image_size, image_size, 1))

        optimizer = tf.train.AdamOptimizer()
        loss_fn = tf.losses.mean_squared_error
        loss_history = []

        noise_gen = noisy_clean_generator(clean_train, batch_size, 0, 0.4)
        masker = Masker(interpolate=True, spacing=4, radius=1)

        verbose_print('fitting model')
        start_time = time.time()
        loss_display = ''
        for (batch, (batch_noisy, batch_clean)) in enumerate(noise_gen):
            fraction_display = '{}/{}'.format(batch, num_batches).rjust(10)
            display_progress(batch, num_batches, length=30,
                             suffix=loss_display, prefix=fraction_display)
            if batch == num_batches:
                break

            with tf.GradientTape() as tape:
                masked, mask = masker(batch_noisy, batch, shape=data_shape)
                batch_predictions = model(masked)
                loss_value = loss_fn(mask * batch_clean,
                                     mask * batch_predictions)
                loss_display = '(loss: {:0.6f})'.format(loss_value.numpy())
                loss_history.append(loss_value.numpy())

            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(
                zip(grads, model.trainable_variables),
                global_step=tf.train.get_or_create_global_step())

        end_time = time.time()
        verbose_print('fit completed in {:0.2f}s'.format(end_time - start_time))

        if show_loss_plot:
            show_plot(loss_history, 'Loss', 'Epoch', 'Mean Square Error Loss')

        verbose_print('evaluating test set')
        masked, mask = masker(noisy_test, 0, shape=data_shape)
        test_predictions = model(masked)
        test_loss_value = loss_fn(mask * clean_test, mask * test_predictions)
        print("final test loss: {:0.6f}".format(test_loss_value))

        return model
def benchmark_cudnn_apply_small(self):
  if not tfe.num_gpus():
    return
  self._benchmark_apply("eager_cudnn_apply_small", rnn_ptb.small_model(True))
def main(_): data_dir = os.path.join(FLAGS.dir, "data") train_data = load_dataset(data_dir=data_dir, url=SOURCE_TRAIN_URL, batch_size=FLAGS.batch_size) eval_data = load_dataset(data_dir=data_dir, url=SOURCE_TEST_URL, batch_size=FLAGS.batch_size) model = RNNColorbot(rnn_cell_sizes=FLAGS.rnn_cell_sizes, label_dimension=3, keep_prob=FLAGS.keep_probability) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) if FLAGS.no_gpu or tfe.num_gpus() <= 0: print(tfe.num_gpus()) device = "/cpu:0" else: device = "/gpu:0" print("Using device %s." % device) log_dir = os.path.join(FLAGS.dir, "summaries") tf.gfile.MakeDirs(log_dir) train_summary_writer = tf.contrib.summary.create_summary_file_writer( os.path.join(log_dir, "train"), flush_secs=10) test_summary_writer = tf.contrib.summary.create_summary_file_writer( os.path.join(log_dir, "eval"), flush_secs=10, name="eval") with tf.device(device): for epoch in range(FLAGS.num_epochs): start = time.time() with train_summary_writer.as_default(): train_one_epoch(model, optimizer, train_data, FLAGS.log_interval) end = time.time() print("train/time for epoch #%d: %.2f" % (epoch, end - start)) with test_summary_writer.as_default(): test(model, eval_data) print("Colorbot is ready to generate colors!") while True: try: color_name = six.moves.input( "Give me a color name (or press enter to exit): ") except EOFError: return if not color_name: return _, chars, length = parse(color_name) with tf.device(device): (chars, length) = (tf.identity(chars), tf.identity(length)) chars = tf.expand_dims(chars, 0) length = tf.expand_dims(length, 0) preds = tf.unstack(model(chars, length, training=False)[0]) # Predictions cannot be negative, as they are generated by a ReLU layer; # they may, however, be greater than 1. clipped_preds = tuple(min(float(p), 1.0) for p in preds) rgb = tuple(int(p * 255) for p in clipped_preds) print("rgb:", rgb) data = [[clipped_preds]] if HAS_MATPLOTLIB: plt.imshow(data) plt.title(color_name) plt.show()
import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib.eager.python import tfe

tfe.enable_eager_execution()

# For line profiling.
try:
    profile  # throws an exception when profile isn't defined
except NameError:
    profile = lambda x: x  # if it's not defined, simply ignore the decorator

import common_gd
args = common_gd.args
args.cuda = not args.no_cuda and (tfe.num_gpus() > 0)

dtype = np.float32
# tf_dtype = tf.float32
lambda_ = 3e-3
lr = 0.2
dsize = 2
fs = [dsize, 2, 2, 2]  # layer sizes
nonlin = tf.nn.sigmoid


def d_nonlin(y):
    return y * (1 - y)
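Note that d_nonlin takes the activation's output rather than its input: for the sigmoid, s'(x) = s(x)(1 - s(x)). The quick check below is an assumed illustration, not part of the original benchmark, comparing that closed form against the autodiff derivative of tf.nn.sigmoid.

# Hedged sanity check (assumed, not from the original): the closed-form
# derivative computed from the output should match the autodiff derivative.
x_check = tf.constant([-2.0, 0.0, 3.0])
with tf.GradientTape() as tape:
    tape.watch(x_check)
    y_check = nonlin(x_check)
print(tape.gradient(y_check, x_check).numpy())  # autodiff derivative
print(d_nonlin(y_check).numpy())                # y * (1 - y); should match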
img[a == a.max()] = 0
a = img

a = np.uint8(np.clip(a, 0, 255))
print(a.shape)
plt.figure(dpi=300, figsize=(20, 20))
plt.subplots_adjust(left=0.0, right=1.0, bottom=0.0, top=1.0)
plt.imshow(a)
# plt.show()
plt.savefig('temp.png')

Y, X = np.mgrid[-1.3:1.3:0.001, -2:1:0.001]
Z = X + 1j * Y

num_gpus = tfe.num_gpus()
if num_gpus > 0:
    with tf.device('gpu:0'):
        xs = tf.constant(Z.astype(np.complex64))
        zs = tfe.Variable(xs)
        ns = tfe.Variable(tf.zeros_like(xs, tf.float32))
else:
    with tf.device('/cpu:0'):
        xs = tf.constant(Z.astype(np.complex64))
        zs = tfe.Variable(xs)
        ns = tfe.Variable(tf.zeros_like(xs, tf.float32))

# Operation to update the zs and the iteration count.
#
# Note: We keep computing zs after they diverge! This
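The snippet cuts off before the update the comment refers to. The loop below is a hedged sketch of what it likely looks like, modeled on the standard TensorFlow fractal example: iterate z <- z^2 + c and count steps before divergence. The iteration count and the divergence threshold of 4 are assumptions for illustration.

# Hedged sketch (assumed, not from the original source) of the zs/ns update.
for _ in range(200):
    zs_new = zs * zs + xs
    not_diverged = tf.abs(zs_new) < 4
    ns.assign_add(tf.cast(not_diverged, tf.float32))  # count non-diverged steps
    zs.assign(zs_new)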
def main(_): data_dir = os.path.join(FLAGS.dir, "data") train_data = load_dataset( data_dir=data_dir, url=SOURCE_TRAIN_URL, batch_size=FLAGS.batch_size) eval_data = load_dataset( data_dir=data_dir, url=SOURCE_TEST_URL, batch_size=FLAGS.batch_size) model = RNNColorbot( rnn_cell_sizes=FLAGS.rnn_cell_sizes, label_dimension=3, keep_prob=FLAGS.keep_probability) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) if FLAGS.no_gpu or tfe.num_gpus() <= 0: print(tfe.num_gpus()) device = "/cpu:0" else: device = "/gpu:0" print("Using device %s." % device) log_dir = os.path.join(FLAGS.dir, "summaries") tf.gfile.MakeDirs(log_dir) train_summary_writer = tf.contrib.summary.create_summary_file_writer( os.path.join(log_dir, "train"), flush_secs=10) test_summary_writer = tf.contrib.summary.create_summary_file_writer( os.path.join(log_dir, "eval"), flush_secs=10, name="eval") with tf.device(device): for epoch in range(FLAGS.num_epochs): start = time.time() with train_summary_writer.as_default(): train_one_epoch(model, optimizer, train_data, FLAGS.log_interval) end = time.time() print("train/time for epoch #%d: %.2f" % (epoch, end - start)) with test_summary_writer.as_default(): test(model, eval_data) print("Colorbot is ready to generate colors!") while True: try: color_name = six.moves.input( "Give me a color name (or press enter to exit): ") except EOFError: return if not color_name: return _, chars, length = parse(color_name) with tf.device(device): (chars, length) = (tf.identity(chars), tf.identity(length)) chars = tf.expand_dims(chars, 0) length = tf.expand_dims(length, 0) preds = tf.unstack(model(chars, length, training=False)[0]) # Predictions cannot be negative, as they are generated by a ReLU layer; # they may, however, be greater than 1. clipped_preds = tuple(min(float(p), 1.0) for p in preds) rgb = tuple(int(p * 255) for p in clipped_preds) print("rgb:", rgb) data = [[clipped_preds]] if HAS_MATPLOTLIB: plt.imshow(data) plt.title(color_name) plt.show()
def testApply(self):
  model = rnn_ptb.test_model(tfe.num_gpus() > 0)
  with tf.device(device()):
    model(tf.ones([35, 20], dtype=tf.int64), training=False)
def testApply(self):
  model = rnn_ptb.small_model(tfe.num_gpus() > 0)
  with tf.device(device()):
    model(tf.ones([35, 20], dtype=tf.int64), training=False)
def main():
  import common_gd
  args = common_gd.args
  args.cuda = not args.no_cuda and (tfe.num_gpus() > 0)
  print(
      benchmark(
          batch_size=args.batch_size,
          iters=args.iters,
          seed=args.seed,
          cuda=args.cuda,
          history=args.history,
          verbose=True))