예제 #1
0
 def testTrain(self):
     """Smoke-test ``rnn_ptb.train`` by running it twice on all-ones data.

     The model comes from ``rnn_ptb.test_model``, which is passed whether
     at least one GPU is reported by ``tfe.num_gpus()``.
     """
     sequence_length = 35
     num_epochs = 2
     model = rnn_ptb.test_model(tfe.num_gpus() > 0)
     data = np.ones([4 * sequence_length, 20], dtype=np.int64)
     with tf.device(device()):
         optimizer = tf.train.GradientDescentOptimizer(1.0)
         # Each call to rnn_ptb.train is treated as one epoch here.
         for _ in range(num_epochs):
             rnn_ptb.train(model, optimizer, data, sequence_length, 0.25)
def main(_):
    """Train a ``PTBModel`` with eager execution, checkpointing on improvement.

    Each epoch runs ``train`` and then ``evaluate``.  When the evaluation loss
    is the best seen so far, a checkpoint is saved (if ``FLAGS.logdir`` is
    set); otherwise the learning rate is divided by 4 for the next epoch.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.data_path`` is empty.
    """
    tf.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")
    corpus = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tf.device("/device:GPU:0" if have_gpu else None):
        # Make learning_rate a Variable so it can be included in the checkpoint
        # and we can resume training with the last saved learning_rate.
        learning_rate = tf.Variable(20.0, name="learning_rate")
        model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                         FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                         use_cudnn_rnn)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        checkpoint = tf.train.Checkpoint(
            learning_rate=learning_rate,
            model=model,
            # GradientDescentOptimizer has no state to checkpoint, but noting it
            # here lets us swap in an optimizer that does.
            optimizer=optimizer)
        # Restore existing variables now (learning_rate), and restore new variables
        # on creation if a checkpoint exists.
        checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss = evaluate(model, eval_data)
            # Compare against None explicitly: a best loss of exactly 0 is
            # falsy and would otherwise be mistaken for "no best loss yet",
            # letting a worse epoch overwrite the checkpoint.
            if best_loss is None or eval_loss < best_loss:
                if FLAGS.logdir:
                    checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
                best_loss = eval_loss
            else:
                learning_rate.assign(learning_rate / 4.0)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n" %
                    learning_rate.numpy())
예제 #3
0
 def benchmark_cudnn_apply_small(self):
     """Run the "eager_cudnn_apply_small" apply benchmark if a GPU exists.

     Does nothing when ``tfe.num_gpus()`` reports zero GPUs.
     """
     if tfe.num_gpus():
         # The True argument presumably selects the cuDNN RNN path; confirm
         # against rnn_ptb.small_model.
         self._benchmark_apply(
             "eager_cudnn_apply_small", rnn_ptb.small_model(True))
예제 #4
0
def force_gpu_sync():
    """Copy a scalar constant to the GPU and back when a GPU is reported.

    Does nothing when ``tfe.num_gpus()`` is zero.  Judging by the name, the
    round trip is presumably meant to wait for pending GPU work; confirm.
    """
    if not tfe.num_gpus():
        return
    on_gpu = tf.constant(1).gpu()
    on_gpu.cpu()
예제 #5
0
 def testApply(self):
     """Smoke-test calling the test model once with ``training=False``."""
     model = rnn_ptb.test_model(tfe.num_gpus() > 0)
     with tf.device(device()):
         # Build the input inside the device scope so it is created there.
         inputs = tf.ones([35, 20], dtype=tf.int64)
         model(inputs, training=False)
예제 #6
0
def device():
    """Return the device string for GPU 0 if any GPU is reported, else CPU 0."""
    if tfe.num_gpus():
        return "/device:GPU:0"
    return "/device:CPU:0"
예제 #7
0
 def benchmark_cudnn_train_large(self):
     """Run the "eager_cudnn_train_large" train benchmark if a GPU exists.

     Does nothing when ``tfe.num_gpus()`` reports zero GPUs.
     """
     if tfe.num_gpus():
         # The True argument presumably selects the cuDNN RNN path; confirm
         # against rnn_ptb.large_model.
         self._benchmark_train(
             "eager_cudnn_train_large", rnn_ptb.large_model(True))
예제 #8
0
def main(_):
    """Train and evaluate ``RNNColorbot``, then predict colors interactively.

    Loads the train and eval datasets into ``FLAGS.dir``/data, runs
    ``FLAGS.num_epochs`` rounds of ``train_one_epoch`` followed by ``test``
    (summary writers point at ``FLAGS.dir``/summaries), and finally prompts
    for color names until an empty line or EOF is entered.

    Args:
        _: Unused positional argument.
    """
    data_dir = os.path.join(FLAGS.dir, "data")
    train_data = load_dataset(
        data_dir=data_dir, url=SOURCE_TRAIN_URL, batch_size=FLAGS.batch_size)
    eval_data = load_dataset(
        data_dir=data_dir, url=SOURCE_TEST_URL, batch_size=FLAGS.batch_size)

    model = RNNColorbot(
        rnn_cell_sizes=FLAGS.rnn_cell_sizes,
        label_dimension=3,
        keep_prob=FLAGS.keep_probability)
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

    # Use the GPU only when it is not disabled by flag and one is reported.
    use_gpu = not FLAGS.no_gpu and tfe.num_gpus() > 0
    if use_gpu:
        device = "/gpu:0"
    else:
        print(tfe.num_gpus())
        device = "/cpu:0"
    print("Using device %s." % device)

    log_dir = os.path.join(FLAGS.dir, "summaries")
    tf.gfile.MakeDirs(log_dir)
    train_writer = tf.contrib.summary.create_file_writer(
        os.path.join(log_dir, "train"), flush_millis=10000)
    eval_writer = tf.contrib.summary.create_file_writer(
        os.path.join(log_dir, "eval"), flush_millis=10000, name="eval")

    with tf.device(device):
        for epoch in range(FLAGS.num_epochs):
            epoch_start = time.time()
            with train_writer.as_default():
                train_one_epoch(
                    model, optimizer, train_data, FLAGS.log_interval)
            elapsed = time.time() - epoch_start
            print("train/time for epoch #%d: %.2f" % (epoch, elapsed))
            with eval_writer.as_default():
                test(model, eval_data)

    print("Colorbot is ready to generate colors!")
    while True:
        try:
            color_name = six.moves.input(
                "Give me a color name (or press enter to exit): ")
        except EOFError:
            return
        if not color_name:
            return

        _, chars, length = parse(color_name)
        with tf.device(device):
            # Copy each input within the device scope and prepend an axis of
            # size 1 (presumably the batch axis) before calling the model.
            chars = tf.expand_dims(tf.identity(chars), 0)
            length = tf.expand_dims(tf.identity(length), 0)
            outputs = model((chars, length), training=False)
            preds = tf.unstack(outputs[0])

        # Predictions cannot be negative, as they are generated by a ReLU layer;
        # they may, however, be greater than 1.
        clipped_preds = tuple(min(float(pred), 1.0) for pred in preds)
        rgb = tuple(int(channel * 255) for channel in clipped_preds)
        print("rgb:", rgb)
        if HAS_MATPLOTLIB:
            # A 1x1 image whose single pixel is the predicted color.
            plt.imshow([[clipped_preds]])
            plt.title(color_name)
            plt.show()