Example 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", help="cifar data path", default="../data/cifar-10-batches-py")
    parser.add_argument("--epochs", type=int, help="number of learning epoch, default is 10", default=10)
    parser.add_argument("--saving", help="wheter saving or not(each verbose iteration)", action="store_true")
    parser.add_argument("--batch_size", type=int, help="batch size(default is 32)", default=32)
    parser.add_argument("--verbose", type=int, help="verbosity cycle(default is 1 epoch)", default=1)
    parser.add_argument("--no_tqdm", help="whether to use tqdm process bar", action="store_true")
    parser.add_argument("--lr", type=float, help="learning rate, default is 0.001", default=1e-3)

    args = parser.parse_args()
    dirname = args.data

    X_train, y_train = prerprocess_train(dirname)
    X_test, y_test = prerprocess_test(dirname)

    device = 'gpu:0' if tfe.num_gpus() > 0 else 'cpu:0'
    googlenet_model = GoogLEnet(learning_rate=args.lr, device_name=device)
    # googlenet_model.load()  # you can load the latest model you saved
    googlenet_model(tf.convert_to_tensor(X_train[:1]), True)
    googlenet_model.summary()

    if args.no_tqdm:
        tqdm_option = None
    else:
        tqdm_option = "normal"
    googlenet_model.fit(X_train, y_train, X_test, y_test, epochs=args.epochs, verbose=args.verbose,
                        batch_size=args.batch_size, saving=args.saving, tqdm_option=tqdm_option)
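
All of these snippets share the same probe-once idiom: call tfe.num_gpus() at startup, derive a device string, and pin the work with tf.device. Below is a minimal self-contained sketch of just that idiom, assuming TensorFlow 1.x with the contrib eager API; the variable and matmul are placeholder work, not part of the example above.

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()

# Probe once, then pin every op to the chosen device.
device = 'gpu:0' if tfe.num_gpus() > 0 else 'cpu:0'
with tf.device(device):
    w = tfe.Variable(tf.random_uniform([3, 1]))
    y = tf.matmul(tf.random_uniform([4, 3]), w)  # runs on `device`
print(y.numpy())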
Example 2
    def benchmarkEagerL2hmc(self):
        """Benchmark Eager performance."""

        hparams = get_default_hparams()
        dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                  loglikelihood_fn=l2hmc.get_scg_energy_fn(),
                                  n_steps=hparams.n_steps,
                                  eps=hparams.eps)
        # TODO(lxuechen): Add learning rate decay
        optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)

        # Warmup to reduce initialization effect when timing
        l2hmc.warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters)

        # Time
        start_time = time.time()
        l2hmc.fit(dynamics,
                  optimizer,
                  n_samples=hparams.n_samples,
                  n_iters=hparams.n_iters)
        wall_time = time.time() - start_time
        examples_per_sec = hparams.n_samples / wall_time

        self.report_benchmark(name="eager_train_%s" %
                              ("gpu" if tfe.num_gpus() > 0 else "cpu"),
                              iters=hparams.n_iters,
                              extras={"examples_per_sec": examples_per_sec},
                              wall_time=wall_time)
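
The benchmark above follows a warm-up-then-time pattern: run a few untimed iterations so one-time initialization cost does not pollute the measurement, then time the real run and report throughput. A generic hedged sketch of that pattern follows; step_fn and examples_per_iter are hypothetical stand-ins for l2hmc.fit and hparams.n_samples.

import time

def benchmark(step_fn, n_warmup, n_iters, examples_per_iter):
    for _ in range(n_warmup):          # untimed warm-up iterations
        step_fn()
    start = time.time()
    for _ in range(n_iters):           # timed iterations
        step_fn()
    wall_time = time.time() - start
    return wall_time, n_iters * examples_per_iter / wall_time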
Example 3
def main(_):
    tfe.enable_eager_execution()

    # Automatically determine device and data_format
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    # If data_format is defined in FLAGS, overwrite the automatically set value.
    if FLAGS.data_format is not None:
        data_format = FLAGS.data_format
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    train_ds = mnist_dataset.train(FLAGS.data_dir).shuffle(60000).batch(
        FLAGS.batch_size)
    test_ds = mnist_dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

    # Create the model and optimizer
    model = mnist.Model(data_format)
    optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

    # Create file writers for writing TensorBoard summaries.
    if FLAGS.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(FLAGS.output_dir, 'train')
        test_dir = os.path.join(FLAGS.output_dir, 'eval')
        tf.gfile.MakeDirs(FLAGS.output_dir)
    else:
        train_dir = None
        test_dir = None
    summary_writer = tf.contrib.summary.create_file_writer(train_dir,
                                                           flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Create and restore checkpoint (if one exists on the path)
    checkpoint_prefix = os.path.join(FLAGS.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model,
                                optimizer=optimizer,
                                step_counter=step_counter)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.model_dir))

    # Train and evaluate for a set number of epochs.
    with tf.device(device):
        for _ in range(FLAGS.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter,
                      FLAGS.log_interval)
            end = time.time()
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
Example 4
  def benchmarkEagerLinearRegression(self):
    num_batches = 200
    batch_size = 64
    dataset = linear_regression.synthetic_dataset(
        w=tf.random_uniform([3, 1]),
        b=tf.random_uniform([1]),
        noise_level=0.01,
        batch_size=batch_size,
        num_batches=num_batches)
    burn_in_dataset = dataset.take(10)

    model = linear_regression.LinearModel()

    with tf.device(device()):
      optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

      # Perform burn-in.
      linear_regression.fit(model, burn_in_dataset, optimizer)

      start_time = time.time()
      linear_regression.fit(model, dataset, optimizer)
      wall_time = time.time() - start_time

      examples_per_sec = num_batches * batch_size / wall_time
      self.report_benchmark(
          name="eager_train_%s" %
          ("gpu" if tfe.num_gpus() > 0 else "cpu"),
          iters=num_batches,
          extras={"examples_per_sec": examples_per_sec},
          wall_time=wall_time)
Example 5
def main(_):
    tfe.enable_eager_execution()
    # Ground-truth constants.
    true_w = [[-2.0], [4.0], [1.0]]
    true_b = [0.5]
    noise_level = 0.01

    # Training constants.
    batch_size = 64
    learning_rate = 0.1

    print("True w: %s" % true_w)
    print("True b: %s\n" % true_b)

    model = LinearModel()
    dataset = synthetic_dataset(true_w, true_b, noise_level, batch_size, 20)

    device = "gpu:0" if tfe.num_gpus() else "cpu:0"
    print("Using device: %s" % device)
    with tf.device(device):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        fit(model, dataset, optimizer, verbose=True, logdir=FLAGS.logdir)

    print("\nAfter training: w = %s" % model.variables[0].numpy())
    print("\nAfter training: b = %s" % model.variables[1].numpy())
Example 6
def _generate_synthetic_snli_data_batch(sequence_length,
                                        batch_size,
                                        vocab_size):
  """Generate a fake batch of SNLI data for testing."""
  with tf.device("cpu:0"):
    labels = tf.random_uniform([batch_size], minval=1, maxval=4, dtype=tf.int64)
    prem = tf.random_uniform(
        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
    prem_trans = tf.constant(np.array(
        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
          3, 2, 2]] * batch_size, dtype=np.int64).T)
    hypo = tf.random_uniform(
        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
    hypo_trans = tf.constant(np.array(
        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
          3, 2, 2]] * batch_size, dtype=np.int64).T)
  if tfe.num_gpus():
    labels = labels.gpu()
    prem = prem.gpu()
    prem_trans = prem_trans.gpu()
    hypo = hypo.gpu()
    hypo_trans = hypo_trans.gpu()
  return labels, prem, prem_trans, hypo, hypo_trans
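
The tensors above are built on the CPU and then moved with the eager-only Tensor.gpu() method, which returns a copy of the tensor on GPU:0 (there is a matching .cpu()). A minimal sketch, assuming TensorFlow 1.x eager execution:

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()

with tf.device("cpu:0"):
    t = tf.random_uniform([2, 2])
if tfe.num_gpus() > 0:
    t = t.gpu()   # copy to GPU:0; the original CPU tensor is unchanged
print(t.device)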
Example 7
def main(_):
  tfe.enable_eager_execution()
  # Ground-truth constants.
  true_w = [[-2.0], [4.0], [1.0]]
  true_b = [0.5]
  noise_level = 0.01

  # Training constants.
  batch_size = 64
  learning_rate = 0.1

  print("True w: %s" % true_w)
  print("True b: %s\n" % true_b)

  model = LinearModel()
  dataset = synthetic_dataset(true_w, true_b, noise_level, batch_size, 20)

  device = "gpu:0" if tfe.num_gpus() else "cpu:0"
  print("Using device: %s" % device)
  with tf.device(device):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    fit(model, dataset, optimizer, verbose=True, logdir=FLAGS.logdir)

  print("\nAfter training: w = %s" % model.variables[0].numpy())
  print("\nAfter training: b = %s" % model.variables[1].numpy())
Example 8
def train():
    # Specify the path where you want to save/restore the trained variables.
    checkpoint_directory = 'models_checkpoints/EmotionCNN/'
    # Use the GPU if available.
    device = 'gpu:0' if tfe.num_gpus() > 0 else 'cpu:0'
    # Define optimizer.
    optimizer = tf.train.AdamOptimizer()

    # Instantiate model. This doesn't initialize the variables yet.
    model = ERCNN(num_classes=7, device=device,
                  checkpoint_directory=checkpoint_directory)
    training_data, eval_data = data_process()
    # Train model
    model.fit(training_data, eval_data, optimizer, num_epochs=500,
              early_stopping_rounds=5, verbose=10, train_from_scratch=False)
    model.save_model()
    plt.plot(range(len(model.history['train_loss'])), model.history['train_loss'],
             color='b', label='Train loss')
    plt.plot(range(len(model.history['eval_loss'])), model.history['eval_loss'],
             color='r', label='Dev loss')
    plt.title('Model performance during training', fontsize=15)
    plt.xlabel('Number of epochs', fontsize=15)
    plt.ylabel('Loss', fontsize=15)
    plt.legend(fontsize=15)
    plt.show()
    train_acc = model.compute_accuracy(training_data)
    eval_acc = model.compute_accuracy(eval_data)

    print('Train accuracy: ', train_acc.result().numpy())
    print('Eval accuracy: ', eval_acc.result().numpy())
Example 9
    def benchmarkEagerLinearRegression(self):
        num_epochs = 10
        num_batches = 200
        batch_size = 64
        dataset = linear_regression.synthetic_dataset(
            w=tf.random_uniform([3, 1]),
            b=tf.random_uniform([1]),
            noise_level=0.01,
            batch_size=batch_size,
            num_batches=num_batches)
        burn_in_dataset = dataset.take(10)

        model = linear_regression.LinearModel()

        with tf.device(device()):
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

            # Perform burn-in.
            linear_regression.fit(model, burn_in_dataset, optimizer)

            start_time = time.time()
            for _ in range(num_epochs):
                linear_regression.fit(model, dataset, optimizer)
            wall_time = time.time() - start_time

            examples_per_sec = num_epochs * num_batches * batch_size / wall_time
            self.report_benchmark(
                name="eager_train_%s" %
                ("gpu" if tfe.num_gpus() > 0 else "cpu"),
                iters=num_epochs * num_batches,
                extras={"examples_per_sec": examples_per_sec},
                wall_time=wall_time)
Example 10
 def _report(self, test_name, start, num_iters, batch_size):
   avg_time = (time.time() - start) / num_iters
   dev = 'gpu' if tfe.num_gpus() else 'cpu'
   name = 'eager_%s_%s_batch_%d_%s' % (test_name, dev, batch_size,
                                       data_format())
   extras = {'examples_per_sec': batch_size / avg_time}
   self.report_benchmark(
       iters=num_iters, wall_time=avg_time, name=name, extras=extras)
Example 11
def main(_):
    assert tfe.num_gpus() > 0, 'Make sure the GPU device exists'
    device_name = '/gpu:{}'.format(args.cuda_device)
    print('\n==> ==> ==> Using device {}'.format(device_name))

    # Load the dataset
    train_ds, val_ds = [
        dataset_generator(
            mode,
            conf.input_size,
            num_epochs=1,
            batch_size=conf.batch_size,
            buffer_size=10000)  # TODO: adjust this for real training
        for mode in ['train', 'val']
    ]

    # Create the model and optimizer
    model = RetinaNet()
    optimizer = tf.train.RMSPropOptimizer(conf.learning_rate)

    # Define the path to the TensorBoard summary
    train_dir, val_dir = [
        os.path.join(conf.summary_dir, mode) for mode in ['train', 'val']
    ]
    tf.gfile.MakeDirs(conf.summary_dir)

    train_summary_writer = tf.contrib.summary.create_summary_file_writer(
        train_dir, flush_millis=10000, name='train')
    val_summary_writer = tf.contrib.summary.create_summary_file_writer(
        val_dir, flush_millis=10000, name='val')

    checkpoint_prefix = os.path.join(conf.checkpoint_dir, 'ckpt')

    with tfe.restore_variables_on_create(
            tf.train.latest_checkpoint(conf.checkpoint_dir)):
        with tf.device(device_name):
            epoch = tfe.Variable(1., name='epoch')
            best_loss = tfe.Variable(tf.float32.max, name='best_loss')
            print('==> ==> ==> Start training from epoch {:.0f}...\n'.format(
                epoch.numpy()))

            while epoch <= conf.num_epochs + 1:
                gs = tf.train.get_or_create_global_step()
                with train_summary_writer.as_default():
                    train_one_epoch(model, optimizer, train_ds, epoch.numpy())
                with val_summary_writer.as_default():
                    eval_loss = validate(model, val_ds, epoch.numpy())

                # Save the best loss
                if eval_loss < best_loss:
                    best_loss.assign(
                        eval_loss)  # use assign(); plain rebinding would not update the variable
                    all_variables = (model.variables + optimizer.variables() +
                                     [gs] + [epoch] + [best_loss])
                    tfe.Saver(all_variables).save(checkpoint_prefix,
                                                  global_step=gs)

                epoch.assign_add(1)
Example 12
    def train(self, x, y, val, execution_mode=None):
        device = '/gpu:0' if tfe.num_gpus() else '/cpu:0'
        log = {
            'epoch_list': [],
            'train_binary_crossentropy': [],
            'train_roc': [],
            'val_roc': []
        }
        # with tfe.execution_mode(execution_mode):
        optimizer = tf.train.AdagradOptimizer(self.lr)
        weight_file = '/nn_modules/best_eager_MLP_weight'
        no_improve = 0  # epochs without improvement, used for early stopping
        max_score = 0
        with tf.device(device):  # pin ops to the selected device
            for epoch in range(self.epoch):  # epoch loop
                train_iterator = make_iterator(
                    (x, y), batch_size=self.batch_size)  # build a fresh iterator
                loss_history = []
                full_y_pred = []
                while True:  # batch loop; OutOfRangeError ends the epoch
                    try:
                        batch_x, batch_y = train_iterator.get_next()
                        grads, loss, batch_pre = self.compute_gradients_and_loss(
                            batch_x, batch_y)

                        self.apply_gradients(optimizer, grads)
                        loss_history.append(loss.numpy())
                        full_y_pred.append(batch_pre.numpy())
                        # tfe.async_wait()
                    except tf.errors.OutOfRangeError:
                        break
                full_y_pred = np.concatenate(full_y_pred)
                val_score = self.validate(val[0], val[1])
                if val_score > max_score:
                    max_score = val_score
                    no_improve = 0
                    self.model.save_weights(weight_file)
                else:
                    no_improve += 1
                    if no_improve > 10:
                        print("early stop at epoch %d" % (no_improve))
                        break
                self.model.load_weights(weight_file)
                train_score = f1(y, full_y_pred)
                epoch_loss = np.mean(loss_history)
                log['epoch_list'].append(epoch)
                log['train_binary_crossentropy'].append(epoch_loss)
                log['train_roc'].append(train_score)
                log['val_roc'].append(val_score)
                print(
                    "epoch=%d,loss=%.6f,train_roc=%.6f,val_roc=%.6f,time=%s" %
                    (epoch, epoch_loss, train_score, val_score,
                     time.asctime()))

        log_df = pd.DataFrame(log)
        log_df.to_csv('performance_log.csv')
Example 13
 def _report(self, test_name, start, num_iters, batch_size):
     avg_time = (time.time() - start) / num_iters
     dev = 'gpu' if tfe.num_gpus() else 'cpu'
     name = 'eager_%s_%s_batch_%d_%s' % (test_name, dev, batch_size,
                                         data_format())
     extras = {'examples_per_sec': batch_size / avg_time}
     self.report_benchmark(iters=num_iters,
                           wall_time=avg_time,
                           name=name,
                           extras=extras)
Example 14
def main():
    # Enable eager execution
    tfe.enable_eager_execution()

    # check gpu availability
    device = '/gpu:0'
    if tfe.num_gpus() <= 0:
        device = '/cpu:0'

    train(device)
    return
Example 15
    def __init__(self, param_dict):
        self.param = param_dict
        print(self.param)
        self.lr = param_dict['lr']
        self.epoch = param_dict['epoch']
        self.batch_size = param_dict['batch_size']
        self.val_batch_size = param_dict['val_batch_size']
        # self.batch_num = param_dict['batch_num']
        # self.embed_feature_size_list = param_dict['embed_faeture_list']
        # self.MLP = param_dict['MLP']
        # self.input_dim = param_dict['input_dim']
        self.drop_rate = param_dict['drop_rate']
        self.reg_rate = param_dict['reg_rate']
        # self.vector_length = param_dict['vector_length']
        # self.subclass = param_dict['subclass']
        self.weight_file_path = param_dict['weight_file_path']
        self.pre_train = param_dict['pre_train']
        self.device = '/gpu:0' if tfe.num_gpus() else '/cpu:0'
        with tf.Session().as_default() as sess:
            self.sess = sess
            self.loss = tf.keras.losses.categorical_crossentropy
            # with tf.Graph().as_default():
            self.model = czx_NN_subclass(param_dict)

        self.padded_dict = {}
        for col in self.param['feature_name'].feature_all():
            if col != 'appId_list_encoded':
                self.padded_dict[col] = [1]
        self.padded_dict['appId_list_encoded'] = self.param[
            'appId_list_encoded_length']
        self.padded_dict['usage_appId_list'] = self.param['size_of_space'][
            'max_usage_len']
        self.padded_dict['usage_duration_list'] = self.param['size_of_space'][
            'max_usage_len']
        self.padded_dict['usage_times_list'] = self.param['size_of_space'][
            'max_usage_len']
        self.padded_dict['usage_use_date_list'] = self.param['size_of_space'][
            'max_usage_len']
        self.padded_dict['all_activedApp_cate_list'] = self.param[
            'size_of_space']['max_cate_len']
        self.padded_dict['usage_appId_duration_list'] = self.param[
            'size_of_space']['max_usage_len']
        self.padded_dict['usage_appId_times_list'] = self.param[
            'size_of_space']['max_usage_len']
        self.padded_dict['usage_appId_mean_dura_list'] = self.param[
            'size_of_space']['max_usage_len']

        self.padded_dict['usage_appId_full_list'] = self.param[
            'size_of_space']['max_usage_full_len']
        self.padded_dict['usage_duration_full_list'] = self.param[
            'size_of_space']['max_usage_full_len']
        self.padded_dict['usage_time_full_list'] = self.param['size_of_space'][
            'max_usage_full_len']
Example 16
def main(_):

    pp = pprint.PrettyPrinter()
    pp.pprint(flags.FLAGS.__flags)

    filenames = glob.glob(data_dir)

    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)

    model_objects = {
        'generator': Generator(data_format),
        'discriminator': Discriminator(data_format),
        'generator_optimizer': tf.train.AdamOptimizer(FLAGS.generator_learning_rate, FLAGS.beta1, FLAGS.beta2),
        'discriminator_optimizer': tf.train.AdamOptimizer(FLAGS.discriminator_learning_rate, FLAGS.beta1, FLAGS.beta2),
        'step_counter': tf.train.get_or_create_global_step()
    }

    summary_writer = tf.contrib.summary.create_file_writer(FLAGS.summary_dir,
                                                           flush_millis=1000)

    checkpoint = tfe.Checkpoint(**model_objects)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)
    checkpoint.restore(latest_cpkt)

    dataset = tf.data.TFRecordDataset(
        filenames).map(read_and_decode_with_labels)
    dataset = dataset.shuffle(10000).apply(
        tf.contrib.data.batch_and_drop_remainder(FLAGS.batch_size))

    with tf.device(device):
        for epoch in range(FLAGS.epoch):
            start = time.time()
            with summary_writer.as_default():
                train_one_epoch(dataset=dataset, batch_size=FLAGS.batch_size, log_interval=FLAGS.log_interval,
                                z_dim=FLAGS.z_dim, device=device, epoch=epoch, **model_objects)
            end = time.time()
            checkpoint.save(checkpoint_prefix)
            print('\nTrain time for epoch #%d (step %d): %f' %
                  (checkpoint.save_counter.numpy(),
                   checkpoint.step_counter.numpy(),
                   end - start))
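
The pattern above is object-based checkpointing: tfe.Checkpoint tracks the objects passed as keyword arguments, save() writes a numbered checkpoint, and restore() matches variables by object path rather than by name. A stripped-down round trip, as a hedged sketch under TF 1.x with an illustrative path:

import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()

ckpt_dir = '/tmp/demo_ckpt'
tf.gfile.MakeDirs(ckpt_dir)

v = tfe.Variable(1.0, name='v')
step = tf.train.get_or_create_global_step()
checkpoint = tfe.Checkpoint(v=v, step=step)

path = checkpoint.save(os.path.join(ckpt_dir, 'ckpt'))    # e.g. .../ckpt-1
checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))  # tolerates None on a fresh dir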
Example 17
 def __init__(self, param_dict):
     self.lr = param_dict['lr']
     self.epoch = param_dict['epoch']
     self.batch_size = param_dict['batch_size']
     # self.embed_feature_size_list = param_dict['embed_faeture_list']
     # self.MLP = param_dict['MLP']
     # self.input_dim = param_dict['input_dim']
     self.drop_rate = param_dict['drop_rate']
     self.reg_rate = param_dict['reg_rate']
     # self.vector_length = param_dict['vector_length']
     # self.subclass = param_dict['subclass']
     self.device = '/gpu:0' if tfe.num_gpus() else '/cpu:0'
     self.model = czx_NN_subclass(param_dict)
Example 18
def main(_):
  tfe.enable_eager_execution()

  (device, data_format) = ('/gpu:0', 'channels_first')
  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  train_ds = dataset.train(FLAGS.data_dir).shuffle(60000).batch(
      FLAGS.batch_size)
  test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

  # Create the model and optimizer
  model = mnist.Model(data_format)
  optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

  if FLAGS.output_dir:
    # Create directories to which summaries will be written
    # tensorboard --logdir=<output_dir>
    # can then be used to see the recorded summaries.
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'eval')
    tf.gfile.MakeDirs(FLAGS.output_dir)
  else:
    train_dir = None
    test_dir = None
  summary_writer = tf.contrib.summary.create_file_writer(
      train_dir, flush_millis=10000)
  test_summary_writer = tf.contrib.summary.create_file_writer(
      test_dir, flush_millis=10000, name='test')
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
  step_counter = tf.train.get_or_create_global_step()
  checkpoint = tfe.Checkpoint(
      model=model, optimizer=optimizer, step_counter=step_counter)
  # Restore variables on creation if a checkpoint exists.
  checkpoint.restore(tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
  # Train and evaluate for 10 epochs.
  with tf.device(device):
    for _ in range(10):
      start = time.time()
      with summary_writer.as_default():
        train(model, optimizer, train_ds, step_counter, FLAGS.log_interval)
      end = time.time()
      print('\nTrain time for epoch #%d (%d total steps): %f' %
            (checkpoint.save_counter.numpy() + 1,
             step_counter.numpy(),
             end - start))
      with test_summary_writer.as_default():
        test(model, test_ds)
      checkpoint.save(checkpoint_prefix)
Example 19
def main(_):
    tfe.enable_eager_execution()

    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    train_ds = dataset.train(FLAGS.data_dir).shuffle(60000).batch(
        FLAGS.batch_size)
    test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

    # Create the model and optimizer
    model = mnist.Model(data_format)
    optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

    if FLAGS.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(FLAGS.output_dir, 'train')
        test_dir = os.path.join(FLAGS.output_dir, 'eval')
        tf.gfile.MakeDirs(FLAGS.output_dir)
    else:
        train_dir = None
        test_dir = None
    summary_writer = tf.contrib.summary.create_file_writer(train_dir,
                                                           flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')

    # Train and evaluate for 11 epochs.
    with tf.device(device):
        for epoch in range(1, 11):
            with tfe.restore_variables_on_create(
                    tf.train.latest_checkpoint(FLAGS.checkpoint_dir)):
                global_step = tf.train.get_or_create_global_step()
                start = time.time()
                with summary_writer.as_default():
                    train(model, optimizer, train_ds, FLAGS.log_interval)
                end = time.time()
                print('\nTrain time for epoch #%d (global step %d): %f' %
                      (epoch, global_step.numpy(), end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            all_variables = (model.variables + optimizer.variables() +
                             [global_step])
            tfe.Saver(all_variables).save(checkpoint_prefix,
                                          global_step=global_step)
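
This example uses the older name-based pair: tfe.Saver saves a flat list of variables, and tfe.restore_variables_on_create restores each variable at the moment it is created inside the context (doing nothing when the checkpoint path is None, e.g. on a first run). A hedged minimal sketch under TF 1.x; the path is illustrative. Later examples replace this pair with the object-based tfe.Checkpoint.

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()
tf.gfile.MakeDirs('/tmp/demo_saver')

with tfe.restore_variables_on_create(
        tf.train.latest_checkpoint('/tmp/demo_saver')):
    v = tfe.Variable(0.0, name='v')   # restored here if a checkpoint exists
tfe.Saver([v]).save('/tmp/demo_saver/ckpt')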
Example 20
def main(_):
  (device, data_format) = ('/gpu:0', 'channels_first')
  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  data = input_data.read_data_sets(FLAGS.data_dir)
  dataset = (tf.data.Dataset
             .from_tensor_slices(data.train.images)
             .shuffle(60000)
             .batch(FLAGS.batch_size))

  # Create the models and optimizers
  generator = Generator(data_format)
  discriminator = Discriminator(data_format)
  with tf.variable_scope('generator'):
    generator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)
  with tf.variable_scope('discriminator'):
    discriminator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)

  # Prepare summary writer and checkpoint info
  summary_writer = tf.contrib.summary.create_summary_file_writer(
      FLAGS.output_dir, flush_millis=1000)
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
  latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
  if latest_cpkt:
    print('Using latest checkpoint at ' + latest_cpkt)

  with tf.device(device):
    for epoch in range(1, 101):
      with tfe.restore_variables_on_create(latest_cpkt):
        global_step = tf.train.get_or_create_global_step()
        start = time.time()
        with summary_writer.as_default():
          train_one_epoch(generator, discriminator, generator_optimizer,
                          discriminator_optimizer,
                          dataset, FLAGS.log_interval, FLAGS.noise)
        end = time.time()
        print('\nTrain time for epoch #%d (global step %d): %f' % (
            epoch, global_step.numpy(), end - start))

      all_variables = (
          generator.variables
          + discriminator.variables
          + generator_optimizer.variables()
          + discriminator_optimizer.variables()
          + [global_step])
      tfe.Saver(all_variables).save(
          checkpoint_prefix, global_step=global_step)
Example 21
def test_spinn(embed, test_data, config):
    """Test a SPINN model.

  Args:
    embed: The embedding matrix as a float32 numpy array with shape
      [vocabulary_size, word_vector_len]. word_vector_len is the length of a
      word embedding vector.
    test_data: An instance of `data_chemprot.ChemprotData`, for the test split.
    config: A configuration object. See the argument to this Python binary for
      details.

  Returns:
    1. Final loss value on the test split.
    2. Final fraction of correct classifications on the test split.
  """
    use_gpu = tfe.num_gpus() > 0 and not config.force_cpu
    device = "gpu:0" if use_gpu else "cpu:0"
    print("Using device: %s" % device)

    log_header = (
        "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
        "     Accuracy  Dev/Accuracy")
    dev_log_template = (
        "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
        "{:8.6f} {:12.4f} {:12.4f}")

    summary_writer = tf.contrib.summary.create_file_writer(config.logdir,
                                                           flush_millis=10000)
    with tf.device(device), \
         summary_writer.as_default(), \
         tf.contrib.summary.always_record_summaries():
        model = ChemprotClassifier(config, embed)
        latest_checkpoint = tf.train.latest_checkpoint(config.logdir)
        print("Latest checkpoint", latest_checkpoint)
        tfe.restore_network_checkpoint(model, latest_checkpoint)

        start = time.time()
        dev_mean_loss = tfe.metrics.Mean()
        dev_accuracy = tfe.metrics.Accuracy()
        print(log_header)

        # Restore is complete; evaluate on the test split.
        (dev_loss, dev_frac_correct, dev_f1, dev_labels, dev_logits,
         dev_pmids, dev_ent1s, dev_ent2s) = _evaluate_on_dataset(
             test_data, config.batch_size, model, use_gpu)

        print(
            dev_log_template.format(time.time() - start, 0, 0, 1, 0, 1 / 1, 0,
                                    dev_loss, 0, dev_frac_correct * 100.0))
        print(dev_f1)
Example 22
def main(_):
  tfe.enable_eager_execution()

  (device, data_format) = ('/gpu:0', 'channels_first')
  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  (train_ds, test_ds) = load_data(FLAGS.data_dir)
  train_ds = train_ds.shuffle(60000).batch(FLAGS.batch_size)

  # Create the model and optimizer
  model = MNISTModel(data_format)
  optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

  if FLAGS.output_dir:
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'eval')
    tf.gfile.MakeDirs(FLAGS.output_dir)
  else:
    train_dir = None
    test_dir = None
  summary_writer = tf.contrib.summary.create_summary_file_writer(
      train_dir, flush_secs=10)
  test_summary_writer = tf.contrib.summary.create_summary_file_writer(
      test_dir, flush_secs=10, name='test')
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')

  with tf.device(device):
    for epoch in range(1, 11):
      with tfe.restore_variables_on_create(
          tf.train.latest_checkpoint(FLAGS.checkpoint_dir)):
        global_step = tf.train.get_or_create_global_step()
        start = time.time()
        with summary_writer.as_default():
          train_one_epoch(model, optimizer, train_ds, FLAGS.log_interval)
        end = time.time()
        print('\nTrain time for epoch #%d (global step %d): %f' % (
            epoch, global_step.numpy(), end - start))
      with test_summary_writer.as_default():
        test(model, test_ds)
      all_variables = (
          model.variables
          + tfe.get_optimizer_variables(optimizer)
          + [global_step])
      tfe.Saver(all_variables).save(
          checkpoint_prefix, global_step=global_step)
Example 23
def main(_):
  tfe.enable_eager_execution()

  (device, data_format) = ('/gpu:0', 'channels_first')
  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  (train_ds, test_ds) = load_data(FLAGS.data_dir)
  train_ds = train_ds.shuffle(60000).batch(FLAGS.batch_size)

  # Create the model and optimizer
  model = MNISTModel(data_format)
  optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

  if FLAGS.output_dir:
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'eval')
    tf.gfile.MakeDirs(FLAGS.output_dir)
  else:
    train_dir = None
    test_dir = None
  summary_writer = tf.contrib.summary.create_file_writer(
      train_dir, flush_millis=10000)
  test_summary_writer = tf.contrib.summary.create_file_writer(
      test_dir, flush_millis=10000, name='test')
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')

  with tf.device(device):
    for epoch in range(1, 11):
      with tfe.restore_variables_on_create(
          tf.train.latest_checkpoint(FLAGS.checkpoint_dir)):
        global_step = tf.train.get_or_create_global_step()
        start = time.time()
        with summary_writer.as_default():
          train_one_epoch(model, optimizer, train_ds, FLAGS.log_interval)
        end = time.time()
        print('\nTrain time for epoch #%d (global step %d): %f' % (
            epoch, global_step.numpy(), end - start))
      with test_summary_writer.as_default():
        test(model, test_ds)
      all_variables = (
          model.variables
          + optimizer.variables()
          + [global_step])
      tfe.Saver(all_variables).save(
          checkpoint_prefix, global_step=global_step)
Example 24
def main():
    # Enable eager execution
    tfe.enable_eager_execution()

    # (device, data_format) = ('/gpu:0', 'channels_first')
    # if tfe.num_gpus() <= 0:
    #     (device, data_format) = ('/cpu:0', 'channels_last')

    # check gpu availability
    device = '/gpu:0'
    if tfe.num_gpus() <= 0:
        device = '/cpu:0'

    train(device)

    return
Example 25
def main(_):
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    data = input_data.read_data_sets(FLAGS.data_dir)
    dataset = (tf.data.Dataset.from_tensor_slices(
        data.train.images).shuffle(60000).batch(FLAGS.batch_size))

    # Create the models and optimizers
    generator = Generator(data_format)
    discriminator = Discriminator(data_format)
    with tf.variable_scope('generator'):
        generator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)
    with tf.variable_scope('discriminator'):
        discriminator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)

    # Prepare summary writer and checkpoint info
    summary_writer = tf.contrib.summary.create_summary_file_writer(
        FLAGS.output_dir, flush_millis=1000)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)

    with tf.device(device):
        for epoch in range(1, 101):
            with tfe.restore_variables_on_create(latest_cpkt):
                global_step = tf.train.get_or_create_global_step()
                start = time.time()
                with summary_writer.as_default():
                    train_one_epoch(generator, discriminator,
                                    generator_optimizer,
                                    discriminator_optimizer, dataset,
                                    FLAGS.log_interval, FLAGS.noise)
                end = time.time()
                print('\nTrain time for epoch #%d (global step %d): %f' %
                      (epoch, global_step.numpy(), end - start))

            all_variables = (generator.variables + discriminator.variables +
                             generator_optimizer.variables() +
                             discriminator_optimizer.variables() +
                             [global_step])
            tfe.Saver(all_variables).save(checkpoint_prefix,
                                          global_step=global_step)
Example 26
def main(_):
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    data = input_data.read_data_sets(FLAGS.data_dir)
    dataset = (tf.data.Dataset.from_tensor_slices(
        data.train.images).shuffle(60000).batch(FLAGS.batch_size))

    # Create the models and optimizers.
    model_objects = {
        'generator': Generator(data_format),
        'discriminator': Discriminator(data_format),
        'generator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
        'discriminator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
        'step_counter': tf.train.get_or_create_global_step(),
    }

    # Prepare summary writer and checkpoint info
    summary_writer = tf.contrib.summary.create_summary_file_writer(
        FLAGS.output_dir, flush_millis=1000)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)
    checkpoint = tfe.Checkpoint(**model_objects)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(latest_cpkt)

    with tf.device(device):
        for _ in range(100):
            start = time.time()
            with summary_writer.as_default():
                train_one_epoch(dataset=dataset,
                                log_interval=FLAGS.log_interval,
                                noise_dim=FLAGS.noise,
                                **model_objects)
            end = time.time()
            checkpoint.save(checkpoint_prefix)
            print('\nTrain time for epoch #%d (step %d): %f' %
                  (checkpoint.save_counter.numpy(),
                   checkpoint.step_counter.numpy(), end - start))
Example 27
def main(_):
  (device, data_format) = ('/gpu:0', 'channels_first')
  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  data = input_data.read_data_sets(FLAGS.data_dir)
  dataset = (
      tf.data.Dataset.from_tensor_slices(data.train.images).shuffle(60000)
      .batch(FLAGS.batch_size))

  # Create the models and optimizers.
  model_objects = {
      'generator': Generator(data_format),
      'discriminator': Discriminator(data_format),
      'generator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
      'discriminator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
      'step_counter': tf.train.get_or_create_global_step(),
  }

  # Prepare summary writer and checkpoint info
  summary_writer = tf.contrib.summary.create_summary_file_writer(
      FLAGS.output_dir, flush_millis=1000)
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
  latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
  if latest_cpkt:
    print('Using latest checkpoint at ' + latest_cpkt)
  checkpoint = tfe.Checkpoint(**model_objects)
  # Restore variables on creation if a checkpoint exists.
  checkpoint.restore(latest_cpkt)

  with tf.device(device):
    for _ in range(100):
      start = time.time()
      with summary_writer.as_default():
        train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
                        noise_dim=FLAGS.noise, **model_objects)
      end = time.time()
      checkpoint.save(checkpoint_prefix)
      print('\nTrain time for epoch #%d (step %d): %f' %
            (checkpoint.save_counter.numpy(),
             checkpoint.step_counter.numpy(),
             end - start))
Example 28
  def benchmarkEagerSpinnSNLIClassifier(self):
    test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
    with tf.device(test_device):
      burn_in_iterations = 2
      benchmark_iterations = 10

      vocab_size = 1000
      batch_size = 128
      sequence_length = 15
      d_embed = 200
      d_out = 4

      embed = tf.random_normal((vocab_size, d_embed))

      config = _test_spinn_config(d_embed, d_out)
      model = spinn.SNLIClassifier(config, embed)
      trainer = spinn.SNLIClassifierTrainer(model, config.lr)

      (labels, prem, prem_trans, hypo,
       hypo_trans) = _generate_synthetic_snli_data_batch(sequence_length,
                                                         batch_size,
                                                         vocab_size)

      for _ in range(burn_in_iterations):
        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)

      gc.collect()
      start_time = time.time()
      for _ in xrange(benchmark_iterations):
        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)
      wall_time = time.time() - start_time
      # Named "examples"_per_sec to conform with other benchmarks.
      extras = {"examples_per_sec": benchmark_iterations / wall_time}
      self.report_benchmark(
          name="Eager_SPINN_SNLIClassifier_Benchmark",
          iters=benchmark_iterations,
          wall_time=wall_time,
          extras=extras)
Example 29
def device_and_data_format():
  return (('/gpu:0', 'channels_first') if tfe.num_gpus()
          else ('/cpu:0', 'channels_last'))
Example 30
 def setUp(self):
   super(SpinnTest, self).setUp()
   self._test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
   self._temp_data_dir = tempfile.mkdtemp()
Example 31
def device():
  return "/device:GPU:0" if tfe.num_gpus() else "/device:CPU:0"
Example 32
def data_format():
  return "channels_first" if tfe.num_gpus() else "channels_last"
Example 33
def data_format():
    return "channels_first" if tfe.num_gpus() else "channels_last"
Example 34
def device():
    return "/device:GPU:0" if tfe.num_gpus() else "/device:CPU:0"
Example 35
    parser = argparse.ArgumentParser()
    parser.add_argument('--classify',
                        help='Predict the class of an input image',
                        type=str)
    parser.add_argument('--test',
                        help='Evaluate accuracy on the test set',
                        action='store_true')
    parser.add_argument('--validation',
                        help='Evaluate accuracy on the validation set',
                        action='store_true')
    args = parser.parse_args()

    cfg = Configuration()
    net = AlexNet(cfg, training=False)

    testset = ImageNetDataset(cfg, 'test')

    if tfe.num_gpus() > 2:
        # Change 2 to 0 to run on the GPU; currently GPU execution is
        # impossible because tf.in_top_k has no CUDA implementation.
        with tf.device('/gpu:0'):
            tester = Tester(cfg, net, testset)

            if args.classify:
                tester.classify_image(args.classify)
            elif args.validation:
                tester.test('validation')
            else:
                tester.test('test')
    else:
        tester = Tester(cfg, net, testset)
Example 36
    writer = tf.contrib.summary.create_file_writer("./tb/")
    writer.set_as_default()

    buffer = ExperienceBuffer(REPLAY_SIZE)
    agent = Agent(env, buffer)
    epsilon = EPSILON_START

    total_rewards = []
    frame_idx = 0
    ts_frame = 0
    ts = time.time()
    best_mean_reward = None
    speed = 0
    mean_reward = 0

    device = "gpu:0" if tfe.num_gpus() else "cpu:0"
    print("Using device: %s" % device)
    with tf.device(device):
        optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

        while True:
            with tf.device('/cpu:0'):
                global_step.assign_add(1)
            frame_idx += 1
            epsilon = max(EPSILON_FINAL,
                          EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME)

            reward = agent.play_step(net, epsilon)

            if reward is not None:
                total_rewards.append(reward)
Example 37
def main(argv):
    parser = MNISTEagerArgParser()
    flags = parser.parse_args(args=argv[1:])

    # TF v1.7
    tfe.enable_eager_execution()

    # Automatically determine device and data_format
    (device, data_format) = ('/gpu:0', 'channels_first')
    if flags.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    # If data_format is defined in FLAGS, overwrite automatically set value.
    if flags.data_format is not None:
        data_format = flags.data_format

    # Log Info
    print("-" * 64)
    print("TEST INFO - EAGER")
    print("-" * 64)
    print("TF version:\t {}".format(tf.__version__))
    print("Eager execution:\t {}".format(tf.executing_eagerly()))
    print("Dataset:\t MNIST")
    print("Model:\t CNN")
    print('Device:\t {}'.format(device))

    if data_format == 'channels_first':
        print("Data format:\t NCHW (channel first)")
    else:
        print("Data format:\t NHWC (channel last)")

    print("=" * 64)

    # Load the datasets
    train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000).batch(
        flags.batch_size)
    test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)

    # Create the model and optimizer
    # model = create_model(data_format)
    model = MNISTModel(data_format)
    optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)

    # Create file writers for writing TensorBoard summaries.
    if flags.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(flags.output_dir, 'train')
        test_dir = os.path.join(flags.output_dir, 'eval')
        tf.gfile.MakeDirs(flags.output_dir)
    else:
        train_dir = None
        test_dir = None
    summary_writer = tf.contrib.summary.create_file_writer(train_dir,
                                                           flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Create and restore checkpoint (if one exists on the path)
    checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model,
                                optimizer=optimizer,
                                step_counter=step_counter)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))

    # Train and evaluate for a set number of epochs.
    with tf.device(device):
        for _ in range(flags.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter,
                      flags.log_interval)
            end = time.time()
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1, step_counter.numpy(),
                   end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
Example 38
def device():
  return '/gpu:0' if tfe.num_gpus() else '/cpu:0'
Example 39
# data
OBJECTS = ['hand']
NUM_OBJECTS = 1
MAX_DETECTIONS_PER_IMAGE = 1
VIDEO_IN = 'video.mp4'

# data preprocessing
DIM_OUTPUT_PER_GRID_PER_ANCHOR = 5 + NUM_OBJECTS
GRID_H, GRID_W = 13, 13
GRID_SIZE = 416 // GRID_H
ANCHORS = np.array([[0.09112895, 0.06958421], [0.21102316, 0.16803947],
                    [0.42625895, 0.26609842], [0.25476474, 0.49848],
                    [0.52668947, 0.59138947]])
NUM_ANCHORS = ANCHORS.shape[0]
ANCHORS *= np.array([GRID_H, GRID_W])  # map from [0, 1] space to grid-cell space
IMG_OUT_H, IMG_OUT_W = GRID_H * GRID_SIZE, GRID_W * GRID_SIZE

# prediction
CHECKPOINT_DIR = 'checkpoints'
CHECKPOINT_PREFIX = os.path.join(CHECKPOINT_DIR, "ckpt")
DIR_TEST = 'test_input'
DIR_IMGS_OUT = 'imgs_out'
THRESHOLD_OUT_PROB = 0.5
THRESHOLD_IOU_NMS = 0.5
if tfe.num_gpus() > 0:
    DEVICE = '/gpu:0'
    print('Using GPU')
else:
    DEVICE = '/cpu:0'
    print('Using CPU')
Example 40
def train_or_infer_spinn(embed,
                         word2index,
                         train_data,
                         dev_data,
                         test_data,
                         config):
  """Perform Training or Inference on a SPINN model.

  Args:
    embed: The embedding matrix as a float32 numpy array with shape
      [vocabulary_size, word_vector_len]. word_vector_len is the length of a
      word embedding vector.
    word2index: A `dict` mapping word to word index.
    train_data: An instance of `data.SnliData`, for the train split.
    dev_data: Same as above, for the dev split.
    test_data: Same as above, for the test split.
    config: A configuration object. See the argument to this Python binary for
      details.

  Returns:
    If `config.inference_premise` and `config.inference_hypothesis` are not
      `None`, i.e., inference mode: the logits for the possible labels of the
      SNLI data set, as a `Tensor` of three floats.
    else:
      The trainer object.
  Raises:
    ValueError: if only one of config.inference_premise and
      config.inference_hypothesis is specified.
  """
  # TODO(cais): Refactor this function into separate one for training and
  #   inference.
  use_gpu = tfe.num_gpus() > 0 and not config.force_cpu
  device = "gpu:0" if use_gpu else "cpu:0"
  print("Using device: %s" % device)

  if ((config.inference_premise and not config.inference_hypothesis) or
      (not config.inference_premise and config.inference_hypothesis)):
    raise ValueError(
        "--inference_premise and --inference_hypothesis must be both "
        "specified or both unspecified, but only one is specified.")

  if config.inference_premise:
    # Inference mode.
    inference_sentence_pair = [
        data.encode_sentence(config.inference_premise, word2index),
        data.encode_sentence(config.inference_hypothesis, word2index)]
  else:
    inference_sentence_pair = None

  log_header = (
      "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
      "     Accuracy  Dev/Accuracy")
  log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} {} "
      "{:12.4f} {}")
  dev_log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
      "{:8.6f} {:12.4f} {:12.4f}")

  summary_writer = tf.contrib.summary.create_file_writer(
      config.logdir, flush_millis=10000)

  with tf.device(device), \
       summary_writer.as_default(), \
       tf.contrib.summary.always_record_summaries():
    model = SNLIClassifier(config, embed)
    global_step = tf.train.get_or_create_global_step()
    trainer = SNLIClassifierTrainer(model, config.lr)
    checkpoint = tfe.Checkpoint(trainer=trainer, global_step=global_step)
    checkpoint.restore(tf.train.latest_checkpoint(config.logdir))

    if inference_sentence_pair:
      # Inference mode.
      prem, prem_trans = inference_sentence_pair[0]
      hypo, hypo_trans = inference_sentence_pair[1]
      inference_logits = model(
          tf.constant(prem), tf.constant(prem_trans),
          tf.constant(hypo), tf.constant(hypo_trans), training=False)
      inference_logits = inference_logits[0][1:]
      max_index = tf.argmax(inference_logits)
      print("\nInference logits:")
      for i, (label, logit) in enumerate(
          zip(data.POSSIBLE_LABELS, inference_logits)):
        winner_tag = " (winner)" if max_index == i else ""
        print("  {0:<16}{1:.6f}{2}".format(label + ":", logit, winner_tag))
      return inference_logits

    train_len = train_data.num_batches(config.batch_size)
    start = time.time()
    iterations = 0
    mean_loss = tfe.metrics.Mean()
    accuracy = tfe.metrics.Accuracy()
    print(log_header)
    for epoch in xrange(config.epochs):
      batch_idx = 0
      for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
          train_data, config.batch_size):
        if use_gpu:
          label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
          # prem_trans and hypo_trans are used for dynamic control flow and can
          # remain on CPU. Same in _evaluate_on_dataset().

        iterations += 1
        batch_train_loss, batch_train_logits = trainer.train_batch(
            label, prem, prem_trans, hypo, hypo_trans)
        batch_size = tf.shape(label)[0]
        mean_loss(batch_train_loss.numpy(),
                  weights=batch_size.gpu() if use_gpu else batch_size)
        accuracy(tf.argmax(batch_train_logits, axis=1), label)

        if iterations % config.save_every == 0:
          checkpoint.save(os.path.join(config.logdir, "ckpt"))

        if iterations % config.dev_every == 0:
          dev_loss, dev_frac_correct = _evaluate_on_dataset(
              dev_data, config.batch_size, trainer, use_gpu)
          print(dev_log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss.result(), dev_loss,
              accuracy.result() * 100.0, dev_frac_correct * 100.0))
          tf.contrib.summary.scalar("dev/loss", dev_loss)
          tf.contrib.summary.scalar("dev/accuracy", dev_frac_correct)
        elif iterations % config.log_every == 0:
          mean_loss_val = mean_loss.result()
          accuracy_val = accuracy.result()
          print(log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss_val, " " * 8, accuracy_val * 100.0, " " * 12))
          tf.contrib.summary.scalar("train/loss", mean_loss_val)
          tf.contrib.summary.scalar("train/accuracy", accuracy_val)
          # Reset metrics.
          mean_loss = tfe.metrics.Mean()
          accuracy = tfe.metrics.Accuracy()

        batch_idx += 1
      if (epoch + 1) % config.lr_decay_every == 0:
        trainer.decay_learning_rate(config.lr_decay_by)

    test_loss, test_frac_correct = _evaluate_on_dataset(
        test_data, config.batch_size, trainer, use_gpu)
    print("Final test loss: %g; accuracy: %g%%" %
          (test_loss, test_frac_correct * 100.0))

  return trainer
Example n. 41
def device_and_data_format():
    return (('/gpu:0', 'channels_first') if tfe.num_gpus()
            else ('/cpu:0', 'channels_last'))
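A brief usage sketch for device_and_data_format(): the pair it returns is meant to be split and fed to tf.device and to a model constructor that accepts a data_format argument (create_model here is a hypothetical stand-in for such a constructor):

device, data_format = device_and_data_format()
print('Using device %s, and data format %s.' % (device, data_format))
with tf.device(device):
    model = create_model(data_format)  # hypothetical constructor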
Example n. 42
def main(argv):
  parser = MNISTEagerArgParser()
  flags = parser.parse_args(args=argv[1:])

  tfe.enable_eager_execution()

  # Automatically determine device and data_format
  (device, data_format) = ('/gpu:0', 'channels_first')
  if flags.no_gpu or tfe.num_gpus() <= 0:
    (device, data_format) = ('/cpu:0', 'channels_last')
  # If data_format is specified via flags, override the automatically
  # chosen value.
  if flags.data_format is not None:
    data_format = flags.data_format
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000).batch(
      flags.batch_size)
  test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)

  # Create the model and optimizer
  model = mnist.create_model(data_format)
  optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)

  # Create file writers for writing TensorBoard summaries.
  if flags.output_dir:
    # Create directories to which summaries will be written
    # tensorboard --logdir=<output_dir>
    # can then be used to see the recorded summaries.
    train_dir = os.path.join(flags.output_dir, 'train')
    test_dir = os.path.join(flags.output_dir, 'eval')
    tf.gfile.MakeDirs(flags.output_dir)
  else:
    train_dir = None
    test_dir = None
  summary_writer = tf.contrib.summary.create_file_writer(
      train_dir, flush_millis=10000)
  test_summary_writer = tf.contrib.summary.create_file_writer(
      test_dir, flush_millis=10000, name='test')

  # Create and restore checkpoint (if one exists on the path)
  checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
  step_counter = tf.train.get_or_create_global_step()
  checkpoint = tfe.Checkpoint(
      model=model, optimizer=optimizer, step_counter=step_counter)
  # Restore variables on creation if a checkpoint exists.
  checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))

  # Train and evaluate for a set number of epochs.
  with tf.device(device):
    for _ in range(flags.train_epochs):
      start = time.time()
      with summary_writer.as_default():
        train(model, optimizer, train_ds, step_counter, flags.log_interval)
      end = time.time()
      print('\nTrain time for epoch #%d (%d total steps): %f' %
            (checkpoint.save_counter.numpy() + 1,
             step_counter.numpy(),
             end - start))
      with test_summary_writer.as_default():
        test(model, test_ds)
      checkpoint.save(checkpoint_prefix)
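The main function above delegates the per-epoch work to train(...) and test(...) helpers that are not reproduced here. As a minimal sketch of what the training helper can look like under eager execution, assuming a sparse softmax cross-entropy loss (the loss choice and logging format are assumptions, not necessarily what this example uses):

def train(model, optimizer, dataset, step_counter, log_interval):
  # Minimal sketch, assuming sparse integer labels; not the exact helper
  # used by the example above.
  for batch, (images, labels) in enumerate(dataset):
    with tf.GradientTape() as tape:
      logits = model(images, training=True)
      loss_value = tf.losses.sparse_softmax_cross_entropy(
          labels=labels, logits=logits)
    grads = tape.gradient(loss_value, model.variables)
    optimizer.apply_gradients(
        zip(grads, model.variables), global_step=step_counter)
    if log_interval and batch % log_interval == 0:
      print('Step #%d\tLoss: %.6f' % (batch, loss_value.numpy()))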
Example n. 43
def train_or_infer_spinn(embed,
                         word2index,
                         train_data,
                         dev_data,
                         test_data,
                         config):
  """Perform Training or Inference on a SPINN model.

  Args:
    embed: The embedding matrix as a float32 numpy array with shape
      [vocabulary_size, word_vector_len]. word_vector_len is the length of a
      word embedding vector.
    word2index: A `dict` mapping word to word index.
    train_data: An instance of `data.SnliData`, for the train split.
    dev_data: Same as above, for the dev split.
    test_data: Same as above, for the test split.
    config: A configuration object. See the argument to this Python binary for
      details.

  Returns:
    If `config.inference_premise` and `config.inference_hypothesis` are not
      `None`, i.e., inference mode: the logits for the possible labels of the
      SNLI data set, as a `Tensor` of three floats.
    else:
      The trainer object.
  Raises:
    ValueError: if only one of config.inference_premise and
      config.inference_hypothesis is specified.
  """
  # TODO(cais): Refactor this function into separate one for training and
  #   inference.
  use_gpu = tfe.num_gpus() > 0 and not config.force_cpu
  device = "gpu:0" if use_gpu else "cpu:0"
  print("Using device: %s" % device)

  if ((config.inference_premise and not config.inference_hypothesis) or
      (not config.inference_premise and config.inference_hypothesis)):
    raise ValueError(
        "--inference_premise and --inference_hypothesis must be both "
        "specified or both unspecified, but only one is specified.")

  if config.inference_premise:
    # Inference mode.
    inference_sentence_pair = [
        data.encode_sentence(config.inference_premise, word2index),
        data.encode_sentence(config.inference_hypothesis, word2index)]
  else:
    inference_sentence_pair = None

  log_header = (
      "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
      "     Accuracy  Dev/Accuracy")
  log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} {} "
      "{:12.4f} {}")
  dev_log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
      "{:8.6f} {:12.4f} {:12.4f}")

  summary_writer = tf.contrib.summary.create_file_writer(
      config.logdir, flush_millis=10000)

  with tf.device(device), \
       summary_writer.as_default(), \
       tf.contrib.summary.always_record_summaries():
    model = SNLIClassifier(config, embed)
    global_step = tf.train.get_or_create_global_step()
    trainer = SNLIClassifierTrainer(model, config.lr)
    checkpoint = tfe.Checkpoint(trainer=trainer, global_step=global_step)
    checkpoint.restore(tf.train.latest_checkpoint(config.logdir))

    if inference_sentence_pair:
      # Inference mode.
      prem, prem_trans = inference_sentence_pair[0]
      hypo, hypo_trans = inference_sentence_pair[1]
      inference_logits = model(
          tf.constant(prem), tf.constant(prem_trans),
          tf.constant(hypo), tf.constant(hypo_trans), training=False)
      inference_logits = inference_logits[0][1:]
      max_index = tf.argmax(inference_logits)
      print("\nInference logits:")
      for i, (label, logit) in enumerate(
          zip(data.POSSIBLE_LABELS, inference_logits)):
        winner_tag = " (winner)" if max_index == i else ""
        print("  {0:<16}{1:.6f}{2}".format(label + ":", logit, winner_tag))
      return inference_logits

    train_len = train_data.num_batches(config.batch_size)
    start = time.time()
    iterations = 0
    mean_loss = tfe.metrics.Mean()
    accuracy = tfe.metrics.Accuracy()
    print(log_header)
    for epoch in xrange(config.epochs):
      batch_idx = 0
      for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
          train_data, config.batch_size):
        if use_gpu:
          label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
          # prem_trans and hypo_trans are used for dynamic control flow and can
          # remain on CPU. Same in _evaluate_on_dataset().

        iterations += 1
        batch_train_loss, batch_train_logits = trainer.train_batch(
            label, prem, prem_trans, hypo, hypo_trans)
        batch_size = tf.shape(label)[0]
        mean_loss(batch_train_loss.numpy(),
                  weights=batch_size.gpu() if use_gpu else batch_size)
        accuracy(tf.argmax(batch_train_logits, axis=1), label)

        if iterations % config.save_every == 0:
          checkpoint.save(os.path.join(config.logdir, "ckpt"))

        if iterations % config.dev_every == 0:
          dev_loss, dev_frac_correct = _evaluate_on_dataset(
              dev_data, config.batch_size, trainer, use_gpu)
          print(dev_log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss.result(), dev_loss,
              accuracy.result() * 100.0, dev_frac_correct * 100.0))
          tf.contrib.summary.scalar("dev/loss", dev_loss)
          tf.contrib.summary.scalar("dev/accuracy", dev_frac_correct)
        elif iterations % config.log_every == 0:
          mean_loss_val = mean_loss.result()
          accuracy_val = accuracy.result()
          print(log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss_val, " " * 8, accuracy_val * 100.0, " " * 12))
          tf.contrib.summary.scalar("train/loss", mean_loss_val)
          tf.contrib.summary.scalar("train/accuracy", accuracy_val)
          # Reset metrics.
          mean_loss = tfe.metrics.Mean()
          accuracy = tfe.metrics.Accuracy()

        batch_idx += 1
      if (epoch + 1) % config.lr_decay_every == 0:
        trainer.decay_learning_rate(config.lr_decay_by)

    test_loss, test_frac_correct = _evaluate_on_dataset(
        test_data, config.batch_size, trainer, use_gpu)
    print("Final test loss: %g; accuracy: %g%%" %
          (test_loss, test_frac_correct * 100.0))

  return trainer
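Both SPINN listings iterate over training batches via _get_dataset_iterator, which is not shown. A plausible minimal sketch, assuming data.SnliData can hand out a generator of (label, prem, prem_trans, hypo, hypo_trans) int64 batches (the get_generator method name is an assumption):

def _get_dataset_iterator(snli_data, batch_size):
  # Minimal sketch; the generator API on snli_data is assumed, not given.
  with tf.device("/device:cpu:0"):
    # Some tf.data ops are available only on CPU.
    dataset = tf.data.Dataset.from_generator(
        snli_data.get_generator(batch_size),  # hypothetical method
        (tf.int64, tf.int64, tf.int64, tf.int64, tf.int64))
    return tfe.Iterator(dataset)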
Example n. 44
def train_spinn(embed, train_data, dev_data, test_data, config):
  """Train a SPINN model.

  Args:
    embed: The embedding matrix as a float32 numpy array with shape
      [vocabulary_size, word_vector_len]. word_vector_len is the length of a
      word embedding vector.
    train_data: An instance of `data.SnliData`, for the train split.
    dev_data: Same as above, for the dev split.
    test_data: Same as above, for the test split.
    config: A configuration object. See the argument to this Python binary for
      details.

  Returns:
    1. Final loss value on the test split.
    2. Final fraction of correct classifications on the test split.
  """
  use_gpu = tfe.num_gpus() > 0 and not config.force_cpu
  device = "gpu:0" if use_gpu else "cpu:0"
  print("Using device: %s" % device)

  log_header = (
      "  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss"
      "     Accuracy  Dev/Accuracy")
  log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} {} "
      "{:12.4f} {}")
  dev_log_template = (
      "{:>6.0f} {:>5.0f} {:>9.0f} {:>5.0f}/{:<5.0f} {:>7.0f}% {:>8.6f} "
      "{:8.6f} {:12.4f} {:12.4f}")

  summary_writer = tf.contrib.summary.create_file_writer(
      config.logdir, flush_millis=10000)
  train_len = train_data.num_batches(config.batch_size)
  with tf.device(device), \
       tfe.restore_variables_on_create(
           tf.train.latest_checkpoint(config.logdir)), \
       summary_writer.as_default(), \
       tf.contrib.summary.always_record_summaries():
    model = SNLIClassifier(config, embed)
    global_step = tf.train.get_or_create_global_step()
    trainer = SNLIClassifierTrainer(model, config.lr)

    start = time.time()
    iterations = 0
    mean_loss = tfe.metrics.Mean()
    accuracy = tfe.metrics.Accuracy()
    print(log_header)
    for epoch in xrange(config.epochs):
      batch_idx = 0
      for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
          train_data, config.batch_size):
        if use_gpu:
          label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
          # prem_trans and hypo_trans are used for dynamic control flow and can
          # remain on CPU. Same in _evaluate_on_dataset().

        iterations += 1
        batch_train_loss, batch_train_logits = trainer.train_batch(
            label, prem, prem_trans, hypo, hypo_trans)
        batch_size = tf.shape(label)[0]
        mean_loss(batch_train_loss.numpy(),
                  weights=batch_size.gpu() if use_gpu else batch_size)
        accuracy(tf.argmax(batch_train_logits, axis=1), label)

        if iterations % config.save_every == 0:
          all_variables = (
              model.variables + [trainer.learning_rate] + [global_step])
          saver = tfe.Saver(all_variables)
          saver.save(os.path.join(config.logdir, "ckpt"),
                     global_step=global_step)

        if iterations % config.dev_every == 0:
          dev_loss, dev_frac_correct = _evaluate_on_dataset(
              dev_data, config.batch_size, model, trainer, use_gpu)
          print(dev_log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss.result(), dev_loss,
              accuracy.result() * 100.0, dev_frac_correct * 100.0))
          tf.contrib.summary.scalar("dev/loss", dev_loss)
          tf.contrib.summary.scalar("dev/accuracy", dev_frac_correct)
        elif iterations % config.log_every == 0:
          mean_loss_val = mean_loss.result()
          accuracy_val = accuracy.result()
          print(log_template.format(
              time.time() - start,
              epoch, iterations, 1 + batch_idx, train_len,
              100.0 * (1 + batch_idx) / train_len,
              mean_loss_val, " " * 8, accuracy_val * 100.0, " " * 12))
          tf.contrib.summary.scalar("train/loss", mean_loss_val)
          tf.contrib.summary.scalar("train/accuracy", accuracy_val)
          # Reset metrics.
          mean_loss = tfe.metrics.Mean()
          accuracy = tfe.metrics.Accuracy()

        batch_idx += 1
      if (epoch + 1) % config.lr_decay_every == 0:
        trainer.decay_learning_rate(config.lr_decay_by)

    test_loss, test_frac_correct = _evaluate_on_dataset(
        test_data, config.batch_size, model, trainer, use_gpu)
    print("Final test loss: %g; accuracy: %g%%" %
          (test_loss, test_frac_correct * 100.0))
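Finally, both listings measure dev and test performance through _evaluate_on_dataset, which returns a (mean loss, fraction correct) pair. A minimal sketch consistent with the call sites in the last example, assuming the trainer exposes a loss(label, prem, prem_trans, hypo, hypo_trans) entry point mirroring train_batch (that method name is an assumption):

def _evaluate_on_dataset(snli_data, batch_size, model, trainer, use_gpu):
  # Minimal sketch; trainer.loss is an assumed API. `model` is accepted
  # only to match the call sites in the example above.
  mean_loss = tfe.metrics.Mean()
  accuracy = tfe.metrics.Accuracy()
  for label, prem, prem_trans, hypo, hypo_trans in _get_dataset_iterator(
      snli_data, batch_size):
    if use_gpu:
      label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu()
    loss, logits = trainer.loss(label, prem, prem_trans, hypo, hypo_trans)
    weight = tf.shape(label)[0]
    mean_loss(loss.numpy(), weights=weight.gpu() if use_gpu else weight)
    accuracy(tf.argmax(logits, axis=1), label)
  return mean_loss.result().numpy(), accuracy.result().numpy()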