def main(argv):
    """Trains a deep ensemble of MLPs on a regression dataset.

    Each of the FLAGS.ensemble_size members is trained independently,
    either with adversarial training (when FLAGS.epsilon is nonzero) or
    with standard maximum likelihood, optionally on a bootstrap resample
    of the training set (FLAGS.bootstrap). Member weights are written to
    disk and then evaluated jointly with utils.ensemble_metrics.

    Args:
      argv: Unused command-line arguments.

    Raises:
      ValueError: If not configured for a single GPU.
    """
    del argv  # unused arg
    if not FLAGS.use_gpu:
        raise ValueError('Only GPU is currently supported.')
    if FLAGS.num_cores > 1:
        raise ValueError('Only a single accelerator is currently supported.')
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
    n_train = x_train.shape[0]
    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
        # The output scale is seeded with the per-dimension std of the labels
        # (plus epsilon to keep it strictly positive).
        model = multilayer_perceptron(
            x_train.shape[1:],
            np.std(y_train, axis=0) + tf.keras.backend.epsilon())
        if FLAGS.epsilon:
            # Adversarial training uses a custom loop below, so build the
            # loss and optimizer by hand instead of compiling the model.
            loss_fn = make_adversarial_loss_fn(model)
            optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
        else:

            def negative_log_likelihood(y_true, y_pred):
                # y_pred is a distribution-valued model output.
                return -y_pred.distribution.log_prob(y_true)

            model.compile(
                optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
                loss=negative_log_likelihood)

        member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
        tensorboard = tf.keras.callbacks.TensorBoard(
            log_dir=member_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)
        if FLAGS.epsilon:
            for epoch in range(
                (FLAGS.batch_size * FLAGS.training_steps) // n_train):
                logging.info('Epoch %s', epoch)
                # Shuffle once per epoch so every example is visited exactly
                # once. The previous code drew a fresh permutation on every
                # step and sliced it with stride 1 (perm[j:j + batch_size]),
                # which produced heavily overlapping, non-exhaustive batches.
                perm = np.random.permutation(n_train)
                for j in range(n_train // FLAGS.batch_size):
                    batch = perm[j * FLAGS.batch_size:(j + 1) *
                                 FLAGS.batch_size]
                    with tf.GradientTape() as tape:
                        loss = loss_fn(x_train[batch], y_train[batch])
                    grads = tape.gradient(loss, model.trainable_weights)
                    optimizer.apply_gradients(
                        zip(grads, model.trainable_weights))
        else:
            if FLAGS.bootstrap:
                # Bagging: resample the training set with replacement.
                inds = np.random.choice(n_train, n_train, replace=True)
                x_sampled = x_train[inds]
                y_sampled = y_train[inds]

            model.fit(
                x=x_train if not FLAGS.bootstrap else x_sampled,
                y=y_train if not FLAGS.bootstrap else y_sampled,
                batch_size=FLAGS.batch_size,
                epochs=(FLAGS.batch_size * FLAGS.training_steps) // n_train,
                validation_data=(x_test, y_test),
                validation_freq=max(
                    (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
                verbose=0,
                callbacks=[tensorboard])

        member_filename = os.path.join(member_dir, 'model.weights')
        ensemble_filenames.append(member_filename)
        model.save_weights(member_filename)

    # TODO(trandustin): Move this into utils.ensemble_metrics. It's currently
    # separate so that VI can use utils.ensemble_metrics while in TF1.
    def ll(arg):
        # Per-example log-likelihood and residual for ensemble evaluation.
        features, labels = arg
        predictions = model(features)
        log_prob = predictions.distribution.log_prob(labels)
        error = predictions.distribution.loc - labels
        return [log_prob, error]

    ensemble_metrics_vals = {
        'train':
        utils.ensemble_metrics(x_train,
                               y_train,
                               model,
                               ll,
                               weight_files=ensemble_filenames),
        'test':
        utils.ensemble_metrics(x_test,
                               y_test,
                               model,
                               ll,
                               weight_files=ensemble_filenames),
    }

    for split, metrics in ensemble_metrics_vals.items():
        logging.info(split)
        for metric_name in metrics:
            logging.info('%s: %s', metric_name, metrics[metric_name])
def main(argv):
    """Trains a LeNet-5 deep ensemble classifier in TF1 graph mode.

    Each ensemble member is trained independently (optionally on a
    bootstrap resample of the training set), its weights saved, and the
    saved members evaluated jointly with utils.ensemble_metrics.

    Args:
        argv: Unused command-line arguments.

    Raises:
        ValueError: If not configured for a single GPU.
    """
    del argv  # unused arg
    if not FLAGS.use_gpu:
        raise ValueError('Only GPU is currently supported.')
    if FLAGS.num_cores > 1:
        raise ValueError('Only a single accelerator is currently supported.')
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset, session)
    n_train = x_train.shape[0]
    num_classes = int(np.amax(y_train)) + 1

    # Training schedule derived from step counts; identical for all members.
    num_epochs = (FLAGS.batch_size * FLAGS.training_steps) // n_train
    eval_freq = max((FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1)

    ensemble_filenames = []
    for member_id in range(FLAGS.ensemble_size):
        # TODO(trandustin): We re-build the graph for each ensemble member. This
        # is due to an unknown bug where the variables are otherwise not
        # re-initialized to be random. While this is inefficient in graph mode, I'm
        # keeping this for now as we'd like to move to eager mode anyways.
        model = lenet5(x_train.shape[1:], num_classes)

        # The three closures below capture `model` deliberately; they are
        # rebuilt together with the graph on each iteration.
        def negative_log_likelihood(y, rv_y):
            del rv_y  # unused arg
            squeezed = tf.squeeze(y)
            return -model.output.distribution.log_prob(squeezed)  # pylint: disable=cell-var-from-loop

        def accuracy(y_true, y_sample):
            del y_sample  # unused arg
            predicted = tf.argmax(input=model.output.distribution.logits, axis=1)  # pylint: disable=cell-var-from-loop
            return tf.equal(predicted, tf.cast(tf.squeeze(y_true), tf.int64))

        def log_likelihood(y_true, y_sample):
            del y_sample  # unused arg
            squeezed = tf.squeeze(y_true)
            return model.output.distribution.log_prob(squeezed)  # pylint: disable=cell-var-from-loop

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
            loss=negative_log_likelihood,
            metrics=[log_likelihood, accuracy])

        member_dir = os.path.join(FLAGS.output_dir, f'member_{member_id}')
        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=member_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        # Optional bagging: fit on a with-replacement resample of the data.
        if FLAGS.bootstrap:
            resample = np.random.choice(n_train, n_train, replace=True)
            fit_x, fit_y = x_train[resample], y_train[resample]
        else:
            fit_x, fit_y = x_train, y_train

        model.fit(x=fit_x,
                  y=fit_y,
                  batch_size=FLAGS.batch_size,
                  epochs=num_epochs,
                  validation_data=(x_test, y_test),
                  validation_freq=eval_freq,
                  verbose=1,
                  callbacks=[tensorboard])

        member_filename = os.path.join(member_dir, 'model.weights')
        model.save_weights(member_filename)
        ensemble_filenames.append(member_filename)

    # Graph-mode function returning per-example log-likelihoods and logits,
    # consumed by utils.ensemble_metrics over the saved member weights.
    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(tf.squeeze(labels)),
        model.output.distribution.logits,
    ])

    ensemble_metrics_vals = {
        'train': utils.ensemble_metrics(
            x_train, y_train, model, ll, weight_files=ensemble_filenames),
        'test': utils.ensemble_metrics(
            x_test, y_test, model, ll, weight_files=ensemble_filenames),
    }

    for split, metrics in ensemble_metrics_vals.items():
        logging.info(split)
        for metric_name in metrics:
            logging.info('%s: %s', metric_name, metrics[metric_name])
# Example #3 (score: 0)
def main(argv):
    """Trains a variational BNN and refines it via auxiliary-variable sampling.

    TF1 graph-mode script: fits a variational MLP, then repeatedly samples
    auxiliary variables (sample_auxiliary_op) and fine-tunes to build an
    ensemble, comparing base, overtrained, and auxiliary-sampled models
    with utils.ensemble_metrics.

    Args:
        argv: Unused command-line arguments.

    Raises:
        ValueError: If not configured for a single GPU.
    """
    del argv  # unused arg
    if not FLAGS.use_gpu:
        raise ValueError('Only GPU is currently supported.')
    if FLAGS.num_cores > 1:
        raise ValueError('Only a single accelerator is currently supported.')
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
        n_train = x_train.shape[0]

        model = multilayer_perceptron(
            n_train, x_train.shape[1:],
            np.std(y_train) + tf.keras.backend.epsilon())
        # Attach non-trainable scalar accumulators for the KL cost that the
        # auxiliary-variable sampling adds for each layer's kernel and bias.
        for l in model.layers:
            l.kl_cost_weight = l.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            # NOTE(review): add_variable appears to be the deprecated alias of
            # add_weight — presumably kept for TF1 compatibility; confirm.
            l.kl_cost_bias = l.add_variable(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, mse, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)
        metrics = [elbo, log_likelihood, kl, mse]

        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        def fit_fn(model, steps, initial_epoch):
            # Train `model` for `steps` gradient steps, expressed in epochs so
            # Keras validation/logging scheduling works as usual.
            return model.fit(
                x=x_train,
                y=y_train,
                batch_size=FLAGS.batch_size,
                epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
                initial_epoch=initial_epoch,
                validation_data=(x_test, y_test),
                validation_freq=max(
                    (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
                verbose=1,
                callbacks=[tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        session.run(tf1.initialize_all_variables())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps, initial_epoch=0)

        # Graph-mode function returning per-example log-likelihood and
        # residual, consumed by utils.ensemble_metrics.
        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(labels),
            model.output.distribution.loc - labels
        ])

        base_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)

        overtrained_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]

        # Perform refined VI.
        # Build one list of ops that samples auxiliary variables for every
        # layer's kernel (and bias, when it has a distribution) and
        # accumulates the induced KL cost into the per-layer accumulators.
        sample_op = []
        for l in model.layers:
            if hasattr(l, 'kernel_prior'):
                weight_op, weight_cost = sample_auxiliary_op(
                    l.kernel_prior.distribution,
                    l.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior
                session.run(l.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(l.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        l.bias_prior.distribution,
                        l.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior
                    session.run(
                        l.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            # Each member restarts from the base model, then alternates
            # auxiliary sampling with short fine-tuning runs.
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of the prior.
                        lr=float(FLAGS.learning_rate_for_sampling *
                                 np.sqrt(1. -
                                         FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            utils.ensemble_metrics(x_train,
                                   y_train,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames),
            utils.ensemble_metrics(x_test,
                                   y_test,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames),
        ]

        # Log train/test metrics for each of the three model variants.
        for metrics, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(metrics[0], 'train'),
                                        (metrics[1], 'test')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
# Example #4 (score: 0)
def main(argv):
  """Trains a deep ensemble of probabilistic MLP regressors in TF1.

  Members are trained independently, either with adversarial training
  (FLAGS.epsilon) via a hand-built graph-mode train op, or with standard
  Keras fitting, optionally on a bootstrap resample (FLAGS.bootstrap).
  Saved member weights are evaluated jointly via utils.ensemble_metrics.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
  n_train = x_train.shape[0]

  session = tf1.Session()
  ensemble_filenames = []
  for i in range(FLAGS.ensemble_size):
    # TODO(trandustin): We re-build the graph for each ensemble member. This
    # is due to an unknown bug where the variables are otherwise not
    # re-initialized to be random. While this is inefficient in graph mode, I'm
    # keeping this for now as we'd like to move to eager mode anyways.
    model = multilayer_perceptron(
        x_train.shape[1:], np.std(y_train, axis=0) + tf.keras.backend.epsilon())

    def negative_log_likelihood(y, rv_y):
      del rv_y  # unused arg
      return -model.output.distribution.log_prob(y)  # pylint: disable=cell-var-from-loop

    def mse(y_true, y_sample):
      del y_sample  # unused arg
      return tf.math.square(model.output.distribution.loc - y_true)  # pylint: disable=cell-var-from-loop

    def log_likelihood(y_true, y_sample):
      del y_sample  # unused arg
      return model.output.distribution.log_prob(y_true)  # pylint: disable=cell-var-from-loop

    if FLAGS.epsilon:
      # Adversarial training: perturb inputs along the sign of the loss
      # gradient and minimize the mean of the clean and adversarial losses.
      y_true = tf.keras.Input(shape=y_train.shape[1:], name='labels')
      loss = tf.reduce_mean(-model.output.distribution.log_prob(y_true))
      nn_input_tensor = model.input
      grad = tf1.gradients(loss, nn_input_tensor)[0]
      # It is assumed that the training data is normalized.
      adv_inputs_tensor = nn_input_tensor + FLAGS.epsilon * tf.math.sign(
          tf1.stop_gradient(grad))
      adv_inputs = tf.keras.Input(tensor=adv_inputs_tensor, name='adv_inputs')
      adv_out_dist = model(adv_inputs)
      adv_loss = tf.reduce_mean(-adv_out_dist.distribution.log_prob(y_true))
      optimizer = tf1.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      train_op = optimizer.minimize(0.5 * loss + 0.5 * adv_loss)
    else:
      model.compile(
          optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
          loss=negative_log_likelihood,
          metrics=[log_likelihood, mse])

    member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=member_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)
    if FLAGS.epsilon:
      session.run(tf1.initialize_all_variables())
      for epoch in range((FLAGS.batch_size * FLAGS.training_steps) // n_train):
        logging.info('Epoch %s', epoch)
        # Shuffle once per epoch so each example is used exactly once. The
        # previous code drew a new permutation on every step and sliced it
        # with stride 1 (perm[j:j + batch_size]), producing heavily
        # overlapping, non-exhaustive batches.
        perm = np.random.permutation(n_train)
        for j in range(n_train // FLAGS.batch_size):
          batch = perm[j * FLAGS.batch_size:(j + 1) * FLAGS.batch_size]
          session.run(
              train_op,
              feed_dict={
                  nn_input_tensor: x_train[batch],
                  y_true: y_train[batch],
              })
    else:
      if FLAGS.bootstrap:
        # Bagging: resample the training set with replacement.
        inds = np.random.choice(n_train, n_train, replace=True)
        x_sampled = x_train[inds]
        y_sampled = y_train[inds]

      model.fit(
          x=x_train if not FLAGS.bootstrap else x_sampled,
          y=y_train if not FLAGS.bootstrap else y_sampled,
          batch_size=FLAGS.batch_size,
          epochs=(FLAGS.batch_size * FLAGS.training_steps) // n_train,
          validation_data=(x_test, y_test),
          validation_freq=max(
              (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
          verbose=0,
          callbacks=[tensorboard])

    member_filename = os.path.join(member_dir, 'model.weights')
    ensemble_filenames.append(member_filename)
    model.save_weights(member_filename)

  # Graph-mode function returning per-example log-likelihood and residual,
  # consumed by utils.ensemble_metrics over the saved member weights.
  labels = tf.keras.layers.Input(shape=y_train.shape[1:])
  ll = tf.keras.backend.function(
      [model.input, labels],
      [model.output.distribution.log_prob(labels),
       model.output.distribution.loc - labels])

  ensemble_metrics_vals = {
      'train': utils.ensemble_metrics(
          x_train, y_train, model, ll, weight_files=ensemble_filenames),
      'test': utils.ensemble_metrics(
          x_test, y_test, model, ll, weight_files=ensemble_filenames),
  }

  for split, metrics in ensemble_metrics_vals.items():
    logging.info(split)
    for metric_name in metrics:
      logging.info('%s: %s', metric_name, metrics[metric_name])