Exemple #1
0
 def _map_fn(features, labels):
   """Applies `module` to `features` behind a temporary leading axis.

   A size-one axis is inserted at position 0 before calling `module` and
   removed from its result afterwards; `labels` is returned untouched.
   """
   batched = tf.expand_dims(features, 0)
   embedded = module(batched)
   return tf.squeeze(embedded, 0), labels
Exemple #2
0
 def negative_log_likelihood(y, rv_y):
     """Returns the negated log-probability of `y` under the model output.

     `rv_y` is accepted for signature compatibility but ignored; the
     distribution is read from `model.output` instead.
     """
     del rv_y  # unused arg
     targets = tf.squeeze(y)
     log_prob = model.output.distribution.log_prob(targets)
     return -log_prob
Exemple #3
0
 def accuracy(y_true, y_sample):
   """Returns an elementwise comparison of predicted and true classes.

   The predicted class is the argmax over axis 1 of the logits of
   `model.output.distribution`; `y_sample` is accepted but ignored.
   """
   del y_sample  # unused arg
   predicted = tf.argmax(input=model.output.distribution.logits, axis=1)
   expected = tf.cast(tf.squeeze(y_true), tf.int64)
   return tf.equal(predicted, expected)
Exemple #4
0
def main(argv):
  """Trains a base model, then refines it into an ensemble and logs metrics.

  The run proceeds in these stages:
    1. Load the data and build either the LeNet5 or the ResNet model,
       depending on `FLAGS.resnet`.
    2. Train the base model for `FLAGS.training_steps` steps and save its
       weights.
    3. Keep training the same model for as many steps as the refinement stage
       will use, to obtain an "overtrained" baseline for comparison.
    4. For each ensemble member, reload the base weights, repeatedly run the
       auxiliary sampling ops followed by a short fit, and save the weights.
    5. Log the metrics of the base, overtrained and ensemble models on the
       training and test splits.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  # Using the session as a context manager installs it as the default session
  # and also closes it when the block exits, releasing its resources.
  with tf1.Session() as session:
    x_train, y_train, x_test, y_test = datasets.load(session)
    n_train = x_train.shape[0]

    num_classes = int(np.amax(y_train)) + 1
    if not FLAGS.resnet:
      model = lenet5(n_train, x_train.shape[1:], num_classes)
    else:
      datagen = tf.keras.preprocessing.image.ImageDataGenerator(
          rotation_range=90,
          width_shift_range=0.1,
          height_shift_range=0.1,
          horizontal_flip=True)
      datagen.fit(x_train)
      model = res_net(n_train,
                      x_train.shape[1:],
                      num_classes,
                      batchnorm=FLAGS.batchnorm,
                      variational='hybrid' if FLAGS.hybrid else 'full')

      def schedule_fn(epoch):
        """Learning rate schedule function."""
        rate = FLAGS.learning_rate
        if epoch > 180:
          rate *= 0.5e-3
        elif epoch > 160:
          rate *= 1e-3
        elif epoch > 120:
          rate *= 1e-2
        elif epoch > 80:
          rate *= 1e-1
        return float(rate)

      lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

    # Attach non-trainable scalar accumulators to every layer; the auxiliary
    # sampling ops below add their costs to them.
    for layer in model.layers:
      layer.kl_cost_weight = layer.add_weight(
          name='kl_cost_weight',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)
      layer.kl_cost_bias = layer.add_weight(
          name='kl_cost_bias',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)

    [negative_log_likelihood,
     accuracy,
     log_likelihood,
     kl,
     elbo] = get_losses_and_metrics(model, n_train)

    metrics = [elbo, log_likelihood, kl, accuracy]

    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=FLAGS.output_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)
    if FLAGS.resnet:
      callbacks = [tensorboard, lr_callback]
    else:
      callbacks = [tensorboard]

    # `FLAGS.validation_freq` is converted here from steps to whole epochs.
    # The integer division yields 0 when fewer than `n_train` examples are
    # processed between validations, and Keras rejects a frequency below 1,
    # so clamp it for both training paths.
    validation_freq = max(
        (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1)

    if not FLAGS.resnet or not FLAGS.data_augmentation:

      def fit_fn(model,
                 steps,
                 initial_epoch=0,
                 with_lr_schedule=FLAGS.resnet):
        """Fits `model` on the raw training arrays for `steps` steps."""
        return model.fit(
            x=x_train,
            y=y_train,
            batch_size=FLAGS.batch_size,
            epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
            initial_epoch=initial_epoch,
            validation_data=(x_test, y_test),
            validation_freq=validation_freq,
            verbose=1,
            callbacks=callbacks if with_lr_schedule else [tensorboard])
    else:

      def fit_fn(model,
                 steps,
                 initial_epoch=0,
                 with_lr_schedule=FLAGS.resnet):
        """Fits `model` on augmented training batches for `steps` steps."""
        return model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=FLAGS.batch_size),
            epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
            initial_epoch=initial_epoch,
            steps_per_epoch=n_train // FLAGS.batch_size,
            validation_data=(x_test, y_test),
            validation_freq=validation_freq,
            verbose=1,
            callbacks=callbacks if with_lr_schedule else [tensorboard])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
        loss=negative_log_likelihood,
        metrics=metrics)
    # `initialize_all_variables` is a deprecated alias of this initializer.
    session.run(tf1.global_variables_initializer())

    train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
    fit_fn(model, FLAGS.training_steps)

    # Backend function evaluating the per-example log-probability of the
    # labels and the logits of the output distribution.
    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(tf.squeeze(labels)),
        model.output.distribution.logits
    ])

    base_metrics = [
        ensemble_metrics(x_train, y_train, model, ll),
        ensemble_metrics(x_test, y_test, model, ll)
    ]
    model_dir = os.path.join(FLAGS.output_dir, 'models')
    tf.io.gfile.makedirs(model_dir)
    base_model_filename = os.path.join(model_dir, 'base_model.weights')
    model.save_weights(base_model_filename)

    # Train base model further for comparison.
    fit_fn(
        model,
        FLAGS.n_auxiliary_variables * FLAGS.auxiliary_sampling_frequency *
        FLAGS.ensemble_size,
        initial_epoch=train_epochs)

    overtrained_metrics = [
        ensemble_metrics(x_train, y_train, model, ll),
        ensemble_metrics(x_test, y_test, model, ll)
    ]

    # Perform refined VI.
    sample_op = []
    variational_layer_types = (tfp.layers.DenseLocalReparameterization,
                               tfp.layers.Convolution2DFlipout)
    for layer in model.layers:
      if isinstance(layer, variational_layer_types):
        weight_op, weight_cost = sample_auxiliary_op(
            layer.kernel_prior.distribution,
            layer.kernel_posterior.distribution,
            FLAGS.auxiliary_variance_ratio)
        sample_op.append(weight_op)
        sample_op.append(layer.kl_cost_weight.assign_add(weight_cost))
        # Fix the variance of the prior
        session.run(layer.kernel_prior.distribution.istrainable.assign(0.))
        if hasattr(layer.bias_prior, 'distribution'):
          bias_op, bias_cost = sample_auxiliary_op(
              layer.bias_prior.distribution,
              layer.bias_posterior.distribution,
              FLAGS.auxiliary_variance_ratio)
          sample_op.append(bias_op)
          sample_op.append(layer.kl_cost_bias.assign_add(bias_cost))
          # Fix the variance of the prior
          session.run(layer.bias_prior.distribution.istrainable.assign(0.))

    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
      # Every ensemble member starts again from the saved base model.
      model.load_weights(base_model_filename)
      for j in range(FLAGS.n_auxiliary_variables):
        session.run(sample_op)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                # The learning rate is proportional to the scale of the prior.
                lr=float(FLAGS.learning_rate_for_sampling *
                         np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
            loss=negative_log_likelihood,
            metrics=metrics)
        fit_fn(
            model,
            FLAGS.auxiliary_sampling_frequency,
            initial_epoch=train_epochs,
            with_lr_schedule=False)
      ensemble_filename = os.path.join(
          model_dir, 'ensemble_component_' + str(i) + '.weights')
      ensemble_filenames.append(ensemble_filename)
      model.save_weights(ensemble_filename)

    auxiliary_metrics = [
        ensemble_metrics(
            x_train,
            y_train,
            model,
            ll,
            weight_files=ensemble_filenames),
        ensemble_metrics(
            x_test,
            y_test,
            model,
            ll,
            weight_files=ensemble_filenames)
    ]

    # A separate loop name keeps the Keras `metrics` list above intact.
    for split_metrics, name in [(base_metrics, 'Base model'),
                                (overtrained_metrics, 'Overtrained model'),
                                (auxiliary_metrics, 'Auxiliary sampling')]:
      logging.info(name)
      for metrics_dict, split in [(split_metrics[0], 'Training'),
                                  (split_metrics[1], 'Testing')]:
        logging.info(split)
        for metric_name in metrics_dict:
          logging.info('%s: %s', metric_name, metrics_dict[metric_name])
Exemple #5
0
  def build(self, input_shape):
    """Builds the Keras model for this layer and stores it in `self._network`.

    The network has an initial pair of masked convolution stacks ('vertical'
    and 'horizontal'), a downward pass whose layers are kept for
    skip-connections, an upward pass that consumes those layers, and a final
    dense projection that is reshaped and split into the output tensors.

    Args:
      input_shape: Batched shape of the image input. If it has length 2 it is
        instead unpacked as `(batch_image_shape, batch_conditional_shape)` and
        a second, conditional input is added to the model.
    """
    dtype = self.dtype
    # A length-2 `input_shape` is interpreted as a pair of shapes, one for the
    # image and one for the conditional input.
    if len(input_shape) == 2:
      batch_image_shape, batch_conditional_shape = input_shape
      conditional_input = tf.keras.layers.Input(
          shape=batch_conditional_shape[1:], dtype=dtype)
    else:
      batch_image_shape = input_shape
      conditional_input = None

    # Drop the leading (batch) dimension.
    image_shape = batch_image_shape[1:]
    image_input = tf.keras.layers.Input(shape=image_shape, dtype=dtype)

    if self._resnet_activation == 'concat_elu':
      # ELU applied to the concatenation of `x` and `-x` along the last axis.
      activation = tf.keras.layers.Lambda(
          lambda x: tf.nn.elu(tf.concat([x, -x], axis=-1)), dtype=dtype)
    else:
      activation = tf.keras.activations.get(self._resnet_activation)

    # Define layers with default inputs and layer wrapper applied
    Conv2D = functools.partial(  # pylint:disable=invalid-name
        self._layer_wrapper(tf.keras.layers.Convolution2D),
        filters=self._num_filters,
        padding='same',
        dtype=dtype)

    Dense = functools.partial(  # pylint:disable=invalid-name
        self._layer_wrapper(tf.keras.layers.Dense), dtype=dtype)

    Conv2DTranspose = functools.partial(  # pylint:disable=invalid-name
        self._layer_wrapper(tf.keras.layers.Conv2DTranspose),
        filters=self._num_filters,
        padding='same',
        strides=(2, 2),
        dtype=dtype)

    rows, cols = self._receptive_field_dims

    # Define the dimensions of the valid (unmasked) areas of the layer kernels
    # for stride 1 convolutions in the internal layers.
    kernel_valid_dims = {'vertical': (rows - 1, cols),
                         'horizontal': (2, cols // 2 + 1)}

    # Define the size of the kernel necessary to center the current pixel
    # correctly for stride 1 convolutions in the internal layers.
    kernel_sizes = {'vertical': (2 * rows - 3, cols), 'horizontal': (3, cols)}

    # Make the kernel constraint functions for stride 1 convolutions in internal
    # layers.
    kernel_constraints = {
        k: _make_kernel_constraint(kernel_sizes[k], (0, v[0]), (0, v[1]))
        for k, v in kernel_valid_dims.items()}

    # Build the initial vertical stack/horizontal stack convolutional layers,
    # as shown in Figure 1 of [2]. The receptive field of the initial vertical
    # stack layer is a rectangular area centered above the current pixel.
    vertical_stack_init = Conv2D(
        kernel_size=(2 * rows - 1, cols),
        kernel_constraint=_make_kernel_constraint(
            (2 * rows - 1, cols), (0, rows - 1), (0, cols)))(image_input)

    # In Figure 1 [2], the receptive field of the horizontal stack is
    # illustrated as the pixels in the same row and to the left of the current
    # pixel. [1] increases the height of this receptive field from one pixel to
    # two (`horizontal_stack_left`) and additionally includes a subset of the
    # row of pixels centered above the current pixel (`horizontal_stack_up`).
    horizontal_stack_up = Conv2D(
        kernel_size=(3, cols),
        kernel_constraint=_make_kernel_constraint(
            (3, cols), (0, 1), (0, cols)))(image_input)

    horizontal_stack_left = Conv2D(
        kernel_size=(3, cols),
        kernel_constraint=_make_kernel_constraint(
            (3, cols), (0, 2), (0, cols // 2)))(image_input)

    horizontal_stack_init = tf.keras.layers.add(
        [horizontal_stack_up, horizontal_stack_left], dtype=dtype)

    # Per-stack lists of layer outputs; the last element is the current layer.
    layer_stacks = {
        'vertical': [vertical_stack_init],
        'horizontal': [horizontal_stack_init]}

    # Build the downward pass of the U-net (left-hand half of Figure 2 of [1]).
    # Each `i` iteration builds one of the highest-level blocks (identified as
    # 'Sequence of 6 layers' in the figure, consisting of `num_resnet=5` stride-
    # 1 layers, and one stride-2 layer that contracts the height/width
    # dimensions). The `_` iterations build the stride 1 layers. The layers of
    # the downward pass are stored in lists, since we'll later need them to make
    # skip-connections to layers in the upward pass of the U-net (the skip-
    # connections are represented by curved lines in Figure 2 [1]).
    for i in range(self._num_hierarchies):
      for _ in range(self._num_resnet):
        # Build a layer shown in Figure 2 of [2]. The 'vertical' iteration
        # builds the layers in the left half of the figure, and the 'horizontal'
        # iteration builds the layers in the right half.
        for stack in ['vertical', 'horizontal']:
          input_x = layer_stacks[stack][-1]
          x = activation(input_x)
          x = Conv2D(kernel_size=kernel_sizes[stack],
                     kernel_constraint=kernel_constraints[stack])(x)

          # Add the vertical-stack layer to the horizontal-stack layer
          if stack == 'horizontal':
            # The vertical stack was processed first in this iteration, so its
            # last entry is the layer that was just built.
            h = activation(layer_stacks['vertical'][-1])
            h = Dense(self._num_filters)(h)
            x = tf.keras.layers.add([h, x], dtype=dtype)

          x = activation(x)
          x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
          # Twice the number of filters is produced here, ahead of the gating
          # step below.
          x = Conv2D(filters=2*self._num_filters,
                     kernel_size=kernel_sizes[stack],
                     kernel_constraint=kernel_constraints[stack])(x)

          if conditional_input is not None:
            h_projection = _build_and_apply_h_projection(
                conditional_input, self._num_filters, dtype=dtype)
            x = tf.keras.layers.add([x, h_projection], dtype=dtype)

          x = _apply_sigmoid_gating(x)

          # Add a residual connection from the layer's input.
          out = tf.keras.layers.add([input_x, x], dtype=dtype)
          layer_stacks[stack].append(out)

      if i < self._num_hierarchies - 1:
        # Build convolutional layers that contract the height/width dimensions
        # on the downward pass between each set of layers (e.g. contracting from
        # 32x32 to 16x16 in Figure 2 of [1]).
        for stack in ['vertical', 'horizontal']:
          # Define kernel dimensions/masking to maintain the autoregressive
          # property.
          x = layer_stacks[stack][-1]
          h, w = kernel_valid_dims[stack]
          kernel_height = 2 * h
          if stack == 'vertical':
            kernel_width = w + 1
          else:
            kernel_width = 2 * w

          kernel_size = (kernel_height, kernel_width)
          kernel_constraint = _make_kernel_constraint(
              kernel_size, (0, h), (0, w))
          x = Conv2D(strides=(2, 2), kernel_size=kernel_size,
                     kernel_constraint=kernel_constraint)(x)
          # The contracted layer is stored as well, so it also receives a
          # skip-connection on the upward pass.
          layer_stacks[stack].append(x)

    # Upward pass of the U-net (right-hand half of Figure 2 of [1]). We stored
    # the layers of the downward pass in a list, in order to access them to make
    # skip-connections to the upward pass. For the upward pass, we need to keep
    # track of only the current layer, so we maintain a reference to the
    # current layer of the horizontal/vertical stack in the `upward_pass` dict.
    # The upward pass begins with the last layer of the downward pass.
    upward_pass = {key: stack.pop() for key, stack in layer_stacks.items()}

    # As with the downward pass, each `i` iteration builds a highest level block
    # in Figure 2 [1], and the `_` iterations build individual layers within the
    # block.
    for i in range(self._num_hierarchies):
      # Blocks after the first build one extra layer, which consumes the
      # stride-2 layer stored between blocks on the downward pass.
      num_resnet = self._num_resnet if i == 0 else self._num_resnet + 1

      for _ in range(num_resnet):
        # Build a layer as shown in Figure 2 of [2], with a skip-connection
        # from the symmetric layer in the downward pass.
        for stack in ['vertical', 'horizontal']:
          input_x = upward_pass[stack]
          x_symmetric = layer_stacks[stack].pop()

          x = activation(input_x)
          x = Conv2D(kernel_size=kernel_sizes[stack],
                     kernel_constraint=kernel_constraints[stack])(x)

          # Include the vertical-stack layer of the upward pass in the layers
          # to be added to the horizontal layer.
          if stack == 'horizontal':
            x_symmetric = tf.keras.layers.Concatenate(axis=-1, dtype=dtype)(
                [upward_pass['vertical'], x_symmetric])

          # Add a skip-connection from the symmetric layer in the downward
          # pass to the layer `x` in the upward pass.
          h = activation(x_symmetric)
          h = Dense(self._num_filters)(h)
          x = tf.keras.layers.add([h, x], dtype=dtype)

          x = activation(x)
          x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
          x = Conv2D(filters=2*self._num_filters,
                     kernel_size=kernel_sizes[stack],
                     kernel_constraint=kernel_constraints[stack])(x)

          if conditional_input is not None:
            h_projection = _build_and_apply_h_projection(
                conditional_input, self._num_filters, dtype=dtype)
            x = tf.keras.layers.add([x, h_projection], dtype=dtype)

          x = _apply_sigmoid_gating(x)
          # Residual connection from the layer's input.
          upward_pass[stack] = tf.keras.layers.add([input_x, x], dtype=dtype)

      # Define deconvolutional layers that expand height/width dimensions on the
      # upward pass (e.g. expanding from 8x8 to 16x16 in Figure 2 of [1]), with
      # the correct kernel dimensions/masking to maintain the autoregressive
      # property.
      if i < self._num_hierarchies - 1:
        for stack in ['vertical', 'horizontal']:
          h, w = kernel_valid_dims[stack]
          kernel_height = 2 * h - 2
          if stack == 'vertical':
            kernel_width = w + 1
            kernel_constraint = _make_kernel_constraint(
                (kernel_height, kernel_width), (h - 2, kernel_height), (0, w))
          else:
            kernel_width = 2 * w - 2
            kernel_constraint = _make_kernel_constraint(
                (kernel_height, kernel_width), (h - 2, kernel_height),
                (w - 2, kernel_width))

          x = upward_pass[stack]
          x = Conv2DTranspose(kernel_size=(kernel_height, kernel_width),
                              kernel_constraint=kernel_constraint)(x)
          upward_pass[stack] = x

    # Only the horizontal stack feeds the output head.
    x_out = tf.keras.layers.ELU(dtype=dtype)(upward_pass['horizontal'])

    # Build final Dense/Reshape layers to output the correct number of
    # parameters per pixel.
    num_channels = tensorshape_util.as_list(image_shape)[-1]
    num_coeffs = num_channels * (num_channels - 1) // 2
    num_out = num_channels * 2 + num_coeffs + 1
    num_out_total = num_out * self._num_logistic_mix
    params = Dense(num_out_total)(x_out)
    # Reshape to [-1, <all image dims but the last>, num_logistic_mix, num_out].
    params = tf.reshape(params, prefer_static.concat(
        [[-1], image_shape[:-1], [self._num_logistic_mix, num_out]], axis=0))

    # If there is one color channel, split the parameters into a list of three
    # output `Tensor`s: (1) component logits for the Quantized Logistic mixture
    # distribution, (2) location parameters for each component, and (3) scale
    # parameters for each component. If there is more than one color channel,
    # return a fourth `Tensor` for the coefficients for the linear dependence
    # among color channels.
    splits = (3 if num_channels == 1
              else [1, num_channels, num_channels, num_coeffs])
    outputs = tf.split(params, splits, axis=-1)

    # Squeeze singleton dimension from component logits
    outputs[0] = tf.squeeze(outputs[0], axis=-1)

    # Ensure scales are positive and do not collapse to near-zero
    outputs[2] = tf.nn.softplus(outputs[2]) + tf.cast(tf.exp(-7.), self.dtype)

    # The model takes the conditional input as a second input only when one
    # was created above.
    inputs = (image_input if conditional_input is None
              else [image_input, conditional_input])
    self._network = tf.keras.Model(inputs=inputs, outputs=outputs)
    super(_PixelCNNNetwork, self).build(input_shape)
  def solve_nu_zeta(self,
                    dataset: dataset_lib.OffpolicyDataset,
                    target_policy: tf_policy.TFPolicy,
                    regularizer: float = 1e-6):
    """Solves for density ratios and then approximates target policy value.

    Args:
      dataset: The dataset to sample experience from.
      target_policy: The policy whose value we want to estimate.
      regularizer: A small constant to add to matrices before inverting them or
        to floats before taking square root.

    Returns:
      Estimated average per-step reward of the target policy.
    """

    if not hasattr(self, '_td_mat'):
      # Set up env_steps.
      episodes, valid_steps = dataset.get_all_episodes(
          limit=self._limit_episodes)
      total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
      num_episodes = tf.shape(valid_steps)[0]
      num_samples = num_episodes * total_num_steps_per_episode
      valid_and_not_last = tf.logical_and(valid_steps, episodes.discount > 0)
      valid_indices = tf.squeeze(
          tf.where(tf.reshape(valid_and_not_last[:, :-1], [-1])))

      initial_env_step = tf.nest.map_structure(
          lambda t: tf.squeeze(
              tf.reshape(
                  tf.repeat(
                      t[:, 0:1, ...],
                      axis=1,
                      repeats=total_num_steps_per_episode), [num_samples, -1])),
          episodes)
      initial_env_step = tf.nest.map_structure(
          lambda t: tf.gather(t, valid_indices), initial_env_step)
      tfagents_initial_env_step = dataset_lib.convert_to_tfagents_timestep(
          initial_env_step)

      env_step = tf.nest.map_structure(
          lambda t: tf.squeeze(
              tf.reshape(t[:, 0:total_num_steps_per_episode, ...],
                         [num_samples, -1])), episodes)
      env_step = tf.nest.map_structure(lambda t: tf.gather(t, valid_indices),
                                       env_step)
      tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

      next_env_step = tf.nest.map_structure(
          lambda t: tf.squeeze(
              tf.reshape(t[:, 1:total_num_steps_per_episode + 1, ...],
                         [num_samples, -1])), episodes)
      next_env_step = tf.nest.map_structure(
          lambda t: tf.gather(t, valid_indices), next_env_step)
      tfagents_next_env_step = dataset_lib.convert_to_tfagents_timestep(
          next_env_step)

      # get probabilities
      initial_target_probs = target_policy.distribution(
          tfagents_initial_env_step).action.probs_parameter()
      next_target_probs = target_policy.distribution(
          tfagents_next_env_step).action.probs_parameter()

      # First, get the nu_loss and data weights
      #current_nu_loss = self._get_nu_loss(initial_env_step, env_step,
      #                                    next_env_step, target_policy)
      #data_weight, _ = self._get_weights(current_nu_loss)

      # # debug only and to reproduce dual dice result, DELETE
      # data_weight = tf.ones_like(data_weight)

      state_action_count = self._get_state_action_counts(env_step)
      counts = tf.reduce_sum(tf.one_hot(state_action_count, self._dimension), 0)
      gamma_sample = tf.pow(self._gamma, tf.cast(env_step.step_num, tf.float32))

      # # debug only and to reproduce dual dice result, DELETE
      # gamma_sample = tf.ones_like(gamma_sample)

      # now we need to expand_dims to include action space in extra dimensions
      #data_weights = tf.reshape(data_weight, [-1, self._num_limits])
      # both are data sample weights for L2 problem, needs to be normalized later
      #gamma_data_weights = tf.reshape(gamma_sample, [-1, 1]) * data_weights

      initial_states = tf.tile(
          tf.reshape(initial_env_step.observation, [-1, 1]),
          [1, self._num_actions])
      initial_actions = tf.tile(
          tf.reshape(tf.range(self._num_actions), [1, -1]),
          [initial_env_step.observation.shape[0], 1])
      initial_nu_indices = self._get_index(initial_states, initial_actions)

      # linear term w.r.t. initial distribution
      #b_vec_2 = tf.stack([
      #    tf.reduce_sum(
      #        tf.reshape(
      #            data_weights[:, itr] / tf.reduce_sum(data_weights[:, itr]),
      #            [-1, 1]) * tf.reduce_sum(
      #                tf.one_hot(initial_nu_indices, self._dimension) *
      #                (1 - self._gamma) *
      #                tf.expand_dims(initial_target_probs, axis=-1),
      #                axis=1),
      #        axis=0) for itr in range(self._num_limits)
      #],
      #                   axis=0)

      next_states = tf.tile(
          tf.reshape(next_env_step.observation, [-1, 1]),
          [1, self._num_actions])
      next_actions = tf.tile(
          tf.reshape(tf.range(self._num_actions), [1, -1]),
          [next_env_step.observation.shape[0], 1])
      next_nu_indices = self._get_index(next_states, next_actions)
      next_nu_indices = tf.where(
          tf.expand_dims(next_env_step.is_absorbing(), -1),
          -1 * tf.ones_like(next_nu_indices), next_nu_indices)

      nu_indices = self._get_index(env_step.observation, env_step.action)

      target_log_probabilities = target_policy.distribution(
          tfagents_env_step).action.log_prob(env_step.action)
      if not self._solve_for_state_action_ratio:
        policy_ratio = tf.exp(target_log_probabilities -
                              env_step.get_log_probability())
      else:
        policy_ratio = tf.ones([
            target_log_probabilities.shape[0],
        ])
      policy_ratios = tf.tile(
          tf.reshape(policy_ratio, [-1, 1]), [1, self._num_actions])

      # the tabular feature vector
      a_vec = tf.one_hot(nu_indices, self._dimension) - tf.reduce_sum(
          self._gamma *
          tf.expand_dims(next_target_probs * policy_ratios, axis=-1) *
          tf.one_hot(next_nu_indices, self._dimension),
          axis=1)

      # linear term w.r.t. reward
      #b_vec_1 = tf.stack([
      #    tf.reduce_sum(
      #        tf.reshape(
      #            (gamma_data_weights[:, itr] /
      #             tf.reduce_sum(gamma_data_weights[:, itr])) * self._reward_fn(env_step), #/
      #            #tf.cast(state_action_count, tf.float32),
      #            [-1, 1]) * a_vec,
      #        axis=0) for itr in range(self._num_limits)
      #],
      #                   axis=0)
      # quadratic term of feature
      # Get weighted outer product by using einsum to save computing resource!
      #a_mat = tf.stack([
      #    tf.einsum(
      #        'ai, a, aj -> ij', a_vec,
      #        #1.0 / tf.cast(state_action_count, tf.float32),
      #        gamma_data_weights[:, itr] /
      #        tf.reduce_sum(gamma_data_weights[:, itr]),
      #        a_vec)
      #    for itr in range(self._num_limits)
      #],
      #                 axis=0)

      td_mat = tf.einsum('ai, a, aj -> ij',
                         tf.one_hot(nu_indices, self._dimension),
                         1.0 / tf.cast(state_action_count, tf.float32), a_vec)

      weighted_rewards = policy_ratio * self._reward_fn(env_step)

      bias = tf.reduce_sum(
          tf.one_hot(nu_indices, self._dimension) *
          tf.reshape(weighted_rewards, [-1, 1]) * 1.0 /
          tf.cast(state_action_count, tf.float32)[:, None],
          axis=0)

      # Initialize
      self._nu = np.ones_like(self._nu) * bias[:, None]
      self._nu2 = np.ones_like(self._nu2) * bias[:, None]

      self._a_vec = a_vec
      self._td_mat = td_mat
      self._bias = bias
      self._weighted_rewards = weighted_rewards
      self._state_action_count = state_action_count
      self._nu_indices = nu_indices
      self._initial_nu_indices = initial_nu_indices
      self._initial_target_probs = initial_target_probs
      self._gamma_sample = gamma_sample
      self._gamma_sample = tf.ones_like(gamma_sample)

    saddle_bellman_residuals = (
        tf.matmul(self._a_vec, self._nu) - self._weighted_rewards[:, None])
    saddle_bellman_residuals *= -1 * self._algae_alpha_sign
    saddle_zetas = tf.gather(self._zeta, self._nu_indices)
    saddle_initial_nu_values = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu, self._initial_nu_indices),
        axis=1)
    saddle_init_nu_loss = ((1 - self._gamma) * saddle_initial_nu_values *
                           self._algae_alpha_sign)

    saddle_bellman_residuals2 = (
        tf.matmul(self._a_vec, self._nu2) - self._weighted_rewards[:, None])
    saddle_bellman_residuals2 *= 1 * self._algae_alpha_sign
    saddle_zetas2 = tf.gather(self._zeta2, self._nu_indices)
    saddle_initial_nu_values2 = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu2, self._initial_nu_indices),
        axis=1)
    saddle_init_nu_loss2 = ((1 - self._gamma) * saddle_initial_nu_values2 * -1 *
                            self._algae_alpha_sign)

    saddle_loss = 0.5 * (
        saddle_init_nu_loss + saddle_bellman_residuals * saddle_zetas +
        -tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas) +
        -saddle_init_nu_loss2 + -saddle_bellman_residuals2 * saddle_zetas2 +
        tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas2))
    # Binary search to find best alpha.
    left = tf.constant([-8., -8.])
    right = tf.constant([32., 32.])
    for _ in range(16):
      mid = 0.5 * (left + right)
      self._alpha.assign(mid)
      weights, log_weights = self._get_weights(saddle_loss *
                                               self._gamma_sample[:, None])

      divergence = self._compute_divergence(weights, log_weights)
      divergence_violation = divergence - self._two_sided_limit
      left = tf.where(divergence_violation > 0., mid, left)
      right = tf.where(divergence_violation > 0., right, mid)
    self._alpha.assign(0.5 * (left + right))
    weights, log_weights = self._get_weights(saddle_loss *
                                             self._gamma_sample[:, None])

    gamma_data_weights = tf.stop_gradient(weights * self._gamma_sample[:, None])
    #print(tf.concat([gamma_data_weights, saddle_loss], axis=-1))
    avg_saddle_loss = (
        tf.reduce_sum(gamma_data_weights * saddle_loss, axis=0) /
        tf.reduce_sum(gamma_data_weights, axis=0))

    weighted_state_action_count = tf.reduce_sum(
        tf.one_hot(self._nu_indices, self._dimension)[:, :, None] *
        weights[:, None, :],
        axis=0)
    weighted_state_action_count = tf.gather(weighted_state_action_count,
                                            self._nu_indices)
    my_td_mat = tf.einsum(
        'ai, ab, ab, aj -> bij',
        tf.one_hot(self._nu_indices, self._dimension),
        #1.0 / tf.cast(self._state_action_count, tf.float32),
        1.0 / weighted_state_action_count,
        weights,
        self._a_vec)
    my_bias = tf.reduce_sum(
        tf.transpose(weights)[:, :, None] *
        tf.one_hot(self._nu_indices, self._dimension)[None, :, :] *
        tf.reshape(self._weighted_rewards, [1, -1, 1]) *
        #1.0 / tf.cast(self._state_action_count, tf.float32)[None, :, None],
        1.0 / tf.transpose(weighted_state_action_count)[:, :, None],
        axis=1)

    #print('hello', saddle_initial_nu_values[:1], saddle_zetas[:3],
    #      self._nu[:2], my_bias[:, :2], saddle_loss[:4])

    with tf.GradientTape(
        watch_accessed_variables=False, persistent=True) as tape:
      tape.watch([self._nu, self._nu2, self._alpha])
      bellman_residuals = tf.matmul(
          my_td_mat,
          tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
      bellman_residuals = tf.transpose(tf.squeeze(bellman_residuals, -1))
      bellman_residuals = tf.gather(bellman_residuals, self._nu_indices)
      initial_nu_values = tf.reduce_sum(  # Average over actions.
          self._initial_target_probs[:, :, None] *
          tf.gather(self._nu, self._initial_nu_indices),
          axis=1)

      bellman_residuals *= self._algae_alpha_sign

      init_nu_loss = ((1 - self._gamma) * initial_nu_values *
                      self._algae_alpha_sign)

      nu_loss = (
          tf.math.square(bellman_residuals) / 2.0 +
          tf.math.abs(self._algae_alpha) * init_nu_loss)

      loss = (
          gamma_data_weights * nu_loss /
          tf.reduce_sum(gamma_data_weights, axis=0, keepdims=True))

      bellman_residuals2 = tf.matmul(
          my_td_mat,
          tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
      bellman_residuals2 = tf.transpose(tf.squeeze(bellman_residuals2, -1))
      bellman_residuals2 = tf.gather(bellman_residuals2, self._nu_indices)
      initial_nu_values2 = tf.reduce_sum(  # Average over actions.
          self._initial_target_probs[:, :, None] *
          tf.gather(self._nu2, self._initial_nu_indices),
          axis=1)

      bellman_residuals2 *= -1 * self._algae_alpha_sign

      init_nu_loss2 = ((1 - self._gamma) * initial_nu_values2 * -1 *
                       self._algae_alpha_sign)

      nu_loss2 = (
          tf.math.square(bellman_residuals2) / 2.0 +
          tf.math.abs(self._algae_alpha) * init_nu_loss2)

      loss2 = (
          gamma_data_weights * nu_loss2 /
          tf.reduce_sum(gamma_data_weights, axis=0, keepdims=True))

      divergence = self._compute_divergence(weights, log_weights)
      divergence_violation = divergence - self._two_sided_limit

      alpha_loss = (-tf.exp(self._alpha) *
                    tf.stop_gradient(divergence_violation))

      extra_loss = tf.reduce_sum(tf.math.square(self._nu[-1, :]))
      extra_loss2 = tf.reduce_sum(tf.math.square(self._nu2[-1, :]))
      nu_grad = tape.gradient(loss + extra_loss, [self._nu])[0]
      nu_grad2 = tape.gradient(loss2 + extra_loss2, [self._nu2])[0]
    avg_loss = tf.reduce_sum(
        0.5 * (loss - loss2) / tf.math.abs(self._algae_alpha), axis=0)
    nu_jacob = tape.jacobian(nu_grad, [self._nu])[0]
    nu_hess = tf.stack([nu_jacob[:, i, :, i] for i in range(self._num_limits)],
                       axis=0)

    nu_jacob2 = tape.jacobian(nu_grad2, [self._nu2])[0]
    nu_hess2 = tf.stack(
        [nu_jacob2[:, i, :, i] for i in range(self._num_limits)], axis=0)

    for idx, div in enumerate(divergence):
      tf.summary.scalar('divergence%d' % idx, div)

    #alpha_grads = tape.gradient(alpha_loss, [self._alpha])
    #alpha_grad_op = self._alpha_optimizer.apply_gradients(
    #    zip(alpha_grads, [self._alpha]))
    #self._alpha.assign(tf.minimum(8., tf.maximum(-8., self._alpha)))

    #print(self._alpha, tf.concat([weights, nu_loss], -1))
    #regularizer = 0.1
    nu_transformed = tf.transpose(
        tf.squeeze(
            tf.linalg.solve(nu_hess + regularizer * tf.eye(self._dimension),
                            tf.expand_dims(-tf.transpose(nu_grad), axis=-1))))
    self._nu = self._nu + 0.1 * nu_transformed
    nu_transformed2 = tf.transpose(
        tf.squeeze(
            tf.linalg.solve(nu_hess2 + regularizer * tf.eye(self._dimension),
                            tf.expand_dims(-tf.transpose(nu_grad2), axis=-1))))
    self._nu2 = self._nu2 + 0.1 * nu_transformed2

    print(avg_loss * self._algae_alpha_sign,
          avg_saddle_loss * self._algae_alpha_sign, self._nu[:2], divergence)
    #print(init_nu_loss[:8], init_nu_loss[-8:])
    #print(bellman_residuals[:8])
    #print(self._nu[:3], self._zeta[:3])

    zetas = tf.matmul(my_td_mat,
                      tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
    zetas = tf.transpose(tf.squeeze(zetas, -1))
    zetas *= -self._algae_alpha_sign
    zetas /= tf.math.abs(self._algae_alpha)
    self._zeta = self._zeta + 0.1 * (zetas - self._zeta)

    zetas2 = tf.matmul(my_td_mat,
                       tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :,
                                                                      None]
    zetas2 = tf.transpose(tf.squeeze(zetas2, -1))
    zetas2 *= 1 * self._algae_alpha_sign
    zetas2 /= tf.math.abs(self._algae_alpha)
    self._zeta2 = self._zeta2 + 0.1 * (zetas2 - self._zeta2)

    #self._zeta = (
    #    tf.einsum('ij,ja-> ia', self._td_mat, self._nu) -
    #    tf.transpose(my_bias))
    #self._zeta *= -tf.reshape(self._algae_alpha_sign, [1, self._num_limits])
    #self._zeta /= tf.math.abs(self._algae_alpha)
    return [
        avg_saddle_loss * self._algae_alpha_sign,
        avg_loss * self._algae_alpha_sign, divergence
    ]
Exemple #7
0
def main(argv):
    """Trains an ensemble of classifiers and logs ensemble metrics.

    Builds and fits `FLAGS.ensemble_size` models one after another (LeNet-5,
    or a ResNet when `FLAGS.resnet` is set), saving each member's weights to
    `<FLAGS.output_dir>/member_<i>/model.weights`. Afterwards the saved weight
    files are handed to `ensemble_metrics` for the train and test splits and
    every returned metric is logged.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    x_train, y_train, x_test, y_test = datasets.load(session)
    n_train = x_train.shape[0]
    # Labels are treated as integer class ids starting at 0.
    num_classes = int(np.amax(y_train)) + 1

    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
        # TODO(trandustin): We re-build the graph for each ensemble member. This
        # is due to an unknown bug where the variables are otherwise not
        # re-initialized to be random. While this is inefficient in graph mode, I'm
        # keeping this for now as we'd like to move to eager mode anyways.
        if not FLAGS.resnet:
            model = lenet5(x_train.shape[1:], num_classes)
        else:
            model = res_net(n_train,
                            x_train.shape[1:],
                            num_classes,
                            batchnorm=FLAGS.batchnorm,
                            variational=False)

            def schedule_fn(epoch):
                """Learning rate schedule function."""
                rate = FLAGS.learning_rate
                if epoch > 180:
                    rate *= 0.5e-3
                elif epoch > 160:
                    rate *= 1e-3
                elif epoch > 120:
                    rate *= 1e-2
                elif epoch > 80:
                    rate *= 1e-1
                # LearningRateScheduler expects a plain float.
                return float(rate)

            lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

        def negative_log_likelihood(y, rv_y):
            del rv_y  # unused arg
            return -model.output.distribution.log_prob(tf.squeeze(y))  # pylint: disable=cell-var-from-loop

        def accuracy(y_true, y_sample):
            del y_sample  # unused arg
            return tf.equal(
                tf.argmax(input=model.output.distribution.logits, axis=1),  # pylint: disable=cell-var-from-loop
                tf.cast(tf.squeeze(y_true), tf.int64))

        def log_likelihood(y_true, y_sample):
            del y_sample  # unused arg
            return model.output.distribution.log_prob(tf.squeeze(y_true))  # pylint: disable=cell-var-from-loop

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=FLAGS.learning_rate),
            loss=negative_log_likelihood,
            metrics=[log_likelihood, accuracy])
        member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=member_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        # The learning-rate schedule only exists on the ResNet branch above.
        callbacks = [tensorboard]
        if FLAGS.resnet:
            callbacks.append(lr_callback)

        # With bootstrapping each member is fit on a resample (with
        # replacement) of the training set; otherwise on the full set.
        x_fit, y_fit = x_train, y_train
        if FLAGS.bootstrap:
            inds = np.random.choice(n_train, n_train, replace=True)
            x_fit, y_fit = x_train[inds], y_train[inds]

        # Flags are expressed in steps; Keras wants epochs.
        num_epochs = (FLAGS.batch_size * FLAGS.training_steps) // n_train
        validation_freq = max(
            (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1)

        model.fit(x=x_fit,
                  y=y_fit,
                  batch_size=FLAGS.batch_size,
                  epochs=num_epochs,
                  validation_data=(x_test, y_test),
                  validation_freq=validation_freq,
                  verbose=1,
                  callbacks=callbacks)

        member_filename = os.path.join(member_dir, 'model.weights')
        ensemble_filenames.append(member_filename)
        model.save_weights(member_filename)

    # Evaluation reuses the model built in the last loop iteration; the
    # per-member weight files are passed separately to `ensemble_metrics`.
    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(tf.squeeze(labels)),
        model.output.distribution.logits,
    ])

    ensemble_metrics_vals = {
        'train':
        ensemble_metrics(x_train,
                         y_train,
                         model,
                         ll,
                         weight_files=ensemble_filenames),
        'test':
        ensemble_metrics(x_test,
                         y_test,
                         model,
                         ll,
                         weight_files=ensemble_filenames),
    }

    for split, metrics in ensemble_metrics_vals.items():
        logging.info(split)
        for metric_name in metrics:
            logging.info('%s: %s', metric_name, metrics[metric_name])
  def __init__(self,
               vocab_size,
               hidden_size=768,
               num_layers=12,
               num_attention_heads=12,
               sequence_length=512,
               max_sequence_length=None,
               type_vocab_size=16,
               intermediate_size=3072,
               activation=activations.gelu,
               dropout_rate=0.1,
               attention_dropout_rate=0.1,
               initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
               return_all_encoder_outputs=False,
               **kwargs):
    """Assembles the encoder as a functional Keras model.

    The model takes three int32 inputs of shape `(sequence_length,)` (word
    ids, mask, type ids). Word, position and type embeddings are summed,
    layer-normalized and passed through dropout, then fed through
    `num_layers` stacked `layers.Transformer` blocks. The first token of the
    last block's output goes through a tanh `Dense` layer to form the pooled
    output.

    Args:
      vocab_size: Vocabulary size of the word embedding table.
      hidden_size: Embedding width and width of the pooled output.
      num_layers: Number of transformer blocks to stack.
      num_attention_heads: Passed to every transformer block.
      sequence_length: Length of the three input sequences.
      max_sequence_length: Passed to the position embedding; falls back to
        `sequence_length` when falsy.
      type_vocab_size: Vocabulary size of the type embedding table.
      intermediate_size: Passed to every transformer block.
      activation: Intermediate activation of every transformer block;
        resolved through `tf.keras.activations.get`.
      dropout_rate: Rate of the embedding dropout and of every block.
      attention_dropout_rate: Passed to every transformer block.
      initializer: Initializer for embeddings, transformer kernels and the
        pooler; resolved through `tf.keras.initializers.get`.
      return_all_encoder_outputs: If true the first model output is the list
        of all block outputs, otherwise only the last block's output.
      **kwargs: Forwarded to the parent constructor.
    """
    activation = tf.keras.activations.get(activation)
    initializer = tf.keras.initializers.get(initializer)
    max_sequence_length = max_sequence_length or sequence_length

    # Set before any attribute below is assigned.
    self._self_setattr_tracking = False
    self._config_dict = {
        'vocab_size': vocab_size,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'num_attention_heads': num_attention_heads,
        'sequence_length': sequence_length,
        'max_sequence_length': max_sequence_length,
        'type_vocab_size': type_vocab_size,
        'intermediate_size': intermediate_size,
        'activation': tf.keras.activations.serialize(activation),
        'dropout_rate': dropout_rate,
        'attention_dropout_rate': attention_dropout_rate,
        'initializer': tf.keras.initializers.serialize(initializer),
        'return_all_encoder_outputs': return_all_encoder_outputs,
    }

    # Symbolic inputs, created in the order: word ids, mask, type ids.
    input_shape = (sequence_length,)
    word_ids = tf.keras.layers.Input(
        shape=input_shape, dtype=tf.int32, name='input_word_ids')
    mask = tf.keras.layers.Input(
        shape=input_shape, dtype=tf.int32, name='input_mask')
    type_ids = tf.keras.layers.Input(
        shape=input_shape, dtype=tf.int32, name='input_type_ids')

    # Word embeddings.
    self._embedding_layer = layers.OnDeviceEmbedding(
        vocab_size=vocab_size,
        embedding_width=hidden_size,
        initializer=initializer,
        name='word_embeddings')
    word_embeddings = self._embedding_layer(word_ids)

    # Position embeddings; dynamic slicing is always on for simplicity.
    self._position_embedding_layer = layers.PositionEmbedding(
        initializer=initializer,
        use_dynamic_slicing=True,
        max_sequence_length=max_sequence_length)
    position_embeddings = self._position_embedding_layer(word_embeddings)

    # Type (segment) embeddings.
    type_embedding_layer = layers.OnDeviceEmbedding(
        vocab_size=type_vocab_size,
        embedding_width=hidden_size,
        initializer=initializer,
        use_one_hot=True,
        name='type_embeddings')
    type_embeddings = type_embedding_layer(type_ids)

    # Sum -> layer norm -> dropout.
    summed = tf.keras.layers.Add()(
        [word_embeddings, position_embeddings, type_embeddings])
    layer_norm = tf.keras.layers.LayerNormalization(
        name='embeddings/layer_norm',
        axis=-1,
        epsilon=util.LAYER_NORM_EPSILON,
        dtype=tf.float32)
    normalized = layer_norm(summed)
    embeddings = tf.keras.layers.Dropout(rate=dropout_rate)(normalized)

    # Transformer stack; every block sees the same attention mask.
    self._transformer_layers = []
    hidden = embeddings
    attention_mask = layers.SelfAttentionMask()([hidden, mask])
    encoder_outputs = []
    for layer_idx in range(num_layers):
      block = layers.Transformer(
          num_attention_heads=num_attention_heads,
          intermediate_size=intermediate_size,
          intermediate_activation=activation,
          dropout_rate=dropout_rate,
          attention_dropout_rate=attention_dropout_rate,
          kernel_initializer=initializer,
          name='transformer/layer_%d' % layer_idx)
      self._transformer_layers.append(block)
      hidden = block([hidden, attention_mask])
      encoder_outputs.append(hidden)

    # Pooler: first token of the final block, through a tanh projection.
    take_first_token = tf.keras.layers.Lambda(
        lambda x: tf.squeeze(x[:, 0:1, :], axis=1))
    first_token_tensor = take_first_token(encoder_outputs[-1])
    pooler = tf.keras.layers.Dense(
        units=hidden_size,
        activation='tanh',
        kernel_initializer=initializer,
        name='pooler_transform')
    cls_output = pooler(first_token_tensor)

    sequence_output = (
        encoder_outputs if return_all_encoder_outputs else encoder_outputs[-1])
    outputs = [sequence_output, cls_output]

    super(TransformerEncoder, self).__init__(
        inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs)
Exemple #9
0
def calc_spectrograms(waves,
                      window_lengths,
                      spectral_diffs=(0, 1),
                      window_name='hann',
                      use_mel_scale=True,
                      proj_method='matmul',
                      num_spec_bins=256,
                      random_crop=True):
    """Calculate spectrograms with multiple window sizes for list of input waves.

    For every finite-difference order in `spectral_diffs` and every window
    length, each wave is (optionally) cropped, framed with 50% overlap,
    windowed, projected to the frequency domain and converted to a magnitude
    spectrogram.

    Args:
      waves: List of float tensors. The last axis is squeezed away, so it must
        have size 1 (e.g. shape [batch, length, 1]).
      window_lengths: List of Int. Window sizes (frame lengths) to use for
        computing the spectrograms.
      spectral_diffs: Iterable of Int. Orders of finite difference to take
        before computing specs.
      window_name: Str. Name of the window to use when computing the
        spectrograms. Supports 'hann' and None.
      use_mel_scale: Bool. Whether or not to project to mel-scale frequencies.
      proj_method: Str. Spectral projection method implementation to use.
        Supported are 'fft' and 'matmul'.
      num_spec_bins: Int. Number of bins in the spectrogram.
      random_crop: Bool. Take random crop or not.

    Returns:
      Tuple of tuples of magnitude spectrograms, with output[i][j] being the
        spectrogram for input wave i and the j-th (spectral diff, window
        length) combination, iterated with window length varying fastest.

    Raises:
      ValueError: If `window_name` or `proj_method` is not supported.
    """
    waves = [tf.squeeze(w, axis=-1) for w in waves]

    if window_name == 'hann':
        windows = [
            tf.reshape(tf.signal.hann_window(wl, periodic=False), [1, 1, -1])
            for wl in window_lengths
        ]
    elif window_name is None:
        windows = [None] * len(window_lengths)
    else:
        raise ValueError('Unknown window function (%s).' % window_name)

    # Fail early and clearly instead of hitting an unbound `ffts` later on.
    if proj_method not in ('fft', 'matmul'):
        raise ValueError('Unknown projection method (%s).' % proj_method)

    def sq_mag(x):
        """Squared magnitude of a complex tensor."""
        return tf.square(tf.math.real(x)) + tf.square(tf.math.imag(x))

    spec_len_wave = []
    for d in spectral_diffs:
        # The d-th order finite difference does not depend on the window
        # length, so it is computed once per `d`.
        diffed_waves = waves
        for _ in range(d):
            diffed_waves = [w[:, 1:] - w[:, :-1] for w in diffed_waves]

        for length, window in zip(window_lengths, windows):
            wave_crops = diffed_waves
            if random_crop:
                wave_crops = aligned_random_crop(wave_crops, length)

            # Frames of `length` samples with a hop of half a frame.
            frames = [
                tf.signal.frame(wc, length, length // 2) for wc in wave_crops
            ]
            if window is not None:
                frames = [f * window for f in frames]

            if proj_method == 'fft':
                # Drop the first (DC) frequency bin.
                ffts = [tf.signal.rfft(f)[:, :, 1:] for f in frames]
            else:  # 'matmul'
                mat = get_spectral_matrix(length,
                                          num_spec_bins=num_spec_bins,
                                          use_mel_scale=use_mel_scale)
                ffts = [matmul_real_with_complex(f, mat) for f in frames]

            specs_sq = [sq_mag(f) for f in ffts]

            if use_mel_scale and proj_method == 'fft':
                sample_rate = 24000
                upper_edge_hertz = sample_rate / 2.
                lower_edge_hertz = sample_rate / length
                # The first row is dropped to line up with the DC bin removed
                # from the FFT output above.
                lin_to_mel = tf.signal.linear_to_mel_weight_matrix(
                    num_mel_bins=num_spec_bins,
                    num_spectrogram_bins=length // 2 + 1,
                    sample_rate=sample_rate,
                    lower_edge_hertz=lower_edge_hertz,
                    upper_edge_hertz=upper_edge_hertz,
                    dtype=tf.dtypes.float32)[1:]
                specs_sq = [tf.matmul(s, lin_to_mel) for s in specs_sq]

            # EPSILON keeps the square root away from exactly zero.
            specs = [tf.sqrt(s + EPSILON) for s in specs_sq]
            spec_len_wave.append(specs)

    # Transpose [combination][wave] -> [wave][combination]. Materialized as a
    # tuple so the result can be indexed and iterated more than once.
    return tuple(zip(*spec_len_wave))
Exemple #10
0
 def log_likelihood(y_true, y_sample):
     """Log-probability of the squeezed labels under the model's output distribution."""
     del y_sample  # unused arg
     output_distribution = model.output.distribution  # pylint: disable=cell-var-from-loop
     squeezed_labels = tf.squeeze(y_true)
     return output_distribution.log_prob(squeezed_labels)
    def train_step(self,
                   dataset: dataset_lib.OffpolicyDataset,
                   target_policy: tf_policy.TFPolicy,
                   regularizer: float = 1e-6):
        """Performs single iteration of CoinDICE.

        The step (1) evaluates the per-sample Lagrangian ("saddle") loss for
        the two sign conventions of algae_alpha, (2) binary-searches
        `self._alpha` so that the divergence of the resulting sample weights
        meets `self._two_sided_limit`, (3) takes a regularized Newton step on
        `self._nu` / `self._nu2` under those weights, and (4) moves
        `self._zeta` / `self._zeta2` towards the residuals implied by the new
        nu values.

        Args:
          dataset: The dataset to sample experience from. Not read in this
            method body; the step works on the tensors stored on `self`
            (e.g. `self._a_vec`, `self._weighted_rewards`).
          target_policy: The policy whose value we want to estimate. Not read
            in this method body.
          regularizer: A small constant added to the diagonal of the Hessians
            before solving the Newton systems.

        Returns:
          A list `[avg_saddle_loss, avg_loss, divergence]` where the first two
          entries are multiplied by `self._algae_alpha_sign`:
          the weighted average of the saddle loss, the weighted primal loss
          combined over both sign conventions, and the divergence of the
          final weights.
        """
        # First compute Lagrangian loss.
        saddle_bellman_residuals = (tf.matmul(self._a_vec, self._nu) -
                                    self._weighted_rewards[:, None])
        saddle_bellman_residuals *= -1 * self._algae_alpha_sign
        saddle_zetas = tf.gather(self._zeta, self._nu_indices)
        saddle_initial_nu_values = tf.reduce_sum(  # Average over actions.
            self._initial_target_probs[:, :, None] *
            tf.gather(self._nu, self._initial_nu_indices),
            axis=1)
        saddle_init_nu_loss = ((1 - self._gamma) * saddle_initial_nu_values *
                               self._algae_alpha_sign)

        # This second optimization switches the sign of algae_alpha.
        # We add these two together to get the final loss, and thus counteract
        # the bias introduced by algae_alpha.
        saddle_bellman_residuals2 = (tf.matmul(self._a_vec, self._nu2) -
                                     self._weighted_rewards[:, None])
        saddle_bellman_residuals2 *= 1 * self._algae_alpha_sign
        saddle_zetas2 = tf.gather(self._zeta2, self._nu_indices)
        saddle_initial_nu_values2 = tf.reduce_sum(  # Average over actions.
            self._initial_target_probs[:, :, None] *
            tf.gather(self._nu2, self._initial_nu_indices),
            axis=1)
        saddle_init_nu_loss2 = ((1 - self._gamma) * saddle_initial_nu_values2 *
                                -1 * self._algae_alpha_sign)

        saddle_loss = 0.5 * (
            saddle_init_nu_loss + saddle_bellman_residuals * saddle_zetas +
            -tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas) +
            -saddle_init_nu_loss2 + -saddle_bellman_residuals2 * saddle_zetas2
            + tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas2))

        # Find optimal weights by doing binary search on alpha (lambda in the
        # paper). The search interval is [-8, 32] per limit and is halved 16
        # times; wherever the divergence still exceeds the limit the lower
        # bound moves up, otherwise the upper bound moves down.
        left = tf.constant([-8., -8.])
        right = tf.constant([32., 32.])
        for _ in range(16):
            mid = 0.5 * (left + right)
            self._alpha.assign(mid)
            weights, log_weights = self._get_weights(saddle_loss)

            divergence = self._compute_divergence(weights, log_weights)
            divergence_violation = divergence - self._two_sided_limit
            left = tf.where(divergence_violation > 0., mid, left)
            right = tf.where(divergence_violation > 0., right, mid)
        self._alpha.assign(0.5 * (left + right))
        weights, log_weights = self._get_weights(saddle_loss)

        # Now that we have weights, we reconstruct the Bellman residual matrices.
        data_weights = tf.stop_gradient(weights)
        avg_saddle_loss = (tf.reduce_sum(data_weights * saddle_loss, axis=0) /
                           tf.reduce_sum(data_weights, axis=0))

        # Total weight of each sample's state-action index, gathered back to
        # one row per sample.
        weighted_state_action_count = tf.reduce_sum(
            tf.one_hot(self._nu_indices, self._dimension)[:, :, None] *
            weights[:, None, :],
            axis=0)
        weighted_state_action_count = tf.gather(weighted_state_action_count,
                                                self._nu_indices)
        # One weighted [dimension, dimension] residual matrix per limit
        # (einsum output 'bij'), plus the matching reward bias.
        my_td_mat = tf.einsum('ai, ab, ab, aj -> bij',
                              tf.one_hot(self._nu_indices, self._dimension),
                              1.0 / weighted_state_action_count, weights,
                              self._a_vec)
        my_bias = tf.reduce_sum(
            tf.transpose(weights)[:, :, None] *
            tf.one_hot(self._nu_indices, self._dimension)[None, :, :] *
            tf.reshape(self._weighted_rewards, [1, -1, 1]) * 1.0 /
            tf.transpose(weighted_state_action_count)[:, :, None],
            axis=1)

        # Solve for nu using primal form; i.e., E[(nu - B nu)^2] - (1-g) * E[nu0].
        # The tape is persistent because it is queried again after the
        # context exits to obtain the Jacobians of the gradients.
        with tf.GradientTape(watch_accessed_variables=False,
                             persistent=True) as tape:
            tape.watch([self._nu, self._nu2, self._alpha])
            bellman_residuals = tf.matmul(
                my_td_mat,
                tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
            bellman_residuals = tf.transpose(tf.squeeze(bellman_residuals, -1))
            bellman_residuals = tf.gather(bellman_residuals, self._nu_indices)
            initial_nu_values = tf.reduce_sum(  # Average over actions.
                self._initial_target_probs[:, :, None] *
                tf.gather(self._nu, self._initial_nu_indices),
                axis=1)

            bellman_residuals *= self._algae_alpha_sign

            init_nu_loss = ((1 - self._gamma) * initial_nu_values *
                            self._algae_alpha_sign)

            nu_loss = (tf.math.square(bellman_residuals) / 2.0 +
                       tf.math.abs(self._algae_alpha) * init_nu_loss)

            loss = (data_weights * nu_loss /
                    tf.reduce_sum(data_weights, axis=0, keepdims=True))

            # Same objective for the second nu, with the sign flipped.
            bellman_residuals2 = tf.matmul(
                my_td_mat,
                tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
            bellman_residuals2 = tf.transpose(
                tf.squeeze(bellman_residuals2, -1))
            bellman_residuals2 = tf.gather(bellman_residuals2,
                                           self._nu_indices)
            initial_nu_values2 = tf.reduce_sum(  # Average over actions.
                self._initial_target_probs[:, :, None] *
                tf.gather(self._nu2, self._initial_nu_indices),
                axis=1)

            bellman_residuals2 *= -1 * self._algae_alpha_sign

            init_nu_loss2 = ((1 - self._gamma) * initial_nu_values2 * -1 *
                             self._algae_alpha_sign)

            nu_loss2 = (tf.math.square(bellman_residuals2) / 2.0 +
                        tf.math.abs(self._algae_alpha) * init_nu_loss2)

            loss2 = (data_weights * nu_loss2 /
                     tf.reduce_sum(data_weights, axis=0, keepdims=True))

            divergence = self._compute_divergence(weights, log_weights)

            # Extra loss if for the 'terminal' state (index = -1).
            extra_loss = tf.reduce_sum(tf.math.square(self._nu[-1, :]))
            extra_loss2 = tf.reduce_sum(tf.math.square(self._nu2[-1, :]))

            # Gradients are taken inside the tape so that they are recorded
            # and can be differentiated once more below.
            nu_grad = tape.gradient(loss + extra_loss, [self._nu])[0]
            nu_grad2 = tape.gradient(loss2 + extra_loss2, [self._nu2])[0]

        avg_loss = tf.reduce_sum(0.5 * (loss - loss2) /
                                 tf.math.abs(self._algae_alpha),
                                 axis=0)
        # The full Jacobian couples every limit with every other; only the
        # per-limit diagonal blocks are kept as Hessians.
        nu_jacob = tape.jacobian(nu_grad, [self._nu])[0]
        nu_hess = tf.stack(
            [nu_jacob[:, i, :, i] for i in range(self._num_limits)], axis=0)

        nu_jacob2 = tape.jacobian(nu_grad2, [self._nu2])[0]
        nu_hess2 = tf.stack(
            [nu_jacob2[:, i, :, i] for i in range(self._num_limits)], axis=0)

        for idx, div in enumerate(divergence):
            tf.summary.scalar('divergence%d' % idx, div)

        # Perform Newton step on nu. Only the trailing singleton axis added by
        # `expand_dims` is squeezed, so a size-1 limit or dimension axis is
        # never collapsed by accident.
        nu_transformed = tf.transpose(
            tf.squeeze(
                tf.linalg.solve(
                    nu_hess + regularizer * tf.eye(self._dimension),
                    tf.expand_dims(-tf.transpose(nu_grad), axis=-1)),
                axis=-1))
        self._nu = self._nu + self._nu_learning_rate * nu_transformed
        nu_transformed2 = tf.transpose(
            tf.squeeze(
                tf.linalg.solve(
                    nu_hess2 + regularizer * tf.eye(self._dimension),
                    tf.expand_dims(-tf.transpose(nu_grad2), axis=-1)),
                axis=-1))
        self._nu2 = self._nu2 + self._nu_learning_rate * nu_transformed2

        # Perform step on zeta based on fact that zeta* = (nu* - bellman nu*)/a.
        zetas = tf.matmul(my_td_mat,
                          tf.transpose(self._nu)[:, :, None]) - my_bias[:, :,
                                                                        None]
        zetas = tf.transpose(tf.squeeze(zetas, -1))
        zetas *= -self._algae_alpha_sign
        zetas /= tf.math.abs(self._algae_alpha)
        self._zeta = self._zeta + self._zeta_learning_rate * (zetas -
                                                              self._zeta)

        zetas2 = tf.matmul(my_td_mat,
                           tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :,
                                                                          None]
        zetas2 = tf.transpose(tf.squeeze(zetas2, -1))
        zetas2 *= 1 * self._algae_alpha_sign
        zetas2 /= tf.math.abs(self._algae_alpha)
        self._zeta2 = (self._zeta2 + self._zeta_learning_rate *
                       (zetas2 - self._zeta2))

        return [
            avg_saddle_loss * self._algae_alpha_sign,
            avg_loss * self._algae_alpha_sign, divergence
        ]
    def prepare_dataset(self, dataset: dataset_lib.OffpolicyDataset,
                        target_policy: tf_policy.TFPolicy):
        """Performs pre-computations on dataset to make solving easier.

        Reads all episodes (up to `self._limit_episodes`), flattens them into
        per-transition samples, and stores on `self` the tabular quantities
        built from them: the Bellman residual matrices (`_a_vec`, `_td_mat`),
        the reward terms (`_weighted_rewards`, `_bias`), the index tensors
        (`_nu_indices`, `_initial_nu_indices`), `_initial_target_probs` and
        `_state_action_count`. `_nu` and `_nu2` are re-initialized from the
        bias.

        Args:
          dataset: Off-policy dataset providing `get_all_episodes`.
          target_policy: Policy whose action distribution is evaluated on the
            initial, current and next steps.
        """
        episodes, valid_steps = dataset.get_all_episodes(
            limit=self._limit_episodes)
        total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
        num_episodes = tf.shape(valid_steps)[0]
        num_samples = num_episodes * total_num_steps_per_episode
        valid_and_not_last = tf.logical_and(valid_steps, episodes.discount > 0)
        # `tf.where` on a 1-D mask returns shape [num_valid, 1]. Squeeze only
        # that trailing axis so a single valid step still gives a 1-D index
        # vector (a bare squeeze would produce a scalar and the gathers below
        # would drop the batch axis).
        valid_indices = tf.squeeze(
            tf.where(tf.reshape(valid_and_not_last[:, :-1], [-1])), axis=-1)

        def _flatten(t):
            """Merges the episode and step axes into one sample axis."""
            # NOTE(review): the bare squeeze removes every size-1 axis, so it
            # would also drop the sample axis if num_samples were 1 - confirm
            # that case cannot occur.
            return tf.squeeze(tf.reshape(t, [num_samples, -1]))

        def _keep_valid(structure):
            """Keeps only the rows selected by `valid_indices`."""
            return tf.nest.map_structure(
                lambda t: tf.gather(t, valid_indices), structure)

        # Flatten all tensors so that each data sample is a tuple of
        # (initial_env_step, env_step, next_env_step).
        initial_env_step = tf.nest.map_structure(
            lambda t: _flatten(
                tf.repeat(t[:, 0:1, ...],
                          axis=1,
                          repeats=total_num_steps_per_episode)), episodes)
        initial_env_step = _keep_valid(initial_env_step)
        tfagents_initial_env_step = dataset_lib.convert_to_tfagents_timestep(
            initial_env_step)

        env_step = tf.nest.map_structure(
            lambda t: _flatten(t[:, 0:total_num_steps_per_episode, ...]),
            episodes)
        env_step = _keep_valid(env_step)
        tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

        next_env_step = tf.nest.map_structure(
            lambda t: _flatten(t[:, 1:total_num_steps_per_episode + 1, ...]),
            episodes)
        next_env_step = _keep_valid(next_env_step)
        tfagents_next_env_step = dataset_lib.convert_to_tfagents_timestep(
            next_env_step)

        # Get target probabilities for initial and next steps.
        initial_target_probs = target_policy.distribution(
            tfagents_initial_env_step).action.probs_parameter()
        next_target_probs = target_policy.distribution(
            tfagents_next_env_step).action.probs_parameter()

        # Map states and actions to indices into tabular representation.
        # Every state is paired with every action, giving
        # [n_data, num_actions] index tensors.
        initial_states = tf.tile(
            tf.reshape(initial_env_step.observation, [-1, 1]),
            [1, self._num_actions])
        initial_actions = tf.tile(
            tf.reshape(tf.range(self._num_actions), [1, -1]),
            [initial_env_step.observation.shape[0], 1])
        initial_nu_indices = self._get_index(initial_states, initial_actions)

        next_states = tf.tile(tf.reshape(next_env_step.observation, [-1, 1]),
                              [1, self._num_actions])
        next_actions = tf.tile(
            tf.reshape(tf.range(self._num_actions), [1, -1]),
            [next_env_step.observation.shape[0], 1])
        next_nu_indices = self._get_index(next_states, next_actions)
        # Absorbing next steps get index -1.
        next_nu_indices = tf.where(
            tf.expand_dims(next_env_step.is_absorbing(), -1),
            -1 * tf.ones_like(next_nu_indices), next_nu_indices)

        nu_indices = self._get_index(env_step.observation, env_step.action)

        target_log_probabilities = target_policy.distribution(
            tfagents_env_step).action.log_prob(env_step.action)
        if not self._solve_for_state_action_ratio:
            # Importance ratio between target and behavior policy.
            policy_ratio = tf.exp(target_log_probabilities -
                                  env_step.get_log_probability())
        else:
            policy_ratio = tf.ones([
                target_log_probabilities.shape[0],
            ])
        policy_ratios = tf.tile(tf.reshape(policy_ratio, [-1, 1]),
                                [1, self._num_actions])

        # Bellman residual matrix of size [n_data, n_dim].
        a_vec = tf.one_hot(nu_indices, self._dimension) - tf.reduce_sum(
            self._gamma *
            tf.expand_dims(next_target_probs * policy_ratios, axis=-1) *
            tf.one_hot(next_nu_indices, self._dimension),
            axis=1)

        state_action_count = self._get_state_action_counts(env_step)
        # Bellman residual matrix of size [n_dim, n_dim].
        td_mat = tf.einsum('ai, a, aj -> ij',
                           tf.one_hot(nu_indices, self._dimension),
                           1.0 / tf.cast(state_action_count, tf.float32),
                           a_vec)

        # Reward vector of size [n_data].
        weighted_rewards = policy_ratio * self._reward_fn(env_step)

        # Reward vector of size [n_dim].
        bias = tf.reduce_sum(tf.one_hot(nu_indices, self._dimension) *
                             tf.reshape(weighted_rewards, [-1, 1]) * 1.0 /
                             tf.cast(state_action_count, tf.float32)[:, None],
                             axis=0)

        # Initialize: every column of nu / nu2 starts out equal to the bias.
        self._nu = np.ones_like(self._nu) * bias[:, None]
        self._nu2 = np.ones_like(self._nu2) * bias[:, None]

        self._a_vec = a_vec
        self._td_mat = td_mat
        self._bias = bias
        self._weighted_rewards = weighted_rewards
        self._state_action_count = state_action_count
        self._nu_indices = nu_indices
        self._initial_nu_indices = initial_nu_indices
        self._initial_target_probs = initial_target_probs
    def test_correctness(self,
                         optimizer_fn,
                         noise_size,
                         use_analytic_pricing,
                         expected_mr,
                         expected_vol,
                         mr_rtol=1e-4,
                         mr_atol=1e-3,
                         vol_rtol=1e-4,
                         vol_atol=1e-3):
        """Checks that calibration recovers constant model parameters.

        Cap/floor prices are first generated analytically from `expected_mr`
        and `expected_vol`, then perturbed with Gaussian noise whose standard
        deviation is `noise_size` times each price. The calibration is started
        from a fixed initial guess and its output is compared against the
        expected parameters within the supplied tolerances.
        """
        dtype = tf.float64

        def zero_rate_fn(x):
            # Flat 1% reference curve.
            return 0.01 * tf.ones_like(x, dtype=dtype)

        # Arguments common to the pricing call and the calibration call.
        shared_kwargs = dict(
            strikes=self.strikes,
            expiries=self.expiries,
            maturities=self.maturities,
            daycount_fractions=self.daycount_fractions,
            reference_rate_fn=zero_rate_fn,
            notional=1.0,
            dim=1,
            dtype=dtype)

        # Setup - synthesize the "observed" prices from the model itself.
        clean_prices = tff.models.hull_white.cap_floor_price(
            mean_reversion=[expected_mr],
            volatility=[expected_vol],
            is_cap=tf.expand_dims(self.is_cap, axis=1),
            use_analytic_pricing=True,
            **shared_kwargs)
        noise = tf.random.normal(clean_prices.shape,
                                 stddev=noise_size * clean_prices,
                                 seed=0,
                                 dtype=dtype)
        noisy_prices = clean_prices + noise

        # Calibrate the model against the noisy prices.
        calibration_fn = tff.models.hull_white.calibration_from_cap_floors
        calibrated_model, is_converged, _ = calibration_fn(
            prices=tf.squeeze(noisy_prices),
            mean_reversion=[0.4],
            volatility=[0.02],
            is_cap=tf.expand_dims(self.is_cap, axis=1),
            use_analytic_pricing=use_analytic_pricing,
            optimizer_fn=optimizer_fn,
            num_samples=1000,
            random_type=tff.math.random.RandomType.STATELESS_ANTITHETIC,
            seed=[0, 0],
            time_step=0.1,
            maximum_iterations=200,
            **shared_kwargs)

        # Evaluate both parameter vectors in one go: [mean_reversion, vol].
        fitted = self.evaluate(
            tf.concat([
                calibrated_model.mean_reversion.values(),
                calibrated_model.volatility.values(),
            ],
                      axis=0))
        fitted_mr = fitted[0]
        fitted_vol = fitted[1]

        # Assert model convergence to expected parameters.
        self.assertTrue(is_converged)
        self.assertAllClose(fitted_mr,
                            expected_mr,
                            rtol=mr_rtol,
                            atol=mr_atol)
        self.assertAllClose(fitted_vol,
                            expected_vol,
                            rtol=vol_rtol,
                            atol=vol_atol)
    def test_docstring_example(self):
        """Runs the calibration example from the docstring end to end.

        Prices are synthesized from known parameters and the calibration is
        then expected to recover those parameters from the prices.
        """
        dtype = tf.float64

        # Two 1-year instruments (4 quarterly periods, zero padded to 8
        # columns) followed by two 2-year instruments (8 quarterly periods).
        short_fractions = [0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0]
        long_fractions = [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]
        short_expiries = [0.0, 0.25, 0.5, 0.75, 1.0, 0.0, 0.0, 0.0]
        long_expiries = [0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.50, 1.75]
        short_maturities = [0.25, 0.5, 0.75, 1.0, 0.0, 0.0, 0.0, 0.0]
        long_maturities = [0.25, 0.5, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0]

        daycount_fractions = np.array(
            [short_fractions, short_fractions, long_fractions, long_fractions])
        expiries = np.array(
            [short_expiries, short_expiries, long_expiries, long_expiries])
        maturities = np.array([
            short_maturities, short_maturities, long_maturities,
            long_maturities
        ])
        is_cap = np.array([True, False, True, False])
        strikes = 0.01 * np.ones_like(expiries)

        # Parameters used to generate the prices; calibration should find them.
        expected_mr = [0.4]
        expected_vol = [0.01]

        def zero_rate_fn(x):
            # Flat 1% reference curve.
            return 0.01 * tf.ones_like(x, dtype=dtype)

        # Arguments common to the pricing call and the calibration call.
        shared_kwargs = dict(
            strikes=strikes,
            expiries=expiries,
            maturities=maturities,
            daycount_fractions=daycount_fractions,
            reference_rate_fn=zero_rate_fn,
            notional=1.0,
            dim=1,
            use_analytic_pricing=True,
            dtype=dtype)

        # Setup - generate some observed prices using the model.
        prices = tff.models.hull_white.cap_floor_price(
            mean_reversion=expected_mr,
            volatility=expected_vol,
            is_cap=tf.expand_dims(is_cap, axis=1),
            **shared_kwargs)

        # Calibrate the model, starting away from the true parameters.
        calibration_fn = tff.models.hull_white.calibration_from_cap_floors
        calibrated_model, is_converged, _ = calibration_fn(
            prices=tf.squeeze(prices),
            mean_reversion=[0.3],
            volatility=[0.02],
            is_cap=tf.expand_dims(is_cap, axis=1),
            optimizer_fn=None,
            num_samples=1000,
            random_type=tff.math.random.RandomType.STATELESS_ANTITHETIC,
            seed=[0, 0],
            time_step=0.1,
            maximum_iterations=200,
            **shared_kwargs)

        calibrated_mr, calibrated_vol = self.evaluate([
            calibrated_model.mean_reversion.values(),
            calibrated_model.volatility.values(),
        ])

        self.assertTrue(is_converged)
        self.assertAllClose(calibrated_mr, expected_mr, atol=1e-3, rtol=1e-2)
        self.assertAllClose(calibrated_vol, expected_vol, atol=1e-3, rtol=1e-2)
Exemple #15
0
 def negative_log_likelihood(y, rv_y):
     """Returns the negated log-probability of `y` under the model output.

     `rv_y` is accepted for signature compatibility and ignored; the
     distribution is read from the enclosing `model` instead.
     """
     del rv_y  # unused arg
     output_distribution = model.output.distribution  # pylint: disable=cell-var-from-loop
     # NOTE(review): tf.squeeze without `axis` drops every size-1 dimension.
     log_prob = output_distribution.log_prob(tf.squeeze(y))
     return -log_prob
Exemple #16
0
 def call(self, y_true, y_pred):
     """Returns the soft quantile of the powered absolute prediction error.

     The error is `|squeeze(y_pred) - y_true| ** self._power`; its soft
     quantile at `self._quantile` is taken along axis 0, forwarding
     `self._kwargs` to `ops.softquantiles`.
     """
     abs_residual = tf.abs(tf.squeeze(y_pred) - y_true)
     error = tf.pow(abs_residual, self._power)
     return ops.softquantiles(error, self._quantile, axis=0, **self._kwargs)
    def apply(self, x1, x2, example_ndims=0):
        """Apply the kernel function to pairs of inputs.

        Given an index set `S`, a kernel function is mathematically defined as
        a real- or complex-valued function on `S` satisfying the positive
        semi-definiteness constraint:

        ```none
        sum_i sum_j (c[i]*) c[j] k(x[i], x[j]) >= 0
        ```

        for any finite collections `{x[1], ..., x[N]}` in `S` and
        `{c[1], ..., c[N]}` in the reals (or the complex plane). '*' is the
        complex conjugate, in the complex case.

        This method most closely resembles the function described in the
        mathematical definition of a kernel. Given a
        PositiveSemidefiniteKernel `k` with scalar parameters and inputs `x`
        and `y` in `S`, `apply(x, y)` yields a single scalar value.

        Args:
          x1: `Tensor` input to the kernel, of shape `B1 + E1 + F`, where `B1`
            and `E1` may be empty (ie, no batch/example dims, resp.) and `F`
            (the feature shape) must have rank equal to the kernel's
            `feature_ndims` property. Batch shape must broadcast with the
            batch shape of `x2` and with the kernel's batch shape. Example
            shape must broadcast with example shape of `x2`. `x1` and `x2`
            must have the same *number* of example dims (ie, same rank).
          x2: `Tensor` input to the kernel, of shape `B2 + E2 + F`, where `B2`
            and `E2` may be empty (ie, no batch/example dims, resp.) and `F`
            (the feature shape) must have rank equal to the kernel's
            `feature_ndims` property. Batch shape must broadcast with the
            batch shape of `x1` and with the kernel's batch shape. Example
            shape must broadcast with example shape of `x1`. `x1` and `x2`
            must have the same *number* of example dims (ie, same rank).
          example_ndims: A python integer, the number of example dims in the
            inputs. In essence, this parameter controls how broadcasting of
            the kernel's batch shape with input batch shapes works. The kernel
            batch shape will be broadcast against everything to the left of
            the combined example and feature dimensions in the input shapes.

        Returns:
          `Tensor` containing the results of applying the kernel function to
          inputs `x1` and `x2`. If the kernel parameters' batch shape is `Bk`
          then the shape of the `Tensor` resulting from this method call is
          `broadcast(Bk, B1, B2) + broadcast(E1, E2)`.

        #### Examples

        ```python
        import tensorflow_probability as tfp

        # Suppose `SomeKernel` acts on vectors (rank-1 tensors)
        scalar_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=.5)
        scalar_kernel.batch_shape
        # ==> []

        # `x` and `y` are batches of five 3-D vectors:
        x = np.ones([5, 3], np.float32)
        y = np.ones([5, 3], np.float32)
        scalar_kernel.apply(x, y).shape
        # ==> [5]
        ```

        The above output is the result of vectorized computation of the five
        values

        ```none
        [k(x[0], y[0]), k(x[1], y[1]), ..., k(x[4], y[4])]
        ```

        Now we can consider a kernel with batched parameters:

        ```python
        batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
            param=[.2, .5])
        batch_kernel.batch_shape
        # ==> [2]
        batch_kernel.apply(x, y).shape
        # ==> Error! [2] and [5] can't broadcast.
        ```

        The parameter batch shape of `[2]` and the input batch shape of `[5]`
        can't be broadcast together. We can fix this in either of two ways:

        1. Give the parameter a shape of `[2, 1]` which will correctly
        broadcast with `[5]` to yield `[2, 5]`:

        ```python
        batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
            param=[[.2], [.5]])
        batch_kernel.batch_shape
        # ==> [2, 1]
        batch_kernel.apply(x, y).shape
        # ==> [2, 5]
        ```

        2. By specifying `example_ndims`, which tells the kernel to treat the
        `5` in the input shape as part of the "example shape", and "pushing"
        the kernel batch shape to the left:

        ```python
        batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
            param=[.2, .5])
        batch_kernel.batch_shape
        # ==> [2]
        batch_kernel.apply(x, y, example_ndims=1).shape
        # ==> [2, 5]
        ```
        """
        with self._name_and_control_scope(self._name):
            x1 = tf.convert_to_tensor(x1, name='x1', dtype_hint=self.dtype)
            x2 = tf.convert_to_tensor(x2, name='x2', dtype_hint=self.dtype)

            if not example_ndims == 0:
                # The inputs already carry example dims; delegate directly.
                return self._apply(x1, x2, example_ndims=example_ndims)

            # No example dims were given: insert a singleton example dim just
            # left of the feature dims, apply with one example dim, and strip
            # the singleton from the result again.
            x1 = tf.expand_dims(x1, -(self.feature_ndims + 1))
            x2 = tf.expand_dims(x2, -(self.feature_ndims + 1))
            expanded_result = self._apply(x1,
                                          x2,
                                          example_ndims=example_ndims + 1)
            return tf.squeeze(expanded_result, axis=-1)
Exemple #18
0
    def test_batching(self, input_batch_shape, kernel_batch_shape):
        """Compares the batched conv fn against per-example conv2d_transpose.

        The input and kernel are broadcast to a common batch shape and
        flattened to a single leading dimension. `tf.nn.conv2d_transpose` is
        then applied example by example, and the result is checked against the
        (flattened) output of the batched convolution function.
        """
        input_shape = (12, 12, 2)
        filter_shape = (3, 3)
        channels_out = 4
        strides = 2
        dilations = (1, 1)
        padding = 'SAME'
        rank = 2

        def with_flat_leading_dim(event_shape):
            # Prepends a -1 so reshape folds all batch dims into one.
            return ps.pad(event_shape, paddings=[[1, 0]], constant_values=-1)

        x, k = _make_input_and_kernel(self.make_input,
                                      input_batch_shape=input_batch_shape,
                                      input_shape=input_shape,
                                      kernel_batch_shape=kernel_batch_shape,
                                      filter_shape=filter_shape,
                                      channels_out=channels_out,
                                      dtype=self.dtype)

        # Output of the function under test.
        conv_fn = self.make_conv_fn(filter_shape, strides, padding, dilations)
        y_batched = conv_fn(x, k)

        # Broadcast input and kernel to the joint batch shape.
        joint_batch_shape = ps.broadcast_shape(input_batch_shape,
                                               kernel_batch_shape)
        full_input_shape = ps.concat([joint_batch_shape, input_shape], axis=0)
        full_kernel_shape = ps.concat(
            [joint_batch_shape, ps.shape(k)[-2:]], axis=0)
        broadcasted_input = tf.broadcast_to(x, shape=full_input_shape)
        broadcasted_kernel = tf.broadcast_to(k, shape=full_kernel_shape)

        # Collapse all batch dims into one leading dim.
        flat_y = tf.reshape(
            y_batched, shape=with_flat_leading_dim(ps.shape(y_batched)[-3:]))
        flat_x = tf.reshape(broadcasted_input,
                            shape=with_flat_leading_dim(input_shape))

        # Reshape the kernel to [-1, *filter_shape, in, out] and swap its last
        # two axes for use with conv2d_transpose.
        unflattened_kernel_shape = ps.concat(
            [(-1, ), filter_shape, (input_shape[-1], channels_out)], axis=0)
        flat_tf_kernel = tf.einsum(
            '...ij->...ji',
            tf.reshape(broadcasted_kernel, shape=unflattened_kernel_shape))

        output_shape, strides_ = convolution_util._get_output_shape(
            rank=rank,
            strides=(strides, ) * rank,
            padding=padding,
            dilations=dilations,
            input_shape=input_shape,
            output_size=channels_out,
            filter_shape=filter_shape)

        def transposed_conv_one_example(args):
            # args = (single input, single kernel); add a batch dim of 1.
            return tf.nn.conv2d_transpose(
                args[0][tf.newaxis],
                args[1],
                output_shape=ps.concat([[1], output_shape], axis=0),
                strides=strides_,
                padding=padding)

        y_expected = tf.vectorized_map(transposed_conv_one_example,
                                       elems=(flat_x, flat_tf_kernel))

        # Drop the per-example batch dim of 1 before comparing.
        y_actual_, y_expected_ = self.evaluate(
            [flat_y, tf.squeeze(y_expected, axis=1)])
        self.assertAllClose(y_expected_, y_actual_, rtol=1e-5, atol=0)
Exemple #19
0
def beam_search(symbols_to_logits_fn,
                init_seq_BxT,
                initial_cache_BxU,
                vocab_size,
                beam_size,
                length_norm_fn,
                eos_id=1):
  """Beam search.

  Variable-name suffixes describe tensor dimensions: B is the batch size and
  T the sequence length (both read from `init_seq_BxT.shape`), M is
  `beam_size` and V is `vocab_size`. U stands for the cache dimensions.

  At every step the search keeps M "alive" candidates (sequences that do not
  contain `eos_id`) and M "finished" candidates (sequences that do), and runs
  for exactly T steps.

  Args:
    symbols_to_logits_fn: fn(seq_BxT, cache_BxU, i) -> (logits_BxV, cache_BxU)
    init_seq_BxT: initial sequence ids.
    initial_cache_BxU: dictionary of tensors with shape BxU.
    vocab_size: vocabulary size.
    beam_size: beam size.
    length_norm_fn: length normalization function. Called as
      `length_norm_fn(log_probs, i + 1)` to turn log-probabilities into scores.
    eos_id: end of sequence.

  Returns:
    Tuple of (beams_BxMxT, scores_BxM). Beam searched sequences and scores.
    For each beam slot the finished candidate is returned if it contains
    `eos_id`; otherwise the alive candidate and its log-probability are
    returned instead.
  """
  # NOTE(review): B and T come from the static shape and are used as Python
  # values below (reshape, tile, maximum_iterations) - presumably the input
  # shape must be fully defined; confirm with callers.
  B, T = init_seq_BxT.shape
  M, V = beam_size, vocab_size
  dtype = tf.float32
  int_dtype = init_seq_BxT.dtype

  def _loop_body(i, alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU,
                 finished_seq_BxMxT, finished_scores_BxM):
    """Beam search loop body."""
    # Decode one step with beam
    logits_BMxV, cache_BMxU = symbols_to_logits_fn(
        _flatten_beam_dim(alive_seq_BxMxT),
        tf.nest.map_structure(_flatten_beam_dim, alive_cache_BxMxU), i)
    logits_BxMxV = _unflatten_beam_dim(logits_BMxV, M)
    new_cache_BxMxU = tf.nest.map_structure(lambda t: _unflatten_beam_dim(t, M),
                                            cache_BMxU)

    # select top 2 * beam_size and fill alive and finished.
    # Normalize logits to log-probabilities and add the running log-probability
    # of each beam, then rank all M * V continuations jointly.
    log_probs_BxMxV = logits_BxMxV - tf.reduce_logsumexp(
        logits_BxMxV, axis=2, keepdims=True)
    log_probs_BxMxV += tf.expand_dims(alive_log_probs_BxM, axis=2)
    log_probs_BxMV = tf.reshape(log_probs_BxMxV, [B, -1])
    new_log_probs_Bx2M, topk_indices_Bx2M = tf.nn.top_k(log_probs_BxMV, k=2 * M)
    # A flat index into the M * V axis decomposes into (beam, token id).
    topk_beam_Bx2M = topk_indices_Bx2M // V
    topk_seq_Bx2MxT, new_cache_Bx2MxU = _gather_nested(
        [alive_seq_BxMxT, new_cache_BxMxU], topk_beam_Bx2M)
    topk_ids_Bx2M = topk_indices_Bx2M % V
    new_seq_Bx2MxT = _update_i(topk_seq_Bx2MxT, topk_ids_Bx2M, i)
    # 1.0 where the candidate sequence contains eos_id anywhere, else 0.0.
    new_finished_flags_Bx2M = tf.cast(
        tf.reduce_any(tf.equal(new_seq_Bx2MxT, eos_id), axis=-1), dtype)

    # get new alive
    # Adding dtype.min to finished candidates pushes them to the bottom of the
    # ranking, so top_k prefers unfinished ones.
    _, topk_alive_indices_BxM = tf.nn.top_k(
        new_log_probs_Bx2M + new_finished_flags_Bx2M * dtype.min, k=M)
    (alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU) = _gather_nested(
        [new_seq_Bx2MxT, new_log_probs_Bx2M, new_cache_Bx2MxU],
        topk_alive_indices_BxM)

    # get new finished
    # Unfinished candidates get dtype.min added to their score so that they
    # rank below any finished candidate when merged with the previous
    # finished set.
    new_scores_Bx2M = length_norm_fn(new_log_probs_Bx2M, i + 1)
    new_scores_Bx2M += (1 - new_finished_flags_Bx2M) * dtype.min
    finished_seq_Bx3MxT = tf.concat([finished_seq_BxMxT, new_seq_Bx2MxT],
                                    axis=1)
    finished_scores_Bx3M = tf.concat([finished_scores_BxM, new_scores_Bx2M],
                                     axis=1)
    _, topk_finished_indices_BxM = tf.nn.top_k(finished_scores_Bx3M, k=M)
    (finished_seq_BxMxT, finished_scores_BxM) = _gather_nested(
        [finished_seq_Bx3MxT, finished_scores_Bx3M], topk_finished_indices_BxM)

    return [
        i + 1, alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU,
        finished_seq_BxMxT, finished_scores_BxM
    ]

  # initialize.
  init_i = tf.constant(0, dtype=int_dtype)
  init_alive_seq_BxMxT = _expand_to_beam_size(init_seq_BxT, M)
  # Only the first beam starts with log-probability 0; the other M - 1 start
  # at dtype.min so the first top_k is not filled with duplicate beams.
  log_probs_1xM = tf.constant([[0.] + [dtype.min] * (M - 1)], dtype=dtype)
  init_alive_log_probs_BxM = tf.tile(log_probs_1xM, [B, 1])
  init_alive_cache_BxMxU = tf.nest.map_structure(
      lambda t: _expand_to_beam_size(t, M), initial_cache_BxU)
  # The finished set starts as all-zero sequences with dtype.min scores.
  init_finished_seq_BxMxT = tf.zeros(tf.shape(init_alive_seq_BxMxT), int_dtype)
  init_finished_scores_BxM = tf.zeros([B, M], dtype=dtype) + dtype.min

  # run loop.
  (_, final_alive_seq_BxMxT, final_alive_scores_BxM, _,
   final_finished_seq_BxMxT, final_finished_scores_BxM) = tf.while_loop(
       lambda *args: True,  # Always do T iterations
       _loop_body,
       loop_vars=[
           init_i, init_alive_seq_BxMxT, init_alive_log_probs_BxM,
           init_alive_cache_BxMxU, init_finished_seq_BxMxT,
           init_finished_scores_BxM
       ],
       parallel_iterations=1,
       back_prop=False,
       maximum_iterations=T,
   )

  # process finished.
  # A finished slot is valid only if its sequence actually contains eos_id;
  # otherwise fall back to the alive sequence and its log-probability.
  final_finished_flag_BxMx1 = tf.reduce_any(
      tf.equal(final_finished_seq_BxMxT, eos_id), axis=-1, keepdims=True)
  final_seq_BxMxT = tf.where(
      tf.tile(final_finished_flag_BxMx1, [1, 1, T]), final_finished_seq_BxMxT,
      final_alive_seq_BxMxT)
  final_scores_BxM = tf.where(
      tf.squeeze(final_finished_flag_BxMx1, axis=-1), final_finished_scores_BxM,
      final_alive_scores_BxM)
  return final_seq_BxMxT, final_scores_BxM
Exemple #20
0
def concatenate_batch_into_sample(batch):
    """Flattens every feature of `batch` into a single row, in place.

    Each value is reshaped to `[1, -1]`, so all elements of the batch end up
    concatenated along the second axis. The same (mutated) mapping is
    returned.
    """
    for name, tensor in batch.items():
        batch[name] = tf.reshape(tensor, [1, -1])
    return batch


# Flatten every batch of the incoming dataset into one long example and
# collect the results.
for batch in dataset:
    concatenated_examples.append(concatenate_batch_into_sample(batch))


# Maps feature name -> tensor holding that feature for all examples.
feature_dict = {}

for feature in concatenated_examples[0].keys():
    feature_list = [example[feature] for example in concatenated_examples]
    # Stack the per-example [1, -1] tensors along a new leading axis, then
    # drop all size-1 dimensions.
    # NOTE(review): squeeze without `axis` would also drop the example axis if
    # there is only one example - confirm that cannot happen here.
    feature_dict[feature] = tf.squeeze(tf.stack(
        feature_list, axis=0))

# Convert to a NumPy array so rows can be overwritten in place below.
feature_dict["f0_hz"] = feature_dict["f0_hz"].numpy()
if INTONATION:
    # Replace each example's f0 curve with its `intonate`d version.
    for di in range(feature_dict["f0_hz"].shape[0]):
        feature_dict["f0_hz"][di, :] = intonate(
            feature_dict["f0_hz"][di, :])


# Rebuild the dataset with one element per row of the stacked features.
dataset = tf.data.Dataset.from_tensor_slices(feature_dict)

ex = next(iter(dataset))


# Sanity check on the audio length of the first example (presumably 16 s at a
# 16 kHz sample rate - TODO confirm).
assert ex["audio"].shape[0] == 16000*16
    def test_univariate_sample_mean_and_variance_time_varying_drift(
            self, supply_draws, dtype):
        """Checks log-path mean/variance of a univariate GBM.

        Two scenarios are covered: a piecewise-constant drift with zero
        volatility, and a constant drift with piecewise-constant volatility.
        """
        initial_state = 2.0
        min_tol = _tolerance_by_dtype(dtype)
        log_initial_state = np.log(initial_state)

        def check_moments(sample_stats, expected_mean, expected_var):
            # Tolerances scale with the standard errors of the estimates, with
            # a precision-based floor.
            mean, var, se_mean, se_var = sample_stats
            mean_tol = np.maximum(se_mean * NUM_STDERRS, min_tol)
            var_tol = np.maximum(se_var * NUM_STDERRS, min_tol)
            arrays_all_close(self,
                             tf.squeeze(mean),
                             expected_mean,
                             mean_tol,
                             msg="comparing means")
            arrays_all_close(self,
                             tf.squeeze(var),
                             expected_var,
                             var_tol,
                             msg="comparing variances")

        with self.subTest("Drift as a step function, sigma = 0.0"):
            mu = tff.math.piecewise.PiecewiseConstantFunc(
                jump_locations=np.array([0.0, 5.0, 10.0], dtype=dtype),
                values=np.array([0.0, 0.0, 0.05, 0.05], dtype=dtype),
                dtype=dtype)
            sigma = 0.0
            times = np.array([0.0, 1.0, 5.0, 7.0, 10.0], dtype=dtype)
            sample_stats = calculate_sample_paths_mean_and_variance(
                self, mu, sigma, times, initial_state, supply_draws,
                NUM_SAMPLES, dtype)
            # Accumulated drift at each time: zero up to t = 5, then 0.05 per
            # unit of time.
            accumulated_drift = np.array(
                [
                    0.0,  # t = 0
                    0.0,  # t = 1.0, drift is still zero
                    0.0,  # t = 5.0, drift is still zero
                    2.0 * 0.05,  # t = 7.0, two units at 0.05
                    5.0 * 0.05  # t = 10.0, five units at 0.05
                ],
                dtype=dtype)
            expected_mean = accumulated_drift + log_initial_state
            # sigma is zero, so the expected variance is zero everywhere.
            expected_var = sigma * np.sqrt(times)
            check_moments(sample_stats, expected_mean, expected_var)

        with self.subTest("Drift = 0.05, sigma = step function"):
            mu = 0.05
            sigma = tff.math.piecewise.PiecewiseConstantFunc(
                jump_locations=np.array([0.0, 5.0, 10.0], dtype=dtype),
                values=np.array([0.0, 0.2, 0.4, 0.6], dtype=dtype),
                dtype=dtype)
            times = np.array([0.0, 1.0, 5.0, 7.0, 10.0], dtype=dtype)
            sample_stats = calculate_sample_paths_mean_and_variance(
                self, mu, sigma, times, initial_state, supply_draws,
                NUM_SAMPLES, dtype)
            # Mean of the log-path: drift * t minus half the integrated
            # variance, shifted by the log of the initial state.
            log_growth = np.array(
                [
                    0.0,  # t = 0
                    1.0 * mu - 0.5 * 1.0 * 0.2**2,  # t = 1.0
                    5.0 * mu - 0.5 * 5.0 * 0.2**2,  # t = 5.0
                    7.0 * mu - 0.5 * (5.0 * 0.2**2 + 2.0 * 0.4**2),  # t = 7.0
                    10.0 * mu - 0.5 * (5.0 * 0.2**2 + 5.0 * 0.4**2)  # t = 10.0
                ],
                dtype=dtype)
            expected_mean = log_growth + log_initial_state
            # Integrated variance: sigma = 0.2 on [0, 5), 0.4 on [5, 10).
            expected_var = np.array(
                [
                    0.0,  # t = 0
                    1.0 * 0.2**2,  # t = 1.0
                    5.0 * 0.2**2,  # t = 5.0
                    5.0 * 0.2**2 + 2.0 * 0.4**2,  # t = 7.0
                    5.0 * 0.2**2 + 5.0 * 0.4**2  # t = 10.0
                ],
                dtype=dtype)
            check_moments(sample_stats, expected_mean, expected_var)
Exemple #22
0
    def call(self, x, training=False):
        """Quantizes `x` against the current centroids, one code per head.

        Args:
          x: Input tensor; it is reshaped to `(-1, self.depth)`, so its
            elements must be divisible into vectors of length `self.depth`.
          training: If true, under-used centroids are restarted before
            quantizing and the running counts/sums are updated afterwards.

        Returns:
          A tuple `(z, c)`. `z` has the shape of `x` and holds the quantized
          vectors, with gradients passed straight through to `x`. `c` holds
          the selected centroid indices, with shape
          `tf.shape(x)[:-1] + [self.num_heads]`.
        """
        x_flat = tf.reshape(x, shape=(-1, self.depth))

        # Split each input vector into one segment per head.
        # The segments are then stacked along axis 0, so every row of x_flat
        # is a single head-sized vector.
        x_flat_split = tf.split(x_flat, self.num_heads, axis=1)
        x_flat = tf.concat(x_flat_split, axis=0)

        if training:
            # Figure out which centroids we want to keep, and which we want to
            # restart.
            # NOTE(review): n is the static leading dimension - presumably the
            # batch size must be known at trace time; confirm.
            n = x_flat.shape[0]
            keep = self.counts * self.k > self.restart_threshold * n
            restart = tf.math.logical_not(keep)

            # Replace centroids to restart with elements from the batch, using samples
            # from a uniform distribution as a fallback in case we need to restart
            # more centroids than we have elements in the batch.
            restart_idx = tf.squeeze(tf.where(restart), -1)
            n_replace = tf.minimum(tf.shape(restart_idx)[0], x_flat.shape[0])
            e_restart = tf.tensor_scatter_nd_update(
                tf.random.uniform([self.k, self.depth // self.num_heads]),
                tf.expand_dims(restart_idx[:n_replace], 1),
                tf.random.shuffle(x_flat)[:n_replace])

            # Compute the values of the centroids we want to keep by dividing the
            # summed vectors by the corresponding counts.
            e = tf.where(
                tf.expand_dims(keep, 1),
                tf.math.divide_no_nan(self.sums,
                                      tf.expand_dims(self.counts, 1)),
                e_restart)

        else:
            # If not training, just use the centroids as is with no restarts.
            e = tf.math.divide_no_nan(self.sums,
                                      tf.expand_dims(self.counts, 1))

        # Compute distance between each input vector and each cluster center.
        # Uses the expansion ||x - e||^2 = ||x||^2 - 2 x.e + ||e||^2.
        distances = (tf.expand_dims(tf.reduce_sum(x_flat**2, axis=1), 1) -
                     2 * tf.matmul(x_flat, tf.transpose(e)) +
                     tf.expand_dims(tf.reduce_sum(e**2, axis=1), 0))

        # Find nearest cluster center for each input vector.
        c = tf.argmin(distances, axis=1)

        # Quantize input vectors with straight-through estimator.
        # The head split from above is undone first so z lines up with x.
        z = tf.nn.embedding_lookup(e, c)
        z_split = tf.split(z, self.num_heads, axis=0)
        z = tf.concat(z_split, axis=1)
        z = tf.reshape(z, tf.shape(x))
        # Forward value is z; the gradient flows to x as if z were x.
        z = x + tf.stop_gradient(z - x)

        if training:
            # Compute cluster counts and vector sums over the batch.
            oh = tf.one_hot(indices=c, depth=self.k)
            counts = tf.reduce_sum(oh, axis=0)
            sums = tf.matmul(oh, x_flat, transpose_a=True)

            # Apply exponential moving average to cluster counts and vector sums.
            # Equivalent to: new = gamma * old + (1 - gamma) * batch value.
            self.counts.assign_sub((1 - self.gamma) * (self.counts - counts))
            self.sums.assign_sub((1 - self.gamma) * (self.sums - sums))

        # Undo the head split for the indices: one index per head per vector.
        c_split = tf.split(c, self.num_heads, axis=0)
        c = tf.stack(c_split, axis=1)
        c = tf.reshape(c,
                       tf.concat([tf.shape(x)[:-1], [self.num_heads]], axis=0))

        return z, c
    def test_univariate_time_varying_vol_batched_time(self, supply_draws,
                                                      dtype):
        """Checks log-path mean/variance of a GBM with batched vol and times.

        Two batch members are sampled, each with its own piecewise-constant
        volatility and its own time grid, sharing a constant drift.
        """
        initial_state = 2.0
        min_tol = 5e-3
        mu = 0.05

        # Batch member 0: sigma = 0.2 before t = 5, 0.4 afterwards.
        # Batch member 1: sigma = 0.5 before t = 7, 0.3 on [7, 10), 0.1 after.
        sigma = tff.math.piecewise.PiecewiseConstantFunc(
            jump_locations=np.array([[0.0, 5.0, 10.0], [0.0, 7.0, 10.0]],
                                    dtype=dtype),
            values=np.array([[0.2, 0.2, 0.4, 0.4], [0.5, 0.5, 0.3, 0.1]],
                            dtype=dtype),
            dtype=dtype)
        times = np.array(
            [[0.0, 1.0, 5.0, 7.0, 12.0], [0.0, 1.5, 3.5, 9.0, 17.0]],
            dtype=dtype)

        mean, var, se_mean, se_var = calculate_sample_paths_mean_and_variance(
            self, mu, sigma, times, initial_state, supply_draws, NUM_SAMPLES,
            dtype)

        # Mean of the log-path: drift * t minus half the integrated variance.
        log_growth_first = [
            0.0,  # t = 0
            1.0 * mu - 0.5 * 1.0 * 0.2**2,  # t = 1.0
            5.0 * mu - 0.5 * 5.0 * 0.2**2,  # t = 5.0
            7.0 * mu - 0.5 * (5.0 * 0.2**2 + 2.0 * 0.4**2),  # t = 7.0
            12.0 * mu - 0.5 * (5.0 * 0.2**2 + 7.0 * 0.4**2)  # t = 12.0
        ]
        log_growth_second = [
            0.0,  # t = 0
            1.5 * mu - 0.5 * 1.5 * 0.5**2,  # t = 1.5
            3.5 * mu - 0.5 * 3.5 * 0.5**2,  # t = 3.5
            9.0 * mu - 0.5 * (7.0 * 0.5**2 + 2.0 * 0.3**2),  # t = 9.0
            17.0 * mu - 0.5 *
            (7.0 * 0.5**2 + 3.0 * 0.3**2 + 7.0 * 0.1**2)  # t = 17.0
        ]
        expected_mean = np.array([log_growth_first, log_growth_second],
                                 dtype=dtype) + np.log(initial_state)

        # Integrated variance up to each time.
        integrated_var_first = [
            0.0,  # t = 0
            1.0 * 0.2**2,  # t = 1.0
            5.0 * 0.2**2,  # t = 5.0
            5.0 * 0.2**2 + 2.0 * 0.4**2,  # t = 7.0
            5.0 * 0.2**2 + 7.0 * 0.4**2  # t = 12.0
        ]
        integrated_var_second = [
            0.0,  # t = 0
            1.5 * 0.5**2,  # t = 1.5
            3.5 * 0.5**2,  # t = 3.5
            7.0 * 0.5**2 + 2.0 * 0.3**2,  # t = 9.0
            7.0 * 0.5**2 + 3.0 * 0.3**2 + 7.0 * 0.1**2  # t = 17.0
        ]
        expected_var = np.array([integrated_var_first, integrated_var_second],
                                dtype=dtype)

        # Tolerances scale with the standard errors, with a fixed floor.
        mean_tol = np.maximum(se_mean * NUM_STDERRS, min_tol)
        var_tol = np.maximum(se_var * NUM_STDERRS, min_tol)

        arrays_all_close(self,
                         tf.squeeze(mean),
                         expected_mean,
                         mean_tol,
                         msg="comparing means")
        arrays_all_close(self,
                         tf.squeeze(var),
                         expected_var,
                         var_tol,
                         msg="comparing variances")
Exemple #24
0
 def log_likelihood(y_true, y_sample):
   """Log-probability of `y_true` under the model's output distribution.

   `y_sample` is accepted for signature compatibility and ignored.
   """
   del y_sample  # unused arg
   output_distribution = model.output.distribution
   squeezed_targets = tf.squeeze(y_true)
   return output_distribution.log_prob(squeezed_targets)
def main(argv):
    """Trains a Bayesian LeNet-5, then refines it with auxiliary sampling.

    The run proceeds in this order:
      1. Seed NumPy and TensorFlow, create `FLAGS.output_dir`, and disable TF2
         behavior so everything runs inside one `tf1.Session`.
      2. Load `FLAGS.dataset`, build `lenet5`, and train it for
         `FLAGS.training_steps` steps; evaluate it and save the base weights.
      3. Keep training the same model for as many extra steps as the
         refinement phase uses, to obtain an "overtrained" baseline.
      4. For each of `FLAGS.ensemble_size` members, reload the base weights,
         alternate running the auxiliary sampling ops with short fits, and
         save the member's weights.
      5. Evaluate the saved ensemble and log all three sets of metrics.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset, session)
        n_train = x_train.shape[0]

        num_classes = int(np.amax(y_train)) + 1
        model = lenet5(n_train, x_train.shape[1:], num_classes)
        # Scalar, non-trainable accumulators on every layer; the auxiliary
        # sampling ops further down add their costs to them.
        for layer in model.layers:
            layer.kl_cost_weight = layer.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            # `add_weight` replaces the deprecated `add_variable` alias.
            layer.kl_cost_bias = layer.add_weight(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, accuracy, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)
        metrics = [elbo, log_likelihood, kl, accuracy]
        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        def fit_fn(model, steps, initial_epoch):
            """Fits `model` for `steps` batches, expressed in whole epochs."""
            return model.fit(
                x=x_train,
                y=y_train,
                batch_size=FLAGS.batch_size,
                # Keras counts epochs, so convert the step budget to epochs.
                epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
                initial_epoch=initial_epoch,
                validation_data=(x_test, y_test),
                validation_freq=max(
                    (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
                verbose=1,
                callbacks=[tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        # `global_variables_initializer` replaces the deprecated
        # `initialize_all_variables`.
        session.run(tf1.global_variables_initializer())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps, initial_epoch=0)

        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        # Backend function returning per-example log-probabilities and logits.
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(tf.squeeze(labels)),
            model.output.distribution.logits
        ])

        base_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll, n_samples=10),
            utils.ensemble_metrics(x_test, y_test, model, ll, n_samples=10)
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)

        overtrained_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll, n_samples=10),
            utils.ensemble_metrics(x_test, y_test, model, ll, n_samples=10)
        ]

        # Perform refined VI.
        sample_op = []
        for layer in model.layers:
            if isinstance(
                    layer,
                    tfp.layers.DenseLocalReparameterization) or isinstance(
                        layer, tfp.layers.Convolution2DFlipout):
                weight_op, weight_cost = sample_auxiliary_op(
                    layer.kernel_prior.distribution,
                    layer.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(layer.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior
                session.run(
                    layer.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(layer.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        layer.bias_prior.distribution,
                        layer.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(
                        layer.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior
                    session.run(
                        layer.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            # Every ensemble member starts again from the saved base weights.
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of the prior.
                        learning_rate=float(
                            FLAGS.learning_rate_for_sampling *
                            np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            utils.ensemble_metrics(x_train,
                                   y_train,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames,
                                   n_samples=10),
            utils.ensemble_metrics(x_test,
                                   y_test,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames,
                                   n_samples=10)
        ]

        # A distinct loop name avoids rebinding the `metrics` list used by
        # `model.compile` above.
        for results, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(results[0], 'train'),
                                        (results[1], 'test')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
Exemple #26
0
def pass_arg(Xx, nsim, tr_size, num_iter):
    """Fits a physics-regularized GP kernel and samples predictions at `Xx`.

    Loads labeled and unlabeled data from hard-coded paths under `../data/`,
    optimizes the kernel amplitude and two per-dimension length scales with
    Adam, then draws samples from a `GaussianProcessRegressionModel` that uses
    the optimized kernel with `Xx` as index points.

    Args:
      Xx: Index points at which samples are drawn. NOTE(review): `Xx` is not
        passed through the MinMaxScaler here -- presumably the caller already
        scaled it; confirm.
      nsim: Number of samples to draw (cast to `int`).
      tr_size: Number of leading labeled rows used for training (cast to
        `int`).
      num_iter: Number of Adam steps (cast to `int`).

    Returns:
      NumPy array holding the drawn samples with axis 1 squeezed out.
    """
    print("Tr_size:", tr_size)

    def fix_seeds(seed):
        """Seeds Python, NumPy and TF and installs a single-threaded session."""
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)
        session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                                inter_op_parallelism_threads=1)
        sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                    config=session_conf)
        tf.compat.v1.keras.backend.set_session(sess)

    ss = 1
    fix_seeds(ss)

    # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred)
    # NOTE(review): not referenced anywhere else in this function.
    def root_mean_squared_error(y_true, y_pred):
        return tf.math.sqrt(
            tf.math.reduce_mean(tf.math.square(y_pred - y_true), axis=-1))

    class InputTransformedKernel(
            tfp.math.psd_kernels.PositiveSemidefiniteKernel):
        """Kernel that applies `transformation` to both inputs of `kernel`."""

        def __init__(self,
                     kernel,
                     transformation,
                     name='InputTransformedKernel'):
            self._kernel = kernel
            self._transformation = transformation
            super(InputTransformedKernel,
                  self).__init__(feature_ndims=kernel.feature_ndims,
                                 dtype=kernel.dtype,
                                 name=name)

        def apply(self, x1, x2):
            return self._kernel.apply(self._transformation(x1),
                                      self._transformation(x2))

        def matrix(self, x1, x2):
            return self._kernel.matrix(self._transformation(x1),
                                       self._transformation(x2))

        @property
        def batch_shape(self):
            return self._kernel.batch_shape

        def batch_shape_tensor(self):
            # Call the wrapped kernel's method. Returning the bound method
            # itself would hand callers a function instead of a shape tensor.
            return self._kernel.batch_shape_tensor()

    class InputScaledKernel(InputTransformedKernel):
        """Divides inputs by per-dimension `length_scales` before `kernel`."""

        def __init__(self, kernel, length_scales):
            super(InputScaledKernel, self).__init__(
                kernel, lambda x: x / tf.expand_dims(
                    length_scales, -(kernel.feature_ndims + 1)))

    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    # Inputs are the first two columns of the labeled file.
    x_labeled = data[:, :2].astype(np.float64)
    # The target is the second-to-last column, kept 2-D (shape [n, 1]).
    y_labeled = data[:, -2:-1].astype(np.float64)

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    x_labeled = scaler.fit_transform(x_labeled)

    tr_size = int(tr_size)

    # Training data: the first `tr_size` labeled rows.
    trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]

    trainY = np.transpose(trainY)

    data_phyloss = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
    x_unlabeled = data_phyloss[:, :]

    # initial porosity
    initporo = x_unlabeled[:, -1]

    # Keep the first 1303 and the last 6 unlabeled rows (first two columns).
    x_unlabeled1 = x_unlabeled[:1303, :2]
    x_unlabeled2 = x_unlabeled[-6:, :2]
    x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2))

    # NOTE(review): this re-fits the scaler on the unlabeled inputs rather
    # than reusing the fit from the labeled inputs -- confirm this is intended.
    x_unlabeled = scaler.fit_transform(x_unlabeled)
    init_poro1 = initporo[:1303]
    init_poro2 = initporo[-6:]
    init_poro = np.hstack((init_poro1, init_poro2))

    def build_gp(amplitude, length_scale):
        """Defines the conditional dist. of GP outputs, given kernel parameters."""

        # Create the covariance kernel, which will be shared between the prior (which we
        # use for maximum likelihood training) and the posterior (which we use for
        # posterior predictive sampling)
        se_kernel = tfk.ExponentiatedQuadratic(
            amplitude)  # length_scale = None here, implicitly

        # This is the "ARD" kernel (we don't like abbreviations or bizarrely obscure names in
        # TFP, so we're probably going to call this "InputScaledKernel" since....that's what it is! :)
        kernel = InputScaledKernel(se_kernel, length_scale)

        # Create the GP prior distribution, which we will use to train the model
        # parameters.
        return tfd.GaussianProcess(kernel=kernel, index_points=trainX)

    gp_joint_model = tfd.JointDistributionNamedAutoBatched({
        'amplitude':
        tfd.TransformedDistribution(distribution=tfd.Normal(
            loc=0., scale=np.float64(1.)),
                                    bijector=tfb.Exp(),
                                    batch_shape=[1]),
        'length_scale':
        tfd.TransformedDistribution(distribution=tfd.Normal(
            loc=0., scale=np.float64(1.)),
                                    bijector=tfb.Exp(),
                                    batch_shape=[2]),
        'observations':
        build_gp,
    })

    # Create the trainable model parameters, which we'll subsequently optimize.
    # Note that we constrain them to be strictly positive.
    constrain_positive = tfb.Shift(np.finfo(np.float64).tiny)(tfb.Exp())

    amplitude_var = tfp.util.TransformedVariable(
        initial_value=np.random.uniform(size=1),
        bijector=constrain_positive,
        name='amplitude',
        dtype=np.float64)

    length_scale_var = tfp.util.TransformedVariable(
        initial_value=np.random.uniform(size=[2]),
        bijector=constrain_positive,
        name='length_scale',
        dtype=np.float64)

    trainable_variables = [
        v.trainable_variables[0] for v in [amplitude_var, length_scale_var]
    ]

    @tf.function(autograph=False, experimental_compile=False)
    def target_log_prob(amplitude, length_scale, poroi, lam):
        """Loss: `lam` * physics penalty minus the joint model log-probability."""
        tf.random.set_seed(1234)
        se_kernel = tfk.ExponentiatedQuadratic(
            amplitude)  # length_scale = None here, implicitly
        optimized_kernel = InputScaledKernel(se_kernel, length_scale)
        gprm = tfd.GaussianProcessRegressionModel(kernel=optimized_kernel,
                                                  index_points=x_unlabeled)
        samples = gprm.sample(1)
        pred = tf.squeeze(samples, axis=0)

        # Penalize predictions below zero or above the initial porosity.
        phyloss_poro = tf.math.reduce_mean(
            tf.nn.relu(tf.negative(pred)) + tf.nn.relu(pred - poroi))

        return lam * phyloss_poro - gp_joint_model.log_prob(
            {
                'amplitude': amplitude,
                'length_scale': length_scale,
                'observations': trainY
            })

    fix_seeds(1)

    # Optimize the model parameters.
    num_iters = int(num_iter)
    lam = 100000
    optimizer = tf.optimizers.Adam(learning_rate=.1)

    # Store the loss values during training, so we can plot the progress
    lls_ = np.zeros(num_iters, np.float64)

    for i in range(num_iters):
        with tf.GradientTape() as tape:
            loss = target_log_prob(amplitude_var, length_scale_var, init_poro,
                                   lam)  # physics loss & normal loss

        grads = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(grads, trainable_variables))
        lls_[i] = loss

    fix_seeds(1)
    se_kernel = tfk.ExponentiatedQuadratic(
        amplitude_var)  # length_scale = None here, implicitly
    optimized_kernel = InputScaledKernel(se_kernel, length_scale_var)
    # NOTE(review): no observations are passed here, so this presumably
    # samples the GP prior under the optimized kernel -- confirm intended.
    gprm = tfd.GaussianProcessRegressionModel(kernel=optimized_kernel,
                                              index_points=Xx)
    preds = gprm.sample(int(nsim))
    samples = np.array(tf.squeeze(preds, axis=1))

    return samples
Exemple #27
0
def softquantiles(x,
                  quantiles,
                  quantile_width=None,
                  axis=-1,
                  may_squeeze=True,
                  **kwargs):
    """Computes soft quantiles via optimal transport.

    Recovering a single quantile does not need a full softsort: all values of
    `x` can be transported onto just 3 weighted targets, with the weights
    chosen so that the values landing on the middle target are those
    concentrated around the requested quantile. For several quantiles the
    same idea applies, interleaving small weights at the quantile positions
    with larger "filler" weights covering the gaps between them.

    Args:
      x: Tensor<float> of any shape.
      quantiles: list<float> of quantiles to return, or a single float.
      quantile_width: (float) mass given to the bucket that attracts points
        near the desired quantile value. A larger width lets the soft quantile
        mix points further away from the quantile. If None, it is the minimum
        of the gaps between consecutive quantiles (including 0 and 1) and
        1/n, where n is the size of `x` along `axis`.
      axis: (int) the axis along which to compute the quantile.
      may_squeeze: (bool) whether to squeeze the output when a single
        quantile is produced.
      **kwargs: forwarded to `softsort`.

    Returns:
      A Tensor<float> shaped like `x` except that the `axis` dimension is
      replaced by the number of requested quantiles (and squeezed away when
      there is exactly one and `may_squeeze` is true).

    Raises:
      tf.errors.InvalidArgumentError when the quantiles and quantile width
      are inconsistent, i.e. some filler weight would become negative.
    """
    if isinstance(quantiles, float):
        quantiles = [quantiles]
    quantiles = tf.constant(quantiles, tf.float32)

    # Only quantiles strictly inside (0, 1) are kept.
    strictly_inside = tf.logical_and(quantiles > 0.0, quantiles < 1.0)
    valid_quantiles = tf.boolean_mask(quantiles, strictly_inside)
    num_quantiles = tf.shape(valid_quantiles)[0]

    # Pad with the two ends of [0, 1]; consecutive differences are the gaps.
    extended_quantiles = tf.concat([[0.0], valid_quantiles, [1.0]], axis=0)
    filler_weights = extended_quantiles[1:] - extended_quantiles[:-1]
    if quantile_width is None:
        inverse_size = 1.0 / tf.cast(tf.shape(x)[axis], dtype=x.dtype)
        candidates = tf.concat([filler_weights, [inverse_size]], axis=0)
        quantile_width = tf.reduce_min(candidates)

    # Each gap gives up one quantile_width, except the two outermost gaps
    # which only give up half of it.
    shift = -tf.ones(tf.shape(filler_weights), dtype=x.dtype)
    first_gap = tf.one_hot(0, num_quantiles + 1)
    last_gap = tf.one_hot(num_quantiles, num_quantiles + 1)
    shift = shift + 0.5 * (first_gap + last_gap)
    filler_weights = filler_weights + quantile_width * shift

    non_negative = tf.reduce_all(filler_weights >= 0.0)
    assert_op = tf.Assert(non_negative, [filler_weights])
    with tf.control_dependencies([assert_op]):
        # One extra entry so both vectors have equal length for interleaving;
        # the surplus trailing element is dropped right after the reshape.
        quantile_weights = tf.ones(num_quantiles + 1) * quantile_width
        interleaved = tf.stack([filler_weights, quantile_weights], axis=1)
        weights = tf.reshape(interleaved, (-1, ))[:-1]

        # Only strictly positive weights are handed to softsort.
        is_positive = weights > 0.0
        positive_weights = tf.boolean_mask(weights, is_positive)
        all_quantiles = softsort(x,
                                 direction='ASCENDING',
                                 axis=axis,
                                 target_weights=positive_weights,
                                 **kwargs)

        # Quantile weights sit at the odd positions of `weights`; map them to
        # their positions among the positive weights.
        positions = tf.range(weights.shape[0], dtype=tf.float32)
        odds = tf.math.floormod(positions, 2)
        positives = tf.cast(is_positive, tf.float32)
        indices = tf.cast(tf.math.cumsum(positives) * odds, dtype=tf.int32)
        indices = tf.boolean_mask(indices, indices > 0) - 1
        result = tf.gather(all_quantiles, indices, axis=axis)

        # Drop the quantile dimension when it has size one and squeezing is
        # allowed.
        can_squeeze = tf.equal(tf.shape(result)[axis], 1)
        if tf.math.logical_and(can_squeeze, may_squeeze):
            result = tf.squeeze(result, axis=axis)
        return result
Exemple #28
0
    def get_is_weighted_reward_samples(self,
                                       dataset: dataset_lib.OffpolicyDataset,
                                       target_policy: tf_policy.TFPolicy,
                                       episode_limit: Optional[int] = None,
                                       eps: Optional[float] = 1e-8):
        """Gets the importance-sampling (IS) weighted reward samples.

        Args:
          dataset: Off-policy dataset; its episodes are read through
            `get_all_episodes`.
          target_policy: Policy whose action log-probabilities form the
            numerator of the importance ratios.
          episode_limit: Optional limit forwarded to
            `dataset.get_all_episodes`.
          eps: Small constant; log ratios are floored at `-1 / eps` and it
            also guards the normalizing divisions.

        Returns:
          `init_offset + trajectory_avg_rewards`, where the average is reduced
          over the step axis. NOTE(review): presumably one value per episode;
          the exact shape depends on `_get_v_value` -- confirm.

        Raises:
          ValueError: If `self._mode` is not one of 'trajectory-wise',
            'weighted-trajectory-wise', 'step-wise' or 'weighted-step-wise'.
        """
        episodes, valid_steps = dataset.get_all_episodes(limit=episode_limit)
        total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
        num_episodes = tf.shape(valid_steps)[0]
        num_samples = num_episodes * total_num_steps_per_episode

        init_env_step = tf.nest.map_structure(lambda t: t[:, 0, ...], episodes)
        # Flatten (episode, step) into a single sample axis; `env_step` holds
        # steps [0, T) and `next_env_step` holds steps [1, T].
        env_step = tf.nest.map_structure(
            lambda t: tf.squeeze(
                tf.reshape(t[:, 0:total_num_steps_per_episode, ...],
                           [num_samples, -1])), episodes)
        next_env_step = tf.nest.map_structure(
            lambda t: tf.squeeze(
                tf.reshape(t[:, 1:1 + total_num_steps_per_episode, ...],
                           [num_samples, -1])), episodes)
        tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

        gamma_weights = tf.reshape(
            tf.pow(self._gamma, tf.cast(env_step.step_num, tf.float32)),
            [num_episodes, total_num_steps_per_episode])

        rewards = (-self._get_q_value(env_step) + self._reward_fn(env_step) +
                   self._gamma * next_env_step.discount *
                   self._get_v_value(next_env_step, target_policy))
        rewards = tf.reshape(rewards,
                             [num_episodes, total_num_steps_per_episode])

        init_values = self._get_v_value(init_env_step, target_policy)
        init_offset = (1 - self._gamma) * init_values

        target_log_probabilities = target_policy.distribution(
            tfagents_env_step).action.log_prob(env_step.action)
        if tf.rank(target_log_probabilities) > 1:
            target_log_probabilities = tf.reduce_sum(target_log_probabilities,
                                                     -1)
        # The behavior log-probability comes from the learned policy network
        # when one is set, otherwise from the values stored with the data.
        if self._policy_network is not None:
            baseline_policy_log_probability = self._get_log_prob(
                self._policy_network, env_step)
            if tf.rank(baseline_policy_log_probability) > 1:
                baseline_policy_log_probability = tf.reduce_sum(
                    baseline_policy_log_probability, -1)
            policy_log_ratios = tf.reshape(
                tf.maximum(
                    -1.0 / eps, target_log_probabilities -
                    baseline_policy_log_probability),
                [num_episodes, total_num_steps_per_episode])
        else:
            policy_log_ratios = tf.reshape(
                tf.maximum(
                    -1.0 / eps,
                    target_log_probabilities - env_step.get_log_probability()),
                [num_episodes, total_num_steps_per_episode])
        valid_steps_in = valid_steps[:, 0:total_num_steps_per_episode]
        mask = tf.cast(
            tf.logical_and(valid_steps_in, episodes.discount[:, :-1] > 0.),
            tf.float32)

        masked_rewards = tf.where(mask > 0, rewards, tf.zeros_like(rewards))
        clipped_policy_log_ratios = mask * self.clip_log_factor(
            policy_log_ratios)

        if self._mode in ['trajectory-wise', 'weighted-trajectory-wise']:
            trajectory_avg_rewards = tf.reduce_sum(
                masked_rewards * gamma_weights, axis=1) / tf.reduce_sum(
                    gamma_weights, axis=1)
            trajectory_log_ratios = tf.reduce_sum(clipped_policy_log_ratios,
                                                  axis=1)
            if self._mode == 'trajectory-wise':
                trajectory_avg_rewards *= tf.exp(trajectory_log_ratios)
                return init_offset + trajectory_avg_rewards
            else:
                # Subtract the max before exponentiating, then normalize by
                # the mean ratio.
                offset = tf.reduce_max(trajectory_log_ratios)
                normalized_clipped_ratios = tf.exp(trajectory_log_ratios -
                                                   offset)
                normalized_clipped_ratios /= tf.maximum(
                    eps, tf.reduce_mean(normalized_clipped_ratios))
                trajectory_avg_rewards *= normalized_clipped_ratios
                return init_offset + trajectory_avg_rewards

        elif self._mode in ['step-wise', 'weighted-step-wise']:
            trajectory_log_ratios = mask * tf.cumsum(policy_log_ratios, axis=1)
            if self._mode == 'step-wise':
                trajectory_avg_rewards = tf.reduce_sum(
                    masked_rewards * gamma_weights *
                    tf.exp(trajectory_log_ratios),
                    axis=1) / tf.reduce_sum(gamma_weights, axis=1)
                return init_offset + trajectory_avg_rewards
            else:
                # Average over data, for each time step.
                offset = tf.reduce_max(trajectory_log_ratios,
                                       axis=0)  # TODO: Handle mask.
                normalized_imp_weights = tf.exp(trajectory_log_ratios - offset)
                normalized_imp_weights /= tf.maximum(
                    eps,
                    tf.reduce_sum(mask * normalized_imp_weights, axis=0) /
                    tf.maximum(eps, tf.reduce_sum(mask, axis=0)))[None, :]

                trajectory_avg_rewards = tf.reduce_sum(
                    masked_rewards * gamma_weights * normalized_imp_weights,
                    axis=1) / tf.reduce_sum(gamma_weights, axis=1)
                return init_offset + trajectory_avg_rewards
        else:
            # The exception was previously constructed but never raised, so
            # an unsupported mode silently returned None.
            raise ValueError('Estimator is not implemented!')
Exemple #29
0
def parse_example(serialized):
  """Parses serialized examples and squeezes axis 1 of every feature.

  Args:
    serialized: Serialized examples, passed to `tf.io.parse_example`.

  Returns:
    Dict mapping each parsed feature name to its tensor with axis 1 removed.
  """
  parsed = tf.io.parse_example(serialized, _make_features_spec())
  squeezed = {}
  for key, value in six.iteritems(parsed):
    squeezed[key] = tf.squeeze(value, axis=1)
  return squeezed
 def _map_fn(features, labels):
   """Runs `module` on a single example by temporarily adding axis 0.

   Args:
     features: Input for one example; a leading axis of size 1 is added
       before calling `module` and removed again afterwards.
     labels: Passed through unchanged.

   Returns:
     Tuple `(module_output, labels)` with axis 0 of the output squeezed out.
   """
   batched = tf.expand_dims(features, 0)
   transformed = module(batched)
   return tf.squeeze(transformed, 0), labels
Exemple #31
0
def remove_squeezable_dimensions(labels,
                                 predictions,
                                 expected_rank_diff=0,
                                 name=None):
    """Squeezes the last dim of one input if ranks are off by exactly one.

    With the default `expected_rank_diff` of 0 the two inputs are expected to
    have equal rank; if they differ by 1, the last dimension of the
    higher-rank one is squeezed (provided it can be 1).

    When `labels` holds class IDs and `predictions` holds one probability per
    class, `predictions` is expected to have one more dimension than `labels`,
    so `expected_rank_diff` is 1. Then `labels` is squeezed if
    `rank(predictions) - rank(labels) == 0`, and `predictions` is squeezed if
    `rank(predictions) - rank(labels) == 2`.

    Static ranks are used when both are known. Otherwise graph operations are
    added, which could result in a performance hit.

    Args:
      labels: Label values, a `Tensor` whose dimensions match `predictions`.
      predictions: Predicted values, a `Tensor` of arbitrary dimensions.
      expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
      name: Name of the op.

    Returns:
      Tuple of `labels` and `predictions`, possibly with last dim squeezed.
    """
    with backend.name_scope(name or "remove_squeezable_dimensions"):
        if not tf_utils.is_tensor_or_extension_type(predictions):
            predictions = tf.convert_to_tensor(predictions)
        if not tf_utils.is_tensor_or_extension_type(labels):
            labels = tf.convert_to_tensor(labels)

        pred_shape = predictions.shape
        pred_ndims = pred_shape.ndims
        label_shape = labels.shape
        label_ndims = label_shape.ndims

        both_static = (label_ndims is not None) and (pred_ndims is not None)
        if both_static:
            # Both ranks are known statically: decide without adding ops.
            static_diff = pred_ndims - label_ndims
            if static_diff == expected_rank_diff + 1:
                if pred_shape.dims[-1].is_compatible_with(1):
                    predictions = tf.squeeze(predictions, [-1])
            elif static_diff == expected_rank_diff - 1:
                if label_shape.dims[-1].is_compatible_with(1):
                    labels = tf.squeeze(labels, [-1])
            return labels, predictions

        # At least one rank is unknown: compare ranks at run time.
        dynamic_diff = tf.rank(predictions) - tf.rank(labels)

        pred_may_squeeze = (pred_ndims is None
                            or pred_shape.dims[-1].is_compatible_with(1))
        if pred_may_squeeze:
            predictions = tf.cond(
                tf.equal(expected_rank_diff + 1, dynamic_diff),
                lambda: tf.squeeze(predictions, [-1]),
                lambda: predictions,
            )

        label_may_squeeze = (label_ndims is None
                             or label_shape.dims[-1].is_compatible_with(1))
        if label_may_squeeze:
            labels = tf.cond(
                tf.equal(expected_rank_diff - 1, dynamic_diff),
                lambda: tf.squeeze(labels, [-1]),
                lambda: labels,
            )
        return labels, predictions