def _map_fn(features, labels):
  """Applies `module` to `features` and returns the result with `labels`.

  A leading axis of size one is added before calling `module` and removed
  again afterwards; `labels` is passed through untouched.
  """
  batched = tf.expand_dims(features, 0)
  transformed = module(batched)
  return tf.squeeze(transformed, 0), labels
def negative_log_likelihood(y, rv_y):
  """Returns minus the log-probability of `y` under `model`'s output.

  The second argument is ignored; the distribution is read directly from
  `model.output.distribution`.
  """
  del rv_y  # unused arg
  targets = tf.squeeze(y)
  log_prob = model.output.distribution.log_prob(targets)
  return -log_prob
def accuracy(y_true, y_sample):
  """Returns an elementwise boolean tensor: argmax of the logits == label.

  The second argument is ignored; logits are read directly from
  `model.output.distribution`.
  """
  del y_sample  # unused arg
  predicted = tf.argmax(input=model.output.distribution.logits, axis=1)
  expected = tf.cast(tf.squeeze(y_true), tf.int64)
  return tf.equal(predicted, expected)
def main(argv):
  """Trains a base variational model and refines it by auxiliary sampling.

  The run has three phases, all executed inside one TF1 session:

  1. Train the base model for `FLAGS.training_steps` steps, evaluate it and
     save its weights.
  2. Keep training the same model as an "overtrained" baseline that receives
     the same number of extra steps the refinement phase will use.
  3. For each of `FLAGS.ensemble_size` members, reload the base weights and
     run `FLAGS.n_auxiliary_variables` rounds of (sample auxiliary variables,
     recompile with a decayed learning rate, fit), then save the member.

  Metrics of the base model, the overtrained model and the ensemble are
  logged at the end.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  session = tf1.Session()
  with session.as_default():
    x_train, y_train, x_test, y_test = datasets.load(session)
    n_train = x_train.shape[0]

    num_classes = int(np.amax(y_train)) + 1
    if not FLAGS.resnet:
      model = lenet5(n_train, x_train.shape[1:], num_classes)
    else:
      datagen = tf.keras.preprocessing.image.ImageDataGenerator(
          rotation_range=90,
          width_shift_range=0.1,
          height_shift_range=0.1,
          horizontal_flip=True)
      datagen.fit(x_train)
      model = res_net(
          n_train,
          x_train.shape[1:],
          num_classes,
          batchnorm=FLAGS.batchnorm,
          variational='hybrid' if FLAGS.hybrid else 'full')

    def schedule_fn(epoch):
      """Learning rate schedule function."""
      rate = FLAGS.learning_rate
      if epoch > 180:
        rate *= 0.5e-3
      elif epoch > 160:
        rate *= 1e-3
      elif epoch > 120:
        rate *= 1e-2
      elif epoch > 80:
        rate *= 1e-1
      return float(rate)

    lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

    # Per-layer accumulators for the KL cost of the sampled auxiliary
    # variables. `add_weight` is used for both; `add_variable` is only a
    # deprecated alias of it.
    for layer in model.layers:
      layer.kl_cost_weight = layer.add_weight(
          name='kl_cost_weight',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)
      layer.kl_cost_bias = layer.add_weight(
          name='kl_cost_bias',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)

    [negative_log_likelihood, accuracy, log_likelihood, kl,
     elbo] = get_losses_and_metrics(model, n_train)

    metrics = [elbo, log_likelihood, kl, accuracy]

    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=FLAGS.output_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)
    if FLAGS.resnet:
      callbacks = [tensorboard, lr_callback]
    else:
      callbacks = [tensorboard]

    # Keras requires `validation_freq >= 1`. The step-based flag can round
    # down to zero epochs on large datasets, so clamp it for both fit paths.
    validation_freq = max(
        (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1)

    if not FLAGS.resnet or not FLAGS.data_augmentation:

      def fit_fn(model,
                 steps,
                 initial_epoch=0,
                 with_lr_schedule=FLAGS.resnet):
        """Fits `model` on the raw training arrays for `steps` steps."""
        return model.fit(
            x=x_train,
            y=y_train,
            batch_size=FLAGS.batch_size,
            epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
            initial_epoch=initial_epoch,
            validation_data=(x_test, y_test),
            validation_freq=validation_freq,
            verbose=1,
            callbacks=callbacks if with_lr_schedule else [tensorboard])
    else:

      def fit_fn(model,
                 steps,
                 initial_epoch=0,
                 with_lr_schedule=FLAGS.resnet):
        """Fits `model` on augmented batches for `steps` steps."""
        return model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=FLAGS.batch_size),
            epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
            initial_epoch=initial_epoch,
            steps_per_epoch=n_train // FLAGS.batch_size,
            validation_data=(x_test, y_test),
            validation_freq=validation_freq,
            verbose=1,
            callbacks=callbacks if with_lr_schedule else [tensorboard])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
        loss=negative_log_likelihood,
        metrics=metrics)
    # `global_variables_initializer` is the documented replacement for the
    # deprecated `initialize_all_variables`.
    session.run(tf1.global_variables_initializer())

    train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
    fit_fn(model, FLAGS.training_steps)

    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(tf.squeeze(labels)),
        model.output.distribution.logits
    ])

    base_metrics = [
        ensemble_metrics(x_train, y_train, model, ll),
        ensemble_metrics(x_test, y_test, model, ll)
    ]
    model_dir = os.path.join(FLAGS.output_dir, 'models')
    tf.io.gfile.makedirs(model_dir)
    base_model_filename = os.path.join(model_dir, 'base_model.weights')
    model.save_weights(base_model_filename)

    # Train base model further for comparison.
    fit_fn(
        model,
        FLAGS.n_auxiliary_variables * FLAGS.auxiliary_sampling_frequency *
        FLAGS.ensemble_size,
        initial_epoch=train_epochs)

    overtrained_metrics = [
        ensemble_metrics(x_train, y_train, model, ll),
        ensemble_metrics(x_test, y_test, model, ll)
    ]

    # Perform refined VI.
    sample_op = []
    for layer in model.layers:
      if isinstance(layer, (tfp.layers.DenseLocalReparameterization,
                            tfp.layers.Convolution2DFlipout)):
        weight_op, weight_cost = sample_auxiliary_op(
            layer.kernel_prior.distribution,
            layer.kernel_posterior.distribution,
            FLAGS.auxiliary_variance_ratio)
        sample_op.append(weight_op)
        sample_op.append(layer.kl_cost_weight.assign_add(weight_cost))
        # Fix the variance of the prior
        session.run(layer.kernel_prior.distribution.istrainable.assign(0.))
        if hasattr(layer.bias_prior, 'distribution'):
          bias_op, bias_cost = sample_auxiliary_op(
              layer.bias_prior.distribution,
              layer.bias_posterior.distribution,
              FLAGS.auxiliary_variance_ratio)
          sample_op.append(bias_op)
          sample_op.append(layer.kl_cost_bias.assign_add(bias_cost))
          # Fix the variance of the prior
          session.run(layer.bias_prior.distribution.istrainable.assign(0.))

    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
      model.load_weights(base_model_filename)
      for j in range(FLAGS.n_auxiliary_variables):
        session.run(sample_op)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                # The learning rate is proportional to the scale of the prior.
                lr=float(FLAGS.learning_rate_for_sampling *
                         np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
            loss=negative_log_likelihood,
            metrics=metrics)
        fit_fn(
            model,
            FLAGS.auxiliary_sampling_frequency,
            initial_epoch=train_epochs,
            with_lr_schedule=False)
      ensemble_filename = os.path.join(
          model_dir, 'ensemble_component_' + str(i) + '.weights')
      ensemble_filenames.append(ensemble_filename)
      model.save_weights(ensemble_filename)

    auxiliary_metrics = [
        ensemble_metrics(
            x_train, y_train, model, ll, weight_files=ensemble_filenames),
        ensemble_metrics(
            x_test, y_test, model, ll, weight_files=ensemble_filenames)
    ]

    # A distinct loop variable keeps the compiled `metrics` list intact.
    for split_metrics, name in [(base_metrics, 'Base model'),
                                (overtrained_metrics, 'Overtrained model'),
                                (auxiliary_metrics, 'Auxiliary sampling')]:
      logging.info(name)
      for metrics_dict, split in [(split_metrics[0], 'Training'),
                                  (split_metrics[1], 'Testing')]:
        logging.info(split)
        for metric_name in metrics_dict:
          logging.info('%s: %s', metric_name, metrics_dict[metric_name])
def build(self, input_shape):
  """Builds the Keras functional model and stores it in `self._network`.

  The network is assembled from two parallel stacks of masked convolutions
  ('vertical' and 'horizontal'), arranged as a U-net: a downward pass whose
  layer outputs are pushed onto per-stack lists, followed by an upward pass
  that pops those outputs again to form skip-connections.

  Args:
    input_shape: Either the batch shape of the image input, or a length-2
      sequence `(batch_image_shape, batch_conditional_shape)`. The two cases
      are told apart solely by `len(input_shape) == 2`.
  """
  dtype = self.dtype
  if len(input_shape) == 2:
    batch_image_shape, batch_conditional_shape = input_shape
    conditional_input = tf.keras.layers.Input(
        shape=batch_conditional_shape[1:], dtype=dtype)
  else:
    batch_image_shape = input_shape
    conditional_input = None

  # Drop the leading (batch) entry; Keras `Input` takes per-example shapes.
  image_shape = batch_image_shape[1:]
  image_input = tf.keras.layers.Input(shape=image_shape, dtype=dtype)

  if self._resnet_activation == 'concat_elu':
    activation = tf.keras.layers.Lambda(
        lambda x: tf.nn.elu(tf.concat([x, -x], axis=-1)), dtype=dtype)
  else:
    activation = tf.keras.activations.get(self._resnet_activation)

  # Define layers with default inputs and layer wrapper applied
  Conv2D = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Convolution2D),
      filters=self._num_filters,
      padding='same',
      dtype=dtype)

  Dense = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Dense), dtype=dtype)

  Conv2DTranspose = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Conv2DTranspose),
      filters=self._num_filters,
      padding='same',
      strides=(2, 2),
      dtype=dtype)

  rows, cols = self._receptive_field_dims

  # Define the dimensions of the valid (unmasked) areas of the layer kernels
  # for stride 1 convolutions in the internal layers.
  kernel_valid_dims = {'vertical': (rows - 1, cols),
                       'horizontal': (2, cols // 2 + 1)}

  # Define the size of the kernel necessary to center the current pixel
  # correctly for stride 1 convolutions in the internal layers.
  kernel_sizes = {'vertical': (2 * rows - 3, cols), 'horizontal': (3, cols)}

  # Make the kernel constraint functions for stride 1 convolutions in internal
  # layers.
  kernel_constraints = {
      k: _make_kernel_constraint(kernel_sizes[k], (0, v[0]), (0, v[1]))
      for k, v in kernel_valid_dims.items()}

  # Build the initial vertical stack/horizontal stack convolutional layers,
  # as shown in Figure 1 of [2]. The receptive field of the initial vertical
  # stack layer is a rectangular area centered above the current pixel.
  vertical_stack_init = Conv2D(
      kernel_size=(2 * rows - 1, cols),
      kernel_constraint=_make_kernel_constraint(
          (2 * rows - 1, cols), (0, rows - 1), (0, cols)))(image_input)

  # In Figure 1 [2], the receptive field of the horizontal stack is
  # illustrated as the pixels in the same row and to the left of the current
  # pixel. [1] increases the height of this receptive field from one pixel to
  # two (`horizontal_stack_left`) and additionally includes a subset of the
  # row of pixels centered above the current pixel (`horizontal_stack_up`).
  horizontal_stack_up = Conv2D(
      kernel_size=(3, cols),
      kernel_constraint=_make_kernel_constraint(
          (3, cols), (0, 1), (0, cols)))(image_input)

  horizontal_stack_left = Conv2D(
      kernel_size=(3, cols),
      kernel_constraint=_make_kernel_constraint(
          (3, cols), (0, 2), (0, cols // 2)))(image_input)

  horizontal_stack_init = tf.keras.layers.add(
      [horizontal_stack_up, horizontal_stack_left], dtype=dtype)

  layer_stacks = {
      'vertical': [vertical_stack_init],
      'horizontal': [horizontal_stack_init]}

  # Build the downward pass of the U-net (left-hand half of Figure 2 of [1]).
  # Each `i` iteration builds one of the highest-level blocks (identified as
  # 'Sequence of 6 layers' in the figure, consisting of `num_resnet=5` stride-
  # 1 layers, and one stride-2 layer that contracts the height/width
  # dimensions). The `_` iterations build the stride 1 layers. The layers of
  # the downward pass are stored in lists, since we'll later need them to make
  # skip-connections to layers in the upward pass of the U-net (the skip-
  # connections are represented by curved lines in Figure 2 [1]).
  for i in range(self._num_hierarchies):
    for _ in range(self._num_resnet):
      # Build a layer shown in Figure 2 of [2]. The 'vertical' iteration
      # builds the layers in the left half of the figure, and the 'horizontal'
      # iteration builds the layers in the right half.
      for stack in ['vertical', 'horizontal']:
        input_x = layer_stacks[stack][-1]
        x = activation(input_x)
        x = Conv2D(kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        # Add the vertical-stack layer to the horizontal-stack layer
        if stack == 'horizontal':
          # 'vertical' is processed first in this loop, so the last entry of
          # its list is the vertical output produced in this same iteration.
          h = activation(layer_stacks['vertical'][-1])
          h = Dense(self._num_filters)(h)
          x = tf.keras.layers.add([h, x], dtype=dtype)

        x = activation(x)
        x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
        # Twice the filters are produced here; the result is then passed to
        # `_apply_sigmoid_gating`.
        x = Conv2D(filters=2*self._num_filters,
                   kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        if conditional_input is not None:
          h_projection = _build_and_apply_h_projection(
              conditional_input, self._num_filters, dtype=dtype)
          x = tf.keras.layers.add([x, h_projection], dtype=dtype)

        x = _apply_sigmoid_gating(x)

        # Add a residual connection from the layer's input.
        out = tf.keras.layers.add([input_x, x], dtype=dtype)
        layer_stacks[stack].append(out)

    if i < self._num_hierarchies - 1:
      # Build convolutional layers that contract the height/width dimensions
      # on the downward pass between each set of layers (e.g. contracting from
      # 32x32 to 16x16 in Figure 2 of [1]).
      for stack in ['vertical', 'horizontal']:
        # Define kernel dimensions/masking to maintain the autoregressive
        # property.
        x = layer_stacks[stack][-1]
        h, w = kernel_valid_dims[stack]
        kernel_height = 2 * h
        if stack == 'vertical':
          kernel_width = w + 1
        else:
          kernel_width = 2 * w

        kernel_size = (kernel_height, kernel_width)
        kernel_constraint = _make_kernel_constraint(
            kernel_size, (0, h), (0, w))
        x = Conv2D(strides=(2, 2), kernel_size=kernel_size,
                   kernel_constraint=kernel_constraint)(x)
        layer_stacks[stack].append(x)

  # Upward pass of the U-net (right-hand half of Figure 2 of [1]). We stored
  # the layers of the downward pass in a list, in order to access them to make
  # skip-connections to the upward pass. For the upward pass, we need to keep
  # track of only the current layer, so we maintain a reference to the
  # current layer of the horizontal/vertical stack in the `upward_pass` dict.
  # The upward pass begins with the last layer of the downward pass.
  upward_pass = {key: stack.pop() for key, stack in layer_stacks.items()}

  # As with the downward pass, each `i` iteration builds a highest level block
  # in Figure 2 [1], and the `_` iterations build individual layers within the
  # block.
  for i in range(self._num_hierarchies):
    # Every block after the first builds one extra layer, which pops one
    # extra stored downward-pass output per stack.
    num_resnet = self._num_resnet if i == 0 else self._num_resnet + 1

    for _ in range(num_resnet):
      # Build a layer as shown in Figure 2 of [2], with a skip-connection
      # from the symmetric layer in the downward pass.
      for stack in ['vertical', 'horizontal']:
        input_x = upward_pass[stack]
        x_symmetric = layer_stacks[stack].pop()

        x = activation(input_x)
        x = Conv2D(kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        # Include the vertical-stack layer of the upward pass in the layers
        # to be added to the horizontal layer.
        if stack == 'horizontal':
          x_symmetric = tf.keras.layers.Concatenate(axis=-1, dtype=dtype)(
              [upward_pass['vertical'], x_symmetric])

        # Add a skip-connection from the symmetric layer in the downward
        # pass to the layer `x` in the upward pass.
        h = activation(x_symmetric)
        h = Dense(self._num_filters)(h)
        x = tf.keras.layers.add([h, x], dtype=dtype)

        x = activation(x)
        x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
        x = Conv2D(filters=2*self._num_filters,
                   kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        if conditional_input is not None:
          h_projection = _build_and_apply_h_projection(
              conditional_input, self._num_filters, dtype=dtype)
          x = tf.keras.layers.add([x, h_projection], dtype=dtype)

        x = _apply_sigmoid_gating(x)
        upward_pass[stack] = tf.keras.layers.add([input_x, x], dtype=dtype)

    # Define deconvolutional layers that expand height/width dimensions on the
    # upward pass (e.g. expanding from 8x8 to 16x16 in Figure 2 of [1]), with
    # the correct kernel dimensions/masking to maintain the autoregressive
    # property.
    if i < self._num_hierarchies - 1:
      for stack in ['vertical', 'horizontal']:
        h, w = kernel_valid_dims[stack]
        kernel_height = 2 * h - 2
        if stack == 'vertical':
          kernel_width = w + 1
          kernel_constraint = _make_kernel_constraint(
              (kernel_height, kernel_width),
              (h - 2, kernel_height), (0, w))
        else:
          kernel_width = 2 * w - 2
          kernel_constraint = _make_kernel_constraint(
              (kernel_height, kernel_width),
              (h - 2, kernel_height), (w - 2, kernel_width))

        x = upward_pass[stack]
        x = Conv2DTranspose(kernel_size=(kernel_height, kernel_width),
                            kernel_constraint=kernel_constraint)(x)
        upward_pass[stack] = x

  # Only the horizontal stack feeds the output head.
  x_out = tf.keras.layers.ELU(dtype=dtype)(upward_pass['horizontal'])

  # Build final Dense/Reshape layers to output the correct number of
  # parameters per pixel.
  num_channels = tensorshape_util.as_list(image_shape)[-1]
  num_coeffs = num_channels * (num_channels - 1) // 2
  num_out = num_channels * 2 + num_coeffs + 1
  num_out_total = num_out * self._num_logistic_mix
  params = Dense(num_out_total)(x_out)
  params = tf.reshape(params, prefer_static.concat(
      [[-1], image_shape[:-1], [self._num_logistic_mix, num_out]], axis=0))

  # If there is one color channel, split the parameters into a list of three
  # output `Tensor`s: (1) component logits for the Quantized Logistic mixture
  # distribution, (2) location parameters for each component, and (3) scale
  # parameters for each component. If there is more than one color channel,
  # return a fourth `Tensor` for the coefficients for the linear dependence
  # among color channels.
  splits = (3 if num_channels == 1
            else [1, num_channels, num_channels, num_coeffs])
  outputs = tf.split(params, splits, axis=-1)

  # Squeeze singleton dimension from component logits
  outputs[0] = tf.squeeze(outputs[0], axis=-1)

  # Ensure scales are positive and do not collapse to near-zero
  outputs[2] = tf.nn.softplus(outputs[2]) + tf.cast(tf.exp(-7.), self.dtype)

  inputs = (image_input if conditional_input is None
            else [image_input, conditional_input])
  self._network = tf.keras.Model(inputs=inputs, outputs=outputs)
  super(_PixelCNNNetwork, self).build(input_shape)
def solve_nu_zeta(self,
                  dataset: dataset_lib.OffpolicyDataset,
                  target_policy: tf_policy.TFPolicy,
                  regularizer: float = 1e-6):
  """Performs one update of the two `nu`/`zeta` solutions and reports losses.

  On the first call (detected by the absence of `self._td_mat`) the whole
  dataset is read, flattened into per-transition tensors and turned into the
  tabular quantities cached on `self` (`_a_vec`, `_td_mat`, `_bias`, ...).
  Later calls skip that setup and reuse the cache, so `dataset` and
  `target_policy` are only consulted on the first call.

  Every call then:
    1. evaluates the saddle-point loss for the current `nu`/`zeta` pairs,
    2. bisects on `self._alpha` (16 iterations) using the sign of the
       divergence violation,
    3. takes a regularized Newton-style step, scaled by 0.1, on `self._nu`
       and `self._nu2`,
    4. moves `self._zeta` and `self._zeta2` a fraction 0.1 towards their
       newly computed values.

  Args:
    dataset: The dataset to sample experience from; queried through
      `get_all_episodes`.
    target_policy: The policy whose value we want to estimate.
    regularizer: A small constant; `regularizer * I` is added to each Hessian
      before the linear solve.

  Returns:
    A list `[avg_saddle_loss * sign, avg_loss * sign, divergence]`, where
    `sign` is `self._algae_alpha_sign`.
  """
  if not hasattr(self, '_td_mat'):
    # Set up env_steps.
    episodes, valid_steps = dataset.get_all_episodes(
        limit=self._limit_episodes)
    total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
    num_episodes = tf.shape(valid_steps)[0]
    num_samples = num_episodes * total_num_steps_per_episode
    valid_and_not_last = tf.logical_and(valid_steps, episodes.discount > 0)
    # Flat indices of the transitions kept after masking; the last step of
    # each episode is sliced off because it has no successor.
    valid_indices = tf.squeeze(
        tf.where(tf.reshape(valid_and_not_last[:, :-1], [-1])))

    # First step of every episode, repeated once per transition so that it
    # lines up with `env_step` / `next_env_step` below.
    initial_env_step = tf.nest.map_structure(
        lambda t: tf.squeeze(
            tf.reshape(
                tf.repeat(
                    t[:, 0:1, ...],
                    axis=1,
                    repeats=total_num_steps_per_episode),
                [num_samples, -1])),
        episodes)
    initial_env_step = tf.nest.map_structure(
        lambda t: tf.gather(t, valid_indices), initial_env_step)
    tfagents_initial_env_step = dataset_lib.convert_to_tfagents_timestep(
        initial_env_step)

    env_step = tf.nest.map_structure(
        lambda t: tf.squeeze(
            tf.reshape(t[:, 0:total_num_steps_per_episode, ...],
                       [num_samples, -1])),
        episodes)
    env_step = tf.nest.map_structure(lambda t: tf.gather(t, valid_indices),
                                     env_step)
    tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

    next_env_step = tf.nest.map_structure(
        lambda t: tf.squeeze(
            tf.reshape(t[:, 1:total_num_steps_per_episode + 1, ...],
                       [num_samples, -1])),
        episodes)
    next_env_step = tf.nest.map_structure(
        lambda t: tf.gather(t, valid_indices), next_env_step)
    tfagents_next_env_step = dataset_lib.convert_to_tfagents_timestep(
        next_env_step)

    # get probabilities
    initial_target_probs = target_policy.distribution(
        tfagents_initial_env_step).action.probs_parameter()
    next_target_probs = target_policy.distribution(
        tfagents_next_env_step).action.probs_parameter()

    # First, get the nu_loss and data weights
    #current_nu_loss = self._get_nu_loss(initial_env_step, env_step,
    #                                    next_env_step, target_policy)
    #data_weight, _ = self._get_weights(current_nu_loss)

    # # debug only and to reproduce dual dice result, DELETE
    # data_weight = tf.ones_like(data_weight)

    state_action_count = self._get_state_action_counts(env_step)
    # NOTE(review): `counts` is not read again anywhere in this method.
    counts = tf.reduce_sum(tf.one_hot(state_action_count, self._dimension), 0)
    gamma_sample = tf.pow(self._gamma, tf.cast(env_step.step_num, tf.float32))

    # # debug only and to reproduce dual dice result, DELETE
    # gamma_sample = tf.ones_like(gamma_sample)

    # now we need to expand_dims to include action space in extra dimensions
    #data_weights = tf.reshape(data_weight, [-1, self._num_limits])
    # both are data sample weights for L2 problem, needs to be normalized later
    #gamma_data_weights = tf.reshape(gamma_sample, [-1, 1]) * data_weights

    # Pair every initial observation with every action index.
    initial_states = tf.tile(
        tf.reshape(initial_env_step.observation, [-1, 1]),
        [1, self._num_actions])
    initial_actions = tf.tile(
        tf.reshape(tf.range(self._num_actions), [1, -1]),
        [initial_env_step.observation.shape[0], 1])
    initial_nu_indices = self._get_index(initial_states, initial_actions)

    # linear term w.r.t. initial distribution
    #b_vec_2 = tf.stack([
    #    tf.reduce_sum(
    #        tf.reshape(
    #            data_weights[:, itr] / tf.reduce_sum(data_weights[:, itr]),
    #            [-1, 1]) * tf.reduce_sum(
    #                tf.one_hot(initial_nu_indices, self._dimension) *
    #                (1 - self._gamma) *
    #                tf.expand_dims(initial_target_probs, axis=-1),
    #                axis=1),
    #        axis=0) for itr in range(self._num_limits)
    #],
    #                   axis=0)

    # Pair every next observation with every action index.
    next_states = tf.tile(
        tf.reshape(next_env_step.observation, [-1, 1]),
        [1, self._num_actions])
    next_actions = tf.tile(
        tf.reshape(tf.range(self._num_actions), [1, -1]),
        [next_env_step.observation.shape[0], 1])
    next_nu_indices = self._get_index(next_states, next_actions)
    # Absorbing next steps get index -1.
    # NOTE(review): presumably so that `tf.one_hot` below yields an all-zero
    # row for them — confirm.
    next_nu_indices = tf.where(
        tf.expand_dims(next_env_step.is_absorbing(), -1),
        -1 * tf.ones_like(next_nu_indices), next_nu_indices)

    nu_indices = self._get_index(env_step.observation, env_step.action)

    target_log_probabilities = target_policy.distribution(
        tfagents_env_step).action.log_prob(env_step.action)
    if not self._solve_for_state_action_ratio:
      policy_ratio = tf.exp(target_log_probabilities -
                            env_step.get_log_probability())
    else:
      policy_ratio = tf.ones([
          target_log_probabilities.shape[0],
      ])
    policy_ratios = tf.tile(
        tf.reshape(policy_ratio, [-1, 1]), [1, self._num_actions])

    # the tabular feature vector
    a_vec = tf.one_hot(nu_indices, self._dimension) - tf.reduce_sum(
        self._gamma *
        tf.expand_dims(next_target_probs * policy_ratios, axis=-1) *
        tf.one_hot(next_nu_indices, self._dimension),
        axis=1)

    # linear term w.r.t. reward
    #b_vec_1 = tf.stack([
    #    tf.reduce_sum(
    #        tf.reshape(
    #            (gamma_data_weights[:, itr] /
    #             tf.reduce_sum(gamma_data_weights[:, itr])) * self._reward_fn(env_step), #/
    #            #tf.cast(state_action_count, tf.float32),
    #            [-1, 1]) * a_vec,
    #        axis=0) for itr in range(self._num_limits)
    #],
    #                   axis=0)

    # quadratic term of feature
    # Get weighted outer product by using einsum to save computing resource!
    #a_mat = tf.stack([
    #    tf.einsum(
    #        'ai, a, aj -> ij', a_vec,
    #        #1.0 / tf.cast(state_action_count, tf.float32),
    #        gamma_data_weights[:, itr] /
    #        tf.reduce_sum(gamma_data_weights[:, itr]),
    #        a_vec)
    #    for itr in range(self._num_limits)
    #],
    #                 axis=0)

    td_mat = tf.einsum('ai, a, aj -> ij',
                       tf.one_hot(nu_indices, self._dimension),
                       1.0 / tf.cast(state_action_count, tf.float32), a_vec)

    weighted_rewards = policy_ratio * self._reward_fn(env_step)

    bias = tf.reduce_sum(
        tf.one_hot(nu_indices, self._dimension) *
        tf.reshape(weighted_rewards, [-1, 1]) * 1.0 /
        tf.cast(state_action_count, tf.float32)[:, None],
        axis=0)

    # Initialize
    self._nu = np.ones_like(self._nu) * bias[:, None]
    self._nu2 = np.ones_like(self._nu2) * bias[:, None]

    # Cache everything the per-call update below needs; the presence of
    # `_td_mat` is what marks the setup as done.
    self._a_vec = a_vec
    self._td_mat = td_mat
    self._bias = bias
    self._weighted_rewards = weighted_rewards
    self._state_action_count = state_action_count
    self._nu_indices = nu_indices
    self._initial_nu_indices = initial_nu_indices
    self._initial_target_probs = initial_target_probs
    self._gamma_sample = gamma_sample
    # NOTE(review): the assignment above is immediately overwritten, so the
    # per-sample discount weights are all ones — confirm this is intended.
    self._gamma_sample = tf.ones_like(gamma_sample)

  # Saddle-point loss for the first (nu, zeta) pair ...
  saddle_bellman_residuals = (
      tf.matmul(self._a_vec, self._nu) - self._weighted_rewards[:, None])
  saddle_bellman_residuals *= -1 * self._algae_alpha_sign
  saddle_zetas = tf.gather(self._zeta, self._nu_indices)
  saddle_initial_nu_values = tf.reduce_sum(  # Average over actions.
      self._initial_target_probs[:, :, None] *
      tf.gather(self._nu, self._initial_nu_indices),
      axis=1)
  saddle_init_nu_loss = ((1 - self._gamma) * saddle_initial_nu_values *
                         self._algae_alpha_sign)

  # ... and for the second pair, which uses the opposite signs.
  saddle_bellman_residuals2 = (
      tf.matmul(self._a_vec, self._nu2) - self._weighted_rewards[:, None])
  saddle_bellman_residuals2 *= 1 * self._algae_alpha_sign
  saddle_zetas2 = tf.gather(self._zeta2, self._nu_indices)
  saddle_initial_nu_values2 = tf.reduce_sum(  # Average over actions.
      self._initial_target_probs[:, :, None] *
      tf.gather(self._nu2, self._initial_nu_indices),
      axis=1)
  saddle_init_nu_loss2 = ((1 - self._gamma) * saddle_initial_nu_values2 * -1 *
                          self._algae_alpha_sign)

  saddle_loss = 0.5 * (
      saddle_init_nu_loss + saddle_bellman_residuals * saddle_zetas +
      -tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas) +
      -saddle_init_nu_loss2 + -saddle_bellman_residuals2 * saddle_zetas2 +
      tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas2))

  # Binary search to find best alpha.
  # Each step moves `left` up to `mid` where the divergence exceeds the limit
  # and moves `right` down to `mid` otherwise.
  left = tf.constant([-8., -8.])
  right = tf.constant([32., 32.])
  for _ in range(16):
    mid = 0.5 * (left + right)
    self._alpha.assign(mid)
    weights, log_weights = self._get_weights(saddle_loss *
                                             self._gamma_sample[:, None])

    divergence = self._compute_divergence(weights, log_weights)
    divergence_violation = divergence - self._two_sided_limit
    left = tf.where(divergence_violation > 0., mid, left)
    right = tf.where(divergence_violation > 0., right, mid)
  self._alpha.assign(0.5 * (left + right))
  weights, log_weights = self._get_weights(saddle_loss *
                                           self._gamma_sample[:, None])

  gamma_data_weights = tf.stop_gradient(weights * self._gamma_sample[:, None])
  #print(tf.concat([gamma_data_weights, saddle_loss], axis=-1))
  avg_saddle_loss = (
      tf.reduce_sum(gamma_data_weights * saddle_loss, axis=0) /
      tf.reduce_sum(gamma_data_weights, axis=0))

  # Sum of sample weights per tabular index, gathered back to one row per
  # sample.
  weighted_state_action_count = tf.reduce_sum(
      tf.one_hot(self._nu_indices, self._dimension)[:, :, None] *
      weights[:, None, :],
      axis=0)
  weighted_state_action_count = tf.gather(weighted_state_action_count,
                                          self._nu_indices)
  my_td_mat = tf.einsum(
      'ai, ab, ab, aj -> bij',
      tf.one_hot(self._nu_indices, self._dimension),
      #1.0 / tf.cast(self._state_action_count, tf.float32),
      1.0 / weighted_state_action_count,
      weights,
      self._a_vec)
  my_bias = tf.reduce_sum(
      tf.transpose(weights)[:, :, None] *
      tf.one_hot(self._nu_indices, self._dimension)[None, :, :] *
      tf.reshape(self._weighted_rewards, [1, -1, 1]) *
      #1.0 / tf.cast(self._state_action_count, tf.float32)[None, :, None],
      1.0 / tf.transpose(weighted_state_action_count)[:, :, None],
      axis=1)

  #print('hello', saddle_initial_nu_values[:1], saddle_zetas[:3],
  #      self._nu[:2], my_bias[:, :2], saddle_loss[:4])

  # The tape is persistent because it is queried several times: twice for
  # gradients inside the context and twice for Jacobians afterwards.
  with tf.GradientTape(
      watch_accessed_variables=False, persistent=True) as tape:
    tape.watch([self._nu, self._nu2, self._alpha])
    bellman_residuals = tf.matmul(
        my_td_mat,
        tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
    bellman_residuals = tf.transpose(tf.squeeze(bellman_residuals, -1))
    bellman_residuals = tf.gather(bellman_residuals, self._nu_indices)
    initial_nu_values = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu, self._initial_nu_indices),
        axis=1)

    bellman_residuals *= self._algae_alpha_sign

    init_nu_loss = ((1 - self._gamma) * initial_nu_values *
                    self._algae_alpha_sign)

    nu_loss = (
        tf.math.square(bellman_residuals) / 2.0 +
        tf.math.abs(self._algae_alpha) * init_nu_loss)

    loss = (
        gamma_data_weights * nu_loss /
        tf.reduce_sum(gamma_data_weights, axis=0, keepdims=True))

    bellman_residuals2 = tf.matmul(
        my_td_mat,
        tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
    bellman_residuals2 = tf.transpose(tf.squeeze(bellman_residuals2, -1))
    bellman_residuals2 = tf.gather(bellman_residuals2, self._nu_indices)
    initial_nu_values2 = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu2, self._initial_nu_indices),
        axis=1)

    bellman_residuals2 *= -1 * self._algae_alpha_sign

    init_nu_loss2 = ((1 - self._gamma) * initial_nu_values2 * -1 *
                     self._algae_alpha_sign)

    nu_loss2 = (
        tf.math.square(bellman_residuals2) / 2.0 +
        tf.math.abs(self._algae_alpha) * init_nu_loss2)

    loss2 = (
        gamma_data_weights * nu_loss2 /
        tf.reduce_sum(gamma_data_weights, axis=0, keepdims=True))

    divergence = self._compute_divergence(weights, log_weights)
    divergence_violation = divergence - self._two_sided_limit

    # NOTE(review): `alpha_loss` is only consumed by the commented-out alpha
    # optimizer code further down.
    alpha_loss = (-tf.exp(self._alpha) *
                  tf.stop_gradient(divergence_violation))

    extra_loss = tf.reduce_sum(tf.math.square(self._nu[-1, :]))
    extra_loss2 = tf.reduce_sum(tf.math.square(self._nu2[-1, :]))
    # The first-order gradients are taken while the tape is still recording
    # so that the Jacobian calls below can differentiate through them.
    nu_grad = tape.gradient(loss + extra_loss, [self._nu])[0]
    nu_grad2 = tape.gradient(loss2 + extra_loss2, [self._nu2])[0]
  avg_loss = tf.reduce_sum(
      0.5 * (loss - loss2) / tf.math.abs(self._algae_alpha), axis=0)
  # Keep only the blocks of the Jacobian whose two `i` indices coincide: one
  # Hessian per entry of `range(self._num_limits)`.
  nu_jacob = tape.jacobian(nu_grad, [self._nu])[0]
  nu_hess = tf.stack([nu_jacob[:, i, :, i] for i in range(self._num_limits)],
                     axis=0)

  nu_jacob2 = tape.jacobian(nu_grad2, [self._nu2])[0]
  nu_hess2 = tf.stack(
      [nu_jacob2[:, i, :, i] for i in range(self._num_limits)], axis=0)

  for idx, div in enumerate(divergence):
    tf.summary.scalar('divergence%d' % idx, div)

  #alpha_grads = tape.gradient(alpha_loss, [self._alpha])
  #alpha_grad_op = self._alpha_optimizer.apply_gradients(
  #    zip(alpha_grads, [self._alpha]))
  #self._alpha.assign(tf.minimum(8., tf.maximum(-8., self._alpha)))
  #print(self._alpha, tf.concat([weights, nu_loss], -1))

  #regularizer = 0.1
  # Solve (H + regularizer * I) x = -grad and step 0.1 * x.
  nu_transformed = tf.transpose(
      tf.squeeze(
          tf.linalg.solve(nu_hess + regularizer * tf.eye(self._dimension),
                          tf.expand_dims(-tf.transpose(nu_grad), axis=-1))))
  self._nu = self._nu + 0.1 * nu_transformed
  nu_transformed2 = tf.transpose(
      tf.squeeze(
          tf.linalg.solve(nu_hess2 + regularizer * tf.eye(self._dimension),
                          tf.expand_dims(-tf.transpose(nu_grad2), axis=-1))))
  self._nu2 = self._nu2 + 0.1 * nu_transformed2

  # NOTE(review): unconditional debug print on every call.
  print(avg_loss * self._algae_alpha_sign,
        avg_saddle_loss * self._algae_alpha_sign, self._nu[:2], divergence)
  #print(init_nu_loss[:8], init_nu_loss[-8:])
  #print(bellman_residuals[:8])
  #print(self._nu[:3], self._zeta[:3])

  # Recompute the zetas from the updated nus and move the stored values a
  # fraction 0.1 of the way towards them.
  zetas = tf.matmul(my_td_mat,
                    tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
  zetas = tf.transpose(tf.squeeze(zetas, -1))
  zetas *= -self._algae_alpha_sign
  zetas /= tf.math.abs(self._algae_alpha)
  self._zeta = self._zeta + 0.1 * (zetas - self._zeta)

  zetas2 = tf.matmul(my_td_mat,
                     tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
  zetas2 = tf.transpose(tf.squeeze(zetas2, -1))
  zetas2 *= 1 * self._algae_alpha_sign
  zetas2 /= tf.math.abs(self._algae_alpha)
  self._zeta2 = self._zeta2 + 0.1 * (zetas2 - self._zeta2)

  #self._zeta = (
  #    tf.einsum('ij,ja-> ia', self._td_mat, self._nu) -
  #    tf.transpose(my_bias))
  #self._zeta *= -tf.reshape(self._algae_alpha_sign, [1, self._num_limits])
  #self._zeta /= tf.math.abs(self._algae_alpha)
  return [
      avg_saddle_loss * self._algae_alpha_sign,
      avg_loss * self._algae_alpha_sign, divergence
  ]
def main(argv):
  """Trains an ensemble of non-variational models and logs its metrics.

  One model is built, trained and saved per ensemble member (optionally on a
  bootstrap resample of the training set). The weight files of all members
  are then evaluated together on the train and test splits.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  session = tf1.Session()
  x_train, y_train, x_test, y_test = datasets.load(session)
  n_train = x_train.shape[0]
  num_classes = int(np.amax(y_train)) + 1

  # (epoch threshold, multiplier) pairs, checked from the latest epoch down.
  lr_decay_steps = ((180, 0.5e-3), (160, 1e-3), (120, 1e-2), (80, 1e-1))

  ensemble_filenames = []
  for member in range(FLAGS.ensemble_size):
    # TODO(trandustin): We re-build the graph for each ensemble member. This
    # is due to an unknown bug where the variables are otherwise not
    # re-initialized to be random. While this is inefficient in graph mode, I'm
    # keeping this for now as we'd like to move to eager mode anyways.
    if FLAGS.resnet:
      model = res_net(
          n_train,
          x_train.shape[1:],
          num_classes,
          batchnorm=FLAGS.batchnorm,
          variational=False)
    else:
      model = lenet5(x_train.shape[1:], num_classes)

    def schedule_fn(epoch):
      """Learning rate schedule function."""
      rate = FLAGS.learning_rate
      for threshold, factor in lr_decay_steps:
        if epoch > threshold:
          return rate * factor
      return rate

    lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

    def negative_log_likelihood(y, rv_y):
      del rv_y  # unused arg
      targets = tf.squeeze(y)
      return -model.output.distribution.log_prob(targets)  # pylint: disable=cell-var-from-loop

    def accuracy(y_true, y_sample):
      del y_sample  # unused arg
      predicted = tf.argmax(
          input=model.output.distribution.logits, axis=1)  # pylint: disable=cell-var-from-loop
      expected = tf.cast(tf.squeeze(y_true), tf.int64)
      return tf.equal(predicted, expected)

    def log_likelihood(y_true, y_sample):
      del y_sample  # unused arg
      targets = tf.squeeze(y_true)
      return model.output.distribution.log_prob(targets)  # pylint: disable=cell-var-from-loop

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
        loss=negative_log_likelihood,
        metrics=[log_likelihood, accuracy])

    member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(member))
    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=member_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    # Pick the data this member trains on: a resample with replacement when
    # bootstrapping, the full training set otherwise.
    if FLAGS.bootstrap:
      inds = np.random.choice(n_train, n_train, replace=True)
      x_fit, y_fit = x_train[inds], y_train[inds]
    else:
      x_fit, y_fit = x_train, y_train

    callbacks = [tensorboard]
    if FLAGS.resnet:
      callbacks.append(lr_callback)

    num_epochs = (FLAGS.batch_size * FLAGS.training_steps) // n_train
    validation_freq = max(
        (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1)
    model.fit(
        x=x_fit,
        y=y_fit,
        batch_size=FLAGS.batch_size,
        epochs=num_epochs,
        validation_data=(x_test, y_test),
        validation_freq=validation_freq,
        verbose=1,
        callbacks=callbacks)

    member_filename = os.path.join(member_dir, 'model.weights')
    ensemble_filenames.append(member_filename)
    model.save_weights(member_filename)

  # Evaluation reuses the graph of the last member built above and swaps in
  # each member's saved weights through `weight_files`.
  labels = tf.keras.layers.Input(shape=y_train.shape[1:])
  ll = tf.keras.backend.function([model.input, labels], [
      model.output.distribution.log_prob(tf.squeeze(labels)),
      model.output.distribution.logits,
  ])

  ensemble_metrics_vals = {
      'train':
          ensemble_metrics(
              x_train, y_train, model, ll, weight_files=ensemble_filenames),
      'test':
          ensemble_metrics(
              x_test, y_test, model, ll, weight_files=ensemble_filenames),
  }

  for split, split_metrics in ensemble_metrics_vals.items():
    logging.info(split)
    for metric_name in split_metrics:
      logging.info('%s: %s', metric_name, split_metrics[metric_name])
def __init__(self,
             vocab_size,
             hidden_size=768,
             num_layers=12,
             num_attention_heads=12,
             sequence_length=512,
             max_sequence_length=None,
             type_vocab_size=16,
             intermediate_size=3072,
             activation=activations.gelu,
             dropout_rate=0.1,
             attention_dropout_rate=0.1,
             initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
             return_all_encoder_outputs=False,
             **kwargs):
  """Builds the encoder as a Keras functional model.

  The network sums word, position and type embeddings, applies layer
  normalization and dropout, runs the result through `num_layers` transformer
  layers, and pools the first token of the last layer through a tanh dense
  layer.

  Args:
    vocab_size: Vocabulary size of the word embedding table.
    hidden_size: Width of the embeddings and of the pooled output.
    num_layers: Number of transformer layers to stack.
    num_attention_heads: Number of attention heads per transformer layer.
    sequence_length: Length of the three integer input sequences.
    max_sequence_length: Maximum length passed to the position embedding;
      falls back to `sequence_length` when falsy.
    type_vocab_size: Vocabulary size of the type embedding table.
    intermediate_size: Intermediate size passed to each transformer layer.
    activation: Intermediate activation; resolved via
      `tf.keras.activations.get`.
    dropout_rate: Dropout rate for the embeddings and transformer layers.
    attention_dropout_rate: Attention dropout rate of the transformer layers.
    initializer: Initializer for embeddings and kernels; resolved via
      `tf.keras.initializers.get`.
    return_all_encoder_outputs: If true, the first model output is the list
      of every layer's output instead of only the last layer's output.
    **kwargs: Forwarded to the parent constructor.
  """
  activation = tf.keras.activations.get(activation)
  initializer = tf.keras.initializers.get(initializer)
  max_sequence_length = max_sequence_length or sequence_length

  self._self_setattr_tracking = False
  self._config_dict = dict(
      vocab_size=vocab_size,
      hidden_size=hidden_size,
      num_layers=num_layers,
      num_attention_heads=num_attention_heads,
      sequence_length=sequence_length,
      max_sequence_length=max_sequence_length,
      type_vocab_size=type_vocab_size,
      intermediate_size=intermediate_size,
      activation=tf.keras.activations.serialize(activation),
      dropout_rate=dropout_rate,
      attention_dropout_rate=attention_dropout_rate,
      initializer=tf.keras.initializers.serialize(initializer),
      return_all_encoder_outputs=return_all_encoder_outputs,
  )

  # Three integer inputs of identical shape.
  sequence_shape = (sequence_length,)
  word_ids = tf.keras.layers.Input(
      shape=sequence_shape, dtype=tf.int32, name='input_word_ids')
  mask = tf.keras.layers.Input(
      shape=sequence_shape, dtype=tf.int32, name='input_mask')
  type_ids = tf.keras.layers.Input(
      shape=sequence_shape, dtype=tf.int32, name='input_type_ids')

  self._embedding_layer = layers.OnDeviceEmbedding(
      vocab_size=vocab_size,
      embedding_width=hidden_size,
      initializer=initializer,
      name='word_embeddings')
  word_embeddings = self._embedding_layer(word_ids)

  # Always uses dynamic slicing for simplicity.
  self._position_embedding_layer = layers.PositionEmbedding(
      initializer=initializer,
      use_dynamic_slicing=True,
      max_sequence_length=max_sequence_length)
  position_embeddings = self._position_embedding_layer(word_embeddings)

  type_embedding_layer = layers.OnDeviceEmbedding(
      vocab_size=type_vocab_size,
      embedding_width=hidden_size,
      initializer=initializer,
      use_one_hot=True,
      name='type_embeddings')
  type_embeddings = type_embedding_layer(type_ids)

  # Sum the three embeddings, then normalize and apply dropout.
  embeddings = tf.keras.layers.Add()(
      [word_embeddings, position_embeddings, type_embeddings])
  embedding_norm = tf.keras.layers.LayerNormalization(
      name='embeddings/layer_norm',
      axis=-1,
      epsilon=util.LAYER_NORM_EPSILON,
      dtype=tf.float32)
  embeddings = embedding_norm(embeddings)
  embeddings = tf.keras.layers.Dropout(rate=dropout_rate)(embeddings)

  self._transformer_layers = []
  hidden = embeddings
  attention_mask = layers.SelfAttentionMask()([hidden, mask])
  encoder_outputs = []
  for layer_index in range(num_layers):
    transformer = layers.Transformer(
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        intermediate_activation=activation,
        dropout_rate=dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        kernel_initializer=initializer,
        name='transformer/layer_%d' % layer_index)
    self._transformer_layers.append(transformer)
    hidden = transformer([hidden, attention_mask])
    encoder_outputs.append(hidden)

  # Pool by transforming the first token of the last layer's output.
  take_first_token = tf.keras.layers.Lambda(
      lambda x: tf.squeeze(x[:, 0:1, :], axis=1))
  first_token_tensor = take_first_token(encoder_outputs[-1])
  pooler = tf.keras.layers.Dense(
      units=hidden_size,
      activation='tanh',
      kernel_initializer=initializer,
      name='pooler_transform')
  cls_output = pooler(first_token_tensor)

  sequence_output = (
      encoder_outputs if return_all_encoder_outputs else encoder_outputs[-1])
  outputs = [sequence_output, cls_output]

  super(TransformerEncoder, self).__init__(
      inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs)
def calc_spectrograms(waves,
                      window_lengths,
                      spectral_diffs=(0, 1),
                      window_name='hann',
                      use_mel_scale=True,
                      proj_method='matmul',
                      num_spec_bins=256,
                      random_crop=True):
  """Calculate spectrograms with multiple window sizes for list of input waves.

  Args:
    waves: List of float tensors of shape [batch, length] or [batch, length, 1].
      NOTE(review): the last axis is squeezed unconditionally, so a rank-2
      input only works when its last dimension is 1 -- confirm with callers.
    window_lengths: List of Int. Window sizes (frame lengths) to use for
      computing the spectrograms.
    spectral_diffs: Iterable of Int. Orders of finite difference to apply to
      the waves before computing spectrograms; one set of spectrograms is
      produced per order and window length.
    window_name: Str. Name of the window to use when computing the
      spectrograms. Supports 'hann' and None.
    use_mel_scale: Bool. Whether or not to project to mel-scale frequencies.
    proj_method: Str. Spectral projection method implementation to use.
      Supported are 'fft' and 'matmul'.
    num_spec_bins: Int. Number of bins in the spectrogram.
    random_crop: Bool. Take random crop or not.

  Returns:
    Tuple with one entry per input wave. Entry `i` is a tuple of magnitude
    spectrograms for wave `i`, one per (spectral diff, window length)
    combination, ordered with the window length varying fastest.

  Raises:
    ValueError: If `window_name` or `proj_method` is not supported.
  """
  # Fail fast: an unsupported method used to surface only as an unbound
  # `ffts` deep inside the loop.
  if proj_method not in ('fft', 'matmul'):
    raise ValueError('Unknown projection method (%s).' % proj_method)

  waves = [tf.squeeze(w, axis=-1) for w in waves]

  if window_name == 'hann':
    # Reshaped to [1, 1, length] so it multiplies [batch, frames, length].
    windows = [
        tf.reshape(tf.signal.hann_window(wl, periodic=False), [1, 1, -1])
        for wl in window_lengths
    ]
  elif window_name is None:
    windows = [None] * len(window_lengths)
  else:
    raise ValueError('Unknown window function (%s).' % window_name)

  spec_len_wave = []
  for d in spectral_diffs:
    for length, window in zip(window_lengths, windows):

      # Apply the d-th order finite difference along the time axis.
      wave_crops = waves
      for _ in range(d):
        wave_crops = [w[:, 1:] - w[:, :-1] for w in wave_crops]

      if random_crop:
        wave_crops = aligned_random_crop(wave_crops, length)

      # Frames overlap by half a window.
      frames = [tf.signal.frame(wc, length, length // 2) for wc in wave_crops]
      if window is not None:
        frames = [f * window for f in frames]

      if proj_method == 'fft':
        # Drop the first (DC) bin of the real FFT.
        ffts = [tf.signal.rfft(f)[:, :, 1:] for f in frames]
      else:  # proj_method == 'matmul', validated above.
        mat = get_spectral_matrix(
            length, num_spec_bins=num_spec_bins, use_mel_scale=use_mel_scale)
        ffts = [matmul_real_with_complex(f, mat) for f in frames]

      # Squared magnitude of the complex spectra.
      specs_sq = [
          tf.square(tf.math.real(x)) + tf.square(tf.math.imag(x)) for x in ffts
      ]

      if use_mel_scale and proj_method == 'fft':
        sample_rate = 24000
        upper_edge_hertz = sample_rate / 2.
        lower_edge_hertz = sample_rate / length
        # The first row is dropped to match the DC bin removed above.
        lin_to_mel = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=num_spec_bins,
            num_spectrogram_bins=length // 2 + 1,
            sample_rate=sample_rate,
            lower_edge_hertz=lower_edge_hertz,
            upper_edge_hertz=upper_edge_hertz,
            dtype=tf.dtypes.float32)[1:]
        specs_sq = [tf.matmul(s, lin_to_mel) for s in specs_sq]

      # EPSILON keeps the square root (and its gradient) finite at zero.
      specs = [tf.sqrt(s + EPSILON) for s in specs_sq]
      spec_len_wave.append(specs)

  # Transpose from [diff/window][wave] to [wave][diff/window]. Materialized as
  # a tuple so the result can be indexed and iterated more than once.
  return tuple(zip(*spec_len_wave))
def log_likelihood(y_true, y_sample):
  """Returns the log-probability of `y_true` under the model's output.

  Args:
    y_true: Ground-truth labels; squeezed before being scored.
    y_sample: Ignored; present only to match the metric signature.
  """
  del y_sample  # unused arg
  targets = tf.squeeze(y_true)
  output_distribution = model.output.distribution  # pylint: disable=cell-var-from-loop
  return output_distribution.log_prob(targets)
def train_step(self,
               dataset: dataset_lib.OffpolicyDataset,
               target_policy: tf_policy.TFPolicy,
               regularizer: float = 1e-6):
  """Performs single iteration of CoinDICE.

  The step (1) evaluates the saddle-point (Lagrangian) loss for the two
  sign-flipped problems tracked in `nu`/`zeta` and `nu2`/`zeta2`, (2) runs a
  16-step bisection on `self._alpha` so the divergence of the resulting data
  weights approaches `self._two_sided_limit`, (3) rebuilds the Bellman
  residual matrix and bias under those weights, (4) takes a regularized Newton
  step on `nu`/`nu2`, and (5) moves `zeta`/`zeta2` towards their closed-form
  targets. `self._alpha`, `self._nu`, `self._nu2`, `self._zeta` and
  `self._zeta2` are updated as a side effect.

  Args:
    dataset: The dataset to sample experience from. Not read by this method;
      the pre-computed tensors stored on `self` are used instead.
    target_policy: The policy whose value we want to estimate. Not read by
      this method.
    regularizer: A small constant added to the diagonal of the Hessians before
      solving the Newton systems.

  Returns:
    A list `[avg_saddle_loss * sign, avg_loss * sign, divergence]`, where
    `sign` is `self._algae_alpha_sign`, the first two entries are the weighted
    average saddle-point and primal losses, and `divergence` is the divergence
    of the final data weights.
  """
  # First compute Lagrangian loss.
  saddle_bellman_residuals = (tf.matmul(self._a_vec, self._nu) -
                              self._weighted_rewards[:, None])
  saddle_bellman_residuals *= -1 * self._algae_alpha_sign
  saddle_zetas = tf.gather(self._zeta, self._nu_indices)
  saddle_initial_nu_values = tf.reduce_sum(  # Average over actions.
      self._initial_target_probs[:, :, None] *
      tf.gather(self._nu, self._initial_nu_indices),
      axis=1)
  saddle_init_nu_loss = ((1 - self._gamma) * saddle_initial_nu_values *
                         self._algae_alpha_sign)

  # This second optimization switches the sign of algae_alpha.
  # We add these two together to get the final loss, and thus counteract
  # the bias introduced by algae_alpha.
  saddle_bellman_residuals2 = (tf.matmul(self._a_vec, self._nu2) -
                               self._weighted_rewards[:, None])
  saddle_bellman_residuals2 *= 1 * self._algae_alpha_sign
  saddle_zetas2 = tf.gather(self._zeta2, self._nu_indices)
  saddle_initial_nu_values2 = tf.reduce_sum(  # Average over actions.
      self._initial_target_probs[:, :, None] *
      tf.gather(self._nu2, self._initial_nu_indices),
      axis=1)
  saddle_init_nu_loss2 = ((1 - self._gamma) * saddle_initial_nu_values2 * -1 *
                          self._algae_alpha_sign)

  # Half the difference of the two sign-flipped saddle objectives.
  saddle_loss = 0.5 * (
      saddle_init_nu_loss + saddle_bellman_residuals * saddle_zetas +
      -tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas) +
      -saddle_init_nu_loss2 + -saddle_bellman_residuals2 * saddle_zetas2 +
      tf.math.abs(self._algae_alpha) * 0.5 * tf.square(saddle_zetas2))

  # Find optimal weights by doing binary search on alpha (lambda in the
  # paper).
  # The bracket has two entries; presumably one per confidence limit
  # (`self._num_limits`) -- TODO confirm.
  left = tf.constant([-8., -8.])
  right = tf.constant([32., 32.])
  for _ in range(16):
    mid = 0.5 * (left + right)
    self._alpha.assign(mid)
    weights, log_weights = self._get_weights(saddle_loss)

    divergence = self._compute_divergence(weights, log_weights)
    divergence_violation = divergence - self._two_sided_limit
    # Divergence above the limit: raise the lower bound; otherwise lower the
    # upper bound.
    left = tf.where(divergence_violation > 0., mid, left)
    right = tf.where(divergence_violation > 0., right, mid)
  self._alpha.assign(0.5 * (left + right))
  weights, log_weights = self._get_weights(saddle_loss)

  # Now that we have weights, we reconstruct the Bellman residual matrices.
  data_weights = tf.stop_gradient(weights)
  avg_saddle_loss = (
      tf.reduce_sum(data_weights * saddle_loss, axis=0) /
      tf.reduce_sum(data_weights, axis=0))

  # Total weight falling on each tabular index, then gathered back per sample.
  weighted_state_action_count = tf.reduce_sum(
      tf.one_hot(self._nu_indices, self._dimension)[:, :, None] *
      weights[:, None, :],
      axis=0)
  weighted_state_action_count = tf.gather(weighted_state_action_count,
                                          self._nu_indices)
  my_td_mat = tf.einsum('ai, ab, ab, aj -> bij',
                        tf.one_hot(self._nu_indices, self._dimension),
                        1.0 / weighted_state_action_count, weights,
                        self._a_vec)
  my_bias = tf.reduce_sum(
      tf.transpose(weights)[:, :, None] *
      tf.one_hot(self._nu_indices, self._dimension)[None, :, :] *
      tf.reshape(self._weighted_rewards, [1, -1, 1]) * 1.0 /
      tf.transpose(weighted_state_action_count)[:, :, None],
      axis=1)

  # Solve for nu using primal form; i.e., E[(nu - B nu)^2] - (1-g) * E[nu0].
  # The tape is persistent because it is queried several times: for both
  # gradients inside the context and for both Jacobians after it.
  with tf.GradientTape(
      watch_accessed_variables=False, persistent=True) as tape:
    tape.watch([self._nu, self._nu2, self._alpha])
    bellman_residuals = tf.matmul(
        my_td_mat,
        tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
    bellman_residuals = tf.transpose(tf.squeeze(bellman_residuals, -1))
    bellman_residuals = tf.gather(bellman_residuals, self._nu_indices)
    initial_nu_values = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu, self._initial_nu_indices),
        axis=1)

    bellman_residuals *= self._algae_alpha_sign

    init_nu_loss = ((1 - self._gamma) * initial_nu_values *
                    self._algae_alpha_sign)

    nu_loss = (
        tf.math.square(bellman_residuals) / 2.0 +
        tf.math.abs(self._algae_alpha) * init_nu_loss)

    loss = (
        data_weights * nu_loss /
        tf.reduce_sum(data_weights, axis=0, keepdims=True))

    # Same computation for the sign-flipped problem.
    bellman_residuals2 = tf.matmul(
        my_td_mat,
        tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
    bellman_residuals2 = tf.transpose(
        tf.squeeze(bellman_residuals2, -1))
    bellman_residuals2 = tf.gather(bellman_residuals2, self._nu_indices)
    initial_nu_values2 = tf.reduce_sum(  # Average over actions.
        self._initial_target_probs[:, :, None] *
        tf.gather(self._nu2, self._initial_nu_indices),
        axis=1)

    bellman_residuals2 *= -1 * self._algae_alpha_sign

    init_nu_loss2 = ((1 - self._gamma) * initial_nu_values2 * -1 *
                     self._algae_alpha_sign)

    nu_loss2 = (
        tf.math.square(bellman_residuals2) / 2.0 +
        tf.math.abs(self._algae_alpha) * init_nu_loss2)

    loss2 = (
        data_weights * nu_loss2 /
        tf.reduce_sum(data_weights, axis=0, keepdims=True))

    divergence = self._compute_divergence(weights, log_weights)
    # NOTE(review): this value is not read again in this method.
    divergence_violation = divergence - self._two_sided_limit

    # Extra loss for the 'terminal' state (index = -1).
    extra_loss = tf.reduce_sum(tf.math.square(self._nu[-1, :]))
    extra_loss2 = tf.reduce_sum(tf.math.square(self._nu2[-1, :]))

    # Taken inside the tape context so the gradients themselves are recorded
    # and can be differentiated again below.
    nu_grad = tape.gradient(loss + extra_loss, [self._nu])[0]
    nu_grad2 = tape.gradient(loss2 + extra_loss2, [self._nu2])[0]
  avg_loss = tf.reduce_sum(
      0.5 * (loss - loss2) / tf.math.abs(self._algae_alpha), axis=0)
  # Keep, for each limit i, only the Jacobian block pairing column i of the
  # gradient with column i of nu; these are stacked as the Hessians.
  nu_jacob = tape.jacobian(nu_grad, [self._nu])[0]
  nu_hess = tf.stack(
      [nu_jacob[:, i, :, i] for i in range(self._num_limits)], axis=0)

  nu_jacob2 = tape.jacobian(nu_grad2, [self._nu2])[0]
  nu_hess2 = tf.stack(
      [nu_jacob2[:, i, :, i] for i in range(self._num_limits)], axis=0)

  for idx, div in enumerate(divergence):
    tf.summary.scalar('divergence%d' % idx, div)

  # Perform Newton step on nu.
  # `regularizer` is added to the diagonal before solving the linear system.
  nu_transformed = tf.transpose(
      tf.squeeze(
          tf.linalg.solve(
              nu_hess + regularizer * tf.eye(self._dimension),
              tf.expand_dims(-tf.transpose(nu_grad), axis=-1))))
  self._nu = self._nu + self._nu_learning_rate * nu_transformed
  nu_transformed2 = tf.transpose(
      tf.squeeze(
          tf.linalg.solve(
              nu_hess2 + regularizer * tf.eye(self._dimension),
              tf.expand_dims(-tf.transpose(nu_grad2), axis=-1))))
  self._nu2 = self._nu2 + self._nu_learning_rate * nu_transformed2

  # Perform step on zeta based on fact that zeta* = (nu* - bellman nu*)/a.
  zetas = tf.matmul(my_td_mat,
                    tf.transpose(self._nu)[:, :, None]) - my_bias[:, :, None]
  zetas = tf.transpose(tf.squeeze(zetas, -1))
  zetas *= -self._algae_alpha_sign
  zetas /= tf.math.abs(self._algae_alpha)
  self._zeta = self._zeta + self._zeta_learning_rate * (zetas - self._zeta)

  zetas2 = tf.matmul(my_td_mat,
                     tf.transpose(self._nu2)[:, :, None]) - my_bias[:, :, None]
  zetas2 = tf.transpose(tf.squeeze(zetas2, -1))
  zetas2 *= 1 * self._algae_alpha_sign
  zetas2 /= tf.math.abs(self._algae_alpha)
  self._zeta2 = (
      self._zeta2 + self._zeta_learning_rate * (zetas2 - self._zeta2))

  return [
      avg_saddle_loss * self._algae_alpha_sign,
      avg_loss * self._algae_alpha_sign, divergence
  ]
def prepare_dataset(self, dataset: dataset_lib.OffpolicyDataset,
                    target_policy: tf_policy.TFPolicy):
  """Performs pre-computations on dataset to make solving easier.

  Reads all episodes (up to `self._limit_episodes`) from `dataset`, flattens
  them into per-sample (initial step, step, next step) triples, keeps only
  samples that are valid and have positive discount, and maps states and
  actions to indices of the tabular representation. From these it builds the
  Bellman residual matrices, the (policy-ratio weighted) rewards and the bias
  vector, and stores them on `self` together with the initial values of
  `nu`/`nu2`.

  Args:
    dataset: The off-policy dataset to read episodes from.
    target_policy: The policy whose action probabilities and log-probabilities
      are evaluated at the initial, current and next steps.
  """
  episodes, valid_steps = dataset.get_all_episodes(
      limit=self._limit_episodes)
  # The last step of each episode only ever serves as a "next" step.
  total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
  num_episodes = tf.shape(valid_steps)[0]
  num_samples = num_episodes * total_num_steps_per_episode
  valid_and_not_last = tf.logical_and(valid_steps, episodes.discount > 0)
  # Flat indices of the samples that survive filtering.
  valid_indices = tf.squeeze(
      tf.where(tf.reshape(valid_and_not_last[:, :-1], [-1])))

  # Flatten all tensors so that each data sample is a tuple of
  # (initial_env_step, env_step, next_env_step).
  initial_env_step = tf.nest.map_structure(
      lambda t: tf.squeeze(
          tf.reshape(
              tf.repeat(
                  t[:, 0:1, ...], axis=1, repeats=total_num_steps_per_episode),
              [num_samples, -1])), episodes)
  initial_env_step = tf.nest.map_structure(
      lambda t: tf.gather(t, valid_indices), initial_env_step)
  tfagents_initial_env_step = dataset_lib.convert_to_tfagents_timestep(
      initial_env_step)

  env_step = tf.nest.map_structure(
      lambda t: tf.squeeze(
          tf.reshape(t[:, 0:total_num_steps_per_episode, ...],
                     [num_samples, -1])), episodes)
  env_step = tf.nest.map_structure(lambda t: tf.gather(t, valid_indices),
                                   env_step)
  tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

  next_env_step = tf.nest.map_structure(
      lambda t: tf.squeeze(
          tf.reshape(t[:, 1:total_num_steps_per_episode + 1, ...],
                     [num_samples, -1])), episodes)
  next_env_step = tf.nest.map_structure(
      lambda t: tf.gather(t, valid_indices), next_env_step)
  tfagents_next_env_step = dataset_lib.convert_to_tfagents_timestep(
      next_env_step)

  # Get target probabilities for initial and next steps.
  initial_target_probs = target_policy.distribution(
      tfagents_initial_env_step).action.probs_parameter()
  next_target_probs = target_policy.distribution(
      tfagents_next_env_step).action.probs_parameter()

  # Map states and actions to indices into tabular representation.
  # Each initial/next state is paired with every action.
  initial_states = tf.tile(
      tf.reshape(initial_env_step.observation, [-1, 1]),
      [1, self._num_actions])
  initial_actions = tf.tile(
      tf.reshape(tf.range(self._num_actions), [1, -1]),
      [initial_env_step.observation.shape[0], 1])
  initial_nu_indices = self._get_index(initial_states, initial_actions)
  next_states = tf.tile(
      tf.reshape(next_env_step.observation, [-1, 1]),
      [1, self._num_actions])
  next_actions = tf.tile(
      tf.reshape(tf.range(self._num_actions), [1, -1]),
      [next_env_step.observation.shape[0], 1])
  next_nu_indices = self._get_index(next_states, next_actions)
  # Absorbing next steps get index -1.
  next_nu_indices = tf.where(
      tf.expand_dims(next_env_step.is_absorbing(), -1),
      -1 * tf.ones_like(next_nu_indices), next_nu_indices)

  nu_indices = self._get_index(env_step.observation, env_step.action)

  target_log_probabilities = target_policy.distribution(
      tfagents_env_step).action.log_prob(env_step.action)
  if not self._solve_for_state_action_ratio:
    # Ratio of target to logged action probability for each sample.
    policy_ratio = tf.exp(target_log_probabilities -
                          env_step.get_log_probability())
  else:
    policy_ratio = tf.ones([
        target_log_probabilities.shape[0],
    ])
  policy_ratios = tf.tile(
      tf.reshape(policy_ratio, [-1, 1]), [1, self._num_actions])

  # Bellman residual matrix of size [n_data, n_dim].
  a_vec = tf.one_hot(nu_indices, self._dimension) - tf.reduce_sum(
      self._gamma *
      tf.expand_dims(next_target_probs * policy_ratios, axis=-1) *
      tf.one_hot(next_nu_indices, self._dimension),
      axis=1)

  state_action_count = self._get_state_action_counts(env_step)
  # Bellman residual matrix of size [n_dim, n_dim].
  td_mat = tf.einsum('ai, a, aj -> ij',
                     tf.one_hot(nu_indices, self._dimension),
                     1.0 / tf.cast(state_action_count, tf.float32), a_vec)

  # Reward vector of size [n_data].
  weighted_rewards = policy_ratio * self._reward_fn(env_step)

  # Reward vector of size [n_dim].
  bias = tf.reduce_sum(
      tf.one_hot(nu_indices, self._dimension) *
      tf.reshape(weighted_rewards, [-1, 1]) * 1.0 /
      tf.cast(state_action_count, tf.float32)[:, None],
      axis=0)

  # Initialize.
  # Every column of nu / nu2 starts out equal to the bias vector.
  self._nu = np.ones_like(self._nu) * bias[:, None]
  self._nu2 = np.ones_like(self._nu2) * bias[:, None]

  self._a_vec = a_vec
  self._td_mat = td_mat
  self._bias = bias
  self._weighted_rewards = weighted_rewards
  self._state_action_count = state_action_count
  self._nu_indices = nu_indices
  self._initial_nu_indices = initial_nu_indices
  self._initial_target_probs = initial_target_probs
def test_correctness(self,
                     optimizer_fn,
                     noise_size,
                     use_analytic_pricing,
                     expected_mr,
                     expected_vol,
                     mr_rtol=1e-4,
                     mr_atol=1e-3,
                     vol_rtol=1e-4,
                     vol_atol=1e-3):
  """Tests calibration with constant parameters.

  Prices are generated from the model with known parameters, perturbed with
  Gaussian noise scaled by `noise_size`, and then the calibration is expected
  to recover the parameters within the given tolerances.
  """
  dtype = tf.float64

  def zero_rate_fn(x):
    return 0.01 * tf.ones_like(x, dtype=dtype)

  # Arguments describing the instruments, shared by pricing and calibration.
  instrument_kwargs = dict(
      strikes=self.strikes,
      expiries=self.expiries,
      maturities=self.maturities,
      daycount_fractions=self.daycount_fractions,
      reference_rate_fn=zero_rate_fn,
      notional=1.0,
      dim=1,
      dtype=dtype)

  # Setup - generate some observed prices using the model.
  clean_prices = tff.models.hull_white.cap_floor_price(
      mean_reversion=[expected_mr],
      volatility=[expected_vol],
      is_cap=tf.expand_dims(self.is_cap, axis=1),
      use_analytic_pricing=True,
      **instrument_kwargs)
  noise = tf.random.normal(
      clean_prices.shape,
      stddev=noise_size * clean_prices,
      seed=0,
      dtype=dtype)
  prices = clean_prices + noise

  # Calibrate the model.
  calibration = tff.models.hull_white.calibration_from_cap_floors(
      prices=tf.squeeze(prices),
      mean_reversion=[0.4],
      volatility=[0.02],
      is_cap=tf.expand_dims(self.is_cap, axis=1),
      use_analytic_pricing=use_analytic_pricing,
      optimizer_fn=optimizer_fn,
      num_samples=1000,
      random_type=tff.math.random.RandomType.STATELESS_ANTITHETIC,
      seed=[0, 0],
      time_step=0.1,
      maximum_iterations=200,
      **instrument_kwargs)
  calibrated_model, is_converged, _ = calibration

  fitted = self.evaluate(
      tf.concat(
          axis=0,
          values=[
              calibrated_model.mean_reversion.values(),
              calibrated_model.volatility.values()
          ]))
  mr = fitted[0]
  vol = fitted[1]

  # Assert model convergence to expected parameters.
  self.assertTrue(is_converged)
  self.assertAllClose(mr, expected_mr, rtol=mr_rtol, atol=mr_atol)
  self.assertAllClose(vol, expected_vol, rtol=vol_rtol, atol=vol_atol)
def test_docstring_example(self):
  """Explicitly test the code provided in the docstring."""
  # In this example, we synthetically generate some prices. Then we use our
  # calibration to back out these prices.
  dtype = tf.float64

  # Two instruments with four payments (zero-padded to eight columns),
  # followed by two instruments with eight payments.
  short_daycount = [0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0]
  long_daycount = [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]
  daycount_fractions = np.array(
      [short_daycount, short_daycount, long_daycount, long_daycount])

  short_expiries = [0.0, 0.25, 0.5, 0.75, 1.0, 0.0, 0.0, 0.0]
  long_expiries = [0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.50, 1.75]
  expiries = np.array(
      [short_expiries, short_expiries, long_expiries, long_expiries])

  short_maturities = [0.25, 0.5, 0.75, 1.0, 0.0, 0.0, 0.0, 0.0]
  long_maturities = [0.25, 0.5, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0]
  maturities = np.array(
      [short_maturities, short_maturities, long_maturities, long_maturities])

  is_cap = np.array([True, False, True, False])
  strikes = 0.01 * np.ones_like(expiries)

  # Setup - generate some observed prices using the model.
  expected_mr = [0.4]
  expected_vol = [0.01]

  def zero_rate_fn(x):
    return 0.01 * tf.ones_like(x, dtype=dtype)

  # Arguments describing the instruments, shared by pricing and calibration.
  instrument_kwargs = dict(
      strikes=strikes,
      expiries=expiries,
      maturities=maturities,
      daycount_fractions=daycount_fractions,
      reference_rate_fn=zero_rate_fn,
      notional=1.0,
      dim=1,
      use_analytic_pricing=True,
      dtype=dtype)

  prices = tff.models.hull_white.cap_floor_price(
      mean_reversion=expected_mr,
      volatility=expected_vol,
      is_cap=tf.expand_dims(is_cap, axis=1),
      **instrument_kwargs)

  # Calibrate the model.
  calibration = tff.models.hull_white.calibration_from_cap_floors(
      prices=tf.squeeze(prices),
      mean_reversion=[0.3],
      volatility=[0.02],
      is_cap=tf.expand_dims(is_cap, axis=1),
      optimizer_fn=None,
      num_samples=1000,
      random_type=tff.math.random.RandomType.STATELESS_ANTITHETIC,
      seed=[0, 0],
      time_step=0.1,
      maximum_iterations=200,
      **instrument_kwargs)
  calibrated_model, is_converged, _ = calibration

  calibrated_mr, calibrated_vol = self.evaluate([
      calibrated_model.mean_reversion.values(),
      calibrated_model.volatility.values(),
  ])

  self.assertTrue(is_converged)
  self.assertAllClose(calibrated_mr, expected_mr, atol=1e-3, rtol=1e-2)
  self.assertAllClose(calibrated_vol, expected_vol, atol=1e-3, rtol=1e-2)
def negative_log_likelihood(y, rv_y):
  """Returns the negated log-probability of `y` under the model's output.

  Args:
    y: Ground-truth labels; squeezed before being scored.
    rv_y: Ignored; present only to match the loss signature.
  """
  del rv_y  # unused arg
  targets = tf.squeeze(y)
  output_distribution = model.output.distribution  # pylint: disable=cell-var-from-loop
  return -output_distribution.log_prob(targets)
def call(self, y_true, y_pred):
  """Returns soft quantiles of the absolute prediction error raised to a power.

  Args:
    y_true: Target values.
    y_pred: Predicted values; squeezed before being compared to `y_true`.

  Returns:
    The result of `ops.softquantiles` applied along axis 0 to
    `|squeeze(y_pred) - y_true| ** self._power` at `self._quantile`.
  """
  residual = tf.squeeze(y_pred) - y_true
  error = tf.pow(tf.abs(residual), self._power)
  return ops.softquantiles(error, self._quantile, axis=0, **self._kwargs)
def apply(self, x1, x2, example_ndims=0):
  """Apply the kernel function to pairs of inputs.

  Args:
    x1: `Tensor` input to the kernel, of shape `B1 + E1 + F`, where `B1` and
      `E1` may be empty (ie, no batch/example dims, resp.) and `F` (the
      feature shape) must have rank equal to the kernel's `feature_ndims`
      property. Batch shape must broadcast with the batch shape of `x2` and
      with the kernel's batch shape. Example shape must broadcast with example
      shape of `x2`. `x1` and `x2` must have the same *number* of example dims
      (ie, same rank).
    x2: `Tensor` input to the kernel, of shape `B2 + E2 + F`, where `B2` and
      `E2` may be empty (ie, no batch/example dims, resp.) and `F` (the
      feature shape) must have rank equal to the kernel's `feature_ndims`
      property. Batch shape must broadcast with the batch shape of `x1` and
      with the kernel's batch shape. Example shape must broadcast with example
      shape of `x1`. `x1` and `x2` must have the same *number* of example dims
      (ie, same rank).
    example_ndims: A python integer, the number of example dims in the inputs.
      In essence, this parameter controls how broadcasting of the kernel's
      batch shape with input batch shapes works. The kernel batch shape will
      be broadcast against everything to the left of the combined example and
      feature dimensions in the input shapes.

  Returns:
    `Tensor` containing the results of applying the kernel function to inputs
    `x1` and `x2`. If the kernel parameters' batch shape is `Bk` then the shape
    of the `Tensor` resulting from this method call is
    `broadcast(Bk, B1, B2) + broadcast(E1, E2)`.

  Given an index set `S`, a kernel function is mathematically defined as a
  real- or complex-valued function on `S` satisfying the
  positive semi-definiteness constraint:

  ```none
  sum_i sum_j (c[i]*) c[j] k(x[i], x[j]) >= 0
  ```

  for any finite collections `{x[1], ..., x[N]}` in `S` and
  `{c[1], ..., c[N]}` in the reals (or the complex plane). '*' is the complex
  conjugate, in the complex case.

  This method most closely resembles the function described in the
  mathematical definition of a kernel. Given a PositiveSemidefiniteKernel `k`
  with scalar parameters and inputs `x` and `y` in `S`, `apply(x, y)` yields a
  single scalar value.

  #### Examples

  ```python
  import tensorflow_probability as tfp

  # Suppose `SomeKernel` acts on vectors (rank-1 tensors)
  scalar_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=.5)
  scalar_kernel.batch_shape
  # ==> []

  # `x` and `y` are batches of five 3-D vectors:
  x = np.ones([5, 3], np.float32)
  y = np.ones([5, 3], np.float32)
  scalar_kernel.apply(x, y).shape
  # ==> [5]
  ```

  The above output is the result of vectorized computation of the five values

  ```none
  [k(x[0], y[0]), k(x[1], y[1]), ..., k(x[4], y[4])]
  ```

  Now we can consider a kernel with batched parameters:

  ```python
  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=[.2, .5])
  batch_kernel.batch_shape
  # ==> [2]
  batch_kernel.apply(x, y).shape
  # ==> Error! [2] and [5] can't broadcast.
  ```

  The parameter batch shape of `[2]` and the input batch shape of `[5]` can't
  be broadcast together. We can fix this in either of two ways:

  1. Give the parameter a shape of `[2, 1]` which will correctly
  broadcast with `[5]` to yield `[2, 5]`:

  ```python
  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
      param=[[.2], [.5]])
  batch_kernel.batch_shape
  # ==> [2, 1]
  batch_kernel.apply(x, y).shape
  # ==> [2, 5]
  ```

  2. By specifying `example_ndims`, which tells the kernel to treat the `5`
  in the input shape as part of the "example shape", and "pushing" the
  kernel batch shape to the left:

  ```python
  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=[.2, .5])
  batch_kernel.batch_shape
  # ==> [2]
  batch_kernel.apply(x, y, example_ndims=1).shape
  # ==> [2, 5]
  ```

  """
  with self._name_and_control_scope(self._name):
    x1 = tf.convert_to_tensor(x1, name='x1', dtype_hint=self.dtype)
    x2 = tf.convert_to_tensor(x2, name='x2', dtype_hint=self.dtype)

    if example_ndims != 0:
      return self._apply(x1, x2, example_ndims=example_ndims)

    # With no example dims, insert a singleton example dim just left of the
    # feature dims, apply with one example dim, and squeeze it back out.
    example_axis = -(self.feature_ndims + 1)
    result = self._apply(
        tf.expand_dims(x1, example_axis),
        tf.expand_dims(x2, example_axis),
        example_ndims=example_ndims + 1)
    return tf.squeeze(result, axis=-1)
def test_batching(self, input_batch_shape, kernel_batch_shape):
  """Checks the batched convolution against per-example `conv2d_transpose`.

  The inputs and kernels are broadcast to a common batch shape, flattened to
  a single batch dimension, and pushed one example at a time through
  `tf.nn.conv2d_transpose`; the result must match the batched output.
  """
  input_shape = (12, 12, 2)
  filter_shape = (3, 3)
  channels_out = 4
  strides = 2
  dilations = (1, 1)
  padding = 'SAME'
  rank = 2

  def flatten_batch(tensor, trailing_shape):
    # Collapses all leading dims into one: [-1] + trailing_shape.
    return tf.reshape(
        tensor,
        shape=ps.pad(trailing_shape, paddings=[[1, 0]], constant_values=-1))

  x, k = _make_input_and_kernel(
      self.make_input,
      input_batch_shape=input_batch_shape,
      input_shape=input_shape,
      kernel_batch_shape=kernel_batch_shape,
      filter_shape=filter_shape,
      channels_out=channels_out,
      dtype=self.dtype)

  conv_fn = self.make_conv_fn(filter_shape, strides, padding, dilations)
  y_batched = conv_fn(x, k)

  # Broadcast both operands to the common batch shape.
  broadcast_batch_shape = ps.broadcast_shape(
      input_batch_shape, kernel_batch_shape)
  broadcasted_input = tf.broadcast_to(
      x, shape=ps.concat([broadcast_batch_shape, input_shape], axis=0))
  broadcasted_kernel = tf.broadcast_to(
      k, shape=ps.concat([broadcast_batch_shape, ps.shape(k)[-2:]], axis=0))

  flat_y = flatten_batch(y_batched, ps.shape(y_batched)[-3:])
  flat_x = flatten_batch(broadcasted_input, input_shape)

  # Reshape the kernel to spatial + (in, out) dims, then swap the last two
  # axes for `conv2d_transpose`.
  tf_kernel_shape = ps.concat(
      [(-1,), filter_shape, (input_shape[-1], channels_out)], axis=0)
  flat_tf_kernel = tf.einsum(
      '...ij->...ji', tf.reshape(broadcasted_kernel, shape=tf_kernel_shape))

  output_shape, strides_ = convolution_util._get_output_shape(
      rank=rank,
      strides=(strides,) * rank,
      padding=padding,
      dilations=dilations,
      input_shape=input_shape,
      output_size=channels_out,
      filter_shape=filter_shape)

  def transpose_conv_single(args):
    # Runs one (input, kernel) pair with a leading batch dim of size 1.
    single_x, single_kernel = args[0], args[1]
    return tf.nn.conv2d_transpose(
        single_x[tf.newaxis],
        single_kernel,
        output_shape=ps.concat([[1], output_shape], axis=0),
        strides=strides_,
        padding=padding)

  y_expected = tf.vectorized_map(
      transpose_conv_single, elems=(flat_x, flat_tf_kernel))

  [y_actual_, y_expected_] = self.evaluate(
      [flat_y, tf.squeeze(y_expected, axis=1)])
  self.assertAllClose(y_expected_, y_actual_, rtol=1e-5, atol=0)
def beam_search(symbols_to_logits_fn,
                init_seq_BxT,
                initial_cache_BxU,
                vocab_size,
                beam_size,
                length_norm_fn,
                eos_id=1):
  """Beam search.

  Variable names carry their shape as a suffix: B is the batch size and T the
  sequence length (both read from `init_seq_BxT.shape`), M is `beam_size`, V
  is `vocab_size`, and U stands for the trailing dims of the cache tensors.

  The loop always runs T steps. At each step the 2 * M best continuations are
  kept; the M best ones that do not contain `eos_id` stay alive, and the ones
  that do contain it compete with earlier finished sequences on their
  length-normalized score.

  Args:
    symbols_to_logits_fn: fn(seq_BxT, cache_BxU, i) -> (logits_BxV, cache_BxU)
    init_seq_BxT: initial sequence ids.
    initial_cache_BxU: dictionary of tensors with shape BxU.
    vocab_size: vocabulary size.
    beam_size: beam size.
    length_norm_fn: length normalization function. Called as
      `length_norm_fn(log_probs, length)`.
    eos_id: end of sequence.

  Returns:
    Tuple of (beams_BxMxT, scores_BxM). Beam searched sequences and scores.
    Entries whose finished slot contains `eos_id` come from the finished set;
    all others fall back to the alive sequences and their log-probabilities.
  """
  B, T = init_seq_BxT.shape
  M, V = beam_size, vocab_size
  dtype = tf.float32
  int_dtype = init_seq_BxT.dtype

  def _loop_body(i, alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU,
                 finished_seq_BxMxT, finished_scores_BxM):
    """Beam search loop body."""
    # Decode one step with beam
    logits_BMxV, cache_BMxU = symbols_to_logits_fn(
        _flatten_beam_dim(alive_seq_BxMxT),
        tf.nest.map_structure(_flatten_beam_dim, alive_cache_BxMxU), i)
    logits_BxMxV = _unflatten_beam_dim(logits_BMxV, M)
    new_cache_BxMxU = tf.nest.map_structure(
        lambda t: _unflatten_beam_dim(t, M), cache_BMxU)

    # select top 2 * beam_size and fill alive and finished.
    # Normalize logits to log-probabilities and add each beam's running total.
    log_probs_BxMxV = logits_BxMxV - tf.reduce_logsumexp(
        logits_BxMxV, axis=2, keepdims=True)
    log_probs_BxMxV += tf.expand_dims(alive_log_probs_BxM, axis=2)
    log_probs_BxMV = tf.reshape(log_probs_BxMxV, [B, -1])
    new_log_probs_Bx2M, topk_indices_Bx2M = tf.nn.top_k(log_probs_BxMV, k=2 * M)
    # A flat index over (beam, token) splits into beam = idx // V and
    # token = idx % V.
    topk_beam_Bx2M = topk_indices_Bx2M // V
    topk_seq_Bx2MxT, new_cache_Bx2MxU = _gather_nested(
        [alive_seq_BxMxT, new_cache_BxMxU], topk_beam_Bx2M)
    topk_ids_Bx2M = topk_indices_Bx2M % V
    new_seq_Bx2MxT = _update_i(topk_seq_Bx2MxT, topk_ids_Bx2M, i)
    # 1.0 for every candidate that contains `eos_id` anywhere, else 0.0.
    new_finished_flags_Bx2M = tf.cast(
        tf.reduce_any(tf.equal(new_seq_Bx2MxT, eos_id), axis=-1), dtype)

    # get new alive
    # Adding dtype.min to finished candidates keeps them out of the alive set.
    _, topk_alive_indices_BxM = tf.nn.top_k(
        new_log_probs_Bx2M + new_finished_flags_Bx2M * dtype.min, k=M)
    (alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU) = _gather_nested(
        [new_seq_Bx2MxT, new_log_probs_Bx2M, new_cache_Bx2MxU],
        topk_alive_indices_BxM)

    # get new finished
    # Unfinished candidates get dtype.min added so they cannot be selected
    # ahead of finished ones.
    new_scores_Bx2M = length_norm_fn(new_log_probs_Bx2M, i + 1)
    new_scores_Bx2M += (1 - new_finished_flags_Bx2M) * dtype.min
    finished_seq_Bx3MxT = tf.concat([finished_seq_BxMxT, new_seq_Bx2MxT],
                                    axis=1)
    finished_scores_Bx3M = tf.concat([finished_scores_BxM, new_scores_Bx2M],
                                     axis=1)
    _, topk_finished_indices_BxM = tf.nn.top_k(finished_scores_Bx3M, k=M)
    (finished_seq_BxMxT, finished_scores_BxM) = _gather_nested(
        [finished_seq_Bx3MxT, finished_scores_Bx3M], topk_finished_indices_BxM)

    return [
        i + 1, alive_seq_BxMxT, alive_log_probs_BxM, alive_cache_BxMxU,
        finished_seq_BxMxT, finished_scores_BxM
    ]

  # initialize.
  init_i = tf.constant(0, dtype=int_dtype)
  init_alive_seq_BxMxT = _expand_to_beam_size(init_seq_BxT, M)
  # Only the first beam starts with log-prob 0; the others start at dtype.min
  # so the identical initial beams do not all get selected at the first step.
  log_probs_1xM = tf.constant([[0.] + [dtype.min] * (M - 1)], dtype=dtype)
  init_alive_log_probs_BxM = tf.tile(log_probs_1xM, [B, 1])
  init_alive_cache_BxMxU = tf.nest.map_structure(
      lambda t: _expand_to_beam_size(t, M), initial_cache_BxU)
  init_finished_seq_BxMxT = tf.zeros(tf.shape(init_alive_seq_BxMxT), int_dtype)
  init_finished_scores_BxM = tf.zeros([B, M], dtype=dtype) + dtype.min

  # run loop.
  (_, final_alive_seq_BxMxT, final_alive_scores_BxM, _,
   final_finished_seq_BxMxT, final_finished_scores_BxM) = tf.while_loop(
       lambda *args: True,  # Always do T iterations
       _loop_body,
       loop_vars=[
           init_i, init_alive_seq_BxMxT, init_alive_log_probs_BxM,
           init_alive_cache_BxMxU, init_finished_seq_BxMxT,
           init_finished_scores_BxM
       ],
       parallel_iterations=1,
       back_prop=False,
       maximum_iterations=T,
   )

  # process finished.
  # A finished slot counts only if its sequence actually contains `eos_id`;
  # otherwise the alive sequence and its log-probability are returned.
  final_finished_flag_BxMx1 = tf.reduce_any(
      tf.equal(final_finished_seq_BxMxT, eos_id), axis=-1, keepdims=True)
  final_seq_BxMxT = tf.where(
      tf.tile(final_finished_flag_BxMx1, [1, 1, T]), final_finished_seq_BxMxT,
      final_alive_seq_BxMxT)
  final_scores_BxM = tf.where(
      tf.squeeze(final_finished_flag_BxMx1, axis=-1), final_finished_scores_BxM,
      final_alive_scores_BxM)
  return final_seq_BxMxT, final_scores_BxM
def concatenate_batch_into_sample(batch):
  """Reshapes every feature of `batch` to `[1, -1]` in place.

  Args:
    batch: Mapping from feature name to tensor. It is modified in place.

  Returns:
    The same `batch` object, with each value reshaped to a single row.
  """
  for name in batch.keys():
    flattened = tf.reshape(batch[name], [1, -1])
    batch[name] = flattened
  return batch


# Turn each batch of the dataset into one long, single-row example.
for batch in dataset:
  sample = concatenate_batch_into_sample(batch)
  concatenated_examples.append(sample)

# Stack the rows of every feature across examples and drop size-1 dimensions.
feature_dict = {}
for feature in concatenated_examples[0].keys():
  feature_list = [example[feature] for example in concatenated_examples]
  stacked = tf.stack(feature_list, axis=0)
  feature_dict[feature] = tf.squeeze(stacked)

# The f0 track is converted to NumPy so that it can be edited row by row.
f0_hz = feature_dict["f0_hz"].numpy()
feature_dict["f0_hz"] = f0_hz
if INTONATION:
  for di in range(f0_hz.shape[0]):
    f0_hz[di, :] = intonate(f0_hz[di, :])

dataset = tf.data.Dataset.from_tensor_slices(feature_dict)
ex = next(iter(dataset))
assert ex["audio"].shape[0] == 16000 * 16
def test_univariate_sample_mean_and_variance_time_varying_drift(
    self, supply_draws, dtype):
  """Compares sampled GBM moments with closed-form values in two scenarios.

  The first scenario uses a step-function drift with zero volatility, the
  second a constant drift with a step-function volatility. In both, the mean
  and variance returned by `calculate_sample_paths_mean_and_variance` must
  match the expected arrays within `NUM_STDERRS` standard errors, with a
  dtype-dependent floor on the tolerance.
  """
  x0 = 2.0
  tol_floor = _tolerance_by_dtype(dtype)
  # Every expected mean below is offset by log(x0).
  log_x0 = np.log(x0)

  def check(drift, vol, grid, want_mean, want_var):
    """Samples paths and asserts both moments are within tolerance."""
    got_mean, got_var, mean_stderr, var_stderr = (
        calculate_sample_paths_mean_and_variance(
            self, drift, vol, grid, x0, supply_draws, NUM_SAMPLES, dtype))
    # Tolerance is driven by the standard error, floored by the precision.
    mean_tol = np.maximum(mean_stderr * NUM_STDERRS, tol_floor)
    var_tol = np.maximum(var_stderr * NUM_STDERRS, tol_floor)
    arrays_all_close(
        self, tf.squeeze(got_mean), want_mean, mean_tol,
        msg="comparing means")
    arrays_all_close(
        self, tf.squeeze(got_var), want_var, var_tol,
        msg="comparing variances")

  with self.subTest("Drift as a step function, sigma = 0.0"):
    step_drift = tff.math.piecewise.PiecewiseConstantFunc(
        jump_locations=np.array([0.0, 5.0, 10.0], dtype=dtype),
        values=np.array([0.0, 0.0, 0.05, 0.05], dtype=dtype),
        dtype=dtype)
    zero_vol = 0.0
    grid = np.array([0.0, 1.0, 5.0, 7.0, 10.0], dtype=dtype)
    want_mean = np.array(
        [
            0.0,  # t = 0
            0.0,  # drift is 0 up to t = 1.0
            0.0,  # drift is 0 up to t = 5.0
            2.0 * 0.05,  # drift 0.05 on (5.0, 7.0]
            5.0 * 0.05  # drift 0.05 on (5.0, 10.0]
        ],
        dtype=dtype) + log_x0
    # The volatility is zero, so every expected variance is 0.0.
    want_var = zero_vol * np.sqrt(grid)
    check(step_drift, zero_vol, grid, want_mean, want_var)

  with self.subTest("Drift = 0.05, sigma = step function"):
    mu = 0.05
    step_vol = tff.math.piecewise.PiecewiseConstantFunc(
        jump_locations=np.array([0.0, 5.0, 10.0], dtype=dtype),
        values=np.array([0.0, 0.2, 0.4, 0.6], dtype=dtype),
        dtype=dtype)
    grid = np.array([0.0, 1.0, 5.0, 7.0, 10.0], dtype=dtype)
    want_mean = np.array(
        [
            0.0,  # t = 0
            1.0 * mu - 0.5 * 1.0 * 0.2**2,  # t = 1.0
            5.0 * mu - 0.5 * 5.0 * 0.2**2,  # t = 5.0
            7.0 * mu - 0.5 * (5.0 * 0.2**2 + 2.0 * 0.4**2),  # t = 7.0
            10.0 * mu - 0.5 * (5.0 * 0.2**2 + 5.0 * 0.4**2)  # t = 10.0
        ],
        dtype=dtype) + log_x0
    want_var = np.array(
        [
            0.0,  # t = 0
            1.0 * 0.2**2,  # t = 1.0
            5.0 * 0.2**2,  # t = 5.0
            5.0 * 0.2**2 + 2.0 * 0.4**2,  # t = 7.0
            5.0 * 0.2**2 + 5.0 * 0.4**2  # t = 10.0
        ],
        dtype=dtype)
    check(mu, step_vol, grid, want_mean, want_var)
def call(self, x, training=False):
  """Quantizes `x` against the per-head centroids.

  Args:
    x: Input tensor; it is reshaped to rows of length `self.depth`.
    training: When true, under-used centroids are restarted before the lookup
      and the running counts/sums are updated afterwards.

  Returns:
    A pair `(z, c)`: `z` has the shape of `x` and carries the quantized values
    with a straight-through gradient; `c` holds the chosen centroid indices,
    shaped like `x` with the last dimension replaced by `self.num_heads`.
  """
  rows = tf.reshape(x, shape=(-1, self.depth))

  # Cut each row into one segment per head and pile the segments up along the
  # batch axis, so that all heads are quantized in a single pass.
  head_segments = tf.split(rows, self.num_heads, axis=1)
  rows = tf.concat(head_segments, axis=0)

  if training:
    # A centroid is kept when its count is large enough relative to the number
    # of rows in this batch; the others are restarted.
    num_rows = rows.shape[0]
    keep = self.counts * self.k > self.restart_threshold * num_rows
    restart = tf.math.logical_not(keep)

    # Restarted centroids are taken from shuffled batch rows. Uniform noise is
    # the fallback when more centroids need restarting than rows are
    # available.
    restart_idx = tf.squeeze(tf.where(restart), -1)
    num_replace = tf.minimum(tf.shape(restart_idx)[0], rows.shape[0])
    fallback = tf.random.uniform([self.k, self.depth // self.num_heads])
    slots = tf.expand_dims(restart_idx[:num_replace], 1)
    donors = tf.random.shuffle(rows)[:num_replace]
    restarted = tf.tensor_scatter_nd_update(fallback, slots, donors)

    # Kept centroids are the running sums divided by the running counts.
    keep_column = tf.expand_dims(keep, 1)
    averaged = tf.math.divide_no_nan(
        self.sums, tf.expand_dims(self.counts, 1))
    centroids = tf.where(keep_column, averaged, restarted)
  else:
    # Outside training the centroids are used as they are, with no restarts.
    centroids = tf.math.divide_no_nan(
        self.sums, tf.expand_dims(self.counts, 1))

  # Squared distance between every row and every centroid, expanded as
  # |a|^2 - 2 a.b + |b|^2.
  distances = (
      tf.expand_dims(tf.reduce_sum(rows**2, axis=1), 1) -
      2 * tf.matmul(rows, tf.transpose(centroids)) +
      tf.expand_dims(tf.reduce_sum(centroids**2, axis=1), 0))

  # Index of the nearest centroid for each row.
  nearest = tf.argmin(distances, axis=1)

  # Look up the centroids, undo the per-head stacking, and pass gradients
  # straight through to `x`.
  quantized = tf.nn.embedding_lookup(centroids, nearest)
  quantized = tf.concat(tf.split(quantized, self.num_heads, axis=0), axis=1)
  quantized = tf.reshape(quantized, tf.shape(x))
  quantized = x + tf.stop_gradient(quantized - x)

  if training:
    # Per-centroid counts and vector sums for this batch.
    assignment = tf.one_hot(indices=nearest, depth=self.k)
    batch_counts = tf.reduce_sum(assignment, axis=0)
    batch_sums = tf.matmul(assignment, rows, transpose_a=True)

    # Exponential moving average of the counts and the sums.
    self.counts.assign_sub((1 - self.gamma) * (self.counts - batch_counts))
    self.sums.assign_sub((1 - self.gamma) * (self.sums - batch_sums))

  # Regroup the indices so that each input vector gets one index per head.
  codes = tf.stack(tf.split(nearest, self.num_heads, axis=0), axis=1)
  codes_shape = tf.concat([tf.shape(x)[:-1], [self.num_heads]], axis=0)
  codes = tf.reshape(codes, codes_shape)
  return quantized, codes
def test_univariate_time_varying_vol_batched_time(self, supply_draws, dtype):
  """Checks sampled GBM moments for a batch of two volatilities and time grids.

  Each batch element has its own piecewise-constant volatility and its own
  time grid. The sample mean and variance must match the closed-form arrays
  within `NUM_STDERRS` standard errors, with a fixed floor of 5e-3.
  """
  x0 = 2.0
  tol_floor = 5e-3
  mu = 0.05

  vol = tff.math.piecewise.PiecewiseConstantFunc(
      jump_locations=np.array(
          [[0.0, 5.0, 10.0], [0.0, 7.0, 10.0]], dtype=dtype),
      values=np.array(
          [[0.2, 0.2, 0.4, 0.4], [0.5, 0.5, 0.3, 0.1]], dtype=dtype),
      dtype=dtype)
  grid = np.array(
      [[0.0, 1.0, 5.0, 7.0, 12.0], [0.0, 1.5, 3.5, 9.0, 17.0]], dtype=dtype)

  got_mean, got_var, mean_stderr, var_stderr = (
      calculate_sample_paths_mean_and_variance(
          self, mu, vol, grid, x0, supply_draws, NUM_SAMPLES, dtype))

  # First batch element: vol 0.2 before t = 5, 0.4 from t = 5 onwards.
  mean_row_0 = [
      0.0,  # t = 0
      1.0 * mu - 0.5 * 1.0 * 0.2**2,  # t = 1.0
      5.0 * mu - 0.5 * 5.0 * 0.2**2,  # t = 5.0
      7.0 * mu - 0.5 * (5.0 * 0.2**2 + 2.0 * 0.4**2),  # t = 7.0
      12.0 * mu - 0.5 * (5.0 * 0.2**2 + 7.0 * 0.4**2)  # t = 12.0
  ]
  var_row_0 = [
      0.0,  # t = 0
      1.0 * 0.2**2,  # t = 1.0
      5.0 * 0.2**2,  # t = 5.0
      5.0 * 0.2**2 + 2.0 * 0.4**2,  # t = 7.0
      5.0 * 0.2**2 + 7.0 * 0.4**2  # t = 12.0
  ]

  # Second batch element: vol 0.5 before t = 7, 0.3 on [7, 10), 0.1 after.
  mean_row_1 = [
      0.0,  # t = 0
      1.5 * mu - 0.5 * 1.5 * 0.5**2,  # t = 1.5
      3.5 * mu - 0.5 * 3.5 * 0.5**2,  # t = 3.5
      9.0 * mu - 0.5 * (7.0 * 0.5**2 + 2.0 * 0.3**2),  # t = 9.0
      17.0 * mu - 0.5 * (7.0 * 0.5**2 + 3.0 * 0.3**2 + 7.0 * 0.1**2)  # t = 17.0
  ]
  var_row_1 = [
      0.0,  # t = 0
      1.5 * 0.5**2,  # t = 1.5
      3.5 * 0.5**2,  # t = 3.5
      7.0 * 0.5**2 + 2.0 * 0.3**2,  # t = 9.0
      7.0 * 0.5**2 + 3.0 * 0.3**2 + 7.0 * 0.1**2  # t = 17.0
  ]

  # The expected means are offset by log(x0).
  want_mean = np.array([mean_row_0, mean_row_1], dtype=dtype) + np.log(x0)
  want_var = np.array([var_row_0, var_row_1], dtype=dtype)

  mean_tol = np.maximum(mean_stderr * NUM_STDERRS, tol_floor)
  var_tol = np.maximum(var_stderr * NUM_STDERRS, tol_floor)
  arrays_all_close(
      self, tf.squeeze(got_mean), want_mean, mean_tol, msg="comparing means")
  arrays_all_close(
      self, tf.squeeze(got_var), want_var, var_tol,
      msg="comparing variances")
def log_likelihood(y_true, y_sample):
  """Log-probability of the labels under the model's output distribution.

  Args:
    y_true: Labels; squeezed before being scored.
    y_sample: Ignored; accepted only to fit the two-argument signature.

  Returns:
    `model.output.distribution.log_prob` evaluated at the squeezed labels.
  """
  del y_sample  # unused arg
  squeezed_labels = tf.squeeze(y_true)
  output_distribution = model.output.distribution
  return output_distribution.log_prob(squeezed_labels)
def main(argv):
  """Trains a base model, refines it by auxiliary sampling, and logs metrics.

  The steps, all run inside one TF1 session, are:
    1. Load the dataset named by `FLAGS.dataset` and build a `lenet5` model.
    2. Train for `FLAGS.training_steps` steps, record metrics, save weights.
    3. Keep training the same model ("overtrained") for as many steps as the
       refinement below uses in total, and record metrics again.
    4. For each ensemble member, reload the base weights, then repeatedly run
       the auxiliary sampling ops and retrain; save the member's weights.
    5. Log the train/test metrics of the base, overtrained and ensemble models.

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  session = tf1.Session()
  with session.as_default():
    x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset, session)
    n_train = x_train.shape[0]
    # Labels are assumed to be integer class ids starting at 0 -- TODO confirm.
    num_classes = int(np.amax(y_train)) + 1
    model = lenet5(n_train, x_train.shape[1:], num_classes)

    # Every layer gets two non-trainable scalars, initialized to zero; the
    # sampling ops built further down add to them.
    for l in model.layers:
      l.kl_cost_weight = l.add_weight(
          name='kl_cost_weight',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)
      l.kl_cost_bias = l.add_variable(
          name='kl_cost_bias',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)

    [negative_log_likelihood, accuracy, log_likelihood, kl,
     elbo] = get_losses_and_metrics(model, n_train)
    metrics = [elbo, log_likelihood, kl, accuracy]

    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=FLAGS.output_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    def fit_fn(model, steps, initial_epoch):
      """Fits `model` for `steps` steps, expressed in whole epochs."""
      return model.fit(
          x=x_train,
          y=y_train,
          batch_size=FLAGS.batch_size,
          # Steps are converted to epochs with integer division.
          epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
          initial_epoch=initial_epoch,
          validation_data=(x_test, y_test),
          validation_freq=max(
              (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
          verbose=1,
          callbacks=[tensorboard])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
        loss=negative_log_likelihood,
        metrics=metrics)
    session.run(tf1.initialize_all_variables())

    # Number of epochs of the base training; later fits start from here.
    train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
    fit_fn(model, FLAGS.training_steps, initial_epoch=0)

    # Backend function returning per-example log-probabilities and logits for
    # given inputs and labels; it is passed to `utils.ensemble_metrics`.
    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(tf.squeeze(labels)),
        model.output.distribution.logits
    ])

    base_metrics = [
        utils.ensemble_metrics(x_train, y_train, model, ll, n_samples=10),
        utils.ensemble_metrics(x_test, y_test, model, ll, n_samples=10)
    ]
    model_dir = os.path.join(FLAGS.output_dir, 'models')
    tf.io.gfile.makedirs(model_dir)
    base_model_filename = os.path.join(model_dir, 'base_model.weights')
    model.save_weights(base_model_filename)

    # Train base model further for comparison.
    fit_fn(
        model,
        FLAGS.n_auxiliary_variables * FLAGS.auxiliary_sampling_frequency *
        FLAGS.ensemble_size,
        initial_epoch=train_epochs)

    overtrained_metrics = [
        utils.ensemble_metrics(x_train, y_train, model, ll, n_samples=10),
        utils.ensemble_metrics(x_test, y_test, model, ll, n_samples=10)
    ]

    # Perform refined VI.
    # `sample_op` collects, per variational layer, the auxiliary sampling op
    # and the op that accumulates its cost into the layer's scalar.
    sample_op = []
    for l in model.layers:
      if isinstance(l, tfp.layers.DenseLocalReparameterization) or isinstance(
          l, tfp.layers.Convolution2DFlipout):
        weight_op, weight_cost = sample_auxiliary_op(
            l.kernel_prior.distribution, l.kernel_posterior.distribution,
            FLAGS.auxiliary_variance_ratio)
        sample_op.append(weight_op)
        sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
        # Fix the variance of the prior
        session.run(l.kernel_prior.distribution.istrainable.assign(0.))
        if hasattr(l.bias_prior, 'distribution'):
          bias_op, bias_cost = sample_auxiliary_op(
              l.bias_prior.distribution, l.bias_posterior.distribution,
              FLAGS.auxiliary_variance_ratio)
          sample_op.append(bias_op)
          sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
          # Fix the variance of the prior
          session.run(l.bias_prior.distribution.istrainable.assign(0.))

    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
      # Every ensemble member starts again from the saved base weights.
      model.load_weights(base_model_filename)
      for j in range(FLAGS.n_auxiliary_variables):
        session.run(sample_op)
        # Recompiled each round because the learning rate depends on `j`.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                # The learning rate is proportional to the scale of the prior.
                lr=float(FLAGS.learning_rate_for_sampling *
                         np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
            loss=negative_log_likelihood,
            metrics=metrics)
        fit_fn(model, FLAGS.auxiliary_sampling_frequency,
               initial_epoch=train_epochs)
      ensemble_filename = os.path.join(
          model_dir, 'ensemble_component_' + str(i) + '.weights')
      ensemble_filenames.append(ensemble_filename)
      model.save_weights(ensemble_filename)

    auxiliary_metrics = [
        utils.ensemble_metrics(x_train, y_train, model, ll,
                               weight_files=ensemble_filenames, n_samples=10),
        utils.ensemble_metrics(x_test, y_test, model, ll,
                               weight_files=ensemble_filenames, n_samples=10)
    ]

    # NOTE: the loop variable `metrics` rebinds the list passed to
    # `model.compile` above; this is harmless only because no compile follows.
    for metrics, name in [(base_metrics, 'Base model'),
                          (overtrained_metrics, 'Overtrained model'),
                          (auxiliary_metrics, 'Auxiliary sampling')]:
      logging.info(name)
      for metrics_dict, split in [(metrics[0], 'train'),
                                  (metrics[1], 'test')]:
        logging.info(split)
        for metric_name in metrics_dict:
          logging.info('%s: %s', metric_name, metrics_dict[metric_name])
def pass_arg(Xx, nsim, tr_size, num_iter):
  """Fits GP kernel parameters with a physics penalty and samples at `Xx`.

  The amplitude and the two length scales of an input-scaled
  exponentiated-quadratic kernel are optimized with Adam. The loss is the
  negative joint log-probability of the training observations plus
  `lam` times a penalty on GP samples at the unlabeled points that are
  negative or exceed the initial porosity. After training, samples are drawn
  from a `GaussianProcessRegressionModel` at `Xx`.

  Args:
    Xx: Index points at which the trained model is sampled. They are passed
      to the model unchanged (no scaling is applied here).
    nsim: Number of samples to draw; converted with `int`.
    tr_size: Number of leading rows of the labeled data used for training;
      converted with `int`.
    num_iter: Number of optimization steps; converted with `int`.

  Returns:
    A NumPy array holding the `nsim` samples with axis 1 squeezed out.
  """
  print("Tr_size:", tr_size)

  def fix_seeds(seed):
    """Seeds Python, NumPy and TF, and installs a single-threaded session."""
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)

  ss = 1
  fix_seeds(ss)

  def root_mean_squared_error(y_true, y_pred):
    """RMSE between ground truth and predictions over the last axis."""
    return tf.math.sqrt(
        tf.math.reduce_mean(tf.math.square(y_pred - y_true), axis=-1))

  class InputTransformedKernel(
      tfp.math.psd_kernels.PositiveSemidefiniteKernel):
    """Kernel that applies `transformation` to its inputs before `kernel`."""

    def __init__(self, kernel, transformation,
                 name='InputTransformedKernel'):
      self._kernel = kernel
      self._transformation = transformation
      super(InputTransformedKernel,
            self).__init__(feature_ndims=kernel.feature_ndims,
                           dtype=kernel.dtype,
                           name=name)

    def apply(self, x1, x2):
      return self._kernel.apply(self._transformation(x1),
                                self._transformation(x2))

    def matrix(self, x1, x2):
      return self._kernel.matrix(self._transformation(x1),
                                 self._transformation(x2))

    @property
    def batch_shape(self):
      return self._kernel.batch_shape

    def batch_shape_tensor(self):
      # Call the wrapped kernel's method. The previous code returned the
      # bound method object itself instead of the shape tensor.
      return self._kernel.batch_shape_tensor()

  class InputScaledKernel(InputTransformedKernel):
    """Kernel whose inputs are divided by per-dimension length scales."""

    def __init__(self, kernel, length_scales):
      super(InputScaledKernel, self).__init__(
          kernel, lambda x: x / tf.expand_dims(
              length_scales, -(kernel.feature_ndims + 1)))

  # Load labeled data. The first two columns are the inputs; `-2:-1` keeps
  # only the second-to-last column as the target, so the last column is not
  # used (the original note says porosity predictions are not needed).
  data = np.loadtxt('../data/labeled_data.dat')
  x_labeled = data[:, :2].astype(np.float64)
  y_labeled = data[:, -2:-1].astype(np.float64)

  # Normalize the inputs to [0, 1] with MinMaxScaler.
  scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
  x_labeled = scaler.fit_transform(x_labeled)

  tr_size = int(tr_size)

  # Training split: the first `tr_size` rows. The targets are transposed to
  # a single row.
  trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
  trainY = np.transpose(trainY)

  data_phyloss = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
  x_unlabeled = data_phyloss[:, :]

  # Initial porosity is the last column of the unlabeled data.
  initporo = x_unlabeled[:, -1]

  # Keep the first 1303 and the last 6 unlabeled rows.
  x_unlabeled1 = x_unlabeled[:1303, :2]
  x_unlabeled2 = x_unlabeled[-6:, :2]
  x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2))
  # NOTE(review): this re-fits the scaler on the unlabeled inputs, so they
  # are not scaled with the labeled-data min/max. Confirm this is intended.
  x_unlabeled = scaler.fit_transform(x_unlabeled)
  init_poro1 = initporo[:1303]
  init_poro2 = initporo[-6:]
  init_poro = np.hstack((init_poro1, init_poro2))

  def build_gp(amplitude, length_scale):
    """Defines the conditional dist. of GP outputs, given kernel parameters."""
    # The kernel's own length_scale is left at None; scaling is done by
    # InputScaledKernel, which divides each input dimension by its own
    # length scale.
    se_kernel = tfk.ExponentiatedQuadratic(amplitude)
    kernel = InputScaledKernel(se_kernel, length_scale)
    # GP prior over the training inputs, used to train the parameters.
    return tfd.GaussianProcess(kernel=kernel, index_points=trainX)

  gp_joint_model = tfd.JointDistributionNamedAutoBatched({
      'amplitude':
          tfd.TransformedDistribution(distribution=tfd.Normal(
              loc=0., scale=np.float64(1.)),
                                      bijector=tfb.Exp(),
                                      batch_shape=[1]),
      'length_scale':
          tfd.TransformedDistribution(distribution=tfd.Normal(
              loc=0., scale=np.float64(1.)),
                                      bijector=tfb.Exp(),
                                      batch_shape=[2]),
      'observations':
          build_gp,
  })

  # Create the trainable model parameters, which we'll subsequently optimize.
  # Note that we constrain them to be strictly positive.
  constrain_positive = tfb.Shift(np.finfo(np.float64).tiny)(tfb.Exp())

  amplitude_var = tfp.util.TransformedVariable(
      initial_value=np.random.uniform(size=1),
      bijector=constrain_positive,
      name='amplitude',
      dtype=np.float64)

  length_scale_var = tfp.util.TransformedVariable(
      initial_value=np.random.uniform(size=[2]),
      bijector=constrain_positive,
      name='length_scale',
      dtype=np.float64)

  trainable_variables = [
      v.trainable_variables[0] for v in [amplitude_var, length_scale_var]
  ]

  @tf.function(autograph=False, experimental_compile=False)
  def target_log_prob(amplitude, length_scale, poroi, lam):
    """Loss: weighted physics penalty minus the joint log-probability."""
    tf.random.set_seed(1234)
    se_kernel = tfk.ExponentiatedQuadratic(amplitude)
    optimized_kernel = InputScaledKernel(se_kernel, length_scale)
    gprm = tfd.GaussianProcessRegressionModel(kernel=optimized_kernel,
                                              index_points=x_unlabeled)
    samples = gprm.sample(1)
    pred = tf.squeeze(samples, axis=0)

    # Penalize sampled values below zero or above the initial porosity.
    phyloss_poro = tf.math.reduce_mean(
        tf.nn.relu(tf.negative(pred)) + tf.nn.relu(pred - poroi))

    return lam * phyloss_poro - gp_joint_model.log_prob(
        {
            'amplitude': amplitude,
            'length_scale': length_scale,
            'observations': trainY
        })

  fix_seeds(1)

  # Optimize the model parameters.
  num_iters = int(num_iter)
  lam = 100000
  optimizer = tf.optimizers.Adam(learning_rate=.1)

  # Store the loss values during training, so the progress can be inspected.
  lls_ = np.zeros(num_iters, np.float64)

  for i in range(num_iters):
    with tf.GradientTape() as tape:
      # Physics loss and regular loss combined.
      loss = target_log_prob(amplitude_var, length_scale_var, init_poro,
                             lam)
    grads = tape.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(grads, trainable_variables))
    lls_[i] = loss

  fix_seeds(1)
  se_kernel = tfk.ExponentiatedQuadratic(amplitude_var)
  optimized_kernel = InputScaledKernel(se_kernel, length_scale_var)
  gprm = tfd.GaussianProcessRegressionModel(kernel=optimized_kernel,
                                            index_points=Xx)
  preds = gprm.sample(int(nsim))
  samples = np.array(tf.squeeze(preds, axis=1))
  return samples
def softquantiles(x,
                  quantiles,
                  quantile_width=None,
                  axis=-1,
                  may_squeeze=True,
                  **kwargs):
  """Computes soft quantiles via optimal transport.

  A full softsort is not needed to recover a few quantiles. The values of x
  are instead transported onto a short target vector whose weights alternate
  between "filler" mass (the gap from one requested quantile to the next) and
  a small mass of size `quantile_width` placed at each requested quantile. The
  target values that receive the small masses are the soft quantiles.

  Args:
    x: Tensor<float> of any shape.
    quantiles: list<float> the quantiles to be returned. It can also be a
      single float. Only values strictly between 0 and 1 are used.
    quantile_width: (float) mass given to the bucket that attracts the points
      concentrating around a requested quantile. A bigger width lets the soft
      quantile mix in points further away from it. If None, the smallest of
      the gaps between quantiles and 1/n is used, where n is the size of x
      along `axis`.
    axis: (int) the axis along which to compute the quantile.
    may_squeeze: (bool) should we squeeze the output tensor in case of a
      single quantile.
    **kwargs: forwarded to `softsort`.

  Returns:
    A Tensor<float> similar to the input tensor, but with the `axis` dimension
    replaced by the number of requested quantiles. With a single quantile and
    `may_squeeze` set, that dimension is squeezed out.

  Raises:
    tf.errors.InvalidArgumentError when the quantiles and quantile width are
    not correct, namely quantiles are either not in sorted order or the
    quantile_width is too large.
  """
  if isinstance(quantiles, float):
    quantiles = [quantiles]
  requested = tf.constant(quantiles, tf.float32)

  # Only quantiles strictly inside (0, 1) are kept.
  inside_unit = tf.logical_and(requested > 0.0, requested < 1.0)
  kept = tf.boolean_mask(requested, inside_unit)
  num_kept = tf.shape(kept)[0]

  # Gaps between consecutive quantiles, with 0 and 1 added at the two ends.
  knots = tf.concat([[0.0], kept, [1.0]], axis=0)
  gaps = knots[1:] - knots[:-1]

  width = quantile_width
  if width is None:
    inverse_size = 1.0 / tf.cast(tf.shape(x)[axis], dtype=x.dtype)
    width = tf.reduce_min(tf.concat([gaps, [inverse_size]], axis=0))

  # Each gap gives up one full width, except the two end gaps which give up
  # only half a width.
  shift = -tf.ones(tf.shape(gaps), dtype=x.dtype)
  shift = shift + 0.5 * (
      tf.one_hot(0, num_kept + 1) + tf.one_hot(num_kept, num_kept + 1))
  gaps = gaps + width * shift

  gaps_are_valid = tf.Assert(tf.reduce_all(gaps >= 0.0), [gaps])
  with tf.control_dependencies([gaps_are_valid]):
    # One extra entry so that both vectors have the same length; it is dropped
    # again right after interleaving.
    point_masses = tf.ones(num_kept + 1) * width

    # Interleave: gap, quantile, gap, quantile, ..., gap.
    interleaved = tf.stack([gaps, point_masses], axis=1)
    weights = tf.reshape(interleaved, (-1,))[:-1]

    # Only the strictly positive weights are given to softsort.
    is_positive = weights > 0.0
    targets = tf.boolean_mask(weights, is_positive)
    sorted_values = softsort(
        x,
        direction='ASCENDING',
        axis=axis,
        target_weights=targets,
        **kwargs)

    # Quantile masses sit at the odd positions of `weights`; the running count
    # of positive weights gives their position in the softsort output.
    odds = tf.math.floormod(tf.range(weights.shape[0], dtype=tf.float32), 2)
    positives = tf.cast(weights > 0.0, tf.float32)
    positions = tf.cast(tf.math.cumsum(positives) * odds, dtype=tf.int32)
    positions = tf.boolean_mask(positions, positions > 0) - 1
    result = tf.gather(sorted_values, positions, axis=axis)

    # With a single quantile, the quantile dimension may be squeezed out.
    has_single_quantile = tf.equal(tf.shape(result)[axis], 1)
    if tf.math.logical_and(has_single_quantile, may_squeeze):
      result = tf.squeeze(result, axis=axis)
    return result
def get_is_weighted_reward_samples(self,
                                   dataset: dataset_lib.OffpolicyDataset,
                                   target_policy: tf_policy.TFPolicy,
                                   episode_limit: Optional[int] = None,
                                   eps: Optional[float] = 1e-8):
  """Gets the importance-sampling (IS) weighted reward samples.

  Args:
    dataset: Dataset providing the episodes via `get_all_episodes`.
    target_policy: Policy whose action log-probabilities form the numerator
      of the importance ratios.
    episode_limit: Passed as `limit` to `dataset.get_all_episodes`.
    eps: Small constant used as a lower bound in normalizations and, through
      `-1.0 / eps`, as a floor on the per-step log ratios. Must not be None,
      since it is used as a divisor.

  Returns:
    One value per episode: the initial-state offset plus the IS-weighted
    average reward, computed according to `self._mode`.

  Raises:
    ValueError: If `self._mode` is not one of 'trajectory-wise',
      'weighted-trajectory-wise', 'step-wise' or 'weighted-step-wise'.
  """
  episodes, valid_steps = dataset.get_all_episodes(limit=episode_limit)
  # The last step of each episode only serves as a "next" step.
  total_num_steps_per_episode = tf.shape(valid_steps)[1] - 1
  num_episodes = tf.shape(valid_steps)[0]
  num_samples = num_episodes * total_num_steps_per_episode

  init_env_step = tf.nest.map_structure(lambda t: t[:, 0, ...], episodes)
  # Flatten (episode, step) into a single sample axis for the current and
  # the next steps.
  env_step = tf.nest.map_structure(
      lambda t: tf.squeeze(
          tf.reshape(t[:, 0:total_num_steps_per_episode, ...],
                     [num_samples, -1])), episodes)
  next_env_step = tf.nest.map_structure(
      lambda t: tf.squeeze(
          tf.reshape(t[:, 1:1 + total_num_steps_per_episode, ...],
                     [num_samples, -1])), episodes)
  tfagents_env_step = dataset_lib.convert_to_tfagents_timestep(env_step)

  # Per-step discount weights gamma^t, back in [episode, step] layout.
  gamma_weights = tf.reshape(
      tf.pow(self._gamma, tf.cast(env_step.step_num, tf.float32)),
      [num_episodes, total_num_steps_per_episode])

  rewards = (-self._get_q_value(env_step) + self._reward_fn(env_step) +
             self._gamma * next_env_step.discount *
             self._get_v_value(next_env_step, target_policy))
  rewards = tf.reshape(rewards, [num_episodes, total_num_steps_per_episode])

  init_values = self._get_v_value(init_env_step, target_policy)
  init_offset = (1 - self._gamma) * init_values

  target_log_probabilities = target_policy.distribution(
      tfagents_env_step).action.log_prob(env_step.action)
  if tf.rank(target_log_probabilities) > 1:
    target_log_probabilities = tf.reduce_sum(target_log_probabilities, -1)

  # The denominator of the ratio comes from the policy network when there is
  # one, otherwise from the log-probabilities stored with the data.
  if self._policy_network is not None:
    baseline_policy_log_probability = self._get_log_prob(
        self._policy_network, env_step)
    if tf.rank(baseline_policy_log_probability) > 1:
      baseline_policy_log_probability = tf.reduce_sum(
          baseline_policy_log_probability, -1)
    policy_log_ratios = tf.reshape(
        tf.maximum(
            -1.0 / eps,
            target_log_probabilities - baseline_policy_log_probability),
        [num_episodes, total_num_steps_per_episode])
  else:
    policy_log_ratios = tf.reshape(
        tf.maximum(
            -1.0 / eps,
            target_log_probabilities - env_step.get_log_probability()),
        [num_episodes, total_num_steps_per_episode])

  # A step counts only if it is valid and has a positive discount.
  valid_steps_in = valid_steps[:, 0:total_num_steps_per_episode]
  mask = tf.cast(
      tf.logical_and(valid_steps_in, episodes.discount[:, :-1] > 0.),
      tf.float32)

  masked_rewards = tf.where(mask > 0, rewards, tf.zeros_like(rewards))
  clipped_policy_log_ratios = mask * self.clip_log_factor(policy_log_ratios)

  if self._mode in ['trajectory-wise', 'weighted-trajectory-wise']:
    trajectory_avg_rewards = tf.reduce_sum(
        masked_rewards * gamma_weights, axis=1) / tf.reduce_sum(
            gamma_weights, axis=1)
    trajectory_log_ratios = tf.reduce_sum(clipped_policy_log_ratios, axis=1)
    if self._mode == 'trajectory-wise':
      trajectory_avg_rewards *= tf.exp(trajectory_log_ratios)
      return init_offset + trajectory_avg_rewards
    else:
      # Subtract the max before exponentiating, then normalize by the mean
      # ratio over the trajectories.
      offset = tf.reduce_max(trajectory_log_ratios)
      normalized_clipped_ratios = tf.exp(trajectory_log_ratios - offset)
      normalized_clipped_ratios /= tf.maximum(
          eps, tf.reduce_mean(normalized_clipped_ratios))
      trajectory_avg_rewards *= normalized_clipped_ratios
      return init_offset + trajectory_avg_rewards
  elif self._mode in ['step-wise', 'weighted-step-wise']:
    trajectory_log_ratios = mask * tf.cumsum(policy_log_ratios, axis=1)
    if self._mode == 'step-wise':
      trajectory_avg_rewards = tf.reduce_sum(
          masked_rewards * gamma_weights * tf.exp(trajectory_log_ratios),
          axis=1) / tf.reduce_sum(
              gamma_weights, axis=1)
      return init_offset + trajectory_avg_rewards
    else:
      # Average over data, for each time step.
      # TODO: Handle mask.
      offset = tf.reduce_max(trajectory_log_ratios, axis=0)
      normalized_imp_weights = tf.exp(trajectory_log_ratios - offset)
      normalized_imp_weights /= tf.maximum(
          eps,
          tf.reduce_sum(mask * normalized_imp_weights, axis=0) /
          tf.maximum(eps, tf.reduce_sum(mask, axis=0)))[None, :]
      trajectory_avg_rewards = tf.reduce_sum(
          masked_rewards * gamma_weights * normalized_imp_weights,
          axis=1) / tf.reduce_sum(
              gamma_weights, axis=1)
      return init_offset + trajectory_avg_rewards
  else:
    # The exception used to be built without `raise`, so an unsupported mode
    # silently returned None.
    raise ValueError('Estimator is not implemented!')
def parse_example(serialized):
  """Parses serialized examples and squeezes axis 1 of every feature.

  Args:
    serialized: Serialized examples, as accepted by `tf.io.parse_example`.

  Returns:
    A dict mapping each feature name from `_make_features_spec()` to its
    parsed tensor with axis 1 removed.
  """
  spec = _make_features_spec()
  parsed = tf.io.parse_example(serialized, spec)
  squeezed = {}
  for key, tensor in six.iteritems(parsed):
    squeezed[key] = tf.squeeze(tensor, axis=1)
  return squeezed
def remove_squeezable_dimensions(labels,
                                 predictions,
                                 expected_rank_diff=0,
                                 name=None):
  """Squeezes the last dim of one argument when ranks are off by exactly 1.

  `expected_rank_diff` is the value `rank(predictions) - rank(labels)` should
  have. With the default of 0 the shapes are expected to match, and whichever
  tensor has one extra trailing dimension is squeezed. With a value of 1 (for
  instance class ids in `labels` against one probability per class in
  `predictions`), `labels` is squeezed when the actual difference is 0 and
  `predictions` when it is 2.

  Static shapes are used when both ranks are known. Otherwise graph
  operations are added, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
  with backend.name_scope(name or "remove_squeezable_dimensions"):
    if not tf_utils.is_tensor_or_extension_type(predictions):
      predictions = tf.convert_to_tensor(predictions)
    if not tf_utils.is_tensor_or_extension_type(labels):
      labels = tf.convert_to_tensor(labels)

    pred_shape = predictions.shape
    label_shape = labels.shape
    pred_rank = pred_shape.ndims
    label_rank = label_shape.ndims

    def last_dim_may_be_one(shape):
      """True when the static last dimension is 1 or unknown."""
      return shape.dims[-1].is_compatible_with(1)

    if label_rank is None or pred_rank is None:
      # At least one rank is unknown: decide at run time.
      rank_diff = tf.rank(predictions) - tf.rank(labels)
      if pred_rank is None or last_dim_may_be_one(pred_shape):
        predictions = tf.cond(
            tf.equal(expected_rank_diff + 1, rank_diff),
            lambda: tf.squeeze(predictions, [-1]),
            lambda: predictions,
        )
      if label_rank is None or last_dim_may_be_one(label_shape):
        labels = tf.cond(
            tf.equal(expected_rank_diff - 1, rank_diff),
            lambda: tf.squeeze(labels, [-1]),
            lambda: labels,
        )
      return labels, predictions

    # Both ranks are known statically.
    rank_diff = pred_rank - label_rank
    if rank_diff == expected_rank_diff + 1 and last_dim_may_be_one(pred_shape):
      predictions = tf.squeeze(predictions, [-1])
    elif (rank_diff == expected_rank_diff - 1 and
          last_dim_may_be_one(label_shape)):
      labels = tf.squeeze(labels, [-1])
    return labels, predictions