def _set_names_and_shapes(self, step_type, reward, discount,
                          *flat_observations):
  """Returns a `TimeStep` namedtuple."""
  step_type = tf.identity(step_type, name='step_type')
  reward = tf.identity(reward, name='reward')
  discount = tf.identity(discount, name='discount')
  batch_shape = () if not self.batched else (self.batch_size,)
  batch_shape = tf.TensorShape(batch_shape)
  if not tfe.executing_eagerly():
    # Shapes are not required in eager mode.
    reward.set_shape(batch_shape)
    step_type.set_shape(batch_shape)
    discount.set_shape(batch_shape)
  # Give each tensor a meaningful name and set the static shape.
  named_observations = []
  for obs, spec in zip(flat_observations,
                       nest.flatten(self.observation_spec())):
    named_observation = tf.identity(obs, name=spec.name)
    if not tfe.executing_eagerly():
      named_observation.set_shape(batch_shape.concatenate(spec.shape))
    named_observations.append(named_observation)
  observations = nest.pack_sequence_as(self.observation_spec(),
                                       named_observations)
  return ts.TimeStep(step_type, reward, discount, observations)
def _value_and_gradients(fn, fn_arg_list, result=None, grads=None, name=None):
  """Helper to `maybe_call_fn_and_grads`."""
  with tf.name_scope(name, 'value_and_gradients',
                     [fn_arg_list, result, grads]):

    def _convert_to_tensor(x, name):
      ctt = lambda x_: x_ if x_ is None else tf.convert_to_tensor(x_, name=name)
      return [ctt(x_) for x_ in x] if is_list_like(x) else ctt(x)

    fn_arg_list = (list(fn_arg_list) if is_list_like(fn_arg_list)
                   else [fn_arg_list])
    fn_arg_list = _convert_to_tensor(fn_arg_list, 'fn_arg')

    if result is None:
      result = fn(*fn_arg_list)
      if grads is None and tfe.executing_eagerly():
        # Ensure we disable bijector caching in eager mode.
        # TODO(b/72831017): Remove this once bijector caching is fixed for
        # eager mode.
        fn_arg_list = [0 + x for x in fn_arg_list]
    result = _convert_to_tensor(result, 'fn_result')

    if grads is not None:
      grads = _convert_to_tensor(grads, 'fn_grad')
      return result, grads

    if tfe.executing_eagerly():
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of the Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        def make_fn_slice(i):
          """Needed to prevent `cell-var-from-loop` pylint warning."""
          return lambda *args: fn(*args)[i]

        grads = [
            tfe.gradients_function(make_fn_slice(i))(*fn_arg_list)[i]
            for i in range(len(result))
        ]
      else:
        grads = tfe.gradients_function(fn)(*fn_arg_list)
    else:
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of the Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        grads = [tf.gradients(result[i], fn_arg_list[i])[0]
                 for i in range(len(result))]
      else:
        grads = tf.gradients(result, fn_arg_list)
    return result, grads
def _value_and_gradients(fn, *args):
  """Calls `fn` and computes the gradient of the result wrt `args`."""
  if tfe.executing_eagerly():
    return tfe.value_and_gradients_function(fn)(*args)
  result = fn(*args)
  grads = tf.gradients(result, args)
  return result, grads
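# For reference, a minimal sketch of what the eager branch above delegates
# to: `tfe.value_and_gradients_function` returns the function value and the
# per-argument gradients in a single call. The quadratic `fn` here is
# illustrative only, not part of the original snippet (assumes TF 1.x with
# eager execution enabled).
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

def fn(x, y):
  # f(x, y) = x^2 + 3y, so df/dx = 2x and df/dy = 3.
  return x**2 + 3. * y

value, grads = tfe.value_and_gradients_function(fn)(
    tf.constant(2.), tf.constant(5.))
print(value.numpy())               # 19.0
print([g.numpy() for g in grads])  # [4.0, 3.0]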
def main(_):
  tf.enable_eager_execution()
  print(tfe.executing_eagerly())  # True

  # Ground truth constants.
  true_w = [[-2.0], [4.0], [1.0]]
  true_b = [0.5]
  noise_level = 0.01

  # Training constants.
  batch_size = 64
  learning_rate = 0.1

  print("True w: %s" % true_w)
  print("True b: %s\n" % true_b)

  model = LinearModel()
  dataset = synthetic_dataset(true_w, true_b, noise_level, batch_size, 20)

  device = "gpu:0" if tfe.num_gpus() else "cpu:0"
  print("Using device: %s" % device)
  with tf.device(device):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    fit(model, dataset, optimizer, verbose=True, logdir=FLAGS.logdir)

  print("\nAfter training: w=%s" % model.variables[0].numpy())
  print("\nAfter training: b=%s" % model.variables[1].numpy())
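# The example above relies on `LinearModel`, `synthetic_dataset`, and `fit`
# defined elsewhere in the same program. A hedged sketch of what minimal
# versions might look like; the layer choice and the dataset construction
# are assumptions, not the original helpers. `fit` would then iterate the
# dataset and minimize the mean squared error between `model(x)` and `y`.
class LinearModel(tf.keras.Model):
  """A single dense layer computing y = xW + b (assumed definition)."""

  def __init__(self):
    super(LinearModel, self).__init__()
    self._layer = tf.layers.Dense(1)

  def call(self, xs):
    return self._layer(xs)


def synthetic_dataset(w, b, noise_level, batch_size, num_batches):
  """Yields (x, y) batches with y = xw + b + noise (assumed helper)."""
  w, b = tf.constant(w), tf.constant(b)

  def make_batch(_):
    x = tf.random_normal([batch_size, int(w.shape[0])])
    y = tf.matmul(x, w) + b + noise_level * tf.random_normal([batch_size, 1])
    return x, y

  return tf.data.Dataset.range(num_batches).map(make_batch)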
def _set_seed(seed):
  """Helper which uses graph seed if using TFE."""
  # TODO(b/68017812): Deprecate once TFE supports seed.
  if tfe.executing_eagerly():
    tf.set_random_seed(seed)
    return None
  return seed
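# As used by the HMC tests below, this helper makes seeding work in both
# modes: in graph mode the seed goes to the sampler, in eager mode it is
# routed through the graph-level seed instead. A small sketch; the lambda
# target is illustrative only, and `tensorflow_probability` is assumed to
# be imported as `tfp` as in the surrounding tests.
kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=lambda x: -x**2.,
    step_size=0.05,
    num_leapfrog_steps=2,
    # In graph mode this receives 42; in eager mode `tf.set_random_seed(42)`
    # is called and `None` is passed instead.
    seed=_set_seed(42))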
def testJacobianDiagonal3DListInput(self):
  """Tests that the diagonal of the Jacobian matrix computes correctly."""
  dtype = np.float32
  true_mean = dtype([0, 0, 0])
  true_cov = dtype([[1, 0.25, 0.25], [0.25, 2, 0.25], [0.25, 0.25, 3]])
  chol = tf.linalg.cholesky(true_cov)
  target = tfd.MultivariateNormalTriL(loc=true_mean, scale_tril=chol)

  # Assume that the state is passed as a list of tensors `x` and `y`.
  # Then the target function is defined as follows:
  def target_fn(x, y):
    # Stack the input tensors together.
    z = tf.concat([x, y], axis=-1) - true_mean
    return target.log_prob(z)

  sample_shape = [3, 5]
  state = [
      tf.ones(sample_shape + [2], dtype=dtype),
      tf.ones(sample_shape + [1], dtype=dtype)
  ]
  fn_val = target_fn(*state)
  grad_fn = tfe.gradients_function(target_fn)
  if tfe.executing_eagerly():
    grads = grad_fn(*state)
  else:
    grads = tf.gradients(fn_val, state)

  _, diag_jacobian_shape_passed = tfp.math.diag_jacobian(
      xs=state, ys=grads, fn=grad_fn, sample_shape=tf.shape(fn_val))
  _, diag_jacobian_shape_none = tfp.math.diag_jacobian(
      xs=state, ys=grads, fn=grad_fn)

  true_diag_jacobian_1 = np.zeros(sample_shape + [2])
  true_diag_jacobian_1[..., 0] = -1.05
  true_diag_jacobian_1[..., 1] = -0.52
  true_diag_jacobian_2 = -0.34 * np.ones(sample_shape + [1])

  self.assertAllClose(self.evaluate(diag_jacobian_shape_passed[0]),
                      true_diag_jacobian_1, atol=0.01, rtol=0.01)
  self.assertAllClose(self.evaluate(diag_jacobian_shape_none[0]),
                      true_diag_jacobian_1, atol=0.01, rtol=0.01)
  self.assertAllClose(self.evaluate(diag_jacobian_shape_passed[1]),
                      true_diag_jacobian_2, atol=0.01, rtol=0.01)
  self.assertAllClose(self.evaluate(diag_jacobian_shape_none[1]),
                      true_diag_jacobian_2, atol=0.01, rtol=0.01)
def _chain_gets_correct_expectations(self, x, independent_chain_ndims):
  counter = collections.Counter()

  def log_gamma_log_prob(x):
    counter['target_calls'] += 1
    event_dims = tf.range(independent_chain_ndims, tf.rank(x))
    return self._log_gamma_log_prob(x, event_dims)

  samples, kernel_results = tfp.mcmc.sample_chain(
      num_results=150,
      current_state=x,
      kernel=tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=log_gamma_log_prob,
          step_size=0.05,
          num_leapfrog_steps=2,
          seed=_set_seed(42)),
      num_burnin_steps=150,
      parallel_iterations=1)

  if tfe.executing_eagerly():
    # TODO(b/79991421): Figure out why this is approx twice as many as it
    # should be. I.e., `expected_calls = (150 + 150) * 2 + 1`.
    expected_calls = 1202
  else:
    expected_calls = 2
  self.assertAllEqual(dict(target_calls=expected_calls), counter)

  expected_x = (tf.digamma(self._shape_param) - np.log(self._rate_param))
  expected_exp_x = self._shape_param / self._rate_param

  log_accept_ratio_, samples_, expected_x_ = self.evaluate(
      [kernel_results.log_accept_ratio, samples, expected_x])

  actual_x = samples_.mean()
  actual_exp_x = np.exp(samples_).mean()
  acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))

  tf.logging.vlog(
      1, 'True E[x, exp(x)]: {}\t{}'.format(expected_x_, expected_exp_x))
  tf.logging.vlog(
      1, 'Estimated E[x, exp(x)]: {}\t{}'.format(actual_x, actual_exp_x))
  self.assertNear(actual_x, expected_x_, 2e-2)
  self.assertNear(actual_exp_x, expected_exp_x, 2e-2)
  self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
                      acceptance_probs > 0.5)
  self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
                      acceptance_probs <= 1.)
def testChainWorksCorrelatedMultivariate(self):
  dtype = np.float32
  true_mean = dtype([0, 0])
  true_cov = dtype([[1, 0.5], [0.5, 1]])
  num_results = 1500
  counter = collections.Counter()

  def target_log_prob(x, y):
    counter['target_calls'] += 1
    # Corresponds to unnormalized MVN.
    # z = matmul(inv(chol(true_cov)), [x, y] - true_mean)
    z = tf.stack([x, y], axis=-1) - true_mean
    z = tf.squeeze(
        tf.linalg.triangular_solve(
            np.linalg.cholesky(true_cov),
            z[..., tf.newaxis]),
        axis=-1)
    return -0.5 * tf.reduce_sum(z**2., axis=-1)

  states, kernel_results = tfp.mcmc.sample_chain(
      num_results=num_results,
      current_state=[dtype(-2), dtype(2)],
      kernel=tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=target_log_prob,
          step_size=[1.23, 1.23],
          num_leapfrog_steps=2,
          seed=_set_seed(54)),
      num_burnin_steps=200,
      parallel_iterations=1)

  if tfe.executing_eagerly():
    # TODO(b/79991421): Figure out why this is approx twice as many as it
    # should be. I.e., `expected_calls = (num_results + 200) * 2 * 2 + 1`.
    expected_calls = 6802
  else:
    expected_calls = 2
  self.assertAllEqual(dict(target_calls=expected_calls), counter)

  states = tf.stack(states, axis=-1)
  self.assertEqual(num_results, states.shape[0].value)
  sample_mean = tf.reduce_mean(states, axis=0)
  x = states - sample_mean
  sample_cov = tf.matmul(x, x, transpose_a=True) / dtype(num_results)
  [sample_mean_, sample_cov_, is_accepted_] = self.evaluate([
      sample_mean, sample_cov, kernel_results.is_accepted])

  self.assertNear(0.6, is_accepted_.mean(), err=0.05)
  self.assertAllClose(true_mean, sample_mean_, atol=0.06, rtol=0.)
  self.assertAllClose(true_cov, sample_cov_, atol=0., rtol=0.2)
def testJacobianDiagonal4D(self):
  """Tests that the diagonal of the Jacobian matrix computes correctly."""
  dtype = np.float32
  true_mean = dtype([0, 0, 0, 0])
  true_cov = dtype([[1, 0.25, 0.25, 0.25], [0.25, 2, 0.25, 0.25],
                    [0.25, 0.25, 3, 0.25], [0.25, 0.25, 0.25, 4]])
  chol = tf.linalg.cholesky(true_cov)
  target = tfd.MultivariateNormalTriL(loc=true_mean, scale_tril=chol)

  # Assume that the state is passed as a 2x2 matrix of sample_shape = [5, 3]:
  sample_shape = [5, 3]

  def target_fn(*x):
    z = tf.reshape(x, sample_shape + [4])
    return target.log_prob(z)

  state = [tf.ones(sample_shape + [2, 2], dtype=dtype)]
  fn_val = target_fn(*state)
  grad_fn = tfe.gradients_function(target_fn)
  if tfe.executing_eagerly():
    grads = grad_fn(*state)
  else:
    grads = tf.gradients(fn_val, state)

  _, diag_jacobian_shape_passed = tfp.math.diag_jacobian(
      xs=state, ys=grads, fn=grad_fn, sample_shape=tf.shape(fn_val))
  _, diag_jacobian_shape_none = tfp.math.diag_jacobian(
      xs=state, ys=grads, fn=grad_fn)

  true_diag_jacobian = np.zeros(sample_shape + [2, 2])
  true_diag_jacobian[..., 0, 0] = -1.06
  true_diag_jacobian[..., 0, 1] = -0.52
  true_diag_jacobian[..., 1, 0] = -0.34
  true_diag_jacobian[..., 1, 1] = -0.26

  self.assertAllClose(self.evaluate(diag_jacobian_shape_passed[0]),
                      true_diag_jacobian, atol=0.01, rtol=0.01)
  self.assertAllClose(self.evaluate(diag_jacobian_shape_none[0]),
                      true_diag_jacobian, atol=0.01, rtol=0.01)
def _value_and_gradients(fn, fn_arg_list, result=None, grads=None, name=None):
  """Helper to `maybe_call_fn_and_grads`."""
  with tf.name_scope(name, 'value_and_gradients',
                     [fn_arg_list, result, grads]):

    def _convert_to_tensor(x, name):
      ctt = lambda x_: x_ if x_ is None else tf.convert_to_tensor(x_, name=name)
      return [ctt(x_) for x_ in x] if is_list_like(x) else ctt(x)

    fn_arg_list = (list(fn_arg_list) if is_list_like(fn_arg_list)
                   else [fn_arg_list])
    fn_arg_list = _convert_to_tensor(fn_arg_list, 'fn_arg')

    if result is None:
      result = fn(*fn_arg_list)
    result = _convert_to_tensor(result, 'fn_result')

    if grads is not None:
      grads = _convert_to_tensor(grads, 'fn_grad')
      return result, grads

    if tfe.executing_eagerly():
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of the Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        def make_fn_slice(i):
          """Needed to prevent `cell-var-from-loop` pylint warning."""
          return lambda *args: fn(*args)[i]

        grads = [
            tfe.gradients_function(make_fn_slice(i))(*fn_arg_list)[i]
            for i in range(len(result))
        ]
      else:
        grads = tfe.gradients_function(fn)(*fn_arg_list)
    else:
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of the Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        grads = [tf.gradients(result[i], fn_arg_list[i])[0]
                 for i in range(len(result))]
      else:
        grads = tf.gradients(result, fn_arg_list)
    return result, grads
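# A small sketch of what this helper returns for a list-valued `fn` whose
# output length matches the argument list, in which case the block diagonal
# of the Jacobian is computed: grads[i] = d result[i] / d fn_arg_list[i].
# The function below is illustrative only, and `_value_and_gradients` plus
# its `is_list_like` helper are assumed to be in scope.
fn = lambda x, y: [x**2., x * y]
result, grads = _value_and_gradients(
    fn, [tf.constant(3.), tf.constant(2.)])
# result evaluates to [9., 6.]; grads to [6., 3.] (i.e., [2x, x]).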
def __init__(self,
             target_log_prob_fn,
             step_size,
             num_leapfrog_steps,
             seed=None,
             name=None):
  if seed is not None and tfe.executing_eagerly():
    # TODO(b/68017812): Re-enable once TFE supports `tf.random_shuffle` seed.
    raise NotImplementedError(
        'Specifying a `seed` when running eagerly is '
        'not currently supported. To run in Eager '
        'mode with a seed, use `tf.set_random_seed`.')
  self._seed_stream = tf.contrib.distributions.SeedStream(
      seed, 'hmc_one_step')
  self._parameters = dict(
      target_log_prob_fn=target_log_prob_fn,
      step_size=step_size,
      num_leapfrog_steps=num_leapfrog_steps,
      seed=seed,
      name=name)
def testNoGradientsNiceError(self):
  dtype = np.float32

  def fn(x, y):
    return x**2 + tf.stop_gradient(y)**2

  fn_args = [dtype(3), dtype(3)]
  # Convert function input to a list of tensors.
  fn_args = [
      tf.convert_to_tensor(arg, name='arg{}'.format(i))
      for i, arg in enumerate(fn_args)
  ]
  if tfe.executing_eagerly():
    with self.assertRaisesRegexp(
        ValueError, 'Encountered `None`.*\n.*fn_arg_list.*\n.*None'):
      maybe_call_fn_and_grads(fn, fn_args)
  else:
    with self.assertRaisesRegexp(
        ValueError, 'Encountered `None`.*\n.*fn_arg_list.*arg1.*\n.*None'):
      maybe_call_fn_and_grads(fn, fn_args)
def __init__(self):
  self._mode = 'eager' if tfe.executing_eagerly() else 'graph'
def diag_jacobian(xs,
                  ys=None,
                  sample_shape=None,
                  fn=None,
                  parallel_iterations=10,
                  name=None):
  """Computes the diagonal of the Jacobian matrix of `ys=fn(xs)` wrt `xs`.

  If `ys` is a tensor or a list of tensors of the form `(ys_1, .., ys_n)` and
  `xs` is of the form `(xs_1, .., xs_n)`, the function `jacobians_diag`
  computes the diagonal of the Jacobian matrix, i.e., the partial derivatives
  `(dys_1/dxs_1, .., dys_n/dxs_n)`. For definition details, see
  https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant

  #### Example

  ##### Diagonal Hessian of the log-density of a 3D Gaussian distribution

  In this example we compute the diagonal of the Hessian of the log-density
  of a 3D Gaussian distribution by differentiating the gradient of its
  log-density.

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow.contrib.eager as tfe
  import tensorflow_probability as tfp

  tfd = tfp.distributions
  dtype = np.float32

  with tf.Session(graph=tf.Graph()) as sess:
    true_mean = dtype([0, 0, 0])
    true_cov = dtype([[1, 0.25, 0.25], [0.25, 2, 0.25], [0.25, 0.25, 3]])
    chol = tf.linalg.cholesky(true_cov)
    target = tfd.MultivariateNormalTriL(loc=true_mean, scale_tril=chol)

    # Assume that the state is passed as a list of tensors `x` and `y`.
    # Then the target function is defined as follows:
    def target_fn(x, y):
      # Stack the input tensors together.
      z = tf.concat([x, y], axis=-1) - true_mean
      return target.log_prob(z)

    sample_shape = [3, 5]
    state = [tf.ones(sample_shape + [2], dtype=dtype),
             tf.ones(sample_shape + [1], dtype=dtype)]
    fn_val = target_fn(*state)
    grad_fn = tfe.gradients_function(target_fn)
    if tfe.executing_eagerly():
      grads = grad_fn(*state)
    else:
      grads = tf.gradients(fn_val, state)

    # We can either pass the `sample_shape` of the `state` or not, which
    # impacts the computational speed of `diag_jacobian`.
    _, diag_jacobian_shape_passed = diag_jacobian(
        xs=state, ys=grads, sample_shape=tf.shape(fn_val))
    _, diag_jacobian_shape_none = diag_jacobian(
        xs=state, ys=grads)

    diag_jacobian_shape_passed_ = sess.run(diag_jacobian_shape_passed)
    diag_jacobian_shape_none_ = sess.run(diag_jacobian_shape_none)

    print('hessian computed through `diag_jacobian`, sample_shape passed: ',
          np.concatenate(diag_jacobian_shape_passed_, -1))
    print('hessian computed through `diag_jacobian`, sample_shape skipped: ',
          np.concatenate(diag_jacobian_shape_none_, -1))
  ```

  Args:
    xs: `Tensor` or a python `list` of `Tensors` of real-like dtypes and
      shapes `sample_shape` + `event_shape_i`, where `event_shape_i` can be
      different for different tensors.
    ys: `Tensor` or a python `list` of `Tensors` of the same dtype as `xs`.
      Must broadcast with the shape of `xs`. Can be omitted if `fn` is
      provided.
    sample_shape: A common `sample_shape` of the input tensors of `xs`. If
      not provided, it is assumed to be `[1]`, which may result in slow
      performance of `jacobians_diag`.
    fn: Python callable that takes `xs` as an argument (or `*xs`, if it is a
      list) and returns `ys`. May be omitted if `ys` is provided and eager
      execution is disabled.
    parallel_iterations: `int` that specifies the allowed number of
      coordinates of the input tensor `xs` for which the partial derivatives
      `dys_i/dxs_i` can be computed in parallel.
    name: Python `str` name prefixed to `Op`s created by this function.
      Default value: `None` (i.e., "diag_jacobian").

  Returns:
    ys: a list, which coincides with the input `ys`, when provided. If the
      input `ys` is None, `fn(*xs)` gets computed and returned as a list.
    jacobians_diag_res: a `Tensor` or a Python list of `Tensor`s of the same
      dtypes and shapes as the input `xs`. This is the diagonal of the
      Jacobian of `ys` wrt `xs`.

  Raises:
    ValueError: if lists `xs` and `ys` have different length, or both `ys`
      and `fn` are `None`, or `fn` is `None` in eager execution mode.
  """
  with tf.name_scope(name, 'jacobians_diag', [xs, ys]):
    if sample_shape is None:
      sample_shape = [1]
    # Output Jacobian diagonal.
    jacobians_diag_res = []
    # Convert input `xs` to a list.
    xs = list(xs) if _is_list_like(xs) else [xs]
    xs = [tf.convert_to_tensor(x) for x in xs]
    if not tfe.executing_eagerly():
      if ys is None:
        if fn is None:
          raise ValueError('Both `ys` and `fn` can not be `None`')
        else:
          ys = fn(*xs)
      # Convert `ys` to a list.
      ys = list(ys) if _is_list_like(ys) else [ys]
      if len(xs) != len(ys):
        raise ValueError('`xs` and `ys` should have the same length')

      for y, x in zip(ys, xs):
        # Broadcast `y` to the shape of `x`.
        y_ = y + tf.zeros_like(x)
        # Change `event_shape` to one dimension.
        y_ = tf.reshape(y_, tf.concat([sample_shape, [-1]], -1))

        # Declare an iterator and tensor array loop variables for the
        # gradients.
        n = tf.size(x) / tf.to_int32(tf.reduce_prod(sample_shape))
        n = tf.to_int32(n)
        loop_vars = [
            0,
            tf.TensorArray(x.dtype, n)
        ]

        def loop_body(j):
          """Loop function to compute gradients of the each direction."""
          # Gradient along direction `j`.
          res = tf.gradients(y_[..., j], x)[0]  # pylint: disable=cell-var-from-loop
          if res is None:
            # Return zero, if the gradient is `None`.
            res = tf.zeros(tf.concat([sample_shape, [1]], -1),
                           dtype=x.dtype)  # pylint: disable=cell-var-from-loop
          else:
            # Reshape `event_shape` to 1D.
            res = tf.reshape(res, tf.concat([sample_shape, [-1]], -1))
            # Add an artificial dimension for the case of zero shape input
            # tensor.
            res = tf.expand_dims(res, 0)
            res = res[..., j]
          return res  # pylint: disable=cell-var-from-loop

        # Iterate over all elements of the gradient and compute second order
        # derivatives.
        _, jacobian_diag_res = tf.while_loop(
            lambda j, _: j < n,  # pylint: disable=cell-var-from-loop
            lambda j, result: (j + 1, result.write(j, loop_body(j))),
            loop_vars,
            parallel_iterations=parallel_iterations)

        shape_x = tf.shape(x)
        # Stack gradients together and move flattened `event_shape` to the
        # zero position.
        reshaped_jacobian_diag = tf.transpose(jacobian_diag_res.stack())
        # Reshape to the original tensor.
        reshaped_jacobian_diag = tf.reshape(reshaped_jacobian_diag, shape_x)
        jacobians_diag_res.append(reshaped_jacobian_diag)

    else:
      if fn is None:
        raise ValueError('`fn` can not be `None` when eager execution is '
                         'enabled')
      if ys is None:
        ys = fn(*xs)

      def fn_slice(i, j):
        """Broadcast y[i], flatten event shape of y[i], return y[i][..., j]."""
        def fn_broadcast(*state):
          res = fn(*state)
          res = list(res) if _is_list_like(res) else [res]
          if len(res) != len(state):
            res *= len(state)
          res = [tf.reshape(r + tf.zeros_like(s),
                            tf.concat([sample_shape, [-1]], -1))
                 for r, s in zip(res, state)]
          return res

        # Expand dimensions before returning in order to support 0D input
        # `xs`.
        return lambda *state: tf.expand_dims(fn_broadcast(*state)[i],
                                             0)[..., j]

      for i, x in enumerate(xs):
        # Declare an iterator and tensor array loop variables for the
        # gradients.
        n = tf.size(x) / tf.to_int32(tf.reduce_prod(sample_shape))
        n = tf.to_int32(n)
        loop_vars = [
            0,
            tf.TensorArray(x.dtype, n)
        ]

        def loop_body(j):
          """Loop function to compute gradients of the each direction."""
          res = tfe.gradients_function(fn_slice(i, j))(*xs)[i]  # pylint: disable=cell-var-from-loop
          if res is None:
            res = tf.zeros(tf.concat([sample_shape, [1]], -1),
                           dtype=x.dtype)  # pylint: disable=cell-var-from-loop
          else:
            res = tf.reshape(res, tf.concat([sample_shape, [-1]], -1))
            res = res[..., j]
          return res

        # Iterate over all elements of the gradient and compute second order
        # derivatives.
        _, jacobian_diag_res = tf.while_loop(
            lambda j, _: j < n,  # pylint: disable=cell-var-from-loop
            lambda j, result: (j + 1, result.write(j, loop_body(j))),
            loop_vars,
            parallel_iterations=parallel_iterations)

        shape_x = tf.shape(x)
        # Stack gradients together and move flattened `event_shape` to the
        # zero position.
        reshaped_jacobian_diag = tf.transpose(jacobian_diag_res.stack())
        # Reshape to the original tensor.
        reshaped_jacobian_diag = tf.reshape(reshaped_jacobian_diag, shape_x)
        jacobians_diag_res.append(reshaped_jacobian_diag)

  return ys, jacobians_diag_res
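# A minimal eager-mode usage sketch of `diag_jacobian` as defined above,
# complementing the graph-mode example in its docstring. Assumes TF 1.x
# with eager execution enabled and that this runs in the module where
# `diag_jacobian` and its helpers live; the elementwise square is
# illustrative only. For ys = x**2, the Jacobian diagonal is 2 * x.
import tensorflow as tf

tf.enable_eager_execution()

x = tf.constant([1., 2., 3.])
ys, jac_diag = diag_jacobian(xs=x, fn=lambda x_: x_**2.)
print(jac_diag[0].numpy())  # [2., 4., 6.]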
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def get_session():
    cfg = tf.ConfigProto()
    cfg.gpu_options.allow_growth = True
    # cfg.gpu_options.per_process_gpu_memory_fraction = 0.1
    return tf.Session(config=cfg)


get_session()
tfe.enable_eager_execution()
tfe.executing_eagerly()  # => True

W = tf.get_variable(name="W", shape=(784, 10))
b = tf.get_variable(name="b", shape=(10,))


def softmax_model(image_batch):
    model_output = tf.nn.softmax(tf.matmul(image_batch, W) + b)
    return model_output


def cross_entropy(model_output, label_batch):
    loss = tf.reduce_mean(
        -tf.reduce_sum(label_batch * tf.log(model_output),
                       reduction_indices=[1]))
    return loss
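# A hedged sketch of how these pieces might be wired into a training step
# under eager execution. The use of `tfe.implicit_gradients` and the
# MNIST-shaped random batch are assumptions, not part of the original
# snippet.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)

# `implicit_gradients` differentiates the loss with respect to all
# variables used during the computation (here W and b) and returns
# (gradient, variable) pairs that `apply_gradients` consumes.
grad_fn = tfe.implicit_gradients(
    lambda images, labels: cross_entropy(softmax_model(images), labels))


def train_step(image_batch, label_batch):
    optimizer.apply_gradients(grad_fn(image_batch, label_batch))


# Illustrative call with random data shaped like flattened MNIST digits.
images = tf.random_normal([32, 784])
labels = tf.one_hot(tf.random_uniform([32], maxval=10, dtype=tf.int32), 10)
train_step(images, labels)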
from data_utils import load_dataset, batch_data
from sklearn.model_selection import train_test_split
import tensorflow as tf
import time
from seq2seq2_with_attention import seq2seq
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()
print(tfe.executing_eagerly())

# Prepare the dataset.
num_examples = 3000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(
    num_examples=num_examples)
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = \
    train_test_split(input_tensor, target_tensor, test_size=0.2)

vocab_size_input = len(inp_lang.word2idx)    # 1899
vocab_size_target = len(targ_lang.word2idx)  # 919

batch_size = 60
num_epochs = 10
embedding_dim = 256
hidden_units = 256
learning_rate = 0.01

# Add all variables to the loss function.
# def wrapper_loss(input_sent, target_sent):
#     model = seq2seq(input_sent, target_sent,
#                     vocab_size_input=vocab_size_input,
#                     vocab_size_target=vocab_size_target,
#                     embeding_dim=embedding_dim,
def synth_texture(self, path_to_img, patchLength, numPatches, mode="cut",
                  sequence=False):
    tf.enable_eager_execution()
    print("Eager execution: {}".format(tf.executing_eagerly()))

    # Define the calculation progress updates for the external thread.
    self.update_prograssBar_value = external_run_prograssBar_1()
    self.update_prograssBar_value.countChanged.connect(
        self.progressBar.setValue)

    def randomPatch(texture, patchLength):
        h, w, _ = texture.shape
        i = np.random.randint(h - patchLength)
        j = np.random.randint(w - patchLength)
        return texture[i:i + patchLength, j:j + patchLength]

    def L2OverlapDiff(patch, patchLength, overlap, res, y, x):
        error = 0
        if x > 0:
            left = patch[:, :overlap] - res[y:y + patchLength, x:x + overlap]
            error += np.sum(left**2)
        if y > 0:
            up = patch[:overlap, :] - res[y:y + overlap, x:x + patchLength]
            error += np.sum(up**2)
        if x > 0 and y > 0:
            corner = patch[:overlap, :overlap] - res[y:y + overlap,
                                                     x:x + overlap]
            error -= np.sum(corner**2)
        return error

    def randomBestPatch(texture, patchLength, overlap, res, y, x):
        h, w, _ = texture.shape
        errors = np.zeros((h - patchLength, w - patchLength))
        for i in range(h - patchLength):
            for j in range(w - patchLength):
                patch = texture[i:i + patchLength, j:j + patchLength]
                e = L2OverlapDiff(patch, patchLength, overlap, res, y, x)
                errors[i, j] = e
        i, j = np.unravel_index(np.argmin(errors), errors.shape)
        return texture[i:i + patchLength, j:j + patchLength]

    def minCutPath(errors):
        # Dijkstra's algorithm, vertical min-cut path.
        pq = [(error, [i]) for i, error in enumerate(errors[0])]
        heapq.heapify(pq)
        h, w = errors.shape
        seen = set()
        while pq:
            error, path = heapq.heappop(pq)
            curDepth = len(path)
            curIndex = path[-1]
            if curDepth == h:
                return path
            for delta in -1, 0, 1:
                nextIndex = curIndex + delta
                if 0 <= nextIndex < w:
                    if (curDepth, nextIndex) not in seen:
                        cumError = error + errors[curDepth, nextIndex]
                        heapq.heappush(pq, (cumError, path + [nextIndex]))
                        seen.add((curDepth, nextIndex))

    def minCutPatch(patch, patchLength, overlap, res, y, x):
        patch = patch.copy()
        dy, dx, _ = patch.shape
        minCut = np.zeros_like(patch, dtype=bool)
        if x > 0:
            left = patch[:, :overlap] - res[y:y + dy, x:x + overlap]
            leftL2 = np.sum(left**2, axis=2)
            for i, j in enumerate(minCutPath(leftL2)):
                minCut[i, :j] = True
        if y > 0:
            up = patch[:overlap, :] - res[y:y + overlap, x:x + dx]
            upL2 = np.sum(up**2, axis=2)
            for j, i in enumerate(minCutPath(upL2.T)):
                minCut[:i, j] = True
        np.copyto(patch, res[y:y + dy, x:x + dx], where=minCut)
        return patch

    texture = Image.open(path_to_img)
    texture = util.img_as_float(texture)
    overlap = patchLength // 6
    numPatchesHigh, numPatchesWide = numPatches
    h = (numPatchesHigh * patchLength) - (numPatchesHigh - 1) * overlap
    w = (numPatchesWide * patchLength) - (numPatchesWide - 1) * overlap
    res = np.zeros((h, w, texture.shape[2]))
    self.update_prograssBar_value.start()
    for i in range(numPatchesHigh):
        global count
        count = i
        self.update_prograssBar_value.start()
        print("Iteration: {}".format(i))
        for j in range(numPatchesWide):
            y = i * (patchLength - overlap)
            x = j * (patchLength - overlap)
            if (i == 0 and j == 0) or mode == "random":
                patch = randomPatch(texture, patchLength)
            elif mode == "best":
                patch = randomBestPatch(texture, patchLength, overlap, res,
                                        y, x)
            elif mode == "cut":
                patch = randomBestPatch(texture, patchLength, overlap, res,
                                        y, x)
                patch = minCutPatch(patch, patchLength, overlap, res, y, x)
            res[y:y + patchLength, x:x + patchLength] = patch
    image = Image.fromarray((res * 255).astype(np.uint8))
    return image