def testConditionalMaskUpdate(self):
    """Verify that the conditional update op follows the pruning schedule.

    The requested sparsity is raised on each of ten iterations while the
    hparams restrict pruning to ``pruning_frequency=2`` between
    ``begin_pruning_step=1`` and ``end_pruning_step=6``. The number of
    non-zero masked weights is recorded after every iteration and compared
    with the expected sequence.
    """
    test_spec = ",".join([
        "pruning_frequency=2",
        "begin_pruning_step=1",
        "end_pruning_step=6",
    ])
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
    weights = variables.Variable(
        math_ops.linspace(1.0, 100.0, 100), name="weights")
    masked_weights = pruning.apply_mask(weights)
    sparsity = variables.Variable(0.00, name="sparsity")

    # Set up pruning with a threshold decay of zero.
    pruner = pruning.Pruning(pruning_hparams, sparsity=sparsity)
    pruner._spec.threshold_decay = 0.0
    mask_update_op = pruner.conditional_mask_update_op()
    sparsity_schedule = math_ops.linspace(0.0, 0.9, 10)
    increment_global_step = state_ops.assign_add(self.global_step, 1)

    observed_counts = []
    with self.test_session() as session:
        variables.global_variables_initializer().run()
        for step in range(10):
            # Raise the sparsity target, attempt a mask update, then advance
            # the global step that drives the schedule.
            session.run(state_ops.assign(sparsity, sparsity_schedule[step]))
            session.run(mask_update_op)
            session.run(increment_global_step)
            observed_counts.append(np.count_nonzero(masked_weights.eval()))

    # Weights pruned at steps 0, 2, 4, and 6.
    expected_counts = [100, 100, 80, 80, 60, 60, 40, 40, 40, 40]
    self.assertAllEqual(expected_counts, observed_counts)
def testConditionalMaskUpdate(self):
    """Check that masks are only refreshed on scheduled pruning steps.

    Ten iterations are run; each one assigns the next value of a linearly
    increasing sparsity target, runs the conditional mask update op and
    increments the global step. The count of non-zero masked weights after
    each iteration must match the expected list.
    """
    hparam_overrides = (
        "pruning_frequency=2",
        "begin_pruning_step=1",
        "end_pruning_step=6",
    )
    pruning_hparams = pruning.get_pruning_hparams().parse(
        ",".join(hparam_overrides))
    weights = variables.Variable(
        math_ops.linspace(1.0, 100.0, 100), name="weights")
    masked_weights = pruning.apply_mask(weights)
    sparsity = variables.Variable(0.00, name="sparsity")

    # Set up pruning
    pruning_obj = pruning.Pruning(pruning_hparams, sparsity=sparsity)
    pruning_obj._spec.threshold_decay = 0.0
    mask_update_op = pruning_obj.conditional_mask_update_op()
    sparsity_targets = math_ops.linspace(0.0, 0.9, 10)
    increment_global_step = state_ops.assign_add(self.global_step, 1)

    remaining = []
    with self.test_session() as session:
        variables.global_variables_initializer().run()
        for iteration in range(10):
            set_sparsity = state_ops.assign(
                sparsity, sparsity_targets[iteration])
            session.run(set_sparsity)
            session.run(mask_update_op)
            session.run(increment_global_step)
            remaining.append(np.count_nonzero(masked_weights.eval()))

    # Weights pruned at steps 0, 2, 4, and 6.
    self.assertAllEqual(
        [100, 100, 80, 80, 60, 60, 40, 40, 40, 40], remaining)
def testWeightSpecificSparsity(self):
    """Check that ``weight_sparsity_map`` overrides the global target.

    Two identically initialised weight variables are masked under scopes
    ``layer1`` and ``layer2``. The hparams set ``target_sparsity=0.5`` and
    map ``layer2/weights`` to 0.75; after 110 update steps the reported
    weight sparsities must be ``[0.5, 0.75]``.
    """
    test_spec = ",".join([
        "begin_pruning_step=1",
        "pruning_frequency=1",
        "end_pruning_step=100",
        "target_sparsity=0.5",
        "weight_sparsity_map=[layer2/weights:0.75]",
        "threshold_decay=0.0",
    ])
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

    # Create one masked weight variable per layer scope, in order.
    for scope_name in ("layer1", "layer2"):
        with variable_scope.variable_scope(scope_name):
            layer_weights = variables.Variable(
                math_ops.linspace(1.0, 100.0, 100), name="weights")
            pruning.apply_mask(layer_weights)

    pruner = pruning.Pruning(pruning_hparams)
    mask_update_op = pruner.conditional_mask_update_op()
    increment_global_step = state_ops.assign_add(self.global_step, 1)

    with self.test_session() as session:
        variables.global_variables_initializer().run()
        for _ in range(110):
            session.run(mask_update_op)
            session.run(increment_global_step)

        measured = session.run(pruning.get_weight_sparsity())
        self.assertAllEqual(measured, [0.5, 0.75])
def testWeightSpecificSparsity(self):
    """Verify per-weight sparsity targets taken from ``weight_sparsity_map``.

    ``layer1/weights`` is expected to end at the global ``target_sparsity``
    of 0.5 and ``layer2/weights`` at its mapped value of 0.75 once 110
    conditional mask updates have been run.
    """
    overrides = (
        "begin_pruning_step=1",
        "pruning_frequency=1",
        "end_pruning_step=100",
        "target_sparsity=0.5",
        "weight_sparsity_map=[layer2/weights:0.75]",
        "threshold_decay=0.0",
    )
    pruning_hparams = pruning.get_pruning_hparams().parse(",".join(overrides))

    with variable_scope.variable_scope("layer1"):
        first_weights = variables.Variable(
            math_ops.linspace(1.0, 100.0, 100), name="weights")
        pruning.apply_mask(first_weights)
    with variable_scope.variable_scope("layer2"):
        second_weights = variables.Variable(
            math_ops.linspace(1.0, 100.0, 100), name="weights")
        pruning.apply_mask(second_weights)

    pruning_obj = pruning.Pruning(pruning_hparams)
    mask_update_op = pruning_obj.conditional_mask_update_op()
    increment_global_step = state_ops.assign_add(self.global_step, 1)

    with self.cached_session() as session:
        variables.global_variables_initializer().run()
        for _ in range(110):
            session.run(mask_update_op)
            session.run(increment_global_step)

        sparsities = session.run(pruning.get_weight_sparsity())
        self.assertAllEqual(sparsities, [0.5, 0.75])
def linspace(  # pylint: disable=missing-docstring
    start, stop, num=50, endpoint=True, retstep=False, dtype=float, axis=0):
    # NumPy-style `linspace` implemented on top of `math_ops.linspace`.
    #
    # Args:
    #   start, stop: interval bounds; converted with `np_array_ops.array`.
    #   num: number of samples to generate; must be non-negative.
    #   endpoint: if true, `stop` is the last sample, otherwise it is excluded.
    #   retstep: if true, return `(samples, step)` instead of only `samples`.
    #   dtype: result dtype; a falsy value skips dtype resolution and the
    #     final cast.
    #   axis: forwarded unchanged to `math_ops.linspace`.
    #
    # Returns:
    #   The samples wrapped by `np_arrays.tensor_to_ndarray`, or a
    #   `(samples, step)` pair of such wrappers when `retstep` is true. `step`
    #   stays NaN when no spacing is defined (`num == 0`, or `endpoint=True`
    #   with `num <= 1`).
    #
    # Raises:
    #   ValueError: if `num` is negative.
    if dtype:
        dtype = np_utils.result_type(dtype)
    start = np_array_ops.array(start, dtype=dtype).data
    stop = np_array_ops.array(stop, dtype=dtype).data
    if num < 0:
        raise ValueError(
            'Number of samples {} must be non-negative.'.format(num))
    step = ops.convert_to_tensor(np.nan)
    if endpoint:
        result = math_ops.linspace(start, stop, num, axis=axis)
        if num > 1:
            step = (stop - start) / (num - 1)
    else:
        # math_ops.linspace does not support endpoint=False so we manually
        # handle it here.
        if num > 0:
            # The spacing is defined as soon as there is one sample; NumPy
            # reports (stop - start) / num here, including for num == 1.
            step = (stop - start) / num
        if num > 1:
            new_stop = math_ops.cast(stop, step.dtype) - step
            start = math_ops.cast(start, new_stop.dtype)
            result = math_ops.linspace(start, new_stop, num, axis=axis)
        else:
            result = math_ops.linspace(start, stop, num, axis=axis)
    if dtype:
        result = math_ops.cast(result, dtype)
    if retstep:
        return (np_arrays.tensor_to_ndarray(result),
                np_arrays.tensor_to_ndarray(step))
    return np_arrays.tensor_to_ndarray(result)
def _get_grid_locations(image_height, image_width):
    """Return the (y, x) coordinates of every point on an image grid.

    Args:
      image_height: number of grid rows.
      image_width: number of grid columns.

    Returns:
      The 'ij'-indexed meshgrid of row coordinates ``0..image_height - 1``
      and column coordinates ``0..image_width - 1``, stacked on a new last
      axis as ``(y, x)``.
    """
    last_row = math_ops.to_float(image_height) - 1
    last_col = math_ops.to_float(image_width) - 1
    row_coords = math_ops.linspace(0.0, last_row, image_height)
    col_coords = math_ops.linspace(0.0, last_col, image_width)
    y_grid, x_grid = array_ops.meshgrid(row_coords, col_coords, indexing='ij')
    return array_ops.stack((y_grid, x_grid), -1)
def _get_boundary_locations(image_height, image_width, num_points_per_edge):
    """Compute evenly-spaced locations along the edge of an image.

    Args:
      image_height: image height; converted to float.
      image_width: image width; converted to float.
      num_points_per_edge: number of samples per axis in addition to the two
        end points.

    Returns:
      The ``(y, x)`` pairs, stacked on the last axis, of every sampled grid
      point whose x is ``0`` or ``image_width - 1`` or whose y is ``0`` or
      ``image_height - 1``.
    """
    height = math_ops.to_float(image_height)
    width = math_ops.to_float(image_width)
    samples_per_axis = num_points_per_edge + 2
    y_range = math_ops.linspace(0.0, height - 1, samples_per_axis)
    x_range = math_ops.linspace(0.0, width - 1, samples_per_axis)
    ys, xs = array_ops.meshgrid(y_range, x_range, indexing='ij')

    on_left_or_right = math_ops.logical_or(
        math_ops.equal(xs, 0), math_ops.equal(xs, width - 1))
    on_top_or_bottom = math_ops.logical_or(
        math_ops.equal(ys, 0), math_ops.equal(ys, height - 1))
    is_boundary = math_ops.logical_or(on_left_or_right, on_top_or_bottom)

    boundary_ys = array_ops.boolean_mask(ys, is_boundary)
    boundary_xs = array_ops.boolean_mask(xs, is_boundary)
    return array_ops.stack([boundary_ys, boundary_xs], axis=-1)
def setUp(self):
    """Write a small bias checkpoint and prepare shared test fixtures.

    Sets ``self.bundle_file`` (checkpoint path), the old and new class
    vocabulary file paths, ``self.init_val`` and ``self.initializer``.
    """
    ops.reset_default_graph()
    dim = 1
    num = 3
    with ops.name_scope('some_scope'):
        # Basically from 0 to dim*num-1.
        flat_data = math_ops.linspace(0.0, dim * num - 1, dim * num)
        bias = variables.Variable(
            array_ops.reshape(flat_data, (num, dim)), name='bias')
    save = saver.Saver([bias])
    with self.test_session() as sess:
        variables.global_variables_initializer().run()
        self.bundle_file = os.path.join(
            test.get_temp_dir(), 'bias_checkpoint')
        save.save(sess, self.bundle_file)

    self.new_class_vocab_file = os.path.join(
        test.test_src_dir_path(_TESTDATA_PATH), 'keyword_new.txt')
    self.old_class_vocab_file = os.path.join(
        test.test_src_dir_path(_TESTDATA_PATH), 'keyword.txt')
    self.init_val = 42

    def _init_val_initializer(shape, dtype=None, partition_info=None):
        """Return a float32 tensor of `shape` tiled from `self.init_val`."""
        del dtype, partition_info  # Unused by this unit-testing initializer.
        fill_value = constant_op.constant(
            [[self.init_val]], dtype=dtypes.float32)
        return array_ops.tile(fill_value, shape)

    self.initializer = _init_val_initializer
def testNanFromGradsDontPropagate(self):
    """Test that update with NaN gradients does not cause NaN in results."""

    def _nan_log_prob_with_nan_gradient(x):
        # NaN times anything is NaN, so the log prob is always NaN.
        return np.nan * math_ops.reduce_sum(x)

    with self.test_session() as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        kernel_outputs = hmc.kernel(
            2., 5, initial_x, _nan_log_prob_with_nan_gradient, [0])
        updated_x, acceptance_probs, new_log_prob, new_grad = kernel_outputs
        initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
            [initial_x, updated_x, acceptance_probs])

        logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
        logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
        logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))

        # The proposal must be rejected: state unchanged, zero acceptance.
        self.assertAllEqual(initial_x_val, updated_x_val)
        self.assertEqual(acceptance_probs_val, 0.)

        state_grad = gradients_impl.gradients(updated_x, initial_x)[0]
        self.assertAllFinite(state_grad.eval())
        grad_of_new_grad = gradients_impl.gradients(new_grad, initial_x)[0]
        self.assertTrue(grad_of_new_grad is None)

        # Gradients of the acceptance probs and new log prob are not finite.
        _ = new_log_prob  # Prevent unused arg error.
def testNanRejection(self):
    """Tests that an update that yields NaN potentials gets rejected.

    We run HMC with a target distribution that returns NaN
    log-likelihoods if any element of x < 0, and unit-scale
    exponential log-likelihoods otherwise. The exponential potential
    pushes x towards 0, ensuring that any reasonably large update will
    push us over the edge into NaN territory.
    """

    def _unbounded_exponential_log_prob(x):
        """An exponential distribution with log-likelihood NaN for x < 0."""
        nan_fill = np.nan * array_ops.ones_like(x)
        per_element_potentials = array_ops.where(x < 0, nan_fill, -x)
        return math_ops.reduce_sum(per_element_potentials)

    with self.test_session() as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        updated_x, acceptance_probs, _, _ = hmc.kernel(
            2., 5, initial_x, _unbounded_exponential_log_prob, [0])
        fetched = sess.run([initial_x, updated_x, acceptance_probs])
        initial_x_val, updated_x_val, acceptance_probs_val = fetched

        logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
        logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
        logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))

        # Rejection means the state is unchanged and acceptance is zero.
        self.assertAllEqual(initial_x_val, updated_x_val)
        self.assertEqual(acceptance_probs_val, 0.)
def _LinSpace(self, start, stop, num):
    """Evaluate `math_ops.linspace(start, stop, num)` in a fresh graph.

    Asserts that the static shape is ``[num]`` and returns the evaluated
    values.
    """
    # NOTE(touts): Needs to pass a graph to get a new session each time.
    with ops.Graph().as_default() as graph, self.test_session(
            graph=graph, force_gpu=self.force_gpu):
        linspace_tensor = math_ops.linspace(start, stop, num, name="linspace")
        self.assertEqual([num], linspace_tensor.get_shape())
        return linspace_tensor.eval()
def testNanFromGradsDontPropagate(self):
    """Test that update with NaN gradients does not cause NaN in results."""

    def _nan_log_prob_with_nan_gradient(x):
        # NaN times anything is NaN, so the log prob is always NaN.
        return np.nan * math_ops.reduce_sum(x)

    with self.test_session(graph=ops.Graph()) as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        updated_x, kernel_results = hmc.kernel(
            target_log_prob_fn=_nan_log_prob_with_nan_gradient,
            current_state=initial_x,
            step_size=2.,
            num_leapfrog_steps=5,
            seed=47)
        fetches = [initial_x, updated_x, kernel_results.acceptance_probs]
        initial_x_, updated_x_, acceptance_probs_ = sess.run(fetches)

        logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
        logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
        logging_ops.vlog(1, "acceptance_probs = {}".format(acceptance_probs_))

        # The proposal must be rejected: state unchanged, zero acceptance.
        self.assertAllEqual(initial_x_, updated_x_)
        self.assertEqual(acceptance_probs_, 0.)

        state_grad = gradients_ops.gradients(updated_x, initial_x)[0]
        self.assertAllFinite(state_grad.eval())

        proposed_grads = kernel_results.proposed_grads_target_log_prob
        # No gradient path from the proposed gradients to the initial state...
        grads_wrt_initial = gradients_ops.gradients(proposed_grads, initial_x)
        self.assertAllEqual(
            [True], [g is None for g in grads_wrt_initial])
        # ...but there is one to the proposed state.
        grads_wrt_proposed = gradients_ops.gradients(
            proposed_grads, kernel_results.proposed_state)
        self.assertAllEqual(
            [False], [g is None for g in grads_wrt_proposed])
def testNanRejection(self):
    """Tests that an update that yields NaN potentials gets rejected.

    We run HMC with a target distribution that returns NaN
    log-likelihoods if any element of x < 0, and unit-scale
    exponential log-likelihoods otherwise. The exponential potential
    pushes x towards 0, ensuring that any reasonably large update will
    push us over the edge into NaN territory.
    """

    def _unbounded_exponential_log_prob(x):
        """An exponential distribution with log-likelihood NaN for x < 0."""
        is_negative = x < 0
        all_nan = np.nan * array_ops.ones_like(x)
        per_element_potentials = array_ops.where(is_negative, all_nan, -x)
        return math_ops.reduce_sum(per_element_potentials)

    with self.test_session() as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        kernel_outputs = hmc.kernel(
            2., 5, initial_x, _unbounded_exponential_log_prob, [0])
        updated_x, acceptance_probs, _, _ = kernel_outputs
        initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
            [initial_x, updated_x, acceptance_probs])

        logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
        logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
        logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))

        # Rejection means the state is unchanged and acceptance is zero.
        self.assertAllEqual(initial_x_val, updated_x_val)
        self.assertEqual(acceptance_probs_val, 0.)
def testNanFromGradsDontPropagate(self):
    """Test that update with NaN gradients does not cause NaN in results."""

    def _nan_log_prob_with_nan_gradient(x):
        # NaN times anything is NaN, so the log prob is always NaN.
        return np.nan * math_ops.reduce_sum(x)

    with self.test_session() as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        updated_x, kernel_results = hmc.kernel(
            target_log_prob_fn=_nan_log_prob_with_nan_gradient,
            current_state=initial_x,
            step_size=2.,
            num_leapfrog_steps=5,
            seed=47)
        initial_x_, updated_x_, acceptance_probs_ = sess.run(
            [initial_x, updated_x, kernel_results.acceptance_probs])

        logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
        logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
        logging_ops.vlog(1, "acceptance_probs = {}".format(acceptance_probs_))

        # The proposal must be rejected: state unchanged, zero acceptance.
        self.assertAllEqual(initial_x_, updated_x_)
        self.assertEqual(acceptance_probs_, 0.)

        grad_of_update = gradients_ops.gradients(updated_x, initial_x)[0]
        self.assertAllFinite(grad_of_update.eval())

        target_grads = kernel_results.proposed_grads_target_log_prob
        missing_wrt_initial = [
            g is None
            for g in gradients_ops.gradients(target_grads, initial_x)
        ]
        self.assertAllEqual([True], missing_wrt_initial)
        missing_wrt_proposed = [
            g is None
            for g in gradients_ops.gradients(
                target_grads, kernel_results.proposed_state)
        ]
        self.assertAllEqual([False], missing_wrt_proposed)
def testNanRejection(self):
    """Tests that an update that yields NaN potentials gets rejected.

    We run HMC with a target distribution that returns NaN
    log-likelihoods if any element of x < 0, and unit-scale
    exponential log-likelihoods otherwise. The exponential potential
    pushes x towards 0, ensuring that any reasonably large update will
    push us over the edge into NaN territory.
    """

    def _unbounded_exponential_log_prob(x):
        """An exponential distribution with log-likelihood NaN for x < 0."""
        # Build a NaN tensor with the same shape and dtype as `x`.
        nan_like_x = array_ops.fill(
            array_ops.shape(x), x.dtype.as_numpy_dtype(np.nan))
        per_element_potentials = array_ops.where(x < 0., nan_like_x, -x)
        return math_ops.reduce_sum(per_element_potentials)

    with self.test_session(graph=ops.Graph()) as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        updated_x, kernel_results = hmc.kernel(
            target_log_prob_fn=_unbounded_exponential_log_prob,
            current_state=initial_x,
            step_size=2.,
            num_leapfrog_steps=5,
            seed=46)
        fetches = [initial_x, updated_x, kernel_results.acceptance_probs]
        initial_x_, updated_x_, acceptance_probs_ = sess.run(fetches)

        logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
        logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
        logging_ops.vlog(1, "acceptance_probs = {}".format(acceptance_probs_))

        # Rejection means the state is unchanged and acceptance is zero.
        self.assertAllEqual(initial_x_, updated_x_)
        self.assertEqual(acceptance_probs_, 0.)
def testNanRejection(self):
    """Tests that an update that yields NaN potentials gets rejected.

    We run HMC with a target distribution that returns NaN
    log-likelihoods if any element of x < 0, and unit-scale
    exponential log-likelihoods otherwise. The exponential potential
    pushes x towards 0, ensuring that any reasonably large update will
    push us over the edge into NaN territory.
    """

    def _unbounded_exponential_log_prob(x):
        """An exponential distribution with log-likelihood NaN for x < 0."""
        below_zero = x < 0.
        nan_values = array_ops.fill(
            array_ops.shape(x), x.dtype.as_numpy_dtype(np.nan))
        per_element_potentials = array_ops.where(below_zero, nan_values, -x)
        return math_ops.reduce_sum(per_element_potentials)

    with self.test_session() as sess:
        initial_x = math_ops.linspace(0.01, 5, 10)
        updated_x, kernel_results = hmc.kernel(
            target_log_prob_fn=_unbounded_exponential_log_prob,
            current_state=initial_x,
            step_size=2.,
            num_leapfrog_steps=5,
            seed=46)
        initial_x_, updated_x_, acceptance_probs_ = sess.run(
            [initial_x, updated_x, kernel_results.acceptance_probs])

        logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
        logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
        logging_ops.vlog(1, "acceptance_probs = {}".format(acceptance_probs_))

        # Rejection means the state is unchanged and acceptance is zero.
        self.assertAllEqual(initial_x_, updated_x_)
        self.assertEqual(acceptance_probs_, 0.)
def _multi_gamma_sequence(self, a, p, name="multi_gamma_sequence"):
    """Creates sequence used in multivariate (di)gamma; shape = shape(a)+[p]."""
    with self._name_scope(name, values=[a, p]):
        # Linspace only takes scalars, so we'll add in the offset afterwards.
        first = constant_op.constant(0., dtype=self.dtype)
        last = 0.5 - 0.5 * p
        count = math_ops.cast(p, dtypes.int32)
        offsets = math_ops.linspace(first, last, count)
        return offsets + array_ops.expand_dims(a, [-1])
def make_variable(self):
    """Return a float32 variable of shape (256, 256, 256).

    The initial value is a linspace from 1 to 256**3 with 256**3 samples,
    reshaped to three dimensions.
    """
    side = 256
    total_items = side**3
    ramp = math_ops.linspace(1., float(total_items), total_items)
    cube = array_ops.reshape(ramp, (side, side, side))
    return variables.Variable(cube, dtype=dtypes.float32)
def _multi_gamma_sequence(self, a, p, name="multi_gamma_sequence"):
    """Creates sequence used in multivariate (di)gamma; shape = shape(a)+[p]."""
    with self._name_scope(name, values=[a, p]):
        # Linspace only takes scalars, so we'll add in the offset afterwards.
        seq_start = constant_op.constant(0.0, dtype=self.dtype)
        seq_stop = 0.5 - 0.5 * p
        seq_len = math_ops.cast(p, dtypes.int32)
        seq = math_ops.linspace(seq_start, seq_stop, seq_len)
        offset = array_ops.expand_dims(a, [-1])
        return seq + offset
def make_variable(self):
    """Build a (256, 256, 256) float32 variable holding a 1..256**3 ramp."""
    edge = 256
    element_count = edge**3
    values = math_ops.linspace(1., float(element_count), element_count)
    var = variables.Variable(
        array_ops.reshape(values, (edge, edge, edge)),
        dtype=dtypes.float32)
    return var
def input_fn():
    """Produce one sine-curve sequence and its one-step-shifted labels.

    Reads `seed`, `sequence_length` and `increment` from the enclosing
    scope.

    Returns:
      A ``({'inputs': inputs}, labels)`` pair where `inputs` is the first
      `sequence_length` samples of the curve and `labels` the
      `sequence_length` samples starting at index 1.
    """
    start = random_ops.random_uniform(
        (), minval=0, maxval=(np.pi * 2.0), dtype=dtypes.float32, seed=seed)
    # One extra sample so that inputs and labels can overlap by an offset of 1.
    sample_points = math_ops.linspace(
        start, (sequence_length - 1) * increment, sequence_length + 1)
    sin_curves = math_ops.sin(sample_points)
    inputs = array_ops.slice(sin_curves, [0], [sequence_length])
    labels = array_ops.slice(sin_curves, [1], [sequence_length])
    features = {'inputs': inputs}
    return features, labels
def test_finds_max_of_long_array(self):
    """Check `percentile` with q=0 on a 3e7-element linspace returns 0."""
    # d - 1 == d in float32 and d = 3e7.
    # So this test only passes if we use double for the percentile indices.
    # If float is used, it fails with InvalidArgumentError about an index out
    # of bounds.
    num_samples = int(3e7)
    long_array = math_ops.linspace(0., 3e7, num=num_samples)
    with self.cached_session():
        lowest = sample_stats.percentile(long_array, q=0, validate_args=True)
        self.assertAllEqual(0, lowest.eval())
def uniform_keypoints_for_signal(num_keypoints,
                                 input_min,
                                 input_max,
                                 output_min,
                                 output_max,
                                 dtype=dtypes.float32):
    """Returns a pair of initialization tensors for calibration keypoints.

    This is used when the input range to be calibrated is known.

    Args:
      num_keypoints: number of keypoints to use for calibrating this signal.
      input_min: Scalar with the minimum value that the uncalibrated input
        can take.
      input_max: Scalar with the maximum value that the uncalibrated input
        can take.
      output_min: Scalar with calibrated value associated with input_min.
        Typically the minimum expected calibrated value, but not necessarily,
        especially if the calibration is decreasing.
      output_max: Scalar with calibrated scalar value associated with
        input_max.
      dtype: If any of the scalars are not given as tensors, they are
        converted to tensors with this dtype.

    Returns:
      Two tensors to be used as the keypoints_inputs and keypoints_outputs
      initialization, uniformly distributed over given ranges. Dtype is given
      by input_min, input_max, output_min, output_max.

    Raises:
      ValueError: if underlying types (dtype) don't match.
    """
    input_min = tools.cast_to_scalar_tensor_of_dtype(input_min, dtype)
    input_max = tools.cast_to_scalar_tensor_of_dtype(input_max, dtype)
    output_min = tools.cast_to_scalar_tensor_of_dtype(output_min, dtype)
    output_max = tools.cast_to_scalar_tensor_of_dtype(output_max, dtype)

    # All four bounds must share a single dtype.
    types_set = {
        input_min.dtype, input_max.dtype, output_min.dtype, output_max.dtype
    }
    if len(types_set) != 1:
        raise ValueError("different dtypes for parameters: got %s" % types_set)

    keypoints_inputs = math_ops.linspace(input_min, input_max, num_keypoints)
    keypoints_outputs = math_ops.linspace(
        output_min, output_max, num_keypoints)
    return (keypoints_inputs, keypoints_outputs)
def linspace(  # pylint: disable=missing-docstring
    start, stop, num=50, endpoint=True, retstep=False, dtype=float, axis=0):
    # NumPy-style `linspace` built on `math_ops.linspace`. Returns the
    # samples, or `(samples, step)` when `retstep` is true; `step` is NaN
    # when no spacing is defined for the requested `num`/`endpoint`.
    if dtype:
        dtype = np_utils.result_type(dtype)
    start = np_array_ops.array(start, dtype=dtype)
    stop = np_array_ops.array(stop, dtype=dtype)
    if num < 0:
        raise ValueError(
            'Argument `num` (number of samples) must be a non-negative integer. '
            f'Received: num={num}')

    step = ops.convert_to_tensor(np.nan)
    if endpoint:
        result = math_ops.linspace(start, stop, num, axis=axis)
        if num > 1:
            step = (stop - start) / (num - 1)
    else:
        # math_ops.linspace does not support endpoint=False so we manually
        # handle it here.
        if num > 0:
            step = (stop - start) / num
        if num > 1:
            # Pull the last sample one step back so that `stop` is excluded.
            last_sample = math_ops.cast(stop, step.dtype) - step
            start = math_ops.cast(start, last_sample.dtype)
            result = math_ops.linspace(start, last_sample, num, axis=axis)
        else:
            result = math_ops.linspace(start, stop, num, axis=axis)

    if dtype:
        if dtype.is_integer:
            # Since numpy 1.20, linspace's rounding is towards -inf instead
            # of 0
            result = math_ops.floor(result)
        result = math_ops.cast(result, dtype)

    if retstep:
        return (result, step)
    return result
def input_fn():
    """Produce a keyed sine-curve sequence and its one-step-shifted labels.

    Reads `seed`, `sequence_length`, `increment` and `input_key_column_name`
    from the enclosing scope.

    Returns:
      A ``(features, labels)`` pair. `features` maps ``'inputs'`` to the first
      `sequence_length` samples and `input_key_column_name` to a string key
      derived from the random start value; `labels` holds the
      `sequence_length` samples starting at index 1.
    """
    start = random_ops.random_uniform(
        (), minval=0, maxval=(np.pi * 2.0), dtype=dtypes.float32, seed=seed)
    sample_points = math_ops.linspace(
        start, (sequence_length - 1) * increment, sequence_length + 1)
    sin_curves = math_ops.sin(sample_points)
    inputs = array_ops.slice(sin_curves, [0], [sequence_length])
    labels = array_ops.slice(sin_curves, [1], [sequence_length])

    # Key is 'key_' followed by the start value scaled by 10000 as an int32.
    scaled_start = math_ops.cast(10000 * start, dtypes.int32)
    input_key = string_ops.string_join(
        ['key_', string_ops.as_string(scaled_start)])
    features = {'inputs': inputs, input_key_column_name: input_key}
    return features, labels
def _compute_quantiles():
    """Helper to build quantiles.

    Reads `dist`, `quadrature_size` and `batch_ndims` from the enclosing
    scope and returns `dist.quantile` evaluated at interior edges of
    (0, 1), with the leading axis moved to the end.
    """
    # Omit {0, 1} since they might lead to Inf/NaN.
    zero = array_ops.zeros([], dtype=dist.dtype)
    all_edges = math_ops.linspace(zero, 1., quadrature_size + 3)
    edges = all_edges[1:-1]

    # Expand edges so its broadcast across batch dims.
    broadcast_shape = array_ops.concat(
        [[-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0)
    edges = array_ops.reshape(edges, shape=broadcast_shape)
    quantiles = dist.quantile(edges)

    # Cyclically permute left by one.
    perm = array_ops.concat(
        [math_ops.range(1, 1 + batch_ndims), [0]], axis=0)
    return array_ops.transpose(quantiles, perm)
def testUpdateSingleMask(self):
    """Check that one mask update at sparsity 0.5 leaves 51 of 100 weights."""
    with self.test_session() as session:
        weights = variables.Variable(
            math_ops.linspace(1.0, 100.0, 100), name="weights")
        masked_weights = pruning.apply_mask(weights)
        sparsity = variables.Variable(0.5, name="sparsity")
        pruner = pruning.Pruning(sparsity=sparsity)
        pruner._spec.threshold_decay = 0.0
        mask_update_op = pruner.mask_update_op()
        variables.global_variables_initializer().run()

        # Before any update every weight is still present.
        before_update = masked_weights.eval()
        self.assertAllEqual(np.count_nonzero(before_update), 100)

        session.run(mask_update_op)
        after_update = masked_weights.eval()
        self.assertAllEqual(np.count_nonzero(after_update), 51)
def testUpdateSingleMask(self):
    """Verify the non-zero count before (100) and after (51) a mask update."""
    with self.test_session() as session:
        weights = variables.Variable(
            math_ops.linspace(1.0, 100.0, 100), name="weights")
        masked_weights = pruning.apply_mask(weights)
        sparsity = variables.Variable(0.5, name="sparsity")
        pruning_obj = pruning.Pruning(sparsity=sparsity)
        pruning_obj._spec.threshold_decay = 0.0
        mask_update_op = pruning_obj.mask_update_op()
        variables.global_variables_initializer().run()

        initial_count = np.count_nonzero(masked_weights.eval())
        self.assertAllEqual(initial_count, 100)

        session.run(mask_update_op)
        pruned_count = np.count_nonzero(masked_weights.eval())
        self.assertAllEqual(pruned_count, 51)
def testPartitionedVariableMasking(self):
    """Check mask updates on a variable created under a partitioner.

    The weights are created inside a variable scope using
    `variable_axis_size_partitioner(40)`, and the same partitioner is handed
    to `Pruning`. After one update at sparsity 0.5, 51 weights must remain
    non-zero.
    """
    partitioner = partitioned_variables.variable_axis_size_partitioner(40)
    with self.test_session() as session:
        with variable_scope.variable_scope("", partitioner=partitioner):
            sparsity = variables.Variable(0.5, name="Sparsity")
            weights = variable_scope.get_variable(
                "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
            masked_weights = pruning.apply_mask(
                weights, scope=variable_scope.get_variable_scope())
        pruner = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
        pruner._spec.threshold_decay = 0.0
        mask_update_op = pruner.mask_update_op()
        variables.global_variables_initializer().run()

        # Evaluate once before the update; the value itself is not checked.
        masked_weights.eval()
        session.run(mask_update_op)
        masked_weights_val = masked_weights.eval()
        self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
def testPartitionedVariableMasking(self):
    """Check mask updates on a variable created under a partitioner.

    The weights are created inside a variable scope using
    `variable_axis_size_partitioner(40)`. After one update at sparsity 0.5,
    51 weights must remain non-zero.
    """
    partitioner = partitioned_variables.variable_axis_size_partitioner(40)
    with self.test_session() as session:
        with variable_scope.variable_scope("", partitioner=partitioner):
            sparsity = variables.Variable(0.5, name="Sparsity")
            initial_weights = math_ops.linspace(1.0, 100.0, 100)
            weights = variable_scope.get_variable(
                "weights", initializer=initial_weights)
            masked_weights = pruning.apply_mask(
                weights, scope=variable_scope.get_variable_scope())
        pruning_obj = pruning.Pruning(sparsity=sparsity)
        pruning_obj._spec.threshold_decay = 0.0
        mask_update_op = pruning_obj.mask_update_op()
        variables.global_variables_initializer().run()

        # Evaluate once before the update; the value itself is not checked.
        masked_weights.eval()
        session.run(mask_update_op)
        remaining = np.count_nonzero(masked_weights.eval())
        self.assertAllEqual(remaining, 51)
def test_gradients(self):
    """Test that spectral_ops.stft has a working gradient."""
    with spectral_ops_test_util.fft_kernel_label_map(), (
            self.test_session(use_gpu=True)) as sess:
        signal_length = 512

        # An all-zero signal has all zero gradients with respect to the sum of
        # the magnitude STFT.
        empty_signal = array_ops.zeros([signal_length], dtype=dtypes.float32)
        empty_gradient_op = self._compute_stft_gradient(empty_signal)
        empty_signal_gradient = sess.run(empty_gradient_op)
        self.assertTrue((empty_signal_gradient == 0.0).all())

        # A sinusoid will have non-zero components of its gradient with
        # respect to the sum of the magnitude STFT.
        phase = 2 * np.pi * math_ops.linspace(0.0, 1.0, signal_length)
        sinusoid = math_ops.sin(phase)
        sinusoid_gradient_op = self._compute_stft_gradient(sinusoid)
        sinusoid_gradient = sess.run(sinusoid_gradient_op)
        self.assertFalse((sinusoid_gradient == 0.0).all())
def input_fn():
    """Build keyed sine-curve features and one-step-shifted labels.

    Reads `seed`, `sequence_length`, `increment` and `input_key_column_name`
    from the enclosing scope.

    Returns:
      ``(features, labels)`` where `features` holds the first
      `sequence_length` curve samples under ``'inputs'`` plus a string key
      under `input_key_column_name`, and `labels` holds the
      `sequence_length` samples starting at index 1.
    """
    start = random_ops.random_uniform(
        (), minval=0, maxval=(np.pi * 2.0), dtype=dtypes.float32, seed=seed)
    curve_stop = (sequence_length - 1) * increment
    sin_curves = math_ops.sin(
        math_ops.linspace(start, curve_stop, sequence_length + 1))
    inputs = array_ops.slice(sin_curves, [0], [sequence_length])
    labels = array_ops.slice(sin_curves, [1], [sequence_length])

    key_suffix = string_ops.as_string(
        math_ops.cast(10000 * start, dtypes.int32))
    input_key = string_ops.string_join(['key_', key_suffix])
    return {
        'inputs': inputs,
        input_key_column_name: input_key,
    }, labels
def test_gradients(self):
    """Test that spectral_ops.stft has a working gradient."""
    # TODO(rjryan): Update gradient tests for Eager.
    if context.executing_eagerly():
        return

    with self.session(use_gpu=True) as sess:
        signal_length = 512

        # An all-zero signal has all zero gradients with respect to the sum of
        # the magnitude STFT.
        empty_signal = array_ops.zeros([signal_length], dtype=dtypes.float32)
        empty_gradient_op = self._compute_stft_gradient(empty_signal)
        empty_signal_gradient = sess.run(empty_gradient_op)
        self.assertTrue((empty_signal_gradient == 0.0).all())

        # A sinusoid will have non-zero components of its gradient with
        # respect to the sum of the magnitude STFT.
        phase = 2 * np.pi * math_ops.linspace(0.0, 1.0, signal_length)
        sinusoid = math_ops.sin(phase)
        sinusoid_gradient = self.evaluate(
            self._compute_stft_gradient(sinusoid))
        self.assertFalse((sinusoid_gradient == 0.0).all())
def kp_out_fn(*args, **kwargs):
    """Return `num_keypoints` evenly spaced default output keypoints.

    All positional and keyword arguments are accepted and ignored. The range
    runs from `_DEFAULT_OUTPUT_MIN` to `_DEFAULT_OUTPUT_MAX`.
    """
    lowest = float(_DEFAULT_OUTPUT_MIN)
    highest = float(_DEFAULT_OUTPUT_MAX)
    return math_ops.linspace(lowest, highest, num_keypoints)
def _sin_fn(x):
    """Return the sine of a linspace that starts at the scalar `x[0]`.

    Reads `sequence_length` and `increment` from the enclosing scope; the
    result has `sequence_length + 1` samples.
    """
    start = array_ops.reshape(x[0], [])
    stop = (sequence_length - 1) * increment
    sample_points = math_ops.linspace(start, stop, sequence_length + 1)
    return math_ops.sin(sample_points)
def load_keypoints_from_quantiles(feature_names,
                                  save_dir,
                                  num_keypoints,
                                  output_min,
                                  output_max,
                                  dtype=dtypes.float32):
    """Retrieves keypoints initialization values for selected features.

    It expects that the quantiles have already been calculated and saved in
    the save_dir by the save_quantiles_for_keypoints function. It will raise
    an I/O error if not.

    Args:
      feature_names: List of features names for which to get keypoints
        initialization values.
      save_dir: Directory where the quantiles have been saved to. Same value
        used when save_quantiles_for_keypoints was called.
      num_keypoints: Desired number of keypoints to use for calibration. This
        can either be a scalar to be used for all features, or a dict mapping
        feature name to num_keypoints. Fewer keypoints than requested can end
        up being used when for the given feature there are not enough
        different values. If num_keypoints for a feature is missing, None or
        0, no initialization is generated.
      output_min: Initial calibrated value associated with the first
        calibration keypoint. The keypoints outputs in between will be
        linearly interpolated. It can be given as a scalar, in which case
        value is used for all features, or a dict mapping feature name to
        output_min.
      output_max: Like output_min, but the calibrated value associated to the
        last keypoint. Scalar or dict.
      dtype: Type to be used for calibration.

    Returns:
      Dict of feature name to pair of constant tensors that can be used to
      initialize calibrators keypoints inputs and outputs.

    Raises:
      tf.errors.NotFoundError: if quantiles file not found.
    """
    subdir = os.path.join(save_dir, _QUANTILES_SUBDIRECTORY)
    # NOTE(review): the third argument here is the value itself, whereas the
    # two calls below pass a parameter-name string -- confirm against
    # tools.cast_to_dict whether "num_keypoints" was intended.
    num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                       num_keypoints)
    output_min = tools.cast_to_dict_of_tensor_scalars(
        output_min, feature_names, dtype, "output_min")
    output_max = tools.cast_to_dict_of_tensor_scalars(
        output_max, feature_names, dtype, "output_max")

    keypoints = {}
    for feature_name in feature_names:
        # Skip features with no (or a zero/None) keypoint count.
        if feature_name not in num_keypoints:
            continue
        requested = num_keypoints[feature_name]
        if not requested:
            continue

        all_quantiles = _load_quantiles(subdir, feature_name)
        percentiles = np.linspace(0., 100., requested)
        picked = np.percentile(
            all_quantiles, percentiles, interpolation="nearest")
        quantiles = sorted(set(picked))  # Remove repeated quantiles.
        kept = len(quantiles)

        keypoints_inputs = array_ops.constant(
            quantiles, shape=[kept], dtype=dtype)
        keypoints_outputs = math_ops.linspace(
            output_min[feature_name], output_max[feature_name], kept)
        keypoints[feature_name] = (keypoints_inputs, keypoints_outputs)
    return keypoints
def _forward_log_det_jacobian(self, x):
    """Compute the forward log-det-Jacobian of the map X -> X X.T.

    When `self._static_event_ndims` is 0 the result is
    `log(2) + log(x)`. Otherwise the result is
    `p * log(2) + sum_j (p - j) * log(X[j, j])`, where `p` is the size of
    the last dimension of `x`; see the derivation below. When
    `self.validate_args` is set, assertions on positivity (and, for the
    matrix case, rank and squareness) are attached to `x`.
    """
    # Let Y be a symmetric, positive definite matrix and write:
    #   Y = X X.T
    # where X is lower-triangular.
    #
    # Observe that,
    #   dY[i,j]/dX[a,b]
    #   = d/dX[a,b] { X[i,:] X[j,:] }
    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
    #
    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y
    # is symmetric and X is lower-triangular, we need vectors of dimension:
    #   d = p (p + 1) / 2
    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
    #   k = { i (i + 1) / 2 + j   i>=j
    #       { undef               i<j
    # and assume zero-based indexes. When k is undef, the element is dropped.
    # Example:
    #           j      k
    #        0 1 2 3  /
    #    0 [ 0 . . . ]
    # i  1 [ 1 2 . . ]
    #    2 [ 3 4 5 . ]
    #    3 [ 6 7 8 9 ]
    # Write vec[.] to indicate transforming a matrix to vector via k(i,j).
    # (With slight abuse: k(i,j)=undef means the element is dropped.)
    #
    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
    # (1) j<=i<a thus i,j!=a.
    # (2) i=a>j  thus i,j!=a.
    #
    # Since the Jacobian is lower-triangular, we need only compute the product
    # of diagonal elements:
    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
    #   = X[j,j] + I[i=j] X[i,j]
    #   = 2 X[j,j].
    # Since there is a 2 X[j,j] term for every lower-triangular element of X
    # we conclude:
    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
    if self._static_event_ndims == 0:
        # Scalar events: y = x**2, so log|dy/dx| = log(2) + log(x).
        if self.validate_args:
            is_positive = check_ops.assert_positive(
                x, message="All elements must be positive.")
            x = control_flow_ops.with_dependencies([is_positive], x)
        return np.log(2.) + math_ops.log(x)

    diag = array_ops.matrix_diag_part(x)

    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the
    # output is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then
    # the output is unchanged.
    diag = self._make_columnar(diag)

    if self.validate_args:
        is_matrix = check_ops.assert_rank_at_least(
            x, 2, message="Input must be a (batch of) matrix.")
        shape = array_ops.shape(x)
        is_square = check_ops.assert_equal(
            shape[-2], shape[-1],
            message="Input must be a (batch of) square matrix.")
        # Assuming lower-triangular means we only need check diag>0.
        is_positive_definite = check_ops.assert_positive(
            diag, message="Input must be positive definite.")
        x = control_flow_ops.with_dependencies(
            [is_matrix, is_square, is_positive_definite], x)

    # Create a vector equal to: [p, p-1, ..., 2, 1].
    # Use the static size of the last dimension when it is known, otherwise
    # fall back to the dynamic shape.
    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
        p_int = array_ops.shape(x)[-1]
        p_float = math_ops.cast(p_int, dtype=x.dtype)
    else:
        p_int = x.get_shape()[-1].value
        p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
    exponents = math_ops.linspace(p_float, 1., p_int)

    # Weighted sum of log-diagonal entries, i.e. sum_j (p - j) log X[j,j],
    # computed as a matrix product with the exponents as a column vector.
    sum_weighted_log_diag = array_ops.squeeze(
        math_ops.matmul(math_ops.log(diag),
                        exponents[..., array_ops.newaxis]),
        squeeze_dims=-1)
    fldj = p_float * np.log(2.) + sum_weighted_log_diag

    return fldj
def _sin_fn(x):
  """Returns the sine of an evenly spaced grid that starts at `x[0]`.

  The grid has `sequence_length + 1` points and ends at
  `(sequence_length - 1) * increment`; `sequence_length` and `increment` are
  read from the enclosing scope.
  """
  # `x[0]` is reshaped to a scalar so it can serve as the linspace start.
  grid_start = array_ops.reshape(x[0], [])
  grid_stop = (sequence_length - 1) * increment
  grid_size = sequence_length + 1
  grid = math_ops.linspace(grid_start, grid_stop, grid_size)
  return math_ops.sin(grid)
def linear_to_mel_weight_matrix(num_mel_bins=20,
                                num_spectrogram_bins=129,
                                sample_rate=8000,
                                lower_edge_hertz=125.0,
                                upper_edge_hertz=3800.0,
                                dtype=dtypes.float32,
                                name=None):
  """Builds a matrix that warps linear-scale spectrograms to the [mel scale][mel].

  The returned weight matrix re-weights a `Tensor` holding
  `num_spectrogram_bins` linearly sampled frequency bins covering
  `[0, sample_rate / 2]` into `num_mel_bins` bins covering
  `[lower_edge_hertz, upper_edge_hertz]` on the [mel scale][mel].

  For instance, right-multiplying a spectrogram `S` of shape
  `[frames, num_spectrogram_bins]` of linear-scale spectrum values (e.g. STFT
  magnitudes) by the returned matrix `A` yields a "mel spectrogram" `M` of
  shape `[frames, num_mel_bins]`.

      # `S` has shape [frames, num_spectrogram_bins]
      # `M` has shape [frames, num_mel_bins]
      M = tf.matmul(S, A)

  Together with `tf.tensordot` the matrix converts a `Tensor` of arbitrary
  rank holding linear-scale spectral bins into the mel scale.

      # S has shape [..., num_spectrogram_bins].
      # M has shape [..., num_mel_bins].
      M = tf.tensordot(S, A, 1)
      # tf.tensordot does not support shape inference for this case yet.
      M.set_shape(S.shape[:-1].concatenate(A.shape[-1:]))

  Args:
    num_mel_bins: Python int. Number of bands in the resulting mel spectrum.
    num_spectrogram_bins: An integer `Tensor`. Number of bins in the source
      spectrogram data, understood to be `fft_size // 2 + 1`, i.e. the
      spectrogram only contains the nonredundant FFT bins.
    sample_rate: Python float. Samples per second of the signal the
      spectrogram was computed from. It determines the actual frequency of
      each spectrogram bin, which dictates how bins map into the mel scale.
    lower_edge_hertz: Python float. Lower bound on the frequencies included
      in the mel spectrum; this is the lower edge of the lowest triangular
      band.
    upper_edge_hertz: Python float. The desired top edge of the highest
      frequency band.
    dtype: The `DType` of the result matrix. Must be a floating point type.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.

  Raises:
    ValueError: If `num_mel_bins`/`num_spectrogram_bins`/`sample_rate` are not
      positive, `lower_edge_hertz` is negative, frequency edges are
      incorrectly ordered, or `upper_edge_hertz` is larger than the Nyquist
      frequency.

  [mel]: https://en.wikipedia.org/wiki/Mel_scale
  """
  with ops.name_scope(name, 'linear_to_mel_weight_matrix') as scope:
    # `num_spectrogram_bins` is handed to `math_ops.linspace`, which already
    # validates it (in both its shape function and its kernel), so it is not
    # validated again here.
    _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz,
                        upper_edge_hertz, dtype)

    # The whole matrix is computed in float64 to preserve accuracy and only
    # cast to `dtype` at the very end. Since there are no Tensor inputs, graph
    # optimization can constant fold this function.
    sample_rate_f64 = ops.convert_to_tensor(
        sample_rate, dtypes.float64, name='sample_rate')
    lower_hertz_f64 = ops.convert_to_tensor(
        lower_edge_hertz, dtypes.float64, name='lower_edge_hertz')
    upper_hertz_f64 = ops.convert_to_tensor(
        upper_edge_hertz, dtypes.float64, name='upper_edge_hertz')
    zero_f64 = ops.convert_to_tensor(0.0, dtypes.float64)

    # HTK excludes the spectrogram DC bin.
    num_dropped_bins = 1
    nyquist = sample_rate_f64 / 2.0
    all_bins_hertz = math_ops.linspace(zero_f64, nyquist, num_spectrogram_bins)
    kept_bins_hertz = all_bins_hertz[num_dropped_bins:]
    bins_mel = array_ops.expand_dims(_hertz_to_mel(kept_bins_hertz), 1)

    # Each band is a (lower_edge, center, upper_edge) triple, and the center of
    # one band doubles as the lower/upper edge of its neighbours. Hence
    # [lower_edge_hertz, upper_edge_hertz] is divided into num_mel_bins + 2
    # pieces and framed into overlapping triples.
    lowest_mel = _hertz_to_mel(lower_hertz_f64)
    highest_mel = _hertz_to_mel(upper_hertz_f64)
    edge_grid_mel = math_ops.linspace(lowest_mel, highest_mel,
                                      num_mel_bins + 2)
    edge_triples_mel = shape_ops.frame(
        edge_grid_mel, frame_length=3, frame_step=1)

    # Separate the triples into three [1, num_mel_bins] tensors.
    left_mel, peak_mel, right_mel = [
        array_ops.reshape(column, [1, num_mel_bins])
        for column in array_ops.split(edge_triples_mel, 3, axis=1)
    ]

    # Rising and falling slopes for every spectrogram bin. The line segments
    # are linear in the mel domain, not in Hertz.
    rising = (bins_mel - left_mel) / (peak_mel - left_mel)
    falling = (right_mel - bins_mel) / (right_mel - peak_mel)

    # Intersect the two line segments with each other and with zero.
    triangles = math_ops.maximum(zero_f64, math_ops.minimum(rising, falling))

    # Put back the zeroed lower bins that were sliced off above.
    padded = array_ops.pad(triangles, [[num_dropped_bins, 0], [0, 0]])

    # Cast to the requested type.
    return math_ops.cast(padded, dtype, name=scope)
def linear_to_mel_weight_matrix(num_mel_bins=20,
                                num_spectrogram_bins=129,
                                sample_rate=8000,
                                lower_edge_hertz=125.0,
                                upper_edge_hertz=3800.0,
                                dtype=dtypes.float32,
                                name=None):
  """Builds a matrix that warps linear-scale spectrograms to the [mel scale][mel].

  The returned weight matrix re-weights a `Tensor` holding
  `num_spectrogram_bins` linearly sampled frequency bins covering
  `[0, sample_rate / 2]` into `num_mel_bins` bins covering
  `[lower_edge_hertz, upper_edge_hertz]` on the [mel scale][mel].

  For instance, right-multiplying a spectrogram `S` of shape
  `[frames, num_spectrogram_bins]` of linear-scale spectrum values (e.g. STFT
  magnitudes) by the returned matrix `A` yields a "mel spectrogram" `M` of
  shape `[frames, num_mel_bins]`.

      # `S` has shape [frames, num_spectrogram_bins]
      # `M` has shape [frames, num_mel_bins]
      M = tf.matmul(S, A)

  Together with `tf.tensordot` the matrix converts a `Tensor` of arbitrary
  rank holding linear-scale spectral bins into the mel scale.

      # S has shape [..., num_spectrogram_bins].
      # M has shape [..., num_mel_bins].
      M = tf.tensordot(S, A, 1)
      # tf.tensordot does not support shape inference for this case yet.
      M.set_shape(S.shape[:-1].concatenate(A.shape[-1:]))

  Args:
    num_mel_bins: Python int. Number of bands in the resulting mel spectrum.
    num_spectrogram_bins: An integer `Tensor`. Number of bins in the source
      spectrogram data, understood to be `fft_size // 2 + 1`, i.e. the
      spectrogram only contains the nonredundant FFT bins.
    sample_rate: Python float. Samples per second of the signal the
      spectrogram was computed from. It determines the actual frequency of
      each spectrogram bin, which dictates how bins map into the mel scale.
      A `Tensor` is accepted only when its value is statically known.
    lower_edge_hertz: Python float. Lower bound on the frequencies included
      in the mel spectrum; this is the lower edge of the lowest triangular
      band.
    upper_edge_hertz: Python float. The desired top edge of the highest
      frequency band.
    dtype: The `DType` of the result matrix. Must be a floating point type.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.

  Raises:
    ValueError: If `num_mel_bins`/`num_spectrogram_bins`/`sample_rate` are not
      positive, `lower_edge_hertz` is negative, frequency edges are
      incorrectly ordered, `upper_edge_hertz` is larger than the Nyquist
      frequency, or `sample_rate` is neither a Python float nor a constant
      Tensor.

  [mel]: https://en.wikipedia.org/wiki/Mel_scale
  """
  with ops.name_scope(name, 'linear_to_mel_weight_matrix') as scope:
    # A Tensor `sample_rate` is only usable when its value can be resolved
    # statically; replace it with that value, otherwise reject it.
    if isinstance(sample_rate, ops.Tensor):
      static_sample_rate = tensor_util.constant_value(sample_rate)
      if static_sample_rate is None:
        raise ValueError(
            '`sample_rate` was a non-constant Tensor. Must be a '
            'Python float or a constant Tensor.')
      sample_rate = static_sample_rate

    # `num_spectrogram_bins` is handed to `math_ops.linspace`, which already
    # validates it (in both its shape function and its kernel), so it is not
    # validated again here.
    _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz,
                        upper_edge_hertz, dtype)

    # Since there are no Tensor inputs, graph optimization can constant fold
    # this function.
    sample_rate_t = ops.convert_to_tensor(
        sample_rate, dtype, name='sample_rate')
    lower_hertz_t = ops.convert_to_tensor(
        lower_edge_hertz, dtype, name='lower_edge_hertz')
    upper_hertz_t = ops.convert_to_tensor(
        upper_edge_hertz, dtype, name='upper_edge_hertz')
    zero_t = ops.convert_to_tensor(0.0, dtype)

    # HTK excludes the spectrogram DC bin.
    num_dropped_bins = 1
    nyquist = sample_rate_t / 2.0
    all_bins_hertz = math_ops.linspace(zero_t, nyquist, num_spectrogram_bins)
    kept_bins_hertz = all_bins_hertz[num_dropped_bins:]
    bins_mel = array_ops.expand_dims(_hertz_to_mel(kept_bins_hertz), 1)

    # Each band is a (lower_edge, center, upper_edge) triple, and the center of
    # one band doubles as the lower/upper edge of its neighbours. Hence
    # [lower_edge_hertz, upper_edge_hertz] is divided into num_mel_bins + 2
    # pieces and framed into overlapping triples.
    lowest_mel = _hertz_to_mel(lower_hertz_t)
    highest_mel = _hertz_to_mel(upper_hertz_t)
    edge_grid_mel = math_ops.linspace(lowest_mel, highest_mel,
                                      num_mel_bins + 2)
    edge_triples_mel = shape_ops.frame(
        edge_grid_mel, frame_length=3, frame_step=1)

    # Separate the triples into three [1, num_mel_bins] tensors.
    left_mel, peak_mel, right_mel = [
        array_ops.reshape(column, [1, num_mel_bins])
        for column in array_ops.split(edge_triples_mel, 3, axis=1)
    ]

    # Rising and falling slopes for every spectrogram bin. The line segments
    # are linear in the mel domain, not in Hertz.
    rising = (bins_mel - left_mel) / (peak_mel - left_mel)
    falling = (right_mel - bins_mel) / (right_mel - peak_mel)

    # Intersect the two line segments with each other and with zero.
    triangles = math_ops.maximum(zero_t, math_ops.minimum(rising, falling))

    # Put back the zeroed lower bins that were sliced off above.
    return array_ops.pad(
        triangles, [[num_dropped_bins, 0], [0, 0]], name=scope)
def kp_in_fn(*args, **kwargs):
  """Returns `num_keypoints` evenly spaced values from 0 to 1.

  Every positional and keyword argument is accepted and ignored;
  `num_keypoints` is read from the enclosing scope.
  """
  del args, kwargs  # Unused.
  keypoints = math_ops.linspace(0., 1., num_keypoints)
  return keypoints
def _LinSpace(self, start, stop, num):
  """Evaluates `linspace(start, stop, num)` inside a fresh graph.

  Also asserts that the static shape of the op's output is `[num]`.

  Returns:
    The evaluated value of the linspace op.
  """
  with ops.Graph().as_default() as graph, self.session(
      graph=graph, force_gpu=self.force_gpu):
    linspace_op = math_ops.linspace(start, stop, num, name="linspace")
    static_shape = linspace_op.get_shape()
    self.assertEqual([num], static_shape)
    return self.evaluate(linspace_op)
def _LinSpace(self, start, stop, num):
  """Evaluates `linspace(start, stop, num)` inside a fresh graph.

  Also asserts that the static shape of the op's output is `[num]`.

  Returns:
    The value produced by calling `eval()` on the linspace op within the
    session opened on the fresh graph.
  """
  with ops.Graph().as_default() as graph, self.session(
      graph=graph, force_gpu=self.force_gpu):
    linspace_op = math_ops.linspace(start, stop, num, name="linspace")
    static_shape = linspace_op.get_shape()
    self.assertEqual([num], static_shape)
    return linspace_op.eval()
def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
              target_log_prob_fn, proposal_log_prob_fn, event_dims=(),
              name=None):
  """Estimates normalizing constants with annealed importance sampling (AIS).

  Hamiltonian Monte Carlo is used to sample from a sequence of distributions
  that slowly interpolates between an initial "proposal" distribution

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights is an unbiased estimate of the ratio of the normalizing
  constants of the initial and the target distribution:

  E[exp(w)] = exp(target_log_normalizer - proposal_log_normalizer).

  Args:
    n_iterations: Integer number of Markov chain updates to run. More
      iterations cost more, but give smoother annealing between q and p,
      which in turn means exponentially lower variance for the normalizing
      constant estimator.
    step_size: Scalar step size or array of step sizes for the leapfrog
      integrator. Broadcasts to the shape of `initial_x`. Larger step sizes
      make faster progress, but too-large step sizes make rejection
      exponentially more likely. When possible, it is often helpful to match
      per-variable step sizes to the standard deviations of the target
      distribution in each variable.
    n_leapfrog_steps: Integer number of steps to run the leapfrog integrator
      for. Total progress per HMC step is roughly proportional to
      step_size * n_leapfrog_steps.
    initial_x: Tensor of initial state(s) of the Markov chain(s). Must be a
      sample from q, or results will be incorrect.
    target_log_prob_fn: Python callable which takes an argument like
      `initial_x` and returns its (possibly unnormalized) log-density under
      the target distribution.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    event_dims: List of dimensions that should not be treated as independent.
      This allows multiple chains to be run independently in parallel.
      Default is (), i.e., all dimensions are independent.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(initial_x)`.
    chain_states: Tensor with the state(s) of the Markov chain(s) at the
      final iteration. Has shape matching `initial_x`.
    acceptance_probs: Tensor with the acceptance probabilities for the final
      iteration. Has shape matching `target_log_prob_fn(initial_x)`.

  #### Examples:

  ```python
  # Estimating the normalizing constant of a log-gamma distribution:
  def proposal_log_prob(x):
    # Standard normal log-probability. This is properly normalized.
    return tf.reduce_sum(-0.5 * tf.square(x) - 0.5 * np.log(2 * np.pi), 1)
  def target_log_prob(x):
    # Unnormalized log-gamma(2, 3) distribution.
    # True normalizer is (lgamma(2) - 2 * log(3)) * x.shape[1]
    return tf.reduce_sum(2. * x - 3. * tf.exp(x), 1)
  # Run 100 AIS chains in parallel
  initial_x = tf.random_normal([100, 20])
  w, _, _ = hmc.ais_chain(1000, 0.2, 2, initial_x, target_log_prob,
                          proposal_log_prob, event_dims=[1])
  log_normalizer_estimate = tf.reduce_logsumexp(w) - np.log(100)
  ```

  ```python
  # Estimating the marginal likelihood of a Bayesian regression model:
  base_measure = -0.5 * np.log(2 * np.pi)
  def proposal_log_prob(x):
    # Standard normal log-probability. This is properly normalized.
    return tf.reduce_sum(-0.5 * tf.square(x) + base_measure, 1)
  def regression_log_joint(beta, x, y):
    # This function returns a vector whose ith element is log p(beta[i], y | x).
    # Each row of beta corresponds to the state of an independent Markov chain.
    log_prior = tf.reduce_sum(-0.5 * tf.square(beta) + base_measure, 1)
    means = tf.matmul(beta, x, transpose_b=True)
    log_likelihood = tf.reduce_sum(-0.5 * tf.square(y - means) +
                                   base_measure, 1)
    return log_prior + log_likelihood
  def log_joint_partial(beta):
    return regression_log_joint(beta, x, y)
  # Run 100 AIS chains in parallel
  initial_beta = tf.random_normal([100, x.shape[1]])
  w, beta_samples, _ = hmc.ais_chain(1000, 0.1, 2, initial_beta,
                                     log_joint_partial, proposal_log_prob,
                                     event_dims=[1])
  log_normalizer_estimate = tf.reduce_logsumexp(w) - np.log(100)
  ```
  """
  with ops.name_scope(name, 'hmc_ais_chain',
                      [n_iterations, step_size, n_leapfrog_steps, initial_x]):
    chain_shape = array_ops.shape(target_log_prob_fn(initial_x))

    # n_iterations + 1 evenly spaced points on [0, 1] with the leading 0
    # dropped, leaving one interpolation coefficient per chain update.
    betas = math_ops.linspace(0., 1., n_iterations+1)[1:]

    def _body(carry, beta):  # pylint: disable=missing-docstring
      # `carry` is (state, acceptance_probs, accumulated weight); the
      # acceptance probabilities of the previous step are not needed.
      prev_x, _, log_weight = carry

      def annealed_log_prob(x):
        # Interpolation between proposal (beta=0) and target (beta=1).
        proposal_term = (1 - beta) * proposal_log_prob_fn(x)
        target_term = beta * target_log_prob_fn(x)
        return proposal_term + target_term

      # Each step adds 1/n_iterations of the target-vs-proposal log ratio,
      # evaluated at the state the step starts from.
      per_step_scale = 1. / n_iterations
      log_ratio = (target_log_prob_fn(prev_x) -
                   proposal_log_prob_fn(prev_x))
      log_weight = log_weight + per_step_scale * log_ratio

      # TODO(b/66917083): There's an opportunity for gradient reuse here.
      next_x, step_acceptance_probs, _, _ = kernel(
          step_size, n_leapfrog_steps, prev_x, annealed_log_prob, event_dims)
      return next_x, step_acceptance_probs, log_weight

    initial_carry = (initial_x,
                     array_ops.zeros(chain_shape),
                     array_ops.zeros(chain_shape))
    state_history, acceptance_history, weight_history = functional_ops.scan(
        _body, betas, initial_carry)

    # Only the values from the last iteration are returned.
    return weight_history[-1], state_history[-1], acceptance_history[-1]