def testVirtualAdvRegularizer(self):
  """Tests virtual_adv_regularizer returning the expected loss."""
  np_input = np.array([[1.0, -1.0]])
  tf_input = tf.constant(np_input)
  np_weights = np.array([[1.0, 5.0], [2.0, 2.0]])
  tf_weights = tf.constant(np_weights)
  # A linear transformation and L2 loss make the Hessian matrix constant.
  embedding_fn = lambda x: tf.matmul(x, tf_weights)
  step_size = 0.1
  vadv_config = configs.VirtualAdvConfig(
      adv_neighbor_config=configs.AdvNeighborConfig(
          feature_mask=None,
          adv_step_size=step_size,
          adv_grad_norm=configs.NormType.L2),
      distance_config=configs.DistanceConfig(
          distance_type=configs.DistanceType.L2, sum_over_axis=-1),
      num_approx_steps=1,
      approx_difference=1e-3)  # enlarged for numerical stability
  np_seed = np.array([[0.6, 0.8]])
  tf_seed = tf.constant(np_seed)
  vadv_loss = regularizer._virtual_adv_regularizer(
      tf_input, embedding_fn, vadv_config, embedding_fn(tf_input), tf_seed)
  actual_loss = self.evaluate(vadv_loss)
  # For a detailed derivation of the Hessian matrix, see go/vadv-tests-hessian
  hessian = 2 * np.dot(np_weights, np_weights.T)
  approx = np.matmul(np_seed, hessian)
  approx *= step_size / np.linalg.norm(approx, axis=-1, keepdims=True)
  expected_loss = np.linalg.norm(np.matmul(approx, np_weights))**2
  self.assertNear(actual_loss, expected_loss, err=1e-5)
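# The constant Hessian used above can be derived directly: for the linear
# embedding g(x) = x W and squared-L2 distance, the loss as a function of the
# perturbation v is f(v) = ||v W||^2 = v (W W^T) v^T, whose Hessian is
# 2 W W^T. A minimal numpy sketch (illustration only, not part of the test
# suite; `_finite_difference_hessian` is a hypothetical helper) verifying
# this numerically:
def _finite_difference_hessian(f, dim, eps=1e-5):
  """Approximates the Hessian of scalar function `f` at the origin."""
  hessian = np.zeros((dim, dim))
  for i in range(dim):
    for j in range(dim):
      e_i, e_j = np.eye(dim)[i], np.eye(dim)[j]
      hessian[i, j] = (f(eps * (e_i + e_j)) - f(eps * e_i) - f(eps * e_j) +
                       f(np.zeros(dim))) / eps**2
  return hessian

w = np.array([[1.0, 5.0], [2.0, 2.0]])
squared_l2_loss = lambda v: float(np.sum(np.dot(v, w)**2))
np.testing.assert_allclose(
    _finite_difference_hessian(squared_l2_loss, 2),
    2 * np.dot(w, w.T), rtol=1e-3)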
def testVirtualAdvRegularizerMultiStepApproximation(self):
  """Tests virtual_adv_regularizer with multi-step approximation."""
  np_input = np.array([[0.28, -0.96]])
  tf_input = tf.constant(np_input)
  embedding_fn = lambda x: x
  vadv_config = configs.VirtualAdvConfig(
      adv_neighbor_config=configs.AdvNeighborConfig(
          feature_mask=None,
          adv_step_size=1,
          adv_grad_norm=configs.NormType.L2),
      distance_config=configs.DistanceConfig(
          distance_type=configs.DistanceType.COSINE, sum_over_axis=-1),
      num_approx_steps=20,
      approx_difference=1)
  np_seed = np.array([[0.6, 0.8]])
  tf_seed = tf.constant(np_seed)
  vadv_loss = regularizer._virtual_adv_regularizer(
      tf_input, embedding_fn, vadv_config, embedding_fn(tf_input), tf_seed)
  actual_loss = self.evaluate(vadv_loss)
  # For a detailed derivation of the Hessian matrix, see go/vadv-tests-hessian
  x = np_input
  hessian = np.dot(x, x.T) * np.identity(2) - np.dot(x.T, x)
  hessian /= np.linalg.norm(x)**4
  approx = np.matmul(np_seed, hessian)
  approx /= np.linalg.norm(approx, axis=-1, keepdims=True)
  expected_loss = np.matmul(np.matmul(approx, hessian), np.transpose(approx))
  self.assertNear(actual_loss, expected_loss, err=1e-5)
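# A companion numeric check (illustration only, reusing the hypothetical
# `_finite_difference_hessian` helper sketched above): with the identity
# embedding, the loss as a function of the perturbation v is
# f(v) = 1 - x.(x + v) / (||x|| ||x + v||), whose Hessian at v = 0 is
# (||x||^2 I - x^T x) / ||x||^4, i.e. the `hessian` computed in the test
# (here ||x|| == 1, so it reduces to I - x^T x):
x_row = np.array([0.28, -0.96])
cosine_loss = lambda v: 1.0 - np.dot(x_row, x_row + v) / (
    np.linalg.norm(x_row) * np.linalg.norm(x_row + v))
np.testing.assert_allclose(
    _finite_difference_hessian(cosine_loss, 2),
    np.eye(2) - np.outer(x_row, x_row), atol=1e-3)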
def testWeightedDistance(self):
  source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                              dtype='float32')
  target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                              dtype='float32')
  weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')
  l1_distance_config = configs.DistanceConfig('l1', sum_over_axis=-1)
  l1_distance_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, weights, l1_distance_config)
  l2_distance_config = configs.DistanceConfig('l2', sum_over_axis=-1)
  l2_distance_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, weights, l2_distance_config)
  with self.cached_session() as sess:
    l1_distance_value = sess.run(l1_distance_tensor)
    self.assertAllClose(l1_distance_value, 5.5 / 3)
    l2_distance_value = sess.run(l2_distance_tensor)
    self.assertAllClose(l2_distance_value, 18.5 / 3)
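# Where the expected values above come from (a plain-numpy restatement,
# illustration only): the per-example L1/L2 sums are weighted, and the
# default reduction appears to divide by the number of nonzero weights
# (3 here), matching tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
# semantics after the `sum_over_axis` rescaling.
s = np.array([[1, 1], [2, 2], [0, 2], [5, 5]], dtype='float32')
t = np.array([[1, 1], [0, 2], [4, 4], [1, 4]], dtype='float32')
w = np.array([1, 0, 0.5, 0.5], dtype='float32')
l1 = np.sum(w * np.sum(np.abs(s - t), axis=-1)) / np.count_nonzero(w)
l2 = np.sum(w * np.sum((s - t)**2, axis=-1)) / np.count_nonzero(w)
assert np.isclose(l1, 5.5 / 3) and np.isclose(l2, 18.5 / 3)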
def _make_model(sources_shape, targets_shape):
  """Makes a model where `sources` and `targets` have the same rank."""
  sources = tf.keras.Input(sources_shape, name='sources')
  targets = tf.keras.Input(targets_shape, name='targets')
  outputs = pairwise_distance_lib.PairwiseDistance(
      configs.DistanceConfig(
          distance_type=configs.DistanceType.KL_DIVERGENCE,
          reduction=tf.compat.v1.losses.Reduction.NONE,
          sum_over_axis=-1))(sources, targets)
  return tf.keras.Model(inputs=[sources, targets], outputs=outputs)
def testDistanceInvalidAxis(self):
  source_tensor = tf.constant(1.0, dtype='float32', shape=[4, 2])
  target_tensor = tf.constant(1.0, dtype='float32', shape=[4, 2])
  weights = tf.constant(1.0, dtype='float32', shape=[4, 2])
  distance_config = configs.DistanceConfig(sum_over_axis=2)
  with self.assertRaises(ValueError):
    distance_tensor = distances.pairwise_distance_wrapper(
        source_tensor, target_tensor, weights, distance_config)
    distance_tensor.eval()
def testL2Distance(self):
  source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                              dtype='float32')
  target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                              dtype='float32')
  distance_config = configs.DistanceConfig('l2', sum_over_axis=-1)
  distance_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, distance_config=distance_config)
  with self.cached_session() as sess:
    distance_value = sess.run(distance_tensor)
    self.assertAllClose(distance_value, 10.25)
def testAssertions(self):
  """Tests that assertions still work with Keras."""
  distance_config = configs.DistanceConfig(
      distance_type=configs.DistanceType.JENSEN_SHANNON_DIVERGENCE,
      sum_over_axis=-1)
  regularizer = pairwise_distance_lib.PairwiseDistance(distance_config)
  # Try Jensen-Shannon divergence on an improper probability distribution.
  with self.assertRaisesRegex(
      tf.errors.InvalidArgumentError,
      'x and/or y is not a proper probability distribution'):
    self.evaluate(regularizer(np.array([0.6, 0.5]), np.array([[0.25, 0.75]])))
def testCosineDistance(self):
  source_tensor = tf.constant([[1, 1], [1, 1], [3, 4], [-1, -1]],
                              dtype='float32')
  target_tensor = tf.constant([[1, 1], [5, 5], [4, 3], [1, 1]],
                              dtype='float32')
  distance_config = configs.DistanceConfig('cosine', sum_over_axis=-1)
  distance_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, distance_config=distance_config)
  with self.cached_session() as sess:
    distance_value = sess.run(distance_tensor)
    self.assertAllClose(distance_value, 0.51)  # sum([0.0, 0.0, 0.04, 2.0]) / 4
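# The per-pair cosine distances behind the 0.51 above (illustration only):
#   1 - cos([1, 1], [1, 1])   = 0
#   1 - cos([1, 1], [5, 5])   = 0
#   1 - cos([3, 4], [4, 3])   = 1 - 24/25 = 0.04
#   1 - cos([-1, -1], [1, 1]) = 2
# so the mean is 2.04 / 4 = 0.51.
s = np.array([[1, 1], [1, 1], [3, 4], [-1, -1]], dtype='float32')
t = np.array([[1, 1], [5, 5], [4, 3], [1, 1]], dtype='float32')
cos_sim = np.sum(s * t, axis=-1) / (
    np.linalg.norm(s, axis=-1) * np.linalg.norm(t, axis=-1))
assert np.isclose(np.mean(1.0 - cos_sim), 0.51)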
def testCall(self):
  """Makes a function from config and runs it."""
  regularizer = pairwise_distance_lib.PairwiseDistance(
      configs.DistanceConfig(
          distance_type=configs.DistanceType.KL_DIVERGENCE, sum_over_axis=-1),
      name='kl_loss')
  # Run a computation.
  example = np.array([0.3, 0.3, 0.4])
  neighbors = np.array([[0.9, 0.05, 0.05]])
  kl_loss = self.evaluate(regularizer(example, neighbors))
  # Assert correctness of the KL divergence calculation.
  self.assertNear(kl_loss, np.sum(special.kl_div(example, neighbors)),
                  _ERR_TOL)
def testDistanceWithoutSumOverAxis(self):
  source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                              dtype='float32')
  target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                              dtype='float32')
  weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')
  distance_config = configs.DistanceConfig('l1')
  distance_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, weights, distance_config)
  with self.cached_session() as sess:
    distance_value = sess.run(distance_tensor)
    self.assertAllClose(distance_value, 5.5 / 6)
def testDistanceReductionMean(self):
  source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                              dtype='float32')
  target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                              dtype='float32')
  weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')
  distance_mean_config = configs.DistanceConfig(
      'l1', tf.compat.v1.losses.Reduction.MEAN, sum_over_axis=-1)
  distance_mean_tensor = distances.pairwise_distance_wrapper(
      source_tensor, target_tensor, weights, distance_mean_config)
  with self.cached_session() as sess:
    distance_mean_value = sess.run(distance_mean_tensor)
    self.assertAllClose(distance_mean_value, 5.5 / 2.0)
def testJensenShannonDistance(self):
  source_tensor = np.array([[1, 0, 0], [0.1, 0.2, 0.7]], dtype='float32')
  target_tensor = np.array([[1, 0, 0], [0.1, 0.9, 0]], dtype='float32')
  expected_tensor = np.sum(self._jsd_func(source_tensor, target_tensor), -1)
  expected_value = np.mean(expected_tensor)
  distance_config = configs.DistanceConfig(
      'jensen_shannon_divergence', sum_over_axis=-1)
  distance_tensor = distances.pairwise_distance_wrapper(
      tf.constant(source_tensor),
      tf.constant(target_tensor),
      distance_config=distance_config)
  with self.cached_session() as sess:
    distance_value = sess.run(distance_tensor)
    self.assertAllClose(distance_value, expected_value)
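# `_jsd_func` is defined elsewhere in the test class and is not shown in this
# excerpt. A minimal sketch consistent with its usage above (elementwise
# Jensen-Shannon divergence contributions, summed over the last axis by the
# test) could look like the following; this is an assumed implementation,
# not necessarily the one in the test file:
def _jsd_func(self, p, q):
  m = (p + q) / 2.0
  return 0.5 * special.kl_div(p, m) + 0.5 * special.kl_div(q, m)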
def testWeights(self):
  """Tests that weights are propagated to the distance function."""
  regularizer = pairwise_distance_lib.PairwiseDistance(
      configs.DistanceConfig(
          distance_type=configs.DistanceType.KL_DIVERGENCE, sum_over_axis=-1),
      name='weighted_kl_loss')
  example = np.array([0.1, 0.4, 0.5])
  neighbors = np.array([[0.6, 0.2, 0.2], [0.9, 0.01, 0.09]])
  neighbor_weight = 0.5
  loss = self.evaluate(regularizer(example, neighbors, neighbor_weight))
  self.assertAllClose(
      loss,
      neighbor_weight *
      np.mean(np.sum(special.kl_div(example, neighbors), -1)), _ERR_TOL)
def testKLDistanceFromLogit(self):
  source = np.array([[1, 2, 3], [1, -1, 2]], dtype='float32')
  target = np.array([[1, 2, 3], [1, 0, -1]], dtype='float32')
  expected_value = np.mean(
      np.sum(
          self._kl_func(
              self._softmax_func(source), self._softmax_func(target)), -1))
  distance_config = configs.DistanceConfig(
      'kl_divergence', transform_fn='softmax', sum_over_axis=-1)
  distance_tensor = distances.pairwise_distance_wrapper(
      tf.constant(source),
      tf.constant(target),
      distance_config=distance_config)
  with self.cached_session() as sess:
    distance_value = sess.run(distance_tensor)
    self.assertAllClose(distance_value, expected_value)
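# `_softmax_func` and `_kl_func` are likewise defined elsewhere in the test
# class. Assumed minimal sketches consistent with their usage here (again,
# not necessarily the actual helpers):
def _softmax_func(self, logits):
  exps = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
  return exps / np.sum(exps, axis=-1, keepdims=True)

def _kl_func(self, p, q):
  return special.kl_div(p, q)  # elementwise; the caller sums over the axis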
def testDistanceWithTransformButNoSumOverAxis(self):
  source = np.array([[1, 1], [2, 2], [0, 2], [10, -10]], dtype='float32')
  target = np.array([[0, 0], [0, 2], [1, 3], [3, 3]], dtype='float32')
  distance_config = configs.DistanceConfig(
      distance_type='l1',
      reduction=tf.compat.v1.losses.Reduction.NONE,
      transform_fn='softmax')
  distance_tensor = distances.pairwise_distance_wrapper(
      tf.constant(source),
      tf.constant(target),
      distance_config=distance_config)
  expected_distance = np.abs(
      self._softmax_func(source) - self._softmax_func(target))
  with self.cached_session() as sess:
    distance = sess.run(distance_tensor)
    self.assertAllClose(distance, expected_distance)
def testVirtualAdvRegularizerRandomPerturbation(self):
  """Tests virtual_adv_regularizer with num_approx_steps=0."""
  input_layer = tf.constant([[1.0, -1.0]])
  embedding_fn = lambda x: x
  step_size = 0.1
  vadv_config = configs.VirtualAdvConfig(
      adv_neighbor_config=configs.AdvNeighborConfig(
          feature_mask=None,
          adv_step_size=step_size,
          adv_grad_norm=configs.NormType.L2),
      distance_config=configs.DistanceConfig(
          distance_type=configs.DistanceType.L2, sum_over_axis=-1),
      num_approx_steps=0)
  vadv_loss = regularizer.virtual_adv_regularizer(input_layer, embedding_fn,
                                                  vadv_config)
  actual_loss = self.evaluate(vadv_loss)
  # The identity embedding_fn makes the virtual adversarial loss immune to
  # the direction of the perturbation; only its size matters.
  expected_loss = step_size**2  # squared-L2 loss
  self.assertNear(actual_loss, expected_loss, err=1e-5)
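# Why any perturbation direction yields the same loss here (illustration
# only): with the identity embedding and squared-L2 distance summed over the
# last axis, d(x, x + v) = ||v||^2, which depends only on the norm of v, and
# the random perturbation is scaled to norm `adv_step_size`.
for theta in np.linspace(0.0, 2 * np.pi, num=8):
  v = 0.1 * np.array([np.cos(theta), np.sin(theta)])
  assert np.isclose(np.sum(v**2), 0.1**2)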
def testModelFitAndEvaluate(self, model_fn, distance_type):
  """Fits and evaluates models with various distance configurations."""
  # Set up the graph-regularized model.
  distance_config = configs.DistanceConfig(
      distance_type=distance_type,
      transform_fn=configs.TransformType.SOFTMAX,
      sum_over_axis=-1)
  model = model_fn(distance_config)
  model.compile(
      optimizer=tf.keras.optimizers.SGD(),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          tf.keras.metrics.SparseCategoricalAccuracy(),
          tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True),
      ])
  # Fit and evaluate the model on dummy data that has 8 examples.
  features = {
      'features': np.random.normal(size=(8, 4)),
      'neighbors': np.random.normal(size=(8, 2, 4)),
      'neighbor_weights': np.random.uniform(size=(8, 2, 1)),
  }
  labels = np.random.randint(0, 3, size=8)
  train_history = model.fit(features, labels, batch_size=2, epochs=16).history
  evaluation_results = dict(
      zip(model.metrics_names, model.evaluate(features, labels, batch_size=4)))
  # Assert that losses and metrics were evaluated.
  self.assertAllGreater(train_history['graph_loss'], 0.)
  self.assertGreater(evaluation_results['graph_loss'], 0.)
  self.assertAllClose(
      train_history['loss'],
      np.add(train_history['graph_loss'],
             train_history['sparse_categorical_crossentropy']), _ERR_TOL)
  self.assertNear(
      evaluation_results['loss'],
      evaluation_results['graph_loss'] +
      evaluation_results['sparse_categorical_crossentropy'], _ERR_TOL)
def __init__(self, distance_config=None, **kwargs):
  super(PairwiseDistance, self).__init__(**kwargs)
  self._distance_config = (
      configs.DistanceConfig()
      if distance_config is None else attr.evolve(distance_config))
@classmethod
def from_config(cls, config):
  return cls(
      configs.DistanceConfig(**config["distance_config"]),
      name=config.get("name"))
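# A serialization round-trip sketch (illustration only; it assumes the
# layer's `get_config` stores the distance config as a dict under the
# "distance_config" key, which is what the `from_config` above expects):
layer = PairwiseDistance(configs.DistanceConfig('l2', sum_over_axis=-1))
restored = PairwiseDistance.from_config(layer.get_config())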
def pairwise_distance_wrapper(sources,
                              targets,
                              weights=1.0,
                              distance_config=None):
  """A wrapper to compute the pairwise distance between sources and targets.

  distances = weights * distance_type(sources, targets)

  This wrapper calculates the weighted distance between `(sources, targets)`
  pairs, and provides an option to return the distance as the sum over the
  difference along a given axis, when a vector-based distance is needed.

  For the usage of `weights` and `reduction`, please refer to tf.losses. For
  the usage of `sum_over_axis`, see the following examples:

  Given target tensors with shape `[batch_size, features]`, reduction set to
  MEAN, and `sum_over_axis` set to the last dimension, the weighted average
  distance of sample pairs will be returned. For example: with a
  distance_config('L2', sum_over_axis=-1), the distance between
  [[1, 1], [2, 2], [0, 2], [5, 5]] and [[1, 1], [0, 2], [4, 4], [1, 4]] is
  {(0+0) + (4+0) + (16+4) + (16+1)}/4 = 10.25

  If `sum_over_axis` is None, the weighted average distance of feature pairs
  (instead of sample pairs) will be returned. For example: with a
  distance_config('L2'), the distance between
  [[1, 1], [2, 2], [0, 2], [5, 5]] and [[1, 1], [0, 2], [4, 4], [1, 4]] is
  {(0+0) + (4+0) + (16+4) + (16+1)}/8 = 5.125

  If `transform_fn` is not None, the transform function is applied to both
  sources and targets before computing the distance. For example:
  distance_config('KL_DIVERGENCE', sum_over_axis=-1, transform_fn='SOFTMAX')
  treats `sources` and `targets` as logits, and computes the KL-divergence
  between the two probability distributions.

  Args:
    sources: `Tensor` of type float32 or float64.
    targets: `Tensor` of the same type and shape as `sources`.
    weights: (optional) `Tensor` whose rank is either 0, or the same as that
      of `targets`, and must be broadcastable to `targets` (i.e., all
      dimensions must be either `1`, or the same as the corresponding
      `distance` dimension).
    distance_config: A `DistanceConfig` that contains the following configs
      (or hyperparameters) for computing distances:
      (a) 'distance_type': Type of distance function to apply.
      (b) 'reduction': Type of distance reduction. Refer to
        tf.losses.Reduction.
      (c) 'sum_over_axis': (optional) The distance is the sum over the
        difference along the specified axis. Note that if `sum_over_axis` is
        not None and the rank of `weights` is nonzero, the size of `weights`
        along `sum_over_axis` must be 1.
      (d) 'transform_fn': (optional) If set, both sources and targets will be
        transformed before calculating the distance. If set to 'SOFTMAX', the
        transform will be performed on the axis specified by 'sum_over_axis',
        or -1 if that is not specified. If None, the default distance config
        will be used.

  Returns:
    Weighted distance scalar `Tensor`. If `reduction` is `NONE`, this has the
    same shape as `targets`.

  Raises:
    ValueError: If the shape of targets doesn't match that of sources, or if
      the shape of weights is invalid.
    TypeError: If the distance function gets an unexpected keyword argument.
  """
  if distance_config is None:
    distance_config = configs.DistanceConfig()  # Default configs.

  tf.compat.v1.losses.Reduction.validate(distance_config.reduction)

  if distance_config.transform_fn is not configs.TransformType.NONE:
    sources = _apply_transform(sources, distance_config.transform_fn,
                               distance_config.sum_over_axis)
    targets = _apply_transform(targets, distance_config.transform_fn,
                               distance_config.sum_over_axis)

  sum_over_axis = distance_config.sum_over_axis
  # Validates `sum_over_axis`.
  _assert_valid_axis(sources.get_shape().ndims, sum_over_axis)
  distance_fn = _select_distance_fn(distance_config.distance_type)
  if distance_config.distance_type == configs.DistanceType.COSINE:
    # The cosine distance function assumes that the input tensors have been
    # unit-normalized.
    sources = tf.nn.l2_normalize(sources, axis=sum_over_axis)
    targets = tf.nn.l2_normalize(targets, axis=sum_over_axis)
  if _is_axis_required_in_distance_fn(distance_config.distance_type):
    distances = distance_fn(
        labels=sources,
        predictions=targets,
        weights=weights,
        axis=sum_over_axis,
        reduction=distance_config.reduction,
        loss_collection=None)
  else:
    distances = distance_fn(
        labels=sources,
        predictions=targets,
        weights=weights,
        reduction=distance_config.reduction,
        loss_collection=None)
    if sum_over_axis is not None and _is_reduced_by_average(
        distance_config.reduction):
      # The distance is divided by the size of the targets tensor, so we
      # rescale it by multiplying by the size of the `sum_over_axis`
      # dimension. Note that a distance function with `axis` as a required
      # argument (e.g., cosine distance) does not need to be rescaled.
      weights = tf.convert_to_tensor(value=weights)
      weights_shape = weights.get_shape().as_list()
      if weights_shape and weights_shape[sum_over_axis] != 1:
        raise ValueError('Shape of weights along the axis %d must be 1.' %
                         sum_over_axis)
      distances *= sources.shape.dims[sum_over_axis].value
  return distances
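# A usage sketch mirroring the docstring examples above (assumes eager
# execution and `configs` imported from neural_structured_learning):
sources = tf.constant([[1., 1.], [2., 2.], [0., 2.], [5., 5.]])
targets = tf.constant([[1., 1.], [0., 2.], [4., 4.], [1., 4.]])
per_sample = pairwise_distance_wrapper(
    sources, targets,
    distance_config=configs.DistanceConfig('l2', sum_over_axis=-1))
# per_sample == 10.25: the weighted average distance over sample pairs.
per_feature = pairwise_distance_wrapper(
    sources, targets, distance_config=configs.DistanceConfig('l2'))
# per_feature == 5.125: the weighted average distance over feature pairs.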