def testWordNoising(self):
    """Check that chained word noises keep the token tensor shape.

    A fixed batch of two token sequences (lengths 5 and 3) is passed
    through a ``WordNoiser`` holding dropout, replacement and permutation
    modules, called with ``keep_shape=True``. Only the shape of the result
    is compared against the shape of the input.
    """
    source_tokens = tf.constant(
        [
            ["a■", "b", "c■", "d", "■e"],
            ["a", "b", "c", "", ""],
        ]
    )
    source_lengths = tf.constant([5, 3])

    word_noiser = noise.WordNoiser()
    word_noiser.add(noise.WordDropout(0.1))
    word_noiser.add(noise.WordReplacement(0.1))
    word_noiser.add(noise.WordPermutation(3))

    # The returned lengths are not inspected by this test.
    noised_tokens, _ = word_noiser(
        source_tokens, sequence_length=source_lengths, keep_shape=True
    )

    expected, actual = self.evaluate([source_tokens, noised_tokens])
    self.assertAllEqual(actual.shape, expected.shape)
def testWordNoising(self, as_function, tokens, lengths):
    """Check that chained word noises keep the token tensor shape.

    Args:
      as_function: When truthy, the noiser is wrapped in ``tf.function``
        before being called; otherwise it is called directly.
      tokens: Value converted to the input token tensor with
        ``tf.constant``.
      lengths: Sequence lengths converted to an ``int32`` tensor, or
        ``None`` to pass no lengths to the noiser.

    The noiser is called with ``keep_shape=True`` and only the shape of
    the noised tokens is compared against the shape of the input.
    """
    tokens = tf.constant(tokens)
    if lengths is None:
        sequence_length = None
    else:
        sequence_length = tf.constant(lengths, dtype=tf.int32)

    word_noiser = noise.WordNoiser()
    word_noiser.add(noise.WordDropout(0.1))
    word_noiser.add(noise.WordReplacement(0.1))
    word_noiser.add(noise.WordPermutation(3))

    if as_function:
        apply_noise = tf.function(word_noiser)
    else:
        apply_noise = word_noiser

    # The returned lengths are not inspected by this test.
    noised_tokens, _ = apply_noise(
        tokens, sequence_length=sequence_length, keep_shape=True
    )

    expected, actual = self.evaluate([tokens, noised_tokens])
    self.assertAllEqual(actual.shape, expected.shape)
def testWordDropoutAll(self, words):
    """Check the output size of ``WordDropout`` built with argument 1.

    Args:
      words: Words converted to a string tensor and passed to the
        dropout module.

    The first dimension of the result is expected to be 1 when ``words``
    is non-empty and 0 when it is empty.
    """
    # At least one is not dropped.
    expected_count = 1 if words else 0

    word_tensor = tf.constant(words, dtype=tf.string)
    dropout = noise.WordDropout(1)
    remaining = self.evaluate(dropout(word_tensor))

    self.assertEqual(remaining.shape[0], expected_count)
def testWordDropoutNone(self, words):
    """Check that ``WordDropout`` built with argument 0 returns its input.

    Args:
      words: Words converted to a tensor and passed to the dropout module.

    The evaluated output must equal the evaluated input.
    """
    word_tensor = tf.constant(words)
    dropout = noise.WordDropout(0)
    kept_tensor = dropout(word_tensor)

    expected, actual = self.evaluate([word_tensor, kept_tensor])
    self.assertAllEqual(expected, actual)