def _makeDataset(self, inputter, data_file, metadata=None, dataset_size=1, shapes=None):
  if metadata is not None:
    inputter.initialize(metadata)
  self.assertEqual(dataset_size, inputter.get_dataset_size(data_file))
  dataset = inputter.make_dataset(data_file)
  dataset = dataset.map(lambda *arg: inputter.process(item_or_tuple(arg)))
  dataset = dataset.padded_batch(1, padded_shapes=data.get_padded_shapes(dataset))
  if compat.is_tf2():
    iterator = None
    features = next(iter(dataset))
  else:
    iterator = dataset.make_initializable_iterator()
    features = iterator.get_next()
  if shapes is not None:
    all_features = [features]
    if not compat.is_tf2() and not inputter.is_target:
      all_features.append(inputter.get_serving_input_receiver().features)
    for f in all_features:
      for field, shape in six.iteritems(shapes):
        self.assertIn(field, f)
        self.assertTrue(f[field].shape.is_compatible_with(shape))
  inputs = inputter.make_inputs(features, training=True)
  if not compat.is_tf2():
    # TF1 graph mode requires explicit initialization of lookup tables,
    # variables, and the dataset iterator before evaluation.
    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      sess.run(tf.global_variables_initializer())
      sess.run(iterator.initializer)
  return self.evaluate((features, inputs))
def __call__(self, inputs, sequence_length=None, position=None):  # pylint: disable=arguments-differ
  """Apply position encoding to inputs.

  Args:
    inputs: The inputs of shape :math:`[B, T, D]`.
    sequence_length: The length of each sequence of shape :math:`[B]`.
      If ``None``, sequences are assumed to have the same length.
    position: If known, the position to encode (1-indexed).

  Returns:
    A ``tf.Tensor`` of shape :math:`[B, T, D]` where :math:`D` depends on the
    :attr:`reducer`.
  """
  if compat.is_tf2():
    return super(PositionEncoder, self).__call__(
        inputs, sequence_length=sequence_length, position=position)
  self._dtype = inputs.dtype
  # Build by default for backward compatibility.
  if not compat.reuse():
    self.build(inputs.shape)
  return self.call(inputs, sequence_length=sequence_length, position=position)
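# Usage sketch, assuming the SinusoidalPositionEncoder subclass provided by
# this module; the batch size, length, and depth below are illustrative only.
encoder = SinusoidalPositionEncoder()
inputs = tf.zeros([4, 10, 512])                        # [B, T, D]
lengths = tf.constant([10, 7, 10, 3], dtype=tf.int32)  # [B]
outputs = encoder(inputs, sequence_length=lengths)     # [4, 10, 512] with the default sum reducer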
def build(self, input_shape=None):
  if self.embedding_file:
    # When a pretrained embedding file is given, use it as the initializer and
    # infer the embedding size from it.
    pretrained = load_pretrained_embeddings(
        self.embedding_file,
        self.vocabulary_file,
        num_oov_buckets=self.num_oov_buckets,
        with_header=self.embedding_file_with_header,
        case_insensitive_embeddings=self.case_insensitive_embeddings)
    self.embedding_size = pretrained.shape[-1]
    initializer = tf.constant_initializer(value=pretrained.astype(self.dtype))
  else:
    initializer = None
  shape = [self.vocabulary_size, self.embedding_size]
  if compat.is_tf2():
    self.embedding = self.add_variable(
        name=compat.name_from_variable_scope("w_embs"),
        shape=shape,
        initializer=initializer,
        trainable=self.trainable)
  else:
    self.embedding = tf.get_variable(
        "w_embs",
        shape=shape,
        dtype=self.dtype,
        initializer=initializer,
        trainable=self.trainable)
  super(WordEmbedder, self).build(input_shape)
def dropout(x, rate, training=None):
  """Simple dropout layer."""
  if not training or rate == 0:
    return x
  if compat.is_tf2():
    # TF2's tf.nn.dropout takes the drop probability directly.
    return tf.nn.dropout(x, rate)
  else:
    # TF1's tf.nn.dropout expects a keep probability instead.
    return tf.nn.dropout(x, 1.0 - rate)
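# Usage sketch with hypothetical tensors: the same call works under both
# backends because the helper converts the drop rate to TF1's keep probability.
x = tf.random.uniform([16, 128])
y = dropout(x, rate=0.1, training=True)   # elements zeroed with probability 0.1
z = dropout(x, rate=0.1, training=False)  # returned unchanged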
def testRegularization(self, type, scale):
  layer = tf.keras.layers.Dense(256)
  layer.build([None, 128])
  regularization = optim.regularization_penalty(
      type, scale, weights_list=layer.trainable_variables)
  self.assertEqual(0, len(regularization.shape.as_list()))
  if not compat.is_tf2():
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
  self.evaluate(regularization)
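# Usage sketch grounded in the call above; the "l2" penalty type, the scale,
# and the loss variable are illustrative. The returned scalar is simply added
# to the training loss.
layer = tf.keras.layers.Dense(256)
layer.build([None, 128])
penalty = optim.regularization_penalty("l2", 1e-4, weights_list=layer.trainable_variables)
# total_loss = cross_entropy_loss + penalty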
def build(self, input_shape=None):
  shape = [self.vocabulary_size, self.embedding_size]
  if compat.is_tf2():
    self.embedding = self.add_variable(
        name=compat.name_from_variable_scope("w_char_embs"), shape=shape)
  else:
    self.embedding = tf.get_variable("w_char_embs", shape=shape, dtype=self.dtype)
  super(CharEmbedder, self).build(input_shape)
def testParallelEncoderReuse(self):
  lengths = [
      tf.constant([2, 5, 4], dtype=tf.int32),
      tf.constant([6, 6, 3], dtype=tf.int32)]
  inputs = [tf.zeros([3, 5, 10]), tf.zeros([3, 6, 10])]
  encoder = encoders.ParallelEncoder(DenseEncoder(2, 20), outputs_reducer=None)
  outputs, _, _ = encoder.encode(inputs, sequence_length=lengths)
  if not compat.is_tf2():
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
  outputs = self.evaluate(outputs)
  self.assertIsInstance(outputs, tuple)
  self.assertEqual(len(outputs), 2)
def testSequentialEncoder(self, transition_layer_fn):
  inputs = tf.zeros([3, 5, 10])
  encoder = encoders.SequentialEncoder(
      [DenseEncoder(1, 20), DenseEncoder(3, 20)],
      transition_layer_fn=transition_layer_fn)
  outputs, states, _ = encoder.encode(inputs)
  self.assertEqual(len(states), 4)
  if not compat.is_tf2():
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
  outputs = self.evaluate(outputs)
  self.assertAllEqual(outputs.shape, [3, 5, 20])
def testParallelEncoder(self):
  sequence_lengths = [[3, 5, 2], [6, 6, 4]]
  inputs = [tf.zeros([3, 5, 10]), tf.zeros([3, 6, 10])]
  encoder = encoders.ParallelEncoder(
      [DenseEncoder(1, 20), DenseEncoder(2, 20)],
      outputs_reducer=reducer.ConcatReducer(axis=1))
  outputs, state, encoded_length = encoder.encode(
      inputs, sequence_length=sequence_lengths)
  self.assertEqual(len(state), 3)
  if not compat.is_tf2():
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
  outputs, encoded_length = self.evaluate([outputs, encoded_length])
  self.assertAllEqual([3, 11, 20], outputs.shape)
  self.assertAllEqual([9, 11, 6], encoded_length)
def _encodeInParallel(self,
                      inputs,
                      sequence_length=None,
                      outputs_layer_fn=None,
                      combined_output_layer_fn=None):
  columns = [DenseEncoder(1, 20), DenseEncoder(1, 20)]
  encoder = encoders.ParallelEncoder(
      columns,
      outputs_reducer=reducer.ConcatReducer(),
      outputs_layer_fn=outputs_layer_fn,
      combined_output_layer_fn=combined_output_layer_fn)
  outputs, _, _ = encoder.encode(inputs, sequence_length=sequence_length)
  if not compat.is_tf2():
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
  return self.evaluate(outputs)
def __call__(self, encoder_state, decoder_zero_state):  # pylint: disable=arguments-differ
  """Returns the initial decoder state.

  Args:
    encoder_state: The encoder state.
    decoder_zero_state: The default decoder state.

  Returns:
    The decoder initial state.
  """
  inputs = [encoder_state, decoder_zero_state]
  if compat.is_tf2():
    return super(Bridge, self).__call__(inputs)
  # Build by default for backward compatibility.
  if not compat.reuse():
    self.build(compat.nest.map_structure(lambda x: x.shape, inputs))
  return self.call(inputs)
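# Usage sketch, assuming the CopyBridge subclass defined alongside this base
# class; the flat states below stand in for real (possibly nested) encoder and
# decoder states of compatible structure.
encoder_state = tf.zeros([4, 256])
decoder_zero_state = tf.zeros([4, 256])
bridge = CopyBridge()
initial_state = bridge(encoder_state, decoder_zero_state)  # passes the encoder state through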
"""Module defining custom optimizers.""" from opennmt.utils.compat import is_tf2 if not is_tf2(): from opennmt.optimizers.adafactor import AdafactorOptimizer from opennmt.optimizers.adafactor import get_optimizer_from_params \ as get_adafactor_optimizer_from_params from opennmt.optimizers.multistep_adam import MultistepAdamOptimizer from opennmt.optimizers.mixed_precision_wrapper import MixedPrecisionOptimizerWrapper from opennmt.optimizers.adam_weight_decay import AdamWeightDecayOptimizer
def run_tf1_only(func):
  """Decorator that skips a test case when running TensorFlow 2.x."""
  return unittest.skipIf(compat.is_tf2(), "TensorFlow v1 only test")(func)
def run_tf2_only(func):
  """Decorator that skips a test case when running TensorFlow 1.x."""
  return unittest.skipIf(not compat.is_tf2(), "TensorFlow v2 only test")(func)
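# Usage sketch (hypothetical test case, assuming tf and compat are imported in
# this test utility module): each decorator skips the test when the installed
# TensorFlow major version does not match.
class CompatTest(tf.test.TestCase):

  @run_tf1_only
  def testGraphOnlyBehavior(self):
    self.assertFalse(compat.is_tf2())

  @run_tf2_only
  def testEagerOnlyBehavior(self):
    self.assertTrue(compat.is_tf2())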