def build(self, input_shape=None):
  """Creates the variables of all child inputters.

  When :obj:`share_parameters` is enabled, only the first leaf inputter is
  built under a shared variable scope; every attribute of it that holds
  parameters (a ``tf.Variable`` or a Keras layer with variables) is then
  assigned to the other leaf inputters so they share the same parameters.

  Args:
    input_shape: Optional input shape forwarded to the child inputters.
  """
  if self.share_parameters:
    # When sharing parameters, build the first leaf inputter and then set
    # all attributes with parameters to the other inputters.
    leaves = self.get_leaf_inputters()
    first, others = leaves[0], leaves[1:]
    # When the first leaf is also a ParallelInputter sharing parameters,
    # build the first leaf inputter of it and then set all attributes with
    # parameters to the other inputters.
    if isinstance(first, ParallelInputter):
      if first.share_parameters:
        first.built = True
        first_leaves = first.get_leaf_inputters()
        # Bug fix: use extend, not append. append inserted the list itself
        # into others, so the sharing loop below would call setattr on a
        # list object and raise AttributeError.
        others.extend(first_leaves[1:])
        first = first_leaves[0]
    with compat.tf_compat(v1="variable_scope")(self._get_shared_name()):
      first.build(input_shape)
    for name, attr in six.iteritems(first.__dict__):
      if (isinstance(attr, tf.Variable)
          or (isinstance(attr, tf.keras.layers.Layer) and attr.variables)):
        for inputter in others:
          setattr(inputter, name, attr)
          inputter.built = True
  else:
    for inputter, scope in zip(self.inputters, self._get_names()):
      with compat.tf_compat(v1="variable_scope")(scope):
        inputter.build(input_shape)
  super(ParallelInputter, self).build(input_shape)
def make_features(self, element=None, features=None, training=None):
  """Parses a serialized ``tf.train.Example`` into a features dictionary.

  Args:
    element: The serialized example to parse; required unless :obj:`features`
      already contains the parsed input features.
    features: An optional dictionary of features to update in place.
    training: Unused; kept for interface consistency.

  Returns:
    The features dictionary, with one entry per input feature (reshaped to
    the feature's declared shape) and, if words are present, a scalar
    ``numWords`` count.

  Raises:
    RuntimeError: If :obj:`element` is ``None`` and features were not
      already parsed.
  """
  if features is None:
    features = {}
  # Already parsed upstream: nothing to do.
  if self.input_features[0].name in features:
    return features
  if element is None:
    raise RuntimeError("make_features was called with None element")
  tf_parse_example = compat.tf_compat(
      v2="io.parse_single_example", v1="parse_single_example")
  tf_var_len_feature = compat.tf_compat(
      v2="io.VarLenFeature", v1="VarLenFeature")
  # Build the parsing spec for every declared input feature.
  feature_spec = {}
  if self.has_word():
    feature_spec["numWords"] = tf_var_len_feature(tf.int64)
  for feature in self.input_features:
    feature_spec[feature.name] = tf_var_len_feature(tf.float32)
  example = tf_parse_example(element, features=feature_spec)
  if self.has_word():
    # Stored as a length-1 int64 list; extract the scalar as int32.
    features["numWords"] = tf.cast(example["numWords"].values, tf.int32)[0]
  for feature in self.input_features:
    features[feature.name] = tf.reshape(
        example[feature.name].values, feature.shape)
  return features
def _lower_triangle_mask(sequence_length, maximum_length=None, dtype=tf.float32):
  """Builds a batch of square lower-triangular masks.

  Args:
    sequence_length: A 1-D tensor of sequence lengths (defines the batch size).
    maximum_length: The mask dimension; defaults to the largest length.
    dtype: The mask dtype.

  Returns:
    A ``[batch, maximum_length, maximum_length]`` tensor with ones on and
    below the diagonal, zeros above.
  """
  num_sequences = tf.shape(sequence_length)[0]
  if maximum_length is None:
    maximum_length = tf.reduce_max(sequence_length)
  band_part = compat.tf_compat(v2="linalg.band_part", v1="matrix_band_part")
  full_mask = tf.ones(
      [num_sequences, maximum_length, maximum_length], dtype=dtype)
  # Keep all sub-diagonals (-1) and zero super-diagonals (0).
  return band_part(full_mask, -1, 0)
def make_dataset(self, data_file, training=None):
  """Creates a ``TFRecordDataset`` over :obj:`data_file`.

  Reads the first record to infer and store the input depth from its
  serialized "shape" feature before building the dataset.
  """
  record_iterator = compat.tf_compat(
      v1="python_io.tf_record_iterator")(data_file)
  serialized = next(record_iterator)
  example = tf.train.Example.FromString(serialized)
  # The last dimension of the stored shape is the feature depth.
  stored_shape = example.features.feature["shape"].int64_list.value
  self.input_depth = stored_shape[-1]
  return tf.data.TFRecordDataset(data_file)
def make_inputs(self, features, training=None):
  """Transforms features with every child inputter and reduces the results.

  Each inputter runs under its own ``inputter_{i}`` variable scope; the
  reduced output is passed through dropout (active only in training).
  """
  per_inputter_outputs = []
  for index, inputter in enumerate(self.inputters):
    scope_name = "inputter_{}".format(index)
    with compat.tf_compat(v1="variable_scope")(scope_name):
      per_inputter_outputs.append(
          inputter.make_inputs(features, training=training))
  reduced = self.reducer(per_inputter_outputs)
  return tf.layers.dropout(reduced, rate=self.dropout, training=training)
def encode(self, inputs, sequence_length=None, mode=tf.estimator.ModeKeys.TRAIN):
  """Encodes each input with its encoder and reduces the parallel results.

  Inputs and encoders may each be a single object or a list; a single
  encoder is repeated over parallel inputs (and vice versa one input is fed
  to every parallel encoder).

  Args:
    inputs: A tensor or a list/tuple of tensors (one per parallel encoder).
    sequence_length: The length(s) matching :obj:`inputs` (same structure).
    mode: A ``tf.estimator.ModeKeys`` mode.

  Returns:
    A ``(outputs, state, sequence_length)`` tuple where outputs and lengths
    come from ``outputs_reducer`` and the state from ``states_reducer``.

  Raises:
    ValueError: If the numbers of parallel inputs and encoders differ.
  """
  all_outputs = []
  all_states = []
  all_sequence_lengths = []
  parallel_inputs = isinstance(inputs, (list, tuple))
  parallel_encoders = isinstance(self.encoders, (list, tuple))
  if parallel_encoders and parallel_inputs and len(inputs) != len(
      self.encoders):
    raise ValueError(
        "ParallelEncoder expects as many inputs as parallel encoders")
  if parallel_encoders:
    encoders = self.encoders
  else:
    # A single encoder instance is reused for each parallel input.
    encoders = itertools.repeat(self.encoders,
                                len(inputs) if parallel_inputs else 1)
  for i, encoder in enumerate(encoders):
    # With shared parameters all iterations use one scope, reusing
    # variables after the first pass.
    scope_name = "encoder_{}".format(
        i) if not self.share_parameters else "parallel_encoder"
    reuse = self.share_parameters and i > 0
    with compat.tf_compat(v1="variable_scope")(scope_name, reuse=reuse):
      if parallel_inputs:
        encoder_inputs = inputs[i]
        length = sequence_length[i]
      else:
        encoder_inputs = inputs
        length = sequence_length
      outputs, state, length = encoder.encode(encoder_inputs,
                                              sequence_length=length,
                                              mode=mode)
      # Optional per-encoder projection; a list gives one fn per encoder.
      if self.outputs_layer_fn is not None:
        if isinstance(self.outputs_layer_fn, list):
          outputs = self.outputs_layer_fn[i](outputs)
        else:
          outputs = self.outputs_layer_fn(outputs)
      all_outputs.append(outputs)
      all_states.append(state)
      all_sequence_lengths.append(length)
  # Merge the parallel outputs and lengths into a single pair.
  outputs, sequence_length = self.outputs_reducer(
      all_outputs, sequence_length=all_sequence_lengths)
  # Optional projection applied after the reduction.
  if self.combined_output_layer_fn is not None:
    outputs = self.combined_output_layer_fn(outputs)
  return (outputs, self.states_reducer(all_states), sequence_length)
def make_features(self, element=None, features=None, training=None):
  """Parses a serialized record into a dense "tensor" and its "length".

  The record stores a flat float list under "values" and its dense shape
  under "shape"; the tensor is rebuilt, its depth pinned to
  ``self.input_depth``, and cast to ``self.dtype``.
  """
  if features is None:
    features = {}
  # Nothing to do when the tensor was already parsed.
  if "tensor" in features:
    return features
  parse_single_example = compat.tf_compat(
      v2="io.parse_single_example", v1="parse_single_example")
  var_len_feature = compat.tf_compat(
      v2="io.VarLenFeature", v1="VarLenFeature")
  parsed = parse_single_example(element, features={
      "shape": var_len_feature(tf.int64),
      "values": var_len_feature(tf.float32)
  })
  flat_values = parsed["values"].values
  dense_shape = tf.cast(parsed["shape"].values, tf.int32)
  tensor = tf.reshape(flat_values, dense_shape)
  tensor.set_shape([None, self.input_depth])
  features["length"] = tf.shape(tensor)[0]
  features["tensor"] = tf.cast(tensor, self.dtype)
  return features
def make_inputs(self, features, training=None):
  """Transforms features with each child inputter, optionally reducing.

  When ``combine_features`` is set, each inputter reads its own
  ``inputter_{i}_``-prefixed keys from the shared dictionary; otherwise
  :obj:`features` is indexed as a sequence of per-inputter dictionaries.
  """
  if not self.built:
    self.build()
  outputs = []
  inputter_scopes = zip(self.inputters, self._get_scopes())
  for index, (inputter, scope) in enumerate(inputter_scopes):
    with compat.tf_compat(v1="variable_scope")(scope):
      if self.combine_features:
        prefix = "inputter_{}_".format(index)
        sub_features = extract_prefixed_keys(features, prefix)
      else:
        sub_features = features[index]
      outputs.append(inputter.make_inputs(sub_features, training=training))
  if self.reducer is not None:
    outputs = self.reducer(outputs)
  return outputs
def alignment_matrix_from_pharaoh(alignment_line, source_length, target_length, dtype=tf.float32):
  """Parse Pharaoh alignments into an alignment matrix.

  Args:
    alignment_line: A string ``tf.Tensor`` in the Pharaoh format.
    source_length: The length of the source sentence, without special symbols.
    target_length: The length of the target sentence, without special symbols.
    dtype: The output matrix dtype. Defaults to ``tf.float32`` for convenience
      when computing the guided alignment loss.

  Returns:
    The alignment matrix as a 2-D ``tf.Tensor`` of type :obj:`dtype` and
    shape ``[target_length, source_length]``, where ``[i, j] = 1`` if the
    ``i`` th target word is aligned with the ``j`` th source word.
  """
  # Split "i-j i-j ..." into individual index strings, using the newer
  # strings.split API when available.
  if compat.tf_supports("strings.split"):
    align_pairs_str = tf.strings.split([alignment_line]).values
    align_pairs_flat_str = tf.strings.split(align_pairs_str, sep="-").values
  else:
    align_pairs_str = tf.string_split([alignment_line], delimiter=" ").values
    align_pairs_flat_str = tf.string_split(align_pairs_str, delimiter="-").values
  # Flat int64 vector [i0, j0, i1, j1, ...] -> [num_pairs, 2] index matrix.
  align_pairs_flat = compat.tf_compat(v2="strings.to_number",
                                      v1="string_to_number")(
                                          align_pairs_flat_str, out_type=tf.int64)
  sparse_indices = tf.reshape(align_pairs_flat, [-1, 2])
  sparse_values = tf.ones([tf.shape(sparse_indices)[0]], dtype=dtype)
  source_length = tf.cast(source_length, tf.int64)
  target_length = tf.cast(target_length, tf.int64)
  if compat.tf_supports("sparse.to_dense"):
    alignment_matrix_sparse = tf.sparse.SparseTensor(
        sparse_indices, sparse_values, [source_length, target_length])
    alignment_matrix = tf.sparse.to_dense(alignment_matrix_sparse,
                                          validate_indices=False)
  else:
    alignment_matrix = tf.sparse_to_dense(sparse_indices,
                                          [source_length, target_length],
                                          sparse_values,
                                          validate_indices=False)
  # Built as [source, target]; transpose to the documented [target, source].
  return tf.transpose(alignment_matrix)
def testSequenceRecord(self):
  """Round-trips a 2x2 float matrix through a sequence record file."""
  data = np.array([[0.2, 0.3], [0.4, 0.5]], dtype=np.float32)
  record_file = os.path.join(self.get_temp_dir(), "data.records")
  record_writer = compat.tf_compat(
      v2="io.TFRecordWriter", v1="python_io.TFRecordWriter")(record_file)
  record_inputter.write_sequence_record(data, record_writer)
  record_writer.close()
  inputter = record_inputter.SequenceRecordInputter()
  features, transformed = self._makeDataset(
      inputter,
      record_file,
      shapes={"tensor": [None, None, 2], "length": [None]})
  # The parsed length, tensor, and transformed inputs all match the input.
  self.assertEqual([2], features["length"])
  self.assertAllEqual([data], features["tensor"])
  self.assertAllEqual([data], transformed)
def encode(self, inputs, sequence_length=None, mode=tf.estimator.ModeKeys.TRAIN):
  """Chains the encoders, feeding each one the previous encoder's outputs.

  From the second encoder on, an optional transition layer (a single fn or
  one fn per transition) is applied to the inputs first. All intermediate
  states are collected and reduced into the final state.
  """
  collected_states = []
  for index, encoder in enumerate(self.encoders):
    scope = compat.tf_compat(v1="variable_scope")("encoder_{}".format(index))
    with scope:
      if index > 0 and self.transition_layer_fn is not None:
        if isinstance(self.transition_layer_fn, list):
          transition = self.transition_layer_fn[index - 1]
        else:
          transition = self.transition_layer_fn
        inputs = transition(inputs)
      inputs, state, sequence_length = encoder.encode(
          inputs, sequence_length=sequence_length, mode=mode)
      collected_states.append(state)
  return (inputs, self.states_reducer(collected_states), sequence_length)
def _detokenize_tensor(self, tokens):
  """Joins a 1-D string tensor of tokens into a space-separated sentence."""
  join_strings = compat.tf_compat(v2="strings.reduce_join", v1="reduce_join")
  return join_strings(tokens, axis=0, separator=" ")
# pylint: disable=missing-docstring """Custom hooks.""" from __future__ import print_function import io import time import six import tensorflow as tf from opennmt.utils import compat, misc _SESSION_RUN_HOOK = compat.tf_compat(v2="estimator.SessionRunHook", v1="train.SessionRunHook") class LogParametersCountHook(_SESSION_RUN_HOOK): """Simple hook that logs the number of trainable parameters.""" def begin(self): tf.logging.info("Number of trainable parameters: %d", misc.count_parameters()) _DEFAULT_COUNTERS_COLLECTION = "counters" def add_counter(name, tensor): """Registers a new counter. Args:
def get_dataset_size(self, data_file):
  """Returns the number of TFRecord entries in :obj:`data_file`."""
  record_iterator = compat.tf_compat(
      v1="python_io.tf_record_iterator")(data_file)
  return sum(1 for _ in record_iterator)