def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs and restore their variables.
    logging.debug("Loading models\n")
    models = []

    # ============= 19/8/16 KP ============
    warning('='*20 + 'Model Config to Load')
    warning(settings.models)
    # =====================================

    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(config, session,
                                                           ensemble_scope=scope)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # ============= 19/8/16 KP ============
    model_summary()
    # =====================================

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs.
    logging.debug("Loading models\n")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # Add smoothing variables (if the models were trained with smoothing).
    # FIXME Assumes either all models were trained with smoothing or none were.
    if configs[0].exponential_smoothing > 0.0:
        smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

    # Restore the model variables.
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            _ = model_loader.init_or_restore_variables(config, session,
                                                       ensemble_scope=scope)

    # Swap-in the smoothed versions of the variables.
    if configs[0].exponential_smoothing > 0.0:
        session.run(fetches=smoothing.swap_ops)

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
def __init__(self, config):
    # Set attributes
    self.config = config
    self.source_vocab_size = config.source_vocab_sizes[0]
    self.target_vocab_size = config.target_vocab_size
    self.name = 'transformer'
    self.int_dtype = tf.int32
    self.float_dtype = tf.float32

    # Placeholders
    self.inputs = model_inputs.ModelInputs(config)

    # Convert from time-major to batch-major, handle factors
    self.source_ids, \
        self.source_mask, \
        self.target_ids_in, \
        self.target_ids_out, \
        self.target_mask = self._convert_inputs(self.inputs)

    self.training = self.inputs.training

    # Build the common parts of the graph.
    with tf.name_scope('{:s}_loss'.format(self.name)):
        # (Re-)generate the computational graph
        self.dec_vocab_size = self._build_graph()

    # Build the training-specific parts of the graph.
    with tf.name_scope('{:s}_loss'.format(self.name)):
        # Encode source sequences
        with tf.name_scope('{:s}_encode'.format(self.name)):
            enc_output, cross_attn_mask = self.enc.encode(
                self.source_ids, self.source_mask)

        # Decode into target sequences
        with tf.name_scope('{:s}_decode'.format(self.name)):
            logits = self.dec.decode_at_train(self.target_ids_in,
                                              enc_output,
                                              cross_attn_mask)

        # Instantiate loss layer(s)
        loss_layer = MaskedCrossEntropy(self.dec_vocab_size,
                                        self.config.label_smoothing,
                                        self.int_dtype,
                                        self.float_dtype,
                                        time_major=False,
                                        name='loss_layer')

        # Calculate loss
        masked_loss, sentence_loss, batch_loss = \
            loss_layer.forward(logits, self.target_ids_out,
                               self.target_mask, self.training)

        sent_lens = tf.reduce_sum(self.target_mask, axis=1, keepdims=False)
        self._loss_per_sentence = sentence_loss * sent_lens
        self._loss = tf.reduce_mean(self._loss_per_sentence, keepdims=False)

    self.sampling_utils = SamplingUtils(config)
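# A small numeric illustration of the loss bookkeeping above, assuming
# MaskedCrossEntropy's `sentence_loss` is the per-token mean of the masked
# token losses (as the multiplication by `sent_lens` suggests): rescaling by
# the sentence length recovers the summed negative log-probability of each
# target sentence. Standalone sketch, not part of the model code.
import tensorflow as tf

masked_loss = tf.constant([[2.0, 1.0, 0.0],     # token-level losses, padded
                           [1.5, 0.5, 0.5]])
target_mask = tf.constant([[1.0, 1.0, 0.0],
                           [1.0, 1.0, 1.0]])

sent_lens = tf.reduce_sum(target_mask, axis=1)                  # [2., 3.]
sentence_loss = tf.reduce_sum(masked_loss, axis=1) / sent_lens  # per-token mean
loss_per_sentence = sentence_loss * sent_lens                   # [3.0, 2.5] summed NLL
print(sent_lens.numpy(), sentence_loss.numpy(), loss_per_sentence.numpy())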
def __init__(self, config):
    self.inputs = model_inputs.ModelInputs(config)

    # Dropout functions for words.
    # These probabilistically zero-out all embedding values for individual
    # words.
    dropout_source, dropout_target = None, None
    if config.rnn_use_dropout and config.rnn_dropout_source > 0.0:
        def dropout_source(x):
            return tf.layers.dropout(x,
                                     noise_shape=(tf.shape(x)[0],
                                                  tf.shape(x)[1], 1),
                                     rate=config.rnn_dropout_source,
                                     training=self.inputs.training)
    if config.rnn_use_dropout and config.rnn_dropout_target > 0.0:
        def dropout_target(y):
            return tf.layers.dropout(y,
                                     noise_shape=(tf.shape(y)[0],
                                                  tf.shape(y)[1], 1),
                                     rate=config.rnn_dropout_target,
                                     training=self.inputs.training)

    # Dropout functions for use within FF, GRU, and attention layers.
    # We use Gal and Ghahramani (2016)-style dropout, so these functions
    # will be used to create 2D dropout masks that are reused at every
    # timestep.
    dropout_embedding, dropout_hidden = None, None
    if config.rnn_use_dropout and config.rnn_dropout_embedding > 0.0:
        def dropout_embedding(e):
            return tf.layers.dropout(e, noise_shape=tf.shape(e),
                                     rate=config.rnn_dropout_embedding,
                                     training=self.inputs.training)
    if config.rnn_use_dropout and config.rnn_dropout_hidden > 0.0:
        def dropout_hidden(h):
            return tf.layers.dropout(h, noise_shape=tf.shape(h),
                                     rate=config.rnn_dropout_hidden,
                                     training=self.inputs.training)

    batch_size = tf.shape(self.inputs.x)[-1]  # dynamic value

    with tf.variable_scope("encoder"):
        self.encoder = Encoder(config, batch_size, dropout_source,
                               dropout_embedding, dropout_hidden)
        ctx, embs = self.encoder.get_context(self.inputs.x, self.inputs.x_mask)

    with tf.variable_scope("decoder"):
        if config.tie_encoder_decoder_embeddings:
            tied_embeddings = self.encoder.emb_layer
        else:
            tied_embeddings = None
        self.decoder = Decoder(config, ctx, embs, self.inputs.x_mask,
                               dropout_target, dropout_embedding,
                               dropout_hidden, tied_embeddings)
        self.logits = self.decoder.score(self.inputs.y)

    with tf.variable_scope("loss"):
        self.loss_layer = layers.Masked_cross_entropy_loss(
            self.inputs.y, self.inputs.y_mask, config.label_smoothing,
            training=self.inputs.training)
        self._loss_per_sentence = self.loss_layer.forward(self.logits)
        self._loss = tf.reduce_mean(self._loss_per_sentence, keepdims=False)

    self.sampling_utils = SamplingUtils(config)
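# The word-level dropout above relies on `noise_shape` broadcasting: one
# keep/drop decision is made per (timestep, sentence) position and reused
# across the embedding dimension, so a dropped word loses its entire
# embedding vector. A minimal standalone sketch of that mechanism, using
# eager-mode `tf.nn.dropout` purely for illustration (it is not part of the
# model code above):
import tensorflow as tf

x = tf.ones((3, 2, 4))  # toy embeddings: (time, batch, emb_dim)

# noise_shape=(3, 2, 1): the last dimension is broadcast, so each word is
# either kept (and scaled by 1/(1-rate)) or zeroed out as a whole vector.
word_dropped = tf.nn.dropout(x, rate=0.5, noise_shape=(3, 2, 1))
print(word_dropped)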
def __init__(self, config):
    # Set attributes
    self.config = config
    self.source_vocab_size = config.source_vocab_sizes[0]
    self.target_vocab_size = config.target_vocab_size
    self.name = 'transformer'

    # Placeholders
    self.inputs = model_inputs.ModelInputs(config)

    # Convert from time-major to batch-major, handle factors
    self.source_ids, \
        self.source_mask, \
        self.target_ids_in, \
        self.target_ids_out, \
        self.target_mask = self._convert_inputs(self.inputs)

    self.training = self.inputs.training
    self.scores = self.inputs.scores
    self.index = self.inputs.index

    # Build the common parts of the graph.
    with tf.name_scope('{:s}_loss'.format(self.name)):
        # (Re-)generate the computational graph
        self.dec_vocab_size = self._build_graph()

    # Build the training-specific parts of the graph.
    with tf.name_scope('{:s}_loss'.format(self.name)):
        # Encode source sequences
        with tf.name_scope('{:s}_encode'.format(self.name)):
            enc_output, cross_attn_mask = self.enc.encode(
                self.source_ids, self.source_mask)

        # Decode into target sequences
        with tf.name_scope('{:s}_decode'.format(self.name)):
            logits = self.dec.decode_at_train(self.target_ids_in,
                                              enc_output,
                                              cross_attn_mask)

        # Instantiate loss layer(s)
        loss_layer = MaskedCrossEntropy(self.dec_vocab_size,
                                        self.config.label_smoothing,
                                        INT_DTYPE,
                                        FLOAT_DTYPE,
                                        time_major=False,
                                        name='loss_layer')

        # Calculate loss
        masked_loss, sentence_loss, batch_loss = \
            loss_layer.forward(logits, self.target_ids_out,
                               self.target_mask, self.training)

        if self.config.print_per_token_pro:
            # e**(-(-log(probability))) = probability
            self._print_pro = tf.math.exp(-masked_loss)

        sent_lens = tf.reduce_sum(self.target_mask, axis=1, keepdims=False)
        self._loss_per_sentence = sentence_loss * sent_lens
        self._loss = tf.reduce_mean(self._loss_per_sentence, keepdims=False)

        # Calculate expected risk.
        if self.config.loss_function == 'MRT':
            # self._loss_per_sentence is the negative log probability of the
            # output sentence; each element is the loss of one sample pair.
            self._risk = mru.mrt_cost(self._loss_per_sentence, self.scores,
                                      self.index, self.config)

    self.sampling_utils = SamplingUtils(config)
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    g = tf.Graph()
    with g.as_default():
        tf_config = tf.compat.v1.ConfigProto()
        tf_config.allow_soft_placement = True
        session = tf.compat.v1.Session(config=tf_config)

        # Load config file for each model.
        configs = []
        for model in settings.models:
            config = load_config_from_json_file(model)
            setattr(config, 'reload', model)
            setattr(config, 'translation_maxlen', settings.translation_maxlen)
            configs.append(config)

        # Create the model graphs.
        logging.debug("Loading models\n")
        models = []
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                if config.model_type == "transformer":
                    model = TransformerModel(
                        config, consts_config_str=settings.config_str)
                else:
                    model = rnn_model.RNNModel(config)
                model.sampling_utils = SamplingUtils(settings)
                models.append(model)

        # Add smoothing variables (if the models were trained with smoothing).
        # FIXME Assumes either all models were trained with smoothing or none were.
        if configs[0].exponential_smoothing > 0.0:
            smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

        # Restore the model variables.
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                _ = model_loader.init_or_restore_variables(
                    config, session, ensemble_scope=scope)

        # Swap-in the smoothed versions of the variables.
        if configs[0].exponential_smoothing > 0.0:
            session.run(fetches=smoothing.swap_ops)

        max_translation_len = settings.translation_maxlen

        # Create a BeamSearchSampler / RandomSampler.
        if settings.translation_strategy == 'beam_search':
            sampler = BeamSearchSampler(models, configs, settings.beam_size)
        else:
            assert settings.translation_strategy == 'sampling'
            sampler = RandomSampler(models, configs, settings.beam_size)

        # Warn about the change from neg log probs to log probs for the RNN.
        if settings.n_best:
            model_types = [config.model_type for config in configs]
            if 'rnn' in model_types:
                logging.warn(
                    'n-best scores for RNN models have changed from '
                    'positive to negative (as of commit 95793196...). '
                    'If you are using the scores for reranking etc, then '
                    'you may need to update your scripts.')

        # Translate the source file.
        translate_utils.translate_file(
            input_file=settings.input,
            output_file=settings.output,
            session=session,
            sampler=sampler,
            config=configs[0],
            max_translation_len=max_translation_len,
            normalization_alpha=settings.normalization_alpha,
            consts_config_str=settings.config_str,
            nbest=settings.n_best,
            minibatch_size=settings.minibatch_size,
            maxibatch_size=settings.maxibatch_size)
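# A hypothetical way to drive main() outside the normal command-line entry
# point: a sketch, assuming a plain namespace that supplies the attributes
# read above. The real argument parser defines these (and possibly more,
# e.g. a sampling temperature); the file names and model prefix below are
# illustrative only.
from argparse import Namespace

if __name__ == '__main__':
    settings = Namespace(
        models=['model'],                    # saved model prefix(es), illustrative
        input=open('test.src', 'r', encoding='utf-8'),
        output=open('test.out', 'w', encoding='utf-8'),
        translation_strategy='beam_search',  # or 'sampling'
        beam_size=5,
        n_best=False,
        translation_maxlen=200,
        minibatch_size=80,
        maxibatch_size=20,
        normalization_alpha=1.0,
        config_str=None)                     # forwarded as consts_config_str
    main(settings)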