Example #1
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs and restore their variables.
    logging.debug("Loading models\n")
    models = []

    # ============= 19/8/16 KP ============
    logging.warning('='*20 + 'Model Config to Load')
    logging.warning(settings.models)
    # =====================================

    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(config, session,
                                                           ensemble_scope=scope)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # ============= 19/8/16 KP ============
    model_summary()
    # =====================================

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
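
The snippet expects `settings` to behave like an argparse namespace. A minimal sketch of driving it from a script, assuming only the attributes read above (the checkpoint path is hypothetical, and SamplingUtils may read further sampling-related fields not shown here):

import argparse
import sys

settings = argparse.Namespace(
    verbose=True,
    models=['model.npz'],   # hypothetical checkpoint path
    input=sys.stdin,
    output=sys.stdout,
    beam_size=5,
    n_best=False,
    minibatch_size=80,
    maxibatch_size=20,
    normalization_alpha=1.0,
)
main(settings)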
Example #2
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs.
    logging.debug("Loading models\n")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # Add smoothing variables (if the models were trained with smoothing).
    # FIXME Assumes either all models were trained with smoothing or none were.
    if configs[0].exponential_smoothing > 0.0:
        smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

    # Restore the model variables.
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            _ = model_loader.init_or_restore_variables(config, session,
                                                       ensemble_scope=scope)

    # Swap-in the smoothed versions of the variables.
    if configs[0].exponential_smoothing > 0.0:
        session.run(fetches=smoothing.swap_ops)

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
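
Note the ordering above: the ExponentialSmoothing variables are created after the model graphs but before init_or_restore_variables, presumably so the checkpoint restore can also populate the smoothed shadow copies. A minimal sketch (TF1 style, illustrative names, not the Nematus API) of the swap-in pattern:

import tensorflow as tf

weight = tf.Variable(1.0, name='weight')
weight_smoothed = tf.Variable(0.8, name='weight_smoothed')  # shadow copy

# Stand-in for one element of smoothing.swap_ops: install the smoothed
# value over the live variable so inference uses the averaged weights.
swap_op = tf.assign(weight, weight_smoothed)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(swap_op)
    print(sess.run(weight))  # 0.8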
Example #3
    def __init__(self, config):
        # Set attributes
        self.config = config
        self.source_vocab_size = config.source_vocab_sizes[0]
        self.target_vocab_size = config.target_vocab_size
        self.name = 'transformer'
        self.int_dtype = tf.int32
        self.float_dtype = tf.float32

        # Placeholders
        self.inputs = model_inputs.ModelInputs(config)

        # Convert from time-major to batch-major, handle factors
        self.source_ids, \
            self.source_mask, \
            self.target_ids_in, \
            self.target_ids_out, \
            self.target_mask = self._convert_inputs(self.inputs)

        self.training = self.inputs.training

        # Build the common parts of the graph.
        with tf.name_scope('{:s}_loss'.format(self.name)):
            # (Re-)generate the computational graph
            self.dec_vocab_size = self._build_graph()

        # Build the training-specific parts of the graph.
        with tf.name_scope('{:s}_loss'.format(self.name)):
            # Encode source sequences
            with tf.name_scope('{:s}_encode'.format(self.name)):
                enc_output, cross_attn_mask = self.enc.encode(
                    self.source_ids, self.source_mask)
            # Decode into target sequences
            with tf.name_scope('{:s}_decode'.format(self.name)):
                logits = self.dec.decode_at_train(self.target_ids_in,
                                                  enc_output,
                                                  cross_attn_mask)
            # Instantiate loss layer(s)
            loss_layer = MaskedCrossEntropy(self.dec_vocab_size,
                                            self.config.label_smoothing,
                                            self.int_dtype,
                                            self.float_dtype,
                                            time_major=False,
                                            name='loss_layer')
            # Calculate loss
            masked_loss, sentence_loss, batch_loss = \
                loss_layer.forward(logits, self.target_ids_out, self.target_mask, self.training)

            sent_lens = tf.reduce_sum(self.target_mask, axis=1, keepdims=False)
            self._loss_per_sentence = sentence_loss * sent_lens
            self._loss = tf.reduce_mean(self._loss_per_sentence, keepdims=False)
        
        self.sampling_utils = SamplingUtils(config)
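
A small worked sketch (plain NumPy, assuming sentence_loss is a per-token average, as the bookkeeping above implies) of how multiplying by the mask-derived lengths recovers per-sentence loss sums:

import numpy as np

target_mask = np.array([[1, 1, 1, 0],
                        [1, 1, 0, 0]], dtype=np.float32)  # (batch, time)
sentence_loss = np.array([0.5, 0.8], dtype=np.float32)    # mean loss per token

sent_lens = target_mask.sum(axis=1)            # [3. 2.]
loss_per_sentence = sentence_loss * sent_lens  # [1.5 1.6]
loss = loss_per_sentence.mean()                # 1.55
print(loss_per_sentence, loss)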
Example #4
    def __init__(self, config):
        self.inputs = model_inputs.ModelInputs(config)

        # Dropout functions for words.
        # These probabilistically zero-out all embedding values for individual
        # words.
        dropout_source, dropout_target = None, None
        if config.rnn_use_dropout and config.rnn_dropout_source > 0.0:

            def dropout_source(x):
                return tf.layers.dropout(x,
                                         noise_shape=(tf.shape(x)[0],
                                                      tf.shape(x)[1], 1),
                                         rate=config.rnn_dropout_source,
                                         training=self.inputs.training)

        if config.rnn_use_dropout and config.rnn_dropout_target > 0.0:

            def dropout_target(y):
                return tf.layers.dropout(y,
                                         noise_shape=(tf.shape(y)[0],
                                                      tf.shape(y)[1], 1),
                                         rate=config.rnn_dropout_target,
                                         training=self.inputs.training)

        # Dropout functions for use within FF, GRU, and attention layers.
        # We use Gal and Ghahramani (2016)-style dropout, so these functions
        # will be used to create 2D dropout masks that are reused at every
        # timestep.
        dropout_embedding, dropout_hidden = None, None
        if config.rnn_use_dropout and config.rnn_dropout_embedding > 0.0:

            def dropout_embedding(e):
                return tf.layers.dropout(e,
                                         noise_shape=tf.shape(e),
                                         rate=config.rnn_dropout_embedding,
                                         training=self.inputs.training)

        if config.rnn_use_dropout and config.rnn_dropout_hidden > 0.0:

            def dropout_hidden(h):
                return tf.layers.dropout(h,
                                         noise_shape=tf.shape(h),
                                         rate=config.rnn_dropout_hidden,
                                         training=self.inputs.training)

        batch_size = tf.shape(self.inputs.x)[-1]  # dynamic value

        with tf.variable_scope("encoder"):
            self.encoder = Encoder(config, batch_size, dropout_source,
                                   dropout_embedding, dropout_hidden)
            ctx, embs = self.encoder.get_context(self.inputs.x,
                                                 self.inputs.x_mask)

        with tf.variable_scope("decoder"):
            if config.tie_encoder_decoder_embeddings:
                tied_embeddings = self.encoder.emb_layer
            else:
                tied_embeddings = None
            self.decoder = Decoder(config, ctx, embs, self.inputs.x_mask,
                                   dropout_target, dropout_embedding,
                                   dropout_hidden, tied_embeddings)
            self.logits = self.decoder.score(self.inputs.y)

        with tf.variable_scope("loss"):
            self.loss_layer = layers.Masked_cross_entropy_loss(
                self.inputs.y,
                self.inputs.y_mask,
                config.label_smoothing,
                training=self.inputs.training)
            self._loss_per_sentence = self.loss_layer.forward(self.logits)
            self._loss = tf.reduce_mean(self._loss_per_sentence,
                                        keepdims=False)

        self.sampling_utils = SamplingUtils(config)
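
The word-level dropout above relies on noise_shape broadcasting: one keep/drop decision per (time, batch) position is broadcast across the embedding dimension, zeroing whole word vectors at once. A minimal TF1-style sketch of the same trick on a constant input:

import numpy as np
import tensorflow as tf

x = tf.constant(np.ones((2, 3, 4), dtype=np.float32))  # (time, batch, emb)
dropped = tf.layers.dropout(
    x,
    noise_shape=(tf.shape(x)[0], tf.shape(x)[1], 1),  # one decision per word
    rate=0.5,
    training=True)

with tf.Session() as sess:
    # Each word vector is either all zeros or scaled by 1 / (1 - rate).
    print(sess.run(dropped))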
Example #5
    def __init__(self, config):
        # Set attributes
        self.config = config
        self.source_vocab_size = config.source_vocab_sizes[0]
        self.target_vocab_size = config.target_vocab_size
        self.name = 'transformer'

        # Placeholders
        self.inputs = model_inputs.ModelInputs(config)

        # Convert from time-major to batch-major, handle factors
        self.source_ids, \
            self.source_mask, \
            self.target_ids_in, \
            self.target_ids_out, \
            self.target_mask = self._convert_inputs(self.inputs)

        self.training = self.inputs.training
        self.scores = self.inputs.scores
        self.index = self.inputs.index

        # Build the common parts of the graph.
        with tf.name_scope('{:s}_loss'.format(self.name)):
            # (Re-)generate the computational graph
            self.dec_vocab_size = self._build_graph()

        # Build the training-specific parts of the graph.
        with tf.name_scope('{:s}_loss'.format(self.name)):
            # Encode source sequences
            with tf.name_scope('{:s}_encode'.format(self.name)):
                enc_output, cross_attn_mask = self.enc.encode(
                    self.source_ids, self.source_mask)
            # Decode into target sequences
            with tf.name_scope('{:s}_decode'.format(self.name)):
                logits = self.dec.decode_at_train(self.target_ids_in,
                                                  enc_output, cross_attn_mask)
            # Instantiate loss layer(s)
            loss_layer = MaskedCrossEntropy(self.dec_vocab_size,
                                            self.config.label_smoothing,
                                            INT_DTYPE,
                                            FLOAT_DTYPE,
                                            time_major=False,
                                            name='loss_layer')
            # Calculate loss
            masked_loss, sentence_loss, batch_loss = \
                loss_layer.forward(logits, self.target_ids_out, self.target_mask, self.training)
            if self.config.print_per_token_pro:
                # exp(-(-log(p))) = p, so this recovers the per-token
                # probabilities from the masked negative log-likelihoods.
                self._print_pro = tf.math.exp(-masked_loss)

            sent_lens = tf.reduce_sum(self.target_mask, axis=1, keepdims=False)
            self._loss_per_sentence = sentence_loss * sent_lens
            self._loss = tf.reduce_mean(self._loss_per_sentence,
                                        keepdims=False)

            # calculate expected risk
            if self.config.loss_function == 'MRT':
                # self._loss_per_sentence is the negative log probability of the
                # output sentence; each element is the loss for one sampled pair.
                self._risk = mru.mrt_cost(self._loss_per_sentence, self.scores,
                                          self.index, self.config)

            self.sampling_utils = SamplingUtils(config)
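
A tiny worked check (plain NumPy) of the identity behind print_per_token_pro above: masked_loss holds per-token negative log probabilities, so exponentiating the negation recovers the probabilities themselves:

import numpy as np

masked_loss = np.array([0.105, 2.303, 0.0])  # -log(p) per token
token_probs = np.exp(-masked_loss)           # ~[0.9, 0.1, 1.0]
print(token_probs)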
Example #6
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    g = tf.Graph()
    with g.as_default():
        tf_config = tf.compat.v1.ConfigProto()
        tf_config.allow_soft_placement = True
        session = tf.compat.v1.Session(config=tf_config)

        # Load config file for each model.
        configs = []
        for model in settings.models:
            config = load_config_from_json_file(model)
            setattr(config, 'reload', model)
            setattr(config, 'translation_maxlen', settings.translation_maxlen)
            configs.append(config)

        # Create the model graphs.
        logging.debug("Loading models\n")
        models = []
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                if config.model_type == "transformer":
                    model = TransformerModel(
                        config, consts_config_str=settings.config_str)
                else:
                    model = rnn_model.RNNModel(config)
                model.sampling_utils = SamplingUtils(settings)
                models.append(model)
        # Add smoothing variables (if the models were trained with smoothing).
        # FIXME Assumes either all models were trained with smoothing or none were.
        if configs[0].exponential_smoothing > 0.0:
            smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

        # Restore the model variables.
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                _ = model_loader.init_or_restore_variables(
                    config, session, ensemble_scope=scope)

        # Swap-in the smoothed versions of the variables.
        if configs[0].exponential_smoothing > 0.0:
            session.run(fetches=smoothing.swap_ops)

        max_translation_len = settings.translation_maxlen

        # Create a BeamSearchSampler / RandomSampler.
        if settings.translation_strategy == 'beam_search':
            sampler = BeamSearchSampler(models, configs, settings.beam_size)
        else:
            assert settings.translation_strategy == 'sampling'
            sampler = RandomSampler(models, configs, settings.beam_size)

        # Warn about the change from neg log probs to log probs for the RNN.
        if settings.n_best:
            model_types = [config.model_type for config in configs]
            if 'rnn' in model_types:
                logging.warning(
                    'n-best scores for RNN models have changed from '
                    'positive to negative (as of commit 95793196...). '
                    'If you are using the scores for reranking etc., then '
                    'you may need to update your scripts.')

        # Translate the source file.
        translate_utils.translate_file(
            input_file=settings.input,
            output_file=settings.output,
            session=session,
            sampler=sampler,
            config=configs[0],
            max_translation_len=max_translation_len,
            normalization_alpha=settings.normalization_alpha,
            consts_config_str=settings.config_str,
            nbest=settings.n_best,
            minibatch_size=settings.minibatch_size,
            maxibatch_size=settings.maxibatch_size)
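
Unlike the earlier variants, this one builds everything inside an explicit tf.Graph(). A minimal sketch (illustrative variable, not Nematus code) of why that isolation matters: variables created under g stay out of whatever default graph the caller may already be using.

import tensorflow as tf

g = tf.Graph()
with g.as_default():
    with tf.compat.v1.variable_scope('model0'):
        v = tf.compat.v1.get_variable('dummy', shape=[], dtype=tf.float32)

# The variable is registered in g, not in the separate default graph.
assert v.graph is g
vars_in_g = g.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
assert len(vars_in_g) == 1 and vars_in_g[0] is v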