Exemple #1
0
 def add_args(parser):
     """Register knowledge-distillation options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the teacher-model and top-k probability options defined here.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument(
         "--top-k-probs-binary-file",
         metavar="PROBSFILE",
         type=str,
         default=None,
         help="path to .npz file containing KD target probabilities for "
         "each output token in training data.",
     )
     parser.add_argument(
         "--teacher-path",
         metavar="FILE",
         type=str,
         default=None,
         help="path(s) to teacher model file(s) colon separated",
     )
     parser.add_argument(
         "--top-k-teacher-tokens",
         type=int,
         default=8,
         # The help text must be ONE string: argparse %-formats it when
         # rendering --help, which raises TypeError for a tuple of strings.
         help=(
             "Incorporating only the top k words from the teacher model. "
             "We zero out all other possibilities and normalize the probabilities "
             "based on the K top element. "
             "If top-k-teacher-tokens=0, it backs up to the original way of "
             "enumerating all."
         ),
     )
 def add_args(parser):
     """Register semi-supervised (monolingual text data) options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the monolingual text-file, ratio and loss-weight options.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument(
         "--train-mono-source-text-file",
         default="",
         help="Path for the text file containing monolingual source "
         "training examples.",
     )
     parser.add_argument(
         "--train-mono-target-text-file",
         default="",
         help="Path for the text file containing monolingual target "
         "training examples.",
     )
     parser.add_argument(
         "--monolingual-ratio",
         default=None,
         type=float,
         metavar="N",
         help="Upper-bounds the number of monolingual examples to N times "
         "the amount of parallel data.",
     )
     parser.add_argument(
         "--loss-weights-json",
         default="",
         # Trailing space added so the two concatenated literals do not run
         # together as "`loss_weights`:[[...".
         help="JSON representation of `loss_weights`: "
         "[[num_epochs, {'model_key': weight, ...}], ...]",
     )
Exemple #3
0
 def add_args(parser):
     """Register semi-supervised (monolingual binary data) options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the paths to the monolingual source/target binary files.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument(
         "--train-mono-source-binary-path",
         default="",
         help="Path for the binary file containing monolingual source "
         "training examples.",
     )
     parser.add_argument(
         "--train-mono-target-binary-path",
         default="",
         help="Path for the binary file containing monolingual target "
         "training examples.",
     )
 def _init_task(self):
     """Create ``self.task`` from a small letter-based source dictionary.

     Stores the lowercase alphabet on ``self.symbols``, adds its first
     ``self.n_symbols`` characters to a fresh source ``Dictionary``, and
     pairs it with a freshly constructed target ``Dictionary`` in a
     ``PytorchTranslateTask`` built with ``None`` as its args.
     """
     source_vocab = Dictionary()
     self.symbols = "abcdefghijklmnopqrstuvwxyz"
     # Only the leading n_symbols letters become vocabulary entries.
     active_symbols = self.symbols[: self.n_symbols]
     for letter in active_symbols:
         source_vocab.add_symbol(letter)
     target_vocab = Dictionary()
     self.task = PytorchTranslateTask(None, source_vocab, target_vocab)
Exemple #5
0
    def add_args(parser):
        """Register semi-supervised (monolingual binary data) options on ``parser``.

        The options of ``PytorchTranslateTask`` are registered first, followed
        by the monolingual binary paths and the monolingual/parallel ratio cap.

        Args:
            parser: argparse-style parser (anything exposing ``add_argument``).
        """
        PytorchTranslateTask.add_args(parser)

        parser.add_argument(
            "--train-mono-source-binary-path",
            default="",
            help="Path for the binary file containing monolingual source "
            "training examples.",
        )
        parser.add_argument(
            "--train-mono-target-binary-path",
            default="",
            help="Path for the binary file containing monolingual target "
            "training examples.",
        )
        parser.add_argument(
            "--monolingual-ratio",
            default=None,
            type=float,
            metavar="N",
            help="Upper-bounds the number of monolingual examples to N times "
            "the amount of parallel data.",
        )
    def build_model(cls, args, task):
        """Construct the primal and dual models and wrap them together.

        Both directions are built from the same hyper-parameters in ``args``;
        they differ only in the dictionaries taken from ``task`` and in the
        optional pretrained checkpoint that is loaded into each.
        Support for different models/archs would be added in further iterations.

        Args:
            args: parsed options; ``base_architecture`` is applied to it first.
            task: object exposing ``primal_src_dict``, ``primal_tgt_dict``,
                ``dual_src_dict`` and ``dual_tgt_dict``.

        Returns:
            An ``RNNDualLearningModel`` holding the primal model, the dual
            model and a ``None`` language-model placeholder.
        """
        base_architecture(args)

        encoder_class = LSTMSequenceEncoder if args.sequence_lstm else RNNEncoder
        decoder_class = RNNDecoder

        def _build_direction(src_dict, tgt_dict, checkpoint_attr, label):
            # Builds one translation direction: embeddings, encoder, decoder,
            # model, then optionally restores weights from a checkpoint.
            enc_embed, dec_embed = RNNModel.build_embed_tokens(
                args, src_dict, tgt_dict
            )
            encoder = encoder_class(
                src_dict,
                embed_dim=args.encoder_embed_dim,
                embed_tokens=enc_embed,
                cell_type=args.cell_type,
                num_layers=args.encoder_layers,
                hidden_dim=args.encoder_hidden_dim,
                dropout_in=args.encoder_dropout_in,
                dropout_out=args.encoder_dropout_out,
                residual_level=args.residual_level,
                bidirectional=bool(args.encoder_bidirectional),
            )
            decoder = decoder_class(
                src_dict=src_dict,
                dst_dict=tgt_dict,
                embed_tokens=dec_embed,
                vocab_reduction_params=args.vocab_reduction_params,
                encoder_hidden_dim=args.encoder_hidden_dim,
                embed_dim=args.decoder_embed_dim,
                out_embed_dim=args.decoder_out_embed_dim,
                cell_type=args.cell_type,
                num_layers=args.decoder_layers,
                hidden_dim=args.decoder_hidden_dim,
                attention_type=args.attention_type,
                dropout_in=args.decoder_dropout_in,
                dropout_out=args.decoder_dropout_out,
                residual_level=args.residual_level,
                averaging_encoder=args.averaging_encoder,
            )
            direction_task = PytorchTranslateTask(args, src_dict, tgt_dict)
            model = rnn.RNNModel(direction_task, encoder, decoder)

            # Looked up only after the model exists, matching the point at
            # which the checkpoint option is first consulted.
            checkpoint_path = getattr(args, checkpoint_attr)
            if checkpoint_path:
                state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint_path)
                model.load_state_dict(state["model"], strict=True)
                print(f"Loaded pretrained {label} model from {checkpoint_path}")
            return model

        primal_model = _build_direction(
            task.primal_src_dict,
            task.primal_tgt_dict,
            "pretrained_forward_checkpoint",
            "primal",
        )
        dual_model = _build_direction(
            task.dual_src_dict,
            task.dual_tgt_dict,
            "pretrained_backward_checkpoint",
            "dual",
        )

        # TODO (T36875783): instantiate a language model
        lm_model = None
        return RNNDualLearningModel(args, task, primal_model, dual_model,
                                    lm_model)
 def add_args(parser):
     """Register task-specific options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the ``--save-only`` switch.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument("--save-only",
                         action="store_true",
                         help="skip eval and only do save")
    def add_args(parser):
        """Register denoising-autoencoder options on ``parser``.

        The options of ``PytorchTranslateTask`` are registered first.

        Monolingual data is only required when the denoising autoencoder
        objective is applied to monolingual data. It is also possible to add
        a denoising autoencoder objective using just one side (source or
        target) of the parallel dataset.

        Args:
            parser: argparse-style parser (anything exposing ``add_argument``).
        """
        PytorchTranslateTask.add_args(parser)
        parser.add_argument(
            "--train-mono-source-binary-path",
            default="",
            metavar="FILE",
            type=str,
            help="Path for the binary file containing monolingual source "
            "training examples.",
        )
        parser.add_argument(
            "--train-mono-target-binary-path",
            default="",
            metavar="FILE",
            type=str,
            help="Path for the binary file containing monolingual target "
            "training examples.",
        )

        # TODO(T35539829): implement a Noising registry so we can build a noiser
        # and use the corresponding class to pass noise-type specific args
        parser.add_argument(
            "--max-word-shuffle-distance",
            default=3,
            type=int,
            help="Maximum distance to swap words.",
        )
        parser.add_argument(
            "--word-dropout-prob",
            default=0.2,
            type=float,
            help="Probability for dropping words.",
        )
        parser.add_argument(
            "--word-blanking-prob",
            default=0.2,
            type=float,
            help="Probability for replacing a word with an UNK token",
        )

        # Each flag below may be given bare (const=True) or with an explicit
        # value parsed by utils.bool_flag; all default to False.
        parser.add_argument(
            "--denoising-source-parallel",
            type=utils.bool_flag,
            nargs="?",
            const=True,
            default=False,
            help="Whether to add a denoising autoencoder objective using "
            "the source side of the parallel data",
        )
        parser.add_argument(
            "--denoising-target-parallel",
            type=utils.bool_flag,
            nargs="?",
            const=True,
            default=False,
            help="Whether to add a denoising autoencoder objective using "
            "the target side of the parallel data",
        )
        parser.add_argument(
            "--denoising-source-mono",
            type=utils.bool_flag,
            nargs="?",
            const=True,
            default=False,
            help="Whether to add a denoising autoencoder objective using "
            "the monolingual source corpus",
        )
        parser.add_argument(
            "--denoising-target-mono",
            type=utils.bool_flag,
            nargs="?",
            const=True,
            default=False,
            # Help text previously said "source" (copy-paste from the flag above).
            help="Whether to add a denoising autoencoder objective using "
            "the monolingual target corpus",
        )
Exemple #9
0
    def build_model(cls, args, task):
        """Construct the primal and dual models and wrap them together.

        Both directions are built from the same hyper-parameters in ``args``;
        they differ only in the dictionaries taken from ``task``.
        Support for different models/archs would be added in further iterations.

        Args:
            args: parsed options; ``base_architecture`` is applied to it first.
            task: object exposing ``primal_src_dict``, ``primal_tgt_dict``,
                ``dual_src_dict`` and ``dual_tgt_dict``.

        Returns:
            An ``RNNDualLearningModel`` holding the primal model, the dual
            model and a ``None`` language-model placeholder.
        """
        base_architecture(args)

        encoder_class = LSTMSequenceEncoder if args.sequence_lstm else RNNEncoder
        decoder_class = RNNDecoder

        def _build_direction(src_dict, tgt_dict):
            # Builds one translation direction: encoder, decoder, then model.
            encoder = encoder_class(
                src_dict,
                embed_dim=args.encoder_embed_dim,
                freeze_embed=args.encoder_freeze_embed,
                cell_type=args.cell_type,
                num_layers=args.encoder_layers,
                hidden_dim=args.encoder_hidden_dim,
                dropout_in=args.encoder_dropout_in,
                dropout_out=args.encoder_dropout_out,
                residual_level=args.residual_level,
                bidirectional=bool(args.encoder_bidirectional),
            )
            decoder = decoder_class(
                src_dict=src_dict,
                dst_dict=tgt_dict,
                vocab_reduction_params=args.vocab_reduction_params,
                encoder_hidden_dim=args.encoder_hidden_dim,
                embed_dim=args.decoder_embed_dim,
                freeze_embed=args.decoder_freeze_embed,
                out_embed_dim=args.decoder_out_embed_dim,
                cell_type=args.cell_type,
                num_layers=args.decoder_layers,
                hidden_dim=args.decoder_hidden_dim,
                attention_type=args.attention_type,
                dropout_in=args.decoder_dropout_in,
                dropout_out=args.decoder_dropout_out,
                residual_level=args.residual_level,
                averaging_encoder=args.averaging_encoder,
            )
            direction_task = PytorchTranslateTask(args, src_dict, tgt_dict)
            return rnn.RNNModel(direction_task, encoder, decoder)

        primal_model = _build_direction(task.primal_src_dict, task.primal_tgt_dict)
        dual_model = _build_direction(task.dual_src_dict, task.dual_tgt_dict)

        # TODO (T36875783): instantiate a language model
        lm_model = None
        return RNNDualLearningModel(args, task, primal_model, dual_model,
                                    lm_model)
Exemple #10
0
 def add_args(parser):
     """Register dual-learning options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the dual criterion / reward settings, the monolingual data paths,
     and the vocab and binary data paths for the forward and backward models.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument(
         "--dual-criterion",
         default="unsupervised_criterion",
         help="Criterion for jointly train primal and dual models",
     )
     parser.add_argument(
         "--reward-alpha",
         type=float,
         default=0.005,
         help="Hyperparam to weigh two rewards",
     )
     parser.add_argument(
         "--soft-updates",
         type=int,
         metavar="N",
         default=15000,
         help="Number of updates before training with mono",
     )
     parser.add_argument(
         "--train-mono-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing monolingual source "
         "training examples.",
     )
     parser.add_argument(
         "--train-mono-target-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing monolingual target "
         "training examples.",
     )

     # ---- forward (primal) model data ----
     parser.add_argument(
         "--forward-source-vocab-file",
         default="",
         metavar="FILE",
         help="Path to text file representing the dictionary of tokens to use. "
         "If the file does not exist, the dict is auto-generated from source "
         "training data and saved as that file.",
     )
     parser.add_argument(
         "--forward-target-vocab-file",
         default="",
         metavar="FILE",
         # Help previously said "source training data" (copy-paste).
         help="Path to text file representing the dictionary of tokens to use. "
         "If the file does not exist, the dict is auto-generated from target "
         "training data and saved as that file.",
     )
     parser.add_argument(
         "--forward-train-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source training "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-train-target-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing target training "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-eval-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source valid "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-eval-target-binary-path",
         default="",
         metavar="FILE",
         # Help previously said "training" for this eval path (copy-paste).
         help="Path for the binary file containing target valid "
         "examples for forward model.",
     )

     # ---- backward (dual) model data ----
     parser.add_argument(
         "--backward-source-vocab-file",
         default="",
         metavar="FILE",
         help="Path to text file representing the dictionary of tokens to use. "
         "If the file does not exist, the dict is auto-generated from source "
         "training data and saved as that file.",
     )
     parser.add_argument(
         "--backward-target-vocab-file",
         default="",
         metavar="FILE",
         # Help previously said "source training data" (copy-paste).
         help="Path to text file representing the dictionary of tokens to use. "
         "If the file does not exist, the dict is auto-generated from target "
         "training data and saved as that file.",
     )
     parser.add_argument(
         "--backward-train-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source training "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-train-target-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing target training "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-eval-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source valid "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-eval-target-binary-path",
         default="",
         metavar="FILE",
         # Help previously said "training" and misspelled "backward".
         help="Path for the binary file containing target valid "
         "examples for backward model.",
     )
     parser.add_argument("--remove-eos-at-src",
                         action="store_true",
                         help="If True, remove eos")
 def add_args(parser):
     """Register dual-learning options on ``parser``.

     The options of ``PytorchTranslateTask`` are registered first, followed
     by the dual criterion / reward settings, the binary data paths for the
     forward and backward models, and the pretrained-checkpoint options.

     Args:
         parser: argparse-style parser (anything exposing ``add_argument``).
     """
     PytorchTranslateTask.add_args(parser)
     parser.add_argument(
         "--dual-criterion",
         default="unsupervised_criterion",
         help="Criterion for jointly train primal and dual models",
     )
     parser.add_argument(
         "--reward-alpha",
         type=float,
         default=0.005,
         help="Hyperparam to weigh two rewards",
     )
     parser.add_argument(
         "--soft-updates",
         type=int,
         metavar="N",
         default=15000,
         help="Number of updates before training with mono",
     )

     # ---- forward (primal) model data ----
     parser.add_argument(
         "--forward-train-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source training "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-train-target-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing target training "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-eval-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source valid "
         "examples for forward model.",
     )
     parser.add_argument(
         "--forward-eval-target-binary-path",
         default="",
         metavar="FILE",
         # Help previously said "training" for this eval path (copy-paste).
         help="Path for the binary file containing target valid "
         "examples for forward model.",
     )

     # ---- backward (dual) model data ----
     parser.add_argument(
         "--backward-train-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source training "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-train-target-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing target training "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-eval-source-binary-path",
         default="",
         metavar="FILE",
         help="Path for the binary file containing source valid "
         "examples for backward model.",
     )
     parser.add_argument(
         "--backward-eval-target-binary-path",
         default="",
         metavar="FILE",
         # Help previously said "training" for this eval path (copy-paste).
         help="Path for the binary file containing target valid "
         "examples for backward model.",
     )
     parser.add_argument(
         "--remove-eos-at-src", action="store_true", help="If True, remove eos"
     )
     parser.add_argument(
         "--pretrained-forward-checkpoint",
         default="",
         help="Load pretrained forward model",
     )
     parser.add_argument(
         "--pretrained-backward-checkpoint",
         default="",
         help="Load pretrained backward model",
     )
     parser.add_argument(
         "--reconstruction-bleu-order",
         # type=int so a value given on the command line is an int like the
         # default, instead of arriving as a str.
         type=int,
         default=2,
         help="BLEU score order to use as reward for reconstruction",
     )