Example #1
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     parser.add_argument(
         "--input-feat-per-channel",
         type=int,
         metavar="N",
         help="encoder input dimension per input channel",
     )
     TransformerModel.add_args(parser)
     parser.add_argument('--encoder-convolutions', type=str, metavar='EXPR',
                         help='encoder layers [(dim, kernel_size), ...]')
     parser.add_argument('--normalization-constant', type=float, default=1.0)
     parser.add_argument('--no-attn-2d', action='store_true', default=False,
                         help="Whether to use 2d attention")
     parser.add_argument('--distance-penalty', type=str, default=False,
                         choices=['log', 'gauss'],
                         help='Add distance penalty to the encoder')
     parser.add_argument('--init-variance', type=float, default=1.0,
                         help='Initialization value for variance')
     parser.add_argument('--ctc-compress-out',  action='store_true', default=False,
                         help="If set, compress the CTC output based on predictions")
     parser.add_argument('--ctc-compress-strategy', type=str, default="avg",
                         choices=['avg', 'weighted', 'softmax'],
                         help="Strategy to use when compressing CTC output")
     parser.add_argument('--freeze-pretrained', action='store_true',
                         help='if set, all params loaded from the pretrained model are frozen')
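Several of the options above take EXPR strings (Python literals) or a restricted set of choices. A standalone sketch of how such flags behave on a plain argparse parser, using ast.literal_eval for the literal string; this mirrors the flag definitions above, not fairseq's actual parsing code:

import argparse
import ast

# Standalone sketch: re-declare a subset of the flags above on a plain parser.
parser = argparse.ArgumentParser()
parser.add_argument('--encoder-convolutions', type=str, metavar='EXPR',
                    help='encoder layers [(dim, kernel_size), ...]')
parser.add_argument('--ctc-compress-strategy', type=str, default='avg',
                    choices=['avg', 'weighted', 'softmax'])

args = parser.parse_args(['--encoder-convolutions', '[(64, 3), (128, 3)]',
                          '--ctc-compress-strategy', 'weighted'])

# The EXPR value is a Python literal; ast.literal_eval turns it into a list of tuples.
conv_spec = ast.literal_eval(args.encoder_convolutions)
print(conv_spec)                    # [(64, 3), (128, 3)]
print(args.ctc_compress_strategy)   # 'weighted'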
Example #2
 def setUp(self):
     self.task, self.parser = get_dummy_task_and_parser()
     TransformerModel.add_args(self.parser)
     self.args = self.parser.parse_args([])
     self.args.encoder_layers = 2
     self.args.decoder_layers = 1
     logging.disable(logging.CRITICAL)
Example #3
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     # fmt: off
     TransformerModel.add_args(parser)
     parser.add_argument('--alignment-heads',
                         type=int,
                         metavar='N',
                         help='number of attention heads to be used for '
                         'pointing')
     parser.add_argument('--alignment-layer',
                         type=int,
                         metavar='I',
                         help='layer number to be used for pointing (0 '
                         'corresponding to the bottommost layer)')
     parser.add_argument('--source-position-markers',
                         type=int,
                         metavar='N',
                         help='dictionary includes N additional items that '
                         'represent an OOV token at a particular input '
                         'position')
     parser.add_argument(
         '--force-generation',
         type=float,
         metavar='P',
         default=None,
         help='set the vocabulary distribution weight to P, '
         'instead of predicting it from the input (1.0 '
         'corresponding to generation, 0.0 to pointing)')
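The --force-generation help above describes the pointer-generator mixture weight: 1.0 means generate purely from the vocabulary distribution, 0.0 means point purely into the source. A minimal sketch of that mixture with illustrative names (vocab_probs, copy_probs, p_gen), not the model's actual variables:

import torch

def mix_distributions(vocab_probs, copy_probs, p_gen, force_generation=None):
    """Blend the vocabulary distribution with the pointing (copy) distribution."""
    if force_generation is not None:
        # --force-generation P overrides the predicted weight with a constant.
        p_gen = torch.full_like(p_gen, force_generation)
    return p_gen * vocab_probs + (1.0 - p_gen) * copy_probs

vocab_probs = torch.softmax(torch.randn(2, 5, 100), dim=-1)  # (batch, tgt_len, vocab)
copy_probs = torch.softmax(torch.randn(2, 5, 100), dim=-1)   # copy scores projected to vocab
p_gen = torch.rand(2, 5, 1)                                  # predicted generation weight
out = mix_distributions(vocab_probs, copy_probs, p_gen, force_generation=0.5)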
Example #4
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--pretrained-checkpoint",
         type=str,
         metavar="STR",
     )
     parser.add_argument(
         "--init-encoder -only",
         action="store_true",
     )
     parser.add_argument(
         "--init-decoder-only",
         action="store_true",
     )
     parser.add_argument('--share-encoder-embeddings',
                         action='store_true',
                         help='share encoder embeddings across languages')
     parser.add_argument('--share-decoder-embeddings',
                         action='store_true',
                         help='share decoder embeddings across languages')
     parser.add_argument('--share-encoders',
                         action='store_true',
                         help='share encoders across languages')
     parser.add_argument('--share-decoders',
                         action='store_true',
                         help='share decoders across languages')
Example #5
 def test_export_transformer(self):
     task, parser = get_dummy_task_and_parser()
     TransformerModel.add_args(parser)
     args = parser.parse_args([])
     model = TransformerModel.build_model(args, task)
     scripted = torch.jit.script(model)
     _test_save_and_load(scripted)
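_test_save_and_load is not shown in the snippet; a helper along these lines, assuming it does a plain torch.jit save/load round trip, is enough to exercise serialization of the scripted model:

import tempfile
import torch

def save_and_load(scripted_module):
    # Serialize the scripted module to disk and load it back.
    with tempfile.NamedTemporaryFile(suffix=".pt") as f:
        torch.jit.save(scripted_module, f.name)
        return torch.jit.load(f.name)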
Example #6
    def init_from_config(cls, impl, decoder_kwargs, embedding):

        module = cls(impl)

        module.embedding = embedding
        module.decoder_kwargs = decoder_kwargs

        if impl == "fairseq":
            args = {}

            # fairseq default args
            ap = ArgumentParser()
            FairseqModel.add_args(ap)
            args.update(vars(ap.parse_args("")))

            # fairseq base architecture args
            ns = Namespace(**decoder_kwargs)
            base_architecture(ns)
            args.update(vars(ns))

            # our args
            args.update(decoder_kwargs)

            namespace = Namespace(**args)
            # placeholder "dictionary": a vocabulary-sized mapping stands in for a real fairseq dictionary
            dumb_dict = {i: 0 for i in range(embedding.weight.shape[0])}

            module.model = FairseqDecoder(namespace, dumb_dict, embedding)
        else:
            raise NotImplementedError()

        module.is_initialized = True

        return module
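The factory above layers three sources of configuration, with later ones overriding earlier ones: parser defaults from FairseqModel.add_args, architecture defaults applied by base_architecture, and the caller's decoder_kwargs. A standalone sketch of that override order (the flags and values are illustrative, not fairseq's actual defaults):

from argparse import ArgumentParser, Namespace

ap = ArgumentParser()
ap.add_argument("--decoder-layers", type=int, default=6)
ap.add_argument("--decoder-embed-dim", type=int, default=512)

args = dict(vars(ap.parse_args([])))       # 1) parser defaults
args.update({"decoder_embed_dim": 1024})   # 2) architecture defaults (illustrative)
args.update({"decoder_layers": 2})         # 3) caller-supplied kwargs win last
namespace = Namespace(**args)
print(namespace)                           # Namespace(decoder_embed_dim=1024, decoder_layers=2)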
Example #7
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--apply-bert-init",
         action="store_true",
         help="use custom param initialization for BERT",
     )
     parser.add_argument(
         "--early-exit",
         default="6,6,6",
         type=str,
         help="number of decoder layers for del_word, ins_mask, ins_word",
     )
     parser.add_argument(
         "--no-share-discriminator",
         action="store_true",
         help="addtional decoder-layers to learn deletion",
     )
     parser.add_argument(
         "--no-share-maskpredictor",
         action="store_true",
         help="addtional decoder-layers to learn predicting masks",
     )
     parser.add_argument(
         "--sampling-for-deletion",
         action='store_true',
         help='instead of argmax, use sampling to predict the tokens')
Example #8
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--apply-bert-init",
         action="store_true",
         help="use custom param initialization for BERT",
     )
     parser.add_argument(
         "--early-exit",
         default="6,6,6",
         type=str,
         help="number of decoder layers before word_del, mask_ins, word_ins",
     )
     parser.add_argument(
         "--no-share-discriminator",
         action="store_true",
         help="separate parameters for discriminator",
     )
     parser.add_argument(
         "--no-share-maskpredictor",
         action="store_true",
         help="separate parameters for mask-predictor",
     )
     parser.add_argument(
         "--share-discriminator-maskpredictor",
         action="store_true",
         help="share the parameters for both mask-predictor and discriminator",
     )
     parser.add_argument(
         "--sampling-for-deletion",
         action='store_true',
         help='instead of argmax, use sampling to predict the tokens'
     )
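The --early-exit value above is a comma-separated string of three layer counts, one per decoder head; a minimal sketch of splitting it into integers (variable names are illustrative):

early_exit = "6,6,6"  # value of --early-exit

# One exit layer per head: deletion, mask insertion, word insertion.
del_layer, ins_mask_layer, ins_word_layer = (int(x) for x in early_exit.split(","))
print(del_layer, ins_mask_layer, ins_word_layer)  # 6 6 6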
Example #9
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--apply-bert-init",
         action="store_true",
         help="use custom param initialization for BERT",
     )
Example #10
    def add_args(parser):
        TransformerModel.add_args(parser)
        parser.add_argument(
            "--apply-bert-init",
            action="store_true",
            help="use custom param initialization for BERT",
        )

        # length prediction
        parser.add_argument(
            "--src-embedding-copy",
            action="store_true",
            help=
            "copy encoder word embeddings as the initial input of the decoder")
        parser.add_argument(
            "--pred-length-offset",
            action="store_true",
            help=
            "predicting the length difference between the target and source sentences"
        )
        parser.add_argument(
            "--sg-length-pred",
            action="store_true",
            help="stop the gradients back-propagated from the length predictor"
        )
        parser.add_argument("--length-loss-factor",
                            type=float,
                            help="weights on the length prediction loss")
Example #11
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        TransformerModel.add_args(parser)
        parser.add_argument("--encoder-conv-channels", type=str, metavar="EXPR",
                            help="list of encoder convolution\'s out channels")
        parser.add_argument("--encoder-conv-kernel-sizes", type=str, metavar="EXPR",
                            help="list of encoder convolution\'s kernel sizes")
        parser.add_argument("--encoder-conv-strides", type=str, metavar="EXPR",
                            help="list of encoder convolution\'s strides")
        parser.add_argument("--encoder-transformer-context", type=str, metavar="EXPR",
                            help="left/right context for time-restricted self-attention; "
                            "can be None or a tuple of two non-negative integers/None")
        parser.add_argument("--decoder-input-dim", type=int, metavar="N",
                            help="decoder input dimension (extra linear layer "
                                 "if different from decoder embed dim)")

        # Scheduled sampling options
        parser.add_argument("--scheduled-sampling-probs", type=lambda p: options.eval_str_list(p),
                            metavar="P_1,P_2,...,P_N", default=[1.0],
                            help="scheduled sampling probabilities of sampling the truth "
                            "labels for N epochs starting from --start-schedule-sampling-epoch; "
                            "all later epochs using P_N")
        parser.add_argument("--start-scheduled-sampling-epoch", type=int,
                            metavar="N", default=1,
                            help="start scheduled sampling from the specified epoch")
Example #12
    def add_args(parser):
        TransformerModel.add_args(parser)

        # Arguments related to parameter initialization
        parser.add_argument('--apply-bert-init',
                            action='store_true',
                            help='use custom param initialization for BERT')
Example #13
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)  #TODO:
     parser.add_argument(
         '--encoder-drop-residual',
         type=int,
         help='drop residual after self-attention in this encoder layer',
     )
Example #14
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument('--add-topic-encoder-pre', default=False, action='store_true',
                         help='')
     parser.add_argument('--add-topic-encoder-post', default=False, action='store_true',
                         help='')
     parser.add_argument('--add-topic-decoder', default=False, action='store_true',
                         help='')
Example #15
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--pretrained-deltalm-checkpoint",
         type=str,
         metavar="STR",
     )
Example #16
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument("--full-mask",
                         action="store_true",
                         help="Full masking")
     parser.add_argument("--ignore-nat-loss",
                         action="store_true",
                         help="Ignore NAT Loss")
Example #17
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument('--img-dim',
                         type=int,
                         metavar='N',
                         default=2048,
                         help='image feature dimension')
Example #18
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--apply-bert-init",
         action="store_true",
         help="use custom param initialization for BERT",
     )
     parser.add_argument("--label-tau", default=None, type=float)
Example #19
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument('--share-encoders',
                         action='store_true',
                         help='share encoders across languages')
     parser.add_argument('--share-decoders',
                         action='store_true',
                         help='share decoders across languages')
Example #20
 def test_export_transformer_no_token_pos_emb(self):
     task, parser = get_dummy_task_and_parser()
     TransformerModel.add_args(parser)
     args = parser.parse_args([])
     args.no_token_positional_embeddings = True
     model = TransformerModel.build_model(args, task)
     scripted = torch.jit.script(model)
     _test_save_and_load(scripted)
Example #21
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--decoder-lang-embed-dim",
         type=int,
         metavar="N",
         help="decoder language embedding dimension",
     )
Example #22
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--pretrained-roberta-checkpoint",
         type=str,
         metavar="STR",
         help="roberta model to use for initializing transformer encoder",
     )
Example #23
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        TransformerModel.add_args(parser)

        parser.add_argument(
            "--vgg-config",
            type=str,
            help=
            """config in json format e.g. '[{"in_channels":64, "subsample": 2}, {"in_channels":64, "subsample": 2}]'.
             If a dict is empty, default values are used.""",
        )
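Since --vgg-config arrives as a JSON string, it decodes into a list of per-block dicts, and an empty dict falls back to defaults as the help text describes. A small sketch; the default values shown are illustrative:

import json

vgg_config = '[{"in_channels": 64, "subsample": 2}, {}]'  # value of --vgg-config

defaults = {"in_channels": 64, "subsample": 2}            # illustrative defaults
blocks = [{**defaults, **block} for block in json.loads(vgg_config)]
print(blocks)  # the empty dict picks up every default value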
Example #24
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument('--share-encoder-embeddings', action='store_true',
                         help='share encoder embeddings across languages')
     parser.add_argument('--share-decoder-embeddings', action='store_true',
                         help='share decoder embeddings across languages')
     parser.add_argument('--share-encoders', action='store_true',
                         help='share encoders across languages')
     parser.add_argument('--share-decoders', action='store_true',
                         help='share decoders across languages')
Example #25
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument('--img-dim',
                         type=int,
                         metavar='N',
                         default=1000,
                         help='image feature dimension')
     parser.add_argument('--use-img',
                         default=False,
                         action='store_true',
                         help='if set, use image features')
Example #26
 def add_args(parser):
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--apply-bert-init",
         action="store_true",
         help="use custom param initialization for BERT",
     )
     parser.add_argument(
         "--early-exit",
         default="6,6,6",
         type=str,
         help="number of decoder layers before mask_ins, word_ins and word_del heads",
     )
Example #27
 def setUp(self):
     self.task, self.parser = get_dummy_task_and_parser()
     eos = self.task.tgt_dict.eos()
     src_tokens = torch.randint(3, 50, (2, 10)).long()
     src_tokens = torch.cat((src_tokens, torch.LongTensor([[eos], [eos]])), -1)
     src_lengths = torch.LongTensor([2, 10])
     self.sample = {
         "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths}
     }
     TransformerModel.add_args(self.parser)
     args = self.parser.parse_args([])
     args.encoder_layers = 2
     args.decoder_layers = 1
     self.transformer_model = TransformerModel.build_model(args, self.task)
Example #28
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     TransformerModel.add_args(parser)
     parser.add_argument(
         "--bottleneck-dim",
         default=256,
         type=int,
         help="bottleneck size of adapter",
     )
     parser.add_argument(
         "--num-src-lang",
         default=1,
         type=int,
         help="number of unique adapters",
     )
Example #29
 def __init__(self):
     super().__init__()
     task, parser = _get_dummy_task_and_parser()
     TransformerModel.add_args(parser)
     args = parser.parse_args([])
     args.encoder_layers = 2
     args.decoder_layers = 1
     transformer_model = TransformerModel.build_model(args, task)
     self.sequence_generator = SequenceGenerator(
         [transformer_model],
         task.tgt_dict,
         beam_size=2,
         no_repeat_ngram_size=2,
         max_len_b=10,
     )
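A note on max_len_b above: fairseq's SequenceGenerator bounds hypothesis length by a linear function of the source length, roughly max_len_a * src_len + max_len_b, with max_len_a defaulting to 0. A tiny sketch of that bound, assuming this is the formula in effect:

def max_generated_len(src_len, max_len_a=0, max_len_b=200):
    # Length cap as a linear function of source length.
    return int(max_len_a * src_len + max_len_b)

# With max_len_b=10 as above, hypotheses are capped at 10 tokens regardless of src_len.
print(max_generated_len(src_len=25, max_len_a=0, max_len_b=10))  # 10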
Example #30
 def add_args(parser):
     """Add model-specific arguments to the parser."""
     # fmt: off
     TransformerModel.add_args(parser)
     parser.add_argument('--encoder-conv-channels',
                         type=str,
                         metavar='EXPR',
                         help='list of encoder convolution\'s out channels')
     parser.add_argument('--encoder-conv-kernel-sizes',
                         type=str,
                         metavar='EXPR',
                         help='list of encoder convolution\'s kernel sizes')
     parser.add_argument('--encoder-conv-strides',
                         type=str,
                         metavar='EXPR',
                         help='list of encoder convolution\'s strides')