Beispiel #1
0
 def __init__(
     self,
     args,
     src_dict,
     dst_dict,
     embed_tokens,
     num_chars=50,
     char_embed_dim=32,
     char_cnn_params="[(128, 3), (128, 5)]",
     char_cnn_nonlinear_fn="tanh",
     char_cnn_num_highway_layers=0,
     use_pretrained_weights=False,
     finetune_pretrained_weights=False,
 ):
     """Initialize the parent class, then attach a character-level CNN.

     Args:
         args, src_dict, dst_dict, embed_tokens: forwarded unchanged to the
             parent constructor. ``dst_dict`` is also the dictionary given
             to the char CNN, and ``embed_tokens.embedding_dim`` fixes the
             char CNN output width and the LayerNorm size.
         num_chars, char_embed_dim: forwarded to ``CharCNNModel``.
         char_cnn_params: Python-literal string, parsed with
             ``literal_eval`` and passed on as ``convolutions_params``.
         char_cnn_nonlinear_fn: passed on as ``nonlinear_fn_type``.
         char_cnn_num_highway_layers: passed on as ``num_highway_layers``.
         use_pretrained_weights, finetune_pretrained_weights: forwarded to
             ``CharCNNModel``.
     """
     super().__init__(args, src_dict, dst_dict, embed_tokens)

     conv_specs = literal_eval(char_cnn_params)
     # The char CNN output width should match the word embedding dimension,
     # so a single value drives both the CNN and its LayerNorm.
     word_embed_dim = embed_tokens.embedding_dim
     char_cnn_kwargs = {
         "dictionary": dst_dict,
         "num_chars": num_chars,
         "char_embed_dim": char_embed_dim,
         "convolutions_params": conv_specs,
         "nonlinear_fn_type": char_cnn_nonlinear_fn,
         "num_highway_layers": char_cnn_num_highway_layers,
         "char_cnn_output_dim": word_embed_dim,
         "use_pretrained_weights": use_pretrained_weights,
         "finetune_pretrained_weights": finetune_pretrained_weights,
     }
     self.char_cnn_encoder = char_encoder.CharCNNModel(**char_cnn_kwargs)
     self.char_layer_norm = nn.LayerNorm(word_embed_dim)
    def __init__(
        self,
        args,
        src_dict,
        dst_dict,
        embed_tokens,
        num_chars=50,
        char_embed_dim=32,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_nonlinear_fn="tanh",
        char_cnn_num_highway_layers=0,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
    ):
        """Initialize the parent class, a char CNN and a combined embedding.

        Args:
            args, src_dict, dst_dict, embed_tokens: forwarded unchanged to
                the parent constructor. ``dst_dict`` is also the dictionary
                given to the char CNN; ``embed_tokens`` supplies the sizes
                of the LayerNorm and of the combined embedding table.
            num_chars, char_embed_dim: forwarded to ``CharCNNModel``.
            char_cnn_params: Python-literal string, parsed with
                ``literal_eval`` and passed on as ``convolutions_params``.
            char_cnn_nonlinear_fn: passed on as ``nonlinear_fn_type``.
            char_cnn_num_highway_layers: passed on as ``num_highway_layers``.
            use_pretrained_weights, finetune_pretrained_weights: forwarded
                to ``CharCNNModel``.
        """
        super().__init__(args, src_dict, dst_dict, embed_tokens)

        conv_specs = literal_eval(char_cnn_params)
        # The char CNN output width should match the word embedding dimension.
        word_embed_dim = embed_tokens.embedding_dim
        char_cnn_kwargs = {
            "dictionary": dst_dict,
            "num_chars": num_chars,
            "char_embed_dim": char_embed_dim,
            "convolutions_params": conv_specs,
            "nonlinear_fn_type": char_cnn_nonlinear_fn,
            "num_highway_layers": char_cnn_num_highway_layers,
            "char_cnn_output_dim": word_embed_dim,
            "use_pretrained_weights": use_pretrained_weights,
            "finetune_pretrained_weights": finetune_pretrained_weights,
        }
        self.char_cnn_encoder = char_encoder.CharCNNModel(**char_cnn_kwargs)
        self.char_layer_norm = nn.LayerNorm(word_embed_dim)

        # Character representations start out not precomputed (the state
        # before training ends). Once they have been precomputed, the
        # combined table below should be used in place of the two separate
        # embeddings.
        self._is_precomputed = False
        vocab_size = embed_tokens.num_embeddings
        self.combined_word_char_embed = nn.Embedding(vocab_size, word_embed_dim)
    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        num_chars=50,
        embed_dim=32,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        char_cnn_output_dim=-1,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
        weights_file=None,
    ):
        """Build the char CNN / word embedding front end and the encoder.

        Args:
            args: configuration object; this constructor reads
                ``encoder_embed_dim``, ``dropout`` and
                ``encoder_learned_pos`` from it and hands the whole object
                to ``TransformerEncoderGivenEmbeddings``.
            dictionary: passed to the parent constructor and to
                ``CharCNNModel``; its ``pad()`` value becomes
                ``self.padding_idx``.
            embed_tokens: word embedding module, stored as-is; its
                ``embedding_dim`` sizes the word LayerNorm.
            num_chars, embed_dim, char_cnn_nonlinear_fn, char_cnn_pool_type,
            char_cnn_num_highway_layers, use_pretrained_weights,
            finetune_pretrained_weights, weights_file: forwarded
                positionally to ``CharCNNModel``.
            char_cnn_params: Python-literal string parsed with
                ``literal_eval`` into a sequence of 2-tuples whose first
                element is summed as an output dimension.
            char_cnn_output_dim: char CNN output width, or ``-1`` to use
                the sum of the first elements of ``char_cnn_params``.
        """
        super().__init__(dictionary)

        conv_specs = literal_eval(char_cnn_params)
        char_cnn_args = (
            dictionary,
            num_chars,
            embed_dim,
            conv_specs,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
            char_cnn_output_dim,
            use_pretrained_weights,
            finetune_pretrained_weights,
            weights_file,
        )
        self.char_cnn_encoder = char_encoder.CharCNNModel(*char_cnn_args)

        self.embed_tokens = embed_tokens
        token_embed_dim = embed_tokens.embedding_dim
        self.word_layer_norm = nn.LayerNorm(token_embed_dim)

        # -1 means no explicit output width was requested; the width is then
        # the sum of the per-convolution output dimensions.
        if char_cnn_output_dim == -1:
            char_embed_dim = sum(out_dim for (out_dim, _) in conv_specs)
        else:
            char_embed_dim = char_cnn_output_dim
        self.char_layer_norm = nn.LayerNorm(char_embed_dim)

        # Each scale is the square root of that component's share of the
        # combined (char + word) width.
        combined_dim = char_embed_dim + token_embed_dim
        self.word_dim = combined_dim
        self.char_scale = math.sqrt(char_embed_dim / combined_dim)
        self.word_scale = math.sqrt(token_embed_dim / combined_dim)

        # The projection attribute is only created when the combined width
        # differs from the encoder embedding width.
        transformer_dim = args.encoder_embed_dim
        if combined_dim != transformer_dim:
            self.word_to_transformer_embed = fairseq_transformer.Linear(
                combined_dim, transformer_dim
            )

        self.dropout = args.dropout

        self.padding_idx = dictionary.pad()
        self.embed_positions = fairseq_transformer.PositionalEmbedding(
            1024,
            transformer_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        )

        self.transformer_encoder_given_embeddings = TransformerEncoderGivenEmbeddings(
            args=args, proj_to_decoder=True
        )

        # Variable tracker
        self.tracker = VariableTracker()
        # Initialize adversarial mode: both switches start off.
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)

        # When True, disables sorting and word-length thresholding
        # (enables ONNX tracing of length-sorted input with batch_size = 1).
        self.onnx_export_model = False
    def __init__(
        self,
        dictionary,
        num_chars=50,
        embed_dim=32,
        token_embed_dim=256,
        freeze_embed=False,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_output_dim=256,
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        hidden_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        residual_level=None,
        bidirectional=False,
        word_dropout_params=None,
    ):
        """Build a char CNN, an optional word embedding and LSTM layers.

        Args:
            dictionary: passed to the parent constructor, ``CharCNNModel``
                and (when enabled) ``WordDropout``; its length and ``pad()``
                value size the word embedding table.
            num_chars, embed_dim, char_cnn_nonlinear_fn, char_cnn_pool_type,
            char_cnn_num_highway_layers: forwarded positionally to
                ``CharCNNModel``.
            token_embed_dim: word embedding width; the embedding table is
                only created when this is greater than 0.
            freeze_embed: forwarded to ``rnn.Embedding``.
            char_cnn_params: Python-literal string parsed with
                ``literal_eval`` into 2-tuples whose first elements are
                summed into ``self.word_dim``.
            char_cnn_output_dim: accepted but not used by this constructor.
            hidden_dim: LSTM hidden size; halved for a bidirectional first
                layer.
            num_layers: number of LSTM layers appended to ``self.layers``.
            dropout_in: stored on the instance.
            dropout_out: stored on the instance and passed to every LSTM
                layer as ``dropout``.
            residual_level: stored on the instance.
            bidirectional: only the first layer is built bidirectional.
            word_dropout_params: optional mapping; word dropout is enabled
                when its ``"word_dropout_freq_threshold"`` entry is a
                positive number.

        Raises:
            AssertionError: if a bidirectional first layer is requested
                with an odd ``hidden_dim``.
        """

        super().__init__(dictionary)
        self.dictionary = dictionary
        self.dropout_in, self.dropout_out = dropout_in, dropout_out
        self.residual_level = residual_level
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional

        conv_specs = literal_eval(char_cnn_params)
        char_cnn_args = (
            dictionary,
            num_chars,
            embed_dim,
            conv_specs,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
        )
        self.char_cnn_encoder = char_encoder.CharCNNModel(*char_cnn_args)

        self.embed_tokens = None
        vocab_size = len(dictionary)
        self.padding_idx = dictionary.pad()
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=vocab_size,
                embedding_dim=token_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=freeze_embed,
            )

        # Input width of the first LSTM layer: summed conv output
        # dimensions plus the word embedding width.
        conv_out_total = sum(out_dim for (out_dim, _) in conv_specs)
        self.word_dim = conv_out_total + token_embed_dim

        self.layers = nn.ModuleList([])
        for layer_idx in range(num_layers):
            # Only the first layer may run in both directions.
            two_way = self.bidirectional and layer_idx == 0
            if two_way:
                assert hidden_dim % 2 == 0, (
                    "encoder_hidden_dim must be even if encoder_bidirectional "
                    "(to be divided evenly between directions)"
                )
                layer_hidden = hidden_dim // 2
            else:
                layer_hidden = hidden_dim

            if layer_idx == 0:
                layer_input = self.word_dim
            else:
                layer_input = hidden_dim

            lstm_layer = rnn.LSTMSequenceEncoder.LSTM(
                layer_input,
                layer_hidden,
                num_layers=1,
                dropout=self.dropout_out,
                bidirectional=two_way,
            )
            self.layers.append(lstm_layer)

        self.num_layers = len(self.layers)

        # Word dropout stays disabled unless a positive frequency threshold
        # is configured.
        self.word_dropout_module = None
        if word_dropout_params:
            freq_threshold = word_dropout_params["word_dropout_freq_threshold"]
            if freq_threshold is not None and freq_threshold > 0:
                self.word_dropout_module = word_dropout.WordDropout(
                    dictionary, word_dropout_params
                )
    def __init__(
        self,
        dictionary,
        num_chars=50,
        unk_only_char_encoding=False,
        embed_dim=32,
        token_embed_dim=256,
        freeze_embed=False,
        normalize_embed=False,
        char_cnn_params="[(128, 3), (128, 5)]",
        char_cnn_nonlinear_fn="tanh",
        char_cnn_pool_type="max",
        char_cnn_num_highway_layers=0,
        char_cnn_output_dim=-1,
        hidden_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        residual_level=None,
        bidirectional=False,
        word_dropout_params=None,
        use_pretrained_weights=False,
        finetune_pretrained_weights=False,
        weights_file=None,
    ):
        """Build a char CNN, an optional word embedding and a BiLSTM stack.

        Args:
            dictionary: passed to the parent constructor and to
                ``CharCNNModel``; its length, ``pad()`` and ``unk()`` values
                size the embedding table and set ``padding_idx`` /
                ``unk_idx``.
            num_chars, embed_dim, char_cnn_nonlinear_fn, char_cnn_pool_type,
            char_cnn_num_highway_layers, use_pretrained_weights,
            finetune_pretrained_weights, weights_file: forwarded
                positionally to ``CharCNNModel``.
            unk_only_char_encoding: when true, ``self.word_dim`` is just
                ``token_embed_dim`` and the char CNN output width is
                required to equal it; otherwise the two widths are added.
            token_embed_dim: word embedding width; the embedding table is
                only created when this is greater than 0.
            freeze_embed, normalize_embed: forwarded to ``rnn.Embedding``.
            char_cnn_params: Python-literal string parsed with
                ``literal_eval`` into 2-tuples whose first elements are
                summed when ``char_cnn_output_dim`` is ``-1``.
            char_cnn_output_dim: char CNN output width, or ``-1`` to use
                the summed conv output dimensions.
            hidden_dim, num_layers, residual_level, bidirectional:
                forwarded to ``rnn.BiLSTM``.
            dropout_in: stored on the instance.
            dropout_out: passed to ``rnn.BiLSTM`` as ``dropout``.
            word_dropout_params: accepted but not used by this constructor.

        Raises:
            AssertionError: if ``unk_only_char_encoding`` is set and the
                effective char CNN output width differs from
                ``token_embed_dim``.
        """
        super().__init__(dictionary)
        self.dropout_in = dropout_in

        convolutions_params = literal_eval(char_cnn_params)
        self.char_cnn_encoder = char_encoder.CharCNNModel(
            dictionary,
            num_chars,
            embed_dim,
            convolutions_params,
            char_cnn_nonlinear_fn,
            char_cnn_pool_type,
            char_cnn_num_highway_layers,
            char_cnn_output_dim,
            use_pretrained_weights,
            finetune_pretrained_weights,
            weights_file,
        )

        self.embed_tokens = None
        num_tokens = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.unk_idx = dictionary.unk()
        if token_embed_dim > 0:
            self.embed_tokens = rnn.Embedding(
                num_embeddings=num_tokens,
                embedding_dim=token_embed_dim,
                padding_idx=self.padding_idx,
                freeze_embed=freeze_embed,
                normalize_embed=normalize_embed,
            )

        # Effective char CNN output width: the explicit value when given,
        # otherwise (-1 sentinel) the summed conv output dimensions.
        effective_char_dim = (
            char_cnn_output_dim
            if char_cnn_output_dim != -1
            else sum(out_dim for (out_dim, _) in convolutions_params)
        )
        self.token_embed_dim = token_embed_dim

        self.unk_only_char_encoding = unk_only_char_encoding
        if self.unk_only_char_encoding:
            # Compare the *effective* width, not the raw argument: with the
            # default char_cnn_output_dim of -1 the raw comparison rejected
            # configurations whose summed conv width already matched
            # token_embed_dim (e.g. the defaults, 128 + 128 == 256).
            assert effective_char_dim == token_embed_dim, (
                "effective char CNN output dim (%d) must equal "
                "token_embed_dim (%d)" % (effective_char_dim, token_embed_dim)
            )
            self.word_dim = token_embed_dim
        else:
            self.word_dim = effective_char_dim + token_embed_dim

        self.bilstm = rnn.BiLSTM(
            num_layers=num_layers,
            bidirectional=bidirectional,
            embed_dim=self.word_dim,
            hidden_dim=hidden_dim,
            dropout=dropout_out,
            residual_level=residual_level,
        )

        # Variable tracker
        self.tracker = VariableTracker()
        # Initialize adversarial mode
        self.set_gradient_tracking_mode(False)
        self.set_embed_noising_mode(False)