Example #1
    def __init__(
        self,
        dictionary,
        node_dictionary,
        embed_dim=512,
        type_embed_dim=512,
        hidden_size=512,
        decoder_hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=True,
        left_pad=True,
        pretrained_embed=None,
        pretrained_terminals_embed=None,
        padding_idx=None,
        type_padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(dictionary)
        self.node_dictionary = node_dictionary
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions

        self.padding_idx = padding_idx if padding_idx is not None \
            else self.dictionary.pad()
        self.type_padding_idx = type_padding_idx if type_padding_idx is not None \
            else self.node_dictionary.pad()

        if pretrained_embed is None:
            self.subtoken_embed = Embedding(len(dictionary), embed_dim,
                                            self.padding_idx)
        else:
            self.subtoken_embed = pretrained_embed
        if pretrained_terminals_embed is None:
            self.node_embed = Embedding(len(self.node_dictionary),
                                        type_embed_dim, self.type_padding_idx)
        else:
            self.node_embed = pretrained_terminals_embed

        self.lstm = LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size // (1 + int(bidirectional)),
            num_layers=num_layers,
            dropout=self.dropout_out if num_layers > 1 else 0.,
            bidirectional=bidirectional,
            batch_first=True,
        )
        self.left_pad = left_pad
        self.transform = nn.Sequential(
            nn.Linear(2 * type_embed_dim + hidden_size,
                      decoder_hidden_size,
                      bias=False),
            nn.Tanh(),
        )
        self.output_units = decoder_hidden_size
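
A quick shape check on the bidirectional split above: dividing hidden_size by (1 + int(bidirectional)) keeps the concatenated forward/backward features at exactly hidden_size, so the Linear inside self.transform always sees 2 * type_embed_dim + hidden_size inputs. A minimal sketch in plain PyTorch with illustrative sizes (not the project's LSTM wrapper):

import torch
import torch.nn as nn

# Each direction gets hidden_size // 2 units; the two directions are concatenated,
# so the output feature width stays at hidden_size.
hidden_size, embed_dim = 512, 512
lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_size // 2,
               num_layers=1, bidirectional=True, batch_first=True)
x = torch.randn(4, 10, embed_dim)   # [batch, seq_len, embed_dim]
out, _ = lstm(x)
print(out.shape)                    # torch.Size([4, 10, 512])
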
Example #2
    def __init__(self,
                 dictionary,
                 embed_dim=400,
                 pos_len=100,
                 pos_dim=50,
                 hidden_size=400,
                 out_embed_dim=400,
                 num_layers=1,
                 dropout_in=0.5,
                 dropout_out=0.5,
                 encoder_output_units=400,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 max_target_positions=DEFAULT_MAX_TARGET_POSITIONS):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.max_target_positions = max_target_positions

        num_embeddings = len(dictionary)
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings,
                                          embed_dim,
                                          padding_idx=dictionary.pad())
        else:
            self.embed_tokens = pretrained_embed

        self.pos_len = pos_len + 1
        self.pos_dim = pos_dim
        self.pos_embed = Embedding(self.pos_len, pos_dim)

        # disable input feeding if there is no encoder
        # input feeding is described in arxiv.org/abs/1508.04025
        # self.layers = nn.ModuleList([
        #     LSTMCell(
        #         # input_size=encoder_output_units + pos_dim if layer == 0 else hidden_size,
        #         input_size=encoder_output_units if layer == 0 else hidden_size,
        #         hidden_size=hidden_size,
        #     )
        #     for layer in range(num_layers)
        # ])
        self.layers = nn.ModuleList([
            LSTM(
                in_dim=(encoder_output_units + pos_dim) if layer == 0 else hidden_size,
                # in_dim=encoder_output_units if layer == 0 else hidden_size,
                out_dim=hidden_size,
            ) for layer in range(num_layers)
        ])

        # W_H(h)+W_T(t) => fc_out
        self.W_H = nn.Linear(self.hidden_size, self.hidden_size)
        self.W_T = nn.Linear(self.hidden_size, self.hidden_size)

        if not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings)
Example #3
 def __init__(
     self,
     dictionary,
     embed_dim=512,
     hidden_size=512,
     num_layers=1,
     bidirectional=False,
     dropout=0.5,
     pretrained_embed=None,
     shared_embedding=False,
 ):
     super(LSTMDecoder, self).__init__(dictionary)
     if pretrained_embed is None:
         self.embed_tokens = Embedding(len(dictionary),
                                       embed_dim,
                                       padding_idx=dictionary.pad())
     else:
         self.embed_tokens = pretrained_embed
     self.rnn = LSTM(
         embed_dim,
         hidden_size,
         num_layers=num_layers,
         dropout=dropout,
         batch_first=True,
          bidirectional=False,  # in the prediction task, bidirectional cannot be set to True
     )
     # self.dropout = dropout
     # self.bidirectional = bidirectional
     # if bidirectional:
     #     self.proj = Linear(hidden_size * 2, hidden_size)
     self.fc_out = Linear(hidden_size, len(dictionary))
     if shared_embedding:
         self.fc_out.weight = self.embed_tokens.weight
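
The shared_embedding branch ties the output projection to the input embedding matrix. This only type-checks when hidden_size equals embed_dim, because fc_out.weight has shape [vocab, hidden_size] while the embedding weight has shape [vocab, embed_dim]. A minimal weight-tying sketch in plain PyTorch (illustrative sizes):

import torch.nn as nn

vocab_size, embed_dim = 1000, 512
embed_tokens = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
fc_out = nn.Linear(embed_dim, vocab_size, bias=False)  # weight: [vocab_size, embed_dim]
fc_out.weight = embed_tokens.weight                    # one shared parameter, updated together
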
Example #4
    def __init__(
        self, dictionary, embed_dim=512, hidden_size=512, num_layers=1,
        dropout_in=0.1, dropout_out=0.1, bidirectional=False,
        left_pad=True, pretrained_embed=None, padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions

        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.lstm = ChildSumTreeLSTMCell(
            input_size=embed_dim,
            hidden_size=hidden_size,
        )

        self.left_pad = left_pad

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
Example #5
 def load_pretrained_embedding_from_file(embed_path, dictionary,
                                         embed_dim):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
     embed_dict = utils.parse_embedding(embed_path)
     utils.print_embed_overlap(embed_dict, dictionary)
     return utils.load_embedding(embed_dict, dictionary, embed_tokens)
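
The Embedding factory called throughout these examples is not shown here; in fairseq-style codebases it is usually a thin wrapper around nn.Embedding that initializes the weights and zeroes the padding row. A sketch under that assumption (the project's actual helper may use a different init scheme):

import torch.nn as nn

def Embedding(num_embeddings, embedding_dim, padding_idx=None):
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0.0)  # keep the pad row at zero
    return m
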
Example #6
    def __init__(self,
                 dictionary, embed_dim, token_types, max_positions,
                 self_attn_layers, attention_heads, ffn_embed_dim, activation_fn,
                 dropout, **kwargs,
                 ):
        super(SelfAttnEncoder, self).__init__(dictionary)
        # word embedding
        self.embed = Embedding(
            len(dictionary), embed_dim, padding_idx=self.dictionary.pad(),
            initializer=trunc_normal(mean=.0, std=.02),
        )
        # type embedding
        if token_types is not None:
            self.type_embed = Embedding(
                token_types, embed_dim,
                initializer=trunc_normal(mean=.0, std=.02),
            )
        else:
            self.type_embed = None
        # positional embedding
        if max_positions is not None:
            self.positional_embed = Parameter(
                1, max_positions, embed_dim,
                initializer=trunc_normal(mean=.0, std=.02),
            )
        else:
            self.positional_embed = None
        # layer norm for embedding
        self.embed_layer_norm = LayerNorm(embed_dim)
        self.dropout = dropout

        # self attn
        self.num_layers = self_attn_layers
        self.layers = nn.ModuleList(
            [TransformerEncoderLayer(embed_dim, attention_heads, dropout, ffn_embed_dim, activation_fn)
             for _ in range(self_attn_layers)]
        )

        # pooling
        pooling = kwargs.get('pooling', None)
        self.pooling = pooling1d(pooling)
        if pooling and 'weighted' in pooling:  # guard: 'pooling' may be absent from kwargs
            self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
        else:
            self.weight_layer = None
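
Parameter(1, max_positions, embed_dim, ...) above is the project's factory for a learned positional embedding; assuming it behaves like a plain nn.Parameter with truncated-normal init, the plain-PyTorch equivalent and its broadcast over the batch look like this:

import torch
import torch.nn as nn

max_positions, embed_dim = 512, 256
positional_embed = nn.Parameter(torch.empty(1, max_positions, embed_dim))
nn.init.trunc_normal_(positional_embed, mean=0.0, std=0.02)

tokens = torch.randn(4, 100, embed_dim)                # [bsz, seq_len, embed_dim]
x = tokens + positional_embed[:, :tokens.size(1), :]   # slice to seq_len, broadcast over bsz
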
Example #7
 def build_embedding(dictionary, embed_dim, path=None):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     emb = Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)
     # if provided, load from preloaded dictionaries
     if path:
         embed_dict = utils.parse_embedding(path)
         utils.load_embedding(embed_dict, dictionary, emb)
     return emb
Example #8
 def __init__(self, token_num: int, embed_size: int,
              rnn_type: str, hidden_size: int, layer_num: int, dropout: float, bidirectional: bool,
              attn_hops: int, attn_unit: int, ) -> None:
     super(SelfAttnEncoder, self).__init__()
     self.wemb = Embedding(token_num, embed_size, )
     self.rnn = RNNEncoder(rnn_type, embed_size, hidden_size, layer_num, 0.0, bidirectional)
     self.self_attn = SelfAttention(hidden_size * (2 if bidirectional else 1), attn_hops=attn_hops,
                                    attn_unit=attn_unit * (2 if bidirectional else 1),
                                    dropout=dropout)
     self.dropout = dropout
Example #9
    def __init__(self, dictionary, embed_dim, out_channels, kernel_size,
                 **kwargs):
        super().__init__(dictionary)
        # word embedding + positional embedding
        self.embed = Embedding(
            len(dictionary), embed_dim)  # , padding_idx=self.dictionary.pad())

        self.position_encoding = kwargs.get('position_encoding', None)
        if self.position_encoding == 'learned':
            self.position_embed = Parameter(1,
                                            kwargs['max_tokens'],
                                            embed_dim,
                                            initializer=trunc_normal(mean=0.,
                                                                     std=0.02))
        else:
            self.position_embed = None
        # pooling
        pooling = kwargs.get('pooling', None)
        self.pooling = pooling1d(pooling)
        if pooling and 'weighted' in pooling:  # guard: 'pooling' may be absent from kwargs
            self.weight_layer = Linear(embed_dim, 1, bias=False)
        else:
            self.weight_layer = None
        # conv1d
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # padding mode = ['valid'(default), 'same']
        self.padding = kwargs.get('padding', 'valid')
        if self.padding == 'same':
            self.padding_size = []
            for kernel_sz in self.kernel_size:
                padding_right = (kernel_sz - 1) // 2
                padding_left = kernel_sz - 1 - padding_right
                self.padding_size.append((
                    0,
                    0,
                    padding_left,
                    padding_right,
                ))
        self.conv_layers = nn.ModuleList([])
        # input: [bsz, 1, seq_len, embed_dim]
        # filters = 1 -> embed_dim
        # kernel_size = (kernel_width, embed_dim)
        # =>  output: [bsz, embed_dim, seq_len - kernel_width + 1]
        for idx, kernel_sz in enumerate(self.kernel_size):
            self.conv_layers.append(
                Conv2d(in_channels=1,
                       out_channels=embed_dim,
                       kernel_size=(kernel_sz, embed_dim)))

        self.residual = kwargs.get('residual', False)  # residual
        self.dropout = kwargs.get('dropout', None)
        activation_fn = kwargs.get('activation_fn', None)
        self.activation_fn = get_activation(
            activation_fn) if activation_fn else None
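
A standalone shape check for the Conv2d setup above (plain PyTorch, illustrative sizes): treating the [seq_len, embed_dim] grid as a 1-channel image and using a kernel that spans the full embedding width collapses that axis, leaving a 1-D feature map per filter:

import torch
import torch.nn as nn

bsz, seq_len, embed_dim, kernel_width = 4, 20, 128, 3
conv = nn.Conv2d(in_channels=1, out_channels=embed_dim,
                 kernel_size=(kernel_width, embed_dim))
x = torch.randn(bsz, 1, seq_len, embed_dim)
y = conv(x)
print(y.shape)   # torch.Size([4, 128, 18, 1]); squeeze(-1) gives [bsz, embed_dim, seq_len - kernel_width + 1]
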
Example #10
    def __init__(
        self, dictionary, src_modalities=['code'], embed_dim=512, hidden_size=512, out_embed_dim=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, attention=True,
        encoder_output_units=512, pretrained_embed=None,
        share_input_output_embed=False, adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS
    ):
        super().__init__(dictionary)
        self.src_modalities = src_modalities
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size and encoder_output_units != 0:
            self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
            self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None

        # disable input feeding if there is no encoder
        # input feeding is described in arxiv.org/abs/1508.04025
        input_feed_size = 0 if encoder_output_units == 0 else hidden_size
        self.layers = nn.ModuleList([
            LSTMCell(
                input_size=input_feed_size + embed_dim if layer == 0 else hidden_size,
                hidden_size=hidden_size,
            )
            for layer in range(num_layers)
        ])
        if attention:
            # TODO make bias configurable
            # self.attention = AttentionLayer(hidden_size, encoder_output_units, hidden_size, bias=False)
            self.attention = None
        else:
            self.attention = None
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        # if adaptive_softmax_cutoff is not None:
        #     # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        #     self.adaptive_softmax = AdaptiveSoftmax(num_embeddings, hidden_size, adaptive_softmax_cutoff,
        #                                             dropout=dropout_out)
        # with the adaptive-softmax branch above commented out, chain this as a
        # plain `if` so fc_out is still built when hidden_size != out_embed_dim
        if not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
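
The input_size = input_feed_size + embed_dim on layer 0 implements input feeding (arxiv.org/abs/1508.04025): the previous step's attentional/hidden vector is concatenated with the current token embedding. A minimal single-step sketch in plain PyTorch (hypothetical sizes):

import torch
import torch.nn as nn

embed_dim, hidden_size, bsz = 512, 512, 4
cell = nn.LSTMCell(input_size=hidden_size + embed_dim, hidden_size=hidden_size)

token_embed = torch.randn(bsz, embed_dim)
input_feed = torch.zeros(bsz, hidden_size)   # previous attentional state (zeros at step 0)
state = (torch.zeros(bsz, hidden_size), torch.zeros(bsz, hidden_size))
h, c = cell(torch.cat([input_feed, token_embed], dim=1), state)
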
Example #11
 def __init__(self, args, dictionary):
     super(TransformerDecoder, self).__init__(dictionary)
     self.dropout = args['model']['dropout']
     embed_dim = args['model']['decoder_embed_dim']
     self.padding_idx = dictionary.pad()
     self.embed_tokens = Embedding(len(dictionary),
                                   embed_dim,
                                   padding_idx=dictionary.pad())
     self.layers = nn.ModuleList([
         TransformerDecoderLayer(args)
         for _ in range(args['model']['decoder_layers'])
     ])
     self.num_layers = args['model']['decoder_layers']
     self.out_layer_norm = LayerNorm(embed_dim)
Example #12
 def __init__(
     self,
     dictionary,
     embed_dim,
     pooling='weighted_mean',
     dropout=0.1,
     **kwargs,
 ):
     super().__init__(dictionary)
     self.padding_idx = self.dictionary.pad()
     self.embed = Embedding(len(dictionary),
                            embed_dim,
                            padding_idx=self.padding_idx,
                            initializer=xavier_uniform())
     self.dropout = dropout
     self.pooling = pooling1d(pooling)
      # define weight_layer unconditionally so later code can check it safely
      self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform()) \
          if (self.pooling and 'weighted' in pooling) else None
Example #13
    def __init__(
        self,
        dictionary,
        embed_dim,
        embed_out,
        dropout,
        edge_types,
        # scoring/transform MLPs
        out_dropout,
        dim_inner,
        dim_out,
    ):
        super(PoemEncoder, self).__init__(dictionary)
        # embedding block
        if dictionary is not None:
            self.embed = Embedding(len(dictionary), embed_dim)
        else:
            self.embed = None
        self.embed_modules = nn.Sequential(
            Linear(embed_dim, embed_out, bias=False), nn.ReLU(),
            nn.Dropout(dropout))
        # MLP-GNN
        self.gnn_modules = GNNEncoder(edge_types, dim_in=embed_out,
                                      dim_inner=dim_out, dim_out=embed_out,
                                      dropout=dropout)

        # scoring MLP
        def get_mlp():
            return nn.Sequential(
                nn.Dropout(out_dropout),
                nn.Linear(embed_dim + embed_out, dim_inner, bias=False),
                nn.ReLU(),
                nn.Linear(dim_inner, dim_out, bias=False),
                nn.ReLU(),
            )

        self.score_mlp = get_mlp()
        self.transform_mlp = get_mlp()
        self.out_linear = nn.Sequential(
            nn.Linear(dim_out, 2),
            nn.Sigmoid(),
        )
Example #14
    def __init__(
        self,
        dictionary,
        embed_dim=400,
        dropout=0.5,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(dictionary)
        self.dropout = dropout
        self.max_source_positions = max_source_positions

        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None \
            else dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed
Example #15
 def __init__(
         self,
         dictionary,
         embed_dim,
         dropout,
         # rnn config
         rnn_cell,
         rnn_hidden_dim,
         rnn_dropout,
         rnn_num_layers=1,
         rnn_bidirectional=False,
         **kwargs):
     super().__init__(dictionary)
     # word embedding + positional embedding
     self.embed = Embedding(len(dictionary),
                            embed_dim,
                            initializer=xavier_uniform())
     self.dropout = dropout
     # pooling
     pooling = kwargs.get('pooling', None)
     self.pooling = pooling1d(pooling)
      if pooling and 'weighted' in pooling:  # guard: 'pooling' may be absent from kwargs
         self.weight_layer = Linear(embed_dim,
                                    1,
                                    bias=False,
                                    weight_initializer=xavier_uniform())
     else:
         self.weight_layer = None
     # rnn
     self.rnn_dropout = rnn_dropout
     self.rnn_num_layers = rnn_num_layers
     self.rnn_bidirectional = rnn_bidirectional
     self.rnn = getattr(nn, str.upper(rnn_cell))(
         embed_dim,
         rnn_hidden_dim,
         num_layers=rnn_num_layers,
         dropout=self.rnn_dropout,  # rnn inner dropout between layers
         bidirectional=rnn_bidirectional,
         batch_first=True,
     )
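
getattr(nn, str.upper(rnn_cell)) above resolves a cell name such as 'lstm', 'gru', or 'rnn' to the corresponding torch.nn class; a standalone check with illustrative sizes:

import torch.nn as nn

rnn_cell = 'lstm'                        # or 'gru', 'rnn'
rnn_cls = getattr(nn, rnn_cell.upper())  # -> nn.LSTM
rnn = rnn_cls(64, 128, num_layers=1, batch_first=True, bidirectional=False)
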
Example #16
 def __init__(
     self,
     dictionary,
     embed_dim,
     # rnn config
     rnn_cell,
     rnn_hidden_dim,
     rnn_dropout=None,
     rnn_num_layers=2,
     rnn_bidirectional=False,
     # auxiliary input
     aux_dim=2,
     inner_dim=32,
     out_dim=2,
 ):
     super(DeepTuneEncoder, self).__init__(dictionary)
     self.embed = Embedding(len(dictionary), embed_dim)
     # LSTM
     self.rnn_dropout = rnn_dropout
     self.rnn = getattr(nn, str.upper(rnn_cell))(
         embed_dim,
         rnn_hidden_dim,
         num_layers=rnn_num_layers,
          dropout=self.rnn_dropout if self.rnn_dropout else 0.,  # rnn inner dropout between layers; nn.LSTM/GRU expects a float, not None
         bidirectional=rnn_bidirectional,
         batch_first=True,
     )
     self.src_out_proj = nn.Sequential(
         Linear(rnn_hidden_dim, out_dim),
         nn.Sigmoid(),
     )
     # Auxiliary inputs. wgsize and dsize
     self.bn = BatchNorm1d(rnn_hidden_dim + aux_dim)
     self.hybrid_out_proj = nn.Sequential(
         Linear(rnn_hidden_dim + aux_dim, inner_dim),
         nn.ReLU(),
         Linear(inner_dim, out_dim),
         nn.Sigmoid(),
     )
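
The hybrid head above concatenates the RNN summary with the two auxiliary scalars (wgsize and dsize), batch-normalizes the result, and applies the small MLP. A plain-PyTorch sketch with illustrative sizes:

import torch
import torch.nn as nn

bsz, rnn_hidden_dim, aux_dim, inner_dim = 8, 64, 2, 32
rnn_summary = torch.randn(bsz, rnn_hidden_dim)   # e.g. the last hidden state
aux = torch.randn(bsz, aux_dim)                  # wgsize, dsize

bn = nn.BatchNorm1d(rnn_hidden_dim + aux_dim)
head = nn.Sequential(nn.Linear(rnn_hidden_dim + aux_dim, inner_dim), nn.ReLU(),
                     nn.Linear(inner_dim, 2), nn.Sigmoid())
out = head(bn(torch.cat([rnn_summary, aux], dim=1)))   # [bsz, 2]
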
Example #17
    def __init__(self,
                 dictionary,
                 embed_dim=512,
                 hidden_size=512,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True,
                 encoder_output_units=512,
                 pretrained_embed=None,
                 share_input_output_embed=False,
                 adaptive_softmax_cutoff=None,
                 max_target_positions=DEFAULT_MAX_TARGET_POSITIONS):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                          padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        self.lstm = LSTM(hidden_size,
                         hidden_size,
                         dropout=dropout_in,
                         batch_first=True)
        self.fc_out = Linear(out_embed_dim, num_embeddings, bias=False)
Example #18
    def build_model(cls, args, config, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        # base_architecture(args)

        if args['model']['encoder_layers'] != args['model']['decoder_layers']:
            raise ValueError('--encoder-layers must match --decoder-layers')

        max_source_positions = args['model']['max_source_positions'] if args['model']['max_source_positions'] \
            else DEFAULT_MAX_SOURCE_POSITIONS
        max_target_positions = args['model']['max_target_positions'] if args['model']['max_target_positions'] \
            else DEFAULT_MAX_TARGET_POSITIONS

        def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        if args['model']['encoder_embed']:
            pretrained_encoder_embed = load_pretrained_embedding_from_file(
                args['model']['encoder_embed_path'], task.source_dictionary,
                args['model']['encoder_embed_dim'])
        else:
            num_embeddings = len(task.source_dictionary)
            pretrained_encoder_embed = Embedding(
                num_embeddings, args['model']['encoder_embed_dim'],
                task.source_dictionary.pad())

        if args['model']['share_all_embeddings']:
            # double check all parameters combinations are valid
            if task.source_dictionary != task.target_dictionary:
                raise ValueError(
                    '--share-all-embeddings requires a joint dictionary')
            if args['model']['decoder_embed_path'] and (
                    args['model']['decoder_embed_path'] !=
                    args['model']['encoder_embed_path']):
                raise ValueError(
                    '--share-all-embeddings is not compatible with --decoder-embed-path'
                )
            if args['model']['encoder_embed_dim'] != args['model'][
                    'decoder_embed_dim']:
                raise ValueError(
                    '--share-all-embeddings requires --encoder-embed-dim to '
                    'match --decoder-embed-dim')
            pretrained_decoder_embed = pretrained_encoder_embed
            args['model']['share_decoder_input_output_embed'] = True
        else:
            # separate decoder input embeddings
            pretrained_decoder_embed = None
            if args['model']['decoder_embed']:
                pretrained_decoder_embed = load_pretrained_embedding_from_file(
                    args['model']['decoder_embed'], task.target_dictionary,
                    args['model']['decoder_embed_dim'])
        # one last double check of parameter combinations
        if args['model']['share_decoder_input_output_embed'] and (
                args['model']['decoder_embed_dim'] !=
                args['model']['decoder_out_embed_dim']):
            raise ValueError(
                '--share-decoder-input-output-embeddings requires '
                '--decoder-embed-dim to match --decoder-out-embed-dim')

        if args['model']['encoder_freeze_embed']:
            pretrained_encoder_embed.weight.requires_grad = False
        if args['model']['decoder_freeze_embed']:
            pretrained_decoder_embed.weight.requires_grad = False

        encoder = NaryTreeLSTMEncoder(
            dictionary=task.source_dictionary,
            embed_dim=args['model']['encoder_embed_dim'],
            hidden_size=args['model']['encoder_hidden_size'],
            num_layers=args['model']['encoder_layers'],
            dropout_in=args['model']['encoder_dropout_in'],
            dropout_out=args['model']['encoder_dropout_out'],
            bidirectional=bool(args['model']['encoder_bidirectional']),
            left_pad=args['task']['left_pad_source'],
            pretrained_embed=pretrained_encoder_embed,
            max_source_positions=max_source_positions)
        decoder = LSTMDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args['model']['decoder_embed_dim'],
            hidden_size=args['model']['decoder_hidden_size'],
            out_embed_dim=args['model']['decoder_out_embed_dim'],
            num_layers=args['model']['decoder_layers'],
            dropout_in=args['model']['decoder_dropout_in'],
            dropout_out=args['model']['decoder_dropout_out'],
            attention=args['model']['decoder_attention'],
            encoder_output_units=encoder.output_units,
            pretrained_embed=pretrained_decoder_embed,
            share_input_output_embed=args['model']['share_decoder_input_output_embed'],
            adaptive_softmax_cutoff=(args['model']['adaptive_softmax_cutoff']
                                     if args['criterion'] == 'adaptive_loss'
                                     else None),
            max_target_positions=max_target_positions)
        return cls(encoder, decoder)
Example #19
    def build_model(cls, args, config, task):
        if args['model']['encoder_layers'] != args['model']['decoder_layers']:
            raise ValueError('--encoder-layers must match --decoder-layers')

        max_source_positions = args['model']['max_source_positions'] if args['model']['max_source_positions'] \
            else DEFAULT_MAX_SOURCE_POSITIONS
        max_target_positions = args['model']['max_target_positions'] if args['model']['max_target_positions'] \
            else DEFAULT_MAX_TARGET_POSITIONS

        def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        # subtoken
        if args['model']['encoder_path_embed']:
            pretrained_encoder_path_embed = load_pretrained_embedding_from_file(
                args['model']['encoder_path_embed'], task.source_dictionary,
                args['model']['encoder_path_embed_dim'])
        else:
            num_embeddings = len(task.source_dictionary)
            pretrained_encoder_path_embed = Embedding(
                num_embeddings,
                args['model']['encoder_path_embed_dim'],
                padding_idx=task.source_dictionary.pad())
        # type
        if args['model']['encoder_terminals_embed']:
            pretrained_encoder_terminals_embed = load_pretrained_embedding_from_file(
                args['model']['encoder_terminals_embed'], task.type_dict,
                args['model']['encoder_terminals_embed_dim'])
        else:
            num_embeddings = len(task.type_dict)
            pretrained_encoder_terminals_embed = Embedding(
                num_embeddings,
                args['model']['encoder_terminals_embed_dim'],
                padding_idx=task.type_dict.pad())
        # decoder
        if args['model']['decoder_embed']:
            pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args['model']['decoder_embed'], task.target_dictionary,
                args['model']['decoder_embed_dim'])
        else:
            num_embeddings = len(task.target_dictionary)
            pretrained_decoder_embed = Embedding(
                num_embeddings,
                args['model']['decoder_embed_dim'],
                padding_idx=task.target_dictionary.pad())

        if args['model']['encoder_path_freeze_embed']:
            pretrained_encoder_path_embed.weight.requires_grad = False
        if args['model']['encoder_terminals_freeze_embed']:
            pretrained_encoder_terminals_embed.weight.requires_grad = False
        if args['model']['decoder_freeze_embed']:
            pretrained_decoder_embed.weight.requires_grad = False

        encoder = PathEncoder(
            dictionary=task.source_dictionary,
            node_dictionary=task.type_dict,
            embed_dim=args['model']['encoder_path_embed_dim'],
            type_embed_dim=args['model']['encoder_terminals_embed_dim'],
            hidden_size=args['model']['encoder_hidden_size'],
            decoder_hidden_size=args['model']['decoder_hidden_size'],
            num_layers=args['model']['encoder_layers'],
            dropout_in=args['model']['encoder_dropout_in'],
            dropout_out=args['model']['encoder_dropout_out'],
            bidirectional=bool(args['model']['encoder_bidirectional']),
            pretrained_embed=pretrained_encoder_path_embed,
            pretrained_terminals_embed=pretrained_encoder_terminals_embed,
            max_source_positions=max_source_positions)
        decoder = PathDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args['model']['decoder_embed_dim'],
            hidden_size=args['model']['decoder_hidden_size'],
            out_embed_dim=args['model']['decoder_out_embed_dim'],
            num_layers=args['model']['decoder_layers'],
            dropout_in=args['model']['decoder_dropout_in'],
            dropout_out=args['model']['decoder_dropout_out'],
            attention=args['model']['decoder_attention'],
            encoder_output_units=encoder.output_units,
            pretrained_embed=pretrained_decoder_embed,
            share_input_output_embed=args['model']['share_decoder_input_output_embed'],
            adaptive_softmax_cutoff=(args['model']['adaptive_softmax_cutoff']
                                     if args['criterion'] == 'adaptive_loss'
                                     else None),
            max_target_positions=max_target_positions)
        return cls(encoder, decoder)