Example #1
    def __init__(self,
                 n_src_vocab,
                 n_layers=6,
                 n_head=8,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dropout=0.1,
                 dim_per_head=None):
        super().__init__()

        self.num_layers = n_layers
        self.embeddings = Embeddings(num_embeddings=n_src_vocab,
                                     embedding_dim=d_word_vec,
                                     dropout=dropout,
                                     add_position_embedding=True)
        self.block_stack = nn.ModuleList([
            EncoderBlock(d_model=d_model,
                         d_inner_hid=d_inner_hid,
                         n_head=n_head,
                         dropout=dropout,
                         dim_per_head=dim_per_head) for _ in range(n_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model)
Example #2
    def __init__(self,
                 n_words,
                 input_size,
                 hidden_size,
                 bridge_type="mlp",
                 dropout_rate=0.0):

        super(Decoder, self).__init__()

        self.bridge_type = bridge_type
        self.hidden_size = hidden_size
        self.context_size = hidden_size * 2

        self.embedding = Embeddings(num_embeddings=n_words,
                                    embedding_dim=input_size,
                                    dropout=0.0,
                                    add_position_embedding=False)

        self.cgru_cell = CGRUCell(input_size=input_size, hidden_size=hidden_size)

        self.linear_input = nn.Linear(in_features=input_size, out_features=input_size)
        self.linear_hidden = nn.Linear(in_features=hidden_size, out_features=input_size)
        self.linear_ctx = nn.Linear(in_features=hidden_size * 2, out_features=input_size)

        self.dropout = nn.Dropout(dropout_rate)

        self._reset_parameters()

        self._build_bridge()
Example #3
    def __init__(self,
                 vocab_size,
                 embedding_size=300,
                 hidden_size=512,
                 num_layers=2,
                 dropout=0.3,
                 shared_weight=True,
                 **kwargs):
        super().__init__()
        self.embedding = Embeddings(num_embeddings=vocab_size,
                                    embedding_dim=embedding_size,
                                    dropout=dropout)

        self.rnn = nn.LSTM(input_size=embedding_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=False,
                           dropout=dropout,
                           batch_first=True)
        # output layer
        self.output = nn.Sequential(nn.Linear(hidden_size, hidden_size),
                                    nn.Tanh(),
                                    nn.Linear(hidden_size, embedding_size))

        # projection layer
        self.proj = nn.Linear(embedding_size, vocab_size, bias=False)
        if shared_weight:
            self.proj.weight = self.embedding.embeddings.weight
        else:
            my_init.default_init(self.proj.weight)
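
When shared_weight is set, the projection layer reuses the embedding table (weight tying), so input and output word representations share one parameter matrix. Below is a standalone sketch of the same tying pattern in plain PyTorch; the names and sizes are illustrative and not taken from the codebase:

import torch.nn as nn

vocab_size, embedding_size = 10000, 300
embedding = nn.Embedding(vocab_size, embedding_size)
proj = nn.Linear(embedding_size, vocab_size, bias=False)
# nn.Linear stores its weight as [out_features, in_features] = [vocab_size, embedding_size],
# which matches the embedding table's shape, so both modules can share one tensor
proj.weight = embedding.weight
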
Example #4
    def __init__(self,
                 n_tgt_vocab,
                 n_layers=6,
                 n_head=8,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dropout=0.1):

        super(Decoder, self).__init__()

        self.n_head = n_head
        self.num_layers = n_layers
        self.d_model = d_model

        self.embeddings = Embeddings(n_tgt_vocab,
                                     d_word_vec,
                                     dropout=dropout,
                                     add_position_embedding=True)

        self.block_stack = nn.ModuleList([
            DecoderBlock(d_model=d_model,
                         d_inner_hid=d_inner_hid,
                         n_head=n_head,
                         dropout=dropout) for _ in range(n_layers)
        ])

        self.out_layer_norm = nn.LayerNorm(d_model)
Example #5
    def __init__(
        self,
        n_src_words,
        n_trg_words,
        d_word_vec,
        d_model,
        dropout=0.0,
        **kwargs,
    ):
        super(TransDiscriminator, self).__init__()
        # the embeddings are pre-trained and used without a dropout layer
        self.src_embedding = Embeddings(num_embeddings=n_src_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        self.trg_embedding = Embeddings(num_embeddings=n_trg_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        if not kwargs["update_embedding"]:
            for param in self.src_embedding.parameters():
                param.requires_grad = False
            for param in self.trg_embedding.parameters():
                param.requires_grad = False

        self.src_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        self.trg_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        # 4 * d_model: the two bi-GRU outputs (2 * d_model each) concatenated
        self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)

        # classify whether (x, y) is a translation pair
        self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
        self.dropout = nn.Dropout(dropout)
Example #6
    def __init__(self,
                 n_src_vocab,
                 char_src_vocab=-1,
                 n_layers=6,
                 n_head=8,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dropout=0.1,
                 dim_per_head=None,
                 padding_idx=PAD,
                 positional_embedding="sin",
                 layer_norm_first=True,
                 ffn_activation="relu"):
        super().__init__()

        self.scale = d_word_vec**0.5
        self.num_layers = n_layers
        self.layer_norm_first = layer_norm_first

        self.embeddings = Embeddings(num_embeddings=n_src_vocab,
                                     embedding_dim=d_word_vec,
                                     dropout=dropout,
                                     positional_embedding=positional_embedding)

        self.char_embeddings = Embeddings(num_embeddings=char_src_vocab,
                                          embedding_dim=d_word_vec,
                                          dropout=dropout)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model=d_model,
                         d_inner_hid=d_inner_hid,
                         n_head=n_head,
                         dropout=dropout,
                         dim_per_head=dim_per_head,
                         layer_norm_first=layer_norm_first,
                         ffn_activation=ffn_activation,
                         contain_char_attn=True) for _ in range(n_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model)
Example #7
    def __init__(self, n_words, input_size, hidden_size):
        super(Encoder, self).__init__()

        # Use PAD
        self.embeddings = Embeddings(num_embeddings=n_words,
                                     embedding_dim=input_size,
                                     dropout=0.0,
                                     add_position_embedding=False)

        self.gru = RNN(type="gru",
                       batch_first=True,
                       input_size=input_size,
                       hidden_size=hidden_size,
                       bidirectional=True)
Example #8
    def __init__(self, n_words, input_size, hidden_size, dropout_rate=0.0):

        super(DisentangleRNNDecoder, self).__init__()

        self.hidden_size = hidden_size

        self.embeddings = Embeddings(num_embeddings=n_words,
                                     embedding_dim=input_size,
                                     dropout=0.0,
                                     add_position_embedding=False)

        self.cell = GRUAttnCell(input_size=input_size, hidden_size=hidden_size)

        self.linear_hidden = nn.Linear(in_features=hidden_size,
                                       out_features=input_size)
        # self.linear_ctx = nn.Linear(in_features=context_size, out_features=input_size)

        self.dropout = nn.Dropout(dropout_rate)
Example #9
    def __init__(self,
                 n_tgt_vocab,
                 n_layers=6,
                 n_head=8,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dim_per_head=None,
                 dropout=0.1,
                 positional_embedding="sin",
                 layer_norm_first=True,
                 padding_idx=PAD,
                 ffn_activation="relu"):

        super(Decoder, self).__init__()

        self.n_head = n_head
        self.num_layers = n_layers
        self.d_model = d_model
        self.layer_norm_first = layer_norm_first

        self.embeddings = Embeddings(n_tgt_vocab,
                                     d_word_vec,
                                     dropout=dropout,
                                     positional_embedding=positional_embedding,
                                     padding_idx=padding_idx)

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model=d_model,
                         d_inner_hid=d_inner_hid,
                         n_head=n_head,
                         dropout=dropout,
                         dim_per_head=dim_per_head,
                         layer_norm_first=layer_norm_first,
                         ffn_activation=ffn_activation,
                         contain_char_attn=True) for _ in range(n_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model)

        self._dim_per_head = dim_per_head
Example #10
    def __init__(self,
                 n_words,
                 action_space=2,
                 action_roll_steps=1,
                 d_word_vec=512,
                 d_model=256,
                 dropout=0.0,
                 **kwargs):
        super(Attacker, self).__init__()
        self.action_roll_steps = action_roll_steps
        self.action_space = action_space
        self.input_size = d_word_vec
        self.hidden_size = d_model
        self.src_embedding = Embeddings(num_embeddings=n_words,
                                        embedding_dim=self.input_size,
                                        dropout=dropout,
                                        add_position_embedding=False)
        # label representation
        self.src_gru = RNN(type="gru", batch_first=True, input_size=self.input_size,
                           hidden_size=self.hidden_size, bidirectional=True)

        # inputs: current input, avg_seqs as ctx
        self.ctx_linear = nn.Linear(in_features=2*self.hidden_size,
                                    out_features=self.hidden_size)
        self.input_linear = nn.Linear(in_features=self.input_size,
                                      out_features=self.hidden_size)
        # layer norm for the input features
        self.layer_norm = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

        # outputs: actor distribution and critic value
        self.attacker_linear = nn.Linear(in_features=self.hidden_size,
                                         out_features=self.action_space)
        self.critic_linear = nn.Linear(in_features=self.hidden_size,
                                       out_features=1)
        self.dropout = nn.Dropout(dropout)

        self._reset_parameters()
Example #11
class TransDiscriminator(nn.Module):
    """
    discriminate whether the trg (y) is a translation of a src (x)
    """
    def __init__(
        self,
        n_src_words,
        n_trg_words,
        d_word_vec,
        d_model,
        dropout=0.0,
        **kwargs,
    ):
        super(TransDiscriminator, self).__init__()
        # the embeddings are pre-trained and used without a dropout layer
        self.src_embedding = Embeddings(num_embeddings=n_src_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        self.trg_embedding = Embeddings(num_embeddings=n_trg_words,
                                        embedding_dim=d_word_vec,
                                        dropout=dropout,
                                        add_position_embedding=False)
        if not kwargs["update_embedding"]:
            for param in self.src_embedding.parameters():
                param.requires_grad = False
            for param in self.trg_embedding.parameters():
                param.requires_grad = False

        self.src_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        self.trg_gru = RNN(type="gru",
                           batch_first=True,
                           input_size=d_word_vec,
                           hidden_size=d_model,
                           bidirectional=True)
        # 4 * d_model: the two bi-GRU outputs (2 * d_model each) concatenated
        self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)

        # classify whether (x, y) is a translation pair
        self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, y):
        """
        given src and trg, output classification label
        :param x: batched src in shape [batch_size, max_seq_len]
        :param y: batched trg in shape [batch_size, max_seq_len]
        :return: labels indicating probability in shape [batch_size, 2]
        """
        x_mask = x.detach().eq(PAD)
        y_mask = y.detach().eq(PAD)
        x_emb = self.src_embedding(x)
        y_emb = self.trg_embedding(y)

        ctx_x, _ = self.src_gru(x_emb, x_mask)
        ctx_y, _ = self.trg_gru(y_emb, y_mask)

        x_pad_mask = 1.0 - x_mask.float()
        y_pad_mask = 1.0 - y_mask.float()
        x_ctx_mean = (ctx_x * x_pad_mask.unsqueeze(2)
                      ).sum(1) / x_pad_mask.unsqueeze(2).sum(1)
        y_ctx_mean = (ctx_y * y_pad_mask.unsqueeze(2)
                      ).sum(1) / y_pad_mask.unsqueeze(2).sum(1)
        output = self.layer_norm(torch.cat((x_ctx_mean, y_ctx_mean), dim=-1))
        output = F.softmax(self.ffn(self.dropout(output)), dim=-1)
        return output

    def reset(self):
        def weights_init(tensor):
            if tensor.ndimension() == 1:
                nn.init.constant_(tensor, val=0.0)
            else:
                nn.init.xavier_normal_(tensor)
            return tensor

        # collect module parameters
        for name, p in self.named_parameters():
            # print(name, " reset")
            weights_init(p)
        return
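
A minimal usage sketch of the discriminator above; it assumes PAD and the Embeddings/RNN helpers are importable from the surrounding codebase, and the vocabulary sizes, batch shapes, and update_embedding flag are illustrative:

import torch

discriminator = TransDiscriminator(n_src_words=30000,
                                   n_trg_words=30000,
                                   d_word_vec=512,
                                   d_model=256,
                                   dropout=0.1,
                                   update_embedding=True)  # consumed via **kwargs in __init__

x = torch.randint(0, 30000, (8, 20))  # batched source token ids [batch_size, max_seq_len]
y = torch.randint(0, 30000, (8, 24))  # batched target token ids [batch_size, max_seq_len]
probs = discriminator(x, y)           # [8, 2] class probabilities for each (x, y) pair
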
Example #12
def build_translate_model(victim_config,
                          victim_model_path,
                          vocab_src,
                          vocab_trg,
                          device,
                          ):
    """
    build translation env, the nmt_model without *src* embedding,
    and the corresponding(separated) embedding layers.
    :param victim_config: victim configs
    :param victim_model_path: victim_models
    :param vocab_src: source vocabulary
    :param vocab_trg: target vocabulary
    :param device: map location (cpu or cuda:*)
    :return: embeddings layers, nmt_models without source embedding used in the beam-search
    """
    translate_model_configs = victim_config["model_configs"]
    src_emb = None
    trg_emb = None
    nmt_model = None
    # build model for translation (w/o src embedding)
    if translate_model_configs["model"] == "Transformer":
        src_emb = Embeddings(
            num_embeddings=vocab_src.max_n_words, embedding_dim=translate_model_configs["d_word_vec"],
            dropout=translate_model_configs["dropout"], add_position_embedding=True)
        trg_emb = Embeddings(
            num_embeddings=vocab_trg.max_n_words, embedding_dim=translate_model_configs["d_word_vec"],
            dropout=translate_model_configs["dropout"], add_position_embedding=True)
        nmt_model = TransformerTranslator(n_tgt_vocab=vocab_trg.max_n_words,
                                          **translate_model_configs)
    elif translate_model_configs["model"] == "DL4MT":
        src_emb = Embeddings(
            num_embeddings=vocab_src.max_n_words, embedding_dim=translate_model_configs["d_word_vec"],
            dropout=0.0, add_position_embedding=False)
        trg_emb = Embeddings(
            num_embeddings=vocab_trg.max_n_words, embedding_dim=translate_model_configs["d_word_vec"],
            dropout=0.0, add_position_embedding=False)
        nmt_model = Dl4mtTranslator(n_tgt_vocab=vocab_trg.max_n_words,
                                    **translate_model_configs)
    else:
        INFO("unregistered model type of victim in config")
    src_emb.to(device)
    trg_emb.to(device)
    nmt_model.to(device)

    INFO("load params to device %s" % device)
    state_dict = load_translate_model(victim_model_path, map_location=device)
    _src_dict = dict()
    _trg_dict = dict()
    _model_dict = dict()
    for name, _ in nmt_model.state_dict().items():
        # when shared_proj is enabled, the Generator weight is a plain tensor instead of an nn.Module
        if name in state_dict.keys():
            _model_dict[name] = state_dict[name]
        else:
            print("error, fail to locate %s for model in the state_dict" % name)
    for name, _ in src_emb.state_dict().items():
        if "encoder.embeddings."+name in state_dict.keys():
            _src_dict[name] = state_dict["encoder.embeddings."+name]
        else:
            print("error, fail to locate %s for src_emb in the state_dict" % name)
    for name, _ in trg_emb.state_dict().items():
        if "decoder.embeddings."+name in state_dict.keys():
            _trg_dict[name] = state_dict["decoder.embeddings."+name]
        else:
            print("error, fail to locate %s for trg_emb in the state_dict" % name)
    del state_dict
    src_emb.load_state_dict(_src_dict)
    trg_emb.load_state_dict(_trg_dict)
    nmt_model.load_state_dict(_model_dict)
    src_emb.eval()
    trg_emb.eval()
    nmt_model.eval()
    INFO("finished building translation model(w/o src embedding layer) for environment on %s" % device)
    return src_emb, trg_emb, nmt_model
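
An illustrative call of the builder above; victim_config, victim_model_path, vocab_src, vocab_trg, and src_batch are assumed to be provided by the surrounding codebase and are not defined here:

import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"
src_emb, trg_emb, nmt_model = build_translate_model(victim_config=victim_config,
                                                    victim_model_path=victim_model_path,
                                                    vocab_src=vocab_src,
                                                    vocab_trg=vocab_trg,
                                                    device=device)
# the returned modules are already on `device` and in eval mode, so source
# embeddings can be computed (and perturbed) outside the NMT model, e.g.:
with torch.no_grad():
    src_states = src_emb(src_batch)  # src_batch: LongTensor of source token ids
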
Example #13
    def __init__(self,
                 n_tgt_vocab,
                 n_layers=6,
                 n_head=8,
                 capsule_type="output",
                 routing_type="dynamic_routing",
                 comb_type="ffn",
                 dim_capsule=100,
                 num_capsules=8,
                 null_capsule=False,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dim_per_head=None,
                 dropout=0.1):

        super(Decoder, self).__init__()

        self.n_head = n_head
        self.num_layers = n_layers
        self.d_model = d_model

        self.embeddings = Embeddings(n_tgt_vocab,
                                     d_word_vec,
                                     dropout=dropout,
                                     add_position_embedding=True)

        self.block_stack = nn.ModuleList([
            DecoderBlock(d_model=d_model,
                         d_inner_hid=d_inner_hid,
                         n_head=n_head,
                         dropout=dropout,
                         dim_per_head=dim_per_head,
                         dim_capsule=dim_capsule,
                         num_capsules=num_capsules
                         if capsule_type.startswith("layer-wise") else 0,
                         null_capsule=null_capsule) for _ in range(n_layers)
        ])

        self.out_layer_norm = nn.LayerNorm(d_model)

        self._dim_per_head = dim_per_head
        #
        # # contextual capsule layer
        # if capsule_type == "output":
        #     self.apply_output_capsule = True
        #     self.pre_capsule_layer_norm = nn.LayerNorm(d_model)
        #
        #     assert dim_capsule % num_capsules == 0
        #     self.dim_per_cap = dim_capsule // num_capsules
        #
        #     self.null_caps = null_capsule
        #     if null_capsule:
        #         INFO("Using Null Capsules to attract irrelevant routing.")
        #
        #     total_num_capsules = num_capsules if not self.null_caps else int(num_capsules * 1.5)
        #
        #     self.routing_type = routing_type
        #     if routing_type == "dynamic_routing":
        #         self.final_capsule_layer = ContextualCapsuleLayer(
        #             num_out_caps=total_num_capsules, num_in_caps=None,
        #             dim_in_caps=d_model,
        #             dim_out_caps=self.dim_per_cap,
        #             dim_context=d_model,
        #             num_iterations=3,
        #             share_route_weights_for_in_caps=True)
        #
        #     elif routing_type == "EM_routing":
        #         self.final_capsule_layer = EMContextualCapsuleLayer(
        #             num_out_caps=total_num_capsules, num_in_caps=None,
        #             dim_in_caps=d_model, dim_out_caps=self.dim_per_cap, dim_context=d_model,
        #             num_iterations=3,
        #             share_route_weights_for_in_caps=True)
        #
        #     dim_per_part = dim_capsule // 2
        #     if comb_type == "ffn":
        #         self.out_and_cap_ffn = MultiInputPositionwiseFeedForward(
        #             size=d_model, hidden_size=d_inner_hid, dropout=dropout,
        #             inp_sizes=[dim_per_part, dim_per_part]
        #         )
        #     elif comb_type == "gate":
        #         self.out_and_cap_ffn = MultiInputGates(
        #             d_model=d_model, input_sizes=[dim_per_part, dim_per_part],
        #             dropout=dropout
        #         )
        # else:
        #     self.apply_output_capsule = False

        if capsule_type == "layer-wise-share":
            for i in range(1, n_layers):
                self.block_stack[i].capsule_layer = self.block_stack[
                    0].capsule_layer
                self.block_stack[i].out_and_cap_ffn = self.block_stack[
                    0].out_and_cap_ffn