def test_recurrent_freeze(self):
    decoder = RecurrentDecoder(hidden_size=self.hidden_size,
                               encoder=self.encoders[0],
                               attention="bahdanau",
                               emb_size=self.emb_size,
                               vocab_size=self.vocab_size,
                               num_layers=self.num_layers,
                               init_hidden="zero",
                               input_feeding=False,
                               freeze=True)
    for n, p in decoder.named_parameters():
        self.assertFalse(p.requires_grad)
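
# The freeze=True flag above is expected to switch off gradient updates for
# the whole decoder. A minimal sketch of what such freezing boils down to
# (an assumption for illustration, not necessarily the library's exact helper):
import torch


def freeze_params(module: torch.nn.Module) -> None:
    """Disable gradient computation for every parameter of `module`."""
    for param in module.parameters():
        param.requires_grad = False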

def test_recurrent_decoder_type(self):
    valid_rnn_types = {"gru": GRU, "lstm": LSTM}
    for name, obj in valid_rnn_types.items():
        decoder = RecurrentDecoder(rnn_type=name,
                                   hidden_size=self.hidden_size,
                                   encoder=self.encoders[0],
                                   attention="bahdanau",
                                   emb_size=self.emb_size,
                                   vocab_size=self.vocab_size,
                                   num_layers=self.num_layers,
                                   init_hidden="zero",
                                   input_feeding=False)
        self.assertEqual(type(decoder.rnn), obj)

def test_recurrent_decoder_size(self):
    # test all combinations of bridge, input_feeding, encoder directions
    for encoder in self.encoders:
        for init_hidden in ["bridge", "zero", "last"]:
            for input_feeding in [True, False]:
                decoder = RecurrentDecoder(hidden_size=self.hidden_size,
                                           encoder=encoder,
                                           attention="bahdanau",
                                           emb_size=self.emb_size,
                                           vocab_size=self.vocab_size,
                                           num_layers=self.num_layers,
                                           init_hidden=init_hidden,
                                           input_feeding=input_feeding)
                self.assertEqual(decoder.rnn.hidden_size, self.hidden_size)
                self.assertEqual(decoder.att_vector_layer.out_features,
                                 self.hidden_size)
                self.assertEqual(decoder.output_layer.out_features,
                                 self.vocab_size)
                self.assertEqual(decoder.output_size, self.vocab_size)
                self.assertEqual(decoder.rnn.bidirectional, False)

                self.assertEqual(decoder.init_hidden_option, init_hidden)
                if init_hidden == "bridge":
                    self.assertTrue(hasattr(decoder, "bridge_layer"))
                    self.assertEqual(decoder.bridge_layer.out_features,
                                     self.hidden_size)
                    self.assertEqual(decoder.bridge_layer.in_features,
                                     encoder.output_size)
                else:
                    self.assertFalse(hasattr(decoder, "bridge_layer"))

                if input_feeding:
                    self.assertEqual(decoder.rnn_input_size,
                                     self.emb_size + self.hidden_size)
                else:
                    self.assertEqual(decoder.rnn_input_size, self.emb_size)
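
# The assertions above pin down the "bridge" initialisation as a linear layer
# mapping the encoder's output size to the decoder's hidden size. A minimal
# sketch of that idea (the tanh non-linearity is an assumption for
# illustration, not something the test checks):
import torch

encoder_output_size, decoder_hidden_size = 12, 6
bridge_layer = torch.nn.Linear(encoder_output_size, decoder_hidden_size)
encoder_final = torch.rand(2, encoder_output_size)       # batch x enc_output
init_hidden = torch.tanh(bridge_layer(encoder_final))    # batch x hidden
assert init_hidden.shape == (2, decoder_hidden_size)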
def build_model(cfg: dict = None,
                src_vocab: Vocabulary = None,
                trg_vocab: Vocabulary = None) -> Model:
    """
    Build and initialize the model according to the configuration.

    :param cfg: dictionary configuration containing model specifications
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :return: built and initialized model
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                           vocab_size=len(src_vocab),
                           padding_idx=src_padding_idx)

    # this ties source and target embeddings
    # for softmax layer tying, see further below
    if cfg.get("tied_embeddings", False):
        if src_vocab.itos == trg_vocab.itos:
            # share embeddings for src and trg
            trg_embed = src_embed
        else:
            raise ConfigurationError(
                "Embedding cannot be tied since vocabularies differ.")
    else:
        trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                               vocab_size=len(trg_vocab),
                               padding_idx=trg_padding_idx)

    # build encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
            cfg["encoder"]["hidden_size"], \
            "for transformer, emb_size must be hidden_size"

        encoder = TransformerEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)
    else:
        encoder = RecurrentEncoder(**cfg["encoder"],
                                   emb_size=src_embed.embedding_dim,
                                   emb_dropout=enc_emb_dropout)

    # build decoder
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        decoder = TransformerDecoder(**cfg["decoder"],
                                     encoder=encoder,
                                     vocab_size=len(trg_vocab),
                                     emb_size=trg_embed.embedding_dim,
                                     emb_dropout=dec_emb_dropout)
    else:
        decoder = RecurrentDecoder(**cfg["decoder"],
                                   encoder=encoder,
                                   vocab_size=len(trg_vocab),
                                   emb_size=trg_embed.embedding_dim,
                                   emb_dropout=dec_emb_dropout)

    model = Model(encoder=encoder,
                  decoder=decoder,
                  src_embed=src_embed,
                  trg_embed=trg_embed,
                  src_vocab=src_vocab,
                  trg_vocab=trg_vocab)

    # tie softmax layer with trg embeddings
    if cfg.get("tied_softmax", False):
        if trg_embed.lut.weight.shape == \
                model.decoder.output_layer.weight.shape:
            # (also) share trg embeddings and softmax layer:
            model.decoder.output_layer.weight = trg_embed.lut.weight
        else:
            raise ConfigurationError(
                "For tied_softmax, the decoder embedding_dim and decoder "
                "hidden_size must be the same."
                "The decoder must be a Transformer.")

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    return model
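
# A minimal sketch of calling build_model. The vocabularies are assumed to be
# Vocabulary objects built elsewhere, and the config keys shown are only the
# ones this function reads (embeddings, hidden_size, type, dropout,
# tied_embeddings/tied_softmax) plus kwargs forwarded to the encoder/decoder
# constructors, so treat the exact values as placeholders.
example_cfg = {
    "tied_embeddings": False,
    "tied_softmax": False,
    "encoder": {
        "type": "recurrent",
        "rnn_type": "gru",
        "hidden_size": 64,
        "dropout": 0.1,
        "embeddings": {"embedding_dim": 32, "dropout": 0.1},
    },
    "decoder": {
        "type": "recurrent",
        "rnn_type": "gru",
        "hidden_size": 64,
        "attention": "bahdanau",
        "dropout": 0.1,
        "embeddings": {"embedding_dim": 32, "dropout": 0.1},
    },
}
# model = build_model(example_cfg, src_vocab=src_vocab, trg_vocab=trg_vocab)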
def build_speech_model(cfg: dict = None,
                       src_vocab: Vocabulary = None,
                       trg_vocab: Vocabulary = None) -> SpeechModel:
    """
    Build and initialize the model according to the configuration.

    :param cfg: dictionary configuration containing model specifications
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :return: built and initialized model
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                           vocab_size=len(src_vocab),
                           padding_idx=src_padding_idx)

    if cfg.get("tied_embeddings", False) \
            and src_vocab.itos == trg_vocab.itos:
        # share embeddings for src and trg
        trg_embed = src_embed
    else:
        trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                               vocab_size=len(trg_vocab),
                               padding_idx=trg_padding_idx)

    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)

    encoder = SpeechRecurrentEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)

    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)

    if cfg["decoder"].get("use_conditional_decoder", True):
        decoder = ConditionalRecurrentDecoder(**cfg["decoder"],
                                              encoder=encoder,
                                              vocab_size=len(trg_vocab),
                                              emb_size=trg_embed.embedding_dim,
                                              emb_dropout=dec_emb_dropout)

    else:
        decoder = RecurrentDecoder(**cfg["decoder"],
                                   encoder=encoder,
                                   vocab_size=len(trg_vocab),
                                   emb_size=trg_embed.embedding_dim,
                                   emb_dropout=dec_emb_dropout)

    model = SpeechModel(encoder=encoder,
                        decoder=decoder,
                        src_embed=src_embed,
                        trg_embed=trg_embed,
                        src_vocab=src_vocab,
                        trg_vocab=trg_vocab)

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    return model
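
# build_speech_model differs from build_model mainly in the encoder (always a
# SpeechRecurrentEncoder) and in the decoder switch: cfg["decoder"] may set
# "use_conditional_decoder" (default True) to pick ConditionalRecurrentDecoder
# over RecurrentDecoder. A sketch of the relevant decoder fragment, with
# placeholder values:
speech_decoder_cfg = {
    "rnn_type": "gru",
    "hidden_size": 64,
    "attention": "bahdanau",
    "use_conditional_decoder": True,  # False falls back to RecurrentDecoder
    "embeddings": {"embedding_dim": 32, "dropout": 0.1},
}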

def test_recurrent_input_dropout(self):
    drop_prob = 0.5
    decoder = RecurrentDecoder(hidden_size=self.hidden_size,
                               encoder=self.encoders[0],
                               attention="bahdanau",
                               emb_size=self.emb_size,
                               vocab_size=self.vocab_size,
                               num_layers=self.num_layers,
                               init_hidden="zero",
                               input_feeding=False,
                               dropout=drop_prob,
                               emb_dropout=drop_prob)
    input_tensor = torch.Tensor([2, 3, 1, -1])
    decoder.train()
    dropped = decoder.emb_dropout(input=input_tensor)
    # eval switches off dropout
    decoder.eval()
    no_drop = decoder.emb_dropout(input=input_tensor)
    # when dropout is applied, the surviving values are scaled
    # by 1 / (1 - drop_prob)
    self.assertGreaterEqual((no_drop - (drop_prob * dropped)).abs().sum(),
                            0)

    drop_prob = 1.0
    decoder = RecurrentDecoder(hidden_size=self.hidden_size,
                               encoder=self.encoders[0],
                               attention="bahdanau",
                               emb_size=self.emb_size,
                               vocab_size=self.vocab_size,
                               num_layers=self.num_layers,
                               init_hidden="zero",
                               input_feeding=False,
                               dropout=drop_prob,
                               emb_dropout=drop_prob)
    all_dropped = decoder.emb_dropout(input=input_tensor)
    self.assertEqual(all_dropped.sum(), 0)
    decoder.eval()
    none_dropped = decoder.emb_dropout(input=input_tensor)
    self.assertTensorEqual(no_drop, none_dropped)
    self.assertTensorEqual((no_drop - all_dropped), no_drop)
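
# The test above relies on PyTorch's inverted dropout: in train mode the
# surviving entries are scaled by 1 / (1 - p), and in eval mode dropout is the
# identity. A minimal standalone sketch:
import torch

dropout = torch.nn.Dropout(p=0.5)
x = torch.ones(1000)

dropout.train()
y = dropout(x)                        # zeros and values of 1 / (1 - 0.5) = 2.0
assert set(y.unique().tolist()) <= {0.0, 2.0}

dropout.eval()
assert torch.equal(dropout(x), x)     # no-op at eval time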

def test_recurrent_forward(self):
     time_dim = 4
     batch_size = 2
     # make sure the outputs match the targets
     decoder = RecurrentDecoder(hidden_size=self.hidden_size,
                                encoder=self.encoders[0],
                                attention="bahdanau",
                                emb_size=self.emb_size,
                                vocab_size=self.vocab_size,
                                num_layers=self.num_layers,
                                init_hidden="zero",
                                input_feeding=False)
     encoder_states = torch.rand(size=(batch_size, time_dim,
                                       self.encoders[0].output_size))
     trg_inputs = torch.ones(size=(batch_size, time_dim, self.emb_size))
     # no padding, no mask
     #x_length = torch.Tensor([time_dim]*batch_size).int()
     mask = torch.ones(size=(batch_size, 1, time_dim)).byte()
     output, hidden, att_probs, att_vectors = decoder(
         trg_inputs,
         encoder_hidden=encoder_states[:, -1, :],
         encoder_output=encoder_states,
         src_mask=mask,
         unroll_steps=time_dim,
         hidden=None,
         prev_att_vector=None)
     self.assertEqual(output.shape,
                      torch.Size([batch_size, time_dim, self.vocab_size]))
     self.assertEqual(
         hidden.shape,
         torch.Size([self.num_layers, batch_size, self.hidden_size]))
     self.assertEqual(att_probs.shape,
                      torch.Size([batch_size, time_dim, time_dim]))
     self.assertEqual(att_vectors.shape,
                      torch.Size([batch_size, time_dim, self.hidden_size]))
     hidden_target = torch.Tensor(
         [[[0.1814, 0.5468, -0.4717, -0.7580, 0.5834, -0.4018],
           [0.1814, 0.5468, -0.4717, -0.7580, 0.5834, -0.4018]],
          [[0.4649, 0.5484, -0.2702, 0.4545, 0.1983, 0.2771],
           [0.4649, 0.5484, -0.2702, 0.4545, 0.1983, 0.2771]],
          [[-0.1752, -0.4215, 0.1941, -0.3975, -0.2317, -0.5566],
           [-0.1752, -0.4215, 0.1941, -0.3975, -0.2317, -0.5566]]])
     output_target = torch.Tensor(
         [[[0.2702, -0.1988, -0.1985, -0.2998, -0.2564],
           [0.2719, -0.2075, -0.2017, -0.2988, -0.2595],
           [0.2720, -0.2143, -0.2084, -0.3024, -0.2537],
           [0.2714, -0.2183, -0.2135, -0.3061, -0.2468]],
          [[0.2757, -0.1744, -0.1888, -0.3038, -0.2466],
           [0.2782, -0.1837, -0.1928, -0.3028, -0.2505],
           [0.2785, -0.1904, -0.1994, -0.3066, -0.2448],
           [0.2777, -0.1943, -0.2042, -0.3105, -0.2379]]])
     att_vectors_target = torch.Tensor(
         [[[-0.6196, -0.0505, 0.4900, 0.6286, -0.5007, -0.3721],
           [-0.6389, -0.0337, 0.4998, 0.6458, -0.5052, -0.3579],
           [-0.6396, -0.0158, 0.5058, 0.6609, -0.5035, -0.3660],
           [-0.6348, -0.0017, 0.5090, 0.6719, -0.5013, -0.3771]],
          [[-0.5697, -0.0887, 0.4515, 0.6128, -0.4713, -0.4068],
           [-0.5910, -0.0721, 0.4617, 0.6305, -0.4760, -0.3930],
           [-0.5918, -0.0544, 0.4680, 0.6461, -0.4741, -0.4008],
           [-0.5866, -0.0405, 0.4712, 0.6574, -0.4718, -0.4116]]])
     self.assertTensorAlmostEqual(hidden_target, hidden)
     self.assertTensorAlmostEqual(output_target, output)
     self.assertTensorAlmostEqual(att_vectors, att_vectors_target)
     # att_probs should be a distribution over the source positions
     self.assertTensorAlmostEqual(att_probs.sum(2),
                                  torch.ones(batch_size, time_dim))
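
# Note that att_probs is normalised over the source positions (dim 2), not
# over the vocabulary: each target step's attention scores are softmaxed over
# src_len. A minimal sketch of that invariant:
import torch

scores = torch.rand(2, 4, 4)                  # batch x trg_len x src_len
att_probs = torch.softmax(scores, dim=-1)
assert torch.allclose(att_probs.sum(dim=-1), torch.ones(2, 4))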