def test_transformer_encoder_forward(self):
    batch_size = 2
    time_dim = 4
    torch.manual_seed(self.seed)
    encoder = TransformerEncoder(hidden_size=self.hidden_size,
                                 ff_size=self.ff_size,
                                 num_layers=self.num_layers,
                                 num_heads=self.num_heads,
                                 dropout=self.dropout,
                                 emb_dropout=self.dropout)

    for p in encoder.parameters():
        torch.nn.init.uniform_(p, -0.5, 0.5)

    x = torch.rand(size=(batch_size, time_dim, self.emb_size))

    # no padding, no mask
    x_length = torch.Tensor([time_dim] * batch_size).int()
    mask = torch.ones([batch_size, time_dim, 1]) == 1

    output, hidden = encoder(x, x_length, mask)

    self.assertEqual(output.shape,
                     torch.Size([batch_size, time_dim, self.hidden_size]))
    self.assertEqual(hidden, None)

    output_target = torch.Tensor(
        [[[0.1615, -0.1195, 0.0586, -0.0921, -0.3483, -0.3654, -0.6052,
           -0.3355, 0.3179, 0.2757, -0.2909, -0.0346],
          [0.1272, -0.1241, 0.0223, -0.1463, -0.3462, -0.1579, -0.5591,
           -0.6274, 0.1822, 0.3043, -0.3818, 0.0094],
          [0.0616, -0.1344, 0.0625, 0.0056, -0.2785, -0.4290, -0.5765,
           -0.5176, -0.0598, 0.3389, -0.5522, -0.1692],
          [0.1539, -0.1371, 0.0026, -0.0248, -0.0856, -0.3223, -0.5537,
           -0.3948, -0.2586, 0.2458, -0.2887, -0.0698]],
         [[0.1863, -0.1198, 0.1006, -0.0277, -0.3779, -0.3728, -0.6343,
           -0.3449, 0.2131, 0.2448, -0.3122, -0.1777],
          [0.0254, -0.1219, 0.0436, -0.0289, -0.2932, -0.2377, -0.6003,
           -0.5406, 0.2308, 0.3578, -0.3728, 0.0707],
          [0.1146, -0.1270, 0.1163, -0.0290, -0.3773, -0.3924, -0.5738,
           -0.6528, 0.1428, 0.3623, -0.4796, 0.0471],
          [0.0815, -0.1355, 0.1016, 0.0496, -0.3001, -0.4812, -0.5557,
           -0.6937, 0.1002, 0.2873, -0.4675, -0.1383]]])
    self.assertTensorAlmostEqual(output_target, output)
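# The tests in this file reference fixture attributes (self.seed,
# self.hidden_size, self.ff_size, self.num_layers, self.num_heads,
# self.dropout, self.emb_size) and an assertTensorAlmostEqual helper that are
# not part of this excerpt. A minimal sketch of such a fixture follows;
# hidden_size/emb_size = 12 are grounded in the 12-feature reference outputs
# above, while the other values are illustrative assumptions.
import unittest

import torch


class TensorTestCase(unittest.TestCase):  # class name is an assumption

    def setUp(self):
        self.seed = 42         # assumed; any fixed seed keeps the test reproducible
        self.emb_size = 12     # must match the trailing dim of the random input
        self.hidden_size = 12  # grounded: the reference outputs have 12 features
        self.ff_size = 24      # illustrative
        self.num_layers = 3    # illustrative
        self.num_heads = 4     # illustrative; must divide hidden_size evenly
        self.dropout = 0.      # zero dropout keeps the forward pass deterministic

    def assertTensorAlmostEqual(self, expected, actual, atol=1e-4):
        # helper assumed by the tests: element-wise closeness up to `atol`
        self.assertTrue(torch.allclose(expected, actual, atol=atol))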
def test_transformer_encoder_forward(self):
    batch_size = 2
    time_dim = 4
    encoder = TransformerEncoder(hidden_size=self.hidden_size,
                                 ff_size=self.ff_size,
                                 num_layers=self.num_layers,
                                 num_heads=self.num_heads,
                                 dropout=self.dropout)

    x = torch.rand(size=(batch_size, time_dim, self.emb_size))

    # no padding, no mask
    x_length = torch.Tensor([time_dim] * batch_size).int()
    # byte masks are deprecated in recent PyTorch; bool masks are preferred
    mask = torch.ones([batch_size, time_dim, 1]).byte()

    output, hidden = encoder(x, x_length, mask)

    self.assertEqual(output.shape,
                     torch.Size([batch_size, time_dim, self.hidden_size]))
    self.assertEqual(hidden, None)

    output_target = torch.Tensor(
        [[[-0.4256, 0.1072, 0.0155, 0.7239, 0.4905, 1.3247, -1.5558,
           -0.3509, -0.9214, 1.2840, -1.7615, 1.0693],
          [-0.1745, -0.3157, 0.3251, 0.6532, 0.6617, 0.5979, -1.3970,
           -0.1533, -0.7765, 1.0856, -2.0777, 1.5712],
          [-0.1378, -0.8135, 0.5008, 1.0826, 0.3408, 0.8382, -1.2756,
           -0.6101, -0.7581, 1.0849, -1.7532, 1.5009],
          [-0.0490, -0.9483, 0.3841, 0.9291, 0.8505, 0.7881, -1.4945,
           -0.3270, -0.8172, 0.9365, -1.6795, 1.4272]],
         [[-0.6104, 0.4010, 0.1397, 0.8625, 1.0309, 1.0151, -1.6762,
           -0.2485, -1.0487, 0.5824, -1.6729, 1.2251],
          [0.2141, 0.2892, 0.2086, 0.9977, 0.4318, 0.9500, -1.1859,
           -0.6417, -1.5015, 0.5381, -1.7835, 1.4831],
          [-0.1667, -0.3265, 0.3493, 1.0160, 0.6305, 0.8068, -1.9274,
           -0.7047, -0.8184, 1.3463, -1.3166, 1.1116],
          [-0.1639, -1.1023, 0.2168, 1.2276, 0.7986, 0.3777, -1.6462,
           -0.4816, -0.2629, 1.3105, -1.4894, 1.2152]]])
    self.assertTensorAlmostEqual(output_target, output)
def build_model(cfg: dict = None,
                src_vocab: Vocabulary = None,
                trg_vocab: Vocabulary = None) -> Model:
    """
    Build and initialize the model according to the configuration.

    :param cfg: dictionary configuration containing model specifications
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :return: built and initialized model
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    # TODO if continue-us
    src_embed = PretrainedEmbeddings(src_vocab, trg_vocab,
                                     **cfg["encoder"]["embeddings"],
                                     vocab_size=len(src_vocab),
                                     padding_idx=src_padding_idx)

    # this ties source and target embeddings
    # for softmax layer tying, see further below
    if cfg.get("tied_embeddings", False):
        if src_vocab.itos == trg_vocab.itos:
            # share embeddings for src and trg
            trg_embed = src_embed
        else:
            raise ConfigurationError(
                "Embedding cannot be tied since vocabularies differ.")
    else:
        # build separate trg embeddings from the decoder-side config
        # (assumed intent; otherwise trg_embed would be undefined here)
        trg_embed = PretrainedEmbeddings(src_vocab, trg_vocab,
                                         **cfg["decoder"]["embeddings"],
                                         vocab_size=len(trg_vocab),
                                         padding_idx=trg_padding_idx)

    # build encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
               cfg["encoder"]["hidden_size"], \
               "for transformer, emb_size must be hidden_size"
        encoder = TransformerEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)
    else:
        encoder = RecurrentEncoder(**cfg["encoder"],
                                   emb_size=src_embed.embedding_dim,
                                   emb_dropout=enc_emb_dropout)

    # build decoder
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        decoder = TransformerDecoder(**cfg["decoder"],
                                     encoder=encoder,
                                     vocab_size=len(trg_vocab),
                                     emb_size=trg_embed.embedding_dim,
                                     emb_dropout=dec_emb_dropout)
    else:
        decoder = RecurrentDecoder(**cfg["decoder"],
                                   encoder=encoder,
                                   vocab_size=len(trg_vocab),
                                   emb_size=trg_embed.embedding_dim,
                                   emb_dropout=dec_emb_dropout)

    model = Model(encoder=encoder, decoder=decoder,
                  src_embed=src_embed, trg_embed=trg_embed,
                  src_vocab=src_vocab, trg_vocab=trg_vocab)

    # tie softmax layer with trg embeddings (currently disabled)
    """
    if cfg.get("tied_softmax", False):
        if trg_embed.lut.weight.shape == \
                model.decoder.output_layer.weight.shape:
            # (also) share trg embeddings and softmax layer:
            model.decoder.output_layer.weight = trg_embed.lut.weight
        else:
            raise ConfigurationError(
                "For tied_softmax, the decoder embedding_dim and decoder "
                "hidden_size must be the same. "
                "The decoder must be a Transformer. "
                f"shapes: output_layer.weight: "
                f"{model.decoder.output_layer.weight.shape}; "
                f"target_embed.lut.weight: {trg_embed.lut.weight.shape}")
    """

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    return model
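# For reference, a minimal configuration sketch exercising the keys this
# builder actually reads (the "tied_embeddings"/"tied_softmax" flags and the
# cfg["encoder"]/cfg["decoder"] sub-dicts). Key names are taken from the
# lookups above; the concrete values are illustrative only.
example_cfg = {
    "tied_embeddings": True,   # requires identical src/trg vocabularies
    "tied_softmax": False,     # softmax tying is disabled in this variant
    "encoder": {
        "type": "transformer",
        "hidden_size": 64,
        "dropout": 0.1,
        # for the transformer branch, embedding_dim must equal hidden_size
        "embeddings": {"embedding_dim": 64, "dropout": 0.1},
    },
    "decoder": {
        "type": "transformer",
        "hidden_size": 64,
        "dropout": 0.1,
        "embeddings": {"embedding_dim": 64, "dropout": 0.1},
    },
}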
def build_model(cfg: dict = None,
                src_vocab: Vocabulary = None,
                trg_vocab: Vocabulary = None,
                trv_vocab: Vocabulary = None,
                canonizer=None) -> Model:
    """
    Build and initialize the model according to the configuration.

    :param cfg: dictionary configuration containing model specifications
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :param trv_vocab: kb true value lookup vocabulary
    :return: built and initialized model
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    if "embedding_files" in cfg.keys():  # init from pretrained
        assert not cfg.get(
            "tied_embeddings", False
        ), "TODO implement tied embeddings along with pretrained initialization"
        raise NotImplementedError(
            "TODO implement kbsrc embed loading for embedding files")

        # unreachable until the TODO above is resolved:
        # parse one whitespace-separated row of floats per vocabulary entry
        weight_tensors = []
        for weight_file in cfg["embedding_files"]:
            with open(weight_file, "r") as f:
                weight = []
                for line in f.readlines():
                    line = line.split()
                    line = [float(x) for x in line]
                    weight.append(line)
            weight = FloatTensor(weight)
            weight_tensors.append(weight)

        # Set source Embeddings to Pretrained Embeddings
        src_embed = Embeddings(
            int(weight_tensors[0][0].shape[0]),
            False,  # TODO transformer: change to True
            len(weight_tensors[0]),
        )
        src_embed.lut.weight.data = weight_tensors[0]

        # Set target Embeddings to Pretrained Embeddings
        trg_embed = Embeddings(
            int(weight_tensors[1][0].shape[0]),
            False,  # TODO transformer: change to True
            len(weight_tensors[1]),
        )
        trg_embed.lut.weight.data = weight_tensors[1]
    else:
        src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                               vocab_size=len(src_vocab),
                               padding_idx=src_padding_idx)

        if cfg.get("kb_embed_separate", False):
            kbsrc_embed = Embeddings(**cfg["encoder"]["embeddings"],
                                     vocab_size=len(src_vocab),
                                     padding_idx=src_padding_idx)
        else:
            kbsrc_embed = src_embed

    # this ties source and target embeddings
    # for softmax layer tying, see further below
    if cfg.get("tied_embeddings", False):
        if src_vocab.itos == trg_vocab.itos:
            # share embeddings for src and trg
            trg_embed = src_embed
        else:
            raise ConfigurationError(
                "Embedding cannot be tied since vocabularies differ.")
    else:
        # Latest TODO: init embeddings with
        # vocab_size = len(trg_vocab joined with kb_vocab)
        trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                               vocab_size=len(trg_vocab),
                               padding_idx=trg_padding_idx)

    # build encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
               cfg["encoder"]["hidden_size"], \
               "for transformer, emb_size must be hidden_size"
        encoder = TransformerEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)
    else:
        encoder = RecurrentEncoder(**cfg["encoder"],
                                   emb_size=src_embed.embedding_dim,
                                   emb_dropout=enc_emb_dropout)

    # retrieve kb task info
    kb_task = bool(cfg.get("kb", False))
    # k: number of kvr attention layers in the decoder (Eric et al. default: 1)
    k_hops = int(cfg.get("k_hops", 1))
    same_module_for_all_hops = bool(cfg.get("same_module_for_all_hops", False))
    do_postproc = bool(cfg.get("do_postproc", True))
    copy_from_source = bool(cfg.get("copy_from_source", True))
    canonization_func = None if canonizer is None else canonizer(
        copy_from_source=copy_from_source)
    kb_input_feeding = bool(cfg.get("kb_input_feeding", True))
    kb_feed_rnn = bool(cfg.get("kb_feed_rnn", True))
    kb_multihead_feed = bool(cfg.get("kb_multihead_feed", False))
    posEncKBkeys = cfg.get("posEncdKBkeys", False)
    tfstyletf = cfg.get("tfstyletf", True)
    infeedkb = bool(cfg.get("infeedkb", False))
    outfeedkb = bool(cfg.get("outfeedkb", False))
    add_kb_biases_to_output = bool(cfg.get("add_kb_biases_to_output", True))
    kb_max_dims = cfg.get("kb_max_dims", (16, 32))  # should be a tuple
    double_decoder = cfg.get("double_decoder", False)
    # actually use separate linear layers, tying only the main one
    tied_side_softmax = cfg.get("tied_side_softmax", False)
    # doesn't need to be True for 1 hop (=> big performance save);
    # needs to be True for >= 2 hops
    do_pad_kb_keys = cfg.get("pad_kb_keys", True)

    if hasattr(kb_max_dims, "__iter__"):
        kb_max_dims = tuple(kb_max_dims)
    else:
        assert type(kb_max_dims) == int, kb_max_dims
        kb_max_dims = (kb_max_dims, )

    assert cfg["decoder"]["hidden_size"]
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        if tfstyletf:
            decoder = TransformerDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                kb_task=kb_task,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                feed_kb_hidden=kb_input_feeding,
                infeedkb=infeedkb,
                outfeedkb=outfeedkb,
                double_decoder=double_decoder)
        else:
            decoder = TransformerKBrnnDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                kb_task=kb_task,
                k_hops=k_hops,
                kb_max=kb_max_dims,
                same_module_for_all_hops=same_module_for_all_hops,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                kb_input_feeding=kb_input_feeding,
                kb_feed_rnn=kb_feed_rnn,
                kb_multihead_feed=kb_multihead_feed)
    else:
        if not kb_task:
            decoder = RecurrentDecoder(**cfg["decoder"],
                                       encoder=encoder,
                                       vocab_size=len(trg_vocab),
                                       emb_size=trg_embed.embedding_dim,
                                       emb_dropout=dec_emb_dropout)
        else:
            decoder = KeyValRetRNNDecoder(
                **cfg["decoder"],
                encoder=encoder,
                vocab_size=len(trg_vocab),
                emb_size=trg_embed.embedding_dim,
                emb_dropout=dec_emb_dropout,
                k_hops=k_hops,
                kb_max=kb_max_dims,
                same_module_for_all_hops=same_module_for_all_hops,
                kb_key_emb_size=kbsrc_embed.embedding_dim,
                kb_input_feeding=kb_input_feeding,
                kb_feed_rnn=kb_feed_rnn,
                kb_multihead_feed=kb_multihead_feed,
                do_pad_kb_keys=do_pad_kb_keys)

    # specify generator, which is mostly just the output layer
    generator = Generator(dec_hidden_size=cfg["decoder"]["hidden_size"],
                          vocab_size=len(trg_vocab),
                          add_kb_biases_to_output=add_kb_biases_to_output,
                          double_decoder=double_decoder)

    model = Model(encoder=encoder,
                  decoder=decoder,
                  generator=generator,
                  src_embed=src_embed,
                  trg_embed=trg_embed,
                  src_vocab=src_vocab,
                  trg_vocab=trg_vocab,
                  kb_key_embed=kbsrc_embed,
                  trv_vocab=trv_vocab,
                  k_hops=k_hops,
                  do_postproc=do_postproc,
                  canonize=canonization_func,
                  kb_att_dims=len(kb_max_dims),
                  posEncKBkeys=posEncKBkeys)

    # tie softmax layer with trg embeddings
    if cfg.get("tied_softmax", False):
        if trg_embed.lut.weight.shape == \
                model.generator.output_layer.weight.shape:
            # (also) share trg embeddings and softmax layer:
            model.generator.output_layer.weight = trg_embed.lut.weight
            if model.generator.double_decoder:
                # (also also) share trg embeddings and side softmax layer
                assert hasattr(model.generator, "side_output_layer")
                if tied_side_softmax:
                    # because of distributivity this becomes
                    # O (x_1 + x_2) instead of O_1 x_1 + O_2 x_2
                    model.generator.side_output_layer.weight = \
                        trg_embed.lut.weight
        else:
            raise ConfigurationError(
                "For tied_softmax, the decoder embedding_dim and decoder "
                "hidden_size must be the same. "
                "The decoder must be a Transformer.")

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    return model
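# The KB-augmented builder above is driven entirely by top-level config
# switches. The sketch below lists those switches with the defaults the code
# falls back to (names copied from the cfg.get(...) calls; the values shown
# are the in-code defaults, not recommendations):
kb_example_cfg = {
    "kb": True,                         # enable the key-value retrieval task
    "k_hops": 1,                        # kvr attention layers (Eric et al. default)
    "same_module_for_all_hops": False,
    "do_postproc": True,
    "copy_from_source": True,
    "kb_input_feeding": True,
    "kb_feed_rnn": True,
    "kb_multihead_feed": False,
    "posEncdKBkeys": False,             # positional encoding on KB keys
    "tfstyletf": True,                  # plain TransformerDecoder variant
    "infeedkb": False,
    "outfeedkb": False,
    "add_kb_biases_to_output": True,
    "kb_max_dims": (16, 32),            # tuple; a bare int is wrapped into a 1-tuple
    "double_decoder": False,
    "tied_side_softmax": False,
    "pad_kb_keys": True,                # may be False for 1 hop; required for >= 2 hops
    "kb_embed_separate": False,         # share src and KB-key embeddings by default
}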
def test_transformer_encoder_forward(self):
    batch_size = 2
    time_dim = 4
    torch.manual_seed(self.seed)
    encoder = TransformerEncoder(hidden_size=self.hidden_size,
                                 ff_size=self.ff_size,
                                 num_layers=self.num_layers,
                                 num_heads=self.num_heads,
                                 dropout=self.dropout,
                                 emb_dropout=self.dropout)

    for p in encoder.parameters():
        torch.nn.init.uniform_(p, -0.5, 0.5)

    x = torch.rand(size=(batch_size, time_dim, self.emb_size))

    # no padding, no mask
    x_length = torch.Tensor([time_dim] * batch_size).int()
    mask = torch.ones([batch_size, time_dim, 1]) == 1

    output, hidden = encoder(x, x_length, mask)

    self.assertEqual(output.shape,
                     torch.Size([batch_size, time_dim, self.hidden_size]))
    self.assertEqual(hidden, None)

    output_target = torch.Tensor(
        [[[1.9728e-01, -1.2042e-01, 8.0998e-02, 1.3411e-03, -3.5960e-01,
           -5.2988e-01, -5.6056e-01, -3.5297e-01, 2.6680e-01, 2.8343e-01,
           -3.7342e-01, -5.9112e-03],
          [8.9687e-02, -1.2491e-01, 7.7809e-02, -1.3500e-03, -2.7002e-01,
           -4.7312e-01, -5.7981e-01, -4.1998e-01, 1.0457e-01, 2.9726e-01,
           -3.9461e-01, 8.1598e-02],
          [3.4988e-02, -1.3020e-01, 6.0043e-02, 2.7782e-02, -3.1483e-01,
           -3.8940e-01, -5.5557e-01, -5.9540e-01, -2.9808e-02, 3.1468e-01,
           -4.5809e-01, 4.3313e-03],
          [1.2234e-01, -1.3285e-01, 6.3068e-02, -2.3343e-02, -2.3519e-01,
           -4.0794e-01, -5.6063e-01, -5.5484e-01, -1.1272e-01, 3.0103e-01,
           -4.0983e-01, 3.3038e-02]],
         [[9.8597e-02, -1.2121e-01, 1.0718e-01, -2.2644e-02, -4.0282e-01,
           -4.2646e-01, -5.9981e-01, -3.7200e-01, 1.9538e-01, 2.7036e-01,
           -3.4072e-01, -1.7966e-03],
          [8.8470e-02, -1.2618e-01, 5.3351e-02, -1.8531e-02, -3.3834e-01,
           -4.9047e-01, -5.7063e-01, -4.9790e-01, 2.2070e-01, 3.3964e-01,
           -4.1604e-01, 2.3519e-02],
          [5.8373e-02, -1.2706e-01, 1.0598e-01, 9.3277e-05, -3.0493e-01,
           -4.4406e-01, -5.4723e-01, -5.2214e-01, 8.0374e-02, 2.6307e-01,
           -4.4571e-01, 8.7052e-02],
          [7.9567e-02, -1.2977e-01, 1.1731e-01, 2.6198e-02, -2.4024e-01,
           -4.2161e-01, -5.7604e-01, -7.3298e-01, 1.6698e-01, 3.1454e-01,
           -4.9189e-01, 2.4027e-02]]])
    self.assertTensorAlmostEqual(output_target, output)
def test_transformer_encoder_freeze(self):
    encoder = TransformerEncoder(freeze=True)
    for n, p in encoder.named_parameters():
        self.assertFalse(p.requires_grad)
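# For context: the freeze behaviour asserted above is typically implemented by
# flipping requires_grad on every parameter at construction time. A minimal
# sketch follows; the helper name `freeze_params` is an assumption, not part
# of this excerpt.
from torch import nn


def freeze_params(module: nn.Module) -> None:
    """Disable gradient updates for all parameters of `module`.

    Hypothetical helper illustrating what `freeze=True` is expected to do.
    """
    for param in module.parameters():
        param.requires_grad = False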
def build_model(cfg: dict = None,
                src_vocab: Vocabulary = None,
                trg_vocab: Vocabulary = None) -> Model:
    """
    Build and initialize the model according to the configuration.

    :param cfg: dictionary configuration containing model specifications
    :param src_vocab: source vocabulary
    :param trg_vocab: target vocabulary
    :return: built and initialized model
    """
    logger.info("Building an encoder-decoder model...")
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                           vocab_size=len(src_vocab),
                           padding_idx=src_padding_idx)

    # this ties source and target embeddings
    # for softmax layer tying, see further below
    if cfg.get("tied_embeddings", False):
        if src_vocab.itos == trg_vocab.itos:
            # share embeddings for src and trg
            trg_embed = src_embed
        else:
            raise ConfigurationError(
                "Embedding cannot be tied since vocabularies differ.")
    else:
        trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                               vocab_size=len(trg_vocab),
                               padding_idx=trg_padding_idx)

    # build encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
               cfg["encoder"]["hidden_size"], \
               "for transformer, emb_size must be hidden_size"
        encoder = TransformerEncoder(**cfg["encoder"],
                                     emb_size=src_embed.embedding_dim,
                                     emb_dropout=enc_emb_dropout)
    else:
        encoder = RecurrentEncoder(**cfg["encoder"],
                                   emb_size=src_embed.embedding_dim,
                                   emb_dropout=enc_emb_dropout)

    # build decoder
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        decoder = TransformerDecoder(**cfg["decoder"],
                                     encoder=encoder,
                                     vocab_size=len(trg_vocab),
                                     emb_size=trg_embed.embedding_dim,
                                     emb_dropout=dec_emb_dropout)
    else:
        decoder = RecurrentDecoder(**cfg["decoder"],
                                   encoder=encoder,
                                   vocab_size=len(trg_vocab),
                                   emb_size=trg_embed.embedding_dim,
                                   emb_dropout=dec_emb_dropout)

    model = Model(encoder=encoder, decoder=decoder,
                  src_embed=src_embed, trg_embed=trg_embed,
                  src_vocab=src_vocab, trg_vocab=trg_vocab)

    # tie softmax layer with trg embeddings
    if cfg.get("tied_softmax", False):
        if trg_embed.lut.weight.shape == \
                model.decoder.output_layer.weight.shape:
            # (also) share trg embeddings and softmax layer:
            model.decoder.output_layer.weight = trg_embed.lut.weight
        else:
            raise ConfigurationError(
                "For tied_softmax, the decoder embedding_dim and decoder "
                "hidden_size must be the same. "
                "The decoder must be a Transformer.")

    # custom initialization of model parameters
    initialize_model(model, cfg, src_padding_idx, trg_padding_idx)

    # initialize embeddings from file
    pretrained_enc_embed_path = cfg["encoder"]["embeddings"].get(
        "load_pretrained", None)
    pretrained_dec_embed_path = cfg["decoder"]["embeddings"].get(
        "load_pretrained", None)
    if pretrained_enc_embed_path:
        logger.info("Loading pretrained src embeddings...")
        model.src_embed.load_from_file(pretrained_enc_embed_path, src_vocab)
    if pretrained_dec_embed_path and not cfg.get("tied_embeddings", False):
        logger.info("Loading pretrained trg embeddings...")
        model.trg_embed.load_from_file(pretrained_dec_embed_path, trg_vocab)

    logger.info("Enc-dec model built.")
    return model
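# The pretrained-embedding hook above only fires when the embeddings config
# carries a "load_pretrained" entry, e.g. (the path is illustrative):
#
#     cfg["encoder"]["embeddings"]["load_pretrained"] = "embeds/src.vec"
#
# Note that with "tied_embeddings" enabled, the decoder-side load is skipped
# so the shared embedding table is not overwritten.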
def build_unsupervised_nmt_model(
        cfg: dict = None,
        src_vocab: Vocabulary = None,
        trg_vocab: Vocabulary = None) -> UnsupervisedNMTModel:
    """
    Build an UnsupervisedNMTModel.

    :param cfg: model configuration
    :param src_vocab: Vocabulary for the src language
    :param trg_vocab: Vocabulary for the trg language
    :return: Unsupervised NMT model as specified in cfg
    """
    src_padding_idx = src_vocab.stoi[PAD_TOKEN]
    trg_padding_idx = trg_vocab.stoi[PAD_TOKEN]

    # build source and target embedding layers
    # embeddings in the encoder are pretrained and stay fixed
    loaded_src_embed = PretrainedEmbeddings(**cfg["encoder"]["embeddings"],
                                            vocab_size=len(src_vocab),
                                            padding_idx=src_padding_idx,
                                            vocab=src_vocab,
                                            freeze=True)
    loaded_trg_embed = PretrainedEmbeddings(**cfg["decoder"]["embeddings"],
                                            vocab_size=len(trg_vocab),
                                            padding_idx=trg_padding_idx,
                                            vocab=trg_vocab,
                                            freeze=True)

    # embeddings in the decoder are randomly initialised and will be learned
    src_embed = Embeddings(**cfg["encoder"]["embeddings"],
                           vocab_size=len(src_vocab),
                           padding_idx=src_padding_idx,
                           freeze=False)
    trg_embed = Embeddings(**cfg["decoder"]["embeddings"],
                           vocab_size=len(trg_vocab),
                           padding_idx=trg_padding_idx,
                           freeze=False)

    # build shared encoder
    enc_dropout = cfg["encoder"].get("dropout", 0.)
    enc_emb_dropout = cfg["encoder"]["embeddings"].get("dropout", enc_dropout)
    if cfg["encoder"].get("type", "recurrent") == "transformer":
        assert cfg["encoder"]["embeddings"]["embedding_dim"] == \
               cfg["encoder"]["hidden_size"], \
               "for transformer, emb_size must be hidden_size"
        shared_encoder = TransformerEncoder(**cfg["encoder"],
                                            emb_size=src_embed.embedding_dim,
                                            emb_dropout=enc_emb_dropout)
    else:
        shared_encoder = RecurrentEncoder(**cfg["encoder"],
                                          emb_size=src_embed.embedding_dim,
                                          emb_dropout=enc_emb_dropout)

    # build src and trg language decoder
    dec_dropout = cfg["decoder"].get("dropout", 0.)
    dec_emb_dropout = cfg["decoder"]["embeddings"].get("dropout", dec_dropout)
    if cfg["decoder"].get("type", "recurrent") == "transformer":
        src_decoder = TransformerDecoder(**cfg["decoder"],
                                         encoder=shared_encoder,
                                         vocab_size=len(src_vocab),
                                         emb_size=src_embed.embedding_dim,
                                         emb_dropout=dec_emb_dropout)
        trg_decoder = TransformerDecoder(**cfg["decoder"],
                                         encoder=shared_encoder,
                                         vocab_size=len(trg_vocab),
                                         emb_size=trg_embed.embedding_dim,
                                         emb_dropout=dec_emb_dropout)
    else:
        src_decoder = RecurrentDecoder(**cfg["decoder"],
                                       encoder=shared_encoder,
                                       vocab_size=len(src_vocab),
                                       emb_size=src_embed.embedding_dim,
                                       emb_dropout=dec_emb_dropout)
        trg_decoder = RecurrentDecoder(**cfg["decoder"],
                                       encoder=shared_encoder,
                                       vocab_size=len(trg_vocab),
                                       emb_size=trg_embed.embedding_dim,
                                       emb_dropout=dec_emb_dropout)

    # build unsupervised NMT model
    model = UnsupervisedNMTModel(loaded_src_embed, loaded_trg_embed,
                                 src_embed, trg_embed,
                                 shared_encoder,
                                 src_decoder, trg_decoder,
                                 src_vocab, trg_vocab)

    # initialise model, one call per translation direction
    # embed_initializer should be None so loaded encoder embeddings
    # won't be overwritten
    initialize_model(model.src2src_translator, cfg,
                     src_padding_idx, src_padding_idx)
    initialize_model(model.src2trg_translator, cfg,
                     src_padding_idx, trg_padding_idx)
    initialize_model(model.trg2src_translator, cfg,
                     trg_padding_idx, src_padding_idx)
    # trg->trg direction, matching the trg/trg padding indices
    # (the translator attribute name is assumed)
    initialize_model(model.trg2trg_translator, cfg,
                     trg_padding_idx, trg_padding_idx)

    return model
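# Illustrative usage, assuming config and vocabularies are built elsewhere.
# The returned wrapper exposes one translator per language direction, each of
# which is initialised separately above (trg2trg_translator is assumed to
# exist by symmetry with the other three):
#
#     model = build_unsupervised_nmt_model(cfg, src_vocab, trg_vocab)
#     model.src2src_translator   # src -> src (same-language, e.g. denoising)
#     model.src2trg_translator   # src -> trg translation
#     model.trg2src_translator   # trg -> src translation
#     model.trg2trg_translator   # trg -> trg (same-language, e.g. denoising)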