def make_model(cls, src_vocab, tgt_vocab, enc_layers=6, hid_size=512, ff_size=2048,
               enc_heads=8, dropout=0.1, tied_emb='three-way',
               dec_rnn_type: str = 'LSTM', dec_layers: int = 1, exp: Experiment = None):
    """
    Helper: Construct a model from hyper parameters
    :return: model, args
    """
    # get all args for reconstruction at a later phase
    _, _, _, args = inspect.getargvalues(inspect.currentframe())
    for exclusion in ['cls', 'exp']:
        del args[exclusion]  # exclude some args
    log.info(f"making hybridmt model: {args}")
    c = copy.deepcopy
    attn = MultiHeadedAttention(enc_heads, hid_size)
    ff = PositionwiseFeedForward(hid_size, ff_size, dropout)
    enc_layer = EncoderLayer(hid_size, c(attn), c(ff), dropout)
    encoder = Encoder(enc_layer, enc_layers)  # clones n times
    src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                            PositionalEncoding(hid_size, dropout))
    decoder = RnnDecoder(rnn_type=dec_rnn_type, hid_size=hid_size, n_layers=dec_layers,
                         dropout=dropout)
    tgt_emb = Embeddings(hid_size, tgt_vocab)
    generator = Generator(hid_size, tgt_vocab)
    model = cls(encoder, decoder, src_emb, tgt_emb, generator)
    if tied_emb:
        model.tie_embeddings(tied_emb)
    model.init_params()
    return model, args
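
# Usage sketch (assumptions: `HybridMT` stands in for whatever class owns the factory
# above; the vocabulary sizes and layer counts are illustrative, not from the source).
# The returned `args` dict captures every hyper parameter, including the vocab sizes,
# so the same architecture can be rebuilt later from a saved conf or checkpoint entry.
model, args = HybridMT.make_model(src_vocab=32000, tgt_vocab=32000,
                                  enc_layers=6, dec_layers=2, dec_rnn_type='GRU')
model_again, _ = HybridMT.make_model(**args)  # reconstruct the same architecture
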
def make_model(cls, vocab_size, n_layers=6, hid_size=512, ff_size=2048, n_heads=8,
               dropout=0.1, tied_emb=True, exp: Experiment = None):
    # get all args for reconstruction at a later phase
    _, _, _, args = inspect.getargvalues(inspect.currentframe())
    for exclusion in ['cls', 'exp']:
        del args[exclusion]  # exclude some args
    # In case you are wondering why I didn't use **kwargs here:
    # these args are read from a conf file where the user can introduce errors, so the
    # parameter validation and default value assignment is implicitly done by the
    # function call for us :)
    c = copy.deepcopy
    attn = MultiHeadedAttention(n_heads, hid_size)
    ff = PositionwiseFeedForward(hid_size, ff_size, dropout)
    dec_layer = LMDecoderLayer(hid_size, c(attn), c(ff), dropout)
    decoder = LMDecoder(dec_layer, n_layers)
    embedr = nn.Sequential(Embeddings(hid_size, vocab_size),
                           PositionalEncoding(hid_size, dropout))
    generator = Generator(hid_size, vocab_size)
    model = TfmLm(decoder, embedr, generator)
    if tied_emb:
        log.info("Tying the embedding weights, two ways: (TgtIn == TgtOut)")
        model.generator.proj.weight = model.embed[0].lut.weight
    model.init_params()
    return model, args
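
# Quick sanity check (sketch): assuming the factory above is bound as a classmethod of
# TfmLm, two-way tying means the generator projection and the input embedding share the
# same Parameter object, so one gradient update moves both. `vocab_size` is illustrative.
lm, lm_args = TfmLm.make_model(vocab_size=32000, n_layers=6, tied_emb=True)
assert lm.generator.proj.weight is lm.embed[0].lut.weight
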
def make_model(cls, src_vocab, tgt_vocab, n_layers=6, hid_size=512, ff_size=2048,
               n_heads=8, attn_dropout=0.1, dropout=0.1, activation='relu',
               tied_emb='three-way', plug_mode='cat_attn', exp: Experiment = None):
    """
    Helper: Construct a model from hyper parameters
    :return: model, args
    """
    assert plug_mode in {'cat_attn', 'add_attn', 'cat_emb'}
    # get all args for reconstruction at a later phase
    _, _, _, args = inspect.getargvalues(inspect.currentframe())
    for exclusion in ['cls', 'exp']:
        del args[exclusion]  # exclude some args
    # In case you are wondering why I didn't use **kwargs here:
    # these args are read from a conf file where the user can introduce errors, so the
    # parameter validation and default value assignment is implicitly done by the
    # function call for us :)
    log.info(f"making mtfmnmt model: {args}")
    c = copy.deepcopy
    attn = MultiHeadedAttention(n_heads, hid_size, dropout=attn_dropout)
    ff = PositionwiseFeedForward(hid_size, ff_size, dropout, activation=activation)
    enc_layer = EncoderLayer(hid_size, c(attn), c(ff), dropout)
    encoder = Encoder(enc_layer, n_layers)  # clones n times
    src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                            PositionalEncoding(hid_size, dropout))
    if plug_mode == 'cat_emb':
        tgt_emb = nn.Sequential(MEmbeddings(hid_size, tgt_vocab),
                                PositionalEncoding(hid_size, dropout))
        decoder = c(encoder)  # decoder is same as encoder, except embeddings have concat
    else:
        dec_block = DecoderBlock(hid_size, dropout, mode=plug_mode)
        dec_layer = MDecoderLayer(hid_size, c(attn), c(dec_block), c(ff), dropout)
        decoder = MDecoder(dec_layer, n_layers)
        tgt_emb = nn.Sequential(Embeddings(hid_size, tgt_vocab),
                                PositionalEncoding(hid_size, dropout))
    generator = Generator(hid_size, tgt_vocab)
    model = cls(encoder, decoder, src_emb, tgt_emb, generator)
    if tied_emb:
        model.tie_embeddings(tied_emb)
    model.init_params()
    return model, args
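
# Usage sketch: building one model per accepted plug mode, e.g. for a quick architecture
# sweep. `MTfmNMT` is an assumed stand-in for the class owning this factory, and the
# vocabulary sizes and layer count are illustrative.
for mode in ('cat_attn', 'add_attn', 'cat_emb'):
    model, args = MTfmNMT.make_model(src_vocab=32000, tgt_vocab=32000,
                                     n_layers=2, plug_mode=mode)
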
def __init__(self, d_model: int, ff_dims: List[int], N: int, n_heads: int,
             attn_dropout: float, dropout: float, activation: str = 'relu'):
    super().__init__()
    # Make N layers, each with its own pointwise feed-forward dimension
    assert len(ff_dims) == N, f'N:{N} != ff_dims:{len(ff_dims)}'
    layers = list()
    for ff_dim in ff_dims:
        self_attn = MultiHeadedAttention(n_heads, d_model, attn_dropout)
        src_attn = MultiHeadedAttention(n_heads, d_model, dropout)
        ff = PositionwiseFeedForward(d_model, ff_dim, dropout, activation=activation)
        layers.append(DecoderLayer(d_model, self_attn, src_attn, ff, dropout))
    self.layers = nn.ModuleList(layers)
    self.norm = nn.LayerNorm(d_model)
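
# Usage sketch: a 3-layer decoder whose feed-forward widths shrink with depth.
# `WidthVaryingDecoder` is an assumed name for the class this __init__ belongs to;
# the dimensions below are illustrative.
decoder = WidthVaryingDecoder(d_model=512, ff_dims=[2048, 1024, 512], N=3,
                              n_heads=8, attn_dropout=0.1, dropout=0.1)
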
def make_model(cls, src_vocab, tgt_vocab, n_layers=6, hid_size=512, ff_size=2048,
               n_heads=8, dropout=0.1, tied_emb='three-way', exp: Experiment = None):
    # get all args for reconstruction at a later phase
    _, _, _, args = inspect.getargvalues(inspect.currentframe())
    for exclusion in ['cls', 'exp']:
        del args[exclusion]  # exclude some args
    log.info("Getting external embedding from the experiment dir")
    src_ext_emb_wt = torch.load(exp.ext_emb_src_file)
    tgt_ext_emb_wt = torch.load(exp.ext_emb_tgt_file)
    assert src_ext_emb_wt.shape[1] == tgt_ext_emb_wt.shape[1]
    ext_emb_dim = src_ext_emb_wt.shape[1]
    log.info(f"Found ext embs. Dim: {ext_emb_dim}; SRC vocab: {src_ext_emb_wt.shape[0]}, "
             f"TGT vocab: {tgt_ext_emb_wt.shape[0]}")
    src_ext_emb = nn.Embedding(src_ext_emb_wt.shape[0], ext_emb_dim, padding_idx=padding_idx)
    src_ext_emb.weight.data.copy_(src_ext_emb_wt)  # load the pre-trained vectors
    src_ext_emb.weight.requires_grad = False       # and keep them frozen
    tgt_ext_emb = nn.Embedding(tgt_ext_emb_wt.shape[0], ext_emb_dim, padding_idx=padding_idx)
    tgt_ext_emb.weight.data.copy_(tgt_ext_emb_wt)  # load the pre-trained vectors
    tgt_ext_emb.weight.requires_grad = False       # and keep them frozen
    c = copy.deepcopy
    attn = MultiHeadedAttention(n_heads, hid_size, dropout=dropout)
    ff = PositionwiseFeedForward(hid_size, ff_size, dropout)
    encoder = Encoder(EncoderLayer(hid_size, c(attn), c(ff), dropout), n_layers)
    src_attn_ext = MultiHeadedAttentionExt(n_heads, hid_size,
                                           query_dim=hid_size + ext_emb_dim, dropout=dropout)
    decoder = DecoderExt(DecoderLayerExt(hid_size, c(attn), src_attn_ext, c(ff), dropout),
                         n_layers)
    src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                            PositionalEncoding(hid_size, dropout))
    tgt_emb = nn.Sequential(Embeddings(hid_size, tgt_vocab),
                            PositionalEncoding(hid_size, dropout))
    generator = Generator(hid_size, tgt_vocab)
    model = cls(encoder, decoder, src_emb, tgt_emb, generator,
                src_ext_emb=src_ext_emb, tgt_ext_emb=tgt_ext_emb)
    if tied_emb:
        model.tie_embeddings(tied_emb)
    model.init_params()
    return model, args
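
# Sketch of how the external embedding tensors read above could be prepared, given an
# Experiment instance `exp` that exposes `ext_emb_src_file` / `ext_emb_tgt_file` (the
# same attributes the factory reads). Save float tensors of shape [vocab, emb_dim];
# the random tensors and the 300-dim size are placeholders for real pre-trained vectors.
import torch

src_wt = torch.randn(32000, 300)   # placeholder for real pre-trained source vectors
tgt_wt = torch.randn(32000, 300)   # placeholder for real pre-trained target vectors
torch.save(src_wt, exp.ext_emb_src_file)
torch.save(tgt_wt, exp.ext_emb_tgt_file)
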