Code example #1 (score: 0)
File: hybridmt.py — Project: isi-nlp/rtg
    def make_model(cls, src_vocab, tgt_vocab, enc_layers=6, hid_size=512, ff_size=2048, enc_heads=8,
                   dropout=0.1, tied_emb='three-way', dec_rnn_type: str = 'LSTM',
                   dec_layers: int = 1,
                   exp: Experiment = None):
        """
        Helper: construct a hybrid NMT model (Transformer encoder + RNN decoder)
        from hyper parameters.

        :param src_vocab: source vocabulary size
        :param tgt_vocab: target vocabulary size
        :param enc_layers: number of Transformer encoder layers
        :param hid_size: model / hidden dimension shared by all sub-modules
        :param ff_size: inner dimension of the position-wise feed-forward layers
        :param enc_heads: number of attention heads in the encoder
        :param dropout: dropout rate used throughout
        :param tied_emb: embedding tying scheme (e.g. 'three-way'); falsy value disables tying
        :param dec_rnn_type: RNN cell type for the decoder (e.g. 'LSTM')
        :param dec_layers: number of decoder RNN layers
        :param exp: experiment context; excluded from the returned args
        :return: (model, args) — args is a dict of the hyper parameters, kept so the
            model can be reconstructed later from the same settings
        """
        # get all args for reconstruction at a later phase
        # NOTE: this snapshots the current frame's locals, so it must stay the very
        # first statement — any local defined before it would leak into `args`
        _, _, _, args = inspect.getargvalues(inspect.currentframe())
        for exclusion in ['cls', 'exp']:
            del args[exclusion]  # exclude some args

        log.info(f"making hybridmt model: {args}")

        c = copy.deepcopy  # each layer gets its own (deep-copied) attention/ff modules
        attn = MultiHeadedAttention(enc_heads, hid_size)
        ff = PositionwiseFeedForward(hid_size, ff_size, dropout)

        # Encoder side: standard Transformer stack with positional encoding
        enc_layer = EncoderLayer(hid_size, c(attn), c(ff), dropout)
        encoder = Encoder(enc_layer, enc_layers)  # clones n times
        src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                                PositionalEncoding(hid_size, dropout))

        # Decoder side: recurrent; note the target embeddings are NOT wrapped in
        # PositionalEncoding (presumably the RNN supplies order — confirm upstream)
        decoder = RnnDecoder(rnn_type=dec_rnn_type, hid_size=hid_size, n_layers=dec_layers,
                             dropout=dropout)
        tgt_emb = Embeddings(hid_size, tgt_vocab)
        generator = Generator(hid_size, tgt_vocab)

        model = cls(encoder, decoder, src_emb, tgt_emb, generator)
        if tied_emb:
            model.tie_embeddings(tied_emb)

        model.init_params()
        return model, args
Code example #2 (score: 0)
File: tfmlm.py — Project: MGheini/rtg
    def make_model(cls,
                   vocab_size,
                   n_layers=6,
                   hid_size=512,
                   ff_size=2048,
                   n_heads=8,
                   dropout=0.1,
                   tied_emb=True,
                   exp: Experiment = None):
        """
        Helper: construct a Transformer language model from hyper parameters.

        :param vocab_size: vocabulary size (single vocabulary — this is an LM)
        :param n_layers: number of decoder layers
        :param hid_size: model / hidden dimension
        :param ff_size: inner dimension of the position-wise feed-forward layers
        :param n_heads: number of attention heads
        :param dropout: dropout rate used throughout
        :param tied_emb: if truthy, tie the generator projection weights to the
            input embedding weights
        :param exp: experiment context; excluded from the returned args
        :return: (model, args) — args is a dict of the hyper parameters for
            reconstructing the model later
        """
        # get all args for reconstruction at a later phase
        # NOTE: must stay the first statement — it snapshots the frame's locals
        _, _, _, args = inspect.getargvalues(inspect.currentframe())
        for exclusion in ['cls', 'exp']:
            del args[exclusion]  # exclude some args
        # In case you are wondering, why I didnt use **kwargs here:
        #   these args are read from conf file where user can introduce errors, so the parameter
        #   validation and default value assignment is implicitly done by function call for us :)

        c = copy.deepcopy  # each layer gets its own (deep-copied) attention/ff modules
        attn = MultiHeadedAttention(n_heads, hid_size)
        ff = PositionwiseFeedForward(hid_size, ff_size, dropout)
        dec_layer = LMDecoderLayer(hid_size, c(attn), c(ff), dropout)
        decoder = LMDecoder(dec_layer, n_layers)
        embedr = nn.Sequential(Embeddings(hid_size, vocab_size),
                               PositionalEncoding(hid_size, dropout))
        generator = Generator(hid_size, vocab_size)

        model = TfmLm(decoder, embedr, generator)
        if tied_emb:
            log.info(
                "Tying the embedding weights, two ways: (TgtIn == TgtOut)")
            # share one weight matrix between the output projection and the
            # input embedding lookup table
            model.generator.proj.weight = model.embed[0].lut.weight

        model.init_params()
        return model, args
Code example #3 (score: 0)
File: mtfmnmt.py — Project: isi-nlp/rtg
    def make_model(cls,
                   src_vocab,
                   tgt_vocab,
                   n_layers=6,
                   hid_size=512,
                   ff_size=2048,
                   n_heads=8,
                   attn_dropout=0.1,
                   dropout=0.1,
                   activation='relu',
                   tied_emb='three-way',
                   plug_mode='cat_attn',
                   exp: Experiment = None):
        """
        Helper: construct an mtfmnmt model from hyper parameters.

        :param src_vocab: source vocabulary size
        :param tgt_vocab: target vocabulary size
        :param n_layers: number of encoder (and decoder) layers
        :param hid_size: model / hidden dimension
        :param ff_size: inner dimension of the position-wise feed-forward layers
        :param n_heads: number of attention heads
        :param attn_dropout: dropout rate inside the attention modules
        :param dropout: dropout rate everywhere else
        :param activation: activation name for the feed-forward layers (e.g. 'relu')
        :param tied_emb: embedding tying scheme (e.g. 'three-way'); falsy disables tying
        :param plug_mode: how the decoder consumes encoder context; one of
            'cat_attn', 'add_attn', 'cat_emb'
        :param exp: experiment context; excluded from the returned args
        :return: (model, args) — args is a dict of the hyper parameters for later
            reconstruction
        """
        assert plug_mode in {'cat_attn', 'add_attn', 'cat_emb'}
        # get all args for reconstruction at a later phase
        # NOTE: snapshots the frame's locals — keep before any other local assignment
        _, _, _, args = inspect.getargvalues(inspect.currentframe())
        for exclusion in ['cls', 'exp']:
            del args[exclusion]  # exclude some args
        # In case you are wondering, why I didnt use **kwargs here:
        #   these args are read from conf file where user can introduce errors, so the parameter
        #   validation and default value assignment is implicitly done by function call for us :)
        log.info(f"making mtfmnmt model: {args}")
        c = copy.deepcopy  # each layer gets its own (deep-copied) attention/ff modules
        attn = MultiHeadedAttention(n_heads, hid_size, dropout=attn_dropout)
        ff = PositionwiseFeedForward(hid_size,
                                     ff_size,
                                     dropout,
                                     activation=activation)

        enc_layer = EncoderLayer(hid_size, c(attn), c(ff), dropout)
        encoder = Encoder(enc_layer, n_layers)  # clones n times
        src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                                PositionalEncoding(hid_size, dropout))

        if plug_mode == 'cat_emb':
            # context is concatenated at the embedding level (MEmbeddings),
            # so the decoder reuses the plain encoder architecture
            tgt_emb = nn.Sequential(MEmbeddings(hid_size, tgt_vocab),
                                    PositionalEncoding(hid_size, dropout))
            decoder = c(
                encoder
            )  # decoder is same as encoder, except embeddings have concat
        else:
            # context is injected through a dedicated decoder block
            # ('cat_attn' or 'add_attn')
            dec_block = DecoderBlock(hid_size, dropout, mode=plug_mode)
            dec_layer = MDecoderLayer(hid_size, c(attn), c(dec_block), c(ff),
                                      dropout)
            decoder = MDecoder(dec_layer, n_layers)
            tgt_emb = nn.Sequential(Embeddings(hid_size, tgt_vocab),
                                    PositionalEncoding(hid_size, dropout))

        generator = Generator(hid_size, tgt_vocab)

        model = cls(encoder, decoder, src_emb, tgt_emb, generator)
        if tied_emb:
            model.tie_embeddings(tied_emb)

        model.init_params()
        return model, args
Code example #4 (score: 0)
File: wvtfmnmt.py — Project: isi-nlp/rtg
    def __init__(self,
                 d_model: int,
                 ff_dims: List[int],
                 N: int,
                 n_heads: int,
                 attn_dropout: float,
                 dropout: float,
                 activation: str = 'relu'):
        """Decoder stack of N layers where each layer has its own
        position-wise feed-forward width, taken from ``ff_dims``.

        :param d_model: model / hidden dimension
        :param ff_dims: feed-forward inner dimension for each layer (length N)
        :param N: number of layers
        :param n_heads: attention heads per layer
        :param attn_dropout: dropout inside self-attention
        :param dropout: dropout for source-attention, feed-forward and layer
        :param activation: feed-forward activation name
        """
        super().__init__()

        # one feed-forward width per layer; a mismatch is a config error
        assert len(ff_dims) == N, f'N:{N} != ff_dims:{len(ff_dims)}'

        def _build_layer(width):
            # fresh attention / feed-forward modules per layer — no weight sharing
            self_attn = MultiHeadedAttention(n_heads, d_model, attn_dropout)
            src_attn = MultiHeadedAttention(n_heads, d_model, dropout)
            pw_ff = PositionwiseFeedForward(d_model,
                                            width,
                                            dropout,
                                            activation=activation)
            return DecoderLayer(d_model, self_attn, src_attn, pw_ff, dropout)

        self.layers = nn.ModuleList(_build_layer(w) for w in ff_dims)
        self.norm = nn.LayerNorm(d_model)
Code example #5 (score: 0)
    def make_model(cls, src_vocab, tgt_vocab, n_layers=6, hid_size=512, ff_size=2048, n_heads=8,
                   dropout=0.1, tied_emb='three-way', exp: Experiment = None):
        """
        Helper: construct a Transformer NMT model augmented with frozen external
        (pre-trained) embeddings loaded from the experiment directory.

        :param src_vocab: source vocabulary size
        :param tgt_vocab: target vocabulary size
        :param n_layers: number of encoder and decoder layers
        :param hid_size: model / hidden dimension
        :param ff_size: inner dimension of the position-wise feed-forward layers
        :param n_heads: number of attention heads
        :param dropout: dropout rate used throughout
        :param tied_emb: embedding tying scheme (e.g. 'three-way'); falsy disables tying
        :param exp: experiment providing ext_emb_src_file / ext_emb_tgt_file;
            required here (unlike siblings) despite the None default
        :return: (model, args) — args is a dict of the hyper parameters for
            later reconstruction
        """
        # get all args for reconstruction at a later phase
        # NOTE: snapshots the frame's locals — keep before any other local assignment
        _, _, _, args = inspect.getargvalues(inspect.currentframe())
        for exclusion in ['cls', 'exp']:
            del args[exclusion]  # exclude some args

        log.info("Getting external embedding from the experiment dir")
        src_ext_emb_wt = torch.load(exp.ext_emb_src_file)

        tgt_ext_emb_wt = torch.load(exp.ext_emb_tgt_file)
        # both sides must share the external embedding dimension
        assert src_ext_emb_wt.shape[1] == tgt_ext_emb_wt.shape[1]
        ext_emb_dim = src_ext_emb_wt.shape[1]
        log.info(f"Found ext embs. Dim: {ext_emb_dim}; SRC vocab: {src_ext_emb_wt.shape[0]}, "
                 f"TGT vocab: {tgt_ext_emb_wt.shape[0]}")
        # NOTE(review): only the *shapes* of the loaded tensors are used here; the
        # weights are never copied into these nn.Embedding modules in this method.
        # Presumably cls(...) or init_params() assigns them — confirm, otherwise the
        # frozen embeddings start random. `padding_idx` is a module-level name not
        # visible in this snippet.
        src_ext_emb = nn.Embedding(src_ext_emb_wt.shape[0], ext_emb_dim, padding_idx=padding_idx)
        src_ext_emb.weight.requires_grad = False  # freeze: external embs are not trained
        tgt_ext_emb = nn.Embedding(tgt_ext_emb_wt.shape[0], ext_emb_dim, padding_idx=padding_idx)
        tgt_ext_emb.weight.requires_grad = False  # freeze: external embs are not trained

        c = copy.deepcopy  # each layer gets its own (deep-copied) attention/ff modules
        attn = MultiHeadedAttention(n_heads, hid_size, dropout=dropout)
        ff = PositionwiseFeedForward(hid_size, ff_size, dropout)

        encoder = Encoder(EncoderLayer(hid_size, c(attn), c(ff), dropout), n_layers)
        # decoder's source-attention query is widened to carry the external embedding
        src_attn_ext = MultiHeadedAttentionExt(n_heads, hid_size, query_dim=hid_size + ext_emb_dim,
                                               dropout=dropout)
        decoder = DecoderExt(DecoderLayerExt(hid_size, c(attn), src_attn_ext, c(ff), dropout),
                             n_layers)

        src_emb = nn.Sequential(Embeddings(hid_size, src_vocab),
                                PositionalEncoding(hid_size, dropout))
        tgt_emb = nn.Sequential(Embeddings(hid_size, tgt_vocab),
                                PositionalEncoding(hid_size, dropout))
        generator = Generator(hid_size, tgt_vocab)

        model = cls(encoder, decoder, src_emb, tgt_emb, generator,
                    src_ext_emb=src_ext_emb, tgt_ext_emb=tgt_ext_emb)

        if tied_emb:
            model.tie_embeddings(tied_emb)

        model.init_params()
        return model, args