Example #1
 def __init__(self,
              embeddings,
              feat_dim=512,
              max_word=32,
              multi_image=1,
              image_pe=True,
              layer_norm=False,
              num_layers=6,
              teacher_forcing=False,
              image_model=None,
              image_pretrained=None,
              finetune_image=False,
              image_finetune_epoch=None,
              rl_opts=None,
              word_idxs=None,
              device='gpu',
              verbose=False):
     super(Visual_GPTSimpleCaptioner,
           self).__init__(embeddings, feat_dim, max_word, multi_image,
                          image_pe, layer_norm, teacher_forcing,
                          image_model, image_pretrained, finetune_image,
                          image_finetune_epoch, rl_opts, word_idxs, device,
                          verbose)
     # Transformer Decoder
     decoder_layer = TransformerMaxDecoderLayer(feat_dim, nhead=8)
     self.decoder = TransformerDecoder(decoder_layer, num_layers=num_layers)
Example #2
def _build_transformer_decoder(
    d_model: int,
    nhead: int,
    num_decoder_layers: int,
    dim_feedforward: int,
    dropout: float,
) -> nn.TransformerDecoder:
    """build transformer decoder with params
    Parameters
    ----------
    d_model : int
    nhead : int
    num_decoder_layers : int
    dim_feedforward : int
    dropout : float
    Returns
    -------
    nn.TransformerDecoder
    """
    decoder_layer = nn.TransformerDecoderLayer(
        d_model=d_model,
        nhead=nhead,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
    )
    decoder_norm = nn.LayerNorm(d_model)

    decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers,
                                    decoder_norm)
    return decoder
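
A minimal usage sketch (not part of the original snippet) showing how a decoder built by the helper above is typically called. The tensor shapes and hyperparameters are assumptions; nn.TransformerDecoder defaults to (seq_len, batch, d_model) ordering, and the additive causal mask keeps each target position from attending to later ones.

import torch
import torch.nn as nn

# Hypothetical call of the helper above; sizes are made up for illustration.
decoder = _build_transformer_decoder(d_model=512, nhead=8,
                                     num_decoder_layers=6,
                                     dim_feedforward=2048, dropout=0.1)

tgt = torch.randn(10, 2, 512)     # (tgt_len, batch, d_model)
memory = torch.randn(20, 2, 512)  # encoder output: (src_len, batch, d_model)

# Additive causal mask: -inf above the diagonal blocks future positions.
tgt_mask = torch.triu(torch.full((10, 10), float('-inf')), diagonal=1)

out = decoder(tgt, memory, tgt_mask=tgt_mask)  # -> (10, 2, 512)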
Example #3
 def __init__(self,
              embeddings,
              feat_dim=512,
              max_word=32,
              multi_image=1,
              layer_norm=False,
              num_memory=40,
              num_enc_layers=6,
              num_dec_layers=6,
              teacher_forcing=False,
              image_model=None,
              image_pretrained=None,
              finetune_image=False,
              image_finetune_epoch=None,
              rl_opts=None,
              word_idxs=None,
              device='gpu',
              verbose=False):
     super(M2Transformer,
           self).__init__(embeddings, feat_dim, max_word, multi_image,
                          False, layer_norm, teacher_forcing, image_model,
                          image_pretrained, finetune_image,
                          image_finetune_epoch, rl_opts, word_idxs, device,
                          verbose)
     # Transformer Encoder
     encoder_layer = TransformerEncoderLayerWithMem(feat_dim,
                                                    nhead=8,
                                                    nmem=num_memory)
     self.encoder = MeshedTransformerEncoder(encoder_layer,
                                             num_layers=num_enc_layers)
     # Transformer Decoder
     decoder_layer = MeshedTransformerMaxDecoderLayer(
         feat_dim, nhead=8, nlayer_enc=num_enc_layers)
     self.decoder = TransformerDecoder(decoder_layer,
                                       num_layers=num_dec_layers)
Example #4
    def __init__(self, vocab_size, hidden_size, num_layers, max_len):
        super(AbsDecoder, self).__init__()

        self.decoder_embedding = nn.Embedding(vocab_size, hidden_size)

        from torch.nn.modules.transformer import TransformerDecoder, TransformerDecoderLayer

        d_model = hidden_size  # the number of expected features in the input
        nhead = 8  # the number of heads in the multiheadattention models
        dim_feedforward = 2048
        dropout = 0.1

        self.positional_encoder = PositionalEncoding(d_model,
                                                     dropout=dropout,
                                                     max_len=max_len)

        transformer_decoder_layer = TransformerDecoderLayer(
            d_model, nhead, dim_feedforward, dropout)
        self.transformer_decoder = TransformerDecoder(
            transformer_decoder_layer, num_layers, norm=None)

        # Linear & Softmax Layers
        self.linear_decoder = nn.Linear(in_features=hidden_size,
                                        out_features=vocab_size,
                                        bias=True)
        self.logsoftmax_decoder = nn.LogSoftmax(dim=-1)
Example #5
    def __init__(self, embedding, vocab2id, args):
        super().__init__()
        self.embedding = embedding
        self.vocab2id = vocab2id
        self.args = args
        self.input_dim = args.input_dim
        self.head_size = args.target_head_size
        self.feed_forward_dim = args.feed_forward_dim
        self.dropout = args.target_dropout
        self.num_layers = args.target_layers
        self.target_max_len = args.max_target_len
        self.max_oov_count = args.max_oov_count
        self.vocab_size = embedding.num_embeddings

        layer = TransformerDecoderLayer(d_model=self.input_dim,
                                        nhead=self.head_size,
                                        dim_feedforward=self.feed_forward_dim,
                                        dropout=self.dropout)
        self.decoder = TransformerDecoder(decoder_layer=layer,
                                          num_layers=self.num_layers)
        self.input_copy_proj = nn.Linear(self.input_dim,
                                         self.input_dim,
                                         bias=False)
        self.copy_proj = nn.Linear(self.input_dim, self.input_dim, bias=False)
        self.embed_proj = nn.Linear(2 * self.input_dim,
                                    self.input_dim,
                                    bias=False)
        self.generate_proj = nn.Linear(self.input_dim,
                                       self.vocab_size,
                                       bias=False)
Example #6
    def __init__(self, input_size, output_size, z_size, depth, params, embedding=None, highway=False, sbn=None,
                 dropout=0., batchnorm=False, residual=None, bidirectional=False, n_mems=20, memory=None, targets=None,
                 nheads=2):
        super(ConditionalCoattentiveTransformerLink, self).__init__(input_size, output_size, z_size, depth,
                                                                    params, embedding, highway, dropout=dropout,
                                                                    batchnorm=batchnorm, residual=residual)
        output_size = int(output_size/n_mems)

        self.input_to_hidden = nn.Linear(input_size, output_size)
        self.transformer_enc = TransformerEncoder(
            SpecialTransformerEncoder(output_size, nheads,
                                      dim_feedforward=output_size * n_mems,
                                      dropout=dropout, activation='gelu',
                                      n_mems=n_mems),
            depth)
        self.transformer_dec = TransformerDecoder(
            TransformerDecoderLayer(output_size, nheads,
                                    dim_feedforward=output_size,
                                    dropout=dropout, activation='gelu'),
            depth)
        self.memory, self.targets = memory, targets
        self.pe = PositionalEncoding(output_size)
        self.bn = nn.BatchNorm1d(z_size)
        self.n_mems, self.output_size = n_mems, output_size
        self.bidirectional = bidirectional

        if embedding is not None:
            self.sbn = sbn
            if sbn is not None:
                z_params_size = int(embedding.weight.shape[1] / sbn.n_experts)
            else:
                z_params_size = embedding.weight.shape[1]
            self.hidden_to_z_params = nn.ModuleDict({param: nn.Linear(output_size, z_params_size)
                                                     for param in params})
        else:
            self.hidden_to_z_params = nn.ModuleDict({param: nn.Linear(output_size, z_size) for param in params})
        assert self.residual is None, "Named links still can't have residuals"
Example #7
    def __init__(self, input_size, output_size, z_size, depth, params, embedding=None, highway=False, sbn=None,
                 dropout=0., batchnorm=False, residual=None, bidirectional=False, n_targets=20, nheads=2,
                 sequence=None, memory=None, n_mems=None):
        super(CoattentiveTransformerLink, self).__init__(input_size, output_size, z_size, depth, params, embedding,
                                                         highway, dropout=dropout, batchnorm=batchnorm,
                                                         residual=residual)
        assert output_size % n_targets == 0
        assert z_size % n_targets == 0
        output_size = int(output_size/n_targets)
        self.target = nn.Embedding(n_targets, output_size).weight
        self.n_mems = n_mems
        self.memory = memory
        self.sequence = sequence

        self.input_to_hidden = nn.Linear(input_size, output_size)
        self.transformer_dec = TransformerDecoder(
            TransformerDecoderLayer(output_size, nheads,
                                    dim_feedforward=output_size * n_targets,
                                    dropout=dropout, activation='gelu'),
            depth)
        self.transformer_enc = TransformerEncoder(
            TransformerEncoderLayer(output_size, nheads,
                                    dim_feedforward=output_size,
                                    dropout=dropout, activation='gelu'),
            depth)
        self.pe = PositionalEncoding(output_size)
        self.bn = nn.BatchNorm1d(z_size)

        if embedding is not None:
            self.sbn = sbn
            if sbn is not None:
                z_params_size = int(embedding.weight.shape[1] / sbn.n_experts)
            else:
                z_params_size = embedding.weight.shape[1]
            self.hidden_to_z_params = nn.ModuleDict({param: nn.Linear(output_size, z_params_size)
                                                     for param in params})
        else:
            self.hidden_to_z_params = nn.ModuleDict({param: nn.Linear(output_size, int(z_size/n_targets))
                                                     for param in params})
Example #8
    def __init__(self,
                 ntoken,
                 ninp,
                 nhead,
                 nhid,
                 nlayers,
                 batch_size,
                 dropout=0.5,
                 pretrain_cnn=None,
                 pretrain_emb=None,
                 freeze_cnn=True):
        super(TransformerModel, self).__init__()

        self.model_type = 'cnn+transformer'
        decoder_layers = TransformerDecoderLayer(d_model=nhid,
                                                 nhead=nhead,
                                                 dropout=dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
        self.word_emb = nn.Embedding(ntoken, nhid)
        self.ninp = ninp
        self.nhid = nhid
        self.fc = nn.Linear(512, 512, bias=True)
        self.fc1 = nn.Linear(512, nhid, bias=True)
        self.dec_fc = nn.Linear(nhid, ntoken)
        self.batch_size = batch_size
        self.ntoken = ntoken
        self.encoder = Cnn10()
        self.dropout = nn.Dropout(dropout)
        self.pos_encoder = PositionalEncoding(nhid, dropout)
        self.generator = nn.Softmax(dim=-1)
        self.init_weights()

        if pretrain_cnn is not None:
            # Copy pretrained CNN weights into the encoder, matching
            # parameters by position in the two state dicts.
            dict_trained = pretrain_cnn
            dict_new = self.encoder.state_dict().copy()
            new_list = list(self.encoder.state_dict().keys())
            trained_list = list(dict_trained.keys())
            for i in range(len(new_list)):
                dict_new[new_list[i]] = dict_trained[trained_list[i]]
            self.encoder.load_state_dict(dict_new)
        if freeze_cnn:
            self.freeze_cnn()

        if pretrain_emb is not None:
            self.word_emb.weight.data = pretrain_emb
Example #9
 def __init__(
     self,
     vocab_size: int,
     max_seq_len: int,
     d_model: int,
     nhead: int,
     num_layers: int,
     dropout: float,
 ):
     super(Decoder, self).__init__()
     self.max_seq_len = max_seq_len
     self.embedding = nn.Embedding(vocab_size, d_model)
     self.pos_encoder = PositionalEncoding(dropout, d_model)
     decoder_layer = TransformerDecoderLayer(d_model,
                                             nhead,
                                             4 * d_model,
                                             dropout,
                                             norm_first=True)
     self.decoder = TransformerDecoder(decoder_layer, num_layers,
                                       nn.LayerNorm(d_model))
     self.output = nn.Linear(d_model, vocab_size)
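
Examples #4, #5 and #9 all follow the same embed, position-encode, decode, project-to-vocabulary pattern. The sketch below is a generic greedy-decoding illustration built only from torch.nn pieces; it is not taken from any of the repositories above, the custom PositionalEncoding is omitted, and the vocabulary size, BOS id and lengths are assumptions.

import torch
import torch.nn as nn

vocab_size, d_model, max_len = 1000, 512, 12
bos_id = 1  # assumed beginning-of-sequence token id

embedding = nn.Embedding(vocab_size, d_model)
layer = nn.TransformerDecoderLayer(d_model, nhead=8,
                                   dim_feedforward=4 * d_model,
                                   norm_first=True)
decoder = nn.TransformerDecoder(layer, num_layers=6, norm=nn.LayerNorm(d_model))
output_proj = nn.Linear(d_model, vocab_size)

memory = torch.randn(30, 1, d_model)  # encoder output for a single example

ids = torch.full((1, 1), bos_id, dtype=torch.long)  # (tgt_len=1, batch=1)
for _ in range(max_len - 1):
    tgt = embedding(ids)              # positional encoding omitted for brevity
    causal = torch.triu(torch.full((ids.size(0), ids.size(0)), float('-inf')),
                        diagonal=1)
    hidden = decoder(tgt, memory, tgt_mask=causal)
    next_id = output_proj(hidden[-1]).argmax(dim=-1, keepdim=True)  # (1, 1)
    ids = torch.cat([ids, next_id], dim=0)  # feed the greedy token back in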
Example #10
    def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
                 num_decoder_layers=None, dim_feedforward=2048, dropout=0.1,
                 activation="relu"):
        super(MultiDecodersTransformer, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        decoders = {}
        if num_decoder_layers:
            for k, v in num_decoder_layers.items():
                decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
                decoder_norm = LayerNorm(d_model)
                decoder = TransformerDecoder(decoder_layer, v, decoder_norm)
                decoders[k] = decoder
        self.decoders = ModuleDict(decoders.items())

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead
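
The ModuleDict above keeps one decoder stack per task key. A hedged sketch of how such a dictionary might be driven at call time; the task names, depths and tensor shapes are invented for illustration and are not part of the original class.

import torch
from torch.nn import (LayerNorm, ModuleDict, TransformerDecoder,
                      TransformerDecoderLayer)

# Assumed configuration: two task-specific decoders with different depths.
num_decoder_layers = {"caption": 3, "summary": 6}

decoders = {}
for task, depth in num_decoder_layers.items():
    layer = TransformerDecoderLayer(d_model=512, nhead=8)
    decoders[task] = TransformerDecoder(layer, depth, LayerNorm(512))
decoders = ModuleDict(decoders)

memory = torch.randn(20, 2, 512)  # shared encoder output (src_len, batch, d_model)
tgt = torch.randn(10, 2, 512)     # decoder input (tgt_len, batch, d_model)

# Select the decoder for the current task by key.
out = decoders["caption"](tgt, memory)  # -> (10, 2, 512)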
Example #11
    def __init__(self,
                 decoding_dim: int,
                 target_embedding_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 positional_encoding_max_steps: int = 1000,
                 dropout_prob: float = 0.1) -> None:

        super().__init__(decoding_dim=decoding_dim,
                         target_embedding_dim=target_embedding_dim,
                         decodes_parallel=True)

        decoder_layer = TransformerDecoderLayer(decoding_dim,
                                                num_attention_heads,
                                                feedforward_hidden_dim,
                                                dropout_prob)
        decoder_norm = LayerNorm(decoding_dim)
        self._decoder = TransformerDecoder(decoder_layer, num_layers,
                                           decoder_norm)
        self._dropout = Dropout(dropout_prob)
        self._use_positional_encoding = use_positional_encoding
        self._reset_parameters()
Example #12
 def decoder(self, decoder_layer: nn.Module) -> nn.Module:
     return TransformerDecoder(decoder_layer, num_layers=6)
Example #13
    def __init__(self,
                 vocab: Vocabulary,
                 metrics_dict_seq: dict,
                 metrics_dict_reg: dict,
                 input_dim=512,
                 num_attention_heads=8,
                 num_encoder_layers=6,
                 num_decoder_layers=6,
                 feedforward_hidden_dim=2048,
                 dropout=0.1,
                 transformer_dropout=0.1,
                 activation='relu',
                 linear_layers_activation='relu',
                 custom_encoder=None,
                 custom_decoder=None,
                 positional_encoding: Optional[str] = None,
                 predict_avg_total_payoff: bool = True,
                 predict_seq: bool = True,
                 attention: Attention = DotProductAttention(),
                 seq_weight_loss: float = 0.5,
                 reg_weight_loss: float = 0.5,
                 batch_size: int = 9,
                 linear_dim: Optional[int] = None,
                 only_raisha: bool = False,  # True when no saifa input is given
                 ):
        super(TransformerBasedModel, self).__init__(vocab)

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(input_dim, num_attention_heads, feedforward_hidden_dim,
                                                    transformer_dropout, activation)
            encoder_norm = LayerNorm(input_dim)
            self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(input_dim, num_attention_heads, feedforward_hidden_dim,
                                                    transformer_dropout, activation)
            decoder_norm = LayerNorm(input_dim)
            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self._input_dim = input_dim
        self.num_attention_heads = num_attention_heads

        if positional_encoding is None:
            self._sinusoidal_positional_encoding = False
            self._positional_embedding = None
        elif positional_encoding == "sinusoidal":
            self._sinusoidal_positional_encoding = True
            self._positional_embedding = None
        else:
            raise ValueError(
                "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
            )

        if predict_avg_total_payoff:  # need attention and regression layer
            self.attention = attention
            if linear_dim is not None and predict_seq:  # avg_turn_linear models
                input_dim_attention = linear_dim
            else:
                input_dim_attention = input_dim
            self.linear_after_attention_layer = LinearLayer(input_size=input_dim_attention, output_size=batch_size,
                                                            activation=linear_layers_activation)
            self.regressor = LinearLayer(input_size=batch_size, output_size=1, dropout=dropout,
                                         activation=linear_layers_activation)
            self.attention_vector = torch.randn((batch_size, input_dim_attention), requires_grad=True)
            if torch.cuda.is_available():
                self.attention_vector = self.attention_vector.cuda()
            self.mse_loss = nn.MSELoss()

        if predict_seq:  # need hidden2tag layer
            if linear_dim is not None:  # add linear layer before hidden2tag
                self.linear_layer = LinearLayer(input_size=input_dim, output_size=linear_dim, dropout=dropout,
                                                activation=linear_layers_activation)
                hidden2tag_input_size = linear_dim
            else:
                self.linear_layer = None
                hidden2tag_input_size = input_dim
            self.hidden2tag = LinearLayer(input_size=hidden2tag_input_size, output_size=vocab.get_vocab_size('labels'),
                                          dropout=dropout, activation=linear_layers_activation)

        self.metrics_dict_seq = metrics_dict_seq
        self.metrics_dict_reg = metrics_dict_reg
        self.seq_predictions = defaultdict(dict)
        self.reg_predictions = pd.DataFrame()
        self._epoch = 0
        self._first_pair = None
        self.seq_weight_loss = seq_weight_loss
        self.reg_weight_loss = reg_weight_loss
        self.predict_avg_total_payoff = predict_avg_total_payoff
        self.predict_seq = predict_seq
        self.only_raisha = only_raisha
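
Beyond the causal target mask, torch.nn encoder and decoder stacks like the ones built above also accept key-padding masks for variable-length batches. A generic sketch follows; the lengths, batch size and padding layout are assumptions and are independent of this repository's forward logic.

import torch
import torch.nn as nn

d_model, nhead = 512, 8

encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1),
    num_layers=6, norm=nn.LayerNorm(d_model))
decoder = nn.TransformerDecoder(
    nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1),
    num_layers=6, norm=nn.LayerNorm(d_model))

src = torch.randn(15, 4, d_model)  # (src_len, batch, d_model)
tgt = torch.randn(10, 4, d_model)  # (tgt_len, batch, d_model)

# Boolean padding masks: True marks positions attention should ignore.
src_pad = torch.zeros(4, 15, dtype=torch.bool)
tgt_pad = torch.zeros(4, 10, dtype=torch.bool)
src_pad[:, 12:] = True             # pretend the last 3 source steps are padding

# Boolean causal mask: True above the diagonal disallows attending ahead.
causal = torch.triu(torch.ones(10, 10, dtype=torch.bool), diagonal=1)

memory = encoder(src, src_key_padding_mask=src_pad)
out = decoder(tgt, memory,
              tgt_mask=causal,
              tgt_key_padding_mask=tgt_pad,
              memory_key_padding_mask=src_pad)  # -> (10, 4, d_model)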