Example #1
 def __init__(self,
              d_model: int,
              nhead: int,
              dim_feedforward: int = 2048,
              pre_norm: bool = False,
              att_dropout: float = 0.1,
              ffn_dropout: float = 0.1,
              activation: str = "relu") -> None:
     super(TransformerDecoderLayer, self).__init__()
     self.pre_norm = pre_norm
     self.self_attn = MultiheadAttention(d_model,
                                         nhead,
                                         dropout=att_dropout)
     self.multihead_attn = MultiheadAttention(d_model,
                                              nhead,
                                              dropout=att_dropout)
     self.feedforward = nn.Sequential(nn.Linear(d_model, dim_feedforward),
                                      _get_activation_fn(activation),
                                      nn.Dropout(ffn_dropout),
                                      nn.Linear(dim_feedforward, d_model),
                                      nn.Dropout(ffn_dropout))
     self.norm1 = nn.LayerNorm(d_model)
     self.norm2 = nn.LayerNorm(d_model)
     self.norm3 = nn.LayerNorm(d_model)
     self.dropout1 = nn.Dropout(ffn_dropout)
     self.dropout2 = nn.Dropout(ffn_dropout)
Example #2
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 p_net=None):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model,
                                                 nhead,
                                                 dropout=dropout)

        # Additional attention layer for BERT
        self.multihead_attn_bert = MultiheadAttention(d_model,
                                                      nhead,
                                                      dropout=dropout)
        self.p_net = p_net

        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
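Examples #1 and #2 only define their sub-modules; for orientation, here is a minimal, hypothetical usage sketch (illustrative sizes, not code from either project) showing how torch.nn.MultiheadAttention is called for self-attention and for encoder-decoder cross-attention, and what the default tensor layout is:

import torch
from torch.nn import MultiheadAttention

# Illustrative sizes only: target length 10, memory length 20, batch 2, d_model 512, 8 heads.
mha = MultiheadAttention(embed_dim=512, num_heads=8, dropout=0.1)
tgt = torch.randn(10, 2, 512)      # default layout is (seq_len, batch, embed_dim);
memory = torch.randn(20, 2, 512)   # pass batch_first=True for (batch, seq_len, embed_dim)

# Self-attention: query, key and value are all the target sequence.
self_out, self_weights = mha(tgt, tgt, tgt)

# Cross-attention: queries from the decoder, keys/values from the encoder memory.
cross_out, cross_weights = mha(tgt, memory, memory)
print(self_out.shape, cross_out.shape)  # torch.Size([10, 2, 512]) for both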
Example #3
    def __init__(self,
                 dim_model: int,
                 num_heads: int,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1,
                 activation: str = "relu",
                 pre_ln: bool = False) -> None:
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(dim_model,
                                            num_heads,
                                            dropout=dropout)
        self.multihead_attn = MultiheadAttention(dim_model,
                                                 num_heads,
                                                 dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(dim_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, dim_model)

        self.norm1 = LayerNorm(dim_model)
        self.norm2 = LayerNorm(dim_model)
        self.norm3 = LayerNorm(dim_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.pre_ln = pre_ln
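Since these snippets stop at __init__, the following is a small self-contained sketch (an assumption, not taken from any of the repositories above; the class name MiniDecoderLayer is made up) of how the pre_norm/pre_ln flag from Examples #1 and #3 typically changes the wiring of the three sub-layers in forward():

import torch
from torch import nn
from torch.nn import MultiheadAttention

class MiniDecoderLayer(nn.Module):
    """Hypothetical companion to the __init__ snippets above: one common way the
    sub-modules are wired in forward() for pre-LN vs. post-LN."""

    def __init__(self, d_model=512, nhead=8, dim_feedforward=2048,
                 dropout=0.1, pre_ln=False):
        super().__init__()
        self.pre_ln = pre_ln
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.cross_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.ffn = nn.Sequential(nn.Linear(d_model, dim_feedforward), nn.ReLU(),
                                 nn.Dropout(dropout),
                                 nn.Linear(dim_feedforward, d_model))
        self.norm1, self.norm2, self.norm3 = (nn.LayerNorm(d_model) for _ in range(3))
        self.drop = nn.Dropout(dropout)

    def _sublayer(self, x, norm, fn):
        # Pre-LN normalizes the sub-layer input; post-LN normalizes the residual sum.
        if self.pre_ln:
            return x + self.drop(fn(norm(x)))
        return norm(x + self.drop(fn(x)))

    def forward(self, tgt, memory):
        tgt = self._sublayer(tgt, self.norm1, lambda x: self.self_attn(x, x, x)[0])
        tgt = self._sublayer(tgt, self.norm2,
                             lambda x: self.cross_attn(x, memory, memory)[0])
        return self._sublayer(tgt, self.norm3, self.ffn)

layer = MiniDecoderLayer()
out = layer(torch.randn(10, 2, 512), torch.randn(20, 2, 512))
print(out.shape)  # torch.Size([10, 2, 512])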
Example #4
    def __init__(
        self,
        embed_dim,
        ffn_dim,
        num_heads,
        attn_dropout=0.,
        act_dropout=0.,
        dropout=0.,
        layernorm_before=False,
    ):
        super().__init__()

        self.embed_dim = embed_dim
        self.layernorm_before = layernorm_before
        self.act_dropout = act_dropout
        self.dropout = dropout

        # self-attention part
        self.self_attn = MultiheadAttention(embed_dim=embed_dim,
                                            num_heads=num_heads,
                                            dropout=attn_dropout)
        self.attn_layernorm = nn.LayerNorm(embed_dim, eps=1e-5)

        # enc-dec attention part
        self.enc_dec_attention = MultiheadAttention(embed_dim=embed_dim,
                                                    num_heads=num_heads,
                                                    dropout=attn_dropout)
        self.enc_dec_layernorm = nn.LayerNorm(embed_dim, eps=1e-5)

        # point-wise ffn
        self.ffn1 = nn.Linear(embed_dim, ffn_dim)
        self.ffn2 = nn.Linear(ffn_dim, embed_dim)
        self.ffn_layernorm = nn.LayerNorm(embed_dim, eps=1e-5)

        self.reset_parameters()
Example #5
 def __init__(self, d_model, n_head, d_inner, dropout=0.1):
     super(DecoderLayer, self).__init__()
     self.slf_attn = MultiheadAttention(d_model, n_head, dropout=dropout)
     self.enc_attn = MultiheadAttention(d_model, n_head, dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner)
     self.connector = nn.ModuleList(
         [SublayerConnection(d_model, dropout) for _ in range(3)])
Example #6
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 kdim=None,
                 vdim=None):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model,
                                                 nhead,
                                                 dropout=dropout,
                                                 kdim=kdim,
                                                 vdim=vdim)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
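Example #6 passes kdim and vdim through to the cross-attention module; a short illustrative sketch (hypothetical sizes, not code from that project) of what those arguments allow:

import torch
from torch.nn import MultiheadAttention

# kdim/vdim let the encoder memory have a different feature size than the queries.
mha = MultiheadAttention(embed_dim=512, num_heads=8, kdim=256, vdim=256)
query = torch.randn(10, 2, 512)    # (seq_len, batch, embed_dim)
memory = torch.randn(20, 2, 256)   # keys/values in their own 256-dim space
out, _ = mha(query, memory, memory)
print(out.shape)  # torch.Size([10, 2, 512]) -- output always comes back in embed_dim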
Example #7
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 nn,
                 aggr='add',
                 bias=True,
                 **kwargs):
        super().__init__(aggr=aggr, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.nn = nn
        self.aggr = aggr
        self.lin0 = Linear(self.out_channels, self.out_channels, bias=False)
        self.lin1 = Linear(self.out_channels, self.out_channels, bias=True)
        self.attn = MultiheadAttention(self.out_channels, 4)

        self.register_parameter('root', None)

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()
Example #9
    def __init__(self,
                 d_model,
                 subtokens_per_token,
                 pointer_attention_type: AttentionType,
                 n_attention_heads=8):
        super(PointerNetwork, self).__init__()

        self.d_model = d_model
        self.pointer_attention_type = pointer_attention_type

        # "Embedding" of the sentinel used for computing the logit of the gate
        self.sentinel = nn.Parameter(torch.Tensor(self.d_model, 1))

        # Linear transformation for computing the query from the LSTM hidden state
        self.query_linear = nn.Linear(self.d_model, self.d_model)

        # Linear transformation for getting n subtokens out of the representations of the final encoder layer
        self.subtoken_extractor_linear = nn.Linear(
            self.d_model, subtokens_per_token * self.d_model)

        if self.pointer_attention_type == AttentionType.ADDITIVE:
            self.additive_attention_W = nn.Linear(
                self.d_model * 2, self.d_model)  # bidirectional
            self.additive_attention_tanh = nn.Tanh()
            self.additive_attention_v = nn.Parameter(
                torch.Tensor(self.d_model, 1))  # context vector
        elif self.pointer_attention_type == AttentionType.MULTIHEAD:
            self.multihead_attention = MultiheadAttention(
                self.d_model, n_attention_heads)

        self._reset_parameters()
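Examples #8 and #9 create parameters from uninitialized tensors (Parameter(torch.Tensor(...))) and rely on reset_parameters() to give them values. Below is a toy sketch of that pattern with a made-up class name, assuming Xavier initialization; the actual init schemes of those projects are not shown here:

import torch
from torch import nn

class SentinelDemo(nn.Module):
    """Toy module (not from either repository) showing the usual init pattern."""

    def __init__(self, d_model: int):
        super().__init__()
        # torch.Tensor(...) allocates uninitialized memory, so the values are garbage
        # until reset_parameters() fills them in.
        self.sentinel = nn.Parameter(torch.Tensor(d_model, 1))
        self._reset_parameters()

    def _reset_parameters(self):
        nn.init.xavier_uniform_(self.sentinel)

print(SentinelDemo(64).sentinel.std())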
Example #10
 def __init__(self, d_model, n_head, head_dropout=0.1):
     super(AudioVideoInter, self).__init__()
     self.dropout = nn.Dropout(0.1)
     self.video_multihead = MultiheadAttention(d_model,
                                               num_heads=n_head,
                                               dropout=head_dropout)
     self.norm1 = nn.LayerNorm(d_model)
Example #11
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 use_gate=False):
        # Reordering of operations as done in https://arxiv.org/pdf/1910.06764.pdf
        # d_model: dimension of the embedding for each input
        super(StableTransformerLayer, self).__init__()

        self.use_gate = use_gate
        self.gate_mha = GRUGate(d_model)
        self.gate_mlp = GRUGate(d_model)
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = F.relu
Example #12
    def __init__(self, d_model, n_heads, dropout=0.0):
        super(SelfAttentionLayer, self).__init__()
        self.multihead_attn = MultiheadAttention(d_model,
                                                 n_heads,
                                                 dropout=dropout)

        self.norm1 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
Example #13
    def __init__(self, num_heads: int, features_dim: int, dropout: float):
        """

        :param num_heads: head num of attention
        :param features_dim: total features dimension4
        """
        super().__init__()
        self.model = MultiheadAttention(num_heads=num_heads,
                                        embed_dim=features_dim,
                                        dropout=dropout)
Example #14
    def __init__(self, d_model, nhead, dim_feedforward=1024, dropout=0.1):
        super(TransLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
Example #15
    def __init__(self, d_model: int, nhead: int, d_hid: int, dropout=0.1):
        super(Smoother, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        self.conv1 = Conv1d(d_model, d_hid, 9, padding=4)
        self.conv2 = Conv1d(d_hid, d_model, 1, padding=0)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
Example #16
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation=F.relu,
                 add_bias_kv=False,
                 add_norm=False) -> None:
        super(TFDecorder, self).__init__()
        self.self_attn = MultiheadAttention(d_model,
                                            nhead,
                                            dropout=dropout,
                                            batch_first=True,
                                            add_bias_kv=add_bias_kv)
        self.multihead_attn = MultiheadAttention(d_model,
                                                 nhead,
                                                 dropout=dropout,
                                                 batch_first=True,
                                                 add_bias_kv=add_bias_kv)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.dropout = nn.Dropout(dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        if add_norm:
            self.norm1 = nn.LayerNorm(d_model)
            self.norm2 = nn.LayerNorm(d_model)
            self.norm3 = nn.LayerNorm(d_model)
        else:
            self.norm1 = self.norm2 = self.norm3 = nn.Identity()

        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation
        # Instantiate only if an activation class (e.g. nn.ReLU) was passed;
        # the default F.relu is already a plain function and must not be called here.
        if isinstance(self.activation, type):
            self.activation = self.activation()
Example #17
    def __init__(self,
                 d_model,
                 max_seq_len,
                 max_docs,
                 batch_size,
                 nhead,
                 mmr=False,
                 query_doc_attn=False,
                 head_pooling=False,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu"):

        super().__init__()

        # Definitions from the PyTorch encoder layer
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        if activation == "relu":
            self.activation = nn.functional.relu
        else:
            raise IOError("Please specify 'relu' activation")
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.pooling_norm = nn.LayerNorm(d_model)
        self.doc_attn_norm = nn.LayerNorm(d_model)
        self.query_doc_norm = nn.LayerNorm(d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.d_model = d_model

        # New variables
        self.max_seq_len = max_seq_len
        self.max_docs = max_docs
        self.batch_size = batch_size
        self.head_pooling = head_pooling
        self.mmr = mmr
        self.query_doc_attn = query_doc_attn

        if mmr:
            self.mmr_attention = MMR(d_model, max_seq_len)
        # TODO: Add option for using cls token as doc representation
        if head_pooling:
            self.head_pooling = MultiHeadPooling(max_seq_len,
                                                 max_docs,
                                                 batch_size,
                                                 d_model,
                                                 nhead,
                                                 dropout=dropout)
Example #18
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
Example #19
    def __init__(self, d_model: int, n_heads: int, dropout: float):
        """Uses self-attention to combine the meta-data and the temporal data.

        :param d_model: The dimension of the meta-data
        :type d_model: int
        :param n_heads: The number of heads to use in multi-head mechanism
        :type n_heads: int
        :param dropout: The dropout rate as a float
        :type dropout: float
        """
        super().__init__()
        self.main_layer = MultiheadAttention(d_model, n_heads, dropout)
Example #20
 def __init__(self, d_model=256, nhead=8, dim_feedforward=2048, dropout=0.1):
     super(PDSLayer, self).__init__()
     self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
     # Implementation of Feedforward model
     self.linear1 = Linear(d_model, dim_feedforward)
     self.dropout = Dropout(dropout)
     self.linear2 = Linear(dim_feedforward//2, d_model)
     self.norm2 = LayerNorm(d_model)
     self.norm3 = LayerNorm(d_model)
     self.dropout2 = Dropout(dropout)
     self.dropout3 = Dropout(dropout)
     self.activation = F.glu
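The asymmetric shapes in Example #20 (linear2 takes dim_feedforward // 2) follow from the choice of F.glu as the activation: GLU splits its input in half along the gated dimension and multiplies one half by the sigmoid of the other, so it halves the feature size. A quick check:

import torch
import torch.nn.functional as F

x = torch.randn(2, 2048)        # e.g. the output of linear1 with dim_feedforward=2048
print(F.glu(x, dim=-1).shape)   # torch.Size([2, 1024]) -- hence linear2(dim_feedforward // 2, d_model)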
Example #21
    def __init__(self, embed_dim, n_head, hidden_dim, inner_dim, dropout,
                 max_len, cross):
        super(ATTNLayer, self).__init__()

        # Meta data of mattn
        self.embed_dim = embed_dim
        self.n_head = n_head
        self.hidden_dim = hidden_dim
        self.inner_dim = inner_dim
        self.max_len = max_len
        self.dropout = dropout
        self.cross = cross

        # Multihead & Positionwise
        self.self_mattn = MultiheadAttention(embed_dim, n_head, dropout)
        self.pos_ff = PositionwiseFeedForward(embed_dim, inner_dim, dropout)
        self.norm = LayerNorm(embed_dim)

        # Cross attention
        if cross:
            self.cross_mattn = MultiheadAttention(embed_dim, n_head, dropout)
Example #22
 def __init__(self, decoder_layer, num_layers=4, norm=None):
     super(PDS, self).__init__()
     self.position_multihead_attn = MultiheadAttention(256, 8, dropout=0.1)
     self.norm1 = LayerNorm(256)
     self.linear1 = Linear(256, 2048)
     self.dropout = Dropout(0.1)
     self.linear2 = Linear(2048//2, 256)
     self.activation = F.glu
     self.dropout1 = Dropout(0.1)
     self.dropout2 = Dropout(0.1)
     self.layers = _get_clones(decoder_layer, num_layers-1)
     self.num_layers = num_layers-1
     self.norm = norm
Example #23
    def __init__(self,
                 d_model: int,
                 nhead: int,
                 d_hid: int,
                 dropout=0.1,
                 no_residual=False):
        super(Extractor, self).__init__()

        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.cross_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        self.conv1 = Conv1d(d_model, d_hid, 9, padding=4)
        self.conv2 = Conv1d(d_hid, d_model, 1, padding=0)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.no_residual = no_residual
Example #24
 def __init__(self, args):
     super(TransformerMIL, self).__init__()
     encoder_layer = TransformerEncoderLayer(d_model=args.feature_depth,
                                             nhead=8,
                                             dim_feedforward=2048,
                                             dropout=args.dropout,
                                             activation="relu")
     encoder_norm = LayerNorm(args.feature_depth)
     self.attention = TransformerEncoder(encoder_layer, args.ntrans,
                                         encoder_norm)
     #self.attention1 = MultiheadAttention(args.feature_depth, 8)
     self.attention2 = MultiheadAttention(args.feature_depth, 8)
     self.classifier = Sequential(Linear(args.feature_depth, 1), Sigmoid())
     self.mil = AttentionMILFeatures(args)
Example #25
    def __init__(self, n_head=2, d_head = 2, embed_dim=100, N_en=6, N_de=6,
                  classes = 2, ff_dim=2048, do_rate=0.1, max_len=256,
                  activation="relu", custom_encoder=None, custom_decoder=None,
                  masks=[False, False, False], kmasks=[False, False, False]):
        
        super(Transformer, self).__init__()

        #===Base model(attn, enc, dec, ff)
        mhattn = MultiheadAttention(embed_dim, n_head)
        selfattn = MultiheadAttention(embed_dim, n_head)
        ff_1 = nn.Linear(embed_dim, ff_dim)
        ff_2 = nn.Linear(ff_dim, embed_dim)
        position = PositionalEncoding(embed_dim, do_rate)

        #===Masked attention(for seqs/keys) #src, tgt, memory
        self.masks = masks
        self.kmasks = kmasks

        #===Main Architecture(enc, dec)
        self.encoder = Encoder(
            EncoderLayer(embed_dim, deepcopy(mhattn), deepcopy(ff_1), deepcopy(ff_2), do_rate), N_en)
        self.decoder = Decoder(
            DecoderLayer(embed_dim, deepcopy(selfattn), deepcopy(mhattn), deepcopy(ff_1), deepcopy(ff_2), do_rate), N_de)

        #===Embedding setting(src, tgt)
        self.src_embed = nn.Sequential(nn.Embedding(10000, embed_dim), deepcopy(position))
        self.tgt_embed = nn.Sequential(nn.Embedding(10000, embed_dim), deepcopy(position))

        #===Final FC
        self.final = nn.Linear(embed_dim*max_len, classes)

        #===Loss function definition
        self.loss = nn.CrossEntropyLoss()

        #===Parameters
        self.embed_dim = embed_dim
        self.max_len = max_len
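Example #25 threads masks and kmasks flags through to its attention layers, but the snippet does not show how they are applied. As a hedged illustration (not code from that project), this is how boolean attn_mask and key_padding_mask arguments are passed to torch.nn.MultiheadAttention in recent PyTorch versions:

import torch
from torch.nn import MultiheadAttention

mha = MultiheadAttention(embed_dim=64, num_heads=4)
x = torch.randn(5, 2, 64)   # (seq_len, batch, embed_dim)

# attn_mask: True marks pairs a query may NOT attend to (here: a causal mask).
causal = torch.triu(torch.ones(5, 5, dtype=torch.bool), diagonal=1)

# key_padding_mask: True marks padded key positions, per batch element.
padding = torch.tensor([[False, False, False, True, True],
                        [False, False, False, False, False]])

out, weights = mha(x, x, x, attn_mask=causal, key_padding_mask=padding)
print(out.shape)  # torch.Size([5, 2, 64])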
Example #26
    def __init__(self, d_model, nhead, dim_feedforward=1024, dropout=0.1):
        super(TransformerDecoderLayer_BN, self).__init__()

        self.multihead_attn = MultiheadAttention(d_model,
                                                 nhead,
                                                 dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm2 = BatchNorm1d(d_model)
        self.norm3 = BatchNorm1d(d_model)

        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)
Example #27
    def load_model(self, run_id, snapshot_iteration, gpu=True):
        model_params = self.load_parameters(run_id, snapshot_iteration, gpu=gpu)
        config = self.load_config(run_id)
        model_config = self._prepare_model_config(config)

        language = config['data_setup']['language']
        data_manager = CTPreprocessedDataManager(DATA_PATH_STAGE_2, language)

        decoder_config = model_config['lm_decoder']

        word_vocab, token_type_vocab, node_type_vocab = data_manager.load_vocabularies()

        transformer_encoder_config = model_config['lm_encoder']
        transformer_encoder_config['num_token_types'] = len(token_type_vocab)
        transformer_encoder_config['vocab_size'] = len(word_vocab)

        decoder_config['sos_id'] = word_vocab[SOS_TOKEN]
        if 'num_subtokens_output' in config['data_setup']:
            decoder_config['output_subtokens_per_token'] = config['data_setup']['num_subtokens_output']
        else:
            decoder_config['output_subtokens_per_token'] = NUM_SUB_TOKENS

        if 'use_pointer_network' in config['data_setup']:
            decoder_config['use_pointer_network'] = config['data_setup']['use_pointer_network']

        decoder_config['lm_encoder'] = transformer_encoder_config
        decoder_config['loss_fct'] = model_config['loss_fct']

        model = XLNetTransformerDecoder(TransformerLMDecoderConfig(**decoder_config))

        try:
            model.load_state_dict(model_params)
        except RuntimeError:
            # In most cases, this is due to the legacy issue with encoder_self_attention
            model.add_module('encoder_self_attention',
                             MultiheadAttention(model.d_model, decoder_config['decoder_nhead'],
                                                dropout=decoder_config['decoder_dropout']))
            try:
                model.load_state_dict(model_params)
            except RuntimeError:
                decoder_config['concat_query_and_pointer'] = False
                model = CodeTransformerDecoder(TransformerLMDecoderConfig(**decoder_config))
                model.load_state_dict(model_params)

        return model
Example #28
    def __init__(self, model_size: int, num_heads: int, **kwargs):
        super().__init__(**kwargs)
        self.input_size = self.output_size = model_size
        self.num_heads = num_heads

        self.multihead_attention = MultiheadAttention(
            embed_dim=self.input_size, num_heads=num_heads
        )

        self.attention_norm = AddAndNormLayer(model_size=self.input_size)

        self.linear_layer = nn.Sequential(
            nn.Linear(in_features=self.input_size, out_features=4 * self.input_size),
            nn.ReLU(),
            nn.Linear(in_features=4 * self.input_size, out_features=self.input_size),
        )

        self.linear_layer_norm = AddAndNormLayer(model_size=self.input_size)
Example #29
    def __init__(self,
                 questions_size=CONTENT_ID_VOCAB_SIZE,
                 responses_size=RESPONSE_VOCAB_SIZE,
                 part_size=PART_VOCAB_SIZE,
                 task_container_id_size=CONTAINER_VOCAB_SIZE,
                 user_id_size=USER_VOCAB_SIZE,
                 day_size=DAYS_VOCAB_SIZE,
                 maxlength=NDAY_LENGTH,
                 num_heads=NUM_HEADS,
                 embedding_size=EMBEDDING_DIM,
                 dropout=DROPOUT):

        super(Encoder, self).__init__()
        self.input_length = maxlength
        #embedding layers for question, response
        #user, part, task_container_id and position
        self.embedding_ques = Embedding(num_embeddings=questions_size,
                                        embedding_dim=embedding_size)

        self.embedding_response = Embedding(num_embeddings=responses_size,
                                            embedding_dim=embedding_size)
        self.embedding_user = Embedding(num_embeddings=user_id_size,
                                        embedding_dim=embedding_size)

        self.embedding_part = Embedding(num_embeddings=part_size,
                                        embedding_dim=embedding_size)

        self.embedding_task = Embedding(num_embeddings=task_container_id_size,
                                        embedding_dim=embedding_size)

        self.embedding_pos = Embedding(num_embeddings=maxlength + day_size +
                                       DAY_VOCAB_SIZE,
                                       embedding_dim=embedding_size)
        #linear layers for day and days
        self.linear_day = Linear(maxlength, embedding_size)
        self.linear_days = Linear(maxlength, embedding_size)

        #multihead attention
        self.attention = MultiheadAttention(embed_dim=embedding_size,
                                            num_heads=num_heads,
                                            dropout=dropout)
        self.dropout1 = Dropout(dropout)
Example #30
    def __init__(self,
                 trip_emb,
                 embed_dim,
                 num_heads=8,
                 dropout=0.1,
                 filter_inner=64):
        super(AttNet, self).__init__()

        # emb
        self.emb = Conv1d(trip_emb, embed_dim, 1)
        self.emb_bn = BatchNorm1d(embed_dim)

        # mha
        self.mha = MultiheadAttention(embed_dim, num_heads, dropout)
        self.mha_bn = BatchNorm1d(embed_dim)

        # ff
        self.inner = Conv1d(embed_dim, filter_inner, 1)
        self.outer = Conv1d(filter_inner, embed_dim, 1)
        self.ff_bn = BatchNorm1d(embed_dim)

        self.reset_parameters()
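Examples #26 and #30 pair MultiheadAttention with BatchNorm1d. Since BatchNorm1d expects (batch, channels, length) while the attention module's default layout is (seq_len, batch, embed_dim), the forward pass presumably permutes in between; a minimal sketch of that assumed wiring (not taken from either repository):

import torch
from torch.nn import BatchNorm1d, MultiheadAttention

embed_dim, num_heads = 256, 8
mha = MultiheadAttention(embed_dim, num_heads, dropout=0.1)
bn = BatchNorm1d(embed_dim)

x = torch.randn(50, 4, embed_dim)               # (seq_len, batch, embed_dim)
attn_out, _ = mha(x, x, x)
y = x + attn_out                                # residual connection (assumed)
y = bn(y.permute(1, 2, 0)).permute(2, 0, 1)     # BatchNorm1d wants (batch, channels, seq_len)
print(y.shape)  # torch.Size([50, 4, 256])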