Example no. 1
    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Linear(d_model, n_head * d_k)
        self.w_ks = nn.Linear(d_model, n_head * d_k)
        self.w_vs = nn.Linear(d_model, n_head * d_v)
        nn.init.normal_(self.w_qs.weight,
                        mean=0,
                        std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_ks.weight,
                        mean=0,
                        std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_vs.weight,
                        mean=0,
                        std=np.sqrt(2.0 / (d_model + d_v)))

        self.attention = ScaledDotProductAttention(
            temperature=np.power(d_k, 0.5))
        self.layer_norm = LayerNorm(d_model)

        self.fc = nn.Linear(n_head * d_v, d_model)
        nn.init.xavier_normal_(self.fc.weight)

        self.dropout = nn.Dropout(dropout)
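For orientation, here is a small self-contained sketch of how projections initialised like the ones above are typically combined with scaled dot-product attention in a forward pass. The head-splitting reshape and the sqrt(d_k) scaling follow the standard multi-head attention formulation and are not taken from this example's source.

import torch
import torch.nn as nn
import torch.nn.functional as F

n_head, d_model, d_k, d_v = 8, 512, 64, 64
batch, seq_len = 2, 10

w_qs = nn.Linear(d_model, n_head * d_k)
w_ks = nn.Linear(d_model, n_head * d_k)
w_vs = nn.Linear(d_model, n_head * d_v)
fc = nn.Linear(n_head * d_v, d_model)

x = torch.randn(batch, seq_len, d_model)

# Project, then split the last dimension into heads: (batch, n_head, seq_len, d_k or d_v)
q = w_qs(x).view(batch, seq_len, n_head, d_k).transpose(1, 2)
k = w_ks(x).view(batch, seq_len, n_head, d_k).transpose(1, 2)
v = w_vs(x).view(batch, seq_len, n_head, d_v).transpose(1, 2)

# Scaled dot-product attention with temperature sqrt(d_k)
scores = torch.matmul(q, k.transpose(-2, -1)) / (d_k ** 0.5)
out = torch.matmul(F.softmax(scores, dim=-1), v)    # (batch, n_head, seq_len, d_v)

# Concatenate the heads and project back to d_model (the role of self.fc above)
out = fc(out.transpose(1, 2).reshape(batch, seq_len, n_head * d_v))
print(out.shape)                                    # torch.Size([2, 10, 512])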
Example no. 2
 def __init__(self,
              seq_length: int,
              output_seq_length: int,
              n_time_series: int,
              d_model=128,
              output_dim=1,
              n_layers_encoder=6,
              forward_dim=2048,
              dropout=0.1,
              use_mask=False,
              meta_data=None,
              n_heads=8):
     """
     Uses a number of encoder layers with a simple linear decoder layer.
     """
     super().__init__()
     self.dense_shape = torch.nn.Linear(n_time_series, d_model)
     self.pe = SimplePositionalEncoding(d_model)
     encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim,
                                             dropout)
     encoder_norm = LayerNorm(d_model)
     self.transformer_enc = TransformerEncoder(encoder_layer,
                                               n_layers_encoder,
                                               encoder_norm)
     self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
     self.output_seq_length = output_seq_length
     self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
     self.mask = generate_square_subsequent_mask(seq_length)
     self.mask_it = use_mask
     if meta_data:
         self.meta_merger = MergingModel(meta_data["method"],
                                         meta_data["params"])
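A rough sketch of the shape flow through these layers in a forward pass. SimplePositionalEncoding and the optional meta-data merging are project-specific and omitted here; the permutes assume the default (seq_len, batch, d_model) layout of torch.nn.TransformerEncoder, so this only illustrates the shapes, not the source project's actual forward method.

import torch

batch, seq_length, n_time_series = 4, 30, 5
d_model, output_dim, output_seq_length = 128, 1, 10

dense_shape = torch.nn.Linear(n_time_series, d_model)
encoder_layer = torch.nn.TransformerEncoderLayer(d_model, 8, 2048, 0.1)
transformer_enc = torch.nn.TransformerEncoder(encoder_layer, 6,
                                              torch.nn.LayerNorm(d_model))
output_dim_layer = torch.nn.Linear(d_model, output_dim)
out_length_lay = torch.nn.Linear(seq_length, output_seq_length)

x = torch.randn(batch, seq_length, n_time_series)
x = dense_shape(x)                       # (batch, seq_length, d_model)
x = x.permute(1, 0, 2)                   # (seq_length, batch, d_model)
x = transformer_enc(x)                   # (seq_length, batch, d_model)
x = output_dim_layer(x)                  # (seq_length, batch, output_dim)
x = out_length_lay(x.permute(2, 1, 0))   # (output_dim, batch, output_seq_length)
print(x.shape)                           # torch.Size([1, 4, 10])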
Example no. 3
    def __init__(self,
                 input_dim=13,
                 num_classes=9,
                 d_model=64,
                 n_head=2,
                 n_layers=5,
                 d_inner=128,
                 activation="relu",
                 dropout=0.017998950510888446,
                 max_len=200):

        super(PETransformerModel, self).__init__()
        self.modelname = f"PeTransformerEncoder_input-dim={input_dim}_num-classes={num_classes}_" \
                         f"d-model={d_model}_d-inner={d_inner}_n-layers={n_layers}_n-head={n_head}_" \
                         f"dropout={dropout}"

        encoder_layer = TransformerEncoderLayer(d_model, n_head, d_inner,
                                                dropout, activation)
        encoder_norm = LayerNorm(d_model)

        self.inlinear = Linear(input_dim, d_model)
        self.relu = ReLU()
        self.transformerencoder = TransformerEncoder(encoder_layer, n_layers,
                                                     encoder_norm)
        self.flatten = Flatten()
        self.outlinear = Linear(d_model, num_classes)
        self.pe = PositionalEncoding(d_model, max_len=max_len)
Example no. 4
 def __init__(self,
              seq_length: int,
              output_seq_length: int,
              n_time_series: int,
              d_model=128,
              output_dim=1,
              n_layers_encoder=6,
              use_mask=False,
              n_heads=8):
     """
     Uses a number of encoder layers with a simple linear decoder layer.
     """
     super().__init__()
     self.dense_shape = torch.nn.Linear(n_time_series, d_model)
     self.pe = SimplePositionalEncoding(d_model)
     encoder_layer = TransformerEncoderLayer(d_model, n_heads)
     encoder_norm = LayerNorm(d_model)
     self.transformer_enc = TransformerEncoder(encoder_layer,
                                               n_layers_encoder,
                                               encoder_norm)
     self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
     self.output_seq_length = output_seq_length
     self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
     self.mask = generate_square_subsequent_mask(seq_length)
     self.mask_it = use_mask
Example no. 5
    def __init__(self, d_model, dropout=0.1, max_len=512):
        super(PositionalEncodings, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        self.position_embeddings = nn.Embedding(max_len, d_model)
        self.token_type_embeddings = nn.Embedding(2, d_model)

        self.embedding_layer_norm = LayerNorm(d_model, eps=1e-12)
Example no. 6
    def __init__(self, d_model, vocab_size=30522, dropout=0.1, max_len=512):
        super(BERTStyleEmbedding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        self.word_embeddings = nn.Embedding(vocab_size, d_model)
        self.position_embeddings = nn.Embedding(max_len, d_model)
        self.token_type_embeddings = nn.Embedding(2, d_model)

        self.embedding_layer_norm = LayerNorm(d_model, eps=1e-12)
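A self-contained sketch of how embeddings set up this way are usually combined: token, position, and segment embeddings are summed, layer-normalised, and passed through dropout. The summing order follows the common BERT recipe and is an assumption here, not code from this example's source.

import torch
import torch.nn as nn

d_model, vocab_size, max_len = 64, 30522, 512
word_embeddings = nn.Embedding(vocab_size, d_model)
position_embeddings = nn.Embedding(max_len, d_model)
token_type_embeddings = nn.Embedding(2, d_model)
embedding_layer_norm = nn.LayerNorm(d_model, eps=1e-12)
dropout = nn.Dropout(p=0.1)

input_ids = torch.randint(0, vocab_size, (2, 16))          # (batch, seq_len)
positions = torch.arange(16).unsqueeze(0).expand(2, -1)    # (batch, seq_len)
token_type_ids = torch.zeros(2, 16, dtype=torch.long)      # single-segment input

# Sum the three embeddings, then normalise and apply dropout
embeddings = (word_embeddings(input_ids)
              + position_embeddings(positions)
              + token_type_embeddings(token_type_ids))
embeddings = dropout(embedding_layer_norm(embeddings))     # (2, 16, 64)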
Example no. 7
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
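The matching forward pass (not shown in the snippet) typically follows the post-norm pattern this constructor sets up: self-attention with a residual connection and LayerNorm, then the position-wise feed-forward block with another residual and LayerNorm. A sketch, with the argument names assumed:

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        # Self-attention block: residual connection + LayerNorm
        src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                              key_padding_mask=src_key_padding_mask)[0]
        src = self.norm1(src + self.dropout1(src2))
        # Position-wise feed-forward block: residual connection + LayerNorm
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = self.norm2(src + self.dropout2(src2))
        return src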
Example no. 8
    def __init__(self,
                 dim_model=300,
                 num_heads=12,
                 dim_feedforward=2048,
                 dropout=0.2):
        super().__init__()

        encoder_layer = nn.TransformerEncoderLayer(dim_model, num_heads,
                                                   dim_feedforward, dropout)
        encoder_norm = LayerNorm(dim_model)
        self.transformer = nn.TransformerEncoder(encoder_layer, 1,
                                                 encoder_norm)
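Usage sketch: by default nn.TransformerEncoder expects input shaped (seq_len, batch, dim_model), so a batch of embedded sequences would be passed like this (the concrete sizes are illustrative only):

import torch
import torch.nn as nn

dim_model, num_heads = 300, 12
encoder_layer = nn.TransformerEncoderLayer(dim_model, num_heads, 2048, 0.2)
transformer = nn.TransformerEncoder(encoder_layer, 1, nn.LayerNorm(dim_model))

x = torch.randn(50, 8, dim_model)   # (seq_len, batch, dim_model)
out = transformer(x)                # (50, 8, 300), same shape as the input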
Example no. 9
def build_transformer_model(src_vocab_size: int,
                            tgt_vocab_size: int,
                            rnn_size: int = RNN_SIZE,
                            num_head: int = 4,
                            num_layers: int = 3,
                            dim_ff: int = 1024,
                            dropout: float = DROPOUT) -> EncoderDecoder:
    """
    Build transformer model based on the paper "Attention Is All You Need".

    Arguments:
         src_vocab_size: vocab size for encoder
         tgt_vocab_size: vocab size for decoder
         rnn_size: size of RNN hidden states in encoder/decoder
         num_head: the number of heads in the multi headed attention
         num_layers: number of encoder/decoder layers
         dim_ff: the dimension of the feed forward layer
         dropout: the dropout probability value
    """

    # Build encoder
    encoder_layer = TransformerEncoderLayer(rnn_size, num_head, dim_ff,
                                            dropout)
    encoder_norm = LayerNorm(rnn_size)
    encoder = TransformerEncoder(encoder_layer, num_layers, encoder_norm)

    # Build decoder
    decoder_layer = TransformerDecoderLayer(rnn_size, num_head, dim_ff,
                                            dropout)
    decoder_norm = LayerNorm(rnn_size)
    decoder = TransformerDecoder(decoder_layer, num_layers, decoder_norm)

    # Build generator
    generator = Generator(rnn_size, tgt_vocab_size)

    return EncoderDecoder(encoder, decoder, generator, rnn_size,
                          src_vocab_size, tgt_vocab_size)
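A hedged usage example. The vocabulary sizes below are placeholders, and RNN_SIZE and DROPOUT are module-level constants in the source project whose values are not shown here:

# Hypothetical call; arguments left out fall back to the defaults above
# (rnn_size=RNN_SIZE, num_head=4, num_layers=3, dim_ff=1024, dropout=DROPOUT).
model = build_transformer_model(src_vocab_size=8000, tgt_vocab_size=8000)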
Example no. 10
    def __init__(self,
                 d_model: int = 512,
                 nhead: int = 8,
                 num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1,
                 activation: str = "relu",
                 custom_encoder: Optional[Any] = None,
                 custom_decoder: Optional[Any] = None) -> None:
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            encoder_norm = LayerNorm(d_model)
            self.encoder = TransformerEncoder(encoder_layer,
                                              num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            decoder_norm = LayerNorm(d_model)
            self.decoder = TransformerDecoder(decoder_layer,
                                              num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead
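_reset_parameters is called but not shown. In the reference PyTorch implementation it Xavier-initialises every parameter tensor with more than one dimension, roughly:

    def _reset_parameters(self):
        # Xavier-uniform initialisation for all weight matrices;
        # 1-D tensors (biases) keep their default initialisation.
        for p in self.parameters():
            if p.dim() > 1:
                torch.nn.init.xavier_uniform_(p)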
Example no. 11
    def __init__(self,
                 input_dim=13,
                 num_classes=9,
                 sequencelength=13,
                 d_model=64,
                 n_head=1,
                 n_layers=3,
                 d_inner=256,
                 activation="relu",
                 dropout=0.39907201621346594):

        super(TransformerModel, self).__init__()
        self.modelname = f"TransformerEncoder_input-dim={input_dim}_num-classes={num_classes}_" \
                         f"d-model={d_model}_d-inner={d_inner}_n-layers={n_layers}_n-head={n_head}_" \
                         f"dropout={dropout}"

        encoder_layer = TransformerEncoderLayer(d_model, n_head, d_inner,
                                                dropout, activation)
        encoder_norm = LayerNorm(d_model)

        self.sequential = Sequential(
            Linear(input_dim, d_model), ReLU(),
            TransformerEncoder(encoder_layer, n_layers, encoder_norm),
            Flatten(), ReLU(), Linear(d_model * sequencelength, num_classes))
Example no. 12
    def __init__(self,
                 seq_length: int,
                 output_seq_length: int,
                 n_time_series: int,
                 d_model=128,
                 output_dim=1,
                 n_layers_encoder=6,
                 forward_dim=2048,
                 dropout=0.1,
                 use_mask=False,
                 meta_data=None,
                 final_act=None,
                 squashed_embedding=False,
                 n_heads=8):
        """Uses a number of encoder layers with a simple linear decoder layer.

        :param seq_length: The number of historical time-steps fed into the model in each forward pass.
        :type seq_length: int
        :param output_seq_length: The number of forecasted time-steps outputted by the model.
        :type output_seq_length: int
        :param n_time_series: The total number of time series present (targets + features)
        :type n_time_series: int
        :param d_model: The embedding dim of the model, defaults to 128
        :type d_model: int, optional
        :param output_dim: The output dimension (should correspond to n_targets), defaults to 1
        :type output_dim: int, optional
        :param n_layers_encoder: The number of encoder layers, defaults to 6
        :type n_layers_encoder: int, optional
        :param forward_dim: The forward embedding dim, defaults to 2048
        :type forward_dim: int, optional
        :param dropout: How much dropout to use, defaults to 0.1
        :type dropout: float, optional
        :param use_mask: Whether to use a subsequent sequence mask during training, defaults to False
        :type use_mask: bool, optional
        :param meta_data: Configuration for merging static meta-data, a dict with "method" and "params" keys, defaults to None
        :type meta_data: dict, optional
        :param final_act: Name of an optional final activation function, defaults to None
        :type final_act: str, optional
        :param squashed_embedding: Whether to create a single 1-D time embedding, defaults to False
        :type squashed_embedding: bool, optional
        :param n_heads: The number of attention heads, defaults to 8
        :type n_heads: int, optional
        """
        super().__init__()
        self.dense_shape = torch.nn.Linear(n_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim,
                                                dropout)
        encoder_norm = LayerNorm(d_model)
        self.transformer_enc = TransformerEncoder(encoder_layer,
                                                  n_layers_encoder,
                                                  encoder_norm)
        self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
        self.output_seq_length = output_seq_length
        self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
        self.mask = generate_square_subsequent_mask(seq_length)
        self.out_dim = output_dim
        self.mask_it = use_mask
        self.final_act = None
        self.squashed = None
        if final_act:
            self.final_act = activation_dict[final_act]
        if meta_data:
            self.meta_merger = MergingModel(meta_data["method"],
                                            meta_data["params"])
        if squashed_embedding:
            self.squashed = torch.nn.Linear(seq_length, 1)
            self.unsquashed = torch.nn.Linear(1, seq_length)
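An instantiation sketch matching the docstring above. The class name and the meta_data values are illustrative only; the dictionary keys mirror the meta_data["method"] and meta_data["params"] accesses in the constructor:

# Hypothetical class name and placeholder meta_data; adjust both to the source project.
model = SimpleTransformer(seq_length=30,
                          output_seq_length=10,
                          n_time_series=5,
                          d_model=128,
                          n_layers_encoder=6,
                          use_mask=True,
                          meta_data={"method": "concat", "params": {}},
                          final_act=None)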
Example no. 13
 def __init__(self, d_in, d_hid, dropout=0.1):
     super().__init__()
     self.w_1 = nn.Linear(d_in, d_hid)  # position-wise
     self.w_2 = nn.Linear(d_hid, d_in)  # position-wise
     self.layer_norm = LayerNorm(d_in)
     self.dropout = nn.Dropout(dropout)
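The corresponding forward pass (not shown) usually applies the two position-wise linear layers with an activation in between, then dropout, a residual connection, and LayerNorm. A sketch consistent with the original Transformer's post-norm ordering; the choice of ReLU is an assumption:

 def forward(self, x):
     residual = x
     # Two position-wise linear layers with a ReLU in between
     output = self.w_2(torch.relu(self.w_1(x)))
     output = self.dropout(output)
     # Residual connection followed by LayerNorm
     return self.layer_norm(output + residual)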