Example #1
    def __init__(self,
                 seq_length: int,
                 output_seq_length: int,
                 n_time_series: int,
                 d_model=128,
                 output_dim=1,
                 n_layers_encoder=6,
                 forward_dim=2048,
                 dropout=0.1,
                 use_mask=False,
                 meta_data=None,
                 n_heads=8):
        """
        Uses a number of encoder layers with a simple linear decoder layer.
        """
        super().__init__()
        self.dense_shape = torch.nn.Linear(n_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim,
                                                dropout)
        encoder_norm = LayerNorm(d_model)
        self.transformer_enc = TransformerEncoder(encoder_layer,
                                                  n_layers_encoder,
                                                  encoder_norm)
        self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
        self.output_seq_length = output_seq_length
        self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
        self.mask = generate_square_subsequent_mask(seq_length)
        self.mask_it = use_mask
        if meta_data:
            self.meta_merger = MergingModel(meta_data["method"],
                                            meta_data["params"])
Example #2
    def __init__(self, vocab_size: int, d_model: int, n_head: int,
                 n_layers: int, dim_ff: int, dropout: float, pad_id: int):
        super(TransformerSpaceCorrector, self).__init__()

        self.vocab_size = vocab_size
        self.label_size = 2 + 1

        self.d_model = d_model
        self.n_head = n_head
        self.n_layers = n_layers
        self.dim_ff = dim_ff
        self.dropout = dropout
        self.pad_id = pad_id

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.label_embedding = nn.Embedding(self.label_size, d_model)

        self.position_embedding = PositionalEncoding(d_model, dropout)
        enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)
        # enc_norm = LayerNorm(d_model)
        # self.encoder = TransformerEncoder(enc_layer, n_layers, enc_norm)
        self.encoder = TransformerEncoder(enc_layer, n_layers)

        self.classifier = Classifier(d_model=d_model,
                                     class_num=2,
                                     d_ff=128,
                                     dropout=dropout)
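The Classifier and PositionalEncoding modules above are project-specific; here is a minimal, self-contained sketch of the underlying embedding-plus-encoder pattern with a padding mask (the padding handling is an assumption, since the forward method is not shown):

import torch
from torch.nn import Embedding, TransformerEncoder, TransformerEncoderLayer

vocab_size, d_model, pad_id = 1000, 256, 0
embedding = Embedding(vocab_size, d_model, padding_idx=pad_id)
encoder = TransformerEncoder(
    TransformerEncoderLayer(d_model, nhead=8, dim_feedforward=1024, dropout=0.1),
    num_layers=4)

tokens = torch.randint(1, vocab_size, (2, 12))       # (batch, seq_len)
tokens[1, 8:] = pad_id                               # pad the tail of the second sequence
pad_mask = tokens.eq(pad_id)                         # True at padding positions
h = embedding(tokens).transpose(0, 1)                # (seq_len, batch, d_model)
h = encoder(h, src_key_padding_mask=pad_mask)        # attention ignores padded positions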
Example #3
    def __init__(self, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, src_vocab_size, tgt_vocab_size):
        super(TransformerModel, self).__init__()
        self.pos_encoder = PositionalEncoding(
            d_model=d_model, dropout=0.1)  # , max_len=100)
        encoder_layer = TransformerEncoderLayer(
                        d_model, nhead, dim_feedforward, dropout, activation)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(
            encoder_layer, num_encoder_layers, encoder_norm)
        decoder_layer = TransformerDecoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation)
        decoder_norm = LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer, num_decoder_layers, decoder_norm)

        self.d_model = d_model
        self.nhead = nhead
        self.linear = Linear(d_model, tgt_vocab_size)
        self.transformer = Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_encoder_layers,
                                       num_decoder_layers=num_decoder_layers, dim_feedforward=dim_feedforward,
                                       dropout=dropout, activation=activation)
        self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)

        self._reset_parameters()
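For reference, a self-contained sketch of how the encoder and decoder stacks constructed in Example #3 fit together (hypothetical shapes; the embeddings, positional encoding, masking, and the final Linear from the snippet are omitted):

import torch
from torch.nn import (LayerNorm, TransformerDecoder, TransformerDecoderLayer,
                      TransformerEncoder, TransformerEncoderLayer)

d_model, nhead, dim_feedforward, dropout = 512, 8, 2048, 0.1
encoder = TransformerEncoder(
    TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation="relu"),
    num_layers=6, norm=LayerNorm(d_model))
decoder = TransformerDecoder(
    TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation="relu"),
    num_layers=6, norm=LayerNorm(d_model))

src = torch.randn(20, 2, d_model)    # (src_len, batch, d_model), already embedded
tgt = torch.randn(15, 2, d_model)    # (tgt_len, batch, d_model)
memory = encoder(src)                # encoder output becomes the decoder's memory
out = decoder(tgt, memory)           # (tgt_len, batch, d_model)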
Example #4
    def __init__(self, encoder_type, vocab_size, embed_dim, encoder_dim,
                 output_dim, dropout, **kwargs):
        super().__init__()

        if encoder_type == "bert":
            self.embed = None
            self.encoder = BertModel.from_pretrained('bert-base-uncased')
            # for p in self.encoder.parameters():
            #     p.requires_grad = False
            # self.encoder.cuda()
        elif encoder_type == "transformer":
            self.embed = nn.Embedding(vocab_size, embed_dim)
            encoder_layer = TransformerEncoderLayer(encoder_dim, nhead=8)
            encoder_norm = LayerNorm(encoder_dim)
            self.encoder = TransformerEncoder(encoder_layer, 1, encoder_norm)
        elif encoder_type in ["lstm", "rnn", "gru"]:
            self.embed = nn.Embedding(vocab_size, embed_dim)
            self.encoder = RNN(encoder_type,
                               embed_dim,
                               output_dim,
                               bidirectional=kwargs.get(
                                   'bidirectional', False),
                               dropout=dropout)

        if encoder_dim != output_dim or kwargs.get('project', False):
            self.encoder_output_linear = nn.Linear(encoder_dim, output_dim)
            self.encoder_state_linear = nn.Linear(encoder_dim, output_dim)
        else:
            self.encoder_output_linear = nn.Sequential()
            self.encoder_state_linear = nn.Sequential()

        self.encoder_type = encoder_type
        self.dropout = nn.Dropout(dropout)
Example #5
    def __init__(self,
                 seq_length: int,
                 output_seq_length: int,
                 n_time_series: int,
                 d_model=128,
                 output_dim=1,
                 n_layers_encoder=6,
                 use_mask=False,
                 n_heads=8):
        """
        Uses a number of encoder layers with a simple linear decoder layer.
        """
        super().__init__()
        self.dense_shape = torch.nn.Linear(n_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        encoder_layer = TransformerEncoderLayer(d_model, n_heads)
        encoder_norm = LayerNorm(d_model)
        self.transformer_enc = TransformerEncoder(encoder_layer,
                                                  n_layers_encoder,
                                                  encoder_norm)
        self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
        self.output_seq_length = output_seq_length
        self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
        self.mask = generate_square_subsequent_mask(seq_length)
        self.mask_it = use_mask
Example #6
    def __init__(self, vocab_size: int, word_dim: int, d_model: int,
                 n_head: int, n_layers: int, dim_ff: int, dropout: float,
                 pad_id: int, n_class: int):
        super(TransformerHierachiSeqTagger, self).__init__()

        self.vocab_size = vocab_size
        self.word_dim = word_dim
        self.d_model = d_model
        self.n_head = n_head
        self.n_layers = n_layers
        self.dim_ff = dim_ff
        self.dropout = dropout
        self.pad_id = pad_id

        self.embedding = nn.Embedding(vocab_size, word_dim, padding_idx=pad_id)
        self.word_linear = nn.Linear(word_dim, d_model, bias=False)

        self.position_embedding = PositionalEncoding(d_model, dropout)
        enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)

        self.encoder = TransformerEncoder(enc_layer, n_layers)
        self.classifier = Classifier(d_model=d_model,
                                     class_num=n_class,
                                     d_ff=dim_ff,
                                     dropout=dropout)
Example #7
    def __init__(self, args):
        super(TransformerMIL, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model=args.feature_depth,
                                                nhead=8,
                                                dim_feedforward=2048,
                                                dropout=args.dropout,
                                                activation="relu")
        encoder_norm = LayerNorm(args.feature_depth)
        self.attention = TransformerEncoder(encoder_layer, args.ntrans,
                                            encoder_norm)
        # self.attention1 = MultiheadAttention(args.feature_depth, 8)
        self.attention2 = MultiheadAttention(args.feature_depth, 8)
        self.classifier = Sequential(Linear(args.feature_depth, 1), Sigmoid())
        self.mil = AttentionMILFeatures(args)
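A minimal sketch of how the MultiheadAttention module built above can be invoked for self-attention over tile features (hypothetical tensor sizes; the original forward method is not shown):

import torch
from torch.nn import MultiheadAttention

feature_depth = 512
attention2 = MultiheadAttention(feature_depth, num_heads=8)
tiles = torch.randn(100, 1, feature_depth)       # (num_tiles, batch, feature_depth)
out, weights = attention2(tiles, tiles, tiles)   # self-attention; weights averaged over heads
print(out.shape, weights.shape)                  # torch.Size([100, 1, 512]) torch.Size([1, 100, 100])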
Example #8
    def __init__(self, num_tokens_per_channel, codebook_dim, upscale_factors,
                 list_of_num_layers, n_head, d_model, dim_feedforward,
                 num_tokens_bottleneck, dropout):
        super(AuxiliaryDecoder, self).__init__()
        assert len(list_of_num_layers) == len(upscale_factors)
        self.num_notes_per_voice = num_tokens_per_channel
        self.num_tokens_per_block = len(self.num_notes_per_voice)
        self.d_model = d_model
        self.codebook_dim = codebook_dim
        self.upscale_factors = upscale_factors

        # self.code_embedding = nn.Embedding(self.codebook_dim, self.d_model)
        self.linear = nn.Linear(self.codebook_dim, self.d_model)

        # TODO factorised positional embeddings
        positional_embedding_size = self.d_model

        self.positional_embeddings = nn.Parameter(
            torch.randn((1, num_tokens_bottleneck, positional_embedding_size)))

        self.upscale_embeddings = nn.ParameterList([
            nn.Parameter(torch.randn(upscale, self.d_model))
            for upscale in self.upscale_factors
        ])

        # self.code_embedding_dim = self.d_model - positional_embedding_size
        # TODO for now sum positional embedding
        self.code_embedding_dim = self.d_model - positional_embedding_size

        encoder_layer = TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=n_head,
            dim_feedforward=dim_feedforward,
            dropout=dropout)
        # NOTE layer_norm is already contained in encoder_layers
        self.transformers = nn.ModuleList([
            TransformerEncoder(
                encoder_layer=encoder_layer,
                num_layers=num_layers,
            ) for num_layers in list_of_num_layers
        ])

        self.pre_softmaxes = nn.ModuleList([
            nn.Linear(self.d_model, num_notes)
            for num_notes in num_tokens_per_channel
        ])
Example #9
    def __init__(self, src_vocab_size=128, tgt_vocab_size=128,
                 embedding_dim=128, fcn_hidden_dim=128,
                 num_heads=4, num_layers=2, dropout=0.2,
                 src_to_tgt_vocab_conversion_matrix=None):
        super(PointerGeneratorTransformer, self).__init__()

        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.embedding_dim = embedding_dim
        self.src_to_tgt_vocab_conversion_matrix = src_to_tgt_vocab_conversion_matrix
        self.pos_encoder = PositionalEncoding(embedding_dim)
        # Source and target embeddings
        self.src_embed = Embedding(self.src_vocab_size, embedding_dim, padding_idx=2)
        self.tgt_embed = Embedding(self.tgt_vocab_size, embedding_dim, padding_idx=2)

        # Encoder layers
        self.encoder_layer = TransformerEncoderLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
        self.encoder = TransformerEncoder(self.encoder_layer, num_layers)

        # Decoder layers
        self.decoder_layer = TransformerDecoderLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
        self.decoder_final_layer = TransformerDecoderFinalLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
        self.decoder = TransformerDecoder(self.decoder_layer, self.decoder_final_layer, num_layers)

        # Final linear layer + softmax for probability over the target vocabulary
        self.p_vocab = nn.Sequential(
            nn.Linear(self.embedding_dim, self.tgt_vocab_size),
            nn.Softmax(dim=-1))

        # P_gen, probability of generating output
        self.p_gen = nn.Sequential(
            nn.Linear(self.embedding_dim * 3, 1),
            nn.Sigmoid())
        # Context vector
        self.c_t = None

        # Initialize masks
        self.src_mask = None
        self.tgt_mask = None
        self.mem_mask = None
        # Initialize weights of model
        self._reset_parameters()
Example #10
    def __init__(
        self,
        num_layers,
        input_dim,
        num_tokens_per_channel,
        positional_embedding_size,
        d_model,
        dim_feedforward,
        n_head,
        num_tokens,
        dropout,
    ):
        raise NotImplementedError
        # must use its own data_processor
        super(TeacherAbsolute, self).__init__()
        self.num_channels = len(num_tokens_per_channel)
        self.positional_embeddings = nn.Parameter(
            torch.randn((1, num_tokens, positional_embedding_size)))

        self.num_layers = num_layers

        self.linear_to_input_transformer = nn.Linear(
            input_dim, d_model - positional_embedding_size)
        encoder_layer = TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_head,
            dim_feedforward=dim_feedforward,
            dropout=dropout)
        self.transformer = TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=self.num_layers,
        )
        self.num_tokens_per_channel = num_tokens_per_channel

        self.pre_softmaxes = nn.ModuleList([
            nn.Linear(d_model, num_notes)
            for num_notes in num_tokens_per_channel
        ])
Example #11
def build_transformer_model(src_vocab_size: int,
                            tgt_vocab_size: int,
                            rnn_size: int = RNN_SIZE,
                            num_head: int = 4,
                            num_layers: int = 3,
                            dim_ff: int = 1024,
                            dropout: float = DROPOUT) -> EncoderDecoder:
    """
    Build transformer model based on the paper "Attention Is All You Need".

    Arguments:
         src_vocab_size: vocab size for encoder
         tgt_vocab_size: vocab size for decoder
         rnn_size: model (hidden) dimension of the encoder/decoder layers
         num_head: the number of heads in the multi-headed attention
         num_layers: number of encoder/decoder layers
         dim_ff: the dimension of the feed forward layer
         dropout: the dropout probability value
    """

    # Build encoder
    encoder_layer = TransformerEncoderLayer(rnn_size, num_head, dim_ff,
                                            dropout)
    encoder_norm = LayerNorm(rnn_size)
    encoder = TransformerEncoder(encoder_layer, num_layers, encoder_norm)

    # Build decoder
    decoder_layer = TransformerDecoderLayer(rnn_size, num_head, dim_ff,
                                            dropout)
    decoder_norm = LayerNorm(rnn_size)
    decoder = TransformerDecoder(decoder_layer, num_layers, decoder_norm)

    # Build generator
    generator = Generator(rnn_size, tgt_vocab_size)

    return EncoderDecoder(encoder, decoder, generator, rnn_size,
                          src_vocab_size, tgt_vocab_size)
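Hypothetical usage of build_transformer_model; EncoderDecoder, Generator, RNN_SIZE, and DROPOUT are project-specific names from the snippet above and are assumed to be importable from the same module:

model = build_transformer_model(src_vocab_size=8000,
                                tgt_vocab_size=8000,
                                rnn_size=512,
                                num_head=8,
                                num_layers=6,
                                dim_ff=2048,
                                dropout=0.1)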
Example #12
    def __init__(self, vocab_size: int, d_model: int, n_head: int,
                 n_layers: int, dim_ff: int, dropout: float, pad_id: int,
                 n_class: int):
        super(TransformerSeqTagger, self).__init__()

        self.vocab_size = vocab_size

        self.d_model = d_model
        self.n_head = n_head
        self.n_layers = n_layers
        self.dim_ff = dim_ff
        self.dropout = dropout
        self.pad_id = pad_id

        self.embedding = nn.Embedding(vocab_size, d_model)

        self.position_embedding = PositionalEncoding(d_model, dropout)
        enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)

        self.encoder = TransformerEncoder(enc_layer, n_layers)
        self.classifier = Classifier(d_model=d_model,
                                     class_num=n_class,
                                     d_ff=dim_ff,
                                     dropout=dropout)
Example #13
    def __init__(self,
                 seq_length: int,
                 output_seq_length: int,
                 n_time_series: int,
                 d_model=128,
                 output_dim=1,
                 n_layers_encoder=6,
                 forward_dim=2048,
                 dropout=0.1,
                 use_mask=False,
                 meta_data=None,
                 final_act=None,
                 squashed_embedding=False,
                 n_heads=8):
        """Uses a number of encoder layers with simple linear decoder layer.

        :param seq_length: The number of historical time-steps fed into the model in each forward pass.
        :type seq_length: int
        :param output_seq_length: The number of forecasted time-steps outputted by the model.
        :type output_seq_length: int
        :param n_time_series: The total number of time series present (targets + features)
        :type n_time_series: int
        :param d_model: The embedding dimension of the model, defaults to 128
        :type d_model: int, optional
        :param output_dim: The output dimension (should correspond to n_targets), defaults to 1
        :type output_dim: int, optional
        :param n_layers_encoder: The number of encoder layers, defaults to 6
        :type n_layers_encoder: int, optional
        :param forward_dim: The forward embedding dim, defaults to 2048
        :type forward_dim: int, optional
        :param dropout: How much dropout to use, defaults to 0.1
        :type dropout: float, optional
        :param use_mask: Whether to use a subsequent (causal) sequence mask during training, defaults to False
        :type use_mask: bool, optional
        :param meta_data: A dict with "method" and "params" keys describing how static meta-data is merged, defaults to None
        :type meta_data: dict, optional
        :param final_act: Name of a final activation function to apply (a key of activation_dict), defaults to None
        :type final_act: str, optional
        :param squashed_embedding: Whether to create a 1-D squashed time embedding, defaults to False
        :type squashed_embedding: bool, optional
        :param n_heads: The number of attention heads, defaults to 8
        :type n_heads: int, optional
        """
        super().__init__()
        self.dense_shape = torch.nn.Linear(n_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim,
                                                dropout)
        encoder_norm = LayerNorm(d_model)
        self.transformer_enc = TransformerEncoder(encoder_layer,
                                                  n_layers_encoder,
                                                  encoder_norm)
        self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
        self.output_seq_length = output_seq_length
        self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
        self.mask = generate_square_subsequent_mask(seq_length)
        self.out_dim = output_dim
        self.mask_it = use_mask
        self.final_act = None
        self.squashed = None
        if final_act:
            self.final_act = activation_dict[final_act]
        if meta_data:
            self.meta_merger = MergingModel(meta_data["method"],
                                            meta_data["params"])
        if squashed_embedding:
            self.squashed = torch.nn.Linear(seq_length, 1)
            self.unsquashed = torch.nn.Linear(1, seq_length)