Example #1
    def __init__(self,
                 number_time_series: int,
                 seq_length: int = 48,
                 output_seq_len: int = None,
                 d_model: int = 128,
                 n_heads: int = 8,
                 dropout=0.1,
                 forward_dim=2048,
                 sigmoid=False):
        """
        Full transformer model
        """
        super().__init__()
        if output_seq_len is None:
            output_seq_len = seq_length
        self.out_seq_len = output_seq_len
        self.mask = generate_square_subsequent_mask(seq_length)
        self.dense_shape = torch.nn.Linear(number_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        self.transformer = Transformer(d_model, nhead=n_heads)
        self.final_layer = torch.nn.Linear(d_model, 1)
        self.sequence_size = seq_length
        self.tgt_mask = generate_square_subsequent_mask(output_seq_len)
        self.sigmoid = None
        if sigmoid:
            self.sigmoid = torch.nn.Sigmoid()
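
This and the following examples call a module-level helper, generate_square_subsequent_mask, that the excerpts do not include. A minimal sketch consistent with PyTorch's built-in nn.Transformer.generate_square_subsequent_mask (an assumption about the missing helper, not code from the source repository):

import torch

def generate_square_subsequent_mask(size: int) -> torch.Tensor:
    # Additive causal mask: -inf above the diagonal so position i can only
    # attend to positions <= i; zeros elsewhere leave attention unchanged.
    return torch.triu(torch.full((size, size), float("-inf")), diagonal=1)
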
Example #2
class SimpleTransformer(torch.nn.Module):
    def __init__(
            self,
            number_time_series: int,
            seq_length: int = 48,
            output_seq_len: int = None,
            d_model: int = 128,
            n_heads: int = 8,
            dropout=0.1,
            forward_dim=2048,
            sigmoid=False):
        """
        Full transformer model
        """
        super().__init__()
        if output_seq_len is None:
            output_seq_len = seq_length
        self.out_seq_len = output_seq_len
        self.mask = generate_square_subsequent_mask(seq_length)
        self.dense_shape = torch.nn.Linear(number_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        self.transformer = Transformer(d_model, nhead=n_heads)
        self.final_layer = torch.nn.Linear(d_model, 1)
        self.sequence_size = seq_length
        self.tgt_mask = generate_square_subsequent_mask(output_seq_len)
        self.sigmoid = None
        if sigmoid:
            self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x: torch.Tensor, t: torch.Tensor, tgt_mask=None, src_mask=None):
        x = self.encode_sequence(x, src_mask)
        return self.decode_seq(x, t, tgt_mask)

    def basic_feature(self, x: torch.Tensor):
        x = self.dense_shape(x)
        x = self.pe(x)
        x = x.permute(1, 0, 2)
        return x

    def encode_sequence(self, x, src_mask=None):
        x = self.basic_feature(x)
        x = self.transformer.encoder(x, src_mask)
        return x

    def decode_seq(self, mem, t, tgt_mask=None, view_number=None) -> torch.Tensor:
        if view_number is None:
            view_number = self.out_seq_len
        if tgt_mask is None:
            tgt_mask = self.tgt_mask
        t = self.basic_feature(t)
        x = self.transformer.decoder(t, mem, tgt_mask=tgt_mask)
        x = self.final_layer(x)
        if self.sigmoid:
            x = self.sigmoid(x)
        return x.view(-1, view_number)
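
SimplePositionalEncoding is likewise not shown. A minimal sinusoidal sketch in the style of the PyTorch sequence-modeling tutorial, written for the batch-first layout that basic_feature uses before its permute; this is an assumption, and the repository's actual class may differ:

import math
import torch

class SimplePositionalEncoding(torch.nn.Module):
    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        # Precompute the sin/cos table once; stored as (1, max_len, d_model).
        position = torch.arange(max_len).float().unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float()
                             * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, d_model); broadcast-add the positional table.
        return x + self.pe[:, :x.size(1)]
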
Example #3
    def __init__(self, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 dim_feedforward, dropout, activation, src_vocab_size, tgt_vocab_size):
        super(TransformerModel, self).__init__()
        self.pos_encoder = PositionalEncoding(
            d_model=d_model, dropout=0.1)  # , max_len=100)
        encoder_layer = TransformerEncoderLayer(
                        d_model, nhead, dim_feedforward, dropout, activation)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(
            encoder_layer, num_encoder_layers, encoder_norm)
        decoder_layer = TransformerDecoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation)
        decoder_norm = LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer, num_decoder_layers, decoder_norm)

        self.d_model = d_model
        self.nhead = nhead
        self.linear = Linear(d_model, tgt_vocab_size)
        self.transformer = Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_encoder_layers,
                                       num_decoder_layers=num_decoder_layers, dim_feedforward=dim_feedforward,
                                       dropout=dropout, activation=activation)
        self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)

        self._reset_parameters()
Example #4
    def __init__(self,
                 number_time_series: int,
                 seq_length: int = 48,
                 output_seq_len: int = None,
                 d_model: int = 128,
                 n_heads: int = 8,
                 dropout=0.1,
                 forward_dim=2048,
                 sigmoid=False):
        """A full transformer model

        :param number_time_series: The total number of time series present
            (e.g. n_feature_time_series + n_targets)
        :type number_time_series: int
        :param seq_length: The length of your input sequence, defaults to 48
        :type seq_length: int, optional
        :param output_seq_len: The length of your output sequence, defaults
            to None
        :type output_seq_len: int, optional
        :param d_model: The dimensions of your model, defaults to 128
        :type d_model: int, optional
        :param n_heads: The number of heads in each encoder/decoder block,
            defaults to 8
        :type n_heads: int, optional
        :param dropout: The fraction of dropout you wish to apply during
            training, defaults to 0.1 (currently not functional)
        :type dropout: float, optional
        :param forward_dim: Currently not functional, defaults to 2048
        :type forward_dim: int, optional
        :param sigmoid: Whether to apply a sigmoid activation to the final
            layer (useful for binary classification), defaults to False
        :type sigmoid: bool, optional
        """
        super().__init__()
        if output_seq_len is None:
            output_seq_len = seq_length
        self.out_seq_len = output_seq_len
        self.mask = generate_square_subsequent_mask(seq_length)
        self.dense_shape = torch.nn.Linear(number_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        self.transformer = Transformer(d_model, nhead=n_heads)
        self.final_layer = torch.nn.Linear(d_model, 1)
        self.sequence_size = seq_length
        self.tgt_mask = generate_square_subsequent_mask(output_seq_len)
        self.sigmoid = None
        if sigmoid:
            self.sigmoid = torch.nn.Sigmoid()
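
Given the docstring, instantiation is direct. A hypothetical configuration, assuming this __init__ belongs to the SimpleTransformer class from Example #2 and that the helper sketches above are in scope; the values below are illustrative only:

# 3 input series, 24-step history, 12-step forecast; d_model=64 is divisible
# by n_heads=8, which nn.MultiheadAttention requires.
model = SimpleTransformer(number_time_series=3,
                          seq_length=24,
                          output_seq_len=12,
                          d_model=64,
                          n_heads=8)
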
Example #5
    def __init__(self, vocab_size, em_size, word_dropout=0.4, dropout=0.1):
        super(SynPG, self).__init__()
        self.vocab_size = vocab_size
        self.em_size = em_size
        self.word_dropout = word_dropout
        self.dropout = dropout

        # vocabulary embedding
        self.embedding_encoder = nn.Embedding(vocab_size, em_size)
        self.embedding_decoder = nn.Embedding(vocab_size, em_size)

        # positional encoding
        self.pos_encoder = PositionalEncoding(em_size, dropout=0.0)

        self.transformer = Transformer(d_model=em_size,
                                       nhead=6,
                                       dropout=dropout)

        # linear transformation
        self.linear = nn.Linear(em_size, vocab_size)

        self.init_weights()
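
Note that nhead is hard-coded to 6 in the Transformer call, so em_size must be divisible by 6 (nn.MultiheadAttention enforces this). A hypothetical instantiation, assuming a PositionalEncoding class such as the one from the PyTorch tutorial is in scope; the sizes are illustrative, not from the original paper:

# 300 / 6 = 50 dimensions per attention head.
model = SynPG(vocab_size=50000, em_size=300, word_dropout=0.4, dropout=0.1)
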
Example #6
class SynPG(nn.Module):
    def __init__(self, vocab_size, em_size, word_dropout=0.4, dropout=0.1):
        super(SynPG, self).__init__()
        self.vocab_size = vocab_size
        self.em_size = em_size
        self.word_dropout = word_dropout
        self.dropout = dropout

        # vocabulary embedding
        self.embedding_encoder = nn.Embedding(vocab_size, em_size)
        self.embedding_decoder = nn.Embedding(vocab_size, em_size)

        # positional encoding
        self.pos_encoder = PositionalEncoding(em_size, dropout=0.0)

        self.transformer = Transformer(d_model=em_size,
                                       nhead=6,
                                       dropout=dropout)

        # linear transformation
        self.linear = nn.Linear(em_size, vocab_size)

        self.init_weights()

    def init_weights(self):
        initrange = 0.1

        # initialize vocabulary matrix weight
        self.embedding_encoder.weight.data.uniform_(-initrange, initrange)
        self.embedding_decoder.weight.data.uniform_(-initrange, initrange)

        # initialize linear weight
        self.linear.weight.data.uniform_(-initrange, initrange)
        self.linear.bias.data.fill_(0)

    def load_embedding(self, embedding):
        self.embedding_encoder.weight.data.copy_(torch.from_numpy(embedding))
        self.embedding_decoder.weight.data.copy_(torch.from_numpy(embedding))

    def store_grad_norm(self, grad):
        norm = torch.norm(grad, 2, 1)
        self.grad_norm = norm.detach().data.mean()
        return grad

    def generate_square_mask(self, max_sent_len, max_synt_len):
        size = max_sent_len + max_synt_len + 2
        mask = torch.zeros((size, size))
        mask[:max_sent_len, max_sent_len:] = float("-inf")
        mask[max_sent_len:, :max_sent_len] = float("-inf")
        return mask

    def forward(self, sents, synts, targs):
        batch_size = sents.size(0)
        max_sent_len = sents.size(1)
        max_synt_len = synts.size(1) - 2  # count without <sos> and <eos>
        max_targ_len = targs.size(1) - 2  # count without <sos> and <eos>

        # apply word dropout
        drop_mask = torch.bernoulli(self.word_dropout *
                                    torch.ones(max_sent_len)).bool().cuda()
        sents = sents.masked_fill(drop_mask, 0)

        # sentence, syntax => embedding
        sent_embeddings = self.embedding_encoder(sents).transpose(
            0, 1) * np.sqrt(self.em_size)
        synt_embeddings = self.embedding_encoder(synts).transpose(
            0, 1) * np.sqrt(self.em_size)
        synt_embeddings = self.pos_encoder(synt_embeddings)
        en_embeddings = torch.cat((sent_embeddings, synt_embeddings), dim=0)

        # record gradient
        if en_embeddings.requires_grad:
            en_embeddings.register_hook(self.store_grad_norm)

        # do not allow cross attention
        src_mask = self.generate_square_mask(max_sent_len, max_synt_len).cuda()

        # target => embedding
        de_embeddings = self.embedding_decoder(targs[:, :-1]).transpose(
            0, 1) * np.sqrt(self.em_size)
        de_embeddings = self.pos_encoder(de_embeddings)

        # sequential mask
        tgt_mask = self.transformer.generate_square_subsequent_mask(
            max_targ_len + 1).cuda()

        # forward
        outputs = self.transformer(en_embeddings,
                                   de_embeddings,
                                   src_mask=src_mask,
                                   tgt_mask=tgt_mask)

        # apply linear layer to vocabulary size
        outputs = outputs.transpose(0, 1)
        outputs = self.linear(outputs.contiguous().view(-1, self.em_size))
        outputs = outputs.view(batch_size, max_targ_len + 1, self.vocab_size)

        return outputs

    def generate(self, sents, synts, max_len, sample=True, temp=0.5):
        batch_size = sents.size(0)
        max_sent_len = sents.size(1)
        max_synt_len = synts.size(1) - 2  # count without <sos> and <eos>
        max_targ_len = max_len

        # output index starts with <sos>
        idxs = torch.zeros((batch_size, max_targ_len + 2),
                           dtype=torch.long).cuda()
        idxs[:, 0] = 1

        # sentence, syntax => embedding
        sent_embeddings = self.embedding_encoder(sents).transpose(
            0, 1) * np.sqrt(self.em_size)
        synt_embeddings = self.embedding_encoder(synts).transpose(
            0, 1) * np.sqrt(self.em_size)
        synt_embeddings = self.pos_encoder(synt_embeddings)
        en_embeddings = torch.cat((sent_embeddings, synt_embeddings), dim=0)

        # do not allow cross attention
        src_mask = self.generate_square_mask(max_sent_len, max_synt_len).cuda()

        # starting index => embedding
        de_embeddings = self.embedding_decoder(idxs[:, :1]).transpose(
            0, 1) * np.sqrt(self.em_size)
        de_embeddings = self.pos_encoder(de_embeddings)

        # sequential mask
        tgt_mask = self.transformer.generate_square_subsequent_mask(
            de_embeddings.size(0)).cuda()

        # encode
        memory = self.transformer.encoder(en_embeddings, mask=src_mask)

        # auto-regressively generate output
        for i in range(1, max_targ_len + 2):
            # decode
            outputs = self.transformer.decoder(de_embeddings,
                                               memory,
                                               tgt_mask=tgt_mask)
            outputs = self.linear(outputs[-1].contiguous().view(
                -1, self.em_size))

            # get argmax index or sample index
            if not sample:
                values, idx = torch.max(outputs, 1)
            else:
                probs = F.softmax(outputs / temp, dim=1)
                idx = torch.multinomial(probs, 1).squeeze(1)

            # save to output index
            idxs[:, i] = idx

            # concatenate index to decoding
            de_embeddings = self.embedding_decoder(idxs[:, :i + 1]).transpose(
                0, 1) * np.sqrt(self.em_size)
            de_embeddings = self.pos_encoder(de_embeddings)

            # new sequential mask
            tgt_mask = self.transformer.generate_square_subsequent_mask(
                de_embeddings.size(0)).cuda()

        return idxs[:, 1:]
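
generate_square_mask above builds a block-diagonal attention pattern: sentence positions attend only to sentence positions, while syntax positions (plus the two special-token slots) attend only to each other. A standalone reproduction with tiny, made-up lengths:

import torch

def block_mask(max_sent_len: int, max_synt_len: int) -> torch.Tensor:
    # Mirrors SynPG.generate_square_mask: -inf entries forbid attention.
    size = max_sent_len + max_synt_len + 2
    mask = torch.zeros((size, size))
    mask[:max_sent_len, max_sent_len:] = float("-inf")
    mask[max_sent_len:, :max_sent_len] = float("-inf")
    return mask

print(block_mask(2, 1))
# tensor([[0., 0., -inf, -inf, -inf],
#         [0., 0., -inf, -inf, -inf],
#         [-inf, -inf, 0., 0., 0.],
#         [-inf, -inf, 0., 0., 0.],
#         [-inf, -inf, 0., 0., 0.]])
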
Example #7
class SimpleTransformer(torch.nn.Module):
    def __init__(self,
                 number_time_series: int,
                 seq_length: int = 48,
                 output_seq_len: int = None,
                 d_model: int = 128,
                 n_heads: int = 8,
                 dropout=0.1,
                 forward_dim=2048,
                 sigmoid=False):
        """A full transformer model

        :param number_time_series: The total number of time series present
            (e.g. n_feature_time_series + n_targets)
        :type number_time_series: int
        :param seq_length: The length of your input sequence, defaults to 48
        :type seq_length: int, optional
        :param output_seq_len: The length of your output sequence, defaults
            to None
        :type output_seq_len: int, optional
        :param d_model: The dimensions of your model, defaults to 128
        :type d_model: int, optional
        :param n_heads: The number of heads in each encoder/decoder block,
            defaults to 8
        :type n_heads: int, optional
        :param dropout: The fraction of dropout you wish to apply during
            training, defaults to 0.1 (currently not functional)
        :type dropout: float, optional
        :param forward_dim: Currently not functional, defaults to 2048
        :type forward_dim: int, optional
        :param sigmoid: Whether to apply a sigmoid activation to the final
            layer (useful for binary classification), defaults to False
        :type sigmoid: bool, optional
        """
        super().__init__()
        if output_seq_len is None:
            output_seq_len = seq_length
        self.out_seq_len = output_seq_len
        self.mask = generate_square_subsequent_mask(seq_length)
        self.dense_shape = torch.nn.Linear(number_time_series, d_model)
        self.pe = SimplePositionalEncoding(d_model)
        self.transformer = Transformer(d_model, nhead=n_heads)
        self.final_layer = torch.nn.Linear(d_model, 1)
        self.sequence_size = seq_length
        self.tgt_mask = generate_square_subsequent_mask(output_seq_len)
        self.sigmoid = None
        if sigmoid:
            self.sigmoid = torch.nn.Sigmoid()

    def forward(self,
                x: torch.Tensor,
                t: torch.Tensor,
                tgt_mask=None,
                src_mask=None):
        # drop the final time step of x before encoding
        x = self.encode_sequence(x[:, :-1, :], src_mask)
        return self.decode_seq(x, t, tgt_mask)

    def basic_feature(self, x: torch.Tensor):
        x = self.dense_shape(x)
        x = self.pe(x)
        x = x.permute(1, 0, 2)
        return x

    def encode_sequence(self, x, src_mask=None):
        x = self.basic_feature(x)
        x = self.transformer.encoder(x, src_mask)
        return x

    def decode_seq(self,
                   mem,
                   t,
                   tgt_mask=None,
                   view_number=None) -> torch.Tensor:
        if view_number is None:
            view_number = self.out_seq_len
        if tgt_mask is None:
            tgt_mask = self.tgt_mask
        t = self.basic_feature(t)
        x = self.transformer.decoder(t, mem, tgt_mask=tgt_mask)
        x = self.final_layer(x)
        if self.sigmoid:
            x = self.sigmoid(x)
        return x.view(-1, view_number)
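
A hypothetical smoke test for the full model, assuming the generate_square_subsequent_mask and SimplePositionalEncoding sketches from earlier on this page are in scope; the tensor shapes are assumptions read off the permutes in the class, not taken from the original repository:

import torch

model = SimpleTransformer(number_time_series=3)  # defaults: seq_length=48, d_model=128
src = torch.rand(4, 48, 3)  # (batch, seq_length, number_time_series)
tgt = torch.rand(4, 48, 3)  # decoder input, same layout
out = model(src, tgt)       # forward() encodes src[:, :-1, :] internally
print(out.shape)            # torch.Size([4, 48])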