Example #1
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 data='ETTh',
                 activation='gelu',
                 device=torch.device('cuda:0')):
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, data,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, data,
                                           dropout)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder
        self.encoder = Encoder([
            EncoderLayer(AttentionLayer(
                Attn(False, factor, attention_dropout=dropout), d_model,
                n_heads),
                         d_model,
                         d_ff,
                         dropout=dropout,
                         activation=activation) for l in range(e_layers)
        ], [ConvLayer(d_model) for l in range(e_layers - 1)],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # Decoder
        self.decoder = Decoder([
            DecoderLayer(
                AttentionLayer(
                    FullAttention(True, factor, attention_dropout=dropout),
                    d_model, n_heads),
                AttentionLayer(
                    FullAttention(False, factor, attention_dropout=dropout),
                    d_model, n_heads),
                d_model,
                d_ff,
                dropout=dropout,
                activation=activation,
            ) for l in range(d_layers)
        ],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
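
A hypothetical instantiation of this constructor (all argument values below are placeholders chosen for illustration, and it assumes Informer and its dependencies are importable):

# Minimal usage sketch; 7 channels in/out, a 96-step input window,
# a 48-step start token and a 24-step forecast horizon are assumptions.
model = Informer(enc_in=7, dec_in=7, c_out=7,
                 seq_len=96, label_len=48, out_len=24,
                 attn='prob', e_layers=3, d_layers=2)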
Example #2
    def __init__(self, vocabSize, seqLength, d_model, n_hidden, d_K, d_V,
                 n_layers, n_heads):
        super(BERT, self).__init__()

        d_ffn = n_hidden * 4

        self.sourceEmbedding = nn.Embedding(vocabSize, d_model)
        self.posEmbedding = nn.Embedding.from_pretrained(getSinCosEncoding(
            seqLength + 1, d_model),
                                                         freeze=True)
        # segment info embedding
        # Define sentence A and B indices associated to 1st and 2nd sentences (see paper)
        # segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
        self.segmentEmbedding = nn.Embedding(2, d_model)
        self.layers = nn.ModuleList([
            EncoderLayer(d_model, d_ffn, d_K, d_V, n_heads)
            for _ in range(n_layers)
        ])
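
getSinCosEncoding is referenced above but not shown. Below is a minimal sketch of the standard sinusoidal position table it presumably builds (the function name comes from the snippet; the body is an assumption and expects an even d_model):

import math
import torch

def getSinCosEncoding(n_positions, d_model):
    # Fixed sin/cos table from "Attention Is All You Need":
    # sine on even feature indices, cosine on odd ones.
    position = torch.arange(n_positions, dtype=torch.float32).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32)
                         * (-math.log(10000.0) / d_model))
    table = torch.zeros(n_positions, d_model)
    table[:, 0::2] = torch.sin(position * div_term)
    table[:, 1::2] = torch.cos(position * div_term)
    return table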
Example #3
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 out_len,
                 d_model=120,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 embed='fixed',
                 data='Ali_00',
                 activation='gelu',
                 device=torch.device('cuda:0')):
        super(Model, self).__init__()
        self.pred_len = out_len
        self.tcn = TemporalConvNet(d_model, [d_model, d_model, d_model],
                                   kernel_size=2,
                                   dropout=dropout)
        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, dropout)
        # Attention
        Attn = FullAttention
        # Encoder
        self.encoder = Encoder([
            EncoderLayer(AttentionLayer(Attn(False, attention_dropout=dropout),
                                        d_model, n_heads),
                         d_model,
                         d_ff,
                         dropout=dropout,
                         activation=activation) for l in range(e_layers)
        ], [ConvLayer(d_model) for l in range(e_layers - 1)],
                               tcn_layers=self.tcn,
                               norm_layer=torch.nn.LayerNorm(d_model))
        self.hidden = d_model * 12
        self.predict = nn.Linear(self.hidden, 24, bias=None)
        self.d_model = d_model
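
The prediction head above maps d_model * 12 flattened features to 24 outputs, which suggests the forward pass flattens 12 encoder time steps per sample. A self-contained shape sketch under that assumption (the shapes are not shown in the snippet):

import torch
import torch.nn as nn

batch, d_model = 4, 120                            # illustrative sizes
predict = nn.Linear(d_model * 12, 24, bias=False)  # bias=None in the snippet likewise disables the bias
enc_out = torch.randn(batch, 12, d_model)          # assumed encoder output: 12 steps of d_model features
pred = predict(enc_out.reshape(batch, -1))         # -> (batch, 24)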
Example #4
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 freq='h',
                 activation='gelu',
                 output_attention=False,
                 distil=True,
                 device=torch.device('cuda:0')):
        super(InformerStack, self).__init__()
        self.pred_len = out_len
        self.attn = attn
        self.output_attention = output_attention

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq,
                                           dropout)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder

        stacks = list(range(e_layers, 2, -1))  # you can customize here
        encoders = [
            Encoder([
                EncoderLayer(AttentionLayer(
                    Attn(False,
                         factor,
                         attention_dropout=dropout,
                         output_attention=output_attention), d_model, n_heads),
                             d_model,
                             d_ff,
                             dropout=dropout,
                             activation=activation) for l in range(el)
            ], [ConvLayer(d_model) for l in range(el - 1)] if distil else None,
                    norm_layer=torch.nn.LayerNorm(d_model)) for el in stacks
        ]
        self.encoder = EncoderStack(encoders)
        # Decoder
        self.decoder = Decoder([
            DecoderLayer(
                AttentionLayer(
                    FullAttention(True,
                                  factor,
                                  attention_dropout=dropout,
                                  output_attention=False), d_model, n_heads),
                AttentionLayer(
                    FullAttention(False,
                                  factor,
                                  attention_dropout=dropout,
                                  output_attention=False), d_model, n_heads),
                d_model,
                d_ff,
                dropout=dropout,
                activation=activation,
            ) for l in range(d_layers)
        ],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
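
The stack schedule list(range(e_layers, 2, -1)) controls how many encoders the EncoderStack holds and how deep each one is; a quick illustration of what it evaluates to (e_layers=5 is just an example, the snippet's default is 3):

# Each entry is the depth of one encoder in the EncoderStack.
print(list(range(3, 2, -1)))   # e_layers=3 -> [3]: a single 3-layer encoder
print(list(range(5, 2, -1)))   # e_layers=5 -> [5, 4, 3]: three encoders of decreasing depth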
Example #5
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 freq='h',
                 activation='gelu',
                 output_attention=False,
                 distil=True,
                 device=torch.device('cuda:0'),
                 decoder_case=0):
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn
        self.output_attention = output_attention

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq,
                                           dropout)
        # Attention

        Attn = ProbAttention if attn == 'prob' else FullAttention
        InterpretableAttn = InterpretableProbAttention if attn == 'intprob' else InterpretableFullAttention
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    # PZ
                    # AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention),
                    #             d_model, n_heads),
                    InterpretableAttentionLayer(
                        InterpretableAttn(False,
                                          factor,
                                          attention_dropout=dropout,
                                          output_attention=output_attention),
                        d_model, n_heads) if attn == 'intprob'
                    or attn == 'intfull' else AttentionLayer(
                        Attn(False,
                             factor,
                             attention_dropout=dropout,
                             output_attention=output_attention), d_model,
                        n_heads),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation) for l in range(e_layers)
            ],
            [ConvLayer(d_model)
             for l in range(e_layers - 1)] if distil else None,
            norm_layer=torch.nn.LayerNorm(d_model))
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    # PZ
                    # AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False),
                    #             d_model, n_heads),
                    InterpretableAttentionLayer(
                        InterpretableAttn(True,
                                          factor,
                                          attention_dropout=dropout,
                                          output_attention=output_attention),
                        d_model, n_heads) if attn == 'intprob'
                    or attn == 'intfull' else AttentionLayer(
                        Attn(True,
                             factor,
                             attention_dropout=dropout,
                             output_attention=output_attention), d_model,
                        n_heads),
                    AttentionLayer(
                        FullAttention(False,
                                      factor,
                                      attention_dropout=dropout,
                                      output_attention=False), d_model,
                        n_heads),
                    d_model,
                    self.pred_len,  # pred_len
                    d_ff=d_ff,
                    dropout=dropout,
                    activation=activation,
                    case=decoder_case) for l in range(d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model))
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
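
The inline conditionals that pick between the interpretable and standard attention layers are dense; the helper below restates that selection logic. It is not part of the original class and assumes the same attention classes used in the snippet are importable:

def build_self_attention(attn, mask_flag, factor, d_model, n_heads,
                         dropout, output_attention):
    # 'intprob'/'intfull' select the interpretable wrapper and inner attention;
    # anything else falls back to the standard AttentionLayer, with
    # ProbAttention used only when attn == 'prob'.
    if attn in ('intprob', 'intfull'):
        inner_cls = (InterpretableProbAttention if attn == 'intprob'
                     else InterpretableFullAttention)
        return InterpretableAttentionLayer(
            inner_cls(mask_flag, factor, attention_dropout=dropout,
                      output_attention=output_attention), d_model, n_heads)
    inner_cls = ProbAttention if attn == 'prob' else FullAttention
    return AttentionLayer(
        inner_cls(mask_flag, factor, attention_dropout=dropout,
                  output_attention=output_attention), d_model, n_heads)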
Example #6
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 data='ETTh1',
                 freq='h',
                 activation='gelu',
                 output_attention=False,
                 distil=True,
                 device=torch.device('cuda')):
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn
        self.output_attention = output_attention
        self.e_layers = e_layers
        self.d_layers = d_layers

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, seq_len, d_model, embed,
                                           freq, dropout, data)
        self.dec_embedding = DataEmbedding(dec_in, label_len + out_len,
                                           d_model, embed, freq, dropout, data)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder
        if e_layers > 0:
            self.encoder = Encoder(
                [
                    EncoderLayer(
                        AttentionLayer(Attn(False, factor,
                                            attention_dropout=dropout,
                                            output_attention=output_attention),
                                            d_model, n_heads),
                        d_model,
                        d_ff,
                        dropout=dropout,
                        activation=activation
                    # stacking multiple layers
                    ) for l in range(e_layers)
                ],
                [
                    ConvLayer(
                        d_model
                    # stacking multiple layers
                    ) for l in range(e_layers-1)
                ] if distil else None,
                norm_layer=torch.nn.LayerNorm(d_model)
            ) if attn == 'prob' else \
            nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, n_heads, d_ff, dropout, activation), num_layers=e_layers, norm=nn.LayerNorm(d_model))
        else:
            self.encoder = nn.Identity()

        # Decoder
        if d_layers > 0:
            self.decoder = Decoder(
                [
                    DecoderLayer(
                        AttentionLayer(ProbAttention(True, factor,
                                                    attention_dropout=dropout,
                                                    output_attention=False),
                                                    d_model, n_heads),
                        AttentionLayer(FullAttention(False, factor,
                                                    attention_dropout=dropout,
                                                    output_attention=False),
                                                    d_model, n_heads),
                        d_model,
                        d_ff,
                        dropout=dropout,
                        activation=activation,
                    )
                    for l in range(d_layers)
                ],
                norm_layer=torch.nn.LayerNorm(d_model)
            ) if attn == 'prob' else \
            nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model, n_heads, d_ff, dropout, activation), num_layers=d_layers, norm=nn.LayerNorm(d_model))
        else:
            self.decoder = nn.Identity()

        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
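
In the reference Informer implementation the forward pass typically ends by projecting the decoder output to c_out channels and keeping only the last pred_len steps as the forecast; a self-contained shape sketch under that assumption (all sizes are placeholders):

import torch
import torch.nn as nn

batch, label_len, pred_len, d_model, c_out = 4, 48, 24, 512, 7  # placeholder sizes
projection = nn.Linear(d_model, c_out, bias=True)               # mirrors self.projection above
dec_out = torch.randn(batch, label_len + pred_len, d_model)     # assumed decoder output shape
forecast = projection(dec_out)[:, -pred_len:, :]                # -> (batch, pred_len, c_out)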