def __init__(self, config, src_vocab, target_vocab, s_v, t_v, u):
        """Build a Transformer with a standard encoder plus a "cross" encoder
        that operates on concatenated (2 * d_model) representations.

        Args:
            config: hyperparameter namespace exposing h (attention heads),
                N (layer count), dropout, d_model, d_ff, output_size.
            src_vocab / target_vocab: vocabularies for the two input sides.
            s_v, t_v, u: extra arguments forwarded to Embeddings
                (presumably pretrained vectors / flags -- TODO confirm
                against the Embeddings definition).
        """
        super(Transformer, self).__init__()
        self.config = config

        # h: number of attention heads, N: number of layers, dropout: rate.
        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        d_model, d_ff = self.config.d_model, self.config.d_ff

        attn = MultiHeadedAttention(h, d_model)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        position = PositionalEncoding(d_model, dropout)

        # Cross components operate on concatenated pairs, hence 2 * d_model.
        # (The original code also built a PositionalEncoding of width
        # 2 * d_model that was never used; it has been removed.)
        attncross = MultiHeadedAttention(h, d_model * 2)
        ffcross = PositionwiseFeedForward(d_model * 2, d_ff, dropout)

        self.encoder = Encoder(
            EncoderLayer(config.d_model, deepcopy(attn), deepcopy(ff),
                         dropout), N)
        self.encoder_cross = EncoderCross(
            EncoderLayerCross((config.d_model) * 2, deepcopy(attncross),
                              deepcopy(ffcross), dropout), N)
        self.src_embed = nn.Sequential(
            Embeddings(config.d_model, src_vocab, s_v, u),
            deepcopy(position))  # Embeddings followed by PE
        self.target_embed = nn.Sequential(
            Embeddings(config.d_model, target_vocab, t_v, u),
            deepcopy(position))
        # Fully-connected classification head.
        self.fc = nn.Linear(self.config.d_model, self.config.output_size)
        self.sigmoid = nn.Sigmoid()
        self.cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        # NOTE(review): nn.Softmax() without an explicit dim relies on
        # deprecated implicit-dimension behaviour; consider nn.Softmax(dim=-1)
        # once callers are checked.
        self.softmax = nn.Softmax()
Example #2
0
    def __init__(self, config, src_vocab):
        """Build a Transformer encoder followed by a linear classifier.

        Args:
            config: hyperparameter namespace exposing h, N, dropout,
                d_model, d_ff and output_size.
            src_vocab: source vocabulary passed to the Embedding layer.
        """
        super(Transformer, self).__init__()
        self.config = config
        self.src_vocab = src_vocab

        # Hyperparameters:
        # h is the number of attention heads, N the number of layers,
        # dropout the dropout rate.
        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        # Word-vector dimension and feed-forward (fully connected) dimension.
        d_model, d_ff = self.config.d_model, self.config.d_ff

        # Multi-head attention layer.
        attn = MultiHeadedAttention(h, d_model)
        # Position-wise feed-forward layer.
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        # Positional encoding.
        position = PositionalEncoding(d_model, dropout)

        self.encoder = Encoder(
            EncoderLayer(config.d_model, deepcopy(attn), deepcopy(ff),
                         dropout), N)
        self.src_embed = nn.Sequential(
            Embedding(self.config.d_model, self.src_vocab),
            deepcopy(position))  # embedding with position encoding

        self.fc = nn.Linear(self.config.d_model, self.config.output_size)
        self.softmax = nn.Softmax()
Example #3
0
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Helper: Construct a model from hyperparameters."""
    clone = copy.deepcopy

    # Shared sub-layer prototypes; each layer receives its own deep copy.
    base_attn = MultiHeadedAttention(h, d_model)
    base_ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    pos_enc = PositionalEncoding(d_model, dropout)

    encoder = Encoder(
        EncoderLayer(d_model, clone(base_attn), clone(base_ff), dropout), N)
    decoder = Decoder(
        DecoderLayer(d_model, clone(base_attn), clone(base_attn),
                     clone(base_ff), dropout), N)
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab), clone(pos_enc))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab), clone(pos_enc))

    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(d_model, tgt_vocab))

    # This was important from their code:
    # initialize parameters with Glorot / fan_avg.
    for param in model.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
    return model
Example #4
0
def construct_model():
    """Build and compile a Keras VQ-VAE autoencoder for 28x28x1 inputs."""
    x_input = tf.keras.layers.Input((28, 28, 1))
    enc_x = EncoderLayer()(x_input)
    quant_x = VQVAE()(enc_x)
    # Straight-through estimator: the forward pass outputs the quantised
    # values, while stop_gradient makes the backward pass treat
    # quantisation as identity so gradients reach the encoder.
    x_dec = tf.keras.layers.Lambda(
        lambda quant_x: enc_x + tf.stop_gradient(quant_x - enc_x))(quant_x)
    dec_x = DecoderLayer()(x_dec)
    model = tf.keras.models.Model(x_input, dec_x)
    # 0.25 is presumably the commitment-loss weight (beta) -- TODO confirm
    # against the vqvae_loss definition.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=vqvae_loss(0.25, enc_x, quant_x),
                  experimental_run_tf_function=False)
    return model
Example #5
0
    def __init__(self, config, src_vocab):
        """Transformer encoder classifier.

        Note: src_embed holds only token embeddings -- no positional
        encoding is applied here.
        """
        super(Transformer, self).__init__()
        self.config = config

        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        d_model, d_ff = self.config.d_model, self.config.d_ff

        self_attn = MultiHeadedAttention(h, d_model)
        feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)

        encoder_layer = EncoderLayer(config.d_model, deepcopy(self_attn),
                                     deepcopy(feed_forward), dropout)
        self.encoder = Encoder(encoder_layer, N)
        self.src_embed = nn.Sequential(Embeddings(config.d_model, src_vocab))

        # Linear projection to the output classes.
        self.fc = nn.Linear(self.config.d_model, self.config.output_size)

        self.softmax = nn.Softmax()
Example #6
0
    def __init__(self, config, src_vocab):
        """Matposer: Interactor-based encoder followed by a linear classifier."""
        super(Matposer, self).__init__()
        self.config = config

        d_row, N, dropout = self.config.d_row, self.config.N, self.config.dropout
        d_model, d_ff = self.config.d_model, self.config.d_ff

        # Interactor replaces self-attention as the mixing sub-layer.
        interactor = Interactor(d_model, d_ff, out_row=d_row, dropout=dropout)
        feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        pos_enc = PositionalEncoding(d_model, dropout)

        encoder_layer = EncoderLayer(d_model, deepcopy(interactor),
                                     deepcopy(feed_forward), dropout)
        self.encoder = Encoder(encoder_layer, N)
        self.src_embed = nn.Sequential(Embeddings(d_model, src_vocab),
                                       deepcopy(pos_enc))

        self.fc = nn.Linear(d_model, self.config.output_size)

        self.softmax = nn.Softmax()
Example #7
0
    def __init__(self, config):
        """Encoder-only Transformer; the embedding layer is created elsewhere.

        Args:
            config: hyperparameter namespace exposing h, N, dropout,
                d_model, d_ff and output_size.
        """
        super(Transformer, self).__init__()
        self.config = config

        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        d_model, d_ff = self.config.d_model, self.config.d_ff

        attn = MultiHeadedAttention(h, d_model)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        # (The original also built a PositionalEncoding that was never used
        # once src_embed was commented out; it has been removed.)

        self.encoder = Encoder(EncoderLayer(config.d_model, deepcopy(attn), deepcopy(ff), dropout), N)

        # Fully-connected classification head.
        self.fc = nn.Linear(
            self.config.d_model,
            self.config.output_size
        )
Example #8
0
    def __init__(self, config, pre_train_weight, embedding_size):
        """Transformer encoder using pretrained embedding weights.

        Args:
            config: hyperparameter namespace exposing h, N, dropout,
                d_model and d_ff.
            pre_train_weight: pretrained embedding weight matrix.
            embedding_size: dimensionality of the embeddings.
        """
        super(Transformer, self).__init__()
        self.config = config
        self.pre_train_weight = pre_train_weight
        self.embedding_size = embedding_size

        # Hyperparameters:
        # h is the number of attention heads, N the number of layers,
        # dropout the dropout rate.
        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        # Word-vector dimension and feed-forward (fully connected) dimension.
        d_model, d_ff = self.config.d_model, self.config.d_ff

        # Multi-head attention layer.
        attn = MultiHeadedAttention(h, d_model)
        # Position-wise feed-forward layer.
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        # Positional encoding.
        position = PositionalEncoding(d_model, dropout)

        self.encoder = Encoder(EncoderLayer(config.d_model, deepcopy(attn), deepcopy(ff), dropout), N)
        self.src_embed = nn.Sequential(Embedding(self.config.d_model, self.pre_train_weight, self.embedding_size), deepcopy(position)) # embedding with position encoding
Example #9
0
    def __init__(self, config, src_vocab):
        """Transformer encoder with a learnable sinusoid position table.

        Args:
            config: hyperparameter namespace exposing h, N, dropout,
                d_model, d_ff and output_size.
            src_vocab: source vocabulary size (int), used both for the word
                embedding and to size the position-encoding table.
        """
        super(Transformer, self).__init__()
        self.config = config

        h, N, dropout = self.config.h, self.config.N, self.config.dropout
        d_model, d_ff = self.config.d_model, self.config.d_ff
        self.src_vocab = src_vocab

        attn = MultiHeadedAttention(h, d_model)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)

        self.encoder_layer = EncoderLayer(config.d_model, deepcopy(attn),
                                          deepcopy(ff), dropout)
        self.encoder = Encoder(self.encoder_layer, N)

        # Word embedding; index 0 is reserved for padding.
        self.src_word_emb = nn.Embedding(src_vocab,
                                         config.d_model,
                                         padding_idx=0)

        # Alternative position-bias variants kept for reference:
        # self.pos_bias = nn.Embedding(src_vocab, config.d_model, padding_idx=0)
        # self.pos_bias = nn.Embedding.from_pretrained(get_sinusoid_encoding_table_dim(src_vocab, config.d_model, padding_idx=0),freeze=True)
        # self.pos_bias = nn.Embedding.from_pretrained(get_sinusoid_encoding_table_vocab(src_vocab, config.d_model, padding_idx=0),freeze=True)

        # self.pos_bias = nn.Embedding(1, config.d_model, padding_idx=0)
        # self.pos_bias = nn.Embedding(src_vocab, 1, padding_idx=0)
        # self.position_enc = nn.Embedding(src_vocab, config.d_model, padding_idx=0)
        # Sinusoid table initialisation; freeze=False means the table is
        # fine-tuned during training.
        self.position_enc = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(src_vocab,
                                        config.d_model,
                                        padding_idx=0),
            freeze=False)

        # position_enc = torch.randn(1000, config.d_model)
        # position_enc = position_enc.unsqueeze(0)
        # self.register_buffer('position_enc', position_enc)

        self.drop = nn.Dropout(p=dropout)
        self.fc = nn.Linear(self.config.d_model, self.config.output_size)

        self.softmax = nn.Softmax()
Example #10
0
    def __init__(self, enc_in, dec_in, c_out, out_len,
                 factor=5, d_model=512, n_heads=8, e_layers=3, d_layers=2, d_ff=512, group_factors=None,
                 group_operator='avg', group_step=1, dropout=0.0, attn='prob', embed='fixed', activation='gelu',
                 has_minute=False, has_hour=True):
        """Hierarchical Informer: one encoder/decoder stack per grouping level.

        Args:
            enc_in / dec_in: encoder / decoder input feature counts.
            c_out: number of output features.
            out_len: prediction horizon length.
            factor: ProbSparse attention sampling factor.
            d_model, n_heads, e_layers, d_layers, d_ff: transformer sizes.
            group_factors: grouping factors, one per level; a final factor
                of 1 (the ungrouped series) is always appended.
            group_operator / group_step: how points are grouped per level.
            dropout, attn, embed, activation: standard Informer options.
            has_minute / has_hour: which time features the embeddings use.
        """
        super(HLInformer, self).__init__()
        self.pred_len = out_len
        self.attn = attn

        # Ensure the last level is always the identity grouping (factor 1).
        if group_factors is None:
            group_factors = [4, 1]
        else:
            group_factors = [*group_factors, 1]

        self.group_factors = group_factors

        # Grouping: one GroupLayer per level.
        self.group_layers = nn.ModuleList([GroupLayer(gf, group_operator, group_step) for gf in group_factors])
        # Encoding: one data embedding per level for each side.
        self.enc_embeddings = nn.ModuleList(
            [InformerDataEmbedding(enc_in, d_model, has_minute=has_minute, has_hour=has_hour) for _ in group_factors])
        self.dec_embeddings = nn.ModuleList(
            [InformerDataEmbedding(dec_in, d_model, has_minute=has_minute, has_hour=has_hour) for _ in group_factors])
        # Attention: ProbSparse attention by default, full attention otherwise.
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder: e_layers attention layers interleaved with e_layers - 1
        # distilling ConvLayers, replicated per grouping level.
        self.encoders = nn.ModuleList([Encoder(
            [
                EncoderLayer(
                    AttentionLayer(Attn(False, factor, attention_dropout=dropout),
                                   d_model, n_heads),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation
                ) for l in range(e_layers)
            ],
            [
                ConvLayer(
                    d_model
                ) for l in range(e_layers - 1)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        ) for _ in group_factors])
        # Decoder: masked self-attention followed by cross-attention,
        # replicated per grouping level.
        self.decoders = nn.ModuleList([Decoder(
            [
                DecoderLayer(
                    AttentionLayer(FullAttention(True, factor, attention_dropout=dropout),
                                   d_model, n_heads),
                    AttentionLayer(FullAttention(False, factor, attention_dropout=dropout),
                                   d_model, n_heads),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation,
                )
                for l in range(d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        ) for _ in group_factors])
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        # Per-level projection; input width d_model * (i + 1) suggests
        # features from coarser levels are concatenated before projection --
        # TODO confirm against forward().
        self.projections = nn.ModuleList(
            [nn.Linear(d_model * (i + 1), c_out, bias=True) for i, gf in enumerate(group_factors)])
Example #11
0
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 batch_size,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 data='ETTh',
                 activation='gelu'):
        """Informer encoder-decoder for long-sequence forecasting.

        NOTE(review): norm layers and the final projection use tf.keras,
        so this appears to be a TensorFlow port of the PyTorch Informer --
        confirm Encoder/Decoder/etc. are the TF implementations.

        Args:
            enc_in / dec_in: encoder / decoder input feature counts.
            c_out: number of output features.
            seq_len / label_len / out_len: input, label and prediction lengths.
            batch_size: batch size (stored for later use).
            factor: ProbSparse attention sampling factor.
            d_model, n_heads, e_layers, d_layers, d_ff: transformer sizes.
            dropout, attn, embed, data, activation: standard Informer options.
        """
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn
        self.seq_len = seq_len
        self.label_len = label_len
        self.batch_size = batch_size

        # Encoding: value + temporal embeddings for each side.
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, data,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, data,
                                           dropout)
        # Attention: ProbSparse attention by default, full attention otherwise.
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder: e_layers attention layers with e_layers - 1 distilling
        # ConvLayers between them.
        self.encoder = Encoder([
            EncoderLayer(AttentionLayer(
                Attn(False, factor, attention_dropout=dropout), d_model,
                n_heads),
                         d_model,
                         d_ff,
                         dropout=dropout,
                         activation=activation) for l in range(e_layers)
        ], [ConvLayer(d_model) for l in range(e_layers - 1)],
                               norm_layer=tf.keras.layers.LayerNormalization())
        # Decoder: masked self-attention followed by cross-attention.
        self.decoder = Decoder([
            DecoderLayer(
                AttentionLayer(
                    FullAttention(True, factor, attention_dropout=dropout),
                    d_model, n_heads),
                AttentionLayer(
                    FullAttention(False, factor, attention_dropout=dropout),
                    d_model, n_heads),
                d_model,
                d_ff,
                dropout=dropout,
                activation=activation,
            ) for l in range(d_layers)
        ],
                               norm_layer=tf.keras.layers.LayerNormalization())
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = tf.keras.layers.Dense(c_out)
Example #12
0
def main():
    """Build a Transformer typo-corrector and train or test it per args.mode.

    Relies on a module-level `args` namespace (file paths, model sizes,
    mode, etc.) and project helpers (SentenceDataset, Transformer, train,
    Inferencer, ...) defined elsewhere.
    """
    train_data = SentenceDataset(args.train_file,
                                 encoding_type=args.encoding_type,
                                 filter_threshold=args.filter_threshold)
    val_data = SentenceDataset(args.val_file,
                               encoding_type=args.encoding_type,
                               filter_threshold=args.filter_threshold)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, args.batch_size)

    print(len(train_loader))

    input_dim = len(train_data.vocab.source_vocab)
    output_dim = len(train_data.vocab.target_vocab)
    static = args.embedding_type == 'static'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len,
                               device, static)
    encoder_layer = EncoderLayer(args.hidden_dim, args.num_enc_heads,
                                 args.inner_dim, args.dropout)
    encoder = Encoder(enc_embedding, encoder_layer, args.num_enc_layers,
                      args.dropout)

    # NOTE(review): decoder embeddings are sized with input_dim (source
    # vocab), not output_dim -- confirm the source and target vocabularies
    # are intended to be shared here.
    dec_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len,
                               device, static)
    decoder_layer = DecoderLayer(args.hidden_dim, args.num_dec_heads,
                                 args.inner_dim, args.dropout)
    decoder = Decoder(output_dim, args.hidden_dim, dec_embedding,
                      decoder_layer, args.num_dec_layers, args.dropout)

    pad_id = train_data.vocab.source_vocab['<pad>']

    model = Transformer(encoder, decoder, pad_id, device)

    print('Transformer has {:,} trainable parameters'.format(
        count_parames(model)))

    # Either resume from a checkpoint or initialise weights from scratch.
    if args.load_model is not None:
        model.load(args.load_model)
    else:
        model.apply(init_weights)

    if args.mode == 'test':
        inferencer = Inferencer(model, train_data.vocab, device)
        greedy_out = inferencer.infer_greedy(
            'helo world, I m testin a typo corector')
        print(greedy_out)

    elif args.mode == 'train':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        # NLLLoss expects log-probabilities; padding positions are ignored.
        loss_function = nn.NLLLoss(ignore_index=pad_id)

        print('Started training...')
        train(model, train_loader, val_loader, optimizer, loss_function,
              device)

    else:
        raise ValueError('Mode not recognized')