Example #1
def make_model(opt,
               src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab), opt)

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
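The loop at the end applies Glorot/Xavier uniform initialization to every parameter with more than one dimension, i.e. the weight matrices, while biases keep PyTorch's defaults. A minimal, self-contained sketch of the same pattern on a small stand-in network (the Sequential model here is only illustrative):

import torch.nn as nn

# Stand-in network; the example above builds a full Transformer instead.
toy_model = nn.Sequential(nn.Linear(512, 2048), nn.ReLU(), nn.Linear(2048, 512))

# Xavier / Glorot uniform init for weight matrices only; 1-D parameters
# (biases) are skipped by the p.dim() > 1 check, as in make_model above.
for p in toy_model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)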
Example #2
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 vocab_size,
                 input_seq_len,
                 output_seq_len,
                 add_stage_1,
                 add_stage_2,
                 rate=0.1):
        super(AbstractiveSummarization, self).__init__()

        self.input_seq_len = input_seq_len
        self.output_seq_len = output_seq_len
        self.vocab_size = vocab_size
        self.bert = BertLayer(d_embedding=d_model, trainable=False)
        embedding_matrix = vocab_of_BERT.get_weights()[0]
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            d_model,
            trainable=False,
            embeddings_initializer=Constant(embedding_matrix))

        self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size,
                               rate)
        self.d_model = d_model
        self.add_stage_1 = add_stage_1
        self.add_stage_2 = add_stage_2
        if config.copy_gen:
            self.pointer_generator = Pointer_Generator()

        self.final_layer = tf.keras.layers.Dense(vocab_size)
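The embedding layer in this example is frozen (trainable=False) and seeded, via a Constant initializer, with the weight matrix extracted from the pretrained BERT model. A minimal sketch of that pattern in isolation, with a random matrix standing in for the real BERT weights and illustrative sizes:

import numpy as np
import tensorflow as tf
from tensorflow.keras.initializers import Constant

vocab_size, d_model = 30522, 512  # illustrative sizes
pretrained_matrix = np.random.rand(vocab_size, d_model).astype("float32")

# Frozen embedding layer initialized from a pretrained matrix, mirroring the
# trainable=False / embeddings_initializer=Constant(...) call above.
embedding = tf.keras.layers.Embedding(
    vocab_size,
    d_model,
    trainable=False,
    embeddings_initializer=Constant(pretrained_matrix))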
Example #3
    def test_decoder(self):
        vocab_size, embed_dim, max_seq_len, n_heads, dropout_rate, n_layers = 100, 512, 10, 8, 0.1, 6
        decoder = Decoder(vocab_size, embed_dim, max_seq_len, n_heads, dropout_rate, n_layers)
        batch_size = 10
        enc_outputs = torch.randn(batch_size, max_seq_len, embed_dim)
        # Decoder inputs are token ids, so sample them from the vocabulary range.
        dec_x = torch.randint(0, vocab_size, size=(batch_size, max_seq_len))
        assert decoder(enc_outputs, dec_x).shape == enc_outputs.shape
Example #4
    def __init__(self, preprocess_config, model_config):
        super(FastSpeech2, self).__init__()
        self.model_config = model_config

        self.encoder = Encoder(model_config)
        self.variance_adaptor = VarianceAdaptor(preprocess_config, model_config)
        self.decoder = Decoder(model_config)
        self.mel_linear = nn.Linear(
            model_config["transformer"]["decoder_hidden"],
            preprocess_config["preprocessing"]["mel"]["n_mel_channels"],
        )
        self.postnet = PostNet()

        self.speaker_emb = None
        if model_config["multi_speaker"]:
            with open(
                os.path.join(
                    preprocess_config["path"]["preprocessed_path"], "speakers.json"
                ),
                "r",
            ) as f:
                n_speaker = len(json.load(f))
            self.speaker_emb = nn.Embedding(
                n_speaker,
                model_config["transformer"]["encoder_hidden"],
            )
Example #5
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 vocab_size,
                 output_seq_len,
                 rate=0.1):
        super(AbstractiveSummarization, self).__init__()

        self.output_seq_len = output_seq_len
        self.vocab_size = vocab_size
        embedding_matrix, self.bert_model = _embedding_from_bert()
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            d_model,
            trainable=False,
            embeddings_initializer=Constant(embedding_matrix))

        self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size,
                               rate)
        self.d_model = d_model
        if config.copy_gen:
            self.pointer_generator = Pointer_Generator()

        self.final_layer = tf.keras.layers.Dense(vocab_size)
Example #6
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 vocab_size,
                 rate=0.1):
        super(draft_summary, self).__init__()
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size,
                               rate)
        self.final_layer = tf.keras.layers.Dense(vocab_size)
Example #7
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 vocab_size,
                 output_seq_len,
                 rate=0.1):
        super(refine_summary, self).__init__()
        self.bert = BertLayer(d_embedding=d_model, trainable=False)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size,
                               rate)
        self.final_layer = tf.keras.layers.Dense(vocab_size)
        self.output_seq_len = output_seq_len
        self.d_model = d_model
Example #8
def setup_self_attn_model():
    import torch
    import torch.nn as nn
    from transformer import Encoder, Decoder, Transformer, EncoderLayer, DecoderLayer, SelfAttention, PositionwiseFeedforward

    device = torch.device('cuda:0')
    pad_idx = DE.vocab.stoi["<pad>"]

    hid_dim = 300
    n_layers = 3
    n_heads = 4
    pf_dim = 512  # 2048
    dropout = 0.1

    input_dim = len(DE.vocab)
    enc = Encoder(input_dim, hid_dim, n_layers, n_heads, pf_dim, EncoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)

    output_dim = len(EN.vocab)
    dec = Decoder(output_dim, hid_dim, n_layers, n_heads, pf_dim, DecoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)

    model = Transformer(enc, dec, pad_idx, device)

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    # model.load_state_dict(torch.load("weights/bigger_self_attn_weights"))

    train_model(model,
                num_epochs=100,
                learning_rate=0.001,
                weight_decay=0,
                log_freq=1,
                self_attn_hid_dim=hid_dim)
    torch.save(model.state_dict(), "weights/bigger_self_attn_weights")

    return model
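The commented-out load_state_dict call and the final torch.save show the usual state_dict round trip for checkpointing. A tiny self-contained sketch of the same save/restore pattern (the Linear model and file name are placeholders):

import torch
import torch.nn as nn

# Placeholder model and path; the function above checkpoints a full Transformer.
tiny = nn.Linear(300, 300)
torch.save(tiny.state_dict(), "tiny_weights.pt")
tiny.load_state_dict(torch.load("tiny_weights.pt"))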
Example #9
    def __init__(self,
                 dim,
                 src_n_vocab,
                 n_encod_layer,
                 tgt_n_vocab,
                 n_decode_layer,
                 max_len=512):
        # Assumes this class subclasses nn.Module; initialize the base class
        # before any submodules are assigned.
        super().__init__()
        self.src_emb = EmbeddingWithPositionalEncoding(dim, src_n_vocab,
                                                       max_len)
        self.tgt_emb = EmbeddingWithLearnedPositionalEncoding(
            dim, tgt_n_vocab, max_len)

        enc_layer = TransformerLayer(dim, MultiHeadAttention(6, dim, 0.1),
                                     None, nn.Linear(dim, dim), 0.1)
        self.encoder = Encoder(enc_layer, n_encod_layer)

        dec_layer = TransformerLayer(dim, MultiHeadAttention(6, dim, 0.1),
                                     MultiHeadAttention(6, dim, 0.1),
                                     nn.Linear(dim, dim), 0.1)
        self.decoder = Decoder(dec_layer, n_decode_layer)

        self.encoder_decoder = EncoderDecoder(self.encoder, self.decoder,
                                              self.src_emb, self.tgt_emb)
Example #10
    def __init__(
                  self, 
                  num_layers, 
                  d_model, 
                  num_heads, 
                  dff, 
                  input_vocab_size, 
                  target_vocab_size,
                  rate=config.dropout_rate, 
                  add_pointer_generator=None):
        super(Bertified_transformer, self).__init__()

        self.target_vocab_size = target_vocab_size
        (decoder_embedding, self.encoder, 
        self.decoder_bert_model) = _embedding_from_bert()
        self.decoder_embedding = tf.keras.layers.Embedding(
                                       target_vocab_size, 
                                       d_model, 
                                       trainable=False,
                                       embeddings_initializer=Constant(decoder_embedding),
                                       name='Decoder-embedding'
                                       )
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, target_vocab_size, rate, 
                               add_pointer_generator=add_pointer_generator)
Example #11
def BuildModel(vocab_size, encoder_emb, decoder_emb, d_model=512, N=6, d_ff=2048, h=8, dropout=0.1):

    target_vocab = vocab_size
    c = copy.deepcopy

    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)

    encoder_layer = EncoderLayer(d_model, c(attention), c(feed_forward), dropout)
    decoder_layer = DecoderLayer(d_model, c(attention), c(attention), c(feed_forward), dropout)

    encoder = Encoder(encoder_layer, N)
    decoder = Decoder(decoder_layer, N)

    model = EncoderDecoder(encoder, decoder,
        nn.Sequential(Embeddings(encoder_emb, d_model), c(position)),
        nn.Sequential(Embeddings(decoder_emb, d_model), c(position)),
        Generator(d_model, target_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model