def make_model(opt, src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Helper: Construct a model from hyperparameters.

    Args:
        opt: options/optimizer object forwarded to EncoderDecoder.
        src_vocab: source vocabulary size.
        tgt_vocab: target vocabulary size.
        N: number of encoder and decoder layers.
        d_model: model (embedding) dimension.
        d_ff: position-wise feed-forward inner dimension.
        h: number of attention heads.
        dropout: dropout rate.

    Returns:
        An EncoderDecoder model with Glorot-initialized weight matrices.
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab),
        opt)
    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            # Fix: `nn.init.xavier_uniform` (no underscore) is deprecated in
            # PyTorch; use the in-place `xavier_uniform_`, matching the other
            # model builders in this file.
            nn.init.xavier_uniform_(p)
    return model
def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, input_seq_len, output_seq_len, add_stage_1, add_stage_2, rate=0.1):
    """Assemble the abstractive-summarization model: a frozen BERT layer,
    frozen BERT-initialized embeddings, a Transformer decoder, an optional
    pointer-generator, and a final vocabulary projection."""
    super(AbstractiveSummarization, self).__init__()
    # Plain bookkeeping attributes.
    self.input_seq_len = input_seq_len
    self.output_seq_len = output_seq_len
    self.vocab_size = vocab_size
    self.d_model = d_model
    self.add_stage_1 = add_stage_1
    self.add_stage_2 = add_stage_2
    # Frozen BERT layer (not trainable).
    self.bert = BertLayer(d_embedding=d_model, trainable=False)
    # Embedding table seeded from BERT's word embeddings and kept frozen.
    pretrained_weights = vocab_of_BERT.get_weights()[0]
    self.embedding = tf.keras.layers.Embedding(
        vocab_size,
        d_model,
        trainable=False,
        embeddings_initializer=Constant(pretrained_weights))
    self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size, rate)
    # Copy mechanism is only built when enabled in the config.
    if config.copy_gen:
        self.pointer_generator = Pointer_Generator()
    self.final_layer = tf.keras.layers.Dense(vocab_size)
def test_decoder(self):
    """Smoke test: Decoder output must keep the encoder-output shape."""
    vocab_size = 100
    embed_dim = 512
    max_seq_len = 10
    n_heads = 8
    dropout_rate = 0.1
    n_layers = 6
    decoder = Decoder(vocab_size, embed_dim, max_seq_len, n_heads, dropout_rate, n_layers)
    n_batch = 10
    enc_outputs = torch.randn(n_batch, max_seq_len, embed_dim)
    dec_x = torch.randint(0, max_seq_len, size=(n_batch, max_seq_len))
    assert decoder(enc_outputs, dec_x).shape == enc_outputs.shape
def __init__(self, preprocess_config, model_config):
    """Assemble FastSpeech2: encoder -> variance adaptor -> decoder ->
    mel projection -> postnet, plus an optional speaker embedding."""
    super(FastSpeech2, self).__init__()
    self.model_config = model_config
    self.encoder = Encoder(model_config)
    self.variance_adaptor = VarianceAdaptor(preprocess_config, model_config)
    self.decoder = Decoder(model_config)
    # Project decoder hidden states onto mel-spectrogram channels.
    self.mel_linear = nn.Linear(
        model_config["transformer"]["decoder_hidden"],
        preprocess_config["preprocessing"]["mel"]["n_mel_channels"],
    )
    self.postnet = PostNet()
    # Speaker embedding is created only in the multi-speaker configuration;
    # the speaker count comes from the preprocessed speakers.json file.
    self.speaker_emb = None
    if model_config["multi_speaker"]:
        speakers_path = os.path.join(
            preprocess_config["path"]["preprocessed_path"], "speakers.json"
        )
        with open(speakers_path, "r") as f:
            n_speaker = len(json.load(f))
        self.speaker_emb = nn.Embedding(
            n_speaker,
            model_config["transformer"]["encoder_hidden"],
        )
def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, output_seq_len, rate=0.1):
    """Assemble the abstractive-summarization model: BERT-backed embeddings
    (frozen), a Transformer decoder, an optional pointer-generator, and a
    final vocabulary projection."""
    super(AbstractiveSummarization, self).__init__()
    # Bookkeeping attributes.
    self.output_seq_len = output_seq_len
    self.vocab_size = vocab_size
    self.d_model = d_model
    # Pull BERT's word-embedding matrix and keep a handle on the BERT model.
    pretrained_matrix, self.bert_model = _embedding_from_bert()
    # Embedding table seeded from BERT and kept frozen.
    self.embedding = tf.keras.layers.Embedding(
        vocab_size,
        d_model,
        trainable=False,
        embeddings_initializer=Constant(pretrained_matrix))
    self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size, rate)
    # Copy mechanism is only built when enabled in the config.
    if config.copy_gen:
        self.pointer_generator = Pointer_Generator()
    self.final_layer = tf.keras.layers.Dense(vocab_size)
def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, rate=0.1):
    """Draft-summary head: a Transformer decoder followed by a dense
    projection onto the vocabulary."""
    super(draft_summary, self).__init__()
    self.final_layer = tf.keras.layers.Dense(vocab_size)
    self.decoder = Decoder(
        num_layers, d_model, num_heads, dff, vocab_size, rate)
def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, output_seq_len, rate=0.1):
    """Refine-summary head: a frozen BERT layer feeding a Transformer
    decoder topped by a dense vocabulary projection."""
    super(refine_summary, self).__init__()
    # Bookkeeping attributes.
    self.output_seq_len = output_seq_len
    self.d_model = d_model
    # Frozen BERT layer (not trainable).
    self.bert = BertLayer(d_embedding=d_model, trainable=False)
    self.decoder = Decoder(
        num_layers, d_model, num_heads, dff, vocab_size, rate)
    self.final_layer = tf.keras.layers.Dense(vocab_size)
def setup_self_attn_model():
    """Build, initialize, train and checkpoint the self-attention
    (Transformer) DE->EN translation model.

    Returns:
        The trained Transformer model.
    """
    import torch.nn as nn
    from transformer import Encoder, Decoder, Transformer, EncoderLayer, DecoderLayer, SelfAttention, PositionwiseFeedforward

    # Fix: fall back to CPU when CUDA is unavailable instead of crashing
    # on the hard-coded torch.device('cuda:0').
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    pad_idx = DE.vocab.stoi["<pad>"]

    # Hyperparameters.
    hid_dim = 300
    n_layers = 3
    n_heads = 4
    pf_dim = 512  # 2048
    dropout = 0.1

    input_dim = len(DE.vocab)
    enc = Encoder(input_dim, hid_dim, n_layers, n_heads, pf_dim, EncoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)
    output_dim = len(EN.vocab)
    dec = Decoder(output_dim, hid_dim, n_layers, n_heads, pf_dim, DecoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)
    model = Transformer(enc, dec, pad_idx, device)

    # Glorot initialization for every weight matrix.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    # model.load_state_dict(torch.load("weights/bigger_self_attn_weights"))
    train_model(model, num_epochs=100, learning_rate=0.001, weight_decay=0,
                log_freq=1, self_attn_hid_dim=hid_dim)
    torch.save(model.state_dict(), "weights/bigger_self_attn_weights")
    return model
def __init__(self, dim, src_n_vocab, n_encod_layer, tgt_n_vocab, n_decode_layer, max_len=512):
    """Wire up an encoder-decoder Transformer with sinusoidal source and
    learned target positional embeddings.

    NOTE(review): there is no super().__init__() call here — if the
    enclosing class derives from nn.Module this needs one; confirm against
    the class definition.
    """
    self.src_emb = EmbeddingWithPositionalEncoding(dim, src_n_vocab, max_len)
    self.tgt_emb = EmbeddingWithLearnedPositionalEncoding(
        dim, tgt_n_vocab, max_len)
    # Encoder layer: self-attention only (no cross-attention -> None).
    encoder_layer = TransformerLayer(
        dim, MultiHeadAttention(6, dim, 0.1), None, nn.Linear(dim, dim), 0.1)
    self.encoder = Encoder(encoder_layer, n_encod_layer)
    # Decoder layer: self-attention plus cross-attention over encoder output.
    decoder_layer = TransformerLayer(
        dim,
        MultiHeadAttention(6, dim, 0.1),
        MultiHeadAttention(6, dim, 0.1),
        nn.Linear(dim, dim),
        0.1)
    self.decoder = Decoder(decoder_layer, n_decode_layer)
    self.encoder_decoder = EncoderDecoder(
        self.encoder, self.decoder, self.src_emb, self.tgt_emb)
def __init__(
        self,
        num_layers,
        d_model,
        num_heads,
        dff,
        input_vocab_size,
        target_vocab_size,
        rate=config.dropout_rate,
        add_pointer_generator=None):
    """BERT-backed transformer: BERT supplies both the encoder and the
    (frozen) decoder embedding matrix; decoding is done by a Transformer
    decoder with an optional pointer-generator."""
    super(Bertified_transformer, self).__init__()
    self.target_vocab_size = target_vocab_size
    # One BERT call yields the embedding matrix, the encoder, and the
    # decoder-side BERT model.
    decoder_embedding, self.encoder, self.decoder_bert_model = _embedding_from_bert()
    # Decoder embedding table seeded from BERT and kept frozen.
    self.decoder_embedding = tf.keras.layers.Embedding(
        target_vocab_size,
        d_model,
        trainable=False,
        embeddings_initializer=Constant(decoder_embedding),
        name='Decoder-embedding')
    self.decoder = Decoder(
        num_layers, d_model, num_heads, dff, target_vocab_size, rate,
        add_pointer_generator=add_pointer_generator)
def BuildModel(vocab_size, encoder_emb, decoder_emb, d_model=512, N=6, d_ff=2048, h=8, dropout=0.1):
    """Construct an EncoderDecoder transformer from hyperparameters.

    Args:
        vocab_size: target vocabulary size for the Generator head.
        encoder_emb: source embedding argument passed to Embeddings.
        decoder_emb: target embedding argument passed to Embeddings.
        d_model: model (embedding) dimension.
        N: number of encoder and decoder layers.
        d_ff: position-wise feed-forward inner dimension.
        h: number of attention heads.
        dropout: dropout rate.

    Returns:
        A Glorot-initialized EncoderDecoder model.
    """
    clone = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    pos_enc = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, clone(attn), clone(ff), dropout), N),
        Decoder(DecoderLayer(d_model, clone(attn), clone(attn), clone(ff), dropout), N),
        nn.Sequential(Embeddings(encoder_emb, d_model), clone(pos_enc)),
        nn.Sequential(Embeddings(decoder_emb, d_model), clone(pos_enc)),
        Generator(d_model, vocab_size))
    # Glorot / fan_avg initialization for every weight matrix.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model