def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important in the reference implementation:
    # initialize parameters with Glorot / fan_avg (Xavier uniform).
    # See https://zhuanlan.zhihu.com/p/74274453 (in Chinese).
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
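
A minimal usage sketch (not from the original source), assuming the Annotated Transformer classes referenced above plus "import copy" and "import torch.nn as nn" are in scope; the vocabulary sizes and N are toy values:

import torch

tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
tmp_model.eval()  # disable dropout for a deterministic smoke test
src = torch.randint(1, 11, (1, 10))  # (batch, src_len) of token ids
src_mask = torch.ones(1, 1, 10)      # attend to every source position
memory = tmp_model.encode(src, src_mask)
print(memory.shape)  # expected: torch.Size([1, 10, 512])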
Example #2
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 max_position_encoding,
                 drop_rate=0.1,
                 trainable=True,
                 name=None,
                 dtype=None,
                 dynamic=False,
                 **kwargs):
        super(Decoder, self).__init__(trainable, name, dtype, dynamic,
                                      **kwargs)

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = PositionEncoder().position_encoding(
            max_position_encoding, d_model)

        self.dec_layers = [
            DecoderLayer(d_model, num_heads, dff, drop_rate)
            for _ in range(num_layers)
        ]

        self.dropout = tf.keras.layers.Dropout(rate=drop_rate)
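
The snippet above shows only the constructor. A minimal sketch of the matching call method (an assumption, following the standard TensorFlow Transformer tutorial), assuming pos_encoding has shape (1, max_position_encoding, d_model) and each DecoderLayer returns its output plus two attention-weight tensors:

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        seq_len = tf.shape(x)[1]
        attention_weights = {}

        x = self.embedding(x)  # (batch_size, seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))  # scale embeddings
        x += self.pos_encoding[:, :seq_len, :]  # add positional information
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](
                x, enc_output, training, look_ahead_mask, padding_mask)
            attention_weights['decoder_layer{}_block1'.format(i + 1)] = block1
            attention_weights['decoder_layer{}_block2'.format(i + 1)] = block2

        return x, attention_weights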
Example #3
    def __init__(self, data, num_layers, num_heads, dff, rate=0.1):
        super(Decoder, self).__init__()

        self.num_layers = num_layers

        # The embedding layer is built from the dataset and exposes the
        # maximum sequence length and model dimensionality it was built with.
        self.embed = Embedding(data)
        self.maxlength = self.embed.maxl
        self.d_model = self.embed.d_model

        self.pos_encoding = PositionalEncoder(self.maxlength, self.d_model)

        self.dec_layers = [DecoderLayer(self.d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)
Example #4
	def _make_model(self, num_tgt_chars, N, d_model, d_ff, h, dropout):
		"""
		
		:param num_tgt_chars: output space
		:param N: number of decoder and encoder layers
		:param d_model: model dimensionality
		:param d_ff: hidden size of the feed-forward neural network
		:param h: number of attention heads
		:param dropout: dropout rate
		:return: model

		"""
		c = copy.deepcopy
		attn = MultiHeadedAttention(h, d_model)
		ff = PositionwiseFeedForward(d_model, d_ff, dropout)
		position = PositionalEncoding(d_model, dropout)

		if self.config.USE_RESNET:
			feature_extractor = ResNet(block=BasicBlock, layers=self.config.RESNET_LAYERS, d_model=self.config.D_MODEL)
		else:
			feature_extractor = FeatureExtractionNetwork(d_model=self.config.D_MODEL)

		direction_embed = Embeddings(d_model, 2)

		model = EncoderDecoder(
			encoder=Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
			decoder=Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
			tgt_embed=nn.Sequential(Embeddings(d_model, num_tgt_chars), c(position)),
			generator=PredictionLayer(d_model, num_tgt_chars),
			feature_extractor=feature_extractor,
			prediction_layer=PredictionLayer(d_model, len(Dataset.CHAR_ID_MAP)),
			bidirectional_decoding=self.config.BIDIRECTIONAL_DECODING,
			direction_embed=direction_embed,
			device=self.device
		)
		
		for p in model.parameters():
			if p.dim() > 1:
				nn.init.xavier_normal_(p)
		
		logging.info("Model created")
		
		return model
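
A small illustration of what the p.dim() > 1 guard does in the initialization loop above: two-dimensional weight matrices are re-initialized, while one-dimensional parameters such as biases and LayerNorm gains keep their defaults.

import torch.nn as nn

layer = nn.Linear(4, 4)
nn.init.xavier_normal_(layer.weight)  # weight is 2-D, so the loop re-initializes it
# layer.bias is 1-D (p.dim() == 1), so the loop above leaves it untouched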
Example #5
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model

        # Build the word-embedding layer for Chinese (the target language)
        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = positional_encoding(target_vocab_size,
                                                self.d_model)

        self.dec_layers = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(rate)
Example #6
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionWiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    # Initialize weight matrices with Glorot / Xavier uniform.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
Example #7
    def __init__(self,
                 num_layers,
                 target_vocab_size,
                 max_length,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1):
        super(DecoderModel, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.max_length = max_length

        self.embedding = keras.layers.Embedding(target_vocab_size,
                                                self.d_model)
        # position_embedding.shape: (1, max_length, d_model)
        self.position_embedding = get_position_embedding(
            self.max_length, self.d_model)
        self.dropout = keras.layers.Dropout(rate)
        self.decoder_layers = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(self.num_layers)
        ]
Example #8
    def __init__(self,
                 tgt_size,
                 N=6,
                 d_model=512,
                 d_ff=2048,
                 h=8,
                 dropout=0.1):
        '''Construct encoder and decoder stacks from the given hyperparameters.'''
        super(Transformer, self).__init__()
        self.tgt_size = tgt_size
        c = copy.deepcopy
        self.attn = MultiHeadedAttn(h, d_model, dropout)
        self.ffn = PositionwiseFeedForward(d_model, d_ff, dropout)

        self.position_in = PositionalEncoding(d_model, dropout)
        self.position_out = PositionalEncoding(d_model, dropout)
        self.encoder = Encoder(
            EncoderLayer(d_model, c(self.attn), c(self.ffn), dropout), N)
        self.decoder = Decoder(
            DecoderLayer(d_model, c(self.attn), c(self.attn), c(self.ffn),
                         dropout), N)
        self.tgt_embed = Embeddings(d_model, tgt_size)
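
The constructor above stores the sub-modules but the snippet shows no forward pass. One plausible wiring (a sketch, not the original author's code), assuming the source input is already projected to d_model (the class defines no source embedding) and the usual Encoder/Decoder call signatures:

    def forward(self, src, tgt, src_mask, tgt_mask):
        # encode the (already d_model-sized) source with positional encoding
        memory = self.encoder(self.position_in(src), src_mask)
        # embed the target, add positional encoding, then decode against memory
        return self.decoder(self.position_out(self.tgt_embed(tgt)),
                            memory, src_mask, tgt_mask)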
Example #9
print("zh_padding_mask:", zh_padding_mask, "\nzh_padding_mask.shape:",
      zh_padding_mask.shape)
print(20 * '-')
print("look_ahead_mask:", look_ahead_mask, "\n")
print(20 * '-')
print("combined_mask:", combined_mask, "\n")
print(100 * '-')

print("DecoderLayer: \n")
# hyperparameters:
d_model = 4
num_heads = 2
dff = 8

# construct decoder layer
dec_layer = DecoderLayer(d_model, num_heads, dff)

# create masks
zh_padding_mask = create_padding_mask(zh)
look_ahead_mask = create_look_ahead_mask(zh.shape[-1])
combined_mask = tf.maximum(zh_padding_mask, look_ahead_mask)

# run the decoder layer (training=False)
dec_out, dec_self_attention_weights, dec_enc_attention_weights = dec_layer(
    emb_zh, enc_out, False, combined_mask, en_padding_mask)

print("emb_zh:", emb_zh)
print(20 * '-')
print("enc_out:", enc_out)
print(20 * '-')
print("dec_out:", dec_out)