def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg (Xavier uniform distribution).
    # See https://zhuanlan.zhihu.com/p/74274453 (weight initialization, Xavier uniform).
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
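A small usage sketch for make_model; the toy vocabulary size and reduced depth are illustrative only, and it assumes the Annotated Transformer building blocks (EncoderDecoder, MultiHeadedAttention, and friends) plus copy and torch.nn as nn are already in scope.

# Toy instantiation: 11-token vocab, 2 layers instead of 6 (illustrative values).
tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print(tmp_model)  # inspect the stacked encoder/decoder module tree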
def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
             max_position_encoding, drop_rate=0.1, trainable=True, name=None,
             dtype=None, dynamic=False, **kwargs):
    super(Decoder, self).__init__(trainable, name, dtype, dynamic, **kwargs)
    self.d_model = d_model
    self.num_layers = num_layers

    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    self.pos_encoding = PositionEncoder().position_encoding(
        max_position_encoding, d_model)
    self.dec_layers = [
        DecoderLayer(d_model, num_heads, dff, drop_rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate=drop_rate)
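The __init__ above pairs naturally with a forward pass. Below is a minimal sketch of the matching call method in the style of the standard TensorFlow Transformer tutorial; the sqrt(d_model) embedding scaling, the pos_encoding slicing, and the DecoderLayer return signature (output plus two attention-weight maps) are assumptions about this codebase, not confirmed by the snippet.

# Hedged sketch of Decoder.call, assuming the conventional TF-tutorial layout.
def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    seq_len = tf.shape(x)[1]
    attention_weights = {}

    x = self.embedding(x)                                  # (batch, seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))   # scale embeddings
    x += self.pos_encoding[:, :seq_len, :]                 # add positional encoding
    x = self.dropout(x, training=training)

    for i in range(self.num_layers):
        # Assumed DecoderLayer signature: output plus two attention maps.
        x, block1, block2 = self.dec_layers[i](
            x, enc_output, training, look_ahead_mask, padding_mask)
        attention_weights['decoder_layer{}_block1'.format(i + 1)] = block1
        attention_weights['decoder_layer{}_block2'.format(i + 1)] = block2

    return x, attention_weights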
def __init__(self, data, num_layers, num_heads, dff, rate=0.1):
    super(Decoder, self).__init__()
    self.num_layers = num_layers
    self.embed = Embedding(data)
    self.maxlength = self.embed.maxl
    self.d_model = self.embed.d_model
    self.pos_encoding = PositionalEncoder(self.maxlength, self.d_model)
    self.dec_layers = [DecoderLayer(self.d_model, num_heads, dff, rate)
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
def _make_model(self, num_tgt_chars, N, d_model, d_ff, h, dropout):
    """
    :param num_tgt_chars: output space
    :param N: number of decoder and encoder layers
    :param d_model: model dimensionality
    :param d_ff: hidden size of the feed-forward network
    :param h: number of attention heads
    :param dropout: dropout rate
    :return: model
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)

    if self.config.USE_RESNET:
        feature_extractor = ResNet(block=BasicBlock,
                                   layers=self.config.RESNET_LAYERS,
                                   d_model=self.config.D_MODEL)
    else:
        feature_extractor = FeatureExtractionNetwork(d_model=self.config.D_MODEL)

    direction_embed = Embeddings(d_model, 2)

    model = EncoderDecoder(
        encoder=Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        decoder=Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        tgt_embed=nn.Sequential(Embeddings(d_model, num_tgt_chars), c(position)),
        generator=PredictionLayer(d_model, num_tgt_chars),
        feature_extractor=feature_extractor,
        prediction_layer=PredictionLayer(d_model, len(Dataset.CHAR_ID_MAP)),
        bidirectional_decoding=self.config.BIDIRECTIONAL_DECODING,
        direction_embed=direction_embed,
        device=self.device
    )

    # Initialize parameters with Xavier normal.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_normal_(p)

    logging.info("Model created")
    return model
def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size, rate=0.1):
    super(Decoder, self).__init__()
    self.d_model = d_model

    # Build the word-embedding layer for Chinese (the target language).
    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    # Note: the positional-encoding table is sized by target_vocab_size here,
    # which only works when that value exceeds the maximum target sequence length.
    self.pos_encoding = positional_encoding(target_vocab_size, self.d_model)

    self.dec_layers = [
        DecoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate)
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionWiseFeedForward(d_model, d_ff, dropout)  # d_ff was missing
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        # DecoderLayer takes separate self-attention and source-attention copies.
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        # c(position) belongs in the Sequential, not inside Embeddings.
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place variant; bare xavier_uniform is deprecated
    return model
def __init__(self, num_layers, target_vocab_size, max_length,
             d_model, num_heads, dff, rate=0.1):
    super(DecoderModel, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.max_length = max_length

    self.embedding = keras.layers.Embedding(target_vocab_size, self.d_model)
    # position_embedding.shape: (1, max_length, d_model)
    self.position_embedding = get_position_embedding(
        self.max_length, self.d_model)

    self.dropout = keras.layers.Dropout(rate)
    self.decoder_layers = [
        DecoderLayer(d_model, num_heads, dff, rate)
        for _ in range(self.num_layers)
    ]
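A hedged instantiation sketch for the DecoderModel above; every hyperparameter value here is illustrative, not taken from the source.

# Illustrative values only; assumes DecoderModel is defined as above.
sample_decoder = DecoderModel(num_layers=2, target_vocab_size=8000,
                              max_length=40, d_model=128, num_heads=8, dff=512)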
def __init__(self, tgt_size, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    super(Transformer, self).__init__()
    self.tgt_size = tgt_size
    c = copy.deepcopy
    self.attn = MultiHeadedAttn(h, d_model, dropout)
    self.ffn = PositionwiseFeedForward(d_model, d_ff, dropout)
    self.position_in = PositionalEncoding(d_model, dropout)
    self.position_out = PositionalEncoding(d_model, dropout)
    self.encoder = Encoder(
        EncoderLayer(d_model, c(self.attn), c(self.ffn), dropout), N)
    self.decoder = Decoder(
        DecoderLayer(d_model, c(self.attn), c(self.attn), c(self.ffn), dropout), N)
    self.tgt_embed = Embeddings(d_model, tgt_size)
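This Transformer stores its own encoder, decoder, and target embedding but shows no forward pass. Below is a minimal sketch of one, assuming src arrives already embedded (the module defines no src_embed of its own) and the usual mask-passing conventions of the Annotated Transformer; both assumptions are mine.

# Hedged sketch: a plausible forward pass for the Transformer above.
def forward(self, src, tgt, src_mask, tgt_mask):
    # src is assumed pre-embedded, since __init__ defines no src_embed.
    memory = self.encoder(self.position_in(src), src_mask)
    out = self.decoder(self.position_out(self.tgt_embed(tgt)),
                       memory, src_mask, tgt_mask)
    return out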
print("zh_padding_mask:", zh_padding_mask, "\nzh_padding_mask.shape:", zh_padding_mask.shape) print(20 * '-') print("look_ahead_mask:", look_ahead_mask, "\n") print(20 * '-') print("combined_mask:", combined_mask, "\n") print(100 * '-') print("DecoderLayer: \n") # hyperparameters: d_model = 4 num_heads = 2 dff = 8 # construt decoder layer dec_layer = DecoderLayer(d_model, num_heads, dff) # create masks zh_padding_mask = create_padding_mask(zh) look_ahead_mask = create_look_ahead_mask(zh.shape[-1]) combined_mask = tf.maximum(zh_padding_mask, look_ahead_mask) # init decoder layer dec_out, dec_self_attention_weights, dec_enc_attention_weights = dec_layer( emb_zh, enc_out, False, combined_mask, en_padding_mask) print("emb_zh:", emb_zh) print(20 * '-') print("enc_out:", enc_out) print(20 * '-') print("dec_out:", dec_out)