# IndividualTF.__init__ — builds a full encoder-decoder Transformer (PyTorch).
def __init__(self, enc_inp_size, dec_inp_size, dec_out_size, N=6, d_model=512,
             d_ff=2048, heads=8, dropout=0.1, mean=(0, 0), std=(0, 0)):
    """Construct the model from hyperparameters."""
    super(IndividualTF, self).__init__()
    c = copy.deepcopy
    attn = MultiHeadAttention(heads, d_model)
    ff = PointerwiseFeedforward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    self.mean = np.array(mean)
    self.std = np.array(std)
    self.model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(LinearEmbedding(enc_inp_size, d_model), c(position)),
        nn.Sequential(LinearEmbedding(dec_inp_size, d_model), c(position)),
        Generator(d_model, dec_out_size))
    # Initialize parameters with Glorot / fan_avg, as in the reference
    # (annotated-transformer) implementation.
    for p in self.model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
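# Why `c = copy.deepcopy` above: every EncoderLayer/DecoderLayer must own
# independent weights, so the shared `attn`/`ff` prototypes are cloned rather
# than reused. A minimal, self-contained check of that pattern (plain PyTorch,
# no repo classes required; the names below are illustrative only):
import copy
import torch.nn as nn

proto = nn.Linear(512, 512)
clones = nn.ModuleList([copy.deepcopy(proto) for _ in range(6)])
# Each clone holds its own parameter storage:
assert clones[0].weight.data_ptr() != clones[1].weight.data_ptr()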
# Encoder.__init__ — token embedding + learned positional embedding (PyTorch).
def __init__(self, n_src_vocab, d_word_vec, n_layer, n_head, d_model, d_inner,
             pad_idx, dropout=0.1, n_position=200, max_seq_len=32):
    super(Encoder, self).__init__()
    self.src_emb = nn.Embedding(n_src_vocab, d_word_vec, padding_idx=pad_idx)
    # Learned positional embeddings are used instead of the fixed sinusoidal
    # table; note that `n_position` is unused while the line below is disabled.
    # self.pos_emb = PositionalEncoding(d_word_vec, n_position=n_position)
    self.pos_emb = nn.Embedding(max_seq_len, d_word_vec)
    self.dropout = nn.Dropout(p=dropout)
    self.layer_stack = nn.ModuleList([
        EncoderLayer(d_model=d_model, d_inner=d_inner, n_head=n_head,
                     dropout=dropout)
        for _ in range(n_layer)])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
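# The forward pass is not shown here, but a learned positional table of this
# kind is normally indexed with positions 0..seq_len-1 and added to the token
# embeddings. A shape sketch under that assumption (the forward itself is an
# assumption, not taken from this file):
import torch
import torch.nn as nn

max_seq_len, d_word_vec = 32, 512
src_emb = nn.Embedding(1000, d_word_vec, padding_idx=0)
pos_emb = nn.Embedding(max_seq_len, d_word_vec)
tokens = torch.randint(1, 1000, (4, 20))          # (batch, seq_len)
positions = torch.arange(tokens.size(1))          # 0 .. seq_len-1
x = src_emb(tokens) + pos_emb(positions)          # broadcast to (4, 20, 512)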
# EncoderY.__init__ — Transformer encoder over trajectories combined with a
# map latent (PyTorch).
def __init__(self, enc_inp_size, d_latent, N=6, d_model=512, d_ff=2048, h=8,
             dropout=0.1, device='cpu', d_map_latent=8):
    super(EncoderY, self).__init__()
    self.d_model = d_model
    # The embedding is d_map_latent narrower than d_model, leaving room for
    # the map encoder's latent in the encoder input.
    self.embed_fn = nn.Sequential(
        LinearEmbedding(enc_inp_size, d_model - d_map_latent),
        PositionalEncoding(d_model - d_map_latent, dropout))
    self.encoder = Encoder(
        EncoderLayer(d_model, MultiHeadAttention(h, d_model),
                     PointerwiseFeedforward(d_model, d_ff, dropout), dropout),
        N)
    self.fc = nn.Linear(d_model, d_latent)
    self.init_weights(self.encoder.parameters())
    self.init_weights(self.fc.parameters())
    self.map_encoder = load_map_encoder(device)
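# The d_model - d_map_latent embedding width strongly suggests the map latent
# is concatenated feature-wise with the embedded trajectory before the encoder
# runs. A shape-only sketch under that assumption (all names and sizes below
# are illustrative, not taken from this file):
import torch

d_model, d_map_latent = 512, 8
traj_feat = torch.rand(4, 12, d_model - d_map_latent)  # embed_fn output
map_feat = torch.rand(4, 12, d_map_latent)             # per-step map latent
enc_inp = torch.cat([traj_feat, map_feat], dim=-1)     # (4, 12, d_model)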
# Generator.__init__ — encoder-decoder Transformer used as the GAN generator
# (PyTorch).
def __init__(self, src_len, tgt_len, enc_inp_size, dec_inp_size, dec_out_size,
             N=6, d_model=512, d_ff=2048, h=8, dropout=0.1, device='cpu'):
    super(Generator, self).__init__()
    self.device = device
    self.src_len = src_len
    self.tgt_len = tgt_len
    self.dec_inp_size = dec_inp_size
    c = copy.deepcopy
    attn = MultiHeadAttention(h, d_model)
    ff = PointerwiseFeedforward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    self.generator = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(LinearEmbedding(enc_inp_size, d_model), c(position)),
        nn.Sequential(LinearEmbedding(dec_inp_size, d_model), c(position)),
        TFHeadGenerator(d_model, dec_out_size))
    # Initialize parameters with Glorot / fan_avg, as in the reference
    # (annotated-transformer) implementation.
    for p in self.generator.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
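# What the init loop does: every parameter with two or more dimensions (i.e.
# weight matrices, not biases) is redrawn from the Glorot/fan_avg uniform
# range ±sqrt(6 / (fan_in + fan_out)). A self-contained check of that bound:
import torch.nn as nn

w = nn.Linear(512, 2048).weight        # 2-D parameter, selected by p.dim() > 1
nn.init.xavier_uniform_(w)
bound = (6.0 / (512 + 2048)) ** 0.5
assert w.abs().max().item() <= bound + 1e-6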
# Critic.__init__ — encoder-only Transformer with a scalar scoring head
# (PyTorch).
def __init__(self, disc_inp_size, disc_seq_len, N=6, d_model=512, d_ff=2048,
             h=8, dropout=0.1, device='cpu'):
    super(Critic, self).__init__()
    self.device = device
    c = copy.deepcopy
    attn = MultiHeadAttention(h, d_model)
    ff = PointerwiseFeedforward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    self.critic = nn.ModuleDict({
        'src_embed': nn.Sequential(LinearEmbedding(disc_inp_size, d_model),
                                   c(position)),
        'encoder': Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        'disc_head': nn.Sequential(nn.Flatten(),
                                   nn.Linear(d_model * disc_seq_len, 1)),
    })
    for p in self.critic.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
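# disc_head flattens the encoder output and maps it to one realism score per
# sequence. A self-contained shape check of exactly that head (batch size and
# sequence length below are illustrative):
import torch
import torch.nn as nn

d_model, disc_seq_len = 512, 12
head = nn.Sequential(nn.Flatten(), nn.Linear(d_model * disc_seq_len, 1))
enc_out = torch.rand(4, disc_seq_len, d_model)   # (batch, seq_len, d_model)
score = head(enc_out)                            # (4, 1): one score per sample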
# Encoder.__init__ — TensorFlow/Keras variant with sinusoidal positional
# encoding.
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             maximum_position_encoding, rate=0.1):
    super(Encoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding,
                                            self.d_model)
    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
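# This constructor matches the TensorFlow Transformer tutorial, where the
# layer also defines call(x, training, mask). A hypothetical instantiation
# under that assumption (the call signature is not shown in this file, and
# Encoder, EncoderLayer, and positional_encoding are assumed importable):
import tensorflow as tf

enc = Encoder(num_layers=4, d_model=128, num_heads=8, dff=512,
              input_vocab_size=8500, maximum_position_encoding=10000)
x = tf.random.uniform((64, 38), minval=0, maxval=8500, dtype=tf.int64)
# out = enc(x, training=False, mask=None)   # expected (64, 38, 128) if the
#                                           # tutorial's call() is used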