def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    # Judging from the code, initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a uniform (Xavier) distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
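# Hedged sketch (assumption, in the Annotated-Transformer style the snippet above follows, not part
# of the original source): a model built by make_model() still needs a "subsequent" mask so the
# decoder's self-attention cannot look at future target positions. A standalone helper:
import torch

def subsequent_mask(size: int) -> torch.Tensor:
    """Return a (1, size, size) mask that is True at positions the decoder may attend to."""
    future = torch.triu(torch.ones(1, size, size, dtype=torch.uint8), diagonal=1)
    return future == 0

# Example: subsequent_mask(4)[0] is lower-triangular, so position i only attends to positions <= i.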
def __init__(self, vocab_size, d_model, N, heads, dropout):
    super().__init__()
    self.N = N
    self.embed = Embedder(vocab_size, d_model)
    self.pe = PositionalEncoder(d_model, dropout=dropout)
    self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
    self.norm = Norm(d_model)
def __init__(self, bert, hidden_size, num_hidden_layers, num_attention_heads, dropout):
    super().__init__()
    self.N = num_hidden_layers
    self.bert = bert
    self.pe = PositionalEncoder(hidden_size, dropout=dropout)
    self.layers = get_clones(DecoderLayer(hidden_size, num_attention_heads, dropout),
                             num_hidden_layers)
    self.norm = Norm(hidden_size)
def make_model_newast(src_vocab, tgt_vocab, ast_vocab, kg_embed, N=6, d_model=512,
                      d_ff=2048, d_intermediate=512, h=8, dropout=0.1):
    "Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    attn_ent = MultiHeadedAttention(h, d_model)
    attn_ast = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    with open(kg_embed, "r", encoding='utf-8') as f:
        lines = json.loads(f.read())
    vecs = list()
    # vecs.append([0] * 100)  # CLS
    for (i, line) in enumerate(lines):
        if line == "ent_embeddings":
            for vec in lines[line]:
                vec = [float(x) for x in vec]
                vecs.append(vec)
    embed = torch.FloatTensor(vecs)
    model = EncoderDecoder4newAST(
        Encoder4KG(EncoderLayer4KG(d_model, d_intermediate, c(attn), c(attn_ent), c(ff), dropout), N),
        Encoder4newAST(EncoderLayer4newAST(d_model, c(attn_ast), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        torch.nn.Embedding.from_pretrained(embed),
        nn.Sequential(Embeddings(d_model, 5000), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    # Judging from the code, initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a uniform (Xavier) distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
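# Hedged illustration (inferred from the kg_embed loading loop above, not from any original
# documentation): the kg_embed file is assumed to be a JSON object whose "ent_embeddings" key holds
# a list of equal-length float vectors; torch.nn.Embedding.from_pretrained then wraps that matrix
# as a frozen entity-embedding table. The file name below is illustrative.
import json
import torch

toy_kg = {"ent_embeddings": [[0.12, -0.03, 0.44], [0.48, 0.07, -0.21]]}
with open("toy_kg_embed.json", "w", encoding="utf-8") as f:
    json.dump(toy_kg, f)

with open("toy_kg_embed.json", "r", encoding="utf-8") as f:
    vecs = [[float(x) for x in v] for v in json.loads(f.read())["ent_embeddings"]]
entity_embedding = torch.nn.Embedding.from_pretrained(torch.FloatTensor(vecs))  # frozen by default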
def __init__(self, n_trg_vocab, d_word_vec, n_layers, n_head, d_k, d_v,
             d_model, d_inner, pad_idx, n_position=200, dropout=0.1):
    """Set the hyper-parameters and build the layers."""
    super(Decoder, self).__init__()
    self.trg_word_emb = nn.Embedding(n_trg_vocab, d_word_vec, padding_idx=pad_idx)
    self.position_enc = PositionalEncoding(d_word_vec, n_position=n_position)
    self.dropout = nn.Dropout(p=dropout)
    self.layer_stack = nn.ModuleList([
        DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, vocab_size, d_model, N, heads, dropout, decoder_extra_layers,
             field, word_emb, opt):
    super().__init__()
    self.N = N
    self.opt = opt
    self.word_emb = word_emb  # unused, just for querying
    self.embed = Embedder(vocab_size, d_model, word_emb, field)
    self.pe = PositionalEncoder(d_model, dropout=dropout)
    self.layers = get_clones(
        DecoderLayer(d_model, heads, decoder_extra_layers, dropout),
        N, decoder_extra_layers)
    self.norm = Norm(d_model)
def __init__(
        self, n_tgt_vocab, len_max_seq, d_word_vec, n_layers, n_head, d_k, d_v,
        d_model, d_inner, dropout=0.1, pretrained_embeddings=None):
    super().__init__()
    n_position = len_max_seq + 1
    self.layer_stack = nn.ModuleList([
        DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
def __init__(self, bpe_size, h, d_model, p, d_ff):
    super(Transformer, self).__init__()
    self.bpe_size = bpe_size
    self.word_emb = nn.Embedding(bpe_size, d_model, padding_idx=0)
    self.pos_emb = PositionalEncoding(d_model, p)
    self.encoder = nn.ModuleList([EncoderLayer(h, d_model, p, d_ff) for _ in range(6)])
    self.decoder = nn.ModuleList([DecoderLayer(h, d_model, p, d_ff) for _ in range(6)])
    self.generator = nn.Linear(d_model, bpe_size, bias=False)
    # tie the weights of the word embedding and the generator
    self.generator.weight = self.word_emb.weight
    self.logsoftmax = nn.LogSoftmax(dim=-1)  # explicit dim over the vocabulary axis
    # pre-compute a mask that hides future positions in the decoder self-attention;
    # register it as a buffer so it is not recreated and moved to the GPU on every call
    mask = torch.ByteTensor(np.triu(np.ones((512, 512)), k=1).astype('uint8'))
    self.register_buffer('mask', mask)
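# Hedged sketch (not from the original model above): how a pre-registered (512, 512) future-position
# mask like self.mask is typically sliced per batch and combined with a padding mask before the
# decoder self-attention. The padding index 0 matches nn.Embedding(padding_idx=0) above.
import numpy as np
import torch

full_mask = torch.from_numpy(np.triu(np.ones((512, 512)), k=1).astype('uint8'))  # 1 = future position
tgt = torch.tensor([[5, 7, 9, 0]])                            # toy target batch, 0 = padding
seq_len = tgt.size(1)
future_mask = full_mask[:seq_len, :seq_len]                   # (seq_len, seq_len)
pad_mask = (tgt == 0).unsqueeze(1).to(torch.uint8)            # (batch, 1, seq_len)
blocked = (future_mask.unsqueeze(0) + pad_mask) > 0           # True where attention must be blocked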
def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
             maximum_position_encoding, rate=0.1):
    super(ImageCaptioningDecoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
    self.dec_layers = [
        DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(rate)
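# Hedged sketch (assumption, not the original implementation): a call() method in the style of the
# TensorFlow transformer tutorial that this __init__ is usually paired with. The DecoderLayer call
# signature (x, enc_output, training, look_ahead_mask, padding_mask) is assumed here.
import tensorflow as tf

def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    seq_len = tf.shape(x)[1]
    x = self.embedding(x)                                   # (batch, seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))    # scale embeddings before adding positions
    x += self.pos_encoding[:, :seq_len, :]
    x = self.dropout(x, training=training)
    for layer in self.dec_layers:
        x, _, _ = layer(x, enc_output, training, look_ahead_mask, padding_mask)
    return x                                                # (batch, seq_len, d_model)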
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=.1):
    """Construct a model from hyper-parameters."""
    c = copy.deepcopy
    attn_rpr = MultiHeadedAttention_RPR(d_model, h, max_relative_position=5)
    attn = MultiHeadedAttention(d_model, h)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn_rpr), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn_rpr), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
def __init__(self, n_tgt_vocab, n_max_seq, n_layers=6, n_head=8, d_k=64, d_v=64,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super(Decoder, self).__init__()
    n_position = n_max_seq + 1
    self.n_max_seq = n_max_seq
    self.d_model = d_model
    self.position_enc = nn.Embedding(n_position, d_word_vec, padding_idx=Constants.PAD)
    self.position_enc.weight.data = position_encoding_init(n_position, d_word_vec)
    self.tgt_word_emb = nn.Embedding(n_tgt_vocab, d_word_vec, padding_idx=Constants.PAD)
    self.dropout = nn.Dropout(dropout)
    self.layer_stack = nn.ModuleList([
        DecoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
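# Hedged sketch (assumption, mirroring the sinusoidal table commonly paired with this decoder):
# the position_encoding_init used above typically builds a fixed sin/cos table and leaves row 0 as
# zeros for the PAD position.
import numpy as np
import torch

def position_encoding_init(n_position, d_pos_vec):
    """Sinusoidal position-encoding table of shape (n_position, d_pos_vec)."""
    position_enc = np.array([
        [pos / np.power(10000, 2 * (j // 2) / d_pos_vec) for j in range(d_pos_vec)]
        if pos != 0 else np.zeros(d_pos_vec)
        for pos in range(n_position)])
    position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # even dimensions
    position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # odd dimensions
    return torch.from_numpy(position_enc).type(torch.FloatTensor)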
def __init__(self, d_input, d_model, N, heads, dropout):
    super().__init__()
    self.N = N
    self.layers = get_clones(
        DecoderLayer(d_input, d_model, heads, dropout), N)
    self.norm = nn.LayerNorm(d_model)
def make_model_ast(src_vocab, tgt_vocab, voc_size, device, kg_embed, ast_embed, N=6,
                   d_model=512, d_ff=2048, d_intermediate=512, h=8, dropout=0.1,
                   embedding_dim=512, hidden_size=512):
    # "Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    attn_ent = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    with open(kg_embed, "r", encoding='utf-8') as f:
        lines = json.loads(f.read())
    vecs = list()
    # vecs.append([0] * 100)  # CLS
    for (i, line) in enumerate(lines):
        if line == "ent_embeddings":
            for vec in lines[line]:
                vec = [float(x) for x in vec]
                vecs.append(vec)
    embed = torch.FloatTensor(vecs)
    ast_size = voc_size
    path_listtensor = list()
    lstm = LSTM(voc_size, embedding_dim, hidden_size)
    # bilstm = BiLSTM(voc_size, embedding_dim, hidden_size, device)
    # for k in ast_embed.keys():
    #     pathlist = ast_embed[k]
    #     path_tensor = torch.zeros(voc_size).long()
    #     path_tensor = Variable(path_tensor).to(device)
    #     for p in pathlist:
    #         outpath = torch.zeros(voc_size).long()
    #         outpath = Variable(outpath).to(device)
    #         p.unsqueeze(0)
    #         hidden = lstm.init_hidden().to(device)
    #         cell_state = lstm.init_cell_state().to(device)
    #         # cell_state_b = lstm.init_cell_state().to(device)
    #         i = 0
    #         while i < (list(p.size())[0]):
    #             # output, hiddenout, cell_state, cell_state_b = bilstm.forward(p[i], p[list(p.size())[0] - 1 - i], hidden, cell_state, cell_state_b)
    #             output, hidden, cell_state = lstm(p[i], hidden, cell_state)
    #             i += 1
    #         outpath += output
    #         path_tensor += outpath
    #
    #     path_float = list()
    #     for x in path_tensor:
    #         path_float.append(float(x))
    #     path_listtensor.append(path_float)
    # ast_embed = torch.FloatTensor(path_listtensor)
    model = EncoderDecoder4AST(
        Encoder4AST(EncoderLayer4AST(d_model, d_intermediate, ast_size, c(attn), c(attn_ent),
                                     c(ff), dropout, voc_size, embedding_dim, hidden_size,
                                     device), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        torch.nn.Embedding.from_pretrained(embed),
        # nn.Sequential(Embeddings(d_model, voc_size)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    # Judging from the code, initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a uniform (Xavier) distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model