def __init__(self, config, params):
    super(Context_att, self).__init__()
    # sizes, special token ids and switches taken from params
    self.word_num = params.word_num
    self.label_num = params.label_num
    self.char_num = params.char_num
    self.id2word = params.word_alphabet.id2word
    self.word2id = params.word_alphabet.word2id
    self.padID = params.word_alphabet.word2id['<pad>']
    self.unkID = params.word_alphabet.word2id['<unk>']
    self.use_cuda = params.use_cuda
    self.add_char = params.add_char
    self.static = params.static
    # hyper-parameters taken from config
    self.feature_count = config.shrink_feature_thresholds
    self.word_dims = config.word_dims
    self.char_dims = config.char_dims
    self.lstm_hiddens = config.lstm_hiddens
    self.attention_size = config.attention_size
    self.dropout_emb = nn.Dropout(p=config.dropout_emb)
    self.dropout_lstm = nn.Dropout(p=config.dropout_lstm)
    self.lstm_layers = config.lstm_layers
    self.batch_size = config.train_batch_size

    # word embedding table, optionally initialised from pre-trained vectors
    self.embedding = nn.Embedding(self.word_num, self.word_dims)
    self.embedding.weight.requires_grad = True
    if self.static:
        # frozen copy of the embeddings used as a static channel
        self.embedding_static = nn.Embedding(self.word_num, self.word_dims)
        self.embedding_static.weight.requires_grad = False
    if params.pretrain_word_embedding is not None:
        # pretrain_weight = np.array(params.pretrain_word_embedding)
        # self.embedding.weight.data.copy_(torch.from_numpy(pretrain_weight))
        # pretrain_weight = np.array(params.pretrain_embed)
        pretrain_weight = torch.FloatTensor(params.pretrain_word_embedding)
        self.embedding.weight.data.copy_(pretrain_weight)
        # for id in range(self.word_dims):
        #     self.embedding.weight.data[self.eofID][id] = 0

    # bidirectional LSTM encoder; the input size doubles when the static
    # embedding channel is concatenated with the tuned embeddings
    if params.static:
        self.lstm = nn.LSTM(self.word_dims * 2, self.lstm_hiddens // 2,
                            num_layers=self.lstm_layers, bidirectional=True,
                            dropout=config.dropout_lstm)
    else:
        self.lstm = nn.LSTM(self.word_dims, self.lstm_hiddens // 2,
                            num_layers=self.lstm_layers, bidirectional=True,
                            dropout=config.dropout_lstm)

    self.hidden2label = nn.Linear(self.lstm_hiddens, self.label_num)
    self.hidden = self.init_hidden(self.batch_size, self.lstm_layers)

    # attention over the full sentence and over the left / right contexts
    # self.attention = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)
    # self.attention_l = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)
    # self.attention_r = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)
    self.attention = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)
    self.attention_l = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)
    self.attention_r = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)

    # per-context projections to the label space
    self.linear = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)
    self.linear_l = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)
    self.linear_r = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)
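
# Sketch (assumption): init_hidden is called above but its definition is not
# shown in this section. For a bidirectional LSTM with lstm_layers layers and
# lstm_hiddens // 2 units per direction, the initial (h_0, c_0) state is
# typically built as below; the repository's actual method may differ.
def init_hidden(self, batch_size, num_layers):
    h_0 = torch.zeros(num_layers * 2, batch_size, self.lstm_hiddens // 2)
    c_0 = torch.zeros(num_layers * 2, batch_size, self.lstm_hiddens // 2)
    if self.use_cuda:
        h_0, c_0 = h_0.cuda(), c_0.cuda()
    return h_0, c_0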
def __init__(self, config, params):
    super(Context_att_gate, self).__init__()
    # sizes, special token ids and switches taken from params
    self.word_num = params.word_num
    self.label_num = params.label_num
    self.char_num = params.char_num
    self.category_num = params.category_num
    self.id2word = params.word_alphabet.id2word
    self.word2id = params.word_alphabet.word2id
    self.padID = params.word_alphabet.word2id['<pad>']
    self.unkID = params.word_alphabet.word2id['<unk>']
    self.use_cuda = params.use_cuda
    self.add_char = params.add_char
    self.static = params.static
    # hyper-parameters taken from config
    self.feature_count = config.shrink_feature_thresholds
    self.word_dims = config.word_dims
    self.char_dims = config.char_dims
    self.lstm_hiddens = config.lstm_hiddens
    self.attention_size = config.attention_size
    self.dropout_emb = nn.Dropout(p=config.dropout_emb)
    self.dropout_lstm = nn.Dropout(p=config.dropout_lstm)
    self.lstm_layers = config.lstm_layers
    self.batch_size = config.train_batch_size

    # word embeddings (optionally pre-trained) and jointly learned label embeddings
    self.embedding = nn.Embedding(self.word_num, self.word_dims)
    self.embedding.weight.requires_grad = True
    self.embedding_label = nn.Embedding(self.label_num, self.word_dims)
    self.embedding_label.weight.requires_grad = True
    if params.pretrain_word_embedding is not None:
        # pretrain_weight = np.array(params.pretrain_word_embedding)
        # self.embedding.weight.data.copy_(torch.from_numpy(pretrain_weight))
        # pretrain_weight = np.array(params.pretrain_embed)
        pretrain_weight = torch.FloatTensor(params.pretrain_word_embedding)
        self.embedding.weight.data.copy_(pretrain_weight)
        # for id in range(self.word_dims):
        #     self.embedding.weight.data[self.eofID][id] = 0

    # bidirectional LSTM encoder over a word_dims * 2 input
    # (word embedding concatenated with a second embedding channel)
    self.lstm = nn.LSTM(self.word_dims * 2, self.lstm_hiddens // 2,
                        num_layers=self.lstm_layers, bidirectional=True,
                        dropout=config.dropout_lstm)
    # nn.init.xavier_uniform(self.lstm.all_weights[0][0])
    # nn.init.xavier_uniform(self.lstm.all_weights[0][1])
    # nn.init.xavier_uniform(self.lstm.all_weights[1][0])
    # nn.init.xavier_uniform(self.lstm.all_weights[1][1])

    self.hidden2label = nn.Linear(self.lstm_hiddens, self.category_num)
    self.hidden = self.init_hidden(self.batch_size, self.lstm_layers)

    # attention over the full sentence and over the left / right contexts
    self.attention = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)
    self.attention_l = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)
    self.attention_r = Attention_b(self.lstm_hiddens, self.attention_size, self.use_cuda)

    # gate parameters combining the attention outputs (their use is sketched below)
    self.w1 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.w2 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.w3 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.u1 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.u2 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.u3 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
    self.b1 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))
    self.b2 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))
    self.b3 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))
    nn.init.xavier_uniform(self.w1)
    nn.init.xavier_uniform(self.w2)
    nn.init.xavier_uniform(self.w3)
    nn.init.xavier_uniform(self.u1)
    nn.init.xavier_uniform(self.u2)
    nn.init.xavier_uniform(self.u3)
    nn.init.xavier_uniform(self.b1)
    nn.init.xavier_uniform(self.b2)
    nn.init.xavier_uniform(self.b3)

    # final projection to the category space
    self.linear_2 = nn.Linear(self.lstm_hiddens, self.category_num, bias=True)
    nn.init.xavier_uniform(self.linear_2.weight)
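
# Sketch (assumption): the w*/u*/b* parameters above suggest gates of the form
# z = sigmoid(W h + U s + b) over (lstm_hiddens, batch_size) matrices, mixing
# the target representation with the left/right context attention outputs.
# The model's forward() is not shown in this section, so this hypothetical
# helper (_gate_sketch) is purely illustrative, not the repository's code.
def _gate_sketch(self, h, s_l, s_r):
    # h, s_l, s_r: tensors of shape (lstm_hiddens, batch_size)
    z_l = torch.sigmoid(torch.mm(self.w1, h) + torch.mm(self.u1, s_l) + self.b1)
    z_r = torch.sigmoid(torch.mm(self.w2, h) + torch.mm(self.u2, s_r) + self.b2)
    z_t = torch.sigmoid(torch.mm(self.w3, h) + torch.mm(self.u3, h) + self.b3)
    # gated combination of left-context, right-context and target states
    mixed = z_l * s_l + z_r * s_r + z_t * h
    return mixed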