def __init__(self, vocab, num_classes, char_alphabet):
    """Mention-level CNN classifier on top of word + char-CNN features.

    Args:
        vocab: vocabulary object providing ``init_embed_layer()``.
        num_classes: number of output classes.
        char_alphabet: character alphabet; its length sizes the char embedding.
    """
    super(CNNCNN, self).__init__()
    self.embedding = vocab.init_embed_layer()
    # FIX: ``self.hidden_size = opt.hidden_size`` appeared twice with the
    # same value in the original; the duplicate assignment is removed.
    self.hidden_size = opt.hidden_size
    # Character-level CNN producing a fixed-size feature per token.
    self.char_hidden_dim = 10
    self.char_embedding_dim = 20
    self.char_feature = CharCNN(len(char_alphabet), None,
                                self.char_embedding_dim,
                                self.char_hidden_dim,
                                opt.dropout, opt.gpu)
    # Token representation width = word embedding dim + char-CNN feature dim.
    D = self.embedding.weight.size(1) + self.char_hidden_dim
    # Mention CNN over the concatenated token representations.
    Ci = 1
    Co = opt.kernel_num
    # NOTE(review): filtering out "," characters only supports single-digit
    # kernel sizes (e.g. "3,4,5"); a size like "10" would be split apart.
    Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
    self.convs1 = nn.ModuleList([
        nn.Conv2d(in_channels=Ci,
                  out_channels=Co,
                  kernel_size=(K, D),
                  stride=(1, 1),
                  padding=(K // 2, 0),
                  dilation=1,
                  bias=False) for K in Ks
    ])
    self.hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
    self.out = nn.Linear(self.hidden_size, num_classes)
    self.dropout = nn.Dropout(opt.dropout)
def __init__(self, vocab, num_classes, char_alphabet):
    """Mention CNN + sentence-level bidirectional GRU classifier.

    Args:
        vocab: vocabulary object providing ``init_embed_layer()``.
        num_classes: number of output classes.
        char_alphabet: character alphabet; its length sizes the char embedding.
    """
    super(CNNCNN_SentLSTM, self).__init__()
    self.embedding = vocab.init_embed_layer()
    # FIX: ``self.hidden_size = opt.hidden_size`` appeared twice with the
    # same value in the original; the duplicate assignment is removed.
    self.hidden_size = opt.hidden_size
    # Character-level CNN feature extractor.
    self.char_hidden_dim = 10
    self.char_embedding_dim = 20
    self.char_feature = CharCNN(len(char_alphabet), None,
                                self.char_embedding_dim,
                                self.char_hidden_dim,
                                opt.dropout, opt.gpu)
    self.embedding_size = self.embedding.weight.size(1)
    Ci = 1
    Co = opt.kernel_num
    # NOTE(review): filtering out "," characters only supports single-digit
    # kernel sizes (e.g. "3,4,5"); a size like "10" would be split apart.
    Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
    # Mention CNN over word embedding + char-CNN feature per token.
    D = self.embedding_size + self.char_hidden_dim
    self.convs1 = nn.ModuleList([
        nn.Conv2d(in_channels=Ci,
                  out_channels=Co,
                  kernel_size=(K, D),
                  stride=(1, 1),
                  padding=(K // 2, 0),
                  dilation=1,
                  bias=False) for K in Ks
    ])
    self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
    # Sentence-level encoder: bidirectional GRU (attribute kept as ``lstm``
    # for interface compatibility with the rest of the file).
    self.lstm_hidden = opt.hidden_size
    self.lstm = nn.GRU(self.embedding_size, self.lstm_hidden,
                       num_layers=1, batch_first=True, bidirectional=True)
    self.sent_hidden_size = opt.sent_hidden_size
    self.sent_hidden = nn.Linear(self.lstm_hidden * 2, self.sent_hidden_size)
    # Fuse mention and sentence representations before the output layer.
    self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size,
                            self.hidden_size)  # mention_hidden_size + sentence_hidden_size
    self.out = nn.Linear(self.hidden_size, num_classes)
    self.dropout = nn.Dropout(opt.dropout)
def __init__(self, vocab, num_classes, char_alphabet):
    """Attention-weighted classifier over word + char-CNN token features.

    Args:
        vocab: vocabulary object providing ``init_embed_layer()``.
        num_classes: number of output classes.
        char_alphabet: character alphabet; its length sizes the char embedding.
    """
    super(AttenCNN, self).__init__()
    self.embed_size = opt.word_emb_size
    self.embedding = vocab.init_embed_layer()
    self.hidden_size = opt.hidden_size
    # Character-level CNN feature extractor.
    self.char_hidden_dim = 10
    self.char_embedding_dim = 20
    self.char_feature = CharCNN(len(char_alphabet), None,
                                self.char_embedding_dim,
                                self.char_hidden_dim,
                                opt.dropout, opt.gpu)
    # Each token is represented by its word embedding concatenated with
    # the char-CNN feature; W scores each token for attention pooling.
    self.input_size = self.embed_size + self.char_hidden_dim
    self.W = nn.Linear(self.input_size, 1, bias=False)
    self.hidden = nn.Linear(self.input_size, self.hidden_size)
    self.out = nn.Linear(self.hidden_size, num_classes)
    self.dropout = nn.Dropout(opt.dropout)
def __init__(self, data):
    """Word representation layer: word, feature, and char-CNN embeddings.

    Args:
        data: configuration object carrying alphabet sizes, embedding
            dimensions, dropout rate, and pretrained feature embeddings.
    """
    super(WordRep, self).__init__()
    self.char_hidden_dim = data.char_hidden_dim  # 50
    self.char_embedding_dim = data.char_emb_dim  # 300
    self.char_feature = CharCNN(data.char_alphabet_size,
                                data.pretrain_char_embedding,
                                self.char_embedding_dim,
                                self.char_hidden_dim,
                                data.dropout)
    self.drop = nn.Dropout(data.dropout)
    # Word embeddings start from a random initialization.
    word_vocab = data.word_alphabet_size
    word_dim = data.word_emb_dim
    self.word_embedding = nn.Embedding(word_vocab, word_dim)
    self.word_embedding.weight.data.copy_(
        torch.from_numpy(self.random_embedding(word_vocab, word_dim)))
    # Feature embeddings are loaded from pretrained values.
    self.feature_embedding = nn.Embedding(data.feat_alphabet_size,
                                          data.feature_emb_dim)
    self.feature_embedding.weight.data.copy_(
        torch.from_numpy(data.pretrain_feature_embeddings))
def __init__(self, data):
    """Word representation with optional char encoder and translation BiLSTM.

    Args:
        data: configuration object carrying hyper-parameters, alphabets,
            and pretrained embeddings.
    """
    super(WordRep, self).__init__()
    print("build word representation...")
    self.gpu = data.HP_gpu
    self.use_char = data.use_char
    self.use_trans = data.use_trans
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    self.char_all_feature = False
    # Projection from word embeddings into the translation hidden space.
    self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)
    if self.use_trans:
        self.trans_hidden_dim = data.HP_trans_hidden_dim
        self.trans_embedding_dim = data.trans_emb_dim
        self.trans_feature = TransBiLSTM(data.translation_alphabet.size(),
                                         self.trans_embedding_dim,
                                         self.trans_hidden_dim,
                                         data.HP_dropout,
                                         data.pretrain_trans_embedding,
                                         self.gpu)
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        kind = data.char_seq_feature
        if kind == "CNN":
            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        self.char_embedding_dim,
                                        self.char_hidden_dim,
                                        data.HP_dropout, self.gpu)
        elif kind == "LSTM":
            self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                           self.char_embedding_dim,
                                           self.char_hidden_dim,
                                           data.HP_dropout,
                                           data.pretrain_char_embedding,
                                           self.gpu)
        elif kind == "GRU":
            self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                          self.char_embedding_dim,
                                          self.char_hidden_dim,
                                          data.HP_dropout, self.gpu)
        elif kind == "ALL":
            # "ALL" runs a CNN plus an extra BiLSTM char encoder.
            self.char_all_feature = True
            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        self.char_embedding_dim,
                                        self.char_hidden_dim,
                                        data.HP_dropout, self.gpu)
            self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(),
                                                 self.char_embedding_dim,
                                                 self.char_hidden_dim,
                                                 data.HP_dropout, self.gpu)
        else:
            print(
                "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
            )
            exit(0)
    self.embedding_dim = data.word_emb_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                       self.embedding_dim)
    # Load pretrained word vectors when available, otherwise random init.
    if data.pretrain_word_embedding is not None:
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_word_embedding))
    else:
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.word_alphabet.size(),
                                      self.embedding_dim)))
    # Additional hand-crafted feature embeddings, one per feature alphabet.
    self.feature_num = data.feature_num
    self.feature_embedding_dims = data.feature_emb_dims
    self.feature_embeddings = nn.ModuleList()
    for i in range(self.feature_num):
        self.feature_embeddings.append(
            nn.Embedding(data.feature_alphabets[i].size(),
                         self.feature_embedding_dims[i]))
    # Second pass (kept separate from the creation loop on purpose):
    # copy in pretrained feature vectors or random initializations.
    for i in range(self.feature_num):
        pretrained = data.pretrain_feature_embeddings[i]
        if pretrained is not None:
            self.feature_embeddings[i].weight.data.copy_(
                torch.from_numpy(pretrained))
        else:
            self.feature_embeddings[i].weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.feature_alphabets[i].size(),
                                          self.feature_embedding_dims[i])))
    if self.gpu:
        self.drop = self.drop.cuda()
        self.word_embedding = self.word_embedding.cuda()
        for i in range(self.feature_num):
            self.feature_embeddings[i] = self.feature_embeddings[i].cuda()
def __init__(self, data, use_position, use_cap, use_postag, use_char):
    """Word representation with optional char, capitalization, POS, and
    relative-position embeddings.

    Args:
        data: configuration object with alphabets and hyper-parameters.
        use_position: add two relative-position embeddings (for RE).
        use_cap: add a capitalization feature embedding.
        use_postag: add a POS-tag feature embedding.
        use_char: add a char-CNN feature extractor.
    """
    super(WordRep, self).__init__()
    self.gpu = data.HP_gpu
    self.use_char = use_char
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    self.char_all_feature = False
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        self.char_feature = CharCNN(data.char_alphabet.size(),
                                    data.pretrain_char_embedding,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim,
                                    data.HP_dropout, self.gpu)
    self.embedding_dim = data.word_emb_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                       self.embedding_dim)
    # Pretrained word vectors when available, otherwise random init.
    if data.pretrain_word_embedding is not None:
        word_init = torch.from_numpy(data.pretrain_word_embedding)
    else:
        word_init = torch.from_numpy(
            self.random_embedding(data.word_alphabet.size(),
                                  self.embedding_dim))
    self.word_embedding.weight.data.copy_(word_init)
    self.feature_num = 0
    self.feature_embedding_dims = data.feature_emb_dims
    self.feature_embeddings = nn.ModuleList()

    def _random_feature_embedding(name):
        # Build a randomly initialized embedding for the named feature alphabet.
        aid = data.feature_name2id[name]
        size = data.feature_alphabets[aid].size()
        dim = self.feature_embedding_dims[aid]
        emb = nn.Embedding(size, dim)
        emb.weight.data.copy_(
            torch.from_numpy(self.random_embedding(size, dim)))
        return emb

    if use_cap:
        self.feature_num += 1
        self.feature_embeddings.append(_random_feature_embedding('[Cap]'))
    if use_postag:
        self.feature_num += 1
        self.feature_embeddings.append(_random_feature_embedding('[POS]'))
    self.use_position = use_position
    if self.use_position:
        pid = data.re_feature_name2id['[POSITION]']
        self.position_embedding_dim = data.re_feature_emb_dims[pid]
        pos_size = data.re_feature_alphabet_sizes[pid]
        # Two position embeddings: distance to entity 1 and to entity 2;
        # data.pad_idx marks the padding index.
        self.position1_emb = nn.Embedding(pos_size,
                                          self.position_embedding_dim,
                                          data.pad_idx)
        self.position1_emb.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(pos_size, self.position_embedding_dim)))
        self.position2_emb = nn.Embedding(pos_size,
                                          self.position_embedding_dim,
                                          data.pad_idx)
        self.position2_emb.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(pos_size, self.position_embedding_dim)))
    if torch.cuda.is_available():
        self.drop = self.drop.cuda(self.gpu)
        self.word_embedding = self.word_embedding.cuda(self.gpu)
        for i in range(self.feature_num):
            self.feature_embeddings[i] = self.feature_embeddings[i].cuda(self.gpu)
        if self.use_position:
            self.position1_emb = self.position1_emb.cuda(self.gpu)
            self.position2_emb = self.position2_emb.cuda(self.gpu)
def __init__(self, data):
    """Batched (lattice-)BiLSTM sequence encoder with three tag heads.

    Args:
        data: configuration object with alphabets, hyper-parameters, and
            pretrained word/biword/gaz embeddings.
    """
    super(BiLSTM, self).__init__()
    print("build batched bilstm...")
    self.use_bigram = data.use_bigram
    self.gpu = data.HP_gpu
    self.use_char = data.HP_use_char
    self.use_gaz = data.HP_use_gaz
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        if data.char_features == "CNN":
            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        self.char_embedding_dim,
                                        self.char_hidden_dim,
                                        data.HP_dropout, self.gpu)
        elif data.char_features == "LSTM":
            self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                           self.char_embedding_dim,
                                           self.char_hidden_dim,
                                           data.HP_dropout, self.gpu)
        else:
            print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
            exit(0)
    self.embedding_dim = data.word_emb_dim
    self.hidden_dim = data.HP_hidden_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                        self.embedding_dim)
    self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(),
                                          data.biword_emb_dim)
    self.bilstm_flag = data.HP_bilstm
    self.lstm_layer = data.HP_lstm_layer
    # Load pretrained word / biword vectors when available.
    if data.pretrain_word_embedding is not None:
        word_init = torch.from_numpy(data.pretrain_word_embedding)
    else:
        word_init = torch.from_numpy(
            self.random_embedding(data.word_alphabet.size(),
                                  self.embedding_dim))
    self.word_embeddings.weight.data.copy_(word_init)
    if data.pretrain_biword_embedding is not None:
        biword_init = torch.from_numpy(data.pretrain_biword_embedding)
    else:
        biword_init = torch.from_numpy(
            self.random_embedding(data.biword_alphabet.size(),
                                  data.biword_emb_dim))
    self.biword_embeddings.weight.data.copy_(biword_init)
    # The LSTM takes word embeddings as inputs and outputs hidden states
    # with dimensionality hidden_dim; halve per direction when bidirectional.
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim
    lstm_input = self.embedding_dim + self.char_hidden_dim
    if self.use_bigram:
        lstm_input += data.biword_emb_dim
    print("********************use_lattice", self.use_gaz)
    if self.use_gaz:
        # Lattice LSTM over the gazetteer; a backward pass is added only
        # when the model is bidirectional.
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                        data.gaz_dropout,
                                        data.gaz_alphabet.size(),
                                        data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding,
                                        True, data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                             data.gaz_dropout,
                                             data.gaz_alphabet.size(),
                                             data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding,
                                             False, data.HP_fix_gaz_emb,
                                             self.gpu)
    else:
        self.lstm = nn.LSTM(lstm_input, lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)
    # Linear layers mapping hidden states to the three tag spaces.
    self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
    self.hidden2tag_ner = nn.Linear(data.HP_hidden_dim,
                                    data.label_alphabet_size_ner)
    self.hidden2tag_general = nn.Linear(data.HP_hidden_dim,
                                        data.label_alphabet_size_general)
    if self.gpu:
        self.drop = self.drop.cuda()
        self.droplstm = self.droplstm.cuda()
        self.word_embeddings = self.word_embeddings.cuda()
        self.biword_embeddings = self.biword_embeddings.cuda()
        if self.use_gaz:
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
        else:
            self.lstm = self.lstm.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        self.hidden2tag_ner = self.hidden2tag_ner.cuda()
        self.hidden2tag_general = self.hidden2tag_general.cuda()
def __init__(self,
             rnn_type,
             vocab_size,
             embedding_dim,
             hidden_dim,
             num_layers,
             tie_weights,
             dropout,
             device,
             pretrain_emb=None,
             use_ch=False,
             use_he=False,
             use_i=False,
             use_h=False,
             use_g=True,
             **kwargs):
    """Word-level RNN language model with optional char-CNN and hypernym
    features mixed into the input/hidden/gate paths.

    Args:
        rnn_type: 'LSTM', 'GRU', 'RNN_TANH', or 'RNN_RELU'.
        vocab_size: output vocabulary size.
        embedding_dim: word embedding dimension.
        hidden_dim: RNN hidden size.
        num_layers: number of stacked RNN layers.
        tie_weights: share decoder weights with the input embedding.
        dropout: dropout probability.
        device: target device for sub-modules that need it.
        pretrain_emb: optional pretrained embedding matrix (numpy).
        use_ch/use_he/use_i/use_h/use_g: feature-combination switches.
        **kwargs: char-CNN sizes when ``use_ch`` is set
            (char_vocab_size, char_emb_dim, char_hid_dim, char_len).

    Raises:
        ValueError: on an unknown ``rnn_type`` or a tie_weights dimension
            mismatch.
    """
    super(RNNModel, self).__init__()
    self.rnn_type = rnn_type
    self.n_layers = num_layers
    self.hi_dim = hidden_dim
    self.device = device
    self.use_i = use_i
    self.use_h = use_h
    self.use_g = use_g
    self.use_ch = use_ch
    self.use_he = use_he
    self.drop = nn.Dropout(dropout)
    char_hid_dim = 0
    char_len = 0
    he_dim = 0
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    if pretrain_emb is not None:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrain_emb))
    else:
        self.embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(vocab_size, embedding_dim)))
    # Embeddings are frozen in both branches.
    self.embedding.weight.requires_grad = False
    # Optional char-CNN features.
    if use_ch:
        char_vocab_size = kwargs['char_vocab_size']
        char_emb_dim = kwargs['char_emb_dim']
        char_hid_dim = kwargs['char_hid_dim']
        char_len = kwargs['char_len']
        self.ch = CharCNN(char_vocab_size, None, char_emb_dim,
                          char_hid_dim, dropout).to(device)
    # Optional hypernym embeddings.
    if use_he:
        print("Build Hypernym Embeddings...")
        he_dim = embedding_dim
        self.he = Hypernym(embedding_dim, self.embedding, device)
    # Width of the concatenated word-level feature vector.
    concat_embedding_dim = embedding_dim + char_len * char_hid_dim + he_dim
    if self.use_i:
        # Input scheme: features are concatenated onto the RNN input.
        embedding_dim = embedding_dim + concat_embedding_dim
    if self.use_h:
        # Hidden scheme: features are mixed into the hidden state.
        self.h_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                  hidden_dim)
    if self.use_g:
        # Gated scheme: update (z), reset (r), and candidate (h) gates.
        self.zt_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                   hidden_dim)
        self.rt_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                   concat_embedding_dim)
        self.ht_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                   hidden_dim)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_dim,
                                         num_layers, dropout=dropout)
    else:
        try:
            nonlinearity = {
                'RNN_TANH': 'tanh',
                'RNN_RELU': 'relu'
            }[rnn_type]
        except KeyError:
            raise ValueError(
                """An invalid option for `--model` was supplied, options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"""
            )
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers,
                          nonlinearity=nonlinearity, dropout=dropout)
    self.word2hidden = nn.Linear(concat_embedding_dim, hidden_dim)
    self.decoder = nn.Linear(hidden_dim, vocab_size)
    if tie_weights:
        if hidden_dim != embedding_dim:
            raise ValueError(
                'When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.embedding.weight
    self.init_weights()
def __init__(self,
             vocab_size,
             emb_dim,
             hid_dim,
             device,
             pretrain_emb=None,
             dropout=0,
             use_i=False,
             use_h=False,
             use_g=True,
             use_ch=True,
             use_he=False,
             **kwargs):
    """Definition-sequence model: word LSTMCell with optional char-CNN,
    hypernym, input/hidden/gated feature-combination schemes.

    Args:
        vocab_size: output vocabulary size.
        emb_dim: word embedding dimension.
        hid_dim: LSTMCell hidden size.
        device: target device for sub-modules that need it.
        pretrain_emb: optional pretrained embedding tensor; when given, the
            embedding is loaded from it and frozen.
        dropout: dropout probability.
        use_i/use_h/use_g/use_ch/use_he: feature-combination switches.
        **kwargs: char-CNN sizes when ``use_ch`` is set
            (char_vocab_size, char_emb_dim, char_hid_dim, char_len).
    """
    super(DefSeq, self).__init__()
    self.device = device
    self.use_i = use_i
    self.use_h = use_h
    self.use_g = use_g
    self.use_ch = use_ch
    self.use_he = use_he
    char_emb_dim = 0
    char_hid_dim = 0
    char_len = 0
    he_dim = 0

    def weight_init(m):
        # Orthogonal init for weight matrices; biases start at 0.5.
        if isinstance(m, nn.Embedding):
            nn.init.orthogonal_(m.weight.data)
        if isinstance(m, nn.Linear):
            nn.init.orthogonal_(m.weight.data)
            nn.init.constant_(m.bias.data, 0.5)
        if isinstance(m, nn.LSTMCell):
            nn.init.orthogonal_(m.weight_hh.data)
            nn.init.orthogonal_(m.weight_ih.data)
            nn.init.constant_(m.bias_hh.data, 0.5)
            nn.init.constant_(m.bias_ih.data, 0.5)

    self.embedding = nn.Embedding(vocab_size, emb_dim)
    if pretrain_emb is not None:
        # FIX: the original called ``self.embedding.from_pretrained(...)``.
        # ``from_pretrained`` is a classmethod that RETURNS a new Embedding
        # module, so the result was discarded and the pretrained weights
        # were never actually loaded. Rebind the attribute instead.
        self.embedding = nn.Embedding.from_pretrained(pretrain_emb,
                                                      freeze=True)
    else:
        weight_init(self.embedding)
    if self.use_ch:
        print("build char sequence feature extractor: CNN ...")
        char_vocab_size = kwargs['char_vocab_size']
        char_emb_dim = kwargs['char_emb_dim']
        char_hid_dim = kwargs['char_hid_dim']
        char_len = kwargs['char_len']
        self.ch = CharCNN(char_vocab_size, None, char_emb_dim,
                          char_hid_dim, dropout, device)
    if self.use_he:
        print("build Hypernym Embeddings...")
        he_dim = emb_dim
        self.he = Hypernym(emb_dim, self.embedding, device)
    # Width of the concatenated word-level feature vector.
    final_word_dim = emb_dim + char_hid_dim * char_len + he_dim
    self.word_linear = nn.Linear(final_word_dim, hid_dim)
    weight_init(self.word_linear)
    self.s_lstm = nn.LSTMCell(emb_dim, hid_dim)
    weight_init(self.s_lstm)
    if self.use_i:
        # Input scheme: word features concatenated onto the LSTM input.
        self.i_lstm = nn.LSTMCell(final_word_dim + emb_dim, hid_dim)
        weight_init(self.i_lstm)
    if self.use_h:
        # Hidden scheme: word features mixed into the hidden state.
        self.h_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.h_linear)
    if self.use_g:
        # Gated scheme: update (z), reset (r), and candidate (h) gates.
        self.g_zt_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.g_zt_linear)
        self.g_rt_linear = nn.Linear(final_word_dim + hid_dim,
                                     final_word_dim)
        weight_init(self.g_rt_linear)
        self.g_ht_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.g_ht_linear)
    self.hidden2tag_linear = nn.Linear(hid_dim, vocab_size)
    weight_init(self.hidden2tag_linear)
    self.dropout = nn.Dropout(p=dropout)