Code example #1
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print ("build batched bilstm...")
        self.gpu = data.HP_gpu
        self.use_gloss = data.HP_use_gloss
        self.use_entity = data.HP_use_entity
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.gloss_hidden_dim = 0
        self.embedding_dim = data.word_emb_dim
        self.gloss_hidden_dim = data.gloss_hidden_dim
        self.gloss_drop = data.HP_dropout
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        if self.use_entity:
            self.entity_embeddings = nn.Embedding(data.entity_alphabet.size(), data.entity_emb_dim)
            self.entity_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.entity_alphabet.size(), data.entity_emb_dim)))
        if self.use_gloss:
            self.gloss_hidden_dim = data.gloss_hidden_dim
            self.gloss_embedding_dim = data.gloss_emb_dim
            if data.gloss_features == "CNN":
                self.gloss_feature = CNN(data,input_dim=data.gloss_emb_dim,hidden_dim=self.gloss_hidden_dim,dropout=self.gloss_drop)
                # self.gloss_feature = CharCNN(data)#data.gloss_alphabet.size(), self.gloss_embedding_dim, self.gloss_hidden_dim, data.HP_dropout, self.gpu)
            elif data.gloss_features == "LSTM":
                self.gloss_feature = CharBiLSTM(data.gloss_alphabet.size(), self.gloss_embedding_dim, self.gloss_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print ("Error gloss feature selection, please check parameter data.gloss_features (either CNN or LSTM).")
                exit(0)
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.droplstm = nn.Dropout(data.HP_dropout)
        if self.bilstm_flag:
            lstm_hidden_dim = data.HP_lstm_hidden_dim // 2
        else:
            lstm_hidden_dim = data.HP_lstm_hidden_dim
        lstm_input_dim = self.embedding_dim + self.gloss_hidden_dim
        self.forward_lstm = LatticeLSTM(lstm_input_dim, lstm_hidden_dim, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, left2right=True, fix_word_emb=data.HP_fix_gaz_emb, gpu=self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input_dim, lstm_hidden_dim, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, left2right=False, fix_word_emb=data.HP_fix_gaz_emb, gpu=self.gpu)
        # self.lstm = nn.LSTM(lstm_input_dim, lstm_hidden_dim, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_lstm_hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            if self.use_entity:
                self.entity_embeddings = self.entity_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
Code example #2
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print( "build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print( "Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        # self.bilstm_flag = False
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
            
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim
        print("********************use_lattice",self.use_gaz)
        if self.use_gaz:
            self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)
            if self.bilstm_flag:
                self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        else:
            self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
        self.hidden2tag_ner = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_ner)
        self.hidden2tag_general = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_general)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            if self.use_gaz:
                self.forward_lstm = self.forward_lstm.cuda()
                if self.bilstm_flag:
                    self.backward_lstm = self.backward_lstm.cuda()
            else:
                self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.hidden2tag_ner = self.hidden2tag_ner.cuda()
            self.hidden2tag_general = self.hidden2tag_general.cuda()
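The LSTM input width in the constructor above is just the sum of whichever embeddings get concatenated before the recurrent layer. As a quick arithmetic sketch (the concrete hyperparameter values below are illustrative assumptions, not values read from this snippet):

# Illustrative dimension bookkeeping for the __init__ above; the numbers are
# assumed example hyperparameters.
word_emb_dim, char_hidden_dim, biword_emb_dim = 50, 50, 50
HP_hidden_dim, bilstm_flag, use_char, use_bigram = 200, True, True, True

lstm_input = word_emb_dim + (char_hidden_dim if use_char else 0)
if use_bigram:
    lstm_input += biword_emb_dim                                    # 50 + 50 + 50 = 150
lstm_hidden = HP_hidden_dim // 2 if bilstm_flag else HP_hidden_dim  # 100 per direction
print(lstm_input, lstm_hidden)                                      # 150 100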
Code example #3
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print( "build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print( "Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        # self.bilstm_flag = False
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
            
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim
        print("********************use_lattice",self.use_gaz)
        if self.use_gaz:
            self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)
            if self.bilstm_flag:
                self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        else:
            self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
        self.hidden2tag_ner = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_ner)
        self.hidden2tag_general = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_general)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            if self.use_gaz:
                self.forward_lstm = self.forward_lstm.cuda()
                if self.bilstm_flag:
                    self.backward_lstm = self.backward_lstm.cuda()
            else:
                self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.hidden2tag_ner = self.hidden2tag_ner.cuda()
            self.hidden2tag_general = self.hidden2tag_general.cuda()


    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index,:] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb


    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                gaz_list:
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(sent_len, batch_size, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_bigram:
            biword_embs = self.biword_embeddings(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs],2)
        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size,sent_len,-1)
            ## concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)
        word_embs = self.drop(word_embs)
        # packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        if self.use_gaz:
            lstm_out, hidden = self.forward_lstm(word_embs, gaz_list, hidden)
            if self.bilstm_flag:
                backward_hidden = None 
                backward_lstm_out, backward_hidden = self.backward_lstm(word_embs, gaz_list, backward_hidden)
                lstm_out = torch.cat([lstm_out, backward_lstm_out],2)
        else:
            lstm_out, hidden = self.lstm(word_embs, hidden)
        # lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out



    def get_output_score(self, gaz_list,  word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs,biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        ## lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag(lstm_out)
        return outputs
    
    def get_output_score_ner(self, gaz_list,  word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs,biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        ## lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag_ner(lstm_out)
        return outputs

    def get_output_score_general(self, gaz_list,  word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs,biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        ## lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag_general(lstm_out)
        return outputs
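This variant keeps three projection heads over the same LSTM features (`hidden2tag`, `hidden2tag_ner`, `hidden2tag_general`), one per label alphabet, which points to a multi-task setup. A minimal, hypothetical dispatch helper over the methods defined above (the `task` argument and its string values are assumptions, not part of the original class) could look like:

# Hypothetical helper; it only forwards to the three get_output_score_* methods
# defined in the class above.
def score_for_task(model, task, gaz_list, word_inputs, biword_inputs,
                   word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    args = (gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
    if task == "main":
        return model.get_output_score(*args)       # (batch_size, sent_len, label_alphabet_size)
    if task == "ner":
        return model.get_output_score_ner(*args)
    if task == "general":
        return model.get_output_score_general(*args)
    raise ValueError("unknown task: %s" % task)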
Code example #4
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print ("build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print ("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        # self.bilstm_flag = False
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
            
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim

        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim

        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)

        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
Code example #5
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print ("build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print ("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        # self.bilstm_flag = False
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
            
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim

        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim

        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)

        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()


    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index,:] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb


    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                gaz_list:
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(sent_len, batch_size, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_bigram:
            biword_embs = self.biword_embeddings(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs],2)
        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size,sent_len,-1)
            ## concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)
        word_embs = self.drop(word_embs)
        # packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        lstm_out, hidden = self.forward_lstm(word_embs, gaz_list, hidden)
        if self.bilstm_flag:
            backward_hidden = None 
            backward_lstm_out, backward_hidden = self.backward_lstm(word_embs, gaz_list, backward_hidden)
            lstm_out = torch.cat([lstm_out, backward_lstm_out],2)
        # lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out



    def get_output_score(self, gaz_list,  word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs,biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        ## lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag(lstm_out)
        return outputs
    

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        ## mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        # size_average=False is the older spelling of reduction='sum' (deprecated in newer PyTorch)
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        _, tag_seq  = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq


    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,  char_inputs, char_seq_lengths, char_seq_recover, mask):
        
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(gaz_list,  word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        outs = outs.view(total_word, -1)
        _, tag_seq  = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        ## filter padded position with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
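Assuming a `model = BiLSTM(data)` instance and batch tensors produced by whatever batching code accompanies this class (none of that is shown here), one training step and one decoding call over the interface above might look like the following sketch:

import torch

# Sketch only: the batch tensors and the optimizer are assumed to be set up elsewhere.
def train_one_batch(model, optimizer, gaz_list, word_inputs, biword_inputs,
                    word_seq_lengths, char_inputs, char_seq_lengths,
                    char_seq_recover, batch_label, mask):
    model.train()
    optimizer.zero_grad()
    loss, tag_seq = model.neg_log_likelihood_loss(
        gaz_list, word_inputs, biword_inputs, word_seq_lengths,
        char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask)
    loss.backward()
    optimizer.step()
    return loss.item(), tag_seq

def decode_one_batch(model, gaz_list, word_inputs, biword_inputs,
                     word_seq_lengths, char_inputs, char_seq_lengths,
                     char_seq_recover, mask):
    model.eval()
    with torch.no_grad():
        # forward() returns the greedy tag sequence with padded positions zeroed out
        return model(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                     char_inputs, char_seq_lengths, char_seq_recover, mask)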
Code example #6
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print("build batched bilstm...")
        self.use_bichar = data.use_bichar
        self.gpu = data.HP_gpu
        # self.use_char = data.HP_use_character
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.embedding_dim = data.char_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.char_embeddings = nn.Embedding(data.char_alphabet.size(),
                                            self.embedding_dim)
        self.bichar_embeddings = nn.Embedding(data.bichar_alphabet.size(),
                                              data.bichar_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_char_embedding is not None:
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_char_embedding))
        else:
            self.char_embeddings.weight.data.copy_(
                # torch.from_numpy(_): numpy to  torch
                # torch_data.numpy(): torch to numpy
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet.size(),
                                          self.embedding_dim)))

        if data.pretrain_bichar_embedding is not None:
            self.bichar_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_bichar_embedding))
        else:
            self.bichar_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.bichar_alphabet.size(),
                                          data.bichar_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states with dimensionality hidden_dim.
        lstm_hidden = data.HP_hidden_dim // 2 if self.bilstm_flag else data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bichar:
            lstm_input += data.bichar_emb_dim
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                        data.gaz_dropout,
                                        data.gaz_alphabet.size(),
                                        data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding, True,
                                        data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                             data.gaz_dropout,
                                             data.gaz_alphabet.size(),
                                             data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding,
                                             False, data.HP_fix_gaz_emb,
                                             self.gpu)
        # use biLSTM
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim,
                                    data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.char_embeddings = self.char_embeddings.cuda()
            self.bichar_embeddings = self.bichar_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
Code example #7
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print("build batched bilstm...")
        self.use_bichar = data.use_bichar
        self.gpu = data.HP_gpu
        # self.use_char = data.HP_use_character
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.embedding_dim = data.char_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.char_embeddings = nn.Embedding(data.char_alphabet.size(),
                                            self.embedding_dim)
        self.bichar_embeddings = nn.Embedding(data.bichar_alphabet.size(),
                                              data.bichar_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_char_embedding is not None:
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_char_embedding))
        else:
            self.char_embeddings.weight.data.copy_(
                # torch.from_numpy(_): numpy to  torch
                # torch_data.numpy(): torch to numpy
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet.size(),
                                          self.embedding_dim)))

        if data.pretrain_bichar_embedding is not None:
            self.bichar_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_bichar_embedding))
        else:
            self.bichar_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.bichar_alphabet.size(),
                                          data.bichar_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states with dimensionality hidden_dim.
        lstm_hidden = data.HP_hidden_dim // 2 if self.bilstm_flag else data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bichar:
            lstm_input += data.bichar_emb_dim
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                        data.gaz_dropout,
                                        data.gaz_alphabet.size(),
                                        data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding, True,
                                        data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                             data.gaz_dropout,
                                             data.gaz_alphabet.size(),
                                             data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding,
                                             False, data.HP_fix_gaz_emb,
                                             self.gpu)
        # use biLSTM
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim,
                                    data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.char_embeddings = self.char_embeddings.cuda()
            self.bichar_embeddings = self.bichar_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, gaz_list, char_inputs, bichar_inputs,
                          char_seq_lengths):
        """
            input:
                char_inputs: (batch_size, sent_len)
                gaz_list:
                char_seq_lengths: list of batch_size, (batch_size,1)
                character_inputs: (batch_size*sent_len, word_length)
                character_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(sent_len, batch_size, hidden_dim)
        """
        char_embs = self.char_embeddings(char_inputs)
        if self.use_bichar:
            bichar_embs = self.bichar_embeddings(bichar_inputs)
            char_embs = torch.cat([char_embs, bichar_embs], 2)

        char_embs = self.drop(char_embs)

        hidden = None
        lstm_out, hidden = self.forward_lstm(char_embs, gaz_list, hidden)
        if self.bilstm_flag:
            backward_hidden = None
            backward_lstm_out, backward_hidden = self.backward_lstm(
                char_embs, gaz_list, backward_hidden)
            lstm_out = torch.cat([lstm_out, backward_lstm_out], 2)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out

    def get_output_score(self, gaz_list, char_inputs, bichar_inputs,
                         char_seq_lengths):
        lstm_out = self.get_lstm_features(gaz_list, char_inputs, bichar_inputs,
                                          char_seq_lengths)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, gaz_list, char_inputs, bichar_inputs,
                                char_seq_lengths, batch_label, mask):
        ## mask is not used
        batch_size = char_inputs.size(0)
        seq_len = char_inputs.size(1)
        total_word = batch_size * seq_len
        # size_average=False is the older spelling of reduction='sum' (deprecated in newer PyTorch)
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(gaz_list, char_inputs, bichar_inputs,
                                     char_seq_lengths)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        _, tag_seq = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def forward(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths,
                mask):

        batch_size = char_inputs.size(0)
        seq_len = char_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(gaz_list, char_inputs, bichar_inputs,
                                     char_seq_lengths)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        # filter padded position with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
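The `random_embedding` helper above fills the embedding table one row at a time. A vectorized equivalent with the same uniform(-scale, scale) initialization, where scale = sqrt(3 / embedding_dim), is shown below purely as an illustration (it is not part of the original code):

import numpy as np

def random_embedding_vectorized(vocab_size, embedding_dim):
    # Same distribution as the per-row loop in random_embedding above,
    # drawn for the whole (vocab_size, embedding_dim) matrix in one call.
    scale = np.sqrt(3.0 / embedding_dim)
    return np.random.uniform(-scale, scale, (vocab_size, embedding_dim))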
Code example #8
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print ("build batched bilstm...")
        self.gpu = data.HP_gpu
        self.use_gloss = data.HP_use_gloss
        self.use_entity = data.HP_use_entity
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.gloss_hidden_dim = 0
        self.embedding_dim = data.word_emb_dim
        self.gloss_hidden_dim = data.gloss_hidden_dim
        self.gloss_drop = data.HP_dropout
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        if self.use_entity:
            self.entity_embeddings = nn.Embedding(data.entity_alphabet.size(), data.entity_emb_dim)
            self.entity_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.entity_alphabet.size(), data.entity_emb_dim)))
        if self.use_gloss:
            self.gloss_hidden_dim = data.gloss_hidden_dim
            self.gloss_embedding_dim = data.gloss_emb_dim
            if data.gloss_features == "CNN":
                self.gloss_feature = CNN(data,input_dim=data.gloss_emb_dim,hidden_dim=self.gloss_hidden_dim,dropout=self.gloss_drop)
                # self.gloss_feature = CharCNN(data)#data.gloss_alphabet.size(), self.gloss_embedding_dim, self.gloss_hidden_dim, data.HP_dropout, self.gpu)
            elif data.gloss_features == "LSTM":
                self.gloss_feature = CharBiLSTM(data.gloss_alphabet.size(), self.gloss_embedding_dim, self.gloss_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print ("Error gloss feature selection, please check parameter data.gloss_features (either CNN or LSTM).")
                exit(0)
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.droplstm = nn.Dropout(data.HP_dropout)
        if self.bilstm_flag:
            lstm_hidden_dim = data.HP_lstm_hidden_dim // 2
        else:
            lstm_hidden_dim = data.HP_lstm_hidden_dim
        lstm_input_dim = self.embedding_dim + self.gloss_hidden_dim
        if self.use_entity:
            # entity embeddings are concatenated in get_lstm_features, so the
            # LatticeLSTM input width must include them as well
            lstm_input_dim += data.entity_emb_dim
        self.forward_lstm = LatticeLSTM(lstm_input_dim, lstm_hidden_dim, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, left2right=True, fix_word_emb=data.HP_fix_gaz_emb, gpu=self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input_dim, lstm_hidden_dim, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, left2right=False, fix_word_emb=data.HP_fix_gaz_emb, gpu=self.gpu)
        # self.lstm = nn.LSTM(lstm_input_dim, lstm_hidden_dim, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_lstm_hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            if self.use_entity:
                self.entity_embeddings = self.entity_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()


    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index,:] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb



    def get_lstm_features(self,gaz_list,batch_word,batch_entity,batch_gloss,batch_label, mask):
        """
            input:
                batch_word: (batch_size, sent_len)
                gaz_list:
                word_seq_lengths: list of batch_size, (batch_size,1)
                gloss_inputs: (batch_size*sent_len, word_length)
                gloss_seq_lengths: list of whole batch_size for gloss, (batch_size*sent_len, 1)
                gloss_seq_recover: variable which records the gloss order information, used to recover gloss order
            output: 
                Variable(sent_len, batch_size, hidden_dim)
        """
        batch_size = batch_word.size(0)
        sent_len = batch_word.size(1)
        word_embs = self.word_embeddings(batch_word)
        if self.gpu:
            word_embs = word_embs.cuda()
        if self.use_entity:
            entity_embs = self.entity_embeddings(batch_entity)
            if self.gpu:
                entity_embs = entity_embs.cuda()
            word_embs = torch.cat([word_embs, entity_embs],2)
        if self.use_gloss:
            ## calculate gloss lstm last hidden
            gloss_features = self.gloss_feature(np.reshape(batch_gloss, [-1, batch_gloss.shape[2]]))
            gloss_features = gloss_features.view(batch_size,sent_len,-1)
            ## concat word and gloss together
            if self.gpu:
                gloss_features = gloss_features.cuda()
            word_embs = torch.cat([word_embs, gloss_features], 2)
        word_embs = self.drop(word_embs)
        # lstm_out=[]
        # for bi in range(batch_size):
        forward_hidden = None
        lstm_out, hidden = self.forward_lstm(word_embs, gaz_list, forward_hidden)
        # lstm_out.append(lstm_out_bi)
        if self.bilstm_flag:
            backward_hidden = None 
            backward_lstm_out, backward_hidden = self.backward_lstm(word_embs, gaz_list, backward_hidden)
            lstm_out = torch.cat([lstm_out, backward_lstm_out], 2)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out



    def get_output_score(self,gaz_list,batch_word,batch_entity,batch_gloss,batch_label, mask):
        lstm_out = self.get_lstm_features(gaz_list,batch_word,batch_entity,batch_gloss,batch_label,mask)
        ## lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag(lstm_out)
        return outputs
    

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, entity_inputs, word_seq_lengths, gloss_inputs, gloss_seq_lengths, gloss_seq_recover, batch_label, mask):
        ## mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        # size_average=False is the older spelling of reduction='sum' (deprecated in newer PyTorch)
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        # align the call with get_output_score's signature: (gaz_list, batch_word, batch_entity, batch_gloss, batch_label, mask)
        outs = self.get_output_score(gaz_list, word_inputs, entity_inputs, gloss_inputs, batch_label, mask)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        _, tag_seq  = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq


    def forward(self, gaz_list, word_inputs, entity_inputs, word_seq_lengths,  gloss_inputs, gloss_seq_lengths, gloss_seq_recover, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        # batch_label is not used during feature extraction, so None stands in for it here
        outs = self.get_output_score(gaz_list, word_inputs, entity_inputs, gloss_inputs, None, mask)
        outs = outs.view(total_word, -1)
        _, tag_seq  = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        ## filter padded position with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
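The gloss branch in this last example instantiates a `CNN(data, input_dim=..., hidden_dim=..., dropout=...)` module and later calls it on gloss token ids reshaped to (batch_size * sent_len, gloss_len), expecting one hidden_dim vector per word in return. That CNN class is not included in these examples; the sketch below is a hypothetical stand-in that merely matches those call sites, with the embedding lookup, kernel size, and max-pooling chosen as assumptions:

import torch
import torch.nn as nn

class CNN(nn.Module):
    # Hypothetical gloss encoder: embed gloss tokens, apply a 1-D convolution,
    # then max-pool over the gloss length to get one vector per word.
    def __init__(self, data, input_dim, hidden_dim, dropout):
        super(CNN, self).__init__()
        self.embeddings = nn.Embedding(data.gloss_alphabet.size(), input_dim)
        self.conv = nn.Conv1d(input_dim, hidden_dim, kernel_size=3, padding=1)
        self.drop = nn.Dropout(dropout)

    def forward(self, gloss_inputs):
        # gloss_inputs: (batch_size * sent_len, gloss_len) token ids (numpy array or tensor)
        gloss_inputs = torch.as_tensor(gloss_inputs, dtype=torch.long)
        embs = self.drop(self.embeddings(gloss_inputs))          # (N, gloss_len, input_dim)
        conv_out = torch.relu(self.conv(embs.transpose(1, 2)))   # (N, hidden_dim, gloss_len)
        return torch.max(conv_out, 2)[0]                         # (N, hidden_dim)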