Example 1
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print "build batched bilstm..."
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim,
                            lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
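
The halving of lstm_hidden when self.bilstm_flag is set keeps the concatenated forward/backward LSTM outputs at HP_hidden_dim, which is what hidden2tag expects. A minimal sketch with made-up sizes (not taken from the example above):

import torch
import torch.nn as nn

hidden_dim, emb_dim = 200, 100                 # assumed HP_hidden_dim / word_emb_dim
lstm = nn.LSTM(emb_dim, hidden_dim // 2, num_layers=1,
               batch_first=True, bidirectional=True)
out, _ = lstm(torch.randn(4, 7, emb_dim))      # (batch_size, sent_len, emb_dim)
assert out.shape == (4, 7, hidden_dim)         # forward and backward halves concatenated
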
Example 2
    def __init__(self, args, model_params):
        super(WordEmbedding, self).__init__()
        self.gpu = args.ifgpu
        self.use_char = args.useChar
        self.char_hidden_dim = args.char_hidden_dim
        self.char_embedding_dim = args.char_embedding_dim
        self.embedding_dim = model_params.embedding_dim
        self.drop = nn.Dropout(args.dropout)
        #char Embedding
        if self.use_char:
            if args.charExtractor == "CNN":
                self.char_feature = CharCNN(
                    model_params.char_alphabet.size(),
                    model_params.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    args.dropout, self.gpu)
            elif args.charExtractor == "LSTM":
                self.char_feature = CharBiLSTM(
                    model_params.char_alphabet.size(),
                    model_params.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    args.dropout, self.gpu)
            else:
                print(
                    "Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM)."
                )
                exit(0)

    #word Embedding
        self.word_embedding = nn.Embedding(model_params.word_alphabet.size(),
                                           self.embedding_dim)
        if model_params.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(model_params.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(model_params.word_alphabet.size(),
                                          self.embedding_dim)))
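
A minimal sketch of the pretrained-embedding branch above, with hypothetical sizes: copy_ only works if the numpy matrix already has the (vocab_size, embedding_dim) shape of the embedding weight.

import numpy as np
import torch
import torch.nn as nn

vocab_size, embedding_dim = 10, 5              # assumed alphabet size / embedding dim
pretrain_word_embedding = np.random.rand(vocab_size, embedding_dim).astype("float32")
word_embedding = nn.Embedding(vocab_size, embedding_dim)
word_embedding.weight.data.copy_(torch.from_numpy(pretrain_word_embedding))
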
Example 3
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()
        print "build word representation..."
        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_feature_extractor == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            data.pretrain_char_embedding,
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               data.pretrain_char_embedding,
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              data.pretrain_char_embedding,
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            data.pretrain_char_embedding,
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM/GRU/ALL)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:

            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[
                position_alphabet_id]
            self.position1_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

            self.position2_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)
Example 4
class WordRep(nn.Module):
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()
        print "build word representation..."
        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_feature_extractor == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            data.pretrain_char_embedding,
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               data.pretrain_char_embedding,
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              data.pretrain_char_embedding,
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            data.pretrain_char_embedding,
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM/GRU/ALL)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:

            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[
                position_alphabet_id]
            self.position1_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

            self.position2_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover,
                position1_inputs, position2_inputs):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len), ...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together
            word_list.append(char_features)
            word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra = self.char_feature_extra.get_last_hiddens(
                    char_inputs,
                    char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                ## concat word and char together
                word_list.append(char_features_extra)

        if self.use_position:
            position1_feature = self.position1_emb(position1_inputs)
            position2_feature = self.position2_emb(position2_inputs)
            word_list.append(position1_feature)
            word_list.append(position2_feature)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
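
A toy sketch of the char_seq_recover step in forward: the char encoder processes words sorted by length, and indexing its output with the inverse permutation (computed here with torch.argsort; all tensors are made up) restores the original word order before the (batch_size, sent_len, -1) reshape.

import torch

lengths = torch.tensor([2, 5, 3])                     # hypothetical word lengths
sort_idx = torch.argsort(lengths, descending=True)    # order fed to the char encoder
char_seq_recover = torch.argsort(sort_idx)            # inverse permutation
encoder_out = sort_idx.float().unsqueeze(1)           # stand-in output: row i belongs to word sort_idx[i]
restored = encoder_out[char_seq_recover]              # back in original word order
assert torch.equal(restored.squeeze(1).long(), torch.arange(3))
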
Example 5
    def __init__(self, data):
        super(WordRep, self).__init__()
        print "build word representation..."
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(),
                                                     self.char_embedding_dim,
                                                     self.char_hidden_dim,
                                                     data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(),
                             self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(
                            data.feature_alphabets[idx].size(),
                            self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda()
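
A toy sketch of the nn.ModuleList pattern above, with made-up alphabet sizes and dimensions: one nn.Embedding per extra feature, indexed positionally later in forward.

import torch
import torch.nn as nn

feature_sizes = [4, 18]                        # assumed feature alphabet sizes
feature_dims = [5, 20]                         # assumed feature embedding dims
feature_embeddings = nn.ModuleList(
    nn.Embedding(size, dim) for size, dim in zip(feature_sizes, feature_dims))
feature_inputs = [torch.zeros(2, 7, dtype=torch.long) for _ in feature_sizes]
feats = [feature_embeddings[idx](feature_inputs[idx])
         for idx in range(len(feature_embeddings))]   # each (2, 7, dim)
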
Example 6
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print "build batched bilstm..."
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(),
                                              data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        # add radical embedding
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.biword_alphabet.size(),
                                          data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states with dimensionality hidden_dim.

        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim
        # add gaz embedding
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                        data.gaz_dropout,
                                        data.gaz_alphabet.size(),
                                        data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding, True,
                                        data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden,
                                             data.gaz_dropout,
                                             data.gaz_alphabet.size(),
                                             data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding,
                                             False, data.HP_fix_gaz_emb,
                                             self.gpu)
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim,
                                    data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs,
                          word_seq_lengths, char_inputs, char_seq_lengths,
                          char_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                gaz_list:
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(sent_len, batch_size, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_bigram:
            biword_embs = self.biword_embeddings(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs], 2)
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)
        word_embs = self.drop(word_embs)
        # packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        lstm_out, hidden = self.forward_lstm(word_embs, gaz_list, hidden)
        if self.bilstm_flag:
            backward_hidden = None
            backward_lstm_out, backward_hidden = self.backward_lstm(
                word_embs, gaz_list, backward_hidden)
            lstm_out = torch.cat([lstm_out, backward_lstm_out], 2)
        # lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out

    def get_output_score(self, gaz_list, word_inputs, biword_inputs,
                         word_seq_lengths, char_inputs, char_seq_lengths,
                         char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs, biword_inputs,
                                          word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover)
        # lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask):
        # mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(gaz_list, word_inputs, biword_inputs,
                                     word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        _, tag_seq = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):

        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(gaz_list, word_inputs, biword_inputs,
                                     word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        # filter padded position with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
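
A compact sketch, on random tensors with assumed sizes, of the flattened loss and greedy decoding used in neg_log_likelihood_loss and forward: scores are reshaped to (batch_size*seq_len, n_labels), label index 0 is treated as padding, and decoded tags are zeroed on padded positions.

import torch
import torch.nn.functional as F

batch_size, seq_len, n_labels = 2, 4, 6        # assumed sizes
outs = torch.randn(batch_size, seq_len, n_labels)
batch_label = torch.randint(1, n_labels, (batch_size, seq_len))
mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])

score = F.log_softmax(outs.view(-1, n_labels), dim=1)
loss = F.nll_loss(score, batch_label.view(-1), ignore_index=0, reduction="sum")
tag_seq = score.argmax(dim=1).view(batch_size, seq_len)
decode_seq = mask.long() * tag_seq             # padded positions decode to 0
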
Example 7
class WordEmbedding(nn.Module):
    def __init__(self, args, model_params):
        super(WordEmbedding, self).__init__()
        self.gpu = args.ifgpu
        self.use_char = args.useChar
        self.char_hidden_dim = args.char_hidden_dim
        self.char_embedding_dim = args.char_embedding_dim
        self.embedding_dim = model_params.embedding_dim
        self.drop = nn.Dropout(args.dropout)
        #char Embedding
        if self.use_char:
            if args.charExtractor == "CNN":
                self.char_feature = CharCNN(
                    model_params.char_alphabet.size(),
                    model_params.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    args.dropout, self.gpu)
            elif args.charExtractor == "LSTM":
                self.char_feature = CharBiLSTM(
                    model_params.char_alphabet.size(),
                    model_params.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    args.dropout, self.gpu)
            else:
                print(
                    "Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM)."
                )
                exit(0)

    #word Embedding
        self.word_embedding = nn.Embedding(model_params.word_alphabet.size(),
                                           self.embedding_dim)
        if model_params.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(model_params.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(model_params.word_alphabet.size(),
                                          self.embedding_dim)))

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, char_inputs, char_seq_lengths,
                char_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output:
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together
            word_list.append(char_features)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
Example 8
    def __init__(self, data):
        super(WordRep, self).__init__()
        print "Build word representation..."
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.use_mapping = data.use_mapping
        self.mapping_func = data.mapping_func

        # character-level
        if self.use_trans:
            if self.use_mapping:
                # linear mapping
                self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)
                # non-linear mapping:w + tanh or w + sigmoid
                if self.mapping_func:
                    if self.mapping_func == "tanh":
                        self.non_linear = nn.Tanh()
                    elif self.mapping_func == "sigmoid":
                        self.non_linear = nn.Sigmoid()
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(), self.trans_embedding_dim,
                                             self.trans_hidden_dim,
                                             data.HP_dropout, data.pretrain_trans_embedding, self.gpu)

        # word-level
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                               data.HP_dropout, data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, data.pretrain_char_embedding, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                                     self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                exit(0)

        # Word embedding
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

        # not use
        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(torch.from_numpy(
                    self.random_embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda()
Example 9
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print "Build word representation..."
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.use_mapping = data.use_mapping
        self.mapping_func = data.mapping_func

        # character-level
        if self.use_trans:
            if self.use_mapping:
                # linear mapping
                self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)
                # non-linear mapping:w + tanh or w + sigmoid
                if self.mapping_func:
                    if self.mapping_func == "tanh":
                        self.non_linear = nn.Tanh()
                    elif self.mapping_func == "sigmoid":
                        self.non_linear = nn.Sigmoid()
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(), self.trans_embedding_dim,
                                             self.trans_hidden_dim,
                                             data.HP_dropout, data.pretrain_trans_embedding, self.gpu)

        # word-level
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                               data.HP_dropout, data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, data.pretrain_char_embedding, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                                     self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                exit(0)

        # Word embedding
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

        # not use
        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(torch.from_numpy(
                    self.random_embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover,
                trans_inputs, trans_seq_length, trans_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len), ...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]

        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            # calculate char lstm last hidden
            char_features, _ = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_list.append(char_features)
            if self.char_all_feature:
                char_features_extra, _ = self.char_feature_extra.get_last_hiddens(char_inputs,
                                                                                  char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)

        trans_features_wc_temp = None
        word_embed_mapping = None
        word_embs_temp = word_embs.view(batch_size * sent_len, -1)
        if self.use_trans:
            trans_features, trans_rnn_length = self.trans_feature.get_last_hiddens(trans_inputs,
                                                                                   trans_seq_length.cpu().numpy())

            if self.use_mapping:
                trans_features_wc = trans_features
                if self.gpu:
                    trans_features_wc.cuda()

                trans_features_wc = trans_features_wc[trans_seq_recover]
                trans_inputs = trans_inputs[trans_seq_recover]

                for index, line in enumerate(trans_inputs):
                    if line[0].data.cpu().numpy()[0] == 0:
                        if self.mapping_func:
                            trans_features_wc[index] = self.non_linear(self.w(word_embs_temp[index]))
                        else:
                            trans_features_wc[index] = self.w(word_embs_temp[index])

                trans_features_wc_temp = trans_features_wc
                trans_features_wc = trans_features_wc.view(batch_size, sent_len, -1)
                word_list.append(trans_features_wc)
                if self.mapping_func:
                    word_embed_mapping = self.non_linear(self.w(word_embs_temp))
                else:
                    word_embed_mapping = self.w(word_embs_temp)

            else:
                trans_features = trans_features[trans_seq_recover]
                trans_features = trans_features.view(batch_size, sent_len, -1)
                word_list.append(trans_features)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent, word_embed_mapping, trans_features_wc_temp
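
A sketch of the mapping fallback in forward, with hypothetical dimensions: a word whose translation row starts with index 0 gets tanh(W·word_emb) (or the plain linear map when mapping_func is unset) in place of the translation-BiLSTM feature.

import torch
import torch.nn as nn

word_emb_dim, trans_hidden_dim = 50, 30        # assumed word_emb_dim / HP_trans_hidden_dim
w = nn.Linear(word_emb_dim, trans_hidden_dim)
non_linear = nn.Tanh()                         # mapping_func == "tanh"
word_emb = torch.randn(word_emb_dim)           # one word's embedding
mapped = non_linear(w(word_emb))               # stands in for the missing translation feature
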
Example 10
class Examiner(nn.Module):
    def __init__(self, data):
        super(Examiner, self).__init__()
        print "build batched bilstm..."
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.tag_size = data.label_alphabet_size
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim +
                            data.label_alphabet_size,
                            lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.hidden_dim, 2)
        self.topk = 50

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs,
                          char_seq_lengths, char_seq_recover, tag_prob,
                          tag_size):
        """
            input:
                word_inputs: (batch_size, sent_len)
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_embs = torch.cat([word_embs, char_features], 2)

        tag_feature = tag_prob  #torch.zeros(batch_size, sent_len, tag_size).cuda().scatter_(2,tag_seq.unsqueeze(2).cuda(),1.0)
        #print("tag_feature")
        #print(tag_feature)
        ## concat word and char together
        word_embs = self.drop(word_embs)
        word_embs = torch.cat([word_embs, tag_feature], 2)
        packed_words = pack_padded_sequence(word_embs,
                                            word_seq_lengths.cpu().numpy(),
                                            True)
        hidden = None
        packed_words.requires_grad = False

        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        ## lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs,
                         char_seq_lengths, char_seq_recover, tag_seq):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths,
                                          char_seq_recover, tag_seq,
                                          self.tag_size)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, tag_seq,
                                tag_prob, mask):

        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len

        outs = self.get_output_score(word_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths,
                                     char_seq_recover, tag_prob)
        # outs (batch_size, seq_len, 2)

        outs = outs.view(total_word, -1)
        score = F.softmax(outs, 1)
        # score: The score for choosing each position

        score = score[:, 0]

        score = score.contiguous().view(batch_size, seq_len)

        score = mask.float() * score  #the score is always positive

        score = F.softmax(score, 1)

        if seq_len >= self.topk:
            topk, indices = score.topk(self.topk, dim=1)
        else:
            topk, indices = score.topk(seq_len, dim=1)
        tag_mask = Variable(torch.ones(batch_size, seq_len).cuda())
        tag_mask = tag_mask.scatter(1, indices, 0).long()
        # tag_mask: 0 at the selected positions, 1 elsewhere

        topk = torch.log(topk)
        # topk: log-probabilities of the selected top-k positions

        # info_tensor: 1 where the predicted tag differs from the gold label, 0 where they agree
        info_tensor = (
            1 -
            (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        # number of positions where prediction and gold disagree
        _sum = info_tensor.sum().long()[0]

        # full_loss: supervised loss on the disagreement positions
        # partial_reward: selection score mass on those positions
        full_loss = -torch.log(score) * info_tensor
        partial_reward = score * info_tensor

        return indices, tag_mask, topk.mean(1), full_loss, partial_reward
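
The two tensor idioms in neg_log_likelihood_loss above are easy to misread: 1 - (-torch.abs(a - b)).ge(0).float() is simply an indicator of the positions where the predicted tag and the gold label disagree, and the topk/scatter pair selects the k highest-scoring positions and builds a mask that is 0 exactly there. A minimal sketch with made-up CPU tensors (not part of the original file):

import torch

tag_seq = torch.LongTensor([[1, 2, 2, 0]])       # hypothetical predicted tags
batch_label = torch.LongTensor([[1, 0, 2, 0]])   # hypothetical gold tags

# -|a - b| >= 0 holds only when a == b, so the expression flags disagreements
disagree = 1 - (-torch.abs(tag_seq - batch_label)).ge(0).float()
print(disagree)   # [[0., 1., 0., 0.]]

# topk + scatter: take the k best scores, then zero those positions in a ones-mask
score = torch.FloatTensor([[0.1, 0.4, 0.3, 0.2]])
topk, indices = score.topk(2, dim=1)
tag_mask = torch.ones(1, 4).scatter(1, indices, 0).long()
print(tag_mask)   # [[1, 0, 0, 1]]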
Example n. 11
class Reformulator(nn.Module):
    def __init__(self, data):
        super(Reformulator, self).__init__()
        print "build batched bilstm..."
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.tag_size = data.label_alphabet_size
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim +
                            data.label_alphabet_size,
                            lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.hidden_dim, 2)
        self.topk = 50

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs,
                          char_seq_lengths, char_seq_recover, tag_seq,
                          tag_size):
        """
            input:
                word_inputs: (batch_size, sent_len)
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_embs = torch.cat([word_embs, char_features], 2)

        ## one-hot encode the predicted tag sequence as an extra input feature
        tag_feature = torch.zeros(batch_size, sent_len, tag_size).cuda().scatter_(
            2, tag_seq.unsqueeze(2).cuda(), 1.0)
        ## concat the (dropped-out) word/char representation with the tag feature
        word_embs = self.drop(word_embs)
        word_embs = torch.cat([word_embs, Variable(tag_feature)], 2)
        packed_words = pack_padded_sequence(word_embs,
                                            word_seq_lengths.cpu().numpy(),
                                            True)
        hidden = None
        packed_words.requires_grad = False

        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        ## lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs,
                         char_seq_lengths, char_seq_recover, tag_seq):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths,
                                          char_seq_recover, tag_seq,
                                          self.tag_size)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, tag_seq, mask):
        ## mask zeroes out the scores of padded positions below
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths,
                                     char_seq_recover, tag_seq)
        # outs (batch_size, seq_len, 2)
        outs = outs.view(total_word, -1)
        score = F.softmax(outs, 1)

        # score[:, 0]: probability of selecting each position for reformulation
        score = score[:, 0]
        score = score.contiguous().view(batch_size, seq_len)
        # zero out padded positions, then renormalise over the sentence
        score = mask.float() * score
        score = F.softmax(score, 1)

        # pick the (at most) self.topk highest-scoring positions
        if seq_len >= self.topk:
            topk, indices = score.topk(self.topk, dim=1)
        else:
            topk, indices = score.topk(seq_len, dim=1)
        # tag_mask: 0 at the selected positions, 1 elsewhere
        tag_mask = Variable(torch.ones(batch_size, seq_len).cuda())
        tag_mask = tag_mask.scatter(1, indices, 0).long()
        # log-probabilities of the selected positions
        topk = torch.log(topk)

        # info_tensor: 1 where the predicted tag differs from the gold label, 0 where they agree
        info_tensor = (
            1 -
            (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        # number of positions where prediction and gold disagree
        _sum = info_tensor.sum().long()[0]

        # ans: supervised loss on the disagreement positions
        # correct: selection score mass on those positions (partial reward)
        ans = -torch.log(score) * info_tensor
        correct = score * info_tensor

        return indices, tag_mask, topk.mean(1), ans, correct

    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask, tag_seq):

        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths,
                                     char_seq_recover, tag_seq)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        ## zero out padded positions
        decode_seq = mask.long() * tag_seq
        return decode_seq
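
Reformulator feeds the base tagger's hard predictions back in as a one-hot feature, built with a single scatter_ call in get_lstm_features. A minimal sketch of that encoding with a tiny, made-up tag set (CPU only, not part of the original file):

import torch

batch_size, sent_len, tag_size = 1, 3, 4
tag_seq = torch.LongTensor([[2, 0, 3]])   # hypothetical predicted tag ids

one_hot = torch.zeros(batch_size, sent_len, tag_size).scatter_(
    2, tag_seq.unsqueeze(2), 1.0)
# one_hot[0] ->
# [[0., 0., 1., 0.],
#  [1., 0., 0., 0.],
#  [0., 0., 0., 1.]]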
Example n. 12
class CoveBiLSTM(nn.Module):
    def __init__(self, data):
        super(CoveBiLSTM, self).__init__()
        print "build batched bilstm..."
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            else:
                print "Error char feature selection, please check parameter data.char_features (either CNN or LSTM)."
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim

        self.cove = nn.LSTM(300,
                            300,
                            num_layers=2,
                            bidirectional=True,
                            batch_first=True)
        self.cove.load_state_dict(
            model_zoo.load_url(model_urls['wmt-lstm'], model_dir=model_cache))

        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim + 600,
                            lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)
        ## commented-out alternative: LSTM over char features plus a 300-dim input only
        # self.lstm = nn.LSTM(self.char_hidden_dim + 300, lstm_hidden, num_layers=self.lstm_layer,
        #                     batch_first=True, bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.cove = self.cove.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs,
                          char_seq_lengths, char_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output:
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        ## cove
        cove_hidden = None
        packed_words = pack_padded_sequence(word_embs,
                                            word_seq_lengths.cpu().numpy(),
                                            True)
        outputs, hidden_t = self.cove(packed_words, cove_hidden)
        outputs = pad_packed_sequence(outputs, batch_first=True)[0]
        outputs = outputs.contiguous()
        outputs = outputs.view(batch_size, sent_len, -1)
        ## concat the CoVe context vectors with the word embeddings
        word_embs = torch.cat([word_embs, outputs], 2)
        # commented-out alternative: use the CoVe outputs alone
        # word_embs = outputs

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)

        ## commented-out alternative: apply CoVe after the char features are concatenated
        # cove_hidden = None
        # cove_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        # outputs, hidden_t = self.cove(cove_words, cove_hidden)
        # outputs = pad_packed_sequence(outputs, batch_first=True)[0]
        word_embs = self.drop(word_embs)
        ## word_embs (batch_size, seq_len, embed_size)
        packed_words = pack_padded_sequence(word_embs,
                                            word_seq_lengths.cpu().numpy(),
                                            True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        ## lstm_out (seq_len, batch_size, hidden_size)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        ## lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs,
                         char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths,
                                          char_seq_recover)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, mask):
        ## mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(word_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths,
                                     char_seq_recover)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        if self.average_batch:
            loss = loss / batch_size
        _, tag_seq = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask):

        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths,
                                     char_seq_recover)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        ## zero out padded positions
        decode_seq = mask.long() * tag_seq
        return decode_seq
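
The "+ 600" in CoveBiLSTM's top LSTM input size comes from CoVe itself: a bidirectional LSTM with hidden size 300 emits a 2 * 300 = 600-dimensional context vector per token, which is concatenated to the word (and char) embedding. A minimal shape check with randomly initialised weights, written against a recent PyTorch API for brevity (the original file loads the pretrained wmt-lstm weights via model_zoo and wraps tensors in Variable):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

cove = nn.LSTM(300, 300, num_layers=2, bidirectional=True, batch_first=True)
word_embs = torch.randn(2, 5, 300)          # (batch, sent_len, emb_dim), dummy values
packed = pack_padded_sequence(word_embs, [5, 5], batch_first=True)
out, _ = cove(packed)
out = pad_packed_sequence(out, batch_first=True)[0]
print(out.size())                           # torch.Size([2, 5, 600])
combined = torch.cat([word_embs, out], 2)   # (2, 5, 900): input to the tagger LSTM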