Exemple #1
0
    def __init__(self, data, type=1):
        """Assemble the char-word NER model.

        Builds, in this order, the gazetteer embedding, the word embedding,
        the BiLSTM encoder, the CRF layer and the linear projection from
        BiLSTM outputs to tag scores.

        Args:
            data: configuration/data holder. This constructor reads
                ``HP_gpu``, ``HP_hidden_dim``, ``word_alphabet``,
                ``word_emb_dim``, ``gaz_emb_dim``, ``label_alphabet_size``
                and ``pretrain_word_embedding`` from it.
            type: strategy selector, stored on the instance and forwarded
                unchanged to ``Gaz_Embed``. The name shadows the builtin but
                is kept so existing keyword callers continue to work.
        """
        print("Build char-word based NER Task...")
        super(CW_NER, self).__init__()

        self.gpu = data.HP_gpu
        self.type = type
        self.gaz_embed = Gaz_Embed(data, type)

        # One vocabulary size is used for both the embedding table and its
        # random initialisation so the two shapes always agree.
        vocab_size = data.word_alphabet.size()
        self.word_embedding = nn.Embedding(vocab_size, data.word_emb_dim)

        # The encoder consumes the word and gazetteer embedding widths summed.
        self.lstm = Gaz_BiLSTM(data, data.word_emb_dim + data.gaz_emb_dim,
                               data.HP_hidden_dim)

        self.crf = CRF(data.label_alphabet_size, self.gpu)

        # Bidirectional output is 2 * hidden_dim wide. The "+ 2" presumably
        # reserves the CRF's START/STOP tags -- TODO confirm against CRF.
        self.hidden2tag = nn.Linear(data.HP_hidden_dim * 2,
                                    data.label_alphabet_size + 2)

        if data.pretrain_word_embedding is not None:
            initial_weights = data.pretrain_word_embedding
        else:
            initial_weights = random_embedding(vocab_size, data.word_emb_dim)
        # Tensor.copy_ needs a tensor source; random_embedding's result is
        # converted with torch.from_numpy exactly as Gaz_Embed does.
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(initial_weights))

        if self.gpu:
            self.word_embedding = self.word_embedding.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
Exemple #2
0
    def __init__(self, data):
        """Assemble the character-based BiLSTM-CRF tagger.

        Builds the character embedding, the BiLSTM encoder, the linear
        projection to tag scores and the CRF layer.

        Args:
            data: configuration/data holder. This constructor reads
                ``HP_gpu``, ``HP_hidden_dim``, ``char_emb_dim``,
                ``char_alphabet``, ``label_alphabet_size`` and
                ``pretrain_char_embedding`` from it.
        """
        super(Char_NER, self).__init__()
        print("Build Character-based BiLSTM-CRF....")

        self.gpu = data.HP_gpu
        self.embedding_dim = data.char_emb_dim
        self.hidden_dim = data.HP_hidden_dim

        vocab_size = data.char_alphabet.size()
        self.char_embedding = nn.Embedding(vocab_size, self.embedding_dim)

        self.lstm = Char_BiLSTM(data)
        # Bidirectional output is 2 * hidden_dim wide. The "+ 2" presumably
        # reserves the CRF's START/STOP tags -- TODO confirm against CRF.
        self.hidden2tag = nn.Linear(self.hidden_dim * 2,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if data.pretrain_char_embedding is not None:
            initial_weights = data.pretrain_char_embedding
        else:
            initial_weights = random_embedding(vocab_size, self.embedding_dim)
        # Tensor.copy_ needs a tensor source, so the array is converted first
        # (previously the random fallback passed the raw array to copy_).
        self.char_embedding.weight.data.copy_(
            torch.from_numpy(initial_weights))

        if self.gpu:
            self.char_embedding = self.char_embedding.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
Exemple #3
0
    def __init__(self, data, type=1):
        """Set up the gazetteer embedding table and any strategy-specific layers.

        Args:
            data: all the data information; ``HP_gpu``, ``gaz_emb_dim``,
                ``gaz_alphabet`` and ``pretrain_gaz_embedding`` are read here.
            type: the combination strategy -- 1 for avg, 2 for short first,
                3 for long first. In addition, 4 triggers ``build_cnn`` and
                5 triggers ``build_attention``.
        """
        print('build gaz embedding...')

        super(Gaz_Embed, self).__init__()

        emb_dim = data.gaz_emb_dim
        vocab_size = data.gaz_alphabet.size()

        self.gpu = data.HP_gpu
        self.data = data
        self.type = type
        self.gaz_dim = emb_dim
        self.gaz_embedding = nn.Embedding(vocab_size, emb_dim)
        self.dropout = nn.Dropout(p=0.5)

        # Start from the pretrained vectors when supplied, otherwise from a
        # freshly generated random table of the same shape.
        init_table = data.pretrain_gaz_embedding
        if init_table is None:
            init_table = random_embedding(vocab_size, emb_dim)
        self.gaz_embedding.weight.data.copy_(torch.from_numpy(init_table))

        # Convolution spec, stored regardless of strategy. Presumably
        # [kernel_width, num_filters] pairs -- confirm against build_cnn.
        self.filters = [[1, 20], [2, 30]]

        if self.type == 4:
            # Convolutional strategy: create the conv layers from the spec.
            self.build_cnn(self.filters)

            # A 2-layer highway is left disabled:
            # self.highway = Highway(self.gaz_dim, num_layers=2)
            # if self.gpu:
            #     self.highway = self.highway.cuda()

        if self.type == 5:
            # Self-attention strategy.
            self.build_attention()

        if self.gpu:
            self.gaz_embedding = self.gaz_embedding.cuda()