Beispiel #1
0
    def tagging(self, text, lower=False):
        """ Return the words with POS-tags of the given sentence.

        The sentence is segmented with ``self.wakati``. Each word is then
        encoded three ways (word id, character ids, candidate POS-tag ids)
        and the ids returned by ``self._model.POStagging`` are mapped back
        to tag names through ``self._id2pos``.

        args:
            - text (str): An input sentence.
            - lower (bool): Forwarded to ``self.wakati``. If lower is True,
                            all uppercase characters in the list of words
                            are converted into lowercase characters.
        return:
            - object : ``self._Token(text, words, postags)``, the object of
                       the words with POS-tags.
        """
        words = self.wakati(text, lower)

        wids = utils.conv_tokens_to_ids(words, self._word2id)
        # Character ids are always looked up on the lowercased word.
        cids = [
            utils.conv_tokens_to_ids(list(w.lower()), self._uni2id)
            for w in words
        ]

        tids = []
        for w in words:
            w = w.lower()
            # Copy the lexicon entry: the heuristic below appends to it, and
            # appending to the list stored in self._word2postags would
            # permanently alter (and keep growing) the shared dictionary.
            # [0] is the placeholder used for words missing from the lexicon.
            w2p = list(self._word2postags.get(w, [0]))
            if w.isalnum():
                # Alphanumeric words always get the noun tag as a candidate;
                # for unknown words it replaces the placeholder entirely.
                if w2p == [0]:
                    w2p = [self._pos2id[u'名詞']]
                else:
                    w2p.append(self._pos2id[u'名詞'])
            # De-duplicate the candidate ids.
            tids.append(list(set(w2p)))

        X = [cids, wids, tids]
        postags = [self._id2pos[pid] for pid in self._model.POStagging(X)]
        return self._Token(text, words, postags)
Beispiel #2
0
    def tagging(self, text, lower=False):
        """
        Return the words with POS-tags of the given sentence.

        The sentence is segmented with ``self.wakati``. Each word is then
        encoded three ways (word id, character ids, candidate POS-tag ids)
        and the ids returned by ``self._model.POStagging`` are mapped back
        to tag names through ``self._id2pos``.

        Input:
            text: str (a sentence)
            lower: bool, forwarded unchanged to ``self.wakati``
        Output: the object of the words with POS-tags
            (``self._Token(text, words, postags)``)
        """
        words = self.wakati(text, lower)

        wids = utils.conv_tokens_to_ids(words, self._word2id)
        # Character ids are looked up on the word exactly as segmented.
        cids = [
            utils.conv_tokens_to_ids(list(w), self._uni2id)
            for w in words
        ]

        tids = []
        for w in words:
            # The POS lexicon is queried with the lowercased word.
            w = w.lower()
            # Copy the lexicon entry: the heuristic below appends to it, and
            # appending to the list stored in self._word2postags would
            # permanently alter (and keep growing) the shared dictionary.
            # [0] is the placeholder used for words missing from the lexicon.
            w2p = list(self._word2postags.get(w, [0]))
            if w.isalnum():
                # Alphanumeric words always get the noun tag as a candidate;
                # for unknown words it replaces the placeholder entirely.
                if w2p == [0]:
                    w2p = [self._pos2id[u'名詞']]
                else:
                    w2p.append(self._pos2id[u'名詞'])
            # De-duplicate the candidate ids.
            tids.append(list(set(w2p)))

        X = [cids, wids, tids]
        postags = [self._id2pos[pid] for pid in self._model.POStagging(X)]
        return self._Token(text, words, postags)
Beispiel #3
0
    def _postagging(self, words, lower=False):
        """Return one POS-tag name per entry produced by the tagging model.

        Each word is encoded three ways (word id, character ids, candidate
        POS-tag ids) and the ids returned by ``self._model.POStagging`` are
        mapped back to tag names through ``self._id2pos``.

        args:
            - words (list): Already segmented words (strings).
            - lower (bool): If exactly True, every word is lowercased before
                            any lookup is performed.
        return:
            - list : The POS-tag names looked up in ``self._id2pos``.
        """
        if lower is True:
            words = [w.lower() for w in words]

        wids = utils.conv_tokens_to_ids(words, self._word2id)
        cids = [
            utils.conv_tokens_to_ids(list(w), self._uni2id)
            for w in words
        ]

        tids = []
        for w in words:
            # Copy the lexicon entry: the heuristic below appends to it, and
            # appending to the list stored in self._word2postags would leak
            # the noun tag into the shared dictionary, where it would persist
            # even after use_noun_heuristic is switched off.
            # [0] is the placeholder used for words missing from the lexicon.
            w2p = list(self._word2postags.get(w, [0]))
            if self.use_noun_heuristic is True and w.isalnum():
                # Alphanumeric words get the noun tag as a candidate; for
                # unknown words it replaces the placeholder entirely.
                if w2p == [0]:
                    w2p = [self._pos2id[u'名詞']]
                else:
                    w2p.append(self._pos2id[u'名詞'])
            # De-duplicate the candidate ids.
            tids.append(list(set(w2p)))

        X = [cids, wids, tids]
        postags = [self._id2pos[pid] for pid in self._model.POStagging(X)]
        return postags