コード例 #1
0
ファイル: disambiguate.py プロジェクト: bab2min/kiwipiepy
 def __init__(self):
     """Create the Hannanum model and announce the konlpy version in use.

     konlpy is imported here rather than at module level, so it is only
     loaded when this wrapper is instantiated. The banner is written to
     stderr; the model instance is stored on ``self._mdl``.
     """
     import konlpy
     from konlpy import tag

     banner = "Initialize Hannanum from konlpy ({})".format(konlpy.__version__)
     print(banner, file=sys.stderr)
     self._mdl = tag.Hannanum()
コード例 #2
0
 def tagger_load(self):
     """Instantiate the tagger selected by ``self.tagger_name``.

     'Okt', 'Kkma' and 'Komoran' select the class of the same name on the
     ``tag`` module (presumably ``konlpy.tag`` -- confirm against the
     file's imports); every other value falls back to ``tag.Hannanum``.
     The new instance is stored on ``self.tagger`` and a short message is
     printed afterwards.
     """
     requested = self.tagger_name
     class_name = 'Hannanum'  # fallback for any unrecognised name
     for candidate in ('Okt', 'Kkma', 'Komoran'):
         if requested == candidate:
             class_name = candidate
             break
     # Look the class up lazily so only the chosen one is touched on ``tag``.
     self.tagger = getattr(tag, class_name)()
     print("load tagger")
コード例 #3
0
def parse_into_nouns(articles):
    """Extract cleaned noun lists from ``articles``, one list per sentence.

    ``articles`` is first passed through ``utils.preprocess``; the result
    is iterated as a sequence of sentences. For each sentence the nouns
    reported by a ``tag.Hannanum`` instance are cleaned by removing every
    entry of ``punctuation``, then dropped if they appear in
    ``ignore_words`` or parse as an integer.

    Returns:
        A list of non-empty noun lists. Sentences that yield no surviving
        noun are omitted.
    """
    def _parses_as_int(token):
        # True when int() accepts the token, i.e. it is purely numeric.
        try:
            int(token)
        except ValueError:
            return False
        return True

    sentences = utils.preprocess(articles)
    extractor = tag.Hannanum()
    collected = []
    for sentence in sentences:
        kept = []
        for noun in extractor.nouns(sentence):
            for mark in punctuation:
                noun = noun.replace(mark, '')
            if noun in ignore_words:
                continue
            # NOTE(review): a noun reduced to '' by the stripping above is
            # kept, because int('') raises ValueError -- confirm intended.
            if _parses_as_int(noun):
                continue
            kept.append(noun)
        if kept:
            collected.append(kept)
    return collected
コード例 #4
0
    def __init__(self, tagger, useful_tags, delimiters, min_token_length,
                 stopwords, **kwargs):
        """Configure the tagger backend and the token-filtering settings.

        Args:
            tagger: One of 'twitter', 'komoran', 'hannanum' or 'kkma'.
            useful_tags: Stored as ``self.useful_tags``.
            delimiters: Stored as ``self.delimiters``.
            min_token_length: Stored as ``self.min_token_length``.
            stopwords: Stored as ``self.stopwords``.
            **kwargs: Optional tagger options, forwarded to the tagger's
                ``pos`` call: ``norm``/``stem`` for twitter, ``flatten``
                for komoran and kkma, ``ntags`` (default 9) and
                ``flatten`` for hannanum. Boolean options default to True.

        Raises:
            LexRankError: If ``tagger`` is not one of the supported names.
        """
        # mecab is not supported on Windows, so it is deliberately left out
        # of the supported backends. To restore it, add 'mecab' to the guard
        # and error message below and a branch with:
        #    self.tagger = taggers.Mecab()
        #    self.tagger_options = {'flatten': bool(kwargs.get('flatten', True))}
        if tagger not in ('twitter', 'komoran', 'hannanum', 'kkma'):
            raise LexRankError(
                "available taggers are: twitter, komoran, hannanum, kkma")

        def flag(option):
            # Boolean option from kwargs; enabled unless overridden.
            return bool(kwargs.get(option, True))

        # The tagger is built before its options are parsed in every branch.
        if tagger == 'twitter':
            self.tagger = taggers.Twitter()
            self.tagger_options = dict(norm=flag('norm'), stem=flag('stem'))
        elif tagger == 'komoran':
            self.tagger = taggers.Komoran()
            self.tagger_options = dict(flatten=flag('flatten'))
        elif tagger == 'hannanum':
            self.tagger = taggers.Hannanum()
            self.tagger_options = dict(
                ntags=int(kwargs.get('ntags', 9)),
                flatten=flag('flatten'),
            )
        else:
            self.tagger = taggers.Kkma()
            self.tagger_options = dict(flatten=flag('flatten'))

        self.useful_tags = useful_tags
        self.delimiters = delimiters
        self.stopwords = stopwords
        self.min_token_length = min_token_length
        # Built only after the settings above are in place on the instance.
        self.splitter = self.splitterer()
        self.pos = lambda text: self.tagger.pos(text, **self.tagger_options)
コード例 #5
0
ファイル: preprocess.py プロジェクト: blank54/connlp
    def __init__(self, pre_trained=True, analyzer='Hannanum'):
        """Set up the morphological analyzer and the word-extraction helpers.

        Args:
            pre_trained: Stored as ``self.pre_trained``. Also decides
                whether an unrecognised ``analyzer`` name prints a hint.
            analyzer: Name of the class on ``tag`` to instantiate; one of
                'Hannanum', 'Kkma', 'Komoran', 'Mecab' or 'Okt'.

        For an unrecognised ``analyzer`` no ``self.analyzer`` attribute is
        set. A hint is printed unless ``pre_trained == False``.
        """
        self.pre_trained = pre_trained

        for candidate in ('Hannanum', 'Kkma', 'Komoran', 'Mecab', 'Okt'):
            if analyzer == candidate:
                # Resolve lazily so only the requested class is looked up.
                self.analyzer = getattr(tag, candidate)()
                break
        else:
            # Unknown name: stay silent only when pre_trained equals False.
            if not (pre_trained == False):
                print('Enter a valid KoNLPy analyzer name.\n\tavailable: Hannanum, Kkma, Komoran, Mecab, Okt')

        self.WordExtractor = WordExtractor(min_frequency=0)
        self.noun_extractor = LRNounExtractor(verbose=False)
        self.word_score = {}