Ejemplo n.º 1
0
 def tagger_load(self):
     """Instantiate the tagger selected by ``self.tagger_name``.

     ``'Okt'``, ``'Kkma'`` and ``'Komoran'`` select the ``tag`` class of the
     same name; any other value falls back to ``tag.Hannanum``.  The new
     instance is stored on ``self.tagger``.
     """
     recognised = ('Okt', 'Kkma', 'Komoran')
     requested = self.tagger_name
     # Resolve the class name first so only the chosen class is looked up.
     class_name = next(
         (label for label in recognised if requested == label), 'Hannanum')
     self.tagger = getattr(tag, class_name)()
     print("load tagger")
Ejemplo n.º 2
0
    def __init__(self, tagger, useful_tags, delimiters, min_token_length,
                 stopwords, **kwargs):
        """Configure the part-of-speech tagger and tokenisation settings.

        Args:
            tagger: One of ``'twitter'``, ``'komoran'``, ``'hannanum'`` or
                ``'kkma'``; selects which ``taggers`` class is instantiated.
            useful_tags: Stored unchanged on ``self.useful_tags``.
            delimiters: Stored unchanged on ``self.delimiters``.
            min_token_length: Stored unchanged on ``self.min_token_length``.
            stopwords: Stored unchanged on ``self.stopwords``.
            **kwargs: Optional tagger settings.  ``norm`` and ``stem`` are
                read for twitter, ``ntags`` and ``flatten`` for hannanum,
                and ``flatten`` for komoran and kkma.  ``ntags`` defaults to
                9; every other option defaults to ``True``.

        Raises:
            LexRankError: If ``tagger`` is not one of the supported names.
        """
        # Mecab is intentionally not offered here because it is not
        # supported on Windows; it would take the same single 'flatten'
        # option as komoran and kkma.
        supported = ('twitter', 'komoran', 'hannanum', 'kkma')
        if tagger not in supported:
            raise LexRankError(
                "available taggers are: twitter, komoran, hannanum, kkma")

        def switch(key):
            # Boolean option that is enabled unless the caller overrides it.
            return bool(kwargs.get(key, True))

        # The tagger is built before its options are parsed in every branch.
        if tagger == 'twitter':
            self.tagger = taggers.Twitter()
            options = {'norm': switch('norm'), 'stem': switch('stem')}
        elif tagger == 'hannanum':
            self.tagger = taggers.Hannanum()
            options = {
                'ntags': int(kwargs.get('ntags', 9)),
                'flatten': switch('flatten'),
            }
        else:
            # komoran and kkma share the same single option.
            if tagger == 'komoran':
                self.tagger = taggers.Komoran()
            else:
                self.tagger = taggers.Kkma()
            options = {'flatten': switch('flatten')}
        self.tagger_options = options

        self.useful_tags = useful_tags
        self.delimiters = delimiters
        self.stopwords = stopwords
        self.min_token_length = min_token_length
        # Built only after the settings above have been stored.
        self.splitter = self.splitterer()
        # Tagger and options are looked up on self at call time.
        self.pos = lambda text: self.tagger.pos(text, **self.tagger_options)
Ejemplo n.º 3
0
    def __init__(self, pre_trained=True, analyzer='Hannanum'):
        """Set up the morphological analyzer and the extractor helpers.

        Args:
            pre_trained: Stored on ``self.pre_trained``.  Unless it compares
                equal to ``False``, an unrecognised ``analyzer`` name
                triggers a printed hint.
            analyzer: Name of the ``tag`` class to instantiate: one of
                ``'Hannanum'``, ``'Kkma'``, ``'Komoran'``, ``'Mecab'`` or
                ``'Okt'``.  For any other value ``self.analyzer`` is left
                unset.
        """
        self.pre_trained = pre_trained

        known_names = ('Hannanum', 'Kkma', 'Komoran', 'Mecab', 'Okt')
        selected = next(
            (label for label in known_names if analyzer == label), None)
        if selected is not None:
            # Only the chosen class is looked up and instantiated.
            self.analyzer = getattr(tag, selected)()
        elif not (pre_trained == False):
            # The equality test (rather than ``not``) is kept so the hint is
            # printed for exactly the same inputs as before.
            print('Enter a valid KoNLPy analyzer name.\n\tavailable: Hannanum, Kkma, Komoran, Mecab, Okt')

        self.WordExtractor = WordExtractor(min_frequency=0)
        self.noun_extractor = LRNounExtractor(verbose=False)
        self.word_score = {}
Ejemplo n.º 4
0
 def __init__(self):
     """Create the Komoran tagger and report the konlpy version on stderr.

     konlpy is imported inside the method, so the import happens when an
     instance is created.  The tagger is stored on ``self._mdl``.
     """
     import konlpy.tag

     version = konlpy.__version__
     banner = "Initialize Komoran from konlpy ({})".format(version)
     print(banner, file=sys.stderr)
     self._mdl = konlpy.tag.Komoran()