def __init__(self, tagger, useful_tags, delimiters, min_token_length, stopwords, **kwargs):
    """Set up the selected POS tagger and store the tokenizing settings.

    Args:
        tagger: Name of the tagger backend; one of ``'twitter'``,
            ``'komoran'``, ``'hannanum'``, ``'kkma'`` or ``'mecab'``.
        useful_tags: Stored unchanged on ``self.useful_tags``.
        delimiters: Stored unchanged on ``self.delimiters``.
        min_token_length: Stored unchanged on ``self.min_token_length``.
        stopwords: Stored unchanged on ``self.stopwords``.
        **kwargs: Optional tagger settings forwarded to ``tagger.pos``:
            ``norm`` and ``stem`` for twitter (both default to ``True``),
            ``ntags`` (default ``9``) and ``flatten`` for hannanum,
            ``flatten`` (default ``True``) for the remaining taggers.

    Raises:
        LexRankError: If ``tagger`` is not one of the supported names.
    """
    # (public name, class name inside ``taggers``), checked in this order.
    supported = (
        ('twitter', 'Twitter'),
        ('komoran', 'Komoran'),
        ('hannanum', 'Hannanum'),
        ('kkma', 'Kkma'),
        ('mecab', 'Mecab'),
    )

    for backend_name, class_name in supported:
        if tagger == backend_name:
            # Instantiate only the backend that was actually requested.
            self.tagger = getattr(taggers, class_name)()
            break
    else:
        raise LexRankError(
            "available taggers are: twitter, komoran, hannanum, kkma, mecab"
        )

    # Build the keyword arguments handed to ``tagger.pos`` on every call.
    if backend_name == 'twitter':
        pos_options = {
            'norm': bool(kwargs.get('norm', True)),
            'stem': bool(kwargs.get('stem', True)),
        }
    elif backend_name == 'hannanum':
        pos_options = {
            'ntags': int(kwargs.get('ntags', 9)),
            'flatten': bool(kwargs.get('flatten', True)),
        }
    else:
        pos_options = {
            'flatten': bool(kwargs.get('flatten', True)),
        }
    self.tagger_options = pos_options

    self.useful_tags = useful_tags
    self.delimiters = delimiters
    self.stopwords = stopwords
    self.min_token_length = min_token_length

    # Created after the settings above have been assigned.
    self.splitter = self.splitterer()

    # Looks up self.tagger / self.tagger_options at call time, so later
    # changes to either attribute are picked up.
    self.pos = lambda text: self.tagger.pos(text, **self.tagger_options)
def __init__(self, pre_trained=True, analyzer='Hannanum'):
    """Store the configuration and create the analyzer and extractors.

    Args:
        pre_trained: Stored on ``self.pre_trained``. When it compares equal
            to ``False``, an unrecognised ``analyzer`` name is ignored
            silently.
        analyzer: Name of the class in ``tag`` to instantiate; one of
            ``'Hannanum'``, ``'Kkma'``, ``'Komoran'``, ``'Mecab'`` or
            ``'Okt'``. For any other value a hint is printed (unless
            ``pre_trained == False``) and ``self.analyzer`` is left unset.
    """
    self.pre_trained = pre_trained

    known_analyzers = ('Hannanum', 'Kkma', 'Komoran', 'Mecab', 'Okt')
    for candidate in known_analyzers:
        if analyzer == candidate:
            self.analyzer = getattr(tag, candidate)()
            break
    else:
        # No analyzer matched: stay silent only when pre_trained == False.
        stay_silent = pre_trained == False  # noqa: E712 - equality is intentional
        if not stay_silent:
            print('Enter a valid KoNLPy analyzer name.\n\tavailable: Hannanum, Kkma, Komoran, Mecab, Okt')

    self.WordExtractor = WordExtractor(min_frequency=0)
    self.noun_extractor = LRNounExtractor(verbose=False)
    self.word_score = {}
def __init__(self):
    """Create a konlpy Mecab tagger and keep it on ``self._mdl``.

    konlpy is imported inside the method, and a one-line notice with the
    konlpy version is written to standard error before the tagger is built.
    """
    import konlpy
    from konlpy import tag

    notice = "Initialize Mecab from konlpy ({})".format(konlpy.__version__)
    print(notice, file=sys.stderr)

    mecab = tag.Mecab()
    self._mdl = mecab