def gen_content_ne(self):
    """Run ``TextPreprocesser.gen_ner`` over the tokenized content.

    The tokens come from ``self._pure_tokenize_content()``; whatever
    ``gen_ner`` produces is returned unchanged (presumably the named
    entities found in the content -- confirm against ``TextPreprocesser``).
    """
    raw_tokens = self._pure_tokenize_content()
    return TextPreprocesser.gen_ner(raw_tokens)
def gen_title_ne(self):
    """Run ``TextPreprocesser.gen_ner`` over the tokenized title.

    The tokens come from ``self._pure_tokenize_title()``; whatever
    ``gen_ner`` produces is returned unchanged (presumably the named
    entities found in the title -- confirm against ``TextPreprocesser``).
    """
    raw_tokens = self._pure_tokenize_title()
    return TextPreprocesser.gen_ner(raw_tokens)
def gen_tokenized_content(self):
    """Return the content tokens after ``TextPreprocesser.filter``.

    Tokenization is delegated to ``self._pure_tokenize_content()``; the
    filtering rules live in ``TextPreprocesser.filter`` and are not
    visible here.
    """
    raw_tokens = self._pure_tokenize_content()
    return TextPreprocesser.filter(raw_tokens)
def gen_tokenized_title(self):
    """Return the title tokens after ``TextPreprocesser.filter``.

    Tokenization is delegated to ``self._pure_tokenize_title()``; the
    filtering rules live in ``TextPreprocesser.filter`` and are not
    visible here.
    """
    raw_tokens = self._pure_tokenize_title()
    return TextPreprocesser.filter(raw_tokens)
def _pure_tokenize_content(self):
    """Return the tokenized ``self.content``, computing it at most once.

    The result of ``TextPreprocesser.tokenizor(self.content)`` is stored
    in ``self._tokenized_content`` and reused on later calls. Only
    ``None`` counts as "not yet computed", so an empty result is still
    treated as cached.
    """
    # Fast path: a previous call already stored the tokens.
    if self._tokenized_content is not None:
        return self._tokenized_content

    self._tokenized_content = TextPreprocesser.tokenizor(self.content)
    return self._tokenized_content
def _pure_tokenize_title(self):
    """Return the tokenized ``self.title``, computing it at most once.

    The result of ``TextPreprocesser.tokenizor(self.title)`` is stored
    in ``self._tokenized_title`` and reused on later calls. Only
    ``None`` counts as "not yet computed", so an empty result is still
    treated as cached.
    """
    # Fast path: a previous call already stored the tokens.
    if self._tokenized_title is not None:
        return self._tokenized_title

    self._tokenized_title = TextPreprocesser.tokenizor(self.title)
    return self._tokenized_title