def _initialize_detector(self):
    """Load the kenlm language model and every dictionary the detector uses.

    Steps, in order:
      1. Import ``kenlm`` lazily, so the dependency is only needed once the
         detector is actually initialized.
      2. If ``self.language_model_path`` does not exist on disk, call
         ``get_file`` to fetch a pre-trained model into the user data dir.
      3. Build ``self.lm`` from ``self.language_model_path``.
      4. Load the word-frequency, custom-confusion, custom-word, person-name,
         place-name and stop-word dictionaries, merge them, and build
         ``self.tokenizer``.
      5. Set ``self.initialized_detector`` to ``True``.

    Raises:
        ImportError: if ``kenlm`` cannot be imported. The original import
            failure is attached as ``__cause__``.
    """
    t1 = time.time()
    try:
        import kenlm
    except ImportError as err:
        # Adjacent literals are concatenated verbatim, so each sentence must
        # carry its own trailing space to keep the message readable.
        raise ImportError(
            'pycorrector dependencies are not fully installed, '
            'they are required for statistical language model. '
            'Please use "pip install kenlm" to install it. '
            'If you are on Windows, please install kenlm in cygwin.'
        ) from err

    if not os.path.exists(self.language_model_path):
        # The same mapping is queried twice: first to turn the configured
        # path into a model file name (falling back to the default model),
        # then to turn that file name into its download URL.
        filename = self.pre_trained_language_models.get(
            self.language_model_path,
            'zh_giga.no_cna_cmn.prune01244.klm')
        url = self.pre_trained_language_models.get(filename)
        get_file(filename, url, extract=True,
                 cache_dir=config.USER_DIR,
                 cache_subdir=config.USER_DATA_DIR,
                 verbose=1)
    self.lm = kenlm.Model(self.language_model_path)
    t2 = time.time()
    logger.debug('Loaded language model: %s, spend: %.3f s.' %
                 (self.language_model_path, t2 - t1))

    # Word -> frequency dict.
    self.word_freq = self.load_word_freq_dict(self.word_freq_path)
    # Custom confusion set.
    self.custom_confusion = self._get_custom_confusion_dict(
        self.custom_confusion_path)
    # Custom word-segmentation dictionary.
    self.custom_word_freq = self.load_word_freq_dict(
        self.custom_word_freq_path)
    self.person_names = self.load_word_freq_dict(self.person_name_path)
    self.place_names = self.load_word_freq_dict(self.place_name_path)
    self.stopwords = self.load_word_freq_dict(self.stopwords_path)

    # Merge the segmentation dictionary with the custom dictionaries.
    # Later updates win on duplicate keys, so the order is significant.
    self.custom_word_freq.update(self.person_names)
    self.custom_word_freq.update(self.place_names)
    self.custom_word_freq.update(self.stopwords)
    self.word_freq.update(self.custom_word_freq)

    self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                               custom_word_freq_dict=self.custom_word_freq,
                               custom_confusion_dict=self.custom_confusion)
    t3 = time.time()
    logger.debug('Loaded dict file, spend: %.3f s.' % (t3 - t2))
    self.initialized_detector = True
""" 控制器:获取新浪网科技-手机类新闻 """ from flask import Blueprint from flask import render_template from configparser import ConfigParser from services.news_push_service import get_news_info from utils.get_file import get_file NEWS_PUSH = Blueprint('news_push', __name__) CFG = ConfigParser() CFG.read(get_file('/config/') + 'pro_setting.ini') @NEWS_PUSH.route('/SendMessageTojige/NewsPush', methods=['get']) def news_push(): """ 获取新浪网科技-手机类新闻,并自动推送(每日10条) :return: 网页展示处理结果 """ message = get_news_info(CFG.get('news_push', 'url')) return render_template('user_comments.html', message=message)