Beispiel #1
0
    def _initialize_detector(self):
        t1 = time.time()
        try:
            import kenlm
        except ImportError:
            raise ImportError(
                'pycorrector dependencies are not fully installed, '
                'they are required for statistical language model.'
                'Please use "pip install kenlm" to install it.'
                'if you are Win, Please install kenlm in cgwin.')
        if not os.path.exists(self.language_model_path):
            filename = self.pre_trained_language_models.get(
                self.language_model_path, 'zh_giga.no_cna_cmn.prune01244.klm')
            url = self.pre_trained_language_models.get(filename)
            get_file(filename,
                     url,
                     extract=True,
                     cache_dir=config.USER_DIR,
                     cache_subdir=config.USER_DATA_DIR,
                     verbose=1)
        self.lm = kenlm.Model(self.language_model_path)
        t2 = time.time()
        logger.debug('Loaded language model: %s, spend: %.3f s.' %
                     (self.language_model_path, t2 - t1))

        # 词、频数dict
        self.word_freq = self.load_word_freq_dict(self.word_freq_path)
        # 自定义混淆集
        self.custom_confusion = self._get_custom_confusion_dict(
            self.custom_confusion_path)
        # 自定义切词词典
        self.custom_word_freq = self.load_word_freq_dict(
            self.custom_word_freq_path)
        self.person_names = self.load_word_freq_dict(self.person_name_path)
        self.place_names = self.load_word_freq_dict(self.place_name_path)
        self.stopwords = self.load_word_freq_dict(self.stopwords_path)
        # 合并切词词典及自定义词典
        self.custom_word_freq.update(self.person_names)
        self.custom_word_freq.update(self.place_names)
        self.custom_word_freq.update(self.stopwords)
        self.word_freq.update(self.custom_word_freq)
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.custom_word_freq,
                                   custom_confusion_dict=self.custom_confusion)
        t3 = time.time()
        logger.debug('Loaded dict file, spend: %.3f s.' % (t3 - t2))
        self.initialized_detector = True
"""
控制器:获取新浪网科技-手机类新闻
"""
from flask import Blueprint
from flask import render_template
from configparser import ConfigParser
from services.news_push_service import get_news_info
from utils.get_file import get_file

NEWS_PUSH = Blueprint('news_push', __name__)

CFG = ConfigParser()
CFG.read(get_file('/config/') + 'pro_setting.ini')


@NEWS_PUSH.route('/SendMessageTojige/NewsPush', methods=['get'])
def news_push():
    """
    获取新浪网科技-手机类新闻,并自动推送(每日10条)
    :return: 网页展示处理结果
    """
    message = get_news_info(CFG.get('news_push', 'url'))
    return render_template('user_comments.html', message=message)