Beispiel #1
0
class TestCna(unittest.TestCase):
    """
    Search tests for the CNA (Central News Agency) channel.
    """

    def setUp(self):
        """Set the search keyword and build the 'cna' NewsSearch (limit=10)."""
        self.keyword = '上吊'
        self.nsearch = NewsSearch('cna', limit=10)

    def test_01_filter_title(self):
        """
        A title-only search must return only titles containing the keyword.
        """
        results = self.nsearch.by_keyword(self.keyword, title_only=True).to_dict_list()
        # Build the message from self.keyword so the check and the message
        # cannot drift apart from the value configured in setUp.
        msg = '標題必須含有 "{}"'.format(self.keyword)
        for topic in results:
            # assertIn also reports the offending title on failure.
            self.assertIn(self.keyword, topic['title'], msg)

    def test_02_search_and_soup(self):
        """
        Every article found by the search must yield non-None contents.
        """
        nsoups = self.nsearch.by_keyword(self.keyword).to_soup_list()
        for nsoup in nsoups:
            if nsoup.contents() is None:
                # Include the article URL so a failure can be traced to a page.
                msg = '內文不可為 None, URL={}'.format(nsoup.path)
                self.fail(msg)
class TestEttoday(unittest.TestCase):
    """
    Search tests for the ETtoday news channel.
    """

    def setUp(self):
        """Set the keyword and build the 'ettoday' NewsSearch (limit=10, proxy_first=True)."""
        self.keyword = '上吊'
        self.nsearch = NewsSearch('ettoday', limit=10, proxy_first=True)

    def test_01_filter_title(self):
        """
        A title-only search must return only titles containing the keyword.
        """
        search = self.nsearch.by_keyword(self.keyword, title_only=True)
        hits = search.to_dict_list()
        # Stops at the first title that lacks the keyword.
        if any('上吊' not in hit['title'] for hit in hits):
            self.fail('標題必須含有 "上吊"')

    def test_02_search_and_soup(self):
        """
        Every article found by the search must yield non-None contents.
        """
        soups = self.nsearch.by_keyword(self.keyword).to_soup_list()
        for soup in soups:
            if soup.contents() is not None:
                continue
            # Report the URL of the first article without contents.
            self.fail('內文不可為 None, URL={}'.format(soup.path))
Beispiel #3
0
def compare_keyword(keyword):
    """
    Print how many headlines of each media outlet contained a keyword
    during the previous calendar month.

    The date range runs from the first to the last day of the month before
    the current local date. For every channel a title-only search with
    limit=999 is performed and the number of results is printed.

    Args:
        keyword: search term passed to NewsSearch.by_keyword.
    """
    # Local import: only the `datetime` class is known to be in scope here.
    from datetime import timedelta

    print('比較上個月 "{}" 在各媒體標題出現次數'.format(keyword))
    now = datetime.now()
    # One day before the 1st of the current month is the last day of the
    # previous month. Calendar arithmetic handles January (rolls back to
    # December of the previous year) and varying month lengths, and does not
    # depend on the local UTC offset the way epoch-second flooring does.
    last_day = datetime(now.year, now.month, 1) - timedelta(days=1)
    first_day = last_day.replace(day=1)
    beg_date = first_day.strftime('%Y-%m-%d')
    end_date = last_day.strftime('%Y-%m-%d')
    print('時間區間: {} ~ {}'.format(beg_date, end_date))

    # Channel id -> display name (padded so the printed labels line up).
    media = {
        'appledaily': '  蘋果',
        'cna': '中央社',
        'ettoday': '  東森',
        'ltn': '  自由',
        'setn': '  三立',
        'udn': '  聯合'
    }

    for (channel, name) in media.items():
        nsearch = NewsSearch(channel,
                             beg_date=beg_date,
                             end_date=end_date,
                             limit=999)
        results = nsearch.by_keyword(keyword, title_only=True).to_dict_list()
        msg = '{}: {}'.format(name, len(results))
        # Flush so each channel's count shows up as soon as it is ready.
        print(msg, flush=True)
class TestAppleDaily(unittest.TestCase):
    """
    Search tests for the Apple Daily news channel.
    """
    def setUp(self):
        """Set the keyword and build the 'appledaily' NewsSearch (limit=10)."""
        self.keyword = '上吊'
        self.nsearch = NewsSearch('appledaily', limit=10)

    def test_01_filter_title(self):
        """
        A title-only search must return only titles containing the keyword.
        """
        search = self.nsearch.by_keyword(self.keyword, title_only=True)
        hits = search.to_dict_list()
        # Stops at the first title that lacks the keyword.
        if any('上吊' not in hit['title'] for hit in hits):
            self.fail('標題必須含有 "上吊"')

    def test_02_search_and_soup(self):
        """
        Every article found by the search must yield non-None contents,
        except for pages hosted on home.appledaily.com.tw.
        """
        soups = self.nsearch.by_keyword(self.keyword).to_soup_list()
        for soup in soups:
            if soup.contents() is not None:
                continue
            # home.appledaily.com.tw has a problematic SSL certificate;
            # ignore failures caused by that.
            if soup.path.startswith('https://home.appledaily.com.tw'):
                continue
            self.fail('內文不可為 None, URL={}'.format(soup.path))
Beispiel #5
0
def search_and_list(keyword, channel):
    """
    Search a channel for a keyword and print each result's title, date
    and link.

    Args:
        keyword: search term passed to NewsSearch.by_keyword.
        channel: channel identifier passed to NewsSearch.
    """
    # Format string and result key for the indented detail lines.
    detail_rows = (
        ('     日期: {}', 'date'),
        ('     連結: {}', 'link'),
    )

    print('測試搜尋')
    searcher = NewsSearch(channel, limit=10)
    hits = searcher.by_keyword(keyword).to_dict_list()
    log = get_logger()

    for index, hit in enumerate(hits):
        try:
            print('{:03d}: {}'.format(index, hit['title']))
            for template, field in detail_rows:
                print(template.format(hit[field]))
        except ValueError as err:
            # Log the failure and move on to the next result.
            log.error('例外類型: %s', type(err).__name__)
            log.error(err)
Beispiel #6
0
def search_and_soup(keyword, channel):
    """
    Search a channel for a keyword, then print path, author, date, title
    and a content excerpt for every article found.

    Args:
        keyword: search term passed to NewsSearch.by_keyword.
        channel: channel identifier passed to NewsSearch.
    """
    # Progress line without a newline; it is overwritten once the search ends.
    print('測試搜尋與分解, 搜尋中 ...', end='', flush=True)
    log = get_logger()
    searcher = NewsSearch(channel, limit=10)
    soups = searcher.by_keyword(keyword).to_soup_list()
    # '\r' returns to the line start; the padding blanks out the old text.
    print('\r測試搜尋與分解' + ' ' * 20, flush=True)

    for index, soup in enumerate(soups):
        try:
            print('{:03d}: {}'.format(index, soup.path))
            reporter = soup.author()
            published = soup.date()
            print('     記者: {} / 日期: {}'.format(reporter, published))
            headline = soup.title()
            print('     標題: {}'.format(headline))
            # contents(30): presumably a 30-character excerpt — TODO confirm.
            excerpt = soup.contents(30)
            print('     {} ...'.format(excerpt), flush=True)
        except ValueError as err:
            # Log the failure and move on to the next article.
            log.error('例外類型: %s', type(err).__name__)
            log.error(err)
Beispiel #7
0
def search_and_soup(keyword, channel):
    """
    Search a channel for a keyword, then print path, author, date, title
    and the first 30 items of the contents for every article found.

    Any exception raised while printing an article is logged and the loop
    continues with the next article.

    Args:
        keyword: search term passed to NewsSearch.by_keyword.
        channel: channel identifier passed to NewsSearch.
    """

    print('測試搜尋與分解')
    log = get_logger()
    searcher = NewsSearch(channel, limit=10)
    soups = searcher.by_keyword(keyword).to_soup_list()

    for index, soup in enumerate(soups):
        try:
            print('{:03d}: {}'.format(index, soup.path))
            reporter = soup.author()
            published = soup.date()
            print('     記者: {} / 日期: {}'.format(reporter, published))
            headline = soup.title()
            print('     標題: {}'.format(headline))
            body = soup.contents()
            print('     {} ...'.format(body[:30]))
        except Exception as err:
            log.error('例外類型: %s', type(err).__name__)
            log.error(err)
Beispiel #8
0
def search_and_compare_performance(keyword):
    """
    Benchmark keyword search on every channel and print a Markdown summary.

    Each channel is searched three times with limit=100. For every run the
    seconds per page and seconds per result are printed; the per-page figures
    are collected and printed as a Markdown table at the end. A run that
    returns no pages or no results reports NaN for the affected ratio instead
    of raising ZeroDivisionError.

    Args:
        keyword: search term passed to NewsSearch.by_keyword.
    """

    print('測試各家新聞台的搜尋效能')
    summary = {}
    nan = float('nan')

    for channel in ['appledaily', 'cna', 'ettoday', 'ltn', 'setn', 'udn']:
        print()
        print(channel)
        print('-' * 60)
        summary[channel] = []
        for repeat in range(3):
            nsearch = NewsSearch(channel, limit=100)
            nsearch.by_keyword(keyword)
            results = nsearch.to_dict_list()
            total = len(results)
            pages = nsearch.pages
            elapsed = nsearch.elapsed
            # An empty search would divide by zero and abort the whole
            # benchmark; report NaN for that ratio and keep going.
            tpp = elapsed / pages if pages else nan
            tpr = elapsed / total if total else nan
            summary[channel].append(tpp)
            msg = '{:03d}: {:.3f} 秒/頁, {:.3f} 秒/筆, 共 {} 頁, 總耗時: {:.3f} 秒'
            print(msg.format(repeat, tpp, tpr, pages, elapsed))
        print('-' * 60)

    print()
    print('Markdown 摘要表:')
    print()
    # One column per repeat; keep in sync with range(3) above.
    print('  | 1st | 2nd | 3rd')
    print('---- | ---- | ---- | ----')
    for (channel, samples) in summary.items():
        print(channel, end='')
        for sample in samples:
            print(' | {:.3f}'.format(sample), end='')
        print()
    print()
 def setUp(self):
     """Set the keyword and build the 'ettoday' NewsSearch (limit=10, proxy_first=True)."""
     self.keyword = '上吊'
     searcher = NewsSearch('ettoday', limit=10, proxy_first=True)
     self.nsearch = searcher
Beispiel #10
0
 def setUp(self):
     """Set the keyword and build the 'cna' NewsSearch (limit=10)."""
     self.keyword = '上吊'
     searcher = NewsSearch('cna', limit=10)
     self.nsearch = searcher
 def setUp(self):
     """Set the keyword and build the 'appledaily' NewsSearch (limit=10)."""
     self.keyword = '上吊'
     searcher = NewsSearch('appledaily', limit=10)
     self.nsearch = searcher