def test_get_meta():
    '''Check that Board.get_meta returns exactly as many entries as requested.'''
    soft_job = ptt.Board('Soft_Job')
    assert len(soft_job.get_meta(num=20)) == 20

    movie = ptt.Board('movie')
    first_batch = movie.get_meta(num=10)
    assert len(first_batch) == 10

    # Request a second batch, passing the filename of the last entry of the
    # first batch as the ``after_filename`` anchor.
    anchor = first_batch[-1].filename
    second_batch = movie.get_meta(num=5, after_filename=anchor)
    assert len(second_batch) == 5
def test_get_meta():
    '''Check the size of Board.get_meta results, with and without start hints.'''
    soft_job = ptt.Board('Soft_Job')
    assert len(soft_job.get_meta(num=20)) == 20

    movie = ptt.Board('movie')
    assert len(movie.get_meta(num=5)) == 5

    # An empty ``start_aid`` or ``start_date`` must still yield five entries.
    for extra in ({'start_aid': ''}, {'start_date': ''}):
        assert len(movie.get_meta(num=5, **extra)) == 5
def test_get_pagination_meta():
    '''Check that get_pagination_meta(pages=3) yields exactly three items.'''
    board = ptt.Board('Soft_Job')
    # Materialise the returned iterable so its length can be checked.
    pages = list(board.get_pagination_meta(pages=3))
    assert len(pages) == 3
def crawler():
    '''Grab all articles in recent days.

    Reads ``term_date`` and ``board`` from the ``Crawler`` config section,
    then, while ``board.has_prev_page`` is true, retrieves a page, parses its
    article metadata with ``parse_board`` and passes that metadata to
    ``retrieve_articles``. Finally it gathers the event loop's tasks and
    hands them to ``ptt.wait_completion`` before logging the total count.
    '''
    config_ = config.Config()
    config_section = 'Crawler'
    term_date = config_.getint(config_section, 'term_date')
    LOGGER.info('Start date:[%s]', dh.to_ptt_date())
    LOGGER.info('Term date as [%d] days.', term_date)

    board_name = config_.get(config_section, 'board')
    board = ptt.Board(board_name, term_date)
    LOGGER.info('Retrive articles from board [%s].', board_name)

    total = 0
    while board.has_prev_page:
        board.retrieve_dom(0)
        article_meta_list = parse_board(board)
        count = len(article_meta_list)
        LOGGER.info('[%d] articles\' meta retrieved.', count)
        total += count
        retrieve_articles(*article_meta_list)

    # asyncio.Task.all_tasks() was deprecated in Python 3.7 and removed in
    # 3.9, so prefer asyncio.all_tasks() and only fall back to the old
    # classmethod on interpreters that lack it. The loop is passed explicitly
    # because asyncio.all_tasks() without an argument requires a *running*
    # loop, which this synchronous function does not have.
    # NOTE(review): asyncio.all_tasks() omits already-finished tasks, unlike
    # the old classmethod - confirm no caller relies on their results here.
    all_tasks = getattr(asyncio, 'all_tasks', None) or asyncio.Task.all_tasks
    pending = all_tasks(asyncio.get_event_loop())
    # presumably ptt.wait_completion drives the loop until the gathered
    # future is done - verify against its definition.
    ptt.wait_completion(asyncio.gather(*pending))

    LOGGER.info('%d articles handled.', total)
    LOGGER.info('Job finished.')
def enumerate_meta():
    '''Print index, title, link and content length for recent Soft_Job posts.

    Entries whose title contains '公告' are skipped without fetching the post.
    '''
    ptt_board = ptt.Board('Soft_Job')
    for index, meta in enumerate(ptt_board.get_meta(num=5)):
        if '公告' not in meta.title:
            post = ptt_board.get_post(link=meta.link)
            print(index, meta.title, meta.link, f', words: {len(post.content)}')
def test_get_post_from_meta():
    '''Check that each non-'公告' entry of the movie board resolves to a post.'''
    movie = ptt.Board('movie')
    for entry in movie.get_meta(num=5):
        # Entries whose title contains '公告' are not fetched.
        if '公告' not in entry.title:
            fetched = movie.get_post(link=entry.link)
            assert fetched
def main():
    '''Print recent Gossiping entries and the content of non-'公告' posts.'''
    gossiping = ptt.Board('Gossiping')
    for index, meta in enumerate(gossiping.get_meta(num=5)):
        print(index, meta.title, meta.link)
        # Only fetch and print the body when the title does not contain '公告'.
        if '公告' not in meta.title:
            print(gossiping.post(link=meta.link).content)
def test_set_url(self):
    '''Unit test for ptt.Board.set_url.'''
    name = self.BOARD_NAME
    board = ptt.Board(name, 11)

    # Called without an argument, set_url must leave the URL as None.
    board.set_url()
    self.assertIsNone(board.url)

    # Called with the board name, the URL must point at that board's index.
    board.set_url(name)
    expected_url = '/bbs/{0}/index.html'.format(name)
    self.assertEqual(board.url, expected_url)
def test_get_post_from_meta():
    '''Check that a post fetched from movie-board metadata has an ``ip``.

    Tries the metadata entries in order and keeps the first post whose fetch
    does not raise ``PttParseContentError``.
    '''
    board = ptt.Board('movie')
    meta = board.get_meta(num=5)

    # Start from None so that, when every entry fails to parse, the test
    # fails on the assertion below rather than with a NameError.
    post = None
    for m in meta:
        try:
            post = board.get_post(link=m.link)
        except PttParseContentError:
            continue
        break

    assert post
    assert post.ip
def setUpClass(cls):
    '''The class level setup.'''
    # The test data was created on 2017/12/25; compute how many days ago
    # that was and pass it to every Board built below.
    created_on = date(2017, 12, 25)
    date_diff = (date.today() - created_on).days

    pages, expectations = tests.board_helper.setup()
    cls.page_list = pages
    cls.expect_list = expectations

    # One Board per entry in the page list.
    cls.board_list = [
        ptt.Board(cls.BOARD_NAME, date_diff) for _ in range(len(pages))
    ]

    # Keep a reference to the current ptt.Board.retrieve_dom on the class
    # (presumably so it can be restored later), then replace it with the
    # retrieve_dom defined elsewhere in this file.
    cls.retrieve_dom = ptt.Board.retrieve_dom
    ptt.Board.retrieve_dom = retrieve_dom
def main():
    '''Fetch two batches of movie-board metadata and print the second one.'''
    movie = ptt.Board('movie')
    first_batch = movie.get_meta(num=10)

    # The second request passes the last filename of the first batch as the
    # ``after_filename`` anchor.
    last_filename = first_batch[-1].filename
    second_batch = movie.get_meta(num=5, after_filename=last_filename)
    assert len(second_batch) == 5

    for entry in second_batch:
        print(
            f'推文數: {entry.push} ',
            f'標記: {entry.mark} ',
            f'標題: {entry.title} ',
            f'日期: {entry.date} ',
            f'作者: {entry.author} ',
            f'連結: {entry.link} ',
            f'文章檔案編號: {entry.filename} ',
        )
def build_test_board(self):
    '''Return a fresh ptt.Board for BOARD_NAME with 0 as second argument.'''
    return ptt.Board(self.BOARD_NAME, 0)
def test_search_author():
    '''Check that searching the movie board by author gives a truthy result.'''
    movie = ptt.Board('movie')
    assert movie.search(author='hsukai')
def test_search_recommend():
    '''Check that searching with recommend=20 gives a truthy result.'''
    movie = ptt.Board('movie')
    assert movie.search(recommend=20)
def test_search_pagination():
    '''Check that a search with num_pages=2 yields exactly two items.'''
    movie = ptt.Board('movie')
    # Materialise the returned iterable so its length can be checked.
    pages = list(movie.search(recommend=20, num_pages=2))
    assert len(pages) == 2
def test_search_thread():
    '''Check that searching the movie board by thread gives a truthy result.'''
    movie = ptt.Board('movie')
    assert movie.search(thread='[ 好雷] 死侍2的各種彩蛋討論')
def test_search_title():
    '''Check that searching the movie board by title gives a truthy result.'''
    movie = ptt.Board('movie')
    assert movie.search(title='哈哈')
def test_save_search_result():
    '''Check that a search result can be serialised with json.dumps.'''
    movie = ptt.Board('movie')
    serialized = json.dumps(movie.search(recommend=20))
    assert serialized
def test_get_meta_over18():
    '''Check that get_meta(num=20) on the Gossiping board returns 20 entries.'''
    gossiping = ptt.Board('Gossiping')
    assert len(gossiping.get_meta(num=20)) == 20
def test_save_meta():
    '''Check that board metadata can be serialised with json.dumps.'''
    movie = ptt.Board('movie')
    serialized = json.dumps(movie.get_meta(num=10))
    assert serialized