Example #1
0
def test_get_meta():
    '''get_meta returns exactly as many entries as requested.

    Checked on the Soft_Job board, on the movie board, and on the movie
    board again when continuing after the last filename of a prior batch.
    '''
    soft_job_entries = ptt.Board('Soft_Job').get_meta(num=20)
    assert len(soft_job_entries) == 20

    movie_board = ptt.Board('movie')
    first_batch = movie_board.get_meta(num=10)
    assert len(first_batch) == 10

    # Continue from the filename of the last entry in the first batch.
    last_filename = first_batch[-1].filename
    next_batch = movie_board.get_meta(num=5, after_filename=last_filename)
    assert len(next_batch) == 5
Example #2
0
def test_get_meta():
    '''get_meta returns ``num`` entries, also with empty start markers.'''
    assert len(ptt.Board('Soft_Job').get_meta(num=20)) == 20

    movie_board = ptt.Board('movie')
    # Plain call first, then an empty start_aid, then an empty start_date.
    for extra in ({}, {'start_aid': ''}, {'start_date': ''}):
        assert len(movie_board.get_meta(num=5, **extra)) == 5
Example #3
0
def test_get_pagination_meta():
    '''Asking Soft_Job for three pages of meta yields three items.'''
    soft_job = ptt.Board('Soft_Job')

    # Exhaust the returned iterable so its length can be checked.
    pages = list(soft_job.get_pagination_meta(pages=3))

    assert len(pages) == 3
Example #4
0
def crawler():
    '''Grab all articles in recent days.

    Reads ``term_date`` and ``board`` from the ``Crawler`` config section,
    then, while ``board.has_prev_page`` is true, retrieves the board DOM,
    parses it into article meta and passes that meta to
    ``retrieve_articles``. Finally every asyncio task known to the event
    loop is gathered and handed to ``ptt.wait_completion``.
    '''
    config_ = config.Config()
    config_section = 'Crawler'

    term_date = config_.getint(config_section, 'term_date')
    LOGGER.info('Start date:[%s]', dh.to_ptt_date())
    LOGGER.info('Term date as [%d] days.', term_date)

    board_name = config_.get(config_section, 'board')
    board = ptt.Board(board_name, term_date)
    LOGGER.info('Retrive articles from board [%s].', board_name)

    total = 0
    while board.has_prev_page:
        board.retrieve_dom(0)
        article_meta_list = parse_board(board)

        count = len(article_meta_list)
        LOGGER.info('[%d] articles\' meta retrieved.', count)
        total += count

        retrieve_articles(*article_meta_list)

    # asyncio.Task.all_tasks() was deprecated in Python 3.7 and removed in
    # 3.9, so prefer asyncio.all_tasks() and only fall back on interpreters
    # that lack it. The loop is passed explicitly because this function is
    # synchronous: without an argument asyncio.all_tasks() requires a
    # running loop.
    loop = asyncio.get_event_loop()
    all_tasks = getattr(asyncio, 'all_tasks', None) or asyncio.Task.all_tasks
    pending = all_tasks(loop)
    ptt.wait_completion(asyncio.gather(*pending))

    LOGGER.info('%d articles handled.', total)
    LOGGER.info('Job finished.')
Example #5
0
def enumerate_meta():
    '''Print index, title, link and content length for Soft_Job entries.

    Five meta entries are requested; entries whose title contains '公告'
    (announcement) are skipped without fetching the post.
    '''
    board = ptt.Board('Soft_Job')
    for index, m in enumerate(board.get_meta(num=5)):
        if '公告' not in m.title:
            r = board.get_post(link=m.link)
            print(index, m.title, m.link, f', words: {len(r.content)}')
Example #6
0
def test_get_post_from_meta():
    '''Each movie-board entry without '公告' in its title yields a post.'''
    movie_board = ptt.Board('movie')

    for entry in movie_board.get_meta(num=5):
        # Entries whose title contains '公告' (announcement) are not fetched.
        if '公告' not in entry.title:
            assert movie_board.get_post(link=entry.link)
Example #7
0
def main():
    '''Print five Gossiping meta entries and the content of their posts.

    Index, title and link are printed for every entry; the post content is
    fetched and printed only when the title does not contain '公告'.
    '''
    gossiping = ptt.Board('Gossiping')
    for index, entry in enumerate(gossiping.get_meta(num=5)):
        print(index, entry.title, entry.link)
        if '公告' not in entry.title:
            fetched = gossiping.post(link=entry.link)
            print(fetched.content)
    def test_set_url(self):
        '''Unit test for ptt.Board.set_url.

        Called without an argument, the board URL must be None; called
        with the board name, it must equal that board's index page path.
        '''
        name = self.BOARD_NAME
        target = ptt.Board(name, 11)

        target.set_url()
        self.assertIsNone(target.url)

        target.set_url(name)
        expected_url = '/bbs/{0}/index.html'.format(name)
        self.assertEqual(target.url, expected_url)
Example #9
0
def test_get_post_from_meta():
    '''Fetch the first parsable movie-board post and check it has an IP.

    Entries for which ``get_post`` raises ``PttParseContentError`` are
    skipped; the first post fetched without that error is the one
    asserted on.
    '''
    board = ptt.Board('movie')
    meta = board.get_meta(num=5)

    # Start from None so that an empty listing, or one where every entry
    # fails to parse, fails the assertion below rather than raising
    # NameError on an unbound variable.
    post = None
    for m in meta:
        try:
            post = board.get_post(link=m.link)
        except PttParseContentError:
            continue
        break

    assert post
    assert post.ip
Example #10
0
    def setUpClass(cls):
        '''The class level setup.

        Stores the page and expectation lists from the board helper,
        builds one ptt.Board per page, and replaces ptt.Board.retrieve_dom
        with the ``retrieve_dom`` in scope while keeping the original on
        ``cls.retrieve_dom``.
        '''
        # Date that test data created at 2017/12/25
        created_on = date(year=2017, month=12, day=25)
        date_diff = (date.today() - created_on).days

        pages, expected = tests.board_helper.setup()
        cls.page_list = pages
        cls.expect_list = expected

        # One board per prepared page.
        cls.board_list = [ptt.Board(cls.BOARD_NAME, date_diff) for _ in pages]

        # Keep a handle on the original method before replacing it.
        cls.retrieve_dom = ptt.Board.retrieve_dom
        ptt.Board.retrieve_dom = retrieve_dom
Example #11
0
def main():
    '''Print the fields of five movie-board meta entries.

    Ten entries are fetched first; the five printed ones are requested
    after the filename of the last entry in that first batch.
    '''
    movie_board = ptt.Board('movie')

    first_batch = movie_board.get_meta(num=10)
    last_filename = first_batch[-1].filename
    entries = movie_board.get_meta(num=5, after_filename=last_filename)
    assert len(entries) == 5

    for m in entries:
        # Each field keeps its own trailing space; print adds the separator.
        fields = (
            f'推文數: {m.push} ',
            f'標記: {m.mark} ',
            f'標題: {m.title} ',
            f'日期: {m.date} ',
            f'作者: {m.author} ',
            f'連結: {m.link} ',
            f'文章檔案編號: {m.filename} ',
        )
        print(*fields)
Example #12
0
 def build_test_board(self):
     '''Build a temporary board object.

     Returns a new ptt.Board for ``self.BOARD_NAME``, constructed with 0
     as its second argument.
     '''
     return ptt.Board(self.BOARD_NAME, 0)
Example #13
0
def test_search_author():
    '''Searching the movie board by author gives a truthy result.'''
    movie_board = ptt.Board('movie')
    assert movie_board.search(author='hsukai')
Example #14
0
def test_search_recommend():
    '''Searching the movie board with recommend=20 gives a truthy result.'''
    movie_board = ptt.Board('movie')
    assert movie_board.search(recommend=20)
Example #15
0
def test_search_pagination():
    '''A two-page recommend search on the movie board yields two items.'''
    movie_board = ptt.Board('movie')

    # Exhaust the returned iterable so its length can be checked.
    pages = list(movie_board.search(recommend=20, num_pages=2))

    assert len(pages) == 2
Example #16
0
def test_search_thread():
    '''Searching the movie board by thread title gives a truthy result.'''
    movie_board = ptt.Board('movie')
    assert movie_board.search(thread='[ 好雷]  死侍2的各種彩蛋討論')
Example #17
0
def test_search_title():
    '''Searching the movie board by title gives a truthy result.'''
    movie_board = ptt.Board('movie')
    assert movie_board.search(title='哈哈')
Example #18
0
def test_save_search_result():
    '''A recommend search result on the movie board serialises to JSON.'''
    movie_board = ptt.Board('movie')
    found = movie_board.search(recommend=20)

    # json.dumps must accept the result and produce a non-empty string.
    assert json.dumps(found)
Example #19
0
def test_get_meta_over18():
    '''get_meta on the Gossiping board returns the 20 entries requested.'''
    gossiping_entries = ptt.Board('Gossiping').get_meta(num=20)
    assert len(gossiping_entries) == 20
Example #20
0
def test_save_meta():
    '''Ten movie-board meta entries serialise to JSON.'''
    movie_board = ptt.Board('movie')
    entries = movie_board.get_meta(num=10)

    # json.dumps must accept the meta list and produce a non-empty string.
    assert json.dumps(entries)