def test_next_page(self):
    """Check crawler.next_page against the two pagination fixtures.

    The first fixture is expected to yield the URL of search page 2; the
    second is expected to yield None.
    """
    expectations = (
        (data_next_page_1, "https://github.com/search?p=2&q=python+crawler+json&type=Repositories"),
        (data_next_page_2, None),
    )
    for load_fixture, expected_url in expectations:
        # Each fixture is loaded and parsed right before it is checked.
        soup = BeautifulSoup(load_fixture(), "lxml")
        self.assertEqual(crawler.next_page(soup.body.ul), expected_url)
Ejemplo n.º 2
0
def MultiPageDownload():
    """Download a user-chosen number of pages in one run.

    Prompts for the total page count, asks for a tag through
    crawler.ask_tag(), resolves it with crawler.determineTag(), downloads
    the first page with coreDL(), then fetches and downloads each
    remaining page.

    Returns:
        True once the download loop has finished.

    Raises:
        ValueError: if the page count typed by the user is not an integer.
    """
    page_count = int(input("请输入一共要下载页数 : "))
    tag_url = crawler.ask_tag()  # ask the user for the tag
    # Original comment: determineTag checks whether the tag exists.
    first_page_html, current_page_url = crawler.determineTag(tag_url)
    coreDL(first_page_html)
    # Page 1 is already downloaded, so only page_count - 1 further pages remain.
    for page_number in range(1, page_count):
        # NOTE(review): current_page_url is never advanced; presumably
        # next_page derives the target from the first-page URL plus the page
        # number -- confirm against crawler.next_page.
        next_page_url, _ = crawler.next_page(current_page_url, page_number)
        coreDL(crawler.getSource(next_page_url))
    return True
Ejemplo n.º 3
0
def MultiPageDownload():
    """Download a user-chosen number of pages in one run.

    Prompts for the total page count, asks for a tag through
    crawler.ask_tag(), resolves it with crawler.determineTag(), downloads
    the first page with coreDL(), then fetches and downloads each
    remaining page.

    Returns:
        True once the download loop has finished.

    Raises:
        ValueError: if the page count typed by the user is not an integer.
    """
    page_count = int(input("请输入一共要下载页数 : "))
    tag_url = crawler.ask_tag()  # ask the user for the tag
    # Original comment: determineTag checks whether the tag exists.
    first_page_html, current_page_url = crawler.determineTag(tag_url)
    coreDL(first_page_html)
    # Page 1 is already downloaded, so only page_count - 1 further pages remain.
    for page_number in range(1, page_count):
        # NOTE(review): current_page_url is never advanced; presumably
        # next_page derives the target from the first-page URL plus the page
        # number -- confirm against crawler.next_page.
        next_page_url, _ = crawler.next_page(current_page_url, page_number)
        coreDL(crawler.getSource(next_page_url))
    return True
Ejemplo n.º 4
0
def SinglePageDownload():
    """Download one page at a time, asking before each further page.

    Asks for a tag through crawler.ask_tag(), resolves it with
    crawler.determineTag(), and downloads the first page with coreDL().
    It then prompts repeatedly: an answer starting with 'Y' or 'y'
    downloads the next page; any other answer, including an empty one,
    stops the loop.

    Returns:
        True once the user declines to continue.
    """
    full_url = crawler.ask_tag()  # ask the user for the tag
    # Original comment: determineTag checks whether the tag exists.
    current_page_html, current_page_url = crawler.determineTag(full_url)
    coreDL(current_page_html)  # download the first page
    current_page_number = 1  # the first page is page 1

    # A flag drives the loop (the original author preferred this to `while True`).
    keep_going = True
    while keep_going:
        answer = input('是否下载下一页内容:[Y/N]')
        # Slice rather than index: answer[0] raises IndexError when the user
        # just presses Enter; an empty answer is treated as "no".
        keep_going = answer[:1] in ('Y', 'y')
        if keep_going:
            print(current_page_url)
            # NOTE(review): current_page_url is never advanced; presumably
            # next_page derives the target from the first-page URL plus the
            # page number -- confirm against crawler.next_page.
            next_page_url, next_page_number = crawler.next_page(current_page_url, current_page_number)
            print(next_page_url, next_page_number)
            current_page_number += 1
            coreDL(crawler.getSource(next_page_url))

    return True
Ejemplo n.º 5
0
def SinglePageDownload():
    """Download one page at a time, asking before each further page.

    Asks for a tag through crawler.ask_tag(), resolves it with
    crawler.determineTag(), and downloads the first page with coreDL().
    It then prompts repeatedly: an answer starting with 'Y' or 'y'
    downloads the next page; any other answer, including an empty one,
    stops the loop.

    Returns:
        True once the user declines to continue.
    """
    full_url = crawler.ask_tag()  # ask the user for the tag
    # Original comment: determineTag checks whether the tag exists.
    current_page_html, current_page_url = crawler.determineTag(full_url)
    coreDL(current_page_html)  # download the first page
    current_page_number = 1  # the first page is page 1

    # A flag drives the loop (the original author preferred this to `while True`).
    keep_going = True
    while keep_going:
        answer = input('是否下载下一页内容:[Y/N]')
        # Slice rather than index: answer[0] raises IndexError when the user
        # just presses Enter; an empty answer is treated as "no".
        keep_going = answer[:1] in ('Y', 'y')
        if keep_going:
            print(current_page_url)
            # NOTE(review): current_page_url is never advanced; presumably
            # next_page derives the target from the first-page URL plus the
            # page number -- confirm against crawler.next_page.
            next_page_url, next_page_number = crawler.next_page(current_page_url, current_page_number)
            print(next_page_url, next_page_number)
            current_page_number += 1
            coreDL(crawler.getSource(next_page_url))

    return True