Ejemplo n.º 1
0
def get_list_of_site(index_page):
    """Return the list-page URLs found on the index page.

    Downloads the HTML at ``index_page``, wraps it in ``Htmls`` using the
    ``'html.parser'`` backend and reads the entries from ``get_list()``.
    Each entry is appended directly to ``index_page`` (plain string
    concatenation, no separator is inserted).

    Args:
        index_page: URL string of the index page.

    Returns:
        A list with one ``index_page + entry`` string per entry.
    """
    markup = Download.download_html(index_page)
    parsed = Htmls(markup, 'html.parser')

    # NOTE(review): entries are presumably relative link strings -- confirm
    # against Htmls.get_list().
    site_urls = []
    for suffix in parsed.get_list():
        site_urls.append(index_page + suffix)

    return site_urls
Ejemplo n.º 2
0
def get_pages_of_a_html(base_url):
    """Return the URL of ``base_url`` followed by the URLs of its sub-pages.

    Downloads ``base_url``, parses it with ``Htmls`` (``'html.parser'``) and
    reads the page names from ``get_pages()``. The last path segment of
    ``base_url`` is placed first, and every name is prefixed with
    ``base_url`` minus that last segment.

    Args:
        base_url: URL string of the first page.

    Returns:
        A new list of URL strings; the first element equals ``base_url``.
    """
    html = Htmls(Download.download_html(base_url), 'html.parser')

    # Strip only the trailing segment. A plain str.replace() on the segment
    # text would also delete earlier occurrences of the same text elsewhere in
    # the URL (e.g. "http://a.com/a/a" would collapse to "http://.com//").
    directory, slash, first_page = base_url.rpartition('/')
    prefix = directory + slash

    # NOTE(review): get_pages() presumably yields relative page names as
    # strings -- confirm against Htmls.
    page_names = [first_page] + list(html.get_pages())
    return [prefix + name for name in page_names]
Ejemplo n.º 3
0
def down_page_img(page):
    """Download the JPG images referenced by the page at ``page``.

    Downloads and parses ``page`` with ``Htmls`` (``'html.parser'``), then
    walks the values from ``get_img_tag_values()``. A value is downloaded
    only when it contains both ``'http'`` and ``'jpg'``. The target path is
    ``down_path`` + second-to-last URL segment + ``'/'`` + last URL segment.

    Args:
        page: URL string of the page whose images should be fetched.

    Returns:
        None. Progress is printed and files are written via
        ``Download.download``.
    """
    html = Htmls(Download.download_html(page), 'html.parser')

    for img in html.get_img_tag_values():
        # Filter before touching the path segments: a value without any '/'
        # (e.g. a bare "logo.jpg") used to raise IndexError on the [-2]
        # lookup and abort the remaining downloads.
        if 'http' not in img or 'jpg' not in img:
            continue

        file_names = str(img).split('/')
        if len(file_names) < 2:
            # No parent segment available to build the sub-directory name.
            continue

        # NOTE(review): assumes the target directory already exists or is
        # created by Download.download -- confirm.
        current_path = down_path + file_names[-2] + '/' + file_names[-1]

        print("download pic: ", file_names[-1])
        Download.download(img, current_path)
Ejemplo n.º 4
0
def get_detail_pages_from_list(page_list):
    """Return the items found on a list page.

    Downloads the HTML at ``page_list``, wraps it in ``Htmls`` using the
    ``'html.parser'`` backend and returns whatever ``get_items()`` yields.

    Args:
        page_list: URL of the list page to download.

    Returns:
        The result of ``Htmls.get_items()`` for that page, unchanged.
    """
    markup = Download.download_html(page_list)
    return Htmls(markup, 'html.parser').get_items()