def download_article(article):
    name = article['name']
    print('Start downloading [' + name + ']')
    save_dir = path.join(save_path, name)
    if not path.exists(save_dir):
        os.makedirs(save_dir)
    pics = get_pics(article['href'])
    for pic in pics:
        pic_name = pic['name']
        pic_href = pic['href']
        save_file = path.join(save_dir, pic_name)
        if path.exists(save_file) or savepath.check_exists(
                dir_name, name, pic_name):
            # print(save_file + ' already exists')
            pass
        else:
            try:
                response = session.get(pic_href,
                                       cookies=cookies,
                                       verify=False,
                                       timeout=(3, 3))
                if response.status_code == 200:
                    with open(save_file, 'wb+') as f:
                        f.write(response.content)
                        # print('Downloaded to ' + save_file)
            except Exception as e:
                print(repr(e))
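Every snippet on this page leans on module-level state defined elsewhere in its project: a requests session, a save_path root, a dir_name key, an escape helper for filename-unsafe characters, and a savepath.check_exists lookup recording what was already downloaded. A minimal sketch of that scaffolding, with every name and behavior inferred from the call sites rather than taken from any original module:

# Hypothetical scaffolding inferred from the call sites on this page;
# each original project defines its own versions of these names.
import os
from os import path

import requests

session = requests.Session()   # shared HTTP session reused across requests
cookies = None                 # assumed cookie jar passed to session.get
save_path = os.path.expanduser('~/downloads')  # assumed root for saved images
dir_name = 'example_site'      # assumed key naming this site in the manifest


def escape(name):
    # Assumed helper: replace characters that are illegal in file names.
    for ch in '\\/:*?"<>|':
        name = name.replace(ch, '_')
    return name


class savepath:  # stand-in for the projects' record-keeping module
    save_path = os.path.expanduser('~/downloads')  # Example #8 reads savepath.save_path

    @staticmethod
    def check_exists(dir_name, article_name, pic_name=None):
        # Assumed behavior: report whether this file was already recorded
        # as downloaded (e.g. in a manifest); False forces a re-download.
        return False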
Example #2
def download_article(article):
    print('Start downloading [{}]'.format(article))
    article_name = article['name']
    save_dir = path.join(save_path, escape(article_name))
    if not path.exists(save_dir):
        os.makedirs(save_dir)
    article_href = article['href']
    article_id = article_href.split('/')[-1]
    num = 1
    pic_href = download_url + '/' + article_id + '/' + str(num) + '.jpg'
    try:
        response = session.get(pic_href, verify=False, timeout=(3, 3))
        while response.status_code == 200:
            save_file = path.join(save_dir, str(num) + '.jpg')
            if not path.exists(save_file) and not savepath.check_exists(
                    dir_name, escape(article_name),
                    str(num) + '.jpg'):
                with open(save_file, 'wb+') as f:
                    f.write(response.content)
                    print('Downloaded to ' + save_file)
            else:
                print(save_file + ' already exists')
            num += 1
            pic_href = download_url + '/' + article_id + '/' + str(num) + '.jpg'
            response = session.get(pic_href, verify=False, timeout=(3, 3))
    except Exception as e:
        print(repr(e))
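Example #2 never asks the site how many images a set contains; it simply requests 1.jpg, 2.jpg, ... until a response comes back with a non-200 status. The same probe can be factored into a small generator; the URL layout and the session object are assumptions carried over from the snippet above:

def probe_images(base_url, article_id):
    # Sketch of Example #2's discovery loop: yield (index, bytes) for
    # consecutive images until the first missing index ends the set.
    # Assumes images live at <base_url>/<article_id>/<n>.jpg.
    num = 1
    while True:
        url = '{}/{}/{}.jpg'.format(base_url, article_id, num)
        response = session.get(url, verify=False, timeout=(3, 3))
        if response.status_code != 200:
            break
        yield num, response.content
        num += 1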
Example #3
def d_collection(collection):
    title = collection.find_all('p', class_=['biaoti'])[
        0].text.strip().replace('<', '《').replace('>', '》')
    dir_path = os.path.join(save_path, title)
    if not os.path.exists(dir_path):
        try:
            os.makedirs(dir_path)
        except Exception as e:
            print(repr(e))
            return
    count_str = collection.find_all(
        'span', class_=['shuliang'])[0].text
    count = int(count_str[0:-1])  # drop the trailing unit character from the count text
    print('The collection has ' + str(count) + ' pictures')
    img_url = collection.find_all('img')[0]['src']
    prefix = img_url[0:img_url.rfind('/')]
    # headers = {'Referer': collection.find_all('')}
    for num in range(1, count + 1):
        img_save_path = os.path.join(dir_path, str(num) + '.jpg')
        if os.path.exists(img_save_path) or savepath.check_exists(dir_name, title, str(num) + '.jpg'):
            print('[' + img_save_path + '] already exists')
            continue
        url = prefix + '/' + str(num) + '.jpg'
        try:
            content_response = session.get(url, timeout=3)
            if content_response.status_code == 200:
                content = content_response.content
                with open(img_save_path, 'wb') as f:
                    f.write(content)
                print('Downloaded [' + img_save_path + ']')
        except Exception as e:
            print(repr(e))
            continue
Example #4
def download_article(article):
    name = article['name']
    print('Start downloading collection [' + name + ']')
    save_dir = path.join(save_path, name)
    if not path.exists(save_dir):
        os.makedirs(save_dir)
    article_href = article['href']
    article_href_head = article_href[0:-5]  # strip the trailing '.html'
    article_pages = get_article_pages(article_href)
    print('Collection [' + name + '] has [' + str(article_pages) + '] pictures')
    for article_page in range(1, article_pages + 1):
        if article_page != 1:
            article_href = article_href_head + \
                '_' + str(article_page) + '.html'
        pic = get_pic(article_href)
        if pic:
            save_file = path.join(save_dir, pic['name'])
            if path.exists(save_file) or savepath.check_exists(
                    dir_name, name, pic['name']):
                print(save_file + ': already exists')
            else:
                try:
                    response = session.get(pic['href'],
                                           headers=headers,
                                           verify=False,
                                           timeout=(3, 3))
                    if response.status_code == 200:
                        with open(save_file, 'wb+') as f:
                            f.write(response.content)
                            print('Downloaded ' + save_file)
                except Exception as e:
                    print(repr(e))
                    print('Error while downloading ' + pic['href'])
Example #5
def download_collection(c):
    referer = c.find_all('a')[0]['href']
    headers = {'Referer': referer}
    pic_num = int(c.find_all('p', class_=False)[0].text.split(' ')[1])
    pic = c.find_all('img')[0]
    pic_url = pic['src']
    prefix = pic_url[0:pic_url.rfind('/')]
    pic_name = pic['alt'].strip().replace('<', '《').replace('>', '》') \
        .replace(':', ':').replace(' ', '')
    path = os.path.join(save_path, escape(pic_name.strip()))
    if not os.path.exists(path):
        os.makedirs(path)
    for n in range(1, pic_num + 1):
        pic_save_path = os.path.join(path, str(n) + '.jpg')
        if os.path.exists(pic_save_path) or savepath.check_exists(
                dir_name, escape(pic_name.strip()),
                str(n) + '.jpg'):
            print('[' + pic_save_path + '] already exists')
            continue
        content = None
        try:
            content = session.get(prefix + '/' + str(n) + '.jpg',
                                  timeout=3,
                                  headers=headers).content
        except Exception as e:
            print(repr(e))
            continue
        if content:
            with open(pic_save_path, 'wb') as f:
                f.write(content)
            print('Downloaded [' + pic_save_path + ']')
Example #6
def download_article(article):
    name = article['name']
    print('Start downloading: ' + name)
    save_dir = path.join(save_path, name)
    if not path.exists(save_dir):
        os.makedirs(save_dir)
    pages = get_pic_pages(article['href'])
    for p in range(1, pages + 1):
        url = article['href'] + '?page=' + str(p)
        pic = get_pic(url)
        if pic:
            file_name = path.join(save_dir, pic['name'])
            if path.exists(file_name) or savepath.check_exists(
                    dir_name, name, pic['name']):
                print(file_name + ': already exists')
            else:
                try:
                    response = session.get(pic['href'],
                                           headers=headers,
                                           verify=False,
                                           timeout=(10, 10))
                    if response.status_code == 200:
                        with open(file_name, 'wb+') as f:
                            f.write(response.content)
                        print('Downloaded to: ' + file_name)
                except Exception as e:
                    print(repr(e))
Example #7
def download_article(article):
    print(article)
    global download_zip
    article_href = article['href']
    soup = get_soup(article_href)
    real_article_name = get_real_article_name(soup)
    if real_article_name:
        article_name = real_article_name
    else:
        article_name = article['name'].strip()
        if article_name.endswith('.'):
            article_name = article_name[:-1]
    save_dir = path.join(save_path, article_name)
    if not path.exists(save_dir):
        try:
            os.makedirs(save_dir)
        except Exception as e:
            print(repr(e))
    if download_zip:
        zip_info = get_zip(soup)  # renamed from `zip` to avoid shadowing the builtin
        if zip_info:
            print('Found zip package: {}'.format(zip_info))
            zip_name = zip_info['name']
            zip_href = zip_info['href']
            zip_file = path.join(save_dir, zip_name)
            if not savepath.check_exists(dir_name, article_name, zip_name):
                download(zip_file, zip_href)
    pics = get_pics(soup)
    pic_page = 1
    article_url = article_href[0:-5] + '_{}.html'
    while pics:
        for pic in pics:
            pic_name = pic['name']
            pic_href = pic['href']
            pic_file = path.join(save_dir, pic_name)
            if not savepath.check_exists(dir_name, article_name, pic_name):
                download(pic_file, pic_href)
        pic_page += 1
        soup = get_soup(article_url.format(pic_page))
        pics = get_pics(soup)
Example #8
def download(article: dict):
    pics = get_pics(article)
    dir_name = os.path.join(savepath.save_path, article['title'])
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    if pics:
        for pic in pics:
            content = get_pic_content(pic)
            if content:
                pic_name = pic.split('/')[-1]
                pic_path = os.path.join(dir_name, pic_name)
                if not savepath.check_exists(article['title'], pic_name):
                    with open(pic_path, 'wb+') as f:
                        f.write(content)
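Example #8 pushes the HTTP work into a get_pic_content helper; the `if content:` guard implies it returns the image bytes on success and something falsy on failure. A sketch under exactly those assumptions (the helper itself is not shown in the original):

import requests


def get_pic_content(pic_url):
    # Hypothetical helper matching Example #8's call site: fetch one image
    # URL and return its bytes, or None so the caller skips the file write.
    try:
        response = requests.get(pic_url, timeout=(3, 10))
        if response.status_code == 200:
            return response.content
    except requests.RequestException as e:
        print(repr(e))
    return None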
Example #9
def get_pics(href):
    global cookies
    index = 1
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
        'Connection': 'keep-alive',
        'Referer': href
    }
    content = parse_url(href, headers=headers, cookies=cookies)
    div = content.find('div', class_='main-image') if content else None
    if div:
        pic = div.find('img')
        pic_base_url = pic['src'][:-6]  # strip the trailing file name (e.g. '01.jpg')
        pic_title = escape(pic['alt'])
        print("Start downloading {}, {}".format(pic_title, pic['src']))
        path = os.path.join(save_path, pic_title)
        if not os.path.exists(path):
            os.makedirs(path)
        while True:
            pic_save_path = os.path.join(path, str(index) + '.jpg')
            if os.path.exists(pic_save_path) or savepath.check_exists(
                    dir_name, pic_title,
                    str(index) + '.jpg'):
                # print(pic_save_path + ' already exists!')
                index += 1
                continue
            else:
                pic_url = pic_base_url + str(index) + '.jpg'
                try:
                    get_pic_response = session.get(pic_url,
                                                   headers=headers,
                                                   cookies=cookies,
                                                   timeout=5)
                    if get_pic_response and get_pic_response.status_code == 200:
                        cookies = get_pic_response.cookies
                        pic_content = get_pic_response.content
                        with open(pic_save_path, 'wb+') as f:
                            f.write(pic_content)
                    else:
                        break
                except Exception as e:
                    print(repr(e))
                index += 1
Example #10
def download_collection(collection):
    title = escape(collection[0])
    p = os.path.join(save_path, title)
    if not os.path.exists(p):
        os.makedirs(p)
    href = collection[1]
    headers = {'Referer': href}
    img_url = get_img_start_url(href)
    if not img_url:
        return
    num = 1
    while True:
        if savepath.check_exists(dir_name, title, str(num) + '.jpg'):
            get_pic_res = True
        else:
            get_pic_res = download(
                img_url + '/' + str(num) + '.jpg',
                os.path.join(save_path, title,
                             str(num) + '.jpg'), headers)
        if not get_pic_res:
            break
        num += 1
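Example #10's loop has no explicit end condition; it stops when download returns a falsy value, so the helper doubles as the end-of-set detector. Its signature and return contract below are inferred from the call site, not taken from the original project:

def download(url, save_file, headers):
    # Hypothetical helper matching Example #10's call site: fetch one image,
    # write it to save_file, and return True on success. A falsy return
    # (e.g. on a 404 for the first missing index) ends the caller's loop.
    try:
        response = session.get(url, headers=headers, timeout=(3, 10))
    except Exception as e:
        print(repr(e))
        return False
    if response.status_code != 200:
        return False
    with open(save_file, 'wb') as f:
        f.write(response.content)
    return True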
Example #11
def download_article(article):
    global g_cookies
    print('Start downloading: ' + article['name'])
    save_dir = path.join(save_path, escape(article['name']))
    if not path.exists(save_dir):
        os.makedirs(save_dir)
    pics = get_pics(article)
    for pic in pics:
        save_file = path.join(save_dir, pic['name'])
        if path.exists(save_file) or savepath.check_exists(
                dir_name, escape(article['name']), pic['name']):
            # print(save_file + ': already exists!')
            pass
        else:
            new_headers = {
                'Referer': pic['referer'],
                'User-Agent':
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0',
                'Accept': 'image/webp,*/*',
                'Accept-Language': 'zh-CN,en-US;q=0.7,en;q=0.3',
                'Accept-Encoding': 'gzip, deflate, br',
                'Connection': 'keep-alive',
            }
            try:
                response = session.get(pic['href'],
                                       headers=new_headers,
                                       cookies=g_cookies,
                                       timeout=(10, 10))
                if response.status_code == 200:
                    g_cookies = response.cookies
                    with open(save_file, 'wb+') as f:
                        f.write(response.content)
                else:
                    # The original printed this failure message in the try's
                    # `else` clause, i.e. on every successful request; it
                    # belongs to the non-200 branch instead.
                    print('Failed to fetch ' + pic['href'])
            except Exception as e:
                print(repr(e))
                continue