Code Example #1
def catch_pic_diagrams(url):
    soup = coderpig.get_bs(coderpig.get_resp(url).decode('utf-8'))
    # Grab the article title first and use it as the folder name
    # (colons are replaced since they are illegal in directory names):
    article_header = soup.find('header', attrs={'class': 'article-header'}).find('a').get_text().replace(':', ' ')
    save_path = pic_save_path + article_header + "/"
    coderpig.is_dir_existed(save_path)
    print("Downloading: " + article_header)
    # Collect the image URLs inside the article body
    imgs = soup.find('article').findAll('img')
    for img in imgs[:-1]:  # the last <img> is skipped on purpose (likely a non-content image)
        coderpig.download_pic(img['src'].lstrip('/'), save_path)
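All of these snippets depend on a shared coderpig helper module that never appears in the listing, plus a module-level pic_save_path. The sketch below is a hypothetical reconstruction based only on how the helpers are called, assuming requests and BeautifulSoup underneath; the real module may differ in signatures and behavior.

import os
import requests
from bs4 import BeautifulSoup

pic_save_path = './pics/'  # assumed module-level save root

def get_resp(url, proxy=None):
    # Fetch raw bytes; callers .decode('utf-8') themselves
    proxies = {'http': proxy, 'https': proxy} if proxy else None
    return requests.get(url, proxies=proxies, timeout=10).content

def get_bs(html_text):
    # Wrap HTML text in a BeautifulSoup parser
    return BeautifulSoup(html_text, 'html.parser')

def is_dir_existed(path):
    # Create the directory if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)

def download_pic(url, save_path, proxy=None):
    # Download one picture to disk, naming the file after the URL tail
    if url.startswith('//'):
        url = 'http:' + url   # protocol-relative URL
    elif not url.startswith('http'):
        url = 'http://' + url
    proxies = {'http': proxy, 'https': proxy} if proxy else None
    resp = requests.get(url, proxies=proxies, timeout=10)
    with open(os.path.join(save_path, url.split('/')[-1]), 'wb') as f:
        f.write(resp.content)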
Code Example #2
import json
import os

def fetch_json(url):
    # The endpoint returns JSON; parse it and walk the post list
    data = json.loads(coderpig.get_resp(url).decode('utf-8'))
    for result in data['postList']:
        # post_id may arrive as a number, so coerce it to str for the path
        save_path = pic_save_path + str(result['post_id']) + '/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        pic_list = get_pic_url_list(result['url'])
        for pic in pic_list:
            coderpig.download_pic(pic, save_path)
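get_pic_url_list is called here without being defined anywhere in the listing. A minimal sketch, assuming each post URL points at an HTML page whose <img> tags carry the pictures (the selector is a guess):

def get_pic_url_list(post_url):
    # Hypothetical helper: open one post page and collect every image URL
    soup = coderpig.get_bs(coderpig.get_resp(post_url).decode('utf-8'))
    return [img['src'] for img in soup.findAll('img') if img.get('src')]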
Code Example #3
def catch_pic_diagrams(url, tag):
    soup = coderpig.get_bs(coderpig.get_resp(url).decode('utf-8'))
    title = soup.find('div', attrs={'class': 'ptitle'}).h1.get_text()
    pic_path = pic_save_path + tag + '/' + title + '/'
    coderpig.is_dir_existed(pic_path)
    # Each <li> in the scroller links to a detail page holding one picture
    ul = soup.find('ul', attrs={'class': 'scroll-img scroll-img02 clearfix'})
    lis = ul.findAll('li')
    for li in lis:
        pic_soup = coderpig.get_bs(coderpig.get_resp(li.a['href']).decode('utf-8'))
        pic_div = pic_soup.find('div', attrs={'id': 'pic-meinv'})
        # The real image URL sits in the lazy-load 'data-original' attribute
        pic_url = pic_div.find('img')['data-original']
        proxy_ip = t.get_proxy_ip()  # t is a proxy-pool helper defined elsewhere
        coderpig.download_pic(pic_url, pic_path, proxy=proxy_ip)
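The proxy pool behind t.get_proxy_ip() is not shown either. A minimal sketch, assuming a static list rotated at random (only the function name comes from the example; everything else is assumption):

import random

_PROXY_POOL = [
    '127.0.0.1:8888',  # placeholder entries; fill in live proxies
]

def get_proxy_ip():
    # Pick a proxy at random so successive requests come from different IPs
    return random.choice(_PROXY_POOL)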
Code Example #4
def catch_pic_diagrams(url):
    resp = coderpig.get_resp(url).decode('utf-8')
    soup = coderpig.get_bs(resp)
    dir_name = soup.find('title').get_text()[:-5]  # drop the trailing site-name suffix of the <title>
    save_path = pic_save_path + dir_name + '/'
    coderpig.is_dir_existed(save_path)
    # Read the total page count from the "last page" link ('末页' means "last page")
    page_count = int(moye_pattern.match(soup.find('a', text='末页')['href']).group(1))
    for page in range(1, page_count + 1):
        page_resp = coderpig.get_resp(url.replace('.html', '_' + str(page) + '.html')).decode('utf-8')
        page_soup = coderpig.get_bs(page_resp)
        # Collect the images on this page
        imgs = page_soup.find('p', attrs={'align': 'center'}).findAll('img')
        for img in imgs:
            coderpig.download_pic(img['src'], save_path)
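moye_pattern is defined outside the snippet ('moye' transliterates 末页, "last page"). Assuming the site's paginated URLs end in _<page>.html, the pattern could be:

import re

# Hypothetical definition: capture the page number in the last-page link,
# e.g. '/tupian/12345_8.html' -> '8'
moye_pattern = re.compile(r'.*_(\d+)\.html')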
Code Example #5
import json

def fetch_pic():
    browser = coderpig.init_browser()
    for i in range(1, max_page_count + 1):
        url = weibo_url + containerid + "&page=" + str(i)
        browser.get(url)
        print("Parsing ====== page %d ======" % i)
        html_text = browser.page_source
        soup = coderpig.get_bs(html_text)
        # The JSON response is rendered inside a <pre> tag; pull it out and parse it
        data_json = soup.find('pre').get_text()
        data_dict = json.loads(data_json)
        cards = data_dict['data']['cards']
        for card in cards:
            if 'mblog' in card and 'pics' in card['mblog']:
                for pic in card['mblog']['pics']:
                    if 'large' in pic:
                        # Prefer the large-size variant of each picture
                        coderpig.download_pic(pic['large']['url'], save_path)
    browser.close()
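init_browser is another unseen coderpig helper. Since the function relies on browser.page_source, a plausible sketch is a headless Chrome instance via Selenium (only the name comes from the example; the implementation is assumed):

from selenium import webdriver

def init_browser():
    # Hypothetical init_browser: headless Chrome so pages render without a window
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    return webdriver.Chrome(options=options)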
Code Example #6
import json

def fetch_meizi_pic(url):
    # The API returns JSON with a 'results' array of picture entries
    data = json.loads(coderpig.get_resp(url).decode('utf-8'))
    for result in data['results']:
        coderpig.download_pic(result['url'], pic_save_path)
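A driver loop might look like this, assuming the API paginates with a trailing page number (the endpoint below is a placeholder, not the real one):

# Hypothetical driver: fetch the first three pages of results
api_base = 'http://example.com/api/data/10/'  # placeholder endpoint
for page in range(1, 4):
    fetch_meizi_pic(api_base + str(page))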