def get_pic_set_page(url):
    # Collect the URL of every numbered page in a picture set's pager.
    url_list = []
    proxy_ip = t.get_proxy_ip()
    soup = coderpig.get_bs(coderpig.get_resp(url, proxy=proxy_ip))
    divs = soup.find('div', attrs={'class': 'pages'})
    a_s = divs.findAll('a', attrs={'class': 'num'})
    for a in a_s:
        url_list.append(a['href'])
    return url_list
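# The coderpig/t helper modules used throughout this page are not shown.
# A minimal sketch of what the two calls above could look like, assuming
# urllib plus BeautifulSoup; names and signatures are inferred from the
# call sites only, not confirmed by the original source.
import urllib.request
from bs4 import BeautifulSoup

def get_resp(url, proxy=None, headers=None, read=True):
    # Route the request through the given proxy mapping, if any.
    handlers = [urllib.request.ProxyHandler(proxy)] if proxy else []
    opener = urllib.request.build_opener(*handlers)
    req = urllib.request.Request(url, headers=headers or {})
    resp = opener.open(req, timeout=10)
    # read=False hands back the raw response (so getcode() still works);
    # read=True returns the body bytes, matching how callers use it here.
    return resp.read() if read else resp

def get_bs(html):
    return BeautifulSoup(html, 'html.parser')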
def catch_all_boards(user_url):
    proxy_ip = t.get_proxy_ip()
    resp = coderpig.get_resp(user_url, proxy=proxy_ip).decode('utf-8')
    result = boards_pattern.search(resp)
    json_dict = json.loads(result.group(1))
    for item in json_dict:
        coderpig.write_str_data(item['title'] + ':' + str(item['board_id']), board_ids_file)
    # Return the last board_id so the caller can page onwards from it.
    board_id = json_dict[-1]['board_id']
    return board_id
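# boards_pattern is defined elsewhere; judging by json.loads(result.group(1))
# it captures a JSON array embedded in an inline script block. A hypothetical
# definition (the real page markup is not shown here, and nested brackets
# would need a more careful pattern):
import re
boards_pattern = re.compile(r'"boards":(\[.*?\])\s*[,}]', re.S)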
def get_tag_url():
    print("================================================== Checking for valid tag pages:\n")
    for i in range(2, 101):
        proxy_ip = t.get_proxy_ip()
        tag_url = host_url + '/meinvtag' + str(i) + '_1.html'
        # read=False: get the raw response so the status code can be checked first
        resp = coderpig.get_resp(tag_url, proxy=proxy_ip, read=False)
        if resp is not None:
            if resp.getcode() == 200:
                soup = coderpig.get_bs(resp.read())
                coderpig.write_str_data(soup.find('h2').get_text() + "-" + tag_url, tag_url_file)
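# Usage sketch for get_tag_url, assuming the module-level names it relies on
# (placeholders below; the real host_url and tag_url_file are not shown):
host_url = 'https://example.com'   # assumed site root
tag_url_file = 'tag_urls.txt'      # assumed output file, one "title-url" per line

if __name__ == '__main__':
    get_tag_url()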
def read_article_url(url):
    # Keep retrying with fresh proxies until one request succeeds.
    while True:
        proxy_ip = tools.get_proxy_ip()
        try:
            resp = requests.get(url, headers=headers, proxies=proxy_ip, timeout=5)
            if resp is not None and resp.status_code == 200:
                global read_count
                read_count += 1
                print("Total successful visits: %d" % read_count)
                return None
        except Exception as e:
            print(e)
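# requests expects `proxies` to map scheme to proxy URL, so
# tools.get_proxy_ip() presumably returns a dict shaped like this
# (address is a placeholder):
proxy_ip = {
    'http': 'http://203.0.113.7:8080',
    'https': 'http://203.0.113.7:8080',
}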
def get_boards_index_data(url, pic_save_dir):
    print(url)
    proxy_ip = t.get_proxy_ip()
    resp = coderpig.get_resp(url, proxy=proxy_ip).decode('utf-8')
    result = pins_pattern.search(resp)
    json_dict = json.loads(result.group(1))
    if len(json_dict) > 0:
        for item in json_dict:
            coderpig.write_str_data(pic_save_dir + ':' + item['file']['key'], pin_keys_file)
        # Return the last pin_id so the caller can page onwards from it.
        pin_id = json_dict[-1]['pin_id']
        return pin_id
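# The returned pin_id enables cursor-style paging: fetch one slice, then ask
# for the next slice starting after the last pin seen. A driver sketch,
# assuming a hypothetical `max` query parameter (not confirmed by this page):
def crawl_board(board_url, pic_save_dir):
    pin_id = get_boards_index_data(board_url, pic_save_dir)
    while pin_id is not None:
        pin_id = get_boards_index_data(board_url + '?max=' + str(pin_id),
                                       pic_save_dir)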
def download_pic(pic_key, pic_dir):
    proxy_ip = t.get_proxy_ip()
    # Ensure the base download directory exists before writing.
    coderpig.is_dir_existed(pic_download_dir)
    url = img_start_url + pic_key + img_end
    resp = coderpig.get_resp(url, proxy=proxy_ip, headers=referrer_header)
    try:
        print("Downloading image: " + url)
        pic_name = pic_key + ".jpg"
        with open(pic_dir + pic_name, "wb+") as f:
            f.write(resp)
    except Exception as reason:
        print(str(reason))
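# img_start_url, img_end and referrer_header are defined elsewhere; from the
# usage they bracket a CDN file key and satisfy a referer check. Hypothetical
# placeholders only:
img_start_url = 'https://img.example-cdn.com/'          # assumed CDN prefix
img_end = '_fw658'                                      # assumed size suffix
referrer_header = {'Referer': 'https://example.com/'}   # assumed anti-hotlink header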
def catch_pic_diagrams(url, tag):
    soup = coderpig.get_bs(coderpig.get_resp(url).decode('utf-8'))
    title = soup.find('div', attrs={'class': 'ptitle'}).h1.get_text()
    pic_path = pic_save_path + tag + '/' + title + '/'
    coderpig.is_dir_existed(pic_path)
    ul = soup.find('ul', attrs={'class': 'scroll-img scroll-img02 clearfix'})
    lis = ul.findAll('li')
    for li in lis:
        pic_soup = coderpig.get_bs(coderpig.get_resp(li.a['href']).decode('utf-8'))
        pic_div = pic_soup.find('div', attrs={'id': 'pic-meinv'})
        # The page lazy-loads images; the real URL sits in data-original.
        pic_url = pic_div.find('img')['data-original']
        proxy_ip = t.get_proxy_ip()
        coderpig.download_pic(pic_url, pic_path, proxy=proxy_ip)
def download_pic(img_url):
    # Retry with a fresh proxy until the download succeeds.
    while True:
        proxy_ip = tools.get_proxy_ip()
        try:
            resp = requests.get(img_url, headers=headers, proxies=proxy_ip, timeout=5)
            if resp is not None:
                print("Downloading image: " + resp.request.url)
                pic_name = img_url.split("/")[-1]
                with open(pic_save_dir + pic_name, "wb+") as f:
                    f.write(resp.content)
                return None
        except Exception:
            pass  # swallow the error and retry with another proxy
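# The loop above never gives up on a dead URL. A bounded variant of the same
# retry idea (my sketch, not taken from the original source):
def download_pic_bounded(img_url, max_tries=5):
    for _ in range(max_tries):
        proxy_ip = tools.get_proxy_ip()
        try:
            resp = requests.get(img_url, headers=headers,
                                proxies=proxy_ip, timeout=5)
            if resp.status_code == 200:
                with open(pic_save_dir + img_url.split("/")[-1], "wb+") as f:
                    f.write(resp.content)
                return True
        except requests.RequestException:
            continue  # try the next proxy
    return False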
def catch_json_boards(url):
    proxy_ip = t.get_proxy_ip()
    print("Fetching boards JSON: " + url)
    resp = coderpig.get_resp(url, headers=json_headers, proxy=proxy_ip)
    # get_resp may return None, so check before decoding.
    if resp is None:
        return None
    json_dict = json.loads(resp.decode('utf-8'))
    boards = json_dict['user']['boards']
    if len(boards) == 0:
        return None
    for item in boards:
        coderpig.write_str_data(item['title'] + ':' + str(item['board_id']), board_ids_file)
    return boards[-1]['board_id']
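# Like catch_all_boards, this returns the last board_id so a caller can page
# through every board. Driver sketch, assuming a hypothetical `max` query
# parameter on the JSON endpoint:
def crawl_user_boards(user_json_url):
    board_id = catch_json_boards(user_json_url)
    while board_id is not None:
        board_id = catch_json_boards(user_json_url + '?max=' + str(board_id))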
def get_json_list(url, pic_save_dir):
    proxy_ip = t.get_proxy_ip()
    print("Fetching JSON: " + url)
    resp = coderpig.get_resp(url, headers=json_headers, proxy=proxy_ip)
    # get_resp may return None, so check before decoding.
    if resp is None:
        return None
    json_dict = json.loads(resp.decode('utf-8'))
    pins = json_dict['board']['pins']
    if len(pins) == 0:
        return None
    for item in pins:
        coderpig.write_str_data(pic_save_dir + ':' + item['file']['key'], pin_keys_file)
    return pins[-1]['pin_id']
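# Records written above look like "<save_dir>:<file_key>". A consumer can
# split on the first ':' to recover both parts (sketch; assumes one record
# per line and no ':' inside the directory name):
with open(pin_keys_file, encoding='utf-8') as f:
    for line in f:
        save_dir, _, pic_key = line.strip().partition(':')
        download_pic(pic_key, save_dir + '/')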
def download_pic(pic_data):
    # pic_data is a "~"-separated record: "<subfolder>~<image url>"
    split = pic_data.split("~")
    pic_dir = c.ZZS_FLS_MZT_SAVE_PATH + split[0] + "/"
    pic_url = split[1]
    t.is_dir_existed(pic_dir)
    # Retry with a fresh proxy until the download succeeds.
    while True:
        proxy_ip = t.get_proxy_ip()
        print(proxy_ip)
        try:
            resp = requests.get(pic_url, proxies=proxy_ip, timeout=5)
            if resp is not None:
                print("Downloading image: " + resp.request.url)
                pic_name = pic_url.split("/")[-1]
                with open(pic_dir + pic_name, "wb+") as f:
                    f.write(resp.content)
                return None
        except Exception as e:
            print(e)
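# Example input record for this variant, matching the "~" split above
# (placeholder URL):
download_pic('cosplay~https://example.com/imgs/001.jpg')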