Example #1
import requests
from bs4 import BeautifulSoup

def parse_meizitu(url):
    # Fetch the page, then hand every <img> source URL to save_img().
    response = requests.get(url=url, headers=get_headers())
    soup = BeautifulSoup(response.content, "html.parser")
    img_tags = soup.find_all("img")
    for tag in img_tags:
        img_url = tag.get("src")
        if img_url:  # some <img> tags carry no src attribute
            save_img(img_url)
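All of these examples call a get_headers() helper that is never shown. A minimal sketch, assuming it only needs to supply a browser-like User-Agent (the string below is illustrative, not from the original code):

def get_headers():
    # Hypothetical helper: a browser-like User-Agent keeps many sites
    # from rejecting the scripted request outright.
    return {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}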
Example #2
import os
import time

import requests

def save_img(url):
    # Save the downloaded image under dir_root, named by the current timestamp.
    dir_root = "G:\\picture\\"  # backslashes must be escaped in string literals
    if not os.path.exists(dir_root):
        os.mkdir(dir_root)
    response = requests.get(url=url, headers=get_headers())
    # Keep the extension from the URL so the file opens with the right viewer.
    file_name = str(time.time()) + os.path.splitext(url)[1]
    with open(dir_root + file_name, "wb") as f:
        f.write(response.content)
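A minimal way to exercise Examples #1 and #2 together; the gallery URL below is a placeholder, not taken from the original code:

if __name__ == "__main__":
    # Download every image found on one page; substitute a real URL.
    parse_meizitu("https://example.com/gallery")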
Example #3
import requests
from bs4 import BeautifulSoup

def parse_url_to_html(url, file_name):
    # Fetch url (retrying up to five times), extract the article body,
    # and write it to file_name as UTF-8 encoded HTML.
    try:
        count = 0
        response = requests.get(url,
                                headers=get_headers(),
                                proxies=get_proxies())
        # status_code is an int, so compare with 200, not the string '200'
        while response.status_code != 200 and count < 5:
            response = requests.get(url,
                                    headers=get_headers(),
                                    proxies=get_proxies())
            count += 1

        if response.status_code != 200:
            print(response)
            return
    except Exception as e:
        print(e)
        return
    soup = BeautifulSoup(response.content, 'html.parser')
    body = soup.find_all(class_="x-wiki-content x-main-content")[0]
    html = str(body).encode('utf-8')
    with open(file_name, 'wb') as f:
        f.write(html)
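Examples #3 and #4 also depend on a get_proxies() helper that is not part of the snippet. A sketch, assuming it returns a requests-style proxy mapping; the address is a placeholder, not a working proxy:

def get_proxies():
    # Hypothetical helper: map each scheme to a proxy endpoint.
    return {"http": "http://127.0.0.1:8080",
            "https": "http://127.0.0.1:8080"}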
Example #4
import requests
from bs4 import BeautifulSoup

def get_url_list():
    """
    Collect the URL of every page listed in the wiki's side menu.
    :return: list of absolute page URLs
    """
    baseUrl = 'https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000'
    response = requests.get(baseUrl,
                            headers=get_headers(),
                            proxies=get_proxies())
    soup = BeautifulSoup(response.content, 'html.parser')
    menu_tag = soup.find_all(class_='uk-nav uk-nav-side')[1]
    urls = []
    for item in menu_tag.find_all('div'):
        if item.a:  # skip menu entries that carry no link
            urls.append("https://www.liaoxuefeng.com" + item.a.get('href'))
    return urls
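One plausible driver that ties Examples #3 and #4 together; the numbered file names are illustrative, not from the original:

for index, url in enumerate(get_url_list()):
    # Save each chapter as 000.html, 001.html, and so on.
    parse_url_to_html(url, "%03d.html" % index)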
Example #5
import requests

def test_proxy(ip, proxy_type):
    # Probe the proxy with a short timeout; return True if it answers.
    # (Renamed the parameter from `type`, which shadows the builtin.)
    if proxy_type == 'http':
        base_test_url = test_http_proxy_base_url
    elif proxy_type == 'https':
        base_test_url = test_https_proxy_base_url
    else:
        raise RuntimeError("invalid proxy type: %s" % proxy_type)
    try:
        requests.get(url=base_test_url,
                     headers=get_headers(),
                     proxies={proxy_type: ip},
                     timeout=2)
    except requests.RequestException:
        print('%s connection failed ...' % ip)
        return False
    else:
        print('%s connection success...' % ip)
        return True
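Example #5 references two module-level test URLs that the snippet never defines. Plausible placeholder values; any stable HTTP and HTTPS endpoint would work:

# Hypothetical constants assumed by test_proxy().
test_http_proxy_base_url = "http://httpbin.org/ip"
test_https_proxy_base_url = "https://httpbin.org/ip"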
Example #6
import requests
from bs4 import BeautifulSoup

def get_html_sp(url):
    resp = requests.get(url, headers=get_headers())
    if resp:  # a Response is falsy for 4xx/5xx status codes
        return BeautifulSoup(resp.content, "html.parser")
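A quick usage sketch for Example #6; the URL is a placeholder:

soup = get_html_sp("https://example.com")
if soup is not None:
    # The fetch succeeded and returned a parse tree.
    print(soup.title.string if soup.title else "no <title> found")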
Example #7
import requests

def get_response(url):
    # Thin wrapper: requests.get with the shared headers attached.
    return requests.get(url, headers=get_headers())
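Example #7 is only a convenience wrapper; a trivial usage sketch with a placeholder URL:

resp = get_response("https://example.com")
print(resp.status_code, len(resp.content))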