Ejemplo n.º 1
0
def get(url, headers):
    """Fetch ``url`` and print the first text matched by the account XPath.

    The response body is converted to text with the ``html`` helper module
    (charset taken from the ``Content-Type`` response header), parsed with
    ``etree.HTML`` and queried with ``//p[@class="pt5"]/span/text()``.

    Args:
        url: Address to request.
        headers: HTTP request headers handed to ``Request``.

    Raises:
        IndexError: If the XPath query matches nothing in the page.
    """
    # Read everything needed from the response inside the with-block so the
    # underlying connection is closed even if decoding or parsing fails later.
    with opener.open(Request(url, headers=headers)) as resp:
        body = resp.read()
        content_type = resp.getheader('Content-Type')

    html_text = html.to_html(body, html.get_charset(content_type))

    root = etree.HTML(html_text)
    account = root.xpath('//p[@class="pt5"]/span/text()')[0]
    print(account)
Ejemplo n.º 2
0
def request(url, data: dict, headers=None):
    """Send a request to ``url`` and print the decoded response body.

    Args:
        url: Address to request.
        data: Form fields to upload. When truthy they are URL-encoded
            (``key=value&key2=value2``) and sent as the request body;
            when empty or ``None`` the request carries no body.
        headers: Optional HTTP request headers. ``None`` means no extra
            headers.
    """
    # ``Request`` iterates over ``headers.items()``, so passing None through
    # (as happened when data was given without headers) raises AttributeError.
    headers = headers or {}

    # Convert the data uploaded by POST into a byte stream.
    payload = urlencode(data).encode() if data else None
    req = Request(url, data=payload, headers=headers)

    # The with-block closes the response once body and header have been read.
    with opener.open(req) as resp:
        body = resp.read()
        content_type = resp.getheader('Content-Type')

    html_txt = html.to_html(body, html.get_charset(content_type))
    print(html_txt)
Ejemplo n.º 3
0
 def get_kuai(self):
     """Crawl one URL from ``self.q_kuai`` and hand its page to ``parse_kuai``.

     Takes the next URL from the queue, requests it, sleeps for
     ``self.delay`` and, when the response is OK, converts the body with
     ``to_html`` and passes it to ``self.parse_kuai``. If anything in that
     sequence raises, the URL is put back on the queue and the next queued
     URL is tried. Returns (``None``) after the first attempt that finishes
     without an exception, or immediately when the queue is empty.
     """
     # Iterative retry: the original retried through self-recursion, which
     # adds a stack frame per failure and ends in RecursionError.
     # NOTE(review): a URL that always fails is retried indefinitely, as
     # before - consider a retry cap.
     while not self.q_kuai.empty():
         url = self.q_kuai.get()
         if DEBUG:
             print('正在爬取: ', url)
         try:
             response = requests.get(url=url, headers=get_header())
             time.sleep(self.delay)
             if response.ok:
                 self.parse_kuai(to_html(response.content))
         except Exception:
             # The request failed: put the URL back on the queue and retry.
             # ``Exception`` (not a bare except) lets KeyboardInterrupt and
             # SystemExit stop the crawler.
             self.q_kuai.put(url)
             continue
         return
Ejemplo n.º 4
0
def request(url):
    """Request ``url`` through a freshly built opener and print the page text.

    Prints the type of the response object, then - only when the response
    code is 200 - the body converted to text by the ``html`` helper module
    using the charset from the ``Content-Type`` response header.

    Args:
        url: Address to request.
    """
    # 1. Create the opener object - similar to a browser tool
    opener = build_opener(HTTPHandler())

    # 2. Build the Request object
    req = Request(url, headers=get_headers())

    # 3. Send the request; the with-block closes the response when done
    with opener.open(req) as resp:
        print(type(resp))  # http.client.HTTPResponse
        if resp.code != 200:
            return

        charset = html.get_charset(resp.getheader('Content-Type'))
        html_text = html.to_html(resp.read(), charset)

    print(html_text)