def test_proxy_fetch():
    """Smoke-test Page.proxy_fetch() using proxies loaded from a file.

    Fetches one Baike page through the proxy list, transcodes the payload
    UTF-8 -> GBK -> text (silently dropping characters that fail either
    conversion), and prints the result.
    """
    proxy_store = common.proxy.Proxy('proxies.dat')
    available_proxies = proxy_store.get_from_file()
    target = common.page.Page('http://baike.baidu.com/subview/3077/11247674.htm')
    raw = target.proxy_fetch(available_proxies)
    # One transcode chain: bytes -> str -> GBK bytes -> str.
    text = raw.decode('utf-8', 'ignore').encode('gbk', 'ignore').decode('gbk', 'ignore')
    print(text)
def test_proxy_fetch():
    """Smoke-test Page.proxy_fetch() against a fixed Baike page.

    Loads a proxy list from 'proxies.dat', downloads the page through it,
    normalizes the text via a UTF-8 -> GBK -> text round trip (characters
    that do not survive either step are dropped), then prints the result.
    """
    proxies_list = common.proxy.Proxy('proxies.dat').get_from_file()
    page = common.page.Page(
        'http://baike.baidu.com/subview/3077/11247674.htm')
    payload = page.proxy_fetch(proxies_list)
    # Step-by-step transcode so each intermediate is inspectable.
    decoded = payload.decode('utf-8', 'ignore')
    gbk_bytes = decoded.encode('gbk', 'ignore')
    html = gbk_bytes.decode('gbk', 'ignore')
    print(html)
def fetch(self, min_id, max_id):
    """Fetch Baidu Baike pages with ids in [min_id, max_id) through the
    instance's proxy queue and save each page as a local HTML file.

    Args:
        min_id: first page id, inclusive.
        max_id: last page id, exclusive (range() semantics).
    """
    for page_id in range(min_id, max_id):
        # Fetch the page through the proxy pool.
        page_id = str(page_id)
        url = 'http://baike.baidu.com/view/' + page_id + '.htm'
        page = common.page.Page(url)
        html = page.proxy_fetch(self.proxies_queue)
        if html:
            # Transcode: UTF-8 bytes -> text -> GBK -> text, silently
            # dropping characters that fail either conversion.
            html = html.decode('utf-8', 'ignore')
            html = html.encode('gbk', 'ignore')
            html = html.decode('gbk', 'ignore')
            # Write out. 'with' guarantees the handle is closed even if
            # f.write raises (the original leaked it on error).
            outfile = 'd:/test/baikefile/' + page_id + '.html'
            with open(outfile, 'w') as f:
                f.write(html)
            # Lazy %-args: formatting only happens if DEBUG is enabled.
            logging.debug('写入完成 %s', page_id)
    # Summary for the whole id range; placed after the loop — NOTE(review):
    # the collapsed source is ambiguous about its indentation, confirm.
    logging.info('子任务完成 ' + str(min_id) + "~" + str(max_id))
def fetch(self, min_id, max_id):
    """Download Baike pages for ids in [min_id, max_id) via self.proxies_queue
    and write each one to d:/test/baikefile/<id>.html.

    Args:
        min_id: first page id, inclusive.
        max_id: last page id, exclusive (range() semantics).
    """
    for page_id in range(min_id, max_id):
        # Fetch the page through the proxy pool.
        page_id = str(page_id)
        url = 'http://baike.baidu.com/view/' + page_id + '.htm'
        page = common.page.Page(url)
        html = page.proxy_fetch(self.proxies_queue)
        if html:
            # Transcode: UTF-8 bytes -> text -> GBK -> text; characters
            # that fail either conversion are dropped.
            html = html.decode('utf-8', 'ignore')
            html = html.encode('gbk', 'ignore')
            html = html.decode('gbk', 'ignore')
            # Write out. Context manager closes the file even when
            # f.write raises (original leaked the handle on error).
            outfile = 'd:/test/baikefile/' + page_id + '.html'
            with open(outfile, 'w') as f:
                f.write(html)
            # Lazy %-args instead of eager '%' formatting.
            logging.debug('写入完成 %s', page_id)
    # Range-complete summary; placed after the loop — NOTE(review): the
    # collapsed source is ambiguous about its indentation, confirm.
    logging.info('子任务完成 '+str(min_id)+"~"+str(max_id))