Code Example #1
 def parse_shop(self, response):
     print '\r\n\t======== Page Crawl Start - Company -----------'
     hxs = HtmlXPathSelector(response)
     item = init_item('shop')  # initialize the shop item
     try:
         if conf['show_messages']: print '----Company Fetch Start----'
         #-- parsing code start ####################################################
         item['url'] = response.url
         item['logo_src'] = 'http://baidu.com/abc/ddd.jpg'
         item['photo_src'] = '/image/abcd.jpg'
         newurl = 'http://cn.china.cn'  # build the company profile page URL
         try:  # try to load the new page through a proxy IP
             proxy_handle = urllib2.ProxyHandler({'http': get_proxy()})
             opener = urllib2.build_opener(proxy_handle)
             temp = opener.open(newurl, timeout=30)  # send the request
         except:  # retry once; if it still fails, give up
             proxy_handle = urllib2.ProxyHandler({'http': get_proxy()})
             opener = urllib2.build_opener(proxy_handle)
             temp = opener.open(newurl, timeout=30)  # retry the request
         temp = temp.read()  # read the response body
         newresponse = HtmlResponse(newurl)
         newresponse._set_body(temp)
         hxs = HtmlXPathSelector(newresponse)  # build a new XPath selector
         #print temp
         #-- parsing code end ######################################################
         if conf['show_messages']: print '---- Fetch Success ----'
     except EOFError, e:
         if conf['show_messages']: print '----Company Fetch Error Start----'
         print e
         if conf['show_messages']: print '----Company Fetch Error End----'
Code Example #2
File: all.py  Project: JeanWolf/scrapy-spider
 def parse_shop(self, response):
     print '\r\n\t======== Page Crawl Start - Company -----------'
     hxs = HtmlXPathSelector(response)
     item = init_item('shop')  # initialize the shop item
     try:
         if conf['show_messages']: print '----Company Fetch Start----'
         #-- parsing code start ####################################################
         item['url'] = response.url
         item['logo_src'] = 'http://baidu.com/abc/ddd.jpg'
         item['photo_src'] = '/image/abcd.jpg'
         newurl = 'http://cn.china.cn'  # build the company profile page URL
         try:  # try to load the new page through a proxy IP
             proxy_handle = urllib2.ProxyHandler({'http': get_proxy()})
             opener = urllib2.build_opener(proxy_handle)
             temp = opener.open(newurl, timeout=30)  # send the request
         except:  # retry once; if it still fails, give up
             proxy_handle = urllib2.ProxyHandler({'http': get_proxy()})
             opener = urllib2.build_opener(proxy_handle)
             temp = opener.open(newurl, timeout=30)  # retry the request
         temp = temp.read()  # read the response body
         newresponse = HtmlResponse(newurl)
         newresponse._set_body(temp)
         hxs = HtmlXPathSelector(newresponse)  # build a new XPath selector
         #print temp
         #-- parsing code end ######################################################
         if conf['show_messages']: print '---- Fetch Success ----'
     except EOFError, e:
         if conf['show_messages']: print '----Company Fetch Error Start----'
         print e
         if conf['show_messages']: print '----Company Fetch Error End----'
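The first two examples wrap the downloaded HTML by calling the private _set_body method on an empty HtmlResponse. If the Scrapy release in use accepts a body argument in the response constructor (the public API in both the old pre-1.0 releases this code targets and current ones), the same wrapping can be done without touching a private method. A minimal sketch under that assumption, reusing urllib2, get_proxy, and HtmlXPathSelector from the example above; fetch_as_response is a hypothetical helper, not code from the project:

 import urllib2

 from scrapy.http import HtmlResponse
 from scrapy.selector import HtmlXPathSelector

 def fetch_as_response(url, proxy_ip):
     # Download `url` through the given HTTP proxy (a 'host:port' string is assumed).
     opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy_ip}))
     body = opener.open(url, timeout=30).read()
     # Pass the body through the constructor instead of the private _set_body();
     # for non-UTF-8 pages an explicit encoding= argument may also be needed.
     return HtmlResponse(url, body=body)

 # Hypothetical usage inside parse_shop:
 # hxs = HtmlXPathSelector(fetch_as_response(newurl, get_proxy()))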
Code Example #3
File: all.py  Project: vsfor/scrapy-spider
 def parse_shop(self, response):
     jeen = Jeen()
     if conf_show_messages:
         print '\r\n\t======== Page Crawl Start - Company -----------'
     hxs = HtmlXPathSelector(response)
     item = jeen.init_item('shop')  # initialize the shop item
     try:
         if conf_show_messages: print '----Company Fetch Start----'
         #-- parsing code start ####################################################
         item['url'] = response.url
         item['logo_src'] = 'http://china.toocle.com/images/comp/11/s19.gif'
         item['photo_src'] = 'http://img1.toocle.com/bin/img/?x=217&y=156&t=company_cn&m=1&s=/2013/09/05/05/2519405_1.jpg'
         newurl = 'http://cn.china.cn'  # build the company profile page URL
         if conf_use_proxy:
             try:  # try to load the new page through a proxy IP
                 proxy_handle = urllib2.ProxyHandler({'http': jeen.get_proxy()})
                 opener = urllib2.build_opener(proxy_handle)
                 temp = opener.open(newurl, timeout=30)  # send the request
             except:  # retry once; if it still fails, give up
                 proxy_handle = urllib2.ProxyHandler({'http': jeen.get_proxy()})
                 opener = urllib2.build_opener(proxy_handle)
                 temp = opener.open(newurl, timeout=30)  # retry the request
         else:
             try:
                 temp = urllib2.urlopen(newurl, timeout=30)
             except:
                 temp = urllib2.urlopen(newurl, timeout=30)
         temp = temp.read()  # read the response body
         newresponse = HtmlResponse(newurl)
         newresponse._set_body(temp)
         hxs = HtmlXPathSelector(newresponse)  # build a new XPath selector
         #print temp
         #-- parsing code end ######################################################
         if conf_show_messages: print '---- Fetch Success ----'
     except EOFError, e:
         if conf_show_messages: print '----Company Fetch Error Start----'
         print e
         if conf_show_messages: print '----Company Fetch Error End----'
Code Example #4
File: all.py  Project: JeanWolf/scrapy-spider
 def parse_shop(self, response):
     jeen = Jeen()
     if conf_show_messages: print '\r\n\t======== Page Crawl Start - Company -----------'
     hxs = HtmlXPathSelector(response)
     item = jeen.init_item('shop')  # initialize the shop item
     try:
         if conf_show_messages: print '----Company Fetch Start----'
         #-- parsing code start ####################################################
         item['url'] = response.url
         item['logo_src'] = 'http://china.toocle.com/images/comp/11/s19.gif'
         item['photo_src'] = 'http://img1.toocle.com/bin/img/?x=217&y=156&t=company_cn&m=1&s=/2013/09/05/05/2519405_1.jpg'
         newurl = 'http://cn.china.cn'  # build the company profile page URL
         if conf_use_proxy:
             try:  # try to load the new page through a proxy IP
                 proxy_handle = urllib2.ProxyHandler({'http': jeen.get_proxy()})
                 opener = urllib2.build_opener(proxy_handle)
                 temp = opener.open(newurl, timeout=30)  # send the request
             except:  # retry once; if it still fails, give up
                 proxy_handle = urllib2.ProxyHandler({'http': jeen.get_proxy()})
                 opener = urllib2.build_opener(proxy_handle)
                 temp = opener.open(newurl, timeout=30)  # retry the request
         else:
             try:
                 temp = urllib2.urlopen(newurl, timeout=30)
             except:
                 temp = urllib2.urlopen(newurl, timeout=30)
         temp = temp.read()  # read the response body
         newresponse = HtmlResponse(newurl)
         newresponse._set_body(temp)
         hxs = HtmlXPathSelector(newresponse)  # build a new XPath selector
         #print temp
         #-- parsing code end ######################################################
         if conf_show_messages: print '---- Fetch Success ----'
     except EOFError, e:
         if conf_show_messages: print '----Company Fetch Error Start----'
         print e
         if conf_show_messages: print '----Company Fetch Error End----'
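All four examples repeat the "open the URL, retry once if it fails" block, once for the proxy path and, in the last two, again for the direct path. That repetition could be folded into a single helper. The sketch below is an illustration only, written against the same Python 2 / urllib2 API the examples use; open_with_retry is a hypothetical name, and the proxy factory is assumed to return a 'host:port' string, as get_proxy() does above:

 import urllib2

 def open_with_retry(url, proxy_factory=None, timeout=30, retries=1):
     # Open `url`, optionally through a freshly chosen proxy on every attempt,
     # retrying up to `retries` extra times before giving up.
     for attempt in range(retries + 1):
         if proxy_factory:
             opener = urllib2.build_opener(
                 urllib2.ProxyHandler({'http': proxy_factory()}))
         else:
             opener = urllib2.build_opener()
         try:
             return opener.open(url, timeout=timeout)
         except Exception:
             if attempt == retries:
                 raise  # out of retries: give up, as the originals effectively do

 # Hypothetical usage inside parse_shop:
 # temp = open_with_retry(newurl, proxy_factory=jeen.get_proxy if conf_use_proxy else None).read()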