Example #1
0
 def _get_html(self, url):
     """Send a GET request for ``url`` with a randomly chosen User-Agent.

     If the request raises, the error is reported on stdout,
     ``self._current_thread_count`` is decremented and the calling
     thread is terminated through ``thread.exit_thread()``.

     NOTE(review): the response is neither used nor returned in the
     code visible here -- confirm whether a return statement is missing.
     """
     # Pre-process the URL. This is an extension point; according to the
     # original note, no processing is applied at present.
     url = StringUtil.process_url(url)
     headers = {
         # Randomly chosen User-Agent for each request.
         'User-Agent': AgentPoolUtil.getRandomUserAgent()
     }
     proxies = {
         # Rotating proxy; left disabled because it is of no use for
         # tv.sohu.com, which is served over https.
         # 'http': AgentPoolUtil.getRandomProxy(),
     }
     try:
         response = requests.get(url, headers=headers, proxies=proxies)
     except Exception as e:
         # URLs are not guaranteed to be reliable, so a failed request
         # ends this worker thread instead of propagating.
         print(e)
         # Report the concrete exception class rather than the
         # ``Exception`` base class itself.
         print(type(e))
         print("请求错误拦截,因为url未必都靠谱,所以有可能会报错")
         # Fill the placeholder with the current thread id; it used to be
         # printed as a literal "%s".
         print("%s Thread exit with error occurs" % thread.get_ident())
         self._current_thread_count -= 1
         thread.exit_thread()
Example #2
0
	def _get_html(self, url):
		"""Send a GET request for ``url`` with a randomly chosen User-Agent.

		If the request raises, the error is reported on stdout,
		``self._current_thread_count`` is decremented and the calling
		thread is terminated through ``thread.exit_thread()``.

		NOTE(review): the response is neither used nor returned in the
		code visible here -- confirm whether a return statement is missing.
		"""
		# Pre-process the URL. This is an extension point; according to the
		# original note, no processing is applied at present.
		url = StringUtil.process_url(url)
		headers = {
			# Randomly chosen User-Agent for each request.
			'User-Agent': AgentPoolUtil.getRandomUserAgent()
		}
		proxies = {
			# Rotating proxy; left disabled because it is of no use for
			# tv.sohu.com, which is served over https.
			# 'http': AgentPoolUtil.getRandomProxy(),
		}
		try:
			response = requests.get(url, headers=headers, proxies=proxies)
		except Exception as e:
			# URLs are not guaranteed to be reliable, so a failed request
			# ends this worker thread instead of propagating.
			print(e)
			# Report the concrete exception class rather than the
			# ``Exception`` base class itself.
			print(type(e))
			print("请求错误拦截,因为url未必都靠谱,所以有可能会报错")
			# Fill the placeholder with the current thread id; it used to be
			# printed as a literal "%s".
			print("%s Thread exit with error occurs" % thread.get_ident())
			self._current_thread_count -= 1
			thread.exit_thread()