def _get_html(self, url):
    """Issue a GET request for ``url`` with a randomly chosen User-Agent.

    The URL is first passed through ``StringUtil.process_url``. If the
    request raises, the error and the id of the current thread are
    printed, ``self._current_thread_count`` is decremented and
    ``thread.exit_thread()`` is called to end the current thread.

    Args:
        url: Address to fetch.
    """
    # Pre-processing hook for the URL (the original note says it is
    # extensible and currently performs no real processing).
    url = StringUtil.process_url(url)
    headers = {
        # Pick a different User-Agent for every request.
        'User-Agent': AgentPoolUtil.getRandomUserAgent()
    }
    proxies = {
        # Proxy rotation is switched off: per the original note it cannot
        # be used for the HTTPS site tv.sohu.com.
        # 'http': AgentPoolUtil.getRandomProxy(),
    }
    try:
        # NOTE(review): the response is bound here but not returned in the
        # visible code -- confirm whether a return statement is missing.
        response = requests.get(url, headers=headers, proxies=proxies)
    except Exception as e:
        # URLs come from crawled pages and are not always valid, so any
        # failure here ends this worker instead of propagating.
        print(e)
        # Show the concrete exception type rather than the base class.
        print(type(e))
        print("请求错误拦截,因为url未必都靠谱,所以有可能会报错")
        # Fill the "%s" placeholder, which previously had no argument and
        # was printed literally.
        print("%s Thread exit with error occurs" % thread.get_ident())
        # Release this worker's slot before the thread goes away.
        self._current_thread_count -= 1
        thread.exit_thread()
def _get_html(self, url):
    """Fetch ``url`` over HTTP GET using a randomly selected User-Agent.

    ``StringUtil.process_url`` is applied to the URL before the request.
    When the request raises, the error and the current thread id are
    printed, ``self._current_thread_count`` is decremented and the thread
    is ended with ``thread.exit_thread()``.

    Args:
        url: Address to fetch.
    """
    # URL pre-processing hook (described in the original note as
    # extensible and currently doing no real processing).
    url = StringUtil.process_url(url)
    headers = {
        # Use a different User-Agent for each request.
        'User-Agent': AgentPoolUtil.getRandomUserAgent()
    }
    proxies = {
        # Proxy rotation is disabled: the original note says it cannot be
        # used for the HTTPS site tv.sohu.com.
        # 'http': AgentPoolUtil.getRandomProxy(),
    }
    try:
        # NOTE(review): the response is assigned but not returned in the
        # visible code -- confirm whether a return statement is missing.
        response = requests.get(url, headers=headers, proxies=proxies)
    except Exception as e:
        # Crawled URLs are not guaranteed to be valid, so a failed request
        # ends this worker rather than propagating the error.
        print(e)
        # Print the actual exception type, not the Exception base class.
        print(type(e))
        print("请求错误拦截,因为url未必都靠谱,所以有可能会报错")
        # The "%s" placeholder had no argument and was printed literally;
        # supply the current thread id.
        print("%s Thread exit with error occurs" % thread.get_ident())
        # Give back this worker's slot before the thread exits.
        self._current_thread_count -= 1
        thread.exit_thread()