def response(self, response, url):
    """Response handler.

    Counts the finished request, classifies it by HTTP status and records
    the URL under that status in ``self.result``.

    :param response: object exposing a ``status`` attribute; anything
        without one (for example ``None``) is only counted as "completed"
    :param url: URL the response belongs to
    :return: None
    """

    # Every call counts as a completed attempt, with or without a response.
    self.counter.update(("completed",))

    if not hasattr(response, 'status'):
        return

    status = response.status

    if status in self.DEFAULT_HTTP_FAILED_STATUSES:
        self.iterator = Progress.line(url, len(self.urls), 'error', self.iterator)
        self.counter.update(("failed",))
    elif status in self.DEFAULT_HTTP_SUCCESS_STATUSES:
        self.iterator = Progress.line(url, len(self.urls), 'success', self.iterator)
        self.counter.update(("success",))
    elif status in self.DEFAULT_HTTP_UNRESOLVED_STATUSES:
        # Counted only: no progress line is printed for these statuses.
        self.counter.update(("possible",))
    elif status in self.DEFAULT_HTTP_REDIRECT_STATUSES:
        # Counted only: no progress line is printed for these statuses.
        self.counter.update(("redirects",))
    else:
        self.counter.update(("undefined",))

    # NOTE(review): assumes self.result already maps this status to a list
    # (e.g. a defaultdict(list)) - confirm where self.result is created.
    self.result[status].append(url)
def request(self, url):
    """Request handler.

    Sends one request for ``url`` through ``self.http`` and passes the
    outcome to ``self.response``. When the request fails, ``None`` is
    passed instead of a response object.

    :param url: URL to request
    :return: whatever ``self.response`` returns
    """

    # Bound up front so every except branch below leaves it defined;
    # previously the AttributeError/TypeError branches reached the final
    # call with `response` unbound.
    response = None

    try:
        # redirect=False: redirects are not followed, so the redirect
        # status itself is what gets handed to the response handler.
        response = self.http.urlopen(self.DEFAULT_HTTP_METHOD, url,
                                     headers=self.HEADER,
                                     redirect=False,
                                     timeout=self.rest,
                                     release_conn=True)
    except (urllib3.exceptions.ConnectTimeoutError,
            urllib3.exceptions.MaxRetryError,
            urllib3.exceptions.HostChangedError,
            urllib3.exceptions.ReadTimeoutError,
            urllib3.exceptions.ProxyError) as e:
        # Connection-level failure: show it as a warning on the progress line.
        self.iterator = Progress.line(url + ' -> ' + str(e), len(self.urls),
                                      'warning', self.iterator)
    except (AttributeError, TypeError) as e:
        log.critical(str(e))

    # Pause between requests.
    time.sleep(self.delay)
    return self.response(response, url)
def request(self, url):
    """Request handler.

    Opens a connection (through a random proxy when ``self.proxy`` is set,
    directly otherwise), sends one request for ``url`` and passes the
    outcome to ``self.response``. When no connection could be created or
    the request fails, ``None`` is passed instead of a response object.

    :param url: URL to request
    :return: whatever ``self.response`` returns
    """

    # Both are bound up front so every failure path below leaves them
    # defined; previously a caught ProxySchemeUnknown left `conn` unbound
    # and a caught AttributeError/TypeError left `response` unbound.
    conn = None
    response = None

    # Equality kept from the original; the flag's type is not visible here.
    if self.proxy == True:  # noqa: E712
        proxyserver = self.reader.get_random_proxy()
        try:
            conn = urllib3.proxy_from_url(proxyserver, maxsize=10, block=True,
                                          timeout=self.rest)
        except urllib3.exceptions.ProxySchemeUnknown as e:
            log.critical(str(e) + ": " + proxyserver)
    else:
        conn = urllib3.connection_from_url(url, maxsize=10, block=True,
                                           timeout=self.rest)

    if conn is not None:
        headers = {
            'accept-encoding': 'gzip, deflate, sdch',
            'accept-language': 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4,uk;q=0.2,es;q=0.2',
            'cache-control': 'no-cache',
            'user-agent': self.reader.get_random_user_agent()
        }

        try:
            response = conn.request(self.DEFAULT_HTTP_METHOD, url, headers=headers)
        except (urllib3.exceptions.ConnectTimeoutError,
                urllib3.exceptions.MaxRetryError,
                urllib3.exceptions.HostChangedError,
                urllib3.exceptions.ReadTimeoutError,
                urllib3.exceptions.ProxyError) as e:
            # Connection-level failure: show it as a warning on the progress line.
            self.iterator = Progress.line(url + ' -> ' + str(e), len(self.urls),
                                          'warning', self.iterator)
        except (AttributeError, TypeError) as e:
            log.critical(str(e))

    # Pause between requests.
    time.sleep(self.delay)
    return self.response(response, url)