def urlopen(self, url, timeout=30, data=None, headers=None, files=None, show_error=True): url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not data: data = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', host) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') r = self.http_opener # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2( 'Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception( 'Disabled calls to %s for 15 minutes because so many failed requests' ) else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host) try: kwargs = { 'headers': headers, 'data': data if len(data) > 0 else None, 'timeout': timeout, 'files': files, } method = 'post' if len(data) > 0 or files else 'get' log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance( data, dict) else 'with data')) response = r.request(method, url, verify=False, **kwargs) if response.status_code == requests.codes.ok: data = response.content else: response.raise_for_status() self.http_failed_request[host] = 0 except (IOError, MaxRetryError, Timeout): if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0))) # Save failed requests by hosts try: if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP( host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout=30, params=None, headers=None, opener=None, multipart=False, show_error=True): url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not params: params = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', host) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2( 'Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception( 'Disabled calls to %s for 15 minutes because so many failed requests' ) else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host) try: # Make sure opener has the correct headers if opener: opener.add_headers = headers if multipart: log.info('Opening multipart url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance( params, dict) else 'with data')) request = urllib2.Request(url, params, headers) if opener: opener.add_handler(MultipartPostHandler()) else: cookies = cookielib.CookieJar() opener = urllib2.build_opener( urllib2.HTTPCookieProcessor(cookies), MultipartPostHandler) response = opener.open(request, timeout=timeout) else: log.info('Opening url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance( params, dict) else 'with data')) if isinstance(params, (str, unicode)) and len(params) > 0: data = params else: data = tryUrlencode(params) if len(params) > 0 else None request = urllib2.Request(url, data, headers) if opener: response = opener.open(request, timeout=timeout) else: response = urllib2.urlopen(request, timeout=timeout) # unzip if needed if response.info().get('Content-Encoding') == 'gzip': buf = StringIO(response.read()) f = gzip.GzipFile(fileobj=buf) data = f.read() f.close() else: data = response.read() response.close() self.http_failed_request[host] = 0 except IOError: if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(1))) # Save failed requests by hosts try: if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP( host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, stream = False): url = quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not data: data = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', None) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') use_proxy = Env.setting('use_proxy') proxy_url = None if use_proxy: proxy_server = Env.setting('proxy_server') proxy_username = Env.setting('proxy_username') proxy_password = Env.setting('proxy_password') if proxy_server: loc = "{0}:{1}@{2}".format(proxy_username, proxy_password, proxy_server) if proxy_username else proxy_server proxy_url = { "http": "http://"+loc, "https": "https://"+loc, } else: proxy_url = getproxies() r = Env.get('http_opener') # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception('Disabled calls to %s for 15 minutes because so many failed requests' % host) else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host, url) status_code = None try: kwargs = { 'headers': headers, 'data': data if len(data) > 0 else None, 'timeout': timeout, 'files': files, 'verify': False, #verify_ssl, Disable for now as to many wrongly implemented certificates.. 'stream': stream, 'proxies': proxy_url, } method = 'post' if len(data) > 0 or files else 'get' log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance(data, dict) else 'with data')) response = r.request(method, url, **kwargs) status_code = response.status_code if response.status_code == requests.codes.ok: data = response if stream else response.content else: response.raise_for_status() self.http_failed_request[host] = 0 except (IOError, MaxRetryError, Timeout): if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0))) # Save failed requests by hosts try: # To many requests if status_code in [429]: self.http_failed_request[host] = 1 self.http_failed_disabled[host] = time.time() if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP(host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, verify_ssl = True): url = urllib2.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not data: data = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', host) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') r = self.http_opener # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception('Disabled calls to %s for 15 minutes because so many failed requests') else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host) try: kwargs = { 'headers': headers, 'data': data if len(data) > 0 else None, 'timeout': timeout, 'files': files, 'verify': verify_ssl, } method = 'post' if len(data) > 0 or files else 'get' log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance(data, dict) else 'with data')) response = r.request(method, url, **kwargs) if response.status_code == requests.codes.ok: data = response.content else: response.raise_for_status() self.http_failed_request[host] = 0 except (IOError, MaxRetryError, Timeout): if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0))) # Save failed requests by hosts try: if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP(host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout=30, data=None, headers=None, files=None, show_error=True, stream=False): url = quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not data: data = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', None) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') if headers.get('Authorization', '') != '': headers['Authorization'] = headers.get('Authorization', '') use_proxy = Env.setting('use_proxy') proxy_url = None if use_proxy: proxy_server = Env.setting('proxy_server') proxy_username = Env.setting('proxy_username') proxy_password = Env.setting('proxy_password') if proxy_server: loc = "{0}:{1}@{2}".format( proxy_username, proxy_password, proxy_server) if proxy_username else proxy_server proxy_url = { "http": "http://" + loc, "https": "https://" + loc, } else: proxy_url = getproxies() r = Env.get('http_opener') # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2( 'Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception( 'Disabled calls to %s for 15 minutes because so many failed requests' % host) else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host, url) status_code = None try: kwargs = { 'headers': headers, 'data': data if len(data) > 0 else None, 'timeout': timeout, 'files': files, 'verify': False, #verify_ssl, Disable for now as to many wrongly implemented certificates.. 'stream': stream, 'proxies': proxy_url, } method = 'post' if len(data) > 0 or files else 'get' log.info('Opening url: %s %s, data: %s', (method, url, [x for x in data.keys()] if isinstance( data, dict) else 'with data')) response = r.request(method, url, **kwargs) status_code = response.status_code if response.status_code == requests.codes.ok: data = response if stream else response.content else: response.raise_for_status() self.http_failed_request[host] = 0 except (IOError, MaxRetryError, Timeout): if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(0))) # Save failed requests by hosts try: # To many requests if status_code in [429]: self.http_failed_request[host] = 1 self.http_failed_disabled[host] = time.time() if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP( host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout=30, data=None, headers=None, files=None, show_error=True): url = urllib2.quote(ss(url), safe="%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not data: data = {} # Fill in some headers parsed_url = urlparse(url) host = "%s%s" % (parsed_url.hostname, (":" + str(parsed_url.port) if parsed_url.port else "")) headers["Referer"] = headers.get("Referer", "%s://%s" % (parsed_url.scheme, host)) headers["Host"] = headers.get("Host", host) headers["User-Agent"] = headers.get("User-Agent", self.user_agent) headers["Accept-encoding"] = headers.get("Accept-encoding", "gzip") headers["Connection"] = headers.get("Connection", "keep-alive") headers["Cache-Control"] = headers.get("Cache-Control", "max-age=0") r = self.http_opener # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2("Disabled calls to %s for 15 minutes because so many failed requests.", host) if not show_error: raise Exception("Disabled calls to %s for 15 minutes because so many failed requests") else: return "" else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host) try: kwargs = {"headers": headers, "data": data if len(data) > 0 else None, "timeout": timeout, "files": files} method = "post" if len(data) > 0 or files else "get" log.info( "Opening url: %s %s, data: %s", (method, url, [x for x in data.keys()] if isinstance(data, dict) else "with data"), ) response = r.request(method, url, verify=False, **kwargs) if response.status_code == requests.codes.ok: data = response.content else: response.raise_for_status() self.http_failed_request[host] = 0 except (IOError, MaxRetryError, Timeout): if show_error: log.error("Failed opening url in %s: %s %s", (self.getName(), url, traceback.format_exc(0))) # Save failed requests by hosts try: if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP(host): self.http_failed_disabled[host] = time.time() except: log.debug("Failed logging failed requests for %s: %s", (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data
def urlopen(self, url, timeout = 30, params = None, headers = None, opener = None, multipart = False, show_error = True): url = urllib2.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]") if not headers: headers = {} if not params: params = {} # Fill in some headers parsed_url = urlparse(url) host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else '')) headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host)) headers['Host'] = headers.get('Host', host) headers['User-Agent'] = headers.get('User-Agent', self.user_agent) headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip') headers['Connection'] = headers.get('Connection', 'keep-alive') headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0') # Don't try for failed requests if self.http_failed_disabled.get(host, 0) > 0: if self.http_failed_disabled[host] > (time.time() - 900): log.info2('Disabled calls to %s for 15 minutes because so many failed requests.', host) if not show_error: raise Exception('Disabled calls to %s for 15 minutes because so many failed requests') else: return '' else: del self.http_failed_request[host] del self.http_failed_disabled[host] self.wait(host) try: # Make sure opener has the correct headers if opener: opener.add_headers = headers if multipart: log.info('Opening multipart url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance(params, dict) else 'with data')) request = urllib2.Request(url, params, headers) if opener: opener.add_handler(MultipartPostHandler()) else: cookies = cookielib.CookieJar() opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), MultipartPostHandler) response = opener.open(request, timeout = timeout) else: log.info('Opening url: %s, params: %s', (url, [x for x in params.iterkeys()] if isinstance(params, dict) else 'with data')) if isinstance(params, (str, unicode)) and len(params) > 0: data = params else: data = tryUrlencode(params) if len(params) > 0 else None request = urllib2.Request(url, data, headers) if opener: response = opener.open(request, timeout = timeout) else: response = urllib2.urlopen(request, timeout = timeout) # unzip if needed if response.info().get('Content-Encoding') == 'gzip': buf = StringIO(response.read()) f = gzip.GzipFile(fileobj = buf) data = f.read() f.close() else: data = response.read() response.close() self.http_failed_request[host] = 0 except IOError: if show_error: log.error('Failed opening url in %s: %s %s', (self.getName(), url, traceback.format_exc(1))) # Save failed requests by hosts try: if not self.http_failed_request.get(host): self.http_failed_request[host] = 1 else: self.http_failed_request[host] += 1 # Disable temporarily if self.http_failed_request[host] > 5 and not isLocalIP(host): self.http_failed_disabled[host] = time.time() except: log.debug('Failed logging failed requests for %s: %s', (url, traceback.format_exc())) raise self.http_last_use[host] = time.time() return data