def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None,
                     allow_redirect=True, cache_limit=8):
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    self.create_db_connection()
    _, html = self.db_connection.get_cached_url(url, cache_limit)
    if html:
        log_utils.log('Returning cached result for: %s' % (url), xbmc.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        if data is not None: data = urllib.urlencode(data, True)
        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', USER_AGENT)
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers: request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if xbmcaddon.Addon().getSetting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s' % (url, self.cookies_as_str(self.cj)), xbmc.LOGDEBUG)
        self.cj.save(ignore_discard=True)

        if not allow_redirect and response.getcode() in [301, 302, 303, 307]:
            return response.info().getheader('Location')

        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read())
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read()
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, self.cj)
            if not html: return ''
        else:
            log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), xbmc.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), xbmc.LOGWARNING)
        return ''

    self.db_connection.cache_url(url, html)
    return html
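# The variants above and below install a NoRedirection handler whenever allow_redirect is False,
# but its definition is not part of this section. A minimal sketch, assuming the common urllib2
# pattern of an HTTPErrorProcessor that hands 3xx responses back untouched so they are neither
# followed nor raised as HTTPError; the class body here is an assumption, not the original
# implementation.
class NoRedirection(urllib2.HTTPErrorProcessor):
    def http_response(self, request, response):
        # Returning the response as-is skips urllib2's error/redirect chain.
        return response

    https_response = http_response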
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None,
                     allow_redirect=True, method=None, cache_limit=8):
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    self.create_db_connection()
    _created, _res_header, html = self.db_connection.get_cached_url(url, data, cache_limit)
    if html:
        log_utils.log('Returning cached result for: %s' % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers: request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        if method is not None: request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s' % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)

        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                return response.info().getheader('Location')

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
            if not html: return ''
        else:
            log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    self.db_connection.cache_url(url, html, data)
    return html
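# multipart_data is expected to arrive already encoded with the literal boundary 'X-X-X' that the
# Content-Type header above advertises. A hypothetical helper (not part of the original code) that
# builds such a body for plain string fields could look like this:
def build_multipart_data(fields, boundary='X-X-X'):
    lines = []
    for name, value in fields.items():
        lines.append('--%s' % boundary)
        lines.append('Content-Disposition: form-data; name="%s"' % name)
        lines.append('')
        lines.append(value)
    lines.append('--%s--' % boundary)
    lines.append('')
    # multipart/form-data parts are separated by CRLF
    return '\r\n'.join(lines)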
def _cached_http_get(self, url, base_url, timeout, params=None, data=None, multipart_data=None, headers=None, cookies=None,
                     allow_redirect=True, method=None, require_debrid=False, read_error=False, cache_limit=8):
    # Debrid-only scrapers bail out early unless a universal debrid resolver is installed.
    if require_debrid:
        if Scraper.debrid_resolvers is None:
            Scraper.debrid_resolvers = [resolver for resolver in urlresolver.relevant_resolvers() if resolver.isUniversal()]
        if not Scraper.debrid_resolvers:
            logger.log('%s requires debrid: %s' % (self.__module__, Scraper.debrid_resolvers), log_utils.LOGDEBUG)
            return ''

    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    if url.startswith('//'): url = 'http:' + url
    referer = headers['Referer'] if 'Referer' in headers else base_url

    # Merge explicit params with any query string already present on the URL.
    if params:
        if url == base_url and not url.endswith('/'): url += '/'
        parts = urlparse.urlparse(url)
        if parts.query:
            params.update(scraper_utils.parse_query(url))
            url = urlparse.urlunparse((parts.scheme, parts.netloc, parts.path, parts.params, '', parts.fragment))
        url += '?' + urllib.urlencode(params)

    logger.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers), log_utils.LOGDEBUG)

    # POST body: accept a pre-encoded string, a dict to urlencode, or a pre-built multipart payload.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    # Serve from the URL cache when a fresh enough copy exists.
    _created, _res_header, html = self.db_connection().get_cached_url(url, data, cache_limit)
    if html:
        logger.log('Returning cached result for: %s' % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        if isinstance(url, unicode): url = url.encode('utf-8')
        request = urllib2.Request(url, data=data)
        headers = headers.copy()
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_header('Accept-Encoding', 'gzip')
        request.add_unredirected_header('Host', request.get_host())
        if referer: request.add_unredirected_header('Referer', referer)
        if 'Referer' in headers: del headers['Referer']
        if 'Host' in headers: del headers['Host']
        for key, value in headers.iteritems(): request.add_header(key, value)
        self.cj.add_cookie_header(request)

        # Either block redirects entirely or follow them with cookie support.
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        if method is not None: request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            logger.log('Response Cookies: %s - %s' % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)

        # When redirects are blocked, return the target from the Refresh or Location header instead of the body.
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                redir_url = response.info().getheader('Location')
                if redir_url.startswith('='): redir_url = redir_url[1:]
                return redir_url

        # Never read more than MAX_RESPONSE bytes of the body.
        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            logger.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if method == 'HEAD':
            return ''
        else:
            if response.info().get('Content-Encoding') == 'gzip':
                html = ungz(response.read(MAX_RESPONSE))
            else:
                html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.info().get('Content-Encoding') == 'gzip':
            html = ungz(e.read(MAX_RESPONSE))
        else:
            html = e.read(MAX_RESPONSE)

        # On Cloudflare challenges, try the captcha / browser-verification solvers before giving up.
        if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
            html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(), self.get_name())
            if not html: return ''
        elif e.code == 503 and 'cf-browser-verification' in html:
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua(), extra_headers=headers)
            if not html: return ''
        else:
            logger.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            if not read_error: return ''
    except Exception as e:
        logger.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    self.db_connection().cache_url(url, html, data)
    return html
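# The version above (and the copy that follows) decodes gzip bodies through an ungz() helper
# instead of the inline StringIO/GzipFile dance used by the earlier variants. Its definition is
# not part of this section; a minimal sketch, assuming it simply wraps that same gzip logic:
def ungz(compressed):
    # Decompress an in-memory gzip-encoded response body (Python 2).
    return gzip.GzipFile(fileobj=StringIO(compressed)).read()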
def _cached_http_get(self, url, base_url, timeout, params=None, data=None, multipart_data=None, headers=None, cookies=None,
                     allow_redirect=True, method=None, require_debrid=False, read_error=False, cache_limit=8):
    if require_debrid:
        if Scraper.debrid_resolvers is None:
            Scraper.debrid_resolvers = [resolver for resolver in urlresolver.relevant_resolvers() if resolver.isUniversal()]
        if not Scraper.debrid_resolvers:
            logger.log('%s requires debrid: %s' % (self.__module__, Scraper.debrid_resolvers), log_utils.LOGDEBUG)
            return ''

    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    if url.startswith('//'): url = 'http:' + url
    referer = headers['Referer'] if 'Referer' in headers else base_url

    if params:
        if url == base_url and not url.endswith('/'): url += '/'
        parts = urlparse.urlparse(url)
        if parts.query:
            params.update(scraper_utils.parse_query(url))
            url = urlparse.urlunparse((parts.scheme, parts.netloc, parts.path, parts.params, '', parts.fragment))
        url += '?' + urllib.urlencode(params)

    logger.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers), log_utils.LOGDEBUG)
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    _created, _res_header, html = self.db_connection().get_cached_url(url, data, cache_limit)
    if html:
        logger.log('Returning cached result for: %s' % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        if isinstance(url, unicode): url = url.encode('utf-8')
        request = urllib2.Request(url, data=data)
        headers = headers.copy()
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_header('Accept-Encoding', 'gzip')
        request.add_unredirected_header('Host', request.get_host())
        if referer: request.add_unredirected_header('Referer', referer)
        if 'Referer' in headers: del headers['Referer']
        if 'Host' in headers: del headers['Host']
        for key, value in headers.iteritems(): request.add_header(key, value)
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        if method is not None: request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            logger.log('Response Cookies: %s - %s' % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)

        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                redir_url = response.info().getheader('Location')
                if redir_url.startswith('='): redir_url = redir_url[1:]
                return redir_url

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            logger.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if method == 'HEAD':
            return ''
        else:
            if response.info().get('Content-Encoding') == 'gzip':
                html = ungz(response.read(MAX_RESPONSE))
            else:
                html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.info().get('Content-Encoding') == 'gzip':
            html = ungz(e.read(MAX_RESPONSE))
        else:
            html = e.read(MAX_RESPONSE)

        if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
            html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(), self.get_name())
            if not html: return ''
        elif e.code == 503 and 'cf-browser-verification' in html:
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua(), extra_headers=headers)
            if not html: return ''
        else:
            logger.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            if not read_error: return ''
    except Exception as e:
        logger.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    self.db_connection().cache_url(url, html, data)
    return html
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None,
                     allow_redirect=True, cache_limit=8):
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    self.create_db_connection()
    _, html = self.db_connection.get_cached_url(url, data, cache_limit)
    if html:
        log_utils.log('Returning cached result for: %s' % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', self._get_ua())
        request.add_header('Accept', '*/*')
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers: request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s' % (url, self.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.__fix_bad_cookies()
        self.cj.save(ignore_discard=True)

        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                return response.info().getheader('Location')

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, self.cj, self._get_ua())
            if not html: return ''
        else:
            log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    self.db_connection.cache_url(url, html, data)
    return html
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None,
                     allow_redirect=True, cache_limit=8):
    if cookies is None: cookies = {}
    if timeout == 0: timeout = None
    if headers is None: headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    self.create_db_connection()
    _, html = self.db_connection.get_cached_url(url, cache_limit)
    if html:
        log_utils.log('Returning cached result for: %s' % (url), xbmc.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        if data is not None: data = urllib.urlencode(data, True)
        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', USER_AGENT)
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers: request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if xbmcaddon.Addon().getSetting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s' % (url, self.cookies_as_str(self.cj)), xbmc.LOGDEBUG)
        self.__fix_bad_cookies()
        self.cj.save(ignore_discard=True)

        if not allow_redirect and response.getcode() in [301, 302, 303, 307]:
            return response.info().getheader('Location')

        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read())
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read()
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, self.cj)
            if not html: return ''
        else:
            log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), xbmc.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), xbmc.LOGWARNING)
        return ''

    self.db_connection.cache_url(url, html)
    return html
def _cached_http_get(
    self,
    url,
    base_url,
    timeout,
    cookies=None,
    data=None,
    multipart_data=None,
    headers=None,
    allow_redirect=True,
    cache_limit=8,
):
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    if headers is None:
        headers = {}
    referer = headers["Referer"] if "Referer" in headers else url
    log_utils.log("Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|" % (url, cookies, data, headers))
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers["Content-Type"] = "multipart/form-data; boundary=X-X-X"
        data = multipart_data

    self.create_db_connection()
    _created, _res_header, html = self.db_connection.get_cached_url(url, data, cache_limit)
    if html:
        log_utils.log("Returning cached result for: %s" % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header("User-Agent", scraper_utils.get_ua())
        request.add_header("Accept", "*/*")
        request.add_unredirected_header("Host", request.get_host())
        request.add_unredirected_header("Referer", referer)
        for key in headers:
            request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting("cookie_debug") == "true":
            log_utils.log(
                "Response Cookies: %s - %s" % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG
            )
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)

        if not allow_redirect and (
            response.getcode() in [301, 302, 303, 307] or response.info().getheader("Refresh")
        ):
            if response.info().getheader("Refresh") is not None:
                refresh = response.info().getheader("Refresh")
                return refresh.split(";")[-1].split("url=")[-1]
            else:
                return response.info().getheader("Location")

        content_length = response.info().getheader("Content-Length", 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log(
                "Response exceeded allowed size. %s => %s / %s" % (url, content_length, MAX_RESPONSE),
                log_utils.LOGWARNING,
            )

        if response.info().get("Content-Encoding") == "gzip":
            buf = StringIO(response.read(MAX_RESPONSE))
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and "cf-browser-verification" in e.read():
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
            if not html:
                return ""
        else:
            log_utils.log("Error (%s) during scraper http get: %s" % (str(e), url), log_utils.LOGWARNING)
            return ""
    except Exception as e:
        log_utils.log("Error (%s) during scraper http get: %s" % (str(e), url), log_utils.LOGWARNING)
        return ""

    self.db_connection.cache_url(url, html, data)
    return html
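# For context, a hedged sketch of how a scraper subclass might drive _cached_http_get. The
# ExampleScraper class, its base_url/timeout values, and the _http_get wrapper are assumptions
# made for illustration; only the _cached_http_get signature comes from the code above.
class ExampleScraper(Scraper):
    base_url = "http://www.example.com"
    timeout = 30

    def _http_get(self, url, data=None, headers=None, allow_redirect=True, cache_limit=8):
        # Delegate to the base helper so every request gets caching, cookie handling,
        # gzip decoding, and Cloudflare fallbacks.
        return self._cached_http_get(url, self.base_url, self.timeout, data=data, headers=headers,
                                     allow_redirect=allow_redirect, cache_limit=cache_limit)

    def search(self, video_type, title, year, season=""):
        # Illustrative call: fetch a search page and cache it for one hour.
        search_url = self.base_url + "/search?q=" + urllib.quote_plus(title)
        return self._http_get(search_url, cache_limit=1)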