def get_cooked_url(url, base_url, timeout, cookies=None, data=None, multipart_data=None,
                   headers=None, allow_redirect=True, cache_limit=8):
    """Fetch ``url`` using the persisted cookie jar and return the response body.

    Args:
        url: Address to request.
        base_url: Passed to ``_set_cookies`` to build the cookie jar.
        timeout: Socket timeout handed to ``urllib2.urlopen``; ``0`` is
            translated to ``None``.
        cookies: Optional dict passed to ``_set_cookies``.
        data: Optional request body. Strings are sent as-is, anything else is
            form-encoded with ``urllib.urlencode(data, True)``.
        multipart_data: Optional pre-built multipart body. When given it
            replaces ``data`` and a multipart ``Content-Type`` header is sent.
        headers: Optional dict of extra request headers. It is copied, so the
            caller's dict is never modified.
        allow_redirect: When False the ``NoRedirection`` opener is installed
            and the redirect target (``Refresh`` or ``Location`` header) is
            returned instead of the body.
        cache_limit: Accepted for interface compatibility; not used here.

    Returns:
        The (gunzipped if needed) response body, truncated to ``MAX_RESPONSE``
        bytes read from the socket; the redirect target when redirects are
        disabled and one is seen; or ``''`` on any error.
    """
    # NOTE(review): this file defines get_cooked_url more than once with the
    # same signature; the last definition wins at import time.
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    # Private copy: Content-Type may be added below and must not leak back
    # into the dict the caller passed in.
    headers = {} if headers is None else dict(headers)
    referer = headers.get('Referer', url)

    if kodi.get_setting('debug') == "true":
        log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|'
                      % (url, cookies, data, headers))

    # Strings are posted verbatim; mappings/sequences get form-encoded.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    try:
        cj = _set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', _get_ua())
        # NOTE(review): Host is pinned to '9movies.to' regardless of ``url`` --
        # presumably intentional for this site; confirm.
        request.add_unredirected_header('Host', '9movies.to')
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        cj.add_cookie_header(request)

        # Install exactly one global opener. The cookie opener built by
        # build_opener() already carries the default redirect handler, and it
        # must not be installed over NoRedirection or allow_redirect=False
        # would be silently ignored.
        if allow_redirect:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        else:
            opener = urllib2.build_opener(NoRedirection)
        urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            print('Response Cookies: %s - %s' % (url, cookies_as_str(cj)))
        __fix_bad_cookies()
        cj.save(ignore_discard=True)

        info = response.info()
        refresh = info.getheader('Refresh')
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or refresh):
            if refresh is not None:
                # Refresh looks like "<seconds>;url=<target>"; keep the target.
                return refresh.split(';')[-1].split('url=')[-1]
            return info.getheader('Location')

        content_length = info.getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s'
                          % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if info.get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            html = gzip.GzipFile(fileobj=buf).read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        # A 503 carrying the Cloudflare browser-check marker is handed to the
        # cloudflare solver; every other HTTP error yields ''.
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, cj, _get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during THE scraper http get: %s' % (str(e), url),
                          log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url),
                      log_utils.LOGWARNING)
        return ''

    return html
def get_cooked_url(url, base_url, timeout, cookies=None, data=None, multipart_data=None,
                   headers=None, allow_redirect=True, cache_limit=8):
    """Fetch ``url`` using the persisted cookie jar and return the response body.

    Args:
        url: Address to request.
        base_url: Passed to ``_set_cookies`` to build the cookie jar.
        timeout: Socket timeout handed to ``urllib2.urlopen``; ``0`` is
            translated to ``None``.
        cookies: Optional dict passed to ``_set_cookies``.
        data: Optional request body. Strings are sent as-is, anything else is
            form-encoded with ``urllib.urlencode(data, True)``.
        multipart_data: Optional pre-built multipart body. When given it
            replaces ``data`` and a multipart ``Content-Type`` header is sent.
        headers: Optional dict of extra request headers. It is copied, so the
            caller's dict is never modified.
        allow_redirect: When False the ``NoRedirection`` opener is installed
            and the redirect target (``Refresh`` or ``Location`` header) is
            returned instead of the body.
        cache_limit: Accepted for interface compatibility; not used here.

    Returns:
        The (gunzipped if needed) response body, truncated to ``MAX_RESPONSE``
        bytes read from the socket; the redirect target when redirects are
        disabled and one is seen; or ``''`` on any error.
    """
    # NOTE(review): this file defines get_cooked_url more than once with the
    # same signature; the last definition wins at import time.
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    # Private copy: Content-Type may be added below and must not leak back
    # into the dict the caller passed in.
    headers = {} if headers is None else dict(headers)
    referer = headers.get('Referer', url)

    if kodi.get_setting('debug') == "true":
        log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|'
                      % (url, cookies, data, headers))

    # Strings are posted verbatim; mappings/sequences get form-encoded.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    try:
        cj = _set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', _get_ua())
        # NOTE(review): Host is pinned to '9movies.to' regardless of ``url`` --
        # presumably intentional for this site; confirm.
        request.add_unredirected_header('Host', '9movies.to')
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        cj.add_cookie_header(request)

        # Install exactly one global opener. The cookie opener built by
        # build_opener() already carries the default redirect handler, and it
        # must not be installed over NoRedirection or allow_redirect=False
        # would be silently ignored.
        if allow_redirect:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        else:
            opener = urllib2.build_opener(NoRedirection)
        urllib2.install_opener(opener)

        response = urllib2.urlopen(request, timeout=timeout)
        cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            print('Response Cookies: %s - %s' % (url, cookies_as_str(cj)))
        __fix_bad_cookies()
        cj.save(ignore_discard=True)

        info = response.info()
        refresh = info.getheader('Refresh')
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or refresh):
            if refresh is not None:
                # Refresh looks like "<seconds>;url=<target>"; keep the target.
                return refresh.split(';')[-1].split('url=')[-1]
            return info.getheader('Location')

        content_length = info.getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s'
                          % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if info.get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            html = gzip.GzipFile(fileobj=buf).read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        # A 503 carrying the Cloudflare browser-check marker is handed to the
        # cloudflare solver; every other HTTP error yields ''.
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, cj, _get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during THE scraper http get: %s' % (str(e), url),
                          log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url),
                      log_utils.LOGWARNING)
        return ''

    return html
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None,
                     headers=None, allow_redirect=True, method=None, require_debrid=False,
                     read_error=False, cache_limit=8):
    """Perform an HTTP request with this scraper's cookie jar and return the body.

    Args:
        url: Address to request. Protocol-relative URLs (``//host/...``) get an
            ``http:`` prefix; unicode URLs are encoded as UTF-8.
        base_url: Passed to ``self._set_cookies`` and used as the default
            ``Referer`` when ``headers`` does not supply one.
        timeout: Socket timeout for ``urllib2.urlopen``; ``0`` becomes ``None``.
        cookies: Optional dict passed to ``self._set_cookies``.
        data: Optional request body. Strings are sent as-is, anything else is
            form-encoded with ``urllib.urlencode(data, True)``.
        multipart_data: Optional pre-built multipart body. When given it
            replaces ``data`` and a multipart ``Content-Type`` header is sent.
        headers: Optional dict of extra request headers. It is copied, so the
            caller's dict is never modified.
        allow_redirect: When False the ``NoRedirection`` opener is installed
            and the redirect target is returned instead of the body.
        method: Optional HTTP verb override (case-insensitive). For ``HEAD``
            the body is not read and ``''`` is returned.
        require_debrid: Currently unused; the debrid check below is disabled.
        read_error: When True, the body of a non-Cloudflare HTTP error
            response is returned instead of ``''``.
        cache_limit: Currently unused; the cache write at the end is disabled.

    Returns:
        The (gunzipped if needed) response body, the redirect target when
        redirects are disabled and one is seen, or ``''`` on error.

    Side effects:
        Rebinds ``self.cj`` and saves the cookie jar; installs a global
        urllib2 opener.
    """
    # if require_debrid:
    #     if Scraper.debrid_resolvers is None:
    #         Scraper.debrid_resolvers = [resolver for resolver in urlresolver.relevant_resolvers() if
    #                                     resolver.isUniversal()]
    #     if not Scraper.debrid_resolvers:
    #         log_utils.log('%s requires debrid: %s' % (self.__module__, Scraper.debrid_resolvers),
    #                       log_utils.LOGDEBUG)
    #         return ''

    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    # Private copy: Content-Type may be added below and must not leak back
    # into the dict the caller passed in.
    headers = {} if headers is None else dict(headers)
    if url.startswith('//'):
        url = 'http:' + url
    referer = headers.get('Referer', base_url)

    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|'
                  % (url, cookies, data, headers), log_utils.LOGDEBUG)

    # Strings are posted verbatim; mappings/sequences get form-encoded.
    if data is not None and not isinstance(data, basestring):
        data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    try:
        self.cj = self._set_cookies(base_url, cookies)
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)

        # Install exactly one global opener. The cookie opener built by
        # build_opener() already carries the default redirect handler, and it
        # must not be installed over NoRedirection or allow_redirect=False
        # would be silently ignored.
        if allow_redirect:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        else:
            opener = urllib2.build_opener(NoRedirection)
        urllib2.install_opener(opener)

        # Normalise the verb once so the request and the HEAD short-circuit
        # below agree regardless of the caller's casing.
        http_method = None
        if method is not None:
            http_method = method.upper()
            request.get_method = lambda: http_method

        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s'
                          % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)

        info = response.info()
        refresh = info.getheader('Refresh')
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or refresh):
            if refresh is not None:
                # Refresh looks like "<seconds>;url=<target>"; keep the target.
                return refresh.split(';')[-1].split('url=')[-1]
            redir_url = info.getheader('Location')
            # Some Location values arrive with a stray leading '='; drop it.
            if redir_url.startswith('='):
                redir_url = redir_url[1:]
            return redir_url

        content_length = info.getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s'
                          % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if http_method == 'HEAD':
            return ''

        if info.get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            html = gzip.GzipFile(fileobj=buf).read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        html = e.read()
        if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
            html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(), self.get_name())
            if not html:
                return ''
        elif e.code == 503 and 'cf-browser-verification' in html:
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during first scraper http get: %s' % (str(e), url),
                          log_utils.LOGWARNING)
            # With read_error set, fall through and hand back the error body.
            if not read_error:
                return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url),
                      log_utils.LOGWARNING)
        return ''

    #self.db_connection().cache_url(url, html, data)
    return html