def _fetch(self, url, form_data={}, headers={}, compression=True, jdata=False):
    """
    Perform an HTTP GET or POST request.

    Args:
        url (str): The URL to GET or POST.

        form_data (dict): A dictionary of form data to POST. If empty, the
        request will be a GET, if it contains form data it will be a POST.

    Kwargs:
        headers (dict): A dictionary describing any headers you would like
        to add to the request. (eg. ``{'X-Test': 'testing'}``)

        compression (bool): If ``True`` (default), try to use gzip
        compression.

        jdata (bool): If ``True``, send ``form_data`` as JSON with an
        ``application/json`` Content-Type header.

    Returns:
        An :class:`HttpResponse` object containing headers and other
        meta-information about the page and the page content.
    """
    req = urllib_request.Request(url)
    if form_data:
        if jdata:
            form_data = json.dumps(form_data)
        elif isinstance(form_data, six.string_types):
            form_data = form_data
        else:
            form_data = urllib_parse.urlencode(form_data, True)
        form_data = form_data.encode('utf-8') if six.PY3 else form_data
        req = urllib_request.Request(url, form_data)
    req.add_header('User-Agent', self._user_agent)
    for key in headers:
        req.add_header(key, headers[key])
    if compression:
        req.add_header('Accept-Encoding', 'gzip')
    if jdata:
        req.add_header('Content-Type', 'application/json')
    host = req.host if six.PY3 else req.get_host()
    req.add_unredirected_header('Host', host)
    try:
        response = urllib_request.urlopen(req, timeout=15)
    except urllib_error.HTTPError as e:
        if e.code == 403:
            # a 403 may be a TLS negotiation problem: rebuild the opener
            # with a relaxed TLS level before the single retry below
            self._update_opener(drop_tls_level=True)
        response = urllib_request.urlopen(req, timeout=15)
    return HttpResponse(response)
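# Usage sketch (hypothetical URL and `net` instance; neither is part of this
# module): an empty form_data yields a GET, a dict yields a form-encoded
# POST, and jdata=True switches the same dict to a JSON POST.
#
#   resp = net._fetch('http://example.com/api')                      # GET
#   resp = net._fetch('http://example.com/api', {'q': 'kodi'})       # POST
#   resp = net._fetch('http://example.com/api', {'q': 'kodi'},
#                     jdata=True)                                    # JSON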
def get(url, cookiepath=None, cookie=None, user_agent=None, referer=None):
    # use cookies if cookiepath is set and if the cookiepath exists.
    if cookiepath is not None:
        # check if user has supplied only a folder path, or a full path
        if not os.path.isfile(cookiepath):
            # if the user supplied only a folder path, append on to the end
            # of the path a common filename.
            cookiepath = os.path.join(cookiepath, 'cookies.lwp')
        # check that the cookie exists
        if not os.path.exists(cookiepath):
            with open(cookiepath, 'w') as f:
                f.write('#LWP-Cookies-2.0\n')
        cj = http_cookiejar.LWPCookieJar()
        cj.load(cookiepath)
        req = urllib_request.Request(url)
        if user_agent:
            req.add_header('User-Agent', user_agent)
        else:
            req.add_header('User-Agent', USER_AGENT_STRING)
        if referer:
            req.add_header('Referer', referer)
        if cookie:
            req.add_header('Cookie', cookie)
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cj))
        try:
            response = opener.open(req)
        except urllib_error.URLError as e:
            xbmc.log('%s Error opening %s' % (e, url))
            sys.exit(1)
        link = response.read()
        response.close()
        return link
    else:
        return _loadwithoutcookies(url, user_agent)
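# Usage sketch (hypothetical paths/URLs): a directory for cookiepath makes
# the function manage a 'cookies.lwp' file inside it; omitting cookiepath
# falls through to _loadwithoutcookies().
#
#   html = get('http://example.com/', cookiepath='/tmp/addon_data')
#   html = get('http://example.com/', user_agent='MyAgent/1.0')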
def access(image):
    if '/' not in image:
        return True
    hub, image_name = image.split('/', 1)
    if ':' in image_name:
        image_name, image_tag = image_name.split(':', 1)
    else:
        image_tag = 'latest'
    url = 'https://%s/v2/%s/manifests/%s' % (hub, image_name, image_tag)
    username = password = ucr_get('uuid/license')
    auth = encodestring('%s:%s' % (username, password)).replace('\n', '')
    request = urllib_request.Request(
        url, headers={'Authorization': 'Basic %s' % auth})
    try:
        urlopen(request)
    except urllib_error.HTTPError as exc:
        if exc.getcode() == 401:
            return False
        else:
            return False  # TODO
    except (urllib_error.URLError, ssl.CertificateError,
            http_client.BadStatusLine):
        return False  # TODO
    else:
        return True
def get_media_url(self, host, media_id):
    web_url = self.get_url(host, media_id)
    logger.log_debug('HugeFiles: get_link: %s' % (web_url))
    html = self.net.http_GET(web_url).content

    r = re.findall('File Not Found', html)
    if r:
        raise ResolverError('File Not Found or removed')

    # Grab data values
    data = helpers.get_hidden(html)
    data.update(captcha_lib.do_captcha(html))
    logger.log_debug('HugeFiles - Requesting POST URL: %s with data: %s'
                     % (web_url, data))
    html = self.net.http_POST(web_url, data).content

    # Re-grab data values
    data = helpers.get_hidden(html)
    data['referer'] = web_url
    headers = {'User-Agent': common.EDGE_USER_AGENT}
    logger.log_debug('HugeFiles - Requesting POST URL: %s with data: %s'
                     % (web_url, data))
    # encode the urlencoded form so the POST body is bytes on Python 3
    request = urllib_request.Request(
        web_url, data=urllib_parse.urlencode(data).encode('utf-8'),
        headers=headers)
    try:
        stream_url = urllib_request.urlopen(request).geturl()
    except:
        return

    logger.log_debug('Hugefiles stream Found: %s' % stream_url)
    return stream_url
def request(url, params={}, headers={}, data=None, method=None):
    if params:
        url = "".join([url, "?", urllib_parse.urlencode(params)])
    req = urllib_request.Request(url)
    if method:
        req.get_method = lambda: method
    req.add_header("User-Agent", USER_AGENT)
    req.add_header("Accept-Encoding", "gzip")
    for k, v in headers.items():
        req.add_header(k, v)
    if data:
        req.data = data
    try:
        with closing(urllib_request.urlopen(req)) as response:
            data = response.read()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                # 16 + MAX_WBITS tells zlib to expect a gzip header
                data = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(data)
            response.data = data
            response.json = lambda: parse_json(data)
            response.xml = lambda: parse_xml(data)
            return response
    except Exception as e:
        import traceback
        # wrap map() in list() so the log calls run on Python 3 too
        list(map(log.error, traceback.format_exc().split("\n")))
        notify("%s: %s" % (getLocalizedString(30224), repr(e).encode('utf-8')))
        return None, None
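# Usage sketch (hypothetical URL): params are urlencoded onto the query
# string; on success the response object carries the decompressed body in
# .data plus lazy .json()/.xml() helpers, while failures return (None, None).
#
#   resp = request('http://example.com/api', params={'page': 1})
#   if resp is not None and not isinstance(resp, tuple):
#       items = resp.json()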
def getUrl(url, cookieJar=None, post=None, timeout=20, headers=None,
           noredir=False):
    cookie_handler = urllib_request.HTTPCookieProcessor(cookieJar)
    if noredir:
        opener = urllib_request.build_opener(
            NoRedirection, cookie_handler,
            urllib_request.HTTPBasicAuthHandler(),
            urllib_request.HTTPHandler())
    else:
        opener = urllib_request.build_opener(
            cookie_handler, urllib_request.HTTPBasicAuthHandler(),
            urllib_request.HTTPHandler())
    # opener = urllib_request.install_opener(opener)
    req = urllib_request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
    if headers:
        for h, hv in headers:
            req.add_header(h, hv)
    response = opener.open(req, post, timeout=timeout)
    link = response.read()
    response.close()
    return link
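# Usage sketch (hypothetical values): note that `headers` is iterated as
# (name, value) pairs here, so pass a list of tuples rather than a dict.
#
#   cj = http_cookiejar.LWPCookieJar()
#   html = getUrl('http://example.com/', cookieJar=cj,
#                 headers=[('Referer', 'http://example.com/')])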
def openURL(url):
    xbmc.log("Opening %s" % url)
    req = urllib_request.Request(url)
    req.add_header('Referer', 'https://www.empflix.com/')
    response = urllib_request.urlopen(req)
    link = response.read().decode('utf-8')
    response.close()
    return link
def get_redirect_url(url, headers={}):
    class NoRedirection(urllib_request.HTTPErrorProcessor):
        def http_response(self, request, response):
            return response

    opener = urllib_request.build_opener(NoRedirection,
                                         urllib_request.HTTPHandler)
    urllib_request.install_opener(opener)
    request = urllib_request.Request(url, headers=headers)
    response = urllib_request.urlopen(request)
    return response.geturl()
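# Behaviour note (usage sketch, hypothetical URL): only http_response is
# overridden above, so a plain-http 30x response is returned unprocessed and
# geturl() stays the requested URL; the redirect target is carried in the
# response's Location header rather than in the return value.
#
#   url_or_target = get_redirect_url('http://example.com/r',
#                                    {'User-Agent': 'MyAgent/1.0'})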
def getResponse(url, headers, size):
    try:
        if size > 0:
            size = int(size)
            headers['Range'] = 'bytes=%d-' % size
        req = urllib_request.Request(url, headers=headers)
        resp = urllib_request.urlopen(req, timeout=30)
        return resp
    except:
        return None
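# Usage sketch (hypothetical values): a positive `size` asks the server to
# resume at that byte offset via a Range header, e.g. to continue a partial
# download.
#
#   resp = getResponse('http://example.com/file.bin',
#                      {'User-Agent': 'MyAgent/1.0'}, 1048576)
#   if resp is not None:
#       chunk = resp.read(16 * 1024)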
def register(search, search_movie, search_episode, search_season=None):
    try:
        payload = json.loads(base64.b64decode(sys.argv[1]))
    except:
        notify(getElementumLocalizedString(30102), time=1000)
        return

    results = ()
    method = {
        "search": search,
        "search_movie": search_movie,
        "search_season": search_season,
        "search_episode": search_episode,
    }.get(payload["method"]) or (lambda *a, **kw: [])

    try:
        results = ()
        try:
            objects = method(payload["search_object"])
            if objects is not None:
                results = tuple(objects)
        except Exception as e:
            import traceback
            # wrap map() in list() so the log calls run on Python 3 too
            list(map(log.error, traceback.format_exc().split("\n")))
            notify(py2_encode(
                "%s: %s" % (getElementumLocalizedString(30224), repr(e)),
                'utf-8'))
            try:
                urllib_request.urlopen(
                    "%s/provider/%s/failure" % (ELEMENTUMD_HOST, ADDON_ID))
            except:
                pass
    finally:
        try:
            req_data = json.dumps(results)
            if not PY2 and isinstance(req_data, str):
                req_data = req_data.encode()
            req = urllib_request.Request(payload["callback_url"],
                                         data=req_data)
            with closing(urllib_request.urlopen(req)) as response:
                log.debug("callback returned: %d" % response.getcode())
        except Exception as e:
            import traceback
            list(map(log.error, traceback.format_exc().split("\n")))
            notify(py2_encode(
                "%s: %s" % (getElementumLocalizedString(30224), repr(e)),
                'utf-8'))
            try:
                urllib_request.urlopen(
                    "%s/provider/%s/failure" % (ELEMENTUMD_HOST, ADDON_ID))
            except:
                pass
def get_media_url(self, host, media_id):
    web_url = self.get_url(host, media_id)
    headers = {'User-Agent': common.RAND_UA}
    request = urllib_request.Request(web_url, headers=headers)
    response = urllib_request.urlopen(request, context=self.context)
    # decode so the str regex below also works on Python 3
    html = response.read().decode('utf-8', errors='ignore')
    source = re.search(r'''file:\s*["']([^"']+)''', html)
    if source:
        headers.update({'Referer': web_url})
        return source.group(1) + helpers.append_headers(headers)
    raise ResolverError('File not found')
def _fetch(self, url, form_data={}, headers={}, compression=True):
    '''
    Perform an HTTP GET or POST request.

    Args:
        url (str): The URL to GET or POST.

        form_data (dict): A dictionary of form data to POST. If empty, the
        request will be a GET, if it contains form data it will be a POST.

    Kwargs:
        headers (dict): A dictionary describing any headers you would like
        to add to the request. (eg. ``{'X-Test': 'testing'}``)

        compression (bool): If ``True`` (default), try to use gzip
        compression.

    Returns:
        An :class:`HttpResponse` object containing headers and other
        meta-information about the page and the page content.
    '''
    req = urllib_request.Request(url)
    if form_data:
        if isinstance(form_data, string_types):
            form_data = form_data
        else:
            form_data = urlencode(form_data, True)
        req = urllib_request.Request(url, form_data)
    req.add_header('User-Agent', self._user_agent)
    for key in headers:
        req.add_header(key, headers[key])
    if compression:
        req.add_header('Accept-Encoding', 'gzip')
    req.add_unredirected_header('Host', req.get_host())
    response = urllib_request.urlopen(req)
    return HttpResponse(response)
def _loadwithoutcookies(url, user_agent=None, referer=None):
    xbmc.log('Loading without cookies')
    url = url.replace('http:', 'https:')
    req = urllib_request.Request(url)
    if user_agent:
        req.add_header('User-Agent', user_agent)
    if referer:
        req.add_header('Referer', referer)
    try:
        response = urllib_request.urlopen(req)
    except urllib_error.HTTPError as e:
        xbmc.log("%s %s" % (url, e.reason), xbmc.LOGFATAL)
        sys.exit(0)
    link = response.read()
    response.close()
    return link
def _parse_redirect(self, url, hdrs={}):
    class NoRedirection(urllib_request.HTTPErrorProcessor):
        def http_response(self, request, response):
            return response

    opener = urllib_request.build_opener(NoRedirection)
    urllib_request.install_opener(opener)  # @ big change
    request = urllib_request.Request(url, headers=hdrs)
    try:
        response = urllib_request.urlopen(request)
    except urllib_error.HTTPError as e:
        if e.code == 429 or e.code == 403:
            msg = 'Daily view limit reached'
            common.kodi.notify(header=None, msg=msg, duration=3000)
            raise ResolverError(msg)
        # any other HTTP error would leave `response` unbound; re-raise
        raise
    response_headers = dict([(item[0].title(), item[1])
                             for item in list(response.info().items())])
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        self.headers.update({'Cookie': cookie})
    return response.geturl()
def http_DELETE(self, url, headers={}):
    """
    Perform an HTTP DELETE request.

    Args:
        url (str): The URL to DELETE.

    Kwargs:
        headers (dict): A dictionary describing any headers you would like
        to add to the request. (eg. ``{'X-Test': 'testing'}``)

    Returns:
        An :class:`HttpResponse` object containing headers and other
        meta-information about the page.
    """
    request = urllib_request.Request(url)
    request.get_method = lambda: 'DELETE'
    request.add_header('User-Agent', self._user_agent)
    for key in headers:
        request.add_header(key, headers[key])
    response = urllib_request.urlopen(request)
    return HttpResponse(response)
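# Usage sketch (hypothetical URL and `net` instance): overriding get_method
# is the stock urllib way to send verbs other than GET/POST through Request,
# as this method does for DELETE.
#
#   resp = net.http_DELETE('http://example.com/api/items/1',
#                          headers={'X-Test': 'testing'})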
def send_information(action, app=None, status=200, value=None):
    app_id = app and app.id
    utils_logger.debug(
        'send_information: action=%s app=%s value=%s status=%s'
        % (action, app_id, value, status))
    server = get_server()
    url = '%s/postinst' % server
    uuid = '00000000-0000-0000-0000-000000000000'
    system_uuid = '00000000-0000-0000-0000-000000000000'
    if not app or app.notify_vendor:
        uuid = ucr_get('uuid/license', uuid)
        system_uuid = ucr_get('uuid/system', system_uuid)
    if action == 'search':
        uuid = '00000000-0000-0000-0000-000000000000'
        system_uuid = None
    values = {
        'action': action,
        'status': status,
        'uuid': uuid,
        'role': ucr_get('server/role'),
    }
    if app:
        values['app'] = app.id
        values['version'] = app.version
    if value:
        values['value'] = value
    if system_uuid:
        values['system-uuid'] = system_uuid
    utils_logger.debug('tracking information: %s' % str(values))
    try:
        request_data = urlencode(values).encode('utf-8')
        request = urllib_request.Request(url, request_data)
        urlopen(request)
    except Exception as exc:
        utils_logger.info(
            'Error sending app infos to the App Center server: %s' % exc)
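# Usage sketch (hypothetical `app` object): posts the tracking dict to
# <server>/postinst; real license/system UUIDs are only sent when the app
# opts in via notify_vendor, and a 'search' action never sends them.
#
#   send_information('install', app=app)
#   send_information('search')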
def getUrl(url, proxy={}, timeout=TIMEOUT, cookies=True):
    global cs
    cookie = []
    if proxy:
        urllib_request.install_opener(
            urllib_request.build_opener(urllib_request.ProxyHandler(proxy)))
    elif cookies:
        cookie = http_cookiejar.LWPCookieJar()
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie))
        urllib_request.install_opener(opener)
    req = urllib_request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36')
    try:
        response = urllib_request.urlopen(req, timeout=timeout)
        linkSRC = response.read()
        response.close()
    except:
        linkSRC = ''
    cs = ''.join(['%s=%s;' % (c.name, c.value) for c in cookie])
    return linkSRC
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, limit=None, referer=None,
            cookie=None, output='', timeout='30'):
    handlers = []

    if proxy is not None:
        handlers += [ProxyHandler({'http': '{0}'.format(proxy)}),
                     urllib_request.HTTPHandler]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

    if output == 'cookie' or output == 'extended' or close is not True:
        cookies = cookielib.LWPCookieJar()
        handlers += [urllib_request.HTTPHandler(),
                     urllib_request.HTTPSHandler(),
                     urllib_request.HTTPCookieProcessor(cookies)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

    try:
        if sys.version_info < (2, 7, 9):
            raise Exception()
        import ssl
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        handlers += [urllib_request.HTTPSHandler(context=ssl_context)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)
    except:
        pass

    try:
        headers.update(headers)
    except:
        headers = {}

    if 'User-Agent' in headers:
        pass
    elif not mobile is True:
        # headers['User-Agent'] = agent()
        headers['User-Agent'] = cache.get(randomagent, 1)
    else:
        headers['User-Agent'] = 'Apple-iPhone/701.341'

    if 'Referer' in headers:
        pass
    elif referer is None:
        headers['Referer'] = '%s://%s/' % (urlparse(url).scheme,
                                           urlparse(url).netloc)
    else:
        headers['Referer'] = referer

    if not 'Accept-Language' in headers:
        headers['Accept-Language'] = 'en-US'

    if 'Cookie' in headers:
        pass
    elif cookie is not None:
        headers['Cookie'] = cookie

    if redirect is False:
        # subclass HTTPErrorProcessor (not HTTPError) so redirects are
        # returned unprocessed instead of being followed
        class NoRedirection(urllib_request.HTTPErrorProcessor):
            def http_response(self, request, response):
                return response

        opener = urllib_request.build_opener(NoRedirection)
        urllib_request.install_opener(opener)

        try:
            del headers['Referer']
        except:
            pass

    req = urllib_request.Request(url, data=post, headers=headers)

    try:
        response = urllib_request.urlopen(req, timeout=int(timeout))
    except urllib_error.HTTPError as response:
        if response.code == 503:
            if 'cf-browser-verification' in response.read(5242880):
                netloc = '%s://%s' % (urlparse(url).scheme,
                                      urlparse(url).netloc)
                cf = cache.get(cfcookie, 168, netloc,
                               headers['User-Agent'], timeout)
                headers['Cookie'] = cf
                request = urllib_request.Request(url, data=post,
                                                 headers=headers)
                response = urllib_request.urlopen(request,
                                                  timeout=int(timeout))
            elif error is False:
                return
        elif error is False:
            return

    if output == 'cookie':
        try:
            result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except:
            pass
        try:
            result = cf
        except:
            pass
    elif output == 'response':
        if limit == '0':
            result = (str(response.code), response.read(224 * 1024))
        elif limit is not None:
            result = (str(response.code), response.read(int(limit) * 1024))
        else:
            result = (str(response.code), response.read(5242880))
    elif output == 'chunk':
        try:
            content = int(response.headers['Content-Length'])
        except:
            content = (2049 * 1024)
        if content < (2048 * 1024):
            return
        result = response.read(16 * 1024)
    elif output == 'extended':
        try:
            cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except:
            pass
        try:
            cookie = cf
        except:
            pass
        content = response.headers
        result = response.read(5242880)
        return result, headers, content, cookie
    elif output == 'geturl':
        result = response.geturl()
    elif output == 'headers':
        content = response.headers
        return content
    else:
        if limit == '0':
            result = response.read(224 * 1024)
        elif limit is not None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

    if close is True:
        response.close()

    return result
def solve(url, cj, user_agent=None, wait=True):
    if user_agent is None:
        user_agent = USER_AGENT
    headers = {'User-Agent': user_agent, 'Referer': url}
    if cj is not None:
        try:
            cj.load(ignore_discard=True)
        except:
            pass
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cj))
        urllib_request.install_opener(opener)

    request = urllib_request.Request(url)
    for key in headers:
        request.add_header(key, headers[key])
    try:
        response = urllib_request.urlopen(request)
        html = response.read()
    except urllib_error.HTTPError as e:
        html = e.read()

    tries = 0
    while tries < MAX_TRIES:
        solver_pattern = r'var (?:s,t,o,p,b,r,e,a,k,i,n,g|t,r,a),f,\s*([^=]+)={"([^"]+)":([^}]+)};.+challenge-form\'\);.*?\n.*?;(.*?);a\.value'
        vc_pattern = 'input type="hidden" name="jschl_vc" value="([^"]+)'
        pass_pattern = 'input type="hidden" name="pass" value="([^"]+)'
        init_match = re.search(solver_pattern, html, re.DOTALL)
        vc_match = re.search(vc_pattern, html)
        pass_match = re.search(pass_pattern, html)

        if not init_match or not vc_match or not pass_match:
            xbmc.log(
                "Couldn't find attribute: init: |%s| vc: |%s| pass: |%s| No cloudflare check?"
                % (init_match, vc_match, pass_match))
            return False

        init_dict, init_var, init_equation, equations = init_match.groups()
        vc = vc_match.group(1)
        password = pass_match.group(1)

        # log_utils.log("VC is: %s" % (vc), xbmc.LOGDEBUG, COMPONENT)
        varname = (init_dict, init_var)
        result = int(solve_equation(init_equation.rstrip()))
        xbmc.log('Initial value: |%s| Result: |%s|' % (init_equation, result))

        for equation in equations.split(';'):
            equation = equation.rstrip()
            if equation[:len('.'.join(varname))] != '.'.join(varname):
                xbmc.log('Equation does not start with varname |%s|'
                         % (equation))
            else:
                equation = equation[len('.'.join(varname)):]
                expression = equation[2:]
                operator = equation[0]
                if operator not in ['+', '-', '*', '/']:
                    # log_utils.log('Unknown operator: |%s|' % (equation), log_utils.LOGWARNING, COMPONENT)
                    continue
                result = int(str(eval(
                    str(result) + operator + str(solve_equation(expression)))))
                # log_utils.log('intermediate: %s = %s' % (equation, result), log_utils.LOGDEBUG, COMPONENT)

        scheme = urllib_parse.urlparse(url).scheme
        domain = urllib_parse.urlparse(url).hostname
        result += len(domain)
        # log_utils.log('Final Result: |%s|' % (result), log_utils.LOGDEBUG, COMPONENT)

        if wait:
            # log_utils.log('Sleeping for 5 Seconds', log_utils.LOGDEBUG, COMPONENT)
            xbmc.sleep(5000)

        url = '%s://%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s&pass=%s' % (
            scheme, domain, vc, result, urllib_parse.quote(password))
        # log_utils.log('url: %s' % (url), log_utils.LOGDEBUG, COMPONENT)
        request = urllib_request.Request(url)
        for key in headers:
            request.add_header(key, headers[key])
        try:
            opener = urllib_request.build_opener(NoRedirection)
            urllib_request.install_opener(opener)
            response = urllib_request.urlopen(request)
            while response.getcode() in [301, 302, 303, 307]:
                if cj is not None:
                    cj.extract_cookies(response, request)
                redir_url = response.info().getheader('location')
                if not redir_url.startswith('http'):
                    base_url = '%s://%s' % (scheme, domain)
                    redir_url = urllib_parse.urljoin(base_url, redir_url)
                request = urllib_request.Request(redir_url)
                for key in headers:
                    request.add_header(key, headers[key])
                if cj is not None:
                    cj.add_cookie_header(request)
                response = urllib_request.urlopen(request)
            final = response.read()
            if 'cf-browser-verification' in final:
                # log_utils.log('CF Failure: html: %s url: %s' % (html, url), log_utils.LOGWARNING, COMPONENT)
                tries += 1
                html = final
            else:
                break
        except urllib_error.HTTPError as e:
            # log_utils.log('CloudFlare HTTP Error: %s on url: %s' % (e.code, url), log_utils.LOGWARNING, COMPONENT)
            return False
        except urllib_error.URLError as e:
            # log_utils.log('CloudFlare URLError Error: %s on url: %s' % (e, url), log_utils.LOGWARNING, COMPONENT)
            return False

    if cj is not None:
        cj.save()

    return final
def cfcookie(netloc, ua, timeout):
    try:
        headers = {'User-Agent': ua}
        req = urllib_request.Request(netloc, headers=headers)
        try:
            urllib_request.urlopen(req, timeout=int(timeout))
        except urllib_error.HTTPError as response:
            result = response.read(5242880)

        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]
        init = re.findall(r'setTimeout\(function\(\){\s*.*?.*:(.*?)};',
                          result)[-1]
        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]
        decryptVal = parseJSString(init)
        lines = builder.split(';')

        for line in lines:
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = parseJSString(sections[1])
                decryptVal = int(
                    eval(str(decryptVal) + sections[0][-1] + str(line_val)))

        answer = decryptVal + len(urlparse(netloc).netloc)

        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (
            netloc, jschl, answer)

        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer)

        # Cloudflare expects the challenge answer only after a short delay
        time.sleep(5)

        cookies = cookielib.LWPCookieJar()
        handlers = [urllib_request.HTTPHandler(),
                    urllib_request.HTTPSHandler(),
                    urllib_request.HTTPCookieProcessor(cookies)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

        try:
            request = urllib_request.Request(query, headers=headers)
            urllib_request.urlopen(request, timeout=int(timeout))
        except:
            pass

        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])

        return cookie
    except:
        pass
def get_redirect_url(url, headers={}):
    request = urllib_request.Request(url, headers=headers)
    request.get_method = lambda: 'HEAD'
    response = urllib_request.urlopen(request)
    return response.geturl()
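# Usage sketch (hypothetical URL): unlike the NoRedirection variant above,
# this version lets urllib follow the redirect chain, so geturl() reports
# the final destination; the HEAD override just avoids downloading a body.
#
#   final = get_redirect_url('http://example.com/short',
#                            headers={'User-Agent': 'MyAgent/1.0'})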
def getRegexParsed(
        regexs,
        url,
        cookieJar=None,
        forCookieJarOnly=False,
        recursiveCall=False,
        cachedPages={},
        rawPost=False,
        cookie_jar_file=None):  # 0,1,2 = URL, regexOnly, CookieJarOnly
    # cachedPages = {}
    # print 'url',url
    doRegexs = re.compile(r'\$doregex\[([^\]]*)\]').findall(url)
    # print 'doRegexs',doRegexs,regexs
    setresolved = True
    for k in doRegexs:
        if k in regexs:
            # print 'processing ',k
            m = regexs[k]
            # print m
            cookieJarParam = False
            if 'cookiejar' in m:  # so either create or reuse existing jar
                # print 'cookiejar exists',m['cookiejar']
                cookieJarParam = m['cookiejar']
                if '$doregex' in cookieJarParam:
                    cookieJar = getRegexParsed(regexs, m['cookiejar'],
                                               cookieJar, True, True,
                                               cachedPages)
                    cookieJarParam = True
                else:
                    cookieJarParam = True
            # print 'm[cookiejar]',m['cookiejar'],cookieJar
            if cookieJarParam:
                if cookieJar is None:
                    # print 'create cookie jar'
                    cookie_jar_file = None
                    if 'open[' in m['cookiejar']:
                        cookie_jar_file = m['cookiejar'].split(
                            'open[')[1].split(']')[0]
                        # print 'cookieJar from file name',cookie_jar_file
                    cookieJar = getCookieJar(cookie_jar_file)
                    # print 'cookieJar from file',cookieJar
                    if cookie_jar_file:
                        saveCookieJar(cookieJar, cookie_jar_file)
                    # cookieJar = http_cookiejar.LWPCookieJar()
                    # print 'cookieJar new',cookieJar
                elif 'save[' in m['cookiejar']:
                    cookie_jar_file = m['cookiejar'].split(
                        'save[')[1].split(']')[0]
                    complete_path = os.path.join(profile, cookie_jar_file)
                    # print 'complete_path',complete_path
                    saveCookieJar(cookieJar, cookie_jar_file)

            if m['page'] and '$doregex' in m['page']:
                pg = getRegexParsed(regexs, m['page'], cookieJar,
                                    recursiveCall=True,
                                    cachedPages=cachedPages)
                if len(pg) == 0:
                    pg = 'http://regexfailed'
                m['page'] = pg

            if 'setcookie' in m and m['setcookie'] and '$doregex' in m['setcookie']:
                m['setcookie'] = getRegexParsed(regexs, m['setcookie'],
                                                cookieJar,
                                                recursiveCall=True,
                                                cachedPages=cachedPages)
            if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m['appendcookie']:
                m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'],
                                                   cookieJar,
                                                   recursiveCall=True,
                                                   cachedPages=cachedPages)

            if 'post' in m and '$doregex' in m['post']:
                m['post'] = getRegexParsed(regexs, m['post'], cookieJar,
                                           recursiveCall=True,
                                           cachedPages=cachedPages)
                # print 'post is now',m['post']

            if 'rawpost' in m and '$doregex' in m['rawpost']:
                m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar,
                                              recursiveCall=True,
                                              cachedPages=cachedPages,
                                              rawPost=True)
                # print 'rawpost is now',m['rawpost']

            if 'rawpost' in m and '$epoctime$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime$',
                                                    getEpocTime())
            if 'rawpost' in m and '$epoctime2$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime2$',
                                                    getEpocTime2())

            link = ''
            if m['page'] and m['page'] in cachedPages and 'ignorecache' not in m and forCookieJarOnly is False:
                # print 'using cache page',m['page']
                link = cachedPages[m['page']]
            else:
                if m['page'] and not m['page'] == '' and m['page'].startswith('http'):
                    if '$epoctime$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime$',
                                                      getEpocTime())
                    if '$epoctime2$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime2$',
                                                      getEpocTime2())
                    # print 'Ignoring Cache',m['page']
                    page_split = m['page'].split('|')
                    pageUrl = page_split[0]
                    header_in_page = None
                    if len(page_split) > 1:
                        header_in_page = page_split[1]

                    # proxy = urllib_request.ProxyHandler({('https' if proxytouse[:5] == "https" else "http"): proxytouse})
                    # opener = urllib_request.build_opener(proxy)
                    # urllib_request.install_opener(opener)

                    # print 'urllib_request.getproxies',urllib_request.getproxies()
                    current_proxies = urllib_request.ProxyHandler(
                        urllib_request.getproxies())

                    # print 'getting pageUrl',pageUrl
                    req = urllib_request.Request(pageUrl)
                    if 'proxy' in m:
                        proxytouse = m['proxy']
                        # print 'proxytouse',proxytouse
                        # urllib_request.getproxies = lambda: {}
                        if pageUrl[:5] == "https":
                            proxy = urllib_request.ProxyHandler(
                                {'https': proxytouse})
                            # req.set_proxy(proxytouse, 'https')
                        else:
                            proxy = urllib_request.ProxyHandler(
                                {'http': proxytouse})
                            # req.set_proxy(proxytouse, 'http')
                        opener = urllib_request.build_opener(proxy)
                        urllib_request.install_opener(opener)

                    req.add_header(
                        'User-Agent',
                        'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1'
                    )
                    proxytouse = None

                    if 'referer' in m:
                        req.add_header('Referer', m['referer'])
                    if 'accept' in m:
                        req.add_header('Accept', m['accept'])
                    if 'agent' in m:
                        req.add_header('User-agent', m['agent'])
                    if 'x-req' in m:
                        req.add_header('X-Requested-With', m['x-req'])
                    if 'x-addr' in m:
                        req.add_header('x-addr', m['x-addr'])
                    if 'x-forward' in m:
                        req.add_header('X-Forwarded-For', m['x-forward'])
                    if 'setcookie' in m:
                        # print 'adding cookie',m['setcookie']
                        req.add_header('Cookie', m['setcookie'])
                    if 'appendcookie' in m:
                        # print 'appending cookie to cookiejar',m['appendcookie']
                        cookiestoApend = m['appendcookie']
                        cookiestoApend = cookiestoApend.split(';')
                        for h in cookiestoApend:
                            n, v = h.split('=')
                            w, n = n.split(':')
                            ck = http_cookiejar.Cookie(
                                version=0,
                                name=n,
                                value=v,
                                port=None,
                                port_specified=False,
                                domain=w,
                                domain_specified=False,
                                domain_initial_dot=False,
                                path='/',
                                path_specified=True,
                                secure=False,
                                expires=None,
                                discard=True,
                                comment=None,
                                comment_url=None,
                                rest={'HttpOnly': None},
                                rfc2109=False)
                            cookieJar.set_cookie(ck)
                    if 'origin' in m:
                        req.add_header('Origin', m['origin'])
                    if header_in_page:
                        header_in_page = header_in_page.split('&')
                        for h in header_in_page:
                            n, v = h.split('=')
                            req.add_header(n, v)

                    if cookieJar is not None:
                        # print 'cookieJarVal',cookieJar
                        cookie_handler = urllib_request.HTTPCookieProcessor(
                            cookieJar)
                        opener = urllib_request.build_opener(
                            cookie_handler,
                            urllib_request.HTTPBasicAuthHandler(),
                            urllib_request.HTTPHandler())
                        opener = urllib_request.install_opener(opener)
                        # print 'noredirect','noredirect' in m
                        if 'noredirect' in m:
                            opener = urllib_request.build_opener(
                                cookie_handler, NoRedirection,
                                urllib_request.HTTPBasicAuthHandler(),
                                urllib_request.HTTPHandler())
                            opener = urllib_request.install_opener(opener)
                    elif 'noredirect' in m:
                        opener = urllib_request.build_opener(
                            NoRedirection,
                            urllib_request.HTTPBasicAuthHandler(),
                            urllib_request.HTTPHandler())
                        opener = urllib_request.install_opener(opener)

                    if 'connection' in m:
                        # print '..........................connection//////.',m['connection']
                        from keepalive import HTTPHandler
                        keepalive_handler = HTTPHandler()
                        opener = urllib_request.build_opener(
                            keepalive_handler)
                        urllib_request.install_opener(opener)

                    # print 'after cookie jar'
                    post = None

                    if 'post' in m:
                        postData = m['post']
                        # if '$LiveStreamRecaptcha' in postData:
                        #     (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar)
                        #     if captcha_challenge:
                        #         postData=postData.replace('$LiveStreamRecaptcha','manual_recaptcha_challenge_field:'+captcha_challenge+',recaptcha_response_field:'+catpcha_word+',id:'+idfield)
                        splitpost = postData.split(',')
                        post = {}
                        for p in splitpost:
                            n = p.split(':')[0]
                            v = p.split(':')[1]
                            post[n] = v
                        post = urllib_parse.urlencode(post)

                    if 'rawpost' in m:
                        post = m['rawpost']
                        # if '$LiveStreamRecaptcha' in post:
                        #     (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar)
                        #     if captcha_challenge:
                        #         post=post.replace('$LiveStreamRecaptcha','&manual_recaptcha_challenge_field='+captcha_challenge+'&recaptcha_response_field='+catpcha_word+'&id='+idfield)

                    link = ''
                    try:
                        if post:
                            response = urllib_request.urlopen(req, post)
                        else:
                            response = urllib_request.urlopen(req)
                        if response.info().get('Content-Encoding') == 'gzip':
                            import gzip
                            buf = six.BytesIO(response.read())
                            f = gzip.GzipFile(fileobj=buf)
                            link = f.read()
                        else:
                            link = response.read()

                        if 'proxy' in m and current_proxies is not None:
                            urllib_request.install_opener(
                                urllib_request.build_opener(current_proxies))

                        link = javascriptUnEscape(link)
                        # print repr(link)
                        # print link  # this just prints the whole webpage in the log

                        if 'includeheaders' in m:
                            # link += str(response.headers.get('Set-Cookie'))
                            link += '$$HEADERS_START$$:'
                            for b in response.headers:
                                link += b + ':' + response.headers.get(b) + '\n'
                            link += '$$HEADERS_END$$:'
                        # print link
                        response.close()
                    except:
                        pass
                    cachedPages[m['page']] = link
                    # print link
                    # print 'store link for',m['page'],forCookieJarOnly

                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                elif m['page'] and not m['page'].startswith('http'):
                    if m['page'].startswith('$pyFunction:'):
                        val = doEval(m['page'].split('$pyFunction:')[1], '',
                                     cookieJar, m)
                        if forCookieJarOnly:
                            return cookieJar  # do nothing
                        link = val
                        link = javascriptUnEscape(link)
                    else:
                        link = m['page']

            if '$doregex' in m['expres']:
                m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar,
                                             recursiveCall=True,
                                             cachedPages=cachedPages)

            if not m['expres'] == '':
                # print 'doing it ',m['expres']
                if '$LiveStreamCaptcha' in m['expres']:
                    val = askCaptcha(m, link, cookieJar)
                    # print 'url and val',url,val
                    url = url.replace("$doregex[" + k + "]", val)
                elif m['expres'].startswith('$pyFunction:') or '#$pyFunction' in m['expres']:
                    # print 'expeeeeeeeeeeeeeeeeeee',m['expres']
                    val = ''
                    if m['expres'].startswith('$pyFunction:'):
                        val = doEval(m['expres'].split('$pyFunction:')[1],
                                     link, cookieJar, m)
                    else:
                        val = doEvalFunction(m['expres'], link, cookieJar, m)
                    if 'ActivateWindow' in m['expres']:
                        return
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        return listrepeat, eval(val), m, regexs, cookieJar
                    try:
                        url = url.replace(u"$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]",
                                          six.ensure_text(val))
                else:
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        ret = re.findall(m['expres'], link)
                        return listrepeat, ret, m, regexs

                    val = ''
                    if not link == '':
                        # print 'link',link
                        reg = re.compile(m['expres']).search(link)
                        try:
                            val = reg.group(1).strip()
                        except:
                            traceback.print_exc()
                    elif m['page'] == '' or m['page'] is None:
                        val = m['expres']

                    if rawPost:
                        # print 'rawpost'
                        val = urllib_parse.quote_plus(val)
                    if 'htmlunescape' in m:
                        # val = urllib_parse.unquote_plus(val)
                        import HTMLParser
                        val = HTMLParser.HTMLParser().unescape(val)
                    try:
                        url = url.replace("$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]",
                                          six.ensure_text(val))
                    # print 'ur',url
                    # return val
            else:
                url = url.replace("$doregex[" + k + "]", '')
    if '$epoctime$' in url:
        url = url.replace('$epoctime$', getEpocTime())
    if '$epoctime2$' in url:
        url = url.replace('$epoctime2$', getEpocTime2())

    if '$GUID$' in url:
        import uuid
        url = url.replace('$GUID$', str(uuid.uuid1()).upper())
    if '$get_cookies$' in url:
        url = url.replace('$get_cookies$', getCookiesString(cookieJar))

    if recursiveCall:
        return url
    # print 'final url',repr(url)
    if url == "":
        return
    else:
        return url, setresolved
def __test_stream(self, stream_url):
    """
    Returns True if the stream_url gets a non-failure http status (i.e. <400)
    back from the server, otherwise return False

    Intended to catch stream urls returned by resolvers that would fail to
    playback
    """
    # parse_qsl doesn't work because it splits elements by ';' which can be
    # in a non-quoted UA
    try:
        headers = dict([item.split('=') for item in
                        (stream_url.split('|')[1]).split('&')])
    except:
        headers = {}
    for header in headers:
        headers[header] = urllib_parse.unquote_plus(headers[header])
    common.logger.log_debug('Setting Headers on UrlOpen: %s' % headers)

    try:
        import ssl
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        opener = urllib_request.build_opener(
            urllib_request.HTTPSHandler(context=ssl_context))
        urllib_request.install_opener(opener)
    except:
        pass

    try:
        msg = ''
        request = urllib_request.Request(stream_url.split('|')[0],
                                         headers=headers)
        # only do a HEAD request. gujal
        request.get_method = lambda: 'HEAD'
        # set urlopen timeout to 15 seconds
        http_code = urllib_request.urlopen(request, timeout=15).getcode()
    except urllib_error.HTTPError as e:
        if isinstance(e, urllib_error.HTTPError):
            http_code = e.code
            if http_code == 405:
                http_code = 200
        else:
            http_code = 600
    except urllib_error.URLError as e:
        http_code = 500
        if hasattr(e, 'reason'):
            # treat an unhandled url type as success
            if 'unknown url type' in str(e.reason).lower():
                return True
            else:
                msg = e.reason
        if not msg:
            msg = str(e)
    except Exception as e:
        http_code = 601
        msg = str(e)
        if msg == "''":
            http_code = 504

    # added this log line for now so that we can catch any logs on streams
    # that are rejected due to test_stream failures
    # we can remove it once we are sure this works reliably
    if int(http_code) >= 400 and int(http_code) != 504:
        common.logger.log_warning(
            'Stream UrlOpen Failed: Url: %s HTTP Code: %s Msg: %s'
            % (stream_url, http_code, msg))

    return int(http_code) < 400 or int(http_code) == 504
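# Usage sketch (hypothetical URL): stream urls may append their playback
# headers after a '|' separator as urlencoded name=value pairs, which is
# exactly the format this method parses before probing with a HEAD request.
#
#   ok = self.__test_stream('http://example.com/video.mp4|User-Agent=MyUA')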
def doLogin(cookiepath, username, password):
    # check if user has supplied only a folder path, or a full path
    if not os.path.isfile(cookiepath):
        # if the user supplied only a folder path, append on to the end of
        # the path a filename.
        cookiepath = os.path.join(cookiepath, 'cookies.lwp')

    # delete any old version of the cookie file
    try:
        os.remove(cookiepath)
    except:
        pass

    if username and password:
        # the url you will request to.
        login_url = 'https://fantasti.cc/signin.php'

        # the header used to pretend you are a browser
        header_string = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; '
                         'rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')

        # build the form data necessary for the login
        login_data = urllib_parse.urlencode({
            'user': username,
            'pass': password,
            'memento': 1,
            'x': 0,
            'y': 0,
            'do': 'login',
            'SSO': ''
        })

        # build the request we will make
        req = urllib_request.Request(login_url)
        req.add_header('User-Agent', header_string)

        # initiate the cookielib class
        cj = http_cookiejar.LWPCookieJar()

        # Setup no redirects
        class NoRedirection(urllib_request.HTTPRedirectHandler):
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                return None

        # install cookielib into the url opener, so that cookies are handled
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cj), NoRedirection())
        urllib_request.install_opener(opener)

        # do the login and get the response
        try:
            source = urllib_request.urlopen(req, login_data.encode()).read()
        except urllib_error.HTTPError as e:
            source = e.read()

        # check the received html for a string that will tell us if the user
        # is logged in
        # pass the username, which can be used to do this.
        login, avatar = check_login(source, username)

        # if login succeeded, save the cookiejar to disk
        if login:
            cj.save(cookiepath)

        # return whether we are logged in or not
        return (login, avatar)
    else:
        return (False, False)
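# Usage sketch (hypothetical credentials and path): any stale cookie file is
# removed first, and (login, avatar) comes back from check_login() on the
# post-login page.
#
#   logged_in, avatar = doLogin('/tmp/addon_data', 'user', 'secret')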
def _postHtml(url, form_data={}, headers={}, json_data={}, compression=True,
              NoCookie=None):
    try:
        _user_agent = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 ' + \
                      '(KHTML, like Gecko) Chrome/13.0.782.99 Safari/535.1'
        if form_data:
            form_data = urllib_parse.urlencode(form_data)
            form_data = form_data if PY2 else six.b(form_data)
            req = urllib_request.Request(url, form_data)
        elif json_data:
            json_data = json.dumps(json_data)
            json_data = json_data.encode('utf8') if PY3 else json_data
            req = urllib_request.Request(url, json_data)
            req.add_header('Content-Type', 'application/json')
        else:
            req = urllib_request.Request(url)
            # no body, so force the POST verb explicitly
            req.get_method = lambda: 'POST'
        req.add_header('User-Agent', _user_agent)
        for k, v in list(headers.items()):
            req.add_header(k, v)
        if compression:
            req.add_header('Accept-Encoding', 'gzip')
        response = urllib_request.urlopen(req)
        if response.info().get('Content-Encoding', '').lower() == 'gzip':
            buf = six.BytesIO(response.read())
            f = gzip.GzipFile(fileobj=buf)
            data = f.read()
            f.close()
        else:
            data = response.read()
        encoding = None
        content_type = response.headers.get('content-type', '')
        if 'charset=' in content_type:
            encoding = content_type.split('charset=')[-1]
        if encoding is None:
            epattern = r'<meta\s+http-equiv="Content-Type"\s+content="(?:.+?);\s+charset=(.+?)"'
            epattern = epattern.encode('utf8') if PY3 else epattern
            r = re.search(epattern, data, re.IGNORECASE)
            if r:
                encoding = r.group(1).decode('utf8') if PY3 else r.group(1)
        if encoding is not None:
            data = data.decode(encoding.lower(), errors='ignore')
            data = data.encode('utf8') if PY2 else data
        else:
            data = data.decode('ascii', errors='ignore') if PY3 \
                else data.encode('utf8')
        if not NoCookie:
            cj.save(cookiePath, ignore_discard=True)
        response.close()
    except urllib_error.HTTPError as e:
        if 'SSL23_GET_SERVER_HELLO' in str(e):
            notify(i18n('oh_oh'), i18n('python_old'))
        else:
            notify(i18n('oh_oh'), i18n('site_down'))
        # re-raise the HTTPError itself; raising e.msg (a string) would fail
        raise
    return data
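# Usage sketch (hypothetical URLs): form_data and json_data are mutually
# exclusive here; with neither supplied, an empty POST is forced via the
# get_method override.
#
#   html = _postHtml('http://example.com/login',
#                    form_data={'user': 'u', 'pass': 'p'})
#   data = _postHtml('http://example.com/api', json_data={'q': 'kodi'})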