def __init__(self, timeout=None, debuglevel=0):
    """Initialize the object.

    Args:
        timeout: the socket connect timeout value.
        debuglevel: the debug level.
    """
    _H.__init__(self, debuglevel)
    TimeoutHTTPConnection._timeout = timeout
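# Hedged companion sketch (assumed, not from the original source): the
# __init__ above stores the timeout on a TimeoutHTTPConnection class, which
# suggests a pattern where the connection applies the stored class-level
# timeout and the handler routes requests through it, roughly:
from httplib import HTTPConnection
from urllib2 import HTTPHandler as _H

class TimeoutHTTPConnection(HTTPConnection):
    _timeout = None  # set by the handler's __init__ above

    def connect(self):
        HTTPConnection.connect(self)
        if self._timeout is not None:
            self.sock.settimeout(self._timeout)

class TimeoutHTTPHandler(_H):
    def http_open(self, req):
        # route every request through the timeout-aware connection class
        return self.do_open(TimeoutHTTPConnection, req)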
def __init__(self, url, **kwargs):
    self.request = None
    self.response = None
    self.code = -1
    self.header = {}  # populated from the response below
    self.cookieJar = None
    self.reason = ''

    data = kwargs.get('data', None)
    if data:
        if isinstance(data, dict):
            data = urlencode(data)
        if not isinstance(data, basestring):
            raise ValueError('data must be string or dict')

    request_type = kwargs.get('type', 'POST')
    if data and isinstance(request_type, basestring) and request_type.upper() != 'POST':
        url = '{}?{}'.format(url, data)
        data = None  # GET data must be None
    self.request = urlRequest(url, data)

    # referer
    referer = kwargs.get('referer', None)
    if referer:
        self.request.add_header('referer', referer)

    # user-agent
    user_agent = kwargs.get('user_agent', None)
    if user_agent:
        self.request.add_header('User-Agent', user_agent)

    # auth
    auth = kwargs.get('auth', None)
    if auth and isinstance(auth, dict) and 'usr' in auth:
        auth_string = base64.b64encode('{}:{}'.format(auth.get('usr', ''), auth.get('pwd', '')))
        self.request.add_header('Authorization', 'Basic {}'.format(auth_string))

    # cookie
    cookie = kwargs.get('cookie', None)
    cj = None
    if cookie:
        if isinstance(cookie, CookieJar):
            cj = cookie
        elif isinstance(cookie, dict):
            result = []
            for k, v in cookie.items():
                result.append('{}={}'.format(k, v))
            cookie = '; '.join(result)
        elif isinstance(cookie, Cookie.BaseCookie):
            cookie = cookie.output(header='')
        if isinstance(cookie, basestring):
            self.request.add_header('Cookie', cookie)
    if cj is None:
        cj = CookieJar()

    #! TODO: proxy

    # build opener
    debuglevel = 1 if kwargs.get('debug', False) else 0
    opener = build_opener(HTTPHandler(debuglevel=debuglevel),
                          HTTPSHandler(debuglevel=debuglevel),
                          HTTPCookieProcessor(cj))

    # timeout
    timeout = kwargs.get('timeout')
    if not isinstance(timeout, int):
        timeout = _DEFAULT_TIMEOUT

    try:
        self.response = opener.open(self.request, timeout=timeout)
        self.code = self.response.getcode()
        self.header = self.response.info().dict
        self.cookieJar = cj
    except HTTPError as e:
        self.code = e.code
        self.reason = '{}'.format(e)
        raise
    except URLError as e:
        self.code = -1
        self.reason = e.reason
        raise
    except Exception as e:
        self.code = -1
        self.reason = '{}'.format(e)
        raise
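# Hedged usage sketch: the wrapper's class name is outside the snippet above,
# so HttpRequest here is an assumption; 'type', 'data', 'auth' and 'timeout'
# are the kwargs its __init__ actually reads. A GET folds the data dict into
# the query string, exactly as the branch above does.
req = HttpRequest('http://example.com/api',
                  type='GET',
                  data={'q': 'term'},  # becomes http://example.com/api?q=term
                  auth={'usr': 'user', 'pwd': 'secret'},
                  timeout=10)
if req.code == 200:
    print req.header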
def downloadfile(url, fileName, headers={}, silent=False, notStop=False):
    logger.debug("downloadfile: url=" + str(url))
    logger.debug("downloadfile: fileName=" + fileName)
    try:
        logger.debug("downloadfile with fileName=" + fileName)
        if os.path.exists(fileName) and notStop:
            f = open(fileName, 'r+b')
            existSize = os.path.getsize(fileName)
            logger.info("downloadfile: file exists, size=%d" % existSize)
            recordedSize = existSize
            f.seek(existSize)
        elif os.path.exists(fileName) and not notStop:
            logger.info("downloadfile: file exists, don't re-download")
            return
        else:
            existSize = 0
            logger.info("downloadfile: file doesn't exist")
            f = open(fileName, 'wb')
            recordedSize = 0

        socket.setdefaulttimeout(30)  # timeout

        h = HTTPHandler(debuglevel=0)
        request = Request(url)

        logger.debug("checking headers... type: " + str(type(headers)))
        if len(headers) > 0:
            logger.debug("adding headers...")
            for key in headers.keys():
                logger.debug("Header=" + key + ": " + headers.get(key))
                request.add_header(key, headers.get(key))
        else:
            logger.debug("header count is 0")

        logger.debug("checking resume status...")
        if existSize > 0:
            # resume a partial download from the current offset
            logger.info("resume is launched!")
            request.add_header('Range', 'bytes=%d-' % (existSize,))

        opener = build_opener(h)
        install_opener(opener)
        try:
            logger.debug("opening request...")
            connection = opener.open(request)
        except Exception:
            logger.error("ERROR: " + traceback.format_exc())
            f.close()
            return -2  # opening the URL failed; nothing to read

        logger.debug("detecting download size...")
        try:
            totalFileSize = int(connection.headers["Content-Length"])
        except Exception:
            totalFileSize = 1
        logger.debug("total file size: " + str(totalFileSize))
        if existSize > 0:
            totalFileSize = totalFileSize + existSize
        logger.debug("Content-Length=%s" % totalFileSize)

        blockSize = 100 * 1024  # buffer size

        bufferReadedSize = connection.read(blockSize)
        logger.info("Starting download, read=%s" % len(bufferReadedSize))

        maxRetries = 5
        while len(bufferReadedSize) > 0:
            try:
                f.write(bufferReadedSize)
                recordedSize = recordedSize + len(bufferReadedSize)
                percent = int(float(recordedSize) * 100 / float(totalFileSize))
                totalMB = float(float(totalFileSize) / (1024 * 1024))
                downloadedMB = float(float(recordedSize) / (1024 * 1024))

                retries = 0
                while retries <= maxRetries:
                    try:
                        before = time.time()
                        bufferReadedSize = connection.read(blockSize)
                        after = time.time()
                        if (after - before) > 0:
                            speed = len(bufferReadedSize) / (after - before)
                            remainingSize = totalFileSize - recordedSize
                            if speed > 0:
                                remainingTime = remainingSize / speed
                            else:
                                remainingTime = 0  # infinite
                            if not silent:
                                logger.debug("downloading (%d%%) %s %s %s %s" % (
                                    percent, downloadedMB, totalMB, speed / 1024,
                                    sec_to_hms(remainingTime)))
                        break
                    except Exception:
                        retries = retries + 1
                        logger.info("ERROR downloading buffer, retry %d" % retries)
                        logger.error(traceback.format_exc())

                # Something wrong happened
                if retries > maxRetries:
                    logger.error("ERROR, something happened in download process")
                    f.close()
                    return -2
            except Exception:
                logger.error(traceback.format_exc())
                f.close()
                return -2
    except Exception as ex:
        logger.error(str(ex))

    try:
        f.close()
    except Exception:
        pass

    logger.info("Finished download process")
from urllib2 import Request, build_opener, HTTPCookieProcessor, HTTPHandler
import cookielib

cj = cookielib.CookieJar()
opener = build_opener(HTTPCookieProcessor(cj), HTTPHandler())

# create a request object to be used to get the page.
req = Request("http://web.ctf.b01lers.com:1002")
f = opener.open(req)

# see the first few lines of the page
html = f.read()
# print(html[:50])

# Check out the cookies
print ("the cookies are: ")
for cookie in cj:
    print cookie.name, cookie.value

cookie_map = {}
while len(cookie_map) != 67:
    print len(cookie_map)
    # create a request object to be used to get the page.
    req = Request("http://web.ctf.b01lers.com:1002")
    f = opener.open(req)
    # see the first few lines of the page
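    # Hedged completion (assumption; the loop above is cut off in the
    # snippet): fold each new cookie into the map so the loop terminates
    # once all 67 pieces have been collected.
    html = f.read()
    for cookie in cj:
        cookie_map[cookie.name] = cookie.value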
def get_page_source(self, cmd):
    self.cmd = cmd
    if self.shouldIuseB64:
        self.cmd = "echo %s | base64 -d | sh" % self.cmd.encode('base64').replace('\n', '')
    result = re.search(';sudo ', self.cmd)
    if result:
        command = self.cmd.replace('sudo', '{0}sudo{1}'.format('\033[91m', '\033[93m'))
        errmsg = colored(
            '\n[!] Warning this command ({0}) could break the connection. I\'m not going to allow it to be sent'
            .format(command), 'red')
        cprint(errmsg, 'red')
    elif getargs.url:
        try:
            _create_unverified_https_context = ssl._create_unverified_context
        except AttributeError:
            # Legacy Python that doesn't verify HTTPS certificates by default
            pass
        else:
            # Handle target environment that doesn't support HTTPS verification
            ssl._create_default_https_context = _create_unverified_https_context

        # Proxy support
        proxy_support = ProxyHandler({'http': self.proxy} if self.proxy else {})
        opener = build_opener(proxy_support, HTTPHandler(debuglevel=0))
        opener.addheaders = [('Accept', '*/*')]
        if getargs.headers:
            opener.addheaders.extend(getargs.headers)

        # Tor support
        if self.tor:
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS5, '127.0.0.1', 9050))

        # User agent
        if getargs.random_agent:
            opener.addheaders.extend([('User-agent', self.random_agent)])
        elif self.user_agent:
            opener.addheaders.extend([('User-agent', self.user_agent)])

        install_opener(opener)
        errmsg = colored('\n[!] Check your network connection and/or the proxy (if you\'re using one)', 'red')

        # Check if the method is POST
        if self.method == 'post' or (self.parameter and self.method != 'cookie'):
            self.method = 'post'
            parameters = urlencode({
                self.parameter: 'echo ::command_start::;' + self.cmd.strip(';') + ';echo ::command_end::;'
            })
            try:
                sc = map(str.rstrip, opener.open(self.url, parameters).readlines())
                sc = '::command_deli::'.join(sc)
                sc = re.search('::command_start::(.*?)::command_end::', sc)
                if sc:
                    sc = sc.group(1).split('::command_deli::')[1:-1]
                else:
                    parameters = urlencode({self.parameter: self.cmd.strip(';')})
                    sc = map(str.rstrip, opener.open(self.url, parameters).readlines())
                return sc
            except InvalidURL:
                exit(errmsg)
        # If the used method is GET
        else:
            try:
                if self.method == 'cookie':
                    opener.addheaders += [
                        ('Cookie', '{0}={1}'.format(
                            self.parameter,
                            quote('echo ::command_start::;' + self.cmd.rstrip().strip(';') + ';echo ::command_end::;'))),
                    ]
                    sc = map(str.rstrip, opener.open(self.url).readlines())
                else:
                    sc = map(str.rstrip,
                             opener.open('{0}{1}'.format(
                                 self.url,
                                 quote('echo ::command_start::;' + self.cmd.strip(';') + ';echo ::command_end::;'))).readlines())
                sc = '::command_deli::'.join(sc)
                sc = re.search('::command_start::(.*?)::command_end::', sc)
                if sc:
                    sc = sc.group(1).split('::command_deli::')[1:-1]
                else:
                    sc = map(str.rstrip,
                             opener.open('{0}{1}'.format(self.url, quote(self.cmd.strip(';')))).readlines())
                return sc
            except InvalidURL:
                exit(errmsg)
            except HTTPError:
                cprint('[!] This is a 414 error code and you need to work with a POST method', 'red')
                exit()
    elif getargs.listen:
        try:
            if listen.socket.sendall(cmd + "\n") is not None:
                errmsg = colored('\n[!] Error in sending data (#1)', 'red')
                cprint(errmsg, 'red')
            time.sleep(1)
            sc = ''
            buffer = listen.socket.recv(1024)
            if buffer == '':
                errmsg = colored('\n[!] Lost connection. Exiting...', 'red')
                cprint(errmsg, 'red')
                listen.socket.close()
                exit(1)
            while buffer != '':
                sc = sc + buffer
                try:
                    buffer = listen.socket.recv(1024)
                except Exception:
                    buffer = ''
            sc = [
                i for i in sc.split('\n')[:-1]
                if not any(s in i for s in [
                    'job control in this shell',
                    'cannot set terminal process group',
                    'can\'t access tty', '<'
                ])
            ]
            return sc
        except Exception:
            if listen.socket.sendall(cmd + "\n") is not None:
                errmsg = colored('\n[!] Error in sending data (#2)', 'red')
                cprint(errmsg, 'red')
    elif getargs.connect:
        try:
            if connect.socket.send(cmd + "\n") is None:
                errmsg = colored('\n[!] Error in sending data (#1)', 'red')
                cprint(errmsg, 'red')
            time.sleep(1)
            sc = ''
            buffer = connect.socket.recv(1024)
            if buffer == '':
                errmsg = colored('\n[!] Lost connection. Exiting...', 'red')
                cprint(errmsg, 'red')
                connect.socket.close()
                exit(1)
            while buffer != '':
                sc = sc + buffer
                try:
                    buffer = connect.socket.recv(1024)
                except Exception:
                    buffer = ''
            return sc.split('\n')[:-1]
        except Exception:
            pass
    else:
        errmsg = colored('\n[!] Unsupported mode!', 'red')
        cprint(errmsg, 'red')
        exit(1)
def __init__(self, exp_requests, fixtures_dir):
    HTTPHandler.__init__(self)
    self.__exp_requests = exp_requests
    self.__fixtures_dir = fixtures_dir
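# Hedged sketch (assumed, not from the original test suite): a fixture handler
# initialized as above typically overrides http_open to pop the next expected
# (fixture_name, url) pair and answer with a canned body from fixtures_dir.
import os
from StringIO import StringIO
import mimetools
from urllib import addinfourl
from urllib2 import HTTPHandler

class FixtureHTTPHandler(HTTPHandler):
    def __init__(self, exp_requests, fixtures_dir):
        HTTPHandler.__init__(self)
        self.__exp_requests = list(exp_requests)
        self.__fixtures_dir = fixtures_dir

    def http_open(self, req):
        # verify the request matches the next expectation, then serve the fixture
        fixture, url = self.__exp_requests.pop(0)
        assert req.get_full_url() == url
        body = open(os.path.join(self.__fixtures_dir, fixture)).read()
        headers = mimetools.Message(StringIO('Content-Type: text/html\r\n'))
        resp = addinfourl(StringIO(body), headers, url, 200)
        resp.msg = 'OK'
        return resp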
def download_file_with_cookiejar(self, url, file_count, total, recursion=False):
    # see if we've already downloaded this file and, if so, that it is the correct size
    download_file = os.path.basename(url).split('?')[0]
    if os.path.isfile(download_file):
        try:
            request = Request(url)
            request.get_method = lambda: 'HEAD'
            response = urlopen(request, timeout=30)
            remote_size = self.get_total_size(response)
            # Check that we were able to derive a size.
            if remote_size:
                local_size = os.path.getsize(download_file)
                if remote_size < (local_size + (local_size * .01)) and remote_size > (local_size - (local_size * .01)):
                    print(" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url))
                    return None, None
                # partial file size wasn't full file size, lets blow away the chunk and start again
                print(" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file))
                os.remove(download_file)
        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag")
            return False, None
        except HTTPError as e:
            if e.code == 401:
                print(" > IMPORTANT: Your user may not have permission to download this type of data!")
            else:
                print(" > Unknown Error, Could not get file HEAD: {0}".format(e))
        except URLError as e:
            print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.")
            return False, None

    # attempt https connection
    try:
        request = Request(url)
        response = urlopen(request, timeout=30)

        # Watch for redirect
        if response.geturl() != url:
            # See if we were redirected BACK to URS for re-auth.
            if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl():
                if recursion:
                    print(" > Entering seemingly endless auth loop. Aborting. ")
                    return False, None

                # make this easier. If there is no app_type=401, add it
                new_auth_url = response.geturl()
                if "app_type" not in new_auth_url:
                    new_auth_url += "&app_type=401"

                print(" > While attempting to download {0}....".format(url))
                print(" > Need to obtain new cookie from {0}".format(new_auth_url))
                old_cookies = [cookie.name for cookie in self.cookie_jar]
                opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                      HTTPHandler(),
                                      HTTPSHandler(**self.context))
                request = Request(new_auth_url)
                try:
                    response = opener.open(request)
                    for cookie in self.cookie_jar:
                        if cookie.name not in old_cookies:
                            print(" > Saved new cookie: {0}".format(cookie.name))
                            # A little hack to save session cookies
                            if cookie.discard:
                                cookie.expires = int(time.time()) + 60 * 60 * 24 * 30
                                print(" > Saving session Cookie that should have been discarded! ")
                    self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True)
                except HTTPError as e:
                    print("HTTP Error: {0}, {1}".format(e.code, url))
                    return False, None

                # Okay, now we have more cookies! Lets try again, recursively!
                print(" > Attempting download again with new cookies!")
                return self.download_file_with_cookiejar(url, file_count, total, recursion=True)

            print(" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl()))

        # seems to be working
        print("({0}/{1}) Downloading {2}".format(file_count, total, url))

        # Open our local file for writing and build status bar
        tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
        self.chunk_read(response, tf, report_hook=self.chunk_report)

        # Reset download status
        sys.stdout.write('\n')

        tempfile_name = tf.name
        tf.close()

    # handle errors
    except HTTPError as e:
        print("HTTP Error: {0}, {1}".format(e.code, url))
        if e.code == 401:
            print(" > IMPORTANT: Your user does not have permission to download this type of data!")
        if e.code == 403:
            print(" > Got a 403 Error trying to download this file. ")
            print(" > You MAY need to log in this app and agree to a EULA. ")
        return False, None
    except URLError as e:
        print("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url))
        if "ssl.c" in "{0}".format(e.reason):
            print("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.")
        return False, None
    except socket.timeout as e:
        print(" > timeout requesting: {0}; {1}".format(url, e))
        return False, None
    except ssl.CertificateError as e:
        print(" > ERROR: {0}".format(e))
        print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag")
        return False, None

    # Return the file size
    shutil.copy(tempfile_name, download_file)
    os.remove(tempfile_name)
    file_size = self.get_total_size(response)
    actual_size = os.path.getsize(download_file)
    if file_size is None:
        # We were unable to calculate file size.
        file_size = actual_size
    return actual_size, file_size
def check_cookie(self):
    if self.cookie_jar is None:
        print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
        return False

    # File we know is valid, used to validate cookie
    file_check = 'https://urs.earthdata.nasa.gov/profile'

    # Apply custom Redirect Handler
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                          HTTPHandler(),
                          HTTPSHandler(**self.context))
    install_opener(opener)

    # Attempt a HEAD request
    request = Request(file_check)
    request.get_method = lambda: 'HEAD'
    try:
        print(" > attempting to download {0}".format(file_check))
        response = urlopen(request, timeout=30)
        resp_code = response.getcode()
        # Make sure we're logged in
        if not self.check_cookie_is_logged_in(self.cookie_jar):
            return False
        # Save cookiejar
        self.cookie_jar.save(self.cookie_jar_path)
    except HTTPError:
        # If we get this error, again, it likely means the user has not agreed to the current EULA
        print("\nIMPORTANT: ")
        print("Your user appears to lack permissions to download data from the ASF Datapool.")
        print("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
        exit(-1)

    # These return codes indicate the USER has not been approved to download the data
    if resp_code in (300, 301, 302, 303):
        try:
            redir_url = response.info().getheader('Location')
        except AttributeError:
            redir_url = response.getheader('Location')
        # Funky Test env:
        if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']):
            print("Cough, cough. It's dusty in this test env!")
            return True
        print("Redirect ({0}) occurred, invalid cookie value!".format(resp_code))
        return False

    # These are successes!
    if resp_code in (200, 307):
        return True

    return False
def getRegexParsed(
        regexs,
        url,
        cookieJar=None,
        forCookieJarOnly=False,
        recursiveCall=False,
        cachedPages={},
        rawPost=False,
        cookie_jar_file=None):  # 0,1,2 = URL, regexOnly, CookieJarOnly
    doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url)
    setresolved = True
    for k in doRegexs:
        if k in regexs:
            m = regexs[k]
            cookieJarParam = False
            if 'cookiejar' in m:
                # either create or reuse an existing jar
                cookieJarParam = m['cookiejar']
                if '$doregex' in cookieJarParam:
                    cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar, True, True, cachedPages)
                    cookieJarParam = True
                else:
                    cookieJarParam = True
            if cookieJarParam:
                if cookieJar is None:
                    cookie_jar_file = None
                    if 'open[' in m['cookiejar']:
                        cookie_jar_file = m['cookiejar'].split('open[')[1].split(']')[0]
                    cookieJar = getCookieJar(cookie_jar_file)
                    if cookie_jar_file:
                        saveCookieJar(cookieJar, cookie_jar_file)
                elif 'save[' in m['cookiejar']:
                    cookie_jar_file = m['cookiejar'].split('save[')[1].split(']')[0]
                    complete_path = os.path.join(profile, cookie_jar_file)
                    saveCookieJar(cookieJar, cookie_jar_file)

            if m['page'] and '$doregex' in m['page']:
                pg = getRegexParsed(regexs, m['page'], cookieJar, recursiveCall=True, cachedPages=cachedPages)
                if len(pg) == 0:
                    pg = 'http://regexfailed'
                m['page'] = pg

            if 'setcookie' in m and m['setcookie'] and '$doregex' in m['setcookie']:
                m['setcookie'] = getRegexParsed(regexs, m['setcookie'], cookieJar,
                                                recursiveCall=True, cachedPages=cachedPages)
            if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m['appendcookie']:
                m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'], cookieJar,
                                                   recursiveCall=True, cachedPages=cachedPages)
            if 'post' in m and '$doregex' in m['post']:
                m['post'] = getRegexParsed(regexs, m['post'], cookieJar,
                                           recursiveCall=True, cachedPages=cachedPages)
            if 'rawpost' in m and '$doregex' in m['rawpost']:
                m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar,
                                              recursiveCall=True, cachedPages=cachedPages, rawPost=True)
            if 'rawpost' in m and '$epoctime$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime$', getEpocTime())
            if 'rawpost' in m and '$epoctime2$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime2$', getEpocTime2())

            link = ''
            if m['page'] and m['page'] in cachedPages and 'ignorecache' not in m and forCookieJarOnly is False:
                link = cachedPages[m['page']]
            else:
                if m['page'] and not m['page'] == '' and m['page'].startswith('http'):
                    if '$epoctime$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime$', getEpocTime())
                    if '$epoctime2$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime2$', getEpocTime2())

                    page_split = m['page'].split('|')
                    pageUrl = page_split[0]
                    header_in_page = None
                    if len(page_split) > 1:
                        header_in_page = page_split[1]

                    current_proxies = ProxyHandler(getproxies())

                    req = Request(pageUrl)
                    if 'proxy' in m:
                        proxytouse = m['proxy']
                        if pageUrl[:5] == "https":
                            proxy = ProxyHandler({'https': proxytouse})
                        else:
                            proxy = ProxyHandler({'http': proxytouse})
                        opener = build_opener(proxy)
                        install_opener(opener)

                    req.add_header('User-Agent',
                                   'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')
                    proxytouse = None

                    if 'referer' in m:
                        req.add_header('Referer', m['referer'])
                    if 'accept' in m:
                        req.add_header('Accept', m['accept'])
                    if 'agent' in m:
                        req.add_header('User-agent', m['agent'])
                    if 'x-req' in m:
                        req.add_header('X-Requested-With', m['x-req'])
                    if 'x-addr' in m:
                        req.add_header('x-addr', m['x-addr'])
                    if 'x-forward' in m:
                        req.add_header('X-Forwarded-For', m['x-forward'])
                    if 'setcookie' in m:
                        req.add_header('Cookie', m['setcookie'])
                    if 'appendcookie' in m:
                        # append cookies to the cookiejar; entries look like domain:name=value
                        cookiestoApend = m['appendcookie'].split(';')
                        for h in cookiestoApend:
                            n, v = h.split('=')
                            w, n = n.split(':')
                            ck = cookielib.Cookie(version=0, name=n, value=v, port=None,
                                                  port_specified=False, domain=w,
                                                  domain_specified=False, domain_initial_dot=False,
                                                  path='/', path_specified=True, secure=False,
                                                  expires=None, discard=True, comment=None,
                                                  comment_url=None, rest={'HttpOnly': None},
                                                  rfc2109=False)
                            cookieJar.set_cookie(ck)
                    if 'origin' in m:
                        req.add_header('Origin', m['origin'])
                    if header_in_page:
                        header_in_page = header_in_page.split('&')
                        for h in header_in_page:
                            n, v = h.split('=')
                            req.add_header(n, v)

                    if cookieJar is not None:
                        cookie_handler = HTTPCookieProcessor(cookieJar)
                        opener = build_opener(cookie_handler, HTTPBasicAuthHandler(), HTTPHandler())
                        opener = install_opener(opener)
                        if 'noredirect' in m:
                            opener = build_opener(cookie_handler, NoRedirection,
                                                  HTTPBasicAuthHandler(), HTTPHandler())
                            opener = install_opener(opener)
                    elif 'noredirect' in m:
                        opener = build_opener(NoRedirection, HTTPBasicAuthHandler(), HTTPHandler())
                        opener = install_opener(opener)

                    if 'connection' in m:
                        # use the keepalive handler for persistent connections
                        from keepalive import HTTPHandler
                        keepalive_handler = HTTPHandler()
                        opener = build_opener(keepalive_handler)
                        install_opener(opener)

                    post = None
                    if 'post' in m:
                        postData = m['post']
                        splitpost = postData.split(',')
                        post = {}
                        for p in splitpost:
                            n = p.split(':')[0]
                            v = p.split(':')[1]
                            post[n] = v
                        post = urlencode(post)
                    if 'rawpost' in m:
                        post = m['rawpost']

                    link = ''
                    try:
                        if post:
                            response = urlopen(req, post)
                        else:
                            response = urlopen(req)
                        if response.info().get('Content-Encoding') == 'gzip':
                            from StringIO import StringIO
                            import gzip
                            buf = StringIO(response.read())
                            f = gzip.GzipFile(fileobj=buf)
                            link = f.read()
                        else:
                            link = response.read()
                        if 'proxy' in m and current_proxies is not None:
                            install_opener(build_opener(current_proxies))
                        link = javascriptUnEscape(link)
                        if 'includeheaders' in m:
                            link += '$$HEADERS_START$$:'
                            for b in response.headers:
                                link += b + ':' + response.headers.get(b) + '\n'
                            link += '$$HEADERS_END$$:'
                        response.close()
                    except Exception:
                        pass
                    cachedPages[m['page']] = link
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                elif m['page'] and not m['page'].startswith('http'):
                    if m['page'].startswith('$pyFunction:'):
                        val = doEval(m['page'].split('$pyFunction:')[1], '', cookieJar, m)
                        if forCookieJarOnly:
                            return cookieJar  # do nothing
                        link = val
                        link = javascriptUnEscape(link)
                    else:
                        link = m['page']

            if '$doregex' in m['expres']:
                m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar,
                                             recursiveCall=True, cachedPages=cachedPages)

            if not m['expres'] == '':
                if '$LiveStreamCaptcha' in m['expres']:
                    val = askCaptcha(m, link, cookieJar)
                    url = url.replace("$doregex[" + k + "]", val)
                elif m['expres'].startswith('$pyFunction:') or '#$pyFunction' in m['expres']:
                    val = ''
                    if m['expres'].startswith('$pyFunction:'):
                        val = doEval(m['expres'].split('$pyFunction:')[1], link, cookieJar, m)
                    else:
                        val = doEvalFunction(m['expres'], link, cookieJar, m)
                    if 'ActivateWindow' in m['expres']:
                        return
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        return listrepeat, eval(val), m, regexs, cookieJar
                    try:
                        url = url.replace(u"$doregex[" + k + "]", val)
                    except Exception:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
                else:
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        ret = re.findall(m['expres'], link)
                        return listrepeat, ret, m, regexs
                    val = ''
                    if not link == '':
                        reg = re.compile(m['expres']).search(link)
                        try:
                            val = reg.group(1).strip()
                        except Exception:
                            traceback.print_exc()
                    elif m['page'] == '' or m['page'] is None:
                        val = m['expres']
                    if rawPost:
                        val = quote_plus(val)
                    if 'htmlunescape' in m:
                        try:
                            from HTMLParser import HTMLParser
                        except ImportError:
                            from html.parser import HTMLParser
                        val = HTMLParser().unescape(val)
                    try:
                        url = url.replace("$doregex[" + k + "]", val)
                    except Exception:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
            else:
                url = url.replace("$doregex[" + k + "]", '')
    if '$epoctime$' in url:
        url = url.replace('$epoctime$', getEpocTime())
    if '$epoctime2$' in url:
        url = url.replace('$epoctime2$', getEpocTime2())
    if '$GUID$' in url:
        import uuid
        url = url.replace('$GUID$', str(uuid.uuid1()).upper())
    if '$get_cookies$' in url:
        url = url.replace('$get_cookies$', getCookiesString(cookieJar))
    if recursiveCall:
        return url
    if url == "":
        return
    else:
        return url, setresolved
def __get_http_opener(self):
    """
    Returns an opener instance suitable for interacting over HTTPS
    with a client key and cookie support.
    """
    return build_opener(HTTPHandler(debuglevel=self.DEBUG_LEVEL),
                        HTTPCookieProcessor(self._cookiejar))
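# Hedged usage sketch: the opener returned above is a plain urllib2 opener, so
# a sibling method on the same class could drive it like this (the URL and
# payload are illustrative assumptions, not from the original class):
def _post_login(self, payload):
    opener = self.__get_http_opener()
    response = opener.open('https://example.com/login', urlencode(payload))
    return response.getcode(), response.read()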
from contextlib import closing
from subprocess import Popen, PIPE

from lxml import html

COOKIES_FILE = '/usr/local/etc/bandcamp.cookies'
URL = 'https://bandcamp.com'
CDN_COVERS = 'https://f4.bcbits.com/img'

cj = LWPCookieJar()
if os.path.isfile(COOKIES_FILE):
    cj.load(COOKIES_FILE)

handler = HTTPHandler(debuglevel=0)
opener = build_opener(handler, HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Enter your own user agent!'),
                     ('Accept', '*/*'),
                     ('Accept-Encoding', 'deflate')]

TMP_PATH = ''
TMP_FILE_PREFIX = 'tmpS_'

queue = Queue()

# Do we have to download then add cover to downloaded music file ?
ADD_COVER = 1
# Keep the cover file ?
KEEP_COVER_FILE = 0
# Infinite DL ?
def main():
    """
    Initializes and executes the program.
    """
    login_sucessful = []
    login_failed = []
    login_skipped = []

    version = check_revision(VERSION)

    print("%s\n\n%s %s (%s)\n" % (BANNER % tuple([color(_) for _ in BANNER_PASSWORDS]),
                                  NAME, version, URL))

    args = parse_args()

    if args.update:
        update()
        exit()

    sites = list_sites()
    if args.list:
        for _ in sites:
            print("- %s" % _)
        exit()

    if not args.password and not args.load_file:
        args.password = getpass("%s Please enter password:" % INFO)

    if args.proxy:
        match = re.search(r"(?P<type>[^:]+)://(?P<address>[^:]+)"
                          r":(?P<port>\d+)", args.proxy, re.I)
        if match:
            if match.group("type").upper() in ("HTTP", "HTTPS"):
                proxy_host = "%s:%s" % (match.group("address"), match.group("port"))
                proxy_handler = ProxyHandler({"http": proxy_host, "https": proxy_host})
            else:
                from thirdparty.socks import socks
                if match.group("type").upper() == "SOCKS4":
                    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS4, match.group("address"),
                                          int(match.group("port")), True)
                elif match.group("type").upper() == "SOCKS5":
                    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, match.group("address"),
                                          int(match.group("port")), True)
                proxy_handler = None
        else:
            proxy_handler = ProxyHandler()
    else:
        proxy_handler = None

    opener = build_opener(HTTPHandler(), HTTPSHandler(), HTTPCookieProcessor(cookie_handler))
    if proxy_handler:
        opener.add_handler(proxy_handler)
    install_opener(opener)

    with open(USER_AGENTS_FILE, 'r') as ua_file:
        args.user_agent = sample(ua_file.readlines(), 1)[0].strip()

    if args.only:
        sites = [site for site in sites if site in args.only]
    elif args.exclude:
        sites = [site for site in sites if site not in args.exclude]

    print("%s Loaded %d %s to test." % (INFO, len(sites), "site" if len(sites) == 1 else "sites"))

    if args.load_file:
        if not isfile(args.load_file):
            print("%s could not find the file \"%s\"" % (WARN, color(args.load_file)))
            exit()

        _ = sum(1 for line in open(args.load_file, "r"))
        if _ < 1:
            print("%s the file \"%s\" doesn't contain any valid credentials." % (WARN, color(args.load_file)))
            exit()

        print("%s Loaded %d credential%s from \"%s\".\n" % (INFO, _, "s" if _ != 1 else "",
                                                            color(args.load_file)))

    print("%s Starting tests at: \"%s\"\n" % (INFO, color(strftime("%X"), BW)))

    if not exists(OUTPUT_DIR):
        makedirs(OUTPUT_DIR)

    log = Logger("%s/credmap" % OUTPUT_DIR)
    log.open()

    def get_targets():
        """
        Retrieve and yield list of sites (targets) for testing.
        """
        for site in sites:
            _ = populate_site(site, args)
            if not _:
                continue
            target = Website(_, {"verbose": args.verbose})
            if not target.user_agent:
                target.user_agent = args.user_agent
            yield target

    def login():
        """
        Verify credentials for login and check if login was successful.
        """
        if (target.username_or_email == "email" and not credentials["email"] or
                target.username_or_email == "username" and not credentials["username"]):
            if args.verbose:
                print("%s Skipping %s\"%s\" since no \"%s\" was specified.\n" %
                      (INFO, "[%s:%s] on " % (credentials["username"] or credentials["email"],
                                              credentials["password"]) if args.load_file else "",
                       color(target.name), color(target.username_or_email, BW)))
            login_skipped.append(target.name)
            return

        print("%s Testing %s\"%s\"..." %
              (TEST, "[%s:%s] on " % (credentials["username"] or credentials["email"],
                                      credentials["password"]) if args.load_file else "",
               color(target.name, BW)))

        cookie_handler.clear()
        if target.perform_login(credentials, cookie_handler):
            log.write(">>> %s - %s:%s\n" % (target.name,
                                            credentials["username"] or credentials["email"],
                                            credentials["password"]))
            login_sucessful.append("%s%s" % (target.name,
                                             " [%s:%s]" % (credentials["username"] or
                                                           credentials["email"],
                                                           credentials["password"])
                                             if args.load_file else ""))
        else:
            login_failed.append(target.name)

    if args.load_file:
        if args.cred_format:
            separators = [re.escape(args.cred_format[1]),
                          re.escape(args.cred_format[3]) if len(args.cred_format) > 3 else "\n"]
            cred_format = re.match(r"(u|e|p)[^upe](u|e|p)(?:[^upe](u|e|p))?", args.cred_format)
            if not cred_format:
                print("%s Could not parse --format: \"%s\"" % (ERROR, color(args.cred_format, BW)))
                exit()
            cred_format = [v.replace("e", "email").replace("u", "username").replace("p", "password")
                           for v in cred_format.groups() if v is not None]

        with open(args.load_file, "r") as load_list:
            for user in load_list:
                if args.cred_format:
                    match = re.match(r"([^{0}]+){0}([^{1}]+)(?:{1}([^\n]+))?".format(
                        separators[0], separators[1]), user)
                    credentials = dict(zip(cred_format, match.groups()))
                    credentials["password"] = quote(credentials["password"])
                    if ("email" in credentials and
                            not re.match(r"^[A-Za-z0-9._%+-]+@(?:[A-Z"
                                         r"a-z0-9-]+\.)+[A-Za-z]{2,12}$", credentials["email"])):
                        print("%s Specified e-mail \"%s\" does not appear "
                              "to be correct. Skipping...\n" % (WARN, color(credentials["email"], BW)))
                        continue
                    if "email" not in credentials:
                        credentials["email"] = None
                    elif "username" not in credentials:
                        credentials["username"] = None
                else:
                    user = user.rstrip().split(":", 1)
                    if not user[0]:
                        if args.verbose:
                            print("%s Could not parse credentials: \"%s\"\n" % (WARN, color(user, BW)))
                        continue
                    match = re.match(r"^[A-Za-z0-9._%+-]+@(?:[A-Z"
                                     r"a-z0-9-]+\.)+[A-Za-z]{2,12}$", user[0])
                    credentials = {"email": user[0] if match else None,
                                   "username": None if match else user[0],
                                   "password": quote(user[1])}

                for target in get_targets():
                    login()
    else:
        credentials = {"username": args.username, "email": args.email,
                       "password": quote(args.password)}
        for target in get_targets():
            login()

    log.close()

    if not args.verbose:
        print()

    if len(login_sucessful) > 0 or len(login_failed) > 0:
        _ = "%s/%s" % (color(len(login_sucessful), BW),
                       color(len(login_sucessful) + len(login_failed), BW))
        sign = PLUS if len(login_sucessful) > (len(login_failed) + len(login_skipped)) else INFO
        print("%s Successfully logged in%s." %
              (sign, " with %s credentials on the list." % _ if args.load_file
               else "to %s websites." % _))
        print("%s An overall success rate of %s.\n" %
              (sign, color("%%%s" % (100 * len(login_sucessful) /
                                     (len(login_sucessful) + len(login_failed))), BW)))

    if len(login_sucessful) > 0:
        print("%s The provided credentials worked on the following website%s: %s\n" %
              (PLUS, "s" if len(login_sucessful) != 1 else "", ", ".join(login_sucessful)))

    print("%s Finished tests at: \"%s\"\n" % (INFO, color(strftime("%X"), BW)))
def __init__(self, *args, **kwargs):
    self.args = args
    self.kw = kwargs
    HTTPHandler.__init__(self)
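# Hedged sketch (assumption): stashing *args/**kwargs as above instead of
# passing them to HTTPHandler usually means they are meant for the per-request
# connection object. One way that plays out:
from httplib import HTTPConnection
from urllib2 import HTTPHandler

class StashingHTTPHandler(HTTPHandler):
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kw = kwargs
        HTTPHandler.__init__(self)

    def http_open(self, req):
        # build each connection with the stored extra arguments
        def factory(host, **conn_kw):
            conn_kw.update(self.kw)
            return HTTPConnection(host, *self.args, **conn_kw)
        return self.do_open(factory, req)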
def request(url, close=True, redirect=True, error=False, verify=True, proxy=None,
            post=None, headers=None, mobile=False, XHR=False, limit=None,
            referer=None, cookie=None, compression=True, output='', timeout='30'):
    try:
        if not url:
            return

        handlers = []

        if proxy is not None:
            handlers += [ProxyHandler({'http': '%s' % (proxy)}), HTTPHandler]
            opener = build_opener(*handlers)
            opener = install_opener(opener)

        if output == 'cookie' or output == 'extended' or not close is True:
            cookies = cookielib.LWPCookieJar()
            handlers += [HTTPHandler(), HTTPSHandler(), HTTPCookieProcessor(cookies)]
            opener = build_opener(*handlers)
            opener = install_opener(opener)

        try:
            import platform
            node = platform.node().lower()
            is_XBOX = platform.uname()[1] == 'XboxOne'
        except Exception:
            node = ''
            is_XBOX = False

        if verify is False and sys.version_info >= (2, 7, 12):
            try:
                import ssl
                ssl_context = ssl._create_unverified_context()
                handlers += [HTTPSHandler(context=ssl_context)]
                opener = build_opener(*handlers)
                opener = install_opener(opener)
            except:
                pass

        if verify is True and ((2, 7, 8) < sys.version_info < (2, 7, 12) or is_XBOX):
            try:
                import ssl
                ssl_context = ssl.create_default_context()
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
                handlers += [HTTPSHandler(context=ssl_context)]
                opener = build_opener(*handlers)
                opener = install_opener(opener)
            except:
                pass

        if url.startswith('//'):
            url = 'http:' + url

        _headers = {}
        try:
            _headers.update(headers)
        except:
            pass
        if 'User-Agent' in _headers:
            pass
        elif mobile is True:
            _headers['User-Agent'] = Database.get(randommobileagent, 1)
        else:
            _headers['User-Agent'] = Database.get(randomagent, 1)
        if 'Referer' in _headers:
            pass
        elif referer is not None:
            _headers['Referer'] = referer
        if not 'Accept-Language' in _headers:
            _headers['Accept-Language'] = 'en-US'
        if 'X-Requested-With' in _headers:
            pass
        elif XHR is True:
            _headers['X-Requested-With'] = 'XMLHttpRequest'
        if 'Cookie' in _headers:
            pass
        elif cookie is not None:
            _headers['Cookie'] = cookie
        if 'Accept-Encoding' in _headers:
            pass
        elif compression and limit is None:
            _headers['Accept-Encoding'] = 'gzip'

        if redirect is False:
            class NoRedirectHandler(urllib2.HTTPRedirectHandler):
                def http_error_302(self, req, fp, code, msg, headers):
                    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
                    infourl.status = code
                    infourl.code = code
                    return infourl
                http_error_300 = http_error_302
                http_error_301 = http_error_302
                http_error_303 = http_error_302
                http_error_307 = http_error_302

            opener = urllib2.build_opener(NoRedirectHandler())
            opener = install_opener(opener)

            try:
                del _headers['Referer']
            except:
                pass

        if isinstance(post, dict):
            post = utils.byteify(post)
            post = urlencode(post)

        url = utils.byteify(url)

        request = Request(url, data=post)
        _add_request_header(request, _headers)

        try:
            response = urlopen(request, timeout=int(timeout))
        except HTTPError as response:
            if response.code == 503:
                cf_result = response.read(5242880)
                try:
                    encoding = response.info().getheader('Content-Encoding')
                except:
                    encoding = None
                if encoding == 'gzip':
                    cf_result = gzip.GzipFile(fileobj=StringIO(cf_result)).read()

                if 'cf-browser-verification' in cf_result:
                    while 'cf-browser-verification' in cf_result:
                        netloc = '%s://%s/' % (urlparse(url).scheme, urlparse(url).netloc)
                        ua = _headers['User-Agent']
                        cf = Database.get(cfcookie().get, 1, netloc, ua, timeout)
                        _headers['Cookie'] = cf
                        request = Request(url, data=post)
                        _add_request_header(request, _headers)
                        try:
                            response = urlopen(request, timeout=int(timeout))
                            cf_result = 'Success'
                        except HTTPError as response:
                            Database.remove(cfcookie().get, netloc, ua, timeout)
                            cf_result = response.read()
                else:
                    controlo.log('Request-Error (%s): %s' % (str(response.code), url))
                    if error is False:
                        return
            else:
                controlo.log('Request-Error (%s): %s' % (str(response.code), url))
                if error is False:
                    return

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close is True:
                response.close()
            return result

        elif output == 'geturl':
            result = response.geturl()
            if close is True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close is True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close is True:
                response.close()
            return result

        elif output == 'file_size':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = '0'
            response.close()
            return content

        if limit == '0':
            result = response.read(224 * 1024)
        elif limit is not None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            _headers['Cookie'] = su
            request = Request(url, data=post)
            _add_request_header(request, _headers)
            response = urlopen(request, timeout=int(timeout))
            if limit == '0':
                result = response.read(224 * 1024)
            elif limit is not None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)
            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if 'Blazingfast.io' in result and 'xhr.open' in result:
            netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
            ua = _headers['User-Agent']
            _headers['Cookie'] = Database.get(bfcookie().get, 168, netloc, ua, timeout)
            result = _basic_request(url, headers=_headers, post=post, timeout=timeout, limit=limit)

        if output == 'extended':
            try:
                response_headers = dict([(item[0].title(), item[1]) for item in response.info().items()])
            except:
                response_headers = response.headers
            response_code = str(response.code)
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close is True:
                response.close()
            return (result, response_code, response_headers, _headers, cookie)
        else:
            if close is True:
                response.close()
            return result

    except Exception as e:
        controlo.log('Request-Error: (%s) => %s' % (str(e), url))
        return
def get_cookie(self, netloc, ua, timeout):
    try:
        headers = {'User-Agent': ua}
        request = Request(netloc)
        _add_request_header(request, headers)
        try:
            response = urlopen(request, timeout=int(timeout))
        except HTTPError as response:
            result = response.read(5242880)
            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO(result)).read()

        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]
        init = re.findall('setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1]
        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]

        decryptVal = self.parseJSString(init)
        lines = builder.split(';')
        for line in lines:
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = self.parseJSString(sections[1])
                decryptVal = int(eval(str(decryptVal) + sections[0][-1] + str(line_val)))

        answer = decryptVal + len(urlparse(netloc).netloc)

        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (netloc, jschl, answer)
        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer)
            time.sleep(6)

        cookies = cookielib.LWPCookieJar()
        handlers = [HTTPHandler(), HTTPSHandler(), HTTPCookieProcessor(cookies)]
        opener = build_opener(*handlers)
        opener = install_opener(opener)

        try:
            request = Request(query)
            _add_request_header(request, headers)
            response = urlopen(request, timeout=int(timeout))
        except:
            pass

        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        if 'cf_clearance' in cookie:
            self.cookie = cookie
    except:
        pass
def __init__(self, timeout=None, *args, **kw):
    HTTPHandler.__init__(self, *args, **kw)
    self.timeout = timeout
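# Hedged sketch (assumption): the stored self.timeout above only takes effect
# if the handler applies it when opening connections, e.g. via a small factory
# handed to do_open:
from httplib import HTTPConnection
from urllib2 import HTTPHandler

class TimeoutAppliedHandler(HTTPHandler):
    def __init__(self, timeout=None, *args, **kw):
        HTTPHandler.__init__(self, *args, **kw)
        self.timeout = timeout

    def http_open(self, req):
        # every connection gets the configured timeout
        factory = lambda host, **kw: HTTPConnection(host, timeout=self.timeout)
        return self.do_open(factory, req)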
def request(url, close=True, redirect=True, error=False, proxy=None, post=None,
            headers=None, mobile=False, XHR=False, limit=None, referer=None,
            cookie=None, compression=True, output='', timeout='30',
            ignoreSsl=False, flare=True, ignoreErrors=None):
    try:
        if url is None:
            return None

        handlers = []

        if proxy is not None:
            handlers += [ProxyHandler({'http': '%s' % (proxy)}), HTTPHandler]
            opener = build_opener(*handlers)
            opener = install_opener(opener)

        if output == 'cookie' or output == 'extended' or not close is True:
            cookies = cookielib.LWPCookieJar()
            handlers += [HTTPHandler(), HTTPSHandler(), HTTPCookieProcessor(cookies)]
            opener = build_opener(*handlers)
            opener = install_opener(opener)

        if ignoreSsl or ((2, 7, 8) < sys.version_info < (2, 7, 12)):
            try:
                import ssl
                ssl_context = ssl.create_default_context()
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
                handlers += [HTTPSHandler(context=ssl_context)]
                opener = build_opener(*handlers)
                opener = install_opener(opener)
            except:
                pass

        if url.startswith('//'):
            url = 'http:' + url

        try:
            headers.update(headers)
        except:
            headers = {}
        if 'User-Agent' in headers:
            pass
        elif mobile is not True:
            headers['User-Agent'] = cache.get(randomagent, 1)
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'
        if 'Referer' in headers:
            pass
        elif referer is not None:
            headers['Referer'] = referer
        if 'Accept-Language' not in headers:
            headers['Accept-Language'] = 'en-US'
        if 'X-Requested-With' in headers:
            pass
        elif XHR is True:
            headers['X-Requested-With'] = 'XMLHttpRequest'
        if 'Cookie' in headers:
            pass
        elif cookie is not None:
            headers['Cookie'] = cookie
        if 'Accept-Encoding' in headers:
            pass
        elif compression and limit is None:
            headers['Accept-Encoding'] = 'gzip'

        if redirect is False:
            class NoRedirection(HTTPErrorProcessor):
                def http_response(self, request, response):
                    return response

            opener = build_opener(NoRedirection)
            opener = install_opener(opener)
            try:
                del headers['Referer']
            except:
                pass

        if isinstance(post, dict):
            # Gets rid of the error: 'ascii' codec can't decode byte 0xd0 in position 0: ordinal not in range(128)
            try:
                iter_items = post.iteritems()
            except:
                iter_items = post.items()
            for key, value in iter_items:
                try:
                    post[key] = value.encode('utf-8')
                except:
                    pass
            post = urlencode(post)

        request = Request(url, data=post)
        _add_request_header(request, headers)

        try:
            response = urlopen(request, timeout=int(timeout))
        except HTTPError as response:
            try:
                ignore = ignoreErrors and (int(response.code) == ignoreErrors or
                                           int(response.code) in ignoreErrors)
            except:
                ignore = False

            if not ignore:
                if response.code in [301, 307, 308, 503]:
                    cf_result = response.read(5242880)
                    try:
                        encoding = response.info().getheader('Content-Encoding')
                    except:
                        encoding = None
                    if encoding == 'gzip':
                        cf_result = gzip.GzipFile(fileobj=StringIO(cf_result)).read()

                    if flare and 'cloudflare' in str(response.info()).lower():
                        log_utils.log('client module calling cfscrape: url=%s' % url,
                                      log_utils.LOGDEBUG)
                        try:
                            from openscrapers.modules import cfscrape
                            if isinstance(post, dict):
                                data = post
                            else:
                                try:
                                    data = parse_qs(post)
                                except:
                                    data = None
                            scraper = cfscrape.CloudScraper()
                            response = scraper.request(method='GET' if post is None else 'POST',
                                                       url=url, headers=headers, data=data,
                                                       timeout=int(timeout))
                            result = response.content
                            flare = 'cloudflare'  # Used below
                            try:
                                cookies = response.request._cookies
                            except:
                                log_utils.error()
                        except:
                            log_utils.error()
                    elif 'cf-browser-verification' in cf_result:
                        netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
                        ua = headers['User-Agent']
                        cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                        headers['Cookie'] = cf
                        request = Request(url, data=post)
                        _add_request_header(request, headers)
                        response = urlopen(request, timeout=int(timeout))
                    else:
                        log_utils.log('Request-Error (%s): %s' % (str(response.code), url),
                                      log_utils.LOGDEBUG)
                        if error is False:
                            return
                else:
                    log_utils.log('Request-Error (%s): %s' % (str(response.code), url),
                                  log_utils.LOGDEBUG)
                    if error is False:
                        return

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close is True:
                response.close()
            return result

        elif output == 'geturl':
            result = response.geturl()
            if close is True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close is True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close is True:
                response.close()
            return result

        elif output == 'file_size':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = '0'
            response.close()
            return content

        if flare != 'cloudflare':
            if limit == '0':
                result = response.read(224 * 1024)
            elif limit is not None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            request = Request(url, data=post)
            _add_request_header(request, headers)
            response = urlopen(request, timeout=int(timeout))
            if limit == '0':
                result = response.read(224 * 1024)
            elif limit is not None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)
            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if 'Blazingfast.io' in result and 'xhr.open' in result:
            netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
            ua = headers['User-Agent']
            headers['Cookie'] = cache.get(bfcookie().get, 168, netloc, ua, timeout)
            result = _basic_request(url, headers=headers, post=post, timeout=timeout, limit=limit)

        if output == 'extended':
            try:
                response_headers = dict([(item[0].title(), item[1]) for item in response.info().items()])
            except:
                response_headers = response.headers
            try:
                response_code = str(response.code)
            except:
                response_code = str(response.status_code)  # object from CFScrape Requests object.
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close is True:
                response.close()
            return (result, response_code, response_headers, headers, cookie)
        else:
            if close is True:
                response.close()
            return result

    except Exception as e:
        log_utils.log('Request-Error: (%s) => %s' % (str(e), url), log_utils.LOGDEBUG)
        return
def get_new_cookie(self):
    # Start by prompting user to input their credentials

    # Another Python2/3 workaround
    try:
        new_username = raw_input("Username: ")
    except NameError:
        new_username = input("Username: ")
    new_password = getpass.getpass(prompt="Password (will not be displayed): ")

    # Build URS4 Cookie request
    auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + \
                      '&redirect_uri=' + self.asf_urs4['redir'] + '&response_type=code&state='

    try:
        # python2
        user_pass = base64.b64encode(bytes(new_username + ":" + new_password))
    except TypeError:
        # python3
        user_pass = base64.b64encode(bytes(new_username + ":" + new_password, "utf-8"))
    user_pass = user_pass.decode("utf-8")

    # Authenticate against URS, grab all the cookies
    self.cookie_jar = MozillaCookieJar()
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                          HTTPHandler(),
                          HTTPSHandler(**self.context))
    request = Request(auth_cookie_url,
                      headers={"Authorization": "Basic {0}".format(user_pass)})

    # Watch out for cookie rejection!
    try:
        response = opener.open(request)
    except HTTPError as e:
        if ("WWW-Authenticate" in e.headers and
                "Please enter your Earthdata Login credentials" in e.headers["WWW-Authenticate"]):
            print(" > Username and Password combo was not successful. Please try again.")
            return False
        else:
            # If an error happens here, the user most likely has not confirmed EULA.
            print("\nIMPORTANT: There was an error obtaining a download cookie!")
            print("Your user appears to lack permission to download data from the ASF Datapool.")
            print("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
            exit(-1)
    except URLError as e:
        print("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ")
        print("Try cookie generation later.")
        exit(-1)

    # Did we get a cookie?
    if self.check_cookie_is_logged_in(self.cookie_jar):
        # COOKIE SUCCESS!
        self.cookie_jar.save(self.cookie_jar_path)
        return True

    # if we aren't successful generating the cookie, nothing will work. Stop here!
    print("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.")
    print("Response was {0}.".format(response.getcode()))
    print("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
    exit(-1)
def __init__(self, url, **kwargs):
    """ Request init
    """
    self.request = None
    self.response = None
    self.code = -1
    self.header = {}
    self.cookieJar = None
    self.reason = ''
    self.content = ''
    self.content_dict = {}
    # whether to decode the server response from json into a dict
    self.is_decode_response = kwargs.get('is_decode_response', False)

    data = kwargs.get('data', None)
    # For GET requests that carry a data dict, post_type defaults to 'form':
    # the data is urlencoded and appended to the request URL
    post_type = kwargs.get('post_type', 'form')
    if data is not None:
        if isinstance(data, dict):
            if post_type == 'json':
                data_str = json.dumps(data)
            else:
                # data = {"name":"meetbill", "age":"21"} ==> urlencode(data) = 'age=21&name=meetbill'
                data_str = urlencode(data)
        else:
            data_str = data  # pass a pre-encoded string through unchanged
        if not isinstance(data_str, basestring):
            raise ValueError('data must be string or dict')
    else:
        data_str = None

    request_type = kwargs.get('type', 'POST')
    if data_str and isinstance(request_type, basestring) and request_type.upper() != 'POST':
        # For GET requests, fold the data into the URL
        url = '{}?{}'.format(url, data_str)
        data_str = None  # GET data must be None
    self.request = urlRequest(url, data_str)

    # Content-type, defaults to 'application/x-www-form-urlencoded'
    if request_type.upper() == 'POST' and post_type == "json":
        self.request.add_header('Content-type', 'application/json')

    # referer
    referer = kwargs.get('referer', None)
    if referer:
        self.request.add_header('referer', referer)

    # user-agent
    user_agent = kwargs.get('user_agent', None)
    if user_agent:
        self.request.add_header('User-Agent', user_agent)

    # auth
    auth = kwargs.get('auth', None)
    if auth and isinstance(auth, dict) and 'usr' in auth:
        auth_string = base64.b64encode('{}:{}'.format(auth.get('usr', ''), auth.get('pwd', '')))
        self.request.add_header('Authorization', 'Basic {}'.format(auth_string))

    # cookie
    cookie = kwargs.get('cookie', None)
    cj = None
    if cookie:
        if isinstance(cookie, CookieJar):
            cj = cookie
        elif isinstance(cookie, dict):
            result = []
            for k, v in cookie.items():
                result.append('{}={}'.format(k, v))
            cookie = '; '.join(result)
        elif isinstance(cookie, Cookie.BaseCookie):
            cookie = cookie.output(header='')
        if isinstance(cookie, basestring):
            self.request.add_header('Cookie', cookie)
    if cj is None:
        cj = CookieJar()

    #! TODO: proxy

    # build opener
    debuglevel = 1 if kwargs.get('debug', False) else 0
    opener = build_opener(HTTPHandler(debuglevel=debuglevel),
                          HTTPSHandler(debuglevel=debuglevel),
                          HTTPCookieProcessor(cj))

    # timeout
    timeout = kwargs.get('timeout')
    if not isinstance(timeout, int):
        timeout = _DEFAULT_TIMEOUT

    t_beginning = time.time()
    try:
        # opener.open accepts a URL or a Request object:
        # a plain string is treated as a URL, anything else as a built Request
        self.response = opener.open(self.request, timeout=timeout)
        self.code = self.response.getcode()
        self.header = self.response.info().dict
        self.cookieJar = cj
        self.content = self.response.read()
        # decode the response into a dict
        if self.is_decode_response:
            self.content_dict = json.loads(self.content)
            # check whether the response content matches expectations
            check_key = kwargs.get('check_key', None)
            check_value = kwargs.get('check_value', None)
            if check_key is not None and check_value is not None:
                # check the type of check_value
                if isinstance(check_value, list):
                    if self.content_dict[check_key] not in check_value:
                        self.code = -1
                        self.reason = "[response not match: {response_value} not in {check_value}]".format(
                            response_value=self.content_dict[check_key],
                            check_value=check_value)
                elif self.content_dict[check_key] != check_value:
                    self.code = -1
                    self.reason = "[response not match: {response_value} != {check_value}]".format(
                        response_value=self.content_dict[check_key],
                        check_value=check_value)
    except HTTPError as e:
        self.code = e.code
        self.reason = '{}'.format(e)
    except URLError as e:
        self.code = -1
        self.reason = e.reason
    except Exception as e:
        self.code = -1
        self.reason = '{}'.format(e)

    seconds_passed = time.time() - t_beginning
    cost_str = "%.6f" % seconds_passed

    # log the request outcome
    f = inspect.currentframe().f_back
    file_name, lineno, func_name = self._get_backframe_info(f)
    log_msg = ("[file={file_name}:{func_name}:{lineno} "
               "type=http_{method} "
               "req_path={req_path} "
               "req_data={req_data} "
               "cost={cost} "
               "is_success={is_success} "
               "err_no={err_no} "
               "err_msg={err_msg} "
               "res_len={res_len} "
               "res_data={res_data} "
               "res_attr={res_attr}]".format(file_name=file_name,
                                             func_name=func_name,
                                             lineno=lineno,
                                             method=request_type,
                                             req_path=url,
                                             req_data=data,
                                             cost=cost_str,
                                             is_success=self.success(),
                                             err_no=self.code,
                                             err_msg=self.reason,
                                             res_len=len(self.content),
                                             res_data=self.content,
                                             res_attr=json.dumps(self.header)))
    if self.success():
        log.info(log_msg)
    else:
        log.error(log_msg)
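# Hedged usage sketch: the class name is outside the snippet, so HttpRequest is
# an assumption; post_type, is_decode_response, check_key and check_value are
# the kwargs the __init__ above actually reads. A JSON POST whose decoded
# response must carry {"status": 0} could look like:
req = HttpRequest('http://example.com/api',
                  data={'name': 'meetbill'},
                  post_type='json',
                  is_decode_response=True,
                  check_key='status',
                  check_value=[0])
if req.success():
    print req.content_dict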
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.
    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred,
        returned message or error description,
        response object)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for URL opener.
    if config.cookie:
        cj = config.cookie
    else:
        cj = cookielib.CookieJar()

    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.,
    # urllib2.HTTPCookieProcessor which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    if not handlers:
        handlers = []
    handlers.append(cookie_handler)

    if config.debug:
        http_handler = HTTPHandler(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context, debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # currently only supports http basic auth
        auth_handler = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.httpauth[0],
                                  passwd=config.httpauth[1])
        handlers.append(auth_handler)

    # Explicitly remove proxy handling if the host is one listed in the value of
    # the no_proxy environment variable because urllib2 does use proxy settings
    # set via http_proxy and https_proxy, but does not take the no_proxy value
    # into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(urllib2.ProxyHandler(config.proxies))
        log.debug("Configuring proxies: %s" % config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}
    request = urllib2.Request(url, data, headers)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None
    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)
    except urllib2.HTTPError as exc:
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)

    return return_code, return_message, response
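# Hedged usage sketch: the real Configuration type lives outside this snippet,
# so a minimal stand-in carrying just the attributes open_url reads is used
# here; the values are illustrative assumptions.
from collections import namedtuple
import ssl as _ssl

Config = namedtuple('Config', 'debug cookie http_basicauth headers no_proxy proxies ssl_context')
config = Config(debug=False, cookie=None, http_basicauth=False, headers=None,
                no_proxy=None, proxies=None, ssl_context=_ssl.create_default_context())
return_code, return_message, response = open_url('https://example.com/', config)
if return_code == 200:
    print response.read()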
def handler_with_parent_and_request(parent, req):
    handler = HTTPHandler()
    handler.add_parent(parent)
    handler.http_request(req)
    return handler
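# Hedged test sketch: exercising the helper above with an OpenerDirector as the
# parent and a throwaway Request; add_parent() sets handler.parent, and
# http_request() returns the (possibly amended) request.
from urllib2 import OpenerDirector, Request

parent = OpenerDirector()
req = Request('http://example.com/')
handler = handler_with_parent_and_request(parent, req)
assert handler.parent is parent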