def _get_opener(self):
    opener = urllib2.build_opener(urllib2.HTTPErrorProcessor())
    opener.add_handler(urllib2.HTTPBasicAuthHandler(self.password_mgr))
    opener.add_handler(urllib2.HTTPDigestAuthHandler(self.password_mgr))
    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookiejar))
    return opener
def test_basic_auth_httperror(self):
    ah = urllib2.HTTPBasicAuthHandler()
    ah.add_password(self.REALM, self.server_url, self.USER, self.INCORRECT_PASSWD)
    urllib2.install_opener(urllib2.build_opener(ah))
    self.assertRaises(urllib2.HTTPError, urllib2.urlopen, self.server_url)
def request(method, url, params=None, data=None, headers=None, cookies=None,
            files=None, auth=None, timeout=60, allow_redirects=False,
            stream=False):
    """Initiate an HTTP(S) request. Returns :class:`Response` object.

    :param method: 'GET' or 'POST'
    :type method: unicode
    :param url: URL to open
    :type url: unicode
    :param params: mapping of URL parameters
    :type params: dict
    :param data: mapping of form data ``{'field_name': 'value'}`` or :class:`str`
    :type data: dict or str
    :param headers: HTTP headers
    :type headers: dict
    :param cookies: cookies to send to server
    :type cookies: dict
    :param files: files to upload (see below).
    :type files: dict
    :param auth: username, password
    :type auth: tuple
    :param timeout: connection timeout limit in seconds
    :type timeout: int
    :param allow_redirects: follow redirections
    :type allow_redirects: bool
    :param stream: Stream content instead of fetching it all at once.
    :type stream: bool
    :returns: Response object
    :rtype: :class:`Response`

    The ``files`` argument is a dictionary::

        {'fieldname': {'filename': 'blah.txt',
                       'content': '<binary data>',
                       'mimetype': 'text/plain'}}

    * ``fieldname`` is the name of the field in the HTML form.
    * ``mimetype`` is optional. If not provided, :mod:`mimetypes` will
      be used to guess the mimetype, or ``application/octet-stream``
      will be used.

    """
    # TODO: cookies
    socket.setdefaulttimeout(timeout)

    # Default handlers
    openers = []

    if not allow_redirects:
        openers.append(NoRedirectHandler())

    if auth is not None:  # Add authorisation handler
        username, password = auth
        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(None, url, username, password)
        auth_manager = urllib2.HTTPBasicAuthHandler(password_manager)
        openers.append(auth_manager)

    # Install our custom chain of openers
    opener = urllib2.build_opener(*openers)
    urllib2.install_opener(opener)

    if not headers:
        headers = CaseInsensitiveDictionary()
    else:
        headers = CaseInsensitiveDictionary(headers)

    if 'user-agent' not in headers:
        headers['user-agent'] = USER_AGENT

    # Accept gzip-encoded content
    encodings = [s.strip() for s in
                 headers.get('accept-encoding', '').split(',')]
    if 'gzip' not in encodings:
        encodings.append('gzip')

    headers['accept-encoding'] = ', '.join(encodings)

    if files:
        if not data:
            data = {}
        new_headers, data = encode_multipart_formdata(data, files)
        headers.update(new_headers)
    elif data and isinstance(data, dict):
        data = urllib.urlencode(str_dict(data))

    # Make sure everything is encoded text
    headers = str_dict(headers)

    if isinstance(url, unicode):
        url = url.encode('utf-8')

    if params:  # GET args (POST args are handled in encode_multipart_formdata)
        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

        if query:  # Combine query string and `params`
            url_params = urlparse.parse_qs(query)
            # `params` take precedence over URL query string
            url_params.update(params)
            params = url_params

        query = urllib.urlencode(str_dict(params), doseq=True)
        url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

    req = Request(url, data, headers, method=method)
    return Response(req, stream)
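# A hypothetical call to request() above, not from the original source. It
# illustrates the shape of the `auth` tuple and the `files` mapping described
# in the docstring; the URL, credentials, file contents and the Response
# attribute used at the end are placeholder assumptions.
resp = request(
    'POST',
    'https://example.com/upload',
    auth=('alice', 'secret'),
    files={'report': {'filename': 'report.txt',
                      'content': 'hello world',
                      'mimetype': 'text/plain'}},
)
print resp.status_code  # assumed attribute of the Response class used here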
    values1 = {
        'method': 'provision_circuit',
        'workgroup_id': wg_id,
        'provision_time': -1,
        'remove_time': -1,
        'description': sys.argv[2],
        'node': [sys.argv[3], sys.argv[6]],
        'interface': [sys.argv[4], sys.argv[7]],
        'tag': [sys.argv[5], sys.argv[8]]
    }
    data = urllib.urlencode(values1, doseq=True)
    gh_url2 = 'https://al2s.net.internet2.edu/oess/services-kerb/provisioning.cgi'
    req = urllib2.Request(gh_url2, data)

    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, gh_url2, username, passwd)
    auth_manager = urllib2.HTTPBasicAuthHandler(password_manager)
    opener = urllib2.build_opener(auth_manager)
    urllib2.install_opener(opener)

    try:
        handler = urllib2.urlopen(req)
        result = handler.read()
        jsonData = json.loads(result)
    # HTTPError is a subclass of URLError, so it must be caught first,
    # otherwise the HTTPError branch is unreachable.
    except urllib2.HTTPError:
        jsonData = {'error_text': 'HTTPError', 'results': None}
    except urllib2.URLError:
        jsonData = {'error_text': 'URLError', 'results': None}
    searchResults = jsonData['results']
else:
    jsonData = edit.edit_endpoint(wg_id, sys.argv[3], sys.argv[4],
                                  sys.argv[5], ct_id, "add", username, passwd)
    searchResults = jsonData['results']
def get(self, xnatSrcUri, localDstUri, showProgressIndicator = True): """ This method is in place for the main purpose of downlading a given Uri in packets (buffers) as opposed to one large file. If, for whatever reason, a packet-based download cannot occur, (say the server doesn't like urllib2) the function then resorts to a standard 'GET' call, via 'httpsRequest' which will download everything without a progress indicator. This is bad UX, but still necessary. """ #-------------------- # A download state of '1' indicates # that the user hasn't cancelled the download. #-------------------- self.downloadState = 1 #-------------------- # Set the src URI based on the # internal variables of XnatIo. #-------------------- xnatSrcUri = self.host + "/data/archive" + xnatSrcUri if not self.host in xnatSrcUri else xnatSrcUri #-------------------- # Construct the authentication handler #-------------------- passman = urllib2.HTTPPasswordMgrWithDefaultRealm() passman.add_password(None, xnatSrcUri, self.user, self.password) authhandler = urllib2.HTTPBasicAuthHandler(passman) opener = urllib2.build_opener(authhandler) urllib2.install_opener(opener) #-------------------- # Open the local destination file # so that it can start reading in the buffers. #-------------------- XnatFile = open(localDstUri, "wb") #-------------------- # Get the response URL from the XNAT host. #-------------------- errorString = "" try: #print self.MODULE.utils.lf(), "xnatSrcUri: ", xnatSrcUri response = urllib2.urlopen(xnatSrcUri) #-------------------- # If the urllib2 version fails (some servers do not like # the communication method), then use httplib to do all the downloading. # this eliminates the possibility of reading by buffers, therefore # the progress indicator isn't accurate. #-------------------- except Exception, e: errorString += str(e) + "\n" try: #print self.MODULE.utils.lf(), "urllib2 get failed. Attempting httplib version." #------------ # HTTP LIB VERSION - if urllib2 doesn't work. #----------- # # Reset popup and basically show it without # any real progress indicator -- it's just there # to let the user know it's downloading stuff. # self.MODULE.XnatDownloadPopup.reset() self.MODULE.XnatDownloadPopup.setDownloadFilename(xnatSrcUri) self.MODULE.XnatDownloadPopup.show() self.MODULE.XnatDownloadPopup.setDownloadFileSize(0) self.MODULE.XnatDownloadPopup.update(0) # # Get the file using httpsRequest and GET # response = self.httpsRequest('GET', xnatSrcUri) data = response.read() XnatFile.close() # # Write the response data to file. # with open(localDstUri, 'wb') as f: f.write(data) # # Enable the view widget. # self.MODULE.XnatView.setEnabled(True) self.MODULE.XnatDownloadPopup.hide() return except Exception, e2: errorString += str(e2) qt.QMessageBox.warning( None, "Xnat Error", errorStrings) self.MODULE.XnatView.setEnabled(True) return
def process(args): global _server, _opener, _cookie _server = KNOWN_SERVERS _opener = urllib2.build_opener(MyHTTPErrorProcessor()) # process credentials credentials = args.pop(0) if ":" in credentials: # Credentials must be sent over SSL, unless running locally. h = urllib2.HTTPBasicAuthHandler() h.add_password("EZID", _server, *credentials.split(":", 1)) _opener.add_handler(h) elif credentials != "-": _cookie = "sessionid=" + credentials command = args.pop(0) operation = filter(lambda o: o.startswith(command), OPERATIONS) if len(operation) != 1: print "%s is unrecognized or ambiguous operation" % operation return operation = operation[0] # args = ['doi:10.5061/DRYAD.8157N', '_target', 'http://datadryad.org/resource/doi:10.5061/dryad.8157n', 'datacite', '@/Users/daisie/Desktop/test.xml'] if (type(OPERATIONS[operation]) is int and\ len(args) != OPERATIONS[operation]) or\ (type(OPERATIONS[operation]) is types.LambdaType and\ not OPERATIONS[operation](len(args))): parser.error("incorrect number of arguments for operation") # Perform the operation. if operation == "mint": shoulder = args[0] if len(args) > 1: data = formatAnvlRequest(args[1:]) else: data = None response = issueRequest("shoulder/" + encode(shoulder), "POST", data) #printAnvlResponse(response) elif operation == "create": id = args[0] if len(args) > 1: data = formatAnvlRequest(args[1:]) else: data = None # PW edited to include new API call to "update_if_exist" path = "id/" + encode(id) + "?update_if_exists=yes" response = issueRequest(path, "PUT", data) #printAnvlResponse(response) elif operation == "view": id = args[0] response = issueRequest("id/" + encode(id), "GET") #printAnvlResponse(response, sortLines=True) elif operation == "update": id = args[0] if len(args) > 1: data = formatAnvlRequest(args[1:]) else: data = None response = issueRequest("id/" + encode(id), "POST", data) #printAnvlResponse(response) elif operation == "delete": id = args[0] response = issueRequest("id/" + encode(id), "DELETE") #printAnvlResponse(response) elif operation == "login": response, headers = issueRequest("login", "GET", returnHeaders=True) response += "\nsessionid: %s\n" %\ headers["set-cookie"].split(";")[0].split("=")[1] #printAnvlResponse(response) elif operation == "logout": response = issueRequest("logout", "GET") #printAnvlResponse(response) elif operation == "status": if len(args) > 0: subsystems = "?subsystems=" + args[0] else: subsystems = "" response = issueRequest("status" + subsystems, "GET") #printAnvlResponse(response) return (response)
def create_opener(base_url, username, password):
    logger.debug(locals())
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, base_url, username, password)
    handler = urllib2.HTTPBasicAuthHandler(password_mgr)
    return urllib2.build_opener(handler)
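# A minimal usage sketch for create_opener() above, not from the original
# source; the URL and credentials are placeholder assumptions. Using the
# returned opener directly keeps the Basic-Auth credentials scoped to this
# one opener instead of installing it globally with urllib2.install_opener().
opener = create_opener('http://example.com/api/', 'alice', 'secret')
response = opener.open('http://example.com/api/status')
print response.read()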
def urllib_request(url, parameter, username=None, password=None,
                   method='POST', config=None, timeout=10.0):
    """
    build the urllib request and check the response for success or fail

    :param url: target url
    :param parameter: additional parameter to append to the url request
    :param username: basic authentication with username (optional)
    :param password: basic authentication with password (optional)
    :param method: run a GET or POST request
    :param config: in case of Proxy support, the proxy settings are taken from
    :param timeout: timeout for waiting on connect/reply

    :return: the response of the request
    """
    try:
        handlers = []

        if config and 'PROXY' in config and config['PROXY']:
            # for simplicity we set both protocols
            proxy_handler = urllib2.ProxyHandler({
                "http": config['PROXY'],
                "https": config['PROXY']
            })
            handlers.append(proxy_handler)
            print "using Proxy: %r" % config['PROXY']

        if username and password is not None:
            password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, url, username, password)
            auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            handlers.append(auth_handler)

        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

        full_url = str(url)
        encoded_params = None
        if parameter is not None and len(parameter) > 0:
            encoded_params = urllib.urlencode(urllib_encoded_dict(parameter))

        if method == 'GET':
            c_data = None
            if encoded_params:
                full_url = str("%s?%s" % (url, encoded_params))
        else:
            c_data = encoded_params

        requ = urllib2.Request(full_url, data=c_data, headers={})
        if username and password is not None:
            base64string = base64.encodestring(
                '%s:%s' % (username, password)).replace('\n', '')
            requ.add_header("Authorization", "Basic %s" % base64string)

        response = urllib2.urlopen(requ, timeout=float(timeout))
        reply = response.read()
        log.debug(">>%s...%s<<", reply[:20], reply[-20:])

    except Exception as exc:
        log.exception("%r" % exc)
        raise Exception("Failed to send request: %r" % exc)

    return reply
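# A hedged usage example for urllib_request() above. The URL, parameters and
# credentials are made-up placeholders, and `log` / `urllib_encoded_dict` are
# assumed to be provided by the surrounding module as in the original code.
if __name__ == '__main__':
    reply = urllib_request('https://example.com/validate/check',
                           {'user': 'alice', 'pass': '1234'},
                           username='admin', password='secret',
                           method='GET', timeout=5.0)
    print reply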
import json, urllib2
from urllib import urlencode

user = '******'
passwd = 'gmailpasswd'
server = 'gmail.com'

# some stuff to identify yourself against the server
# and the base API path
identiuser = "******"
pwd = "your_identi.ca_passwd"
apibase = "https://identi.ca/api"

pwd_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
pwd_mgr.add_password(None, apibase, identiuser, pwd)
handler = urllib2.HTTPBasicAuthHandler(pwd_mgr)
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)


class IdentiCa:
    def message_handler(connect_object, message_node):
        command1 = str(unicode(message_node.getBody()).encode('utf-8'))
        command2 = str(message_node.getFrom().getStripped())
        c3 = command2.replace("@", " [at] ")
        c4 = c3.replace(".", " [dot] ")
        # now define a message
        msg = command1
        # url encode it nicely and set your own client name – no links in source!
        themsg = urlencode({
            'status': msg,
def getRegexParsed(regexs, url, cookieJar=None, forCookieJarOnly=False, recursiveCall=False, cachedPages={}, rawPost=False): #0,1,2 = URL, regexOnly, CookieJarOnly if not recursiveCall: regexs = eval(urllib.unquote(regexs)) #cachedPages = {} print 'url', url doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url) print 'doRegexs', doRegexs, regexs for k in doRegexs: if k in regexs: print 'processing ', k m = regexs[k] print m cookieJarParam = False if 'cookiejar' in m: # so either create or reuse existing jar #print 'cookiejar exists',m['cookiejar'] cookieJarParam = m['cookiejar'] if '$doregex' in cookieJarParam: cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar, True, True, cachedPages) cookieJarParam = True else: cookieJarParam = True if cookieJarParam: if cookieJar == None: print 'create cookie jar' import cookielib cookieJar = cookielib.LWPCookieJar() #print 'cookieJar new',cookieJar if '$doregex' in m['page']: m['page'] = getRegexParsed(regexs, m['page'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'post' in m and '$doregex' in m['post']: m['post'] = getRegexParsed(regexs, m['post'], cookieJar, recursiveCall=True, cachedPages=cachedPages) print 'post is now', m['post'] if 'rawpost' in m and '$doregex' in m['rawpost']: m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar, recursiveCall=True, cachedPages=cachedPages, rawPost=True) print 'rawpost is now', m['rawpost'] if m['page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False: link = cachedPages[m['page']] else: #print 'Ingoring Cache',m['page'] req = urllib2.Request(m['page']) print 'req', m['page'] req.add_header( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1' ) if 'refer' in m: req.add_header('Referer', m['refer']) if 'agent' in m: req.add_header('User-agent', m['agent']) if 'setcookie' in m: print 'adding cookie', m['setcookie'] req.add_header('Cookie', m['setcookie']) if not cookieJar == None: #print 'cookieJarVal',cookieJar cookie_handler = urllib2.HTTPCookieProcessor(cookieJar) opener = urllib2.build_opener( cookie_handler, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) #print 'after cookie jar' post = None if 'post' in m: postData = m['post'] if '$LiveStreamRecaptcha' in postData: (captcha_challenge, catpcha_word) = processRecaptcha(m['page']) if captcha_challenge: postData += 'recaptcha_challenge_field:' + captcha_challenge + ',recaptcha_response_field:' + catpcha_word splitpost = postData.split(',') post = {} for p in splitpost: n = p.split(':')[0] v = p.split(':')[1] post[n] = v post = urllib.urlencode(post) if 'rawpost' in m: post = m['rawpost'] if '$LiveStreamRecaptcha' in post: (captcha_challenge, catpcha_word) = processRecaptcha(m['page']) if captcha_challenge: post += '&recaptcha_challenge_field=' + captcha_challenge + '&recaptcha_response_field=' + catpcha_word if post: response = urllib2.urlopen(req, post) else: response = urllib2.urlopen(req) link = response.read() link = javascriptUnEscape(link) response.close() cachedPages[m['page']] = link #print link print 'store link for', m['page'], forCookieJarOnly if forCookieJarOnly: return cookieJar # do nothing if '$doregex' in m['expre']: m['expre'] = getRegexParsed(regexs, m['expre'], cookieJar, recursiveCall=True, cachedPages=cachedPages) print 'exp k and url' print m['expre'], k, url print 'aa' if not m['expre'] == '': print 'doing it ', m['expre'] if not '$LiveStreamCaptcha' in m['expre']: reg = 
re.compile(m['expre']).search(link) val = reg.group(1).strip() if rawPost: print 'rawpost' val = urllib.quote_plus(val) if 'htmlunescape' in m: #val=urllib.unquote_plus(val) import HTMLParser val = HTMLParser.HTMLParser().unescape(val) url = url.replace("$doregex[" + k + "]", val) else: val = askCaptcha(m, link, cookieJar) print 'url and val', url, val url = url.replace("$doregex[" + k + "]", val) #return val else: url = url.replace("$doregex[" + k + "]", '') if '$epoctime$' in url: url = url.replace('$epoctime$', getEpocTime()) if recursiveCall: return url print 'final url', url item = xbmcgui.ListItem(path=url) #setResolvedUrl #xbmc.playlist(xbmc.playlist_video).clear() #xbmc.playlist(xbmc.playlist_video).add(url) #xbmc.Player().play(item=url) xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)
def config(config_values, testing="no"): """ Loads information from the Couchbase collectd plugin config file. Args: :param config_values: Object containing config values :param testing: Used by test script to test the plugin """ plugin_config = {} interval = DEFAULT_INTERVAL collect_mode = DEFAULT_COLLECT_MODE collect_bucket = None username = None password = None api_urls = {} field_length = DEFAULT_FIELD_LENGTH cluster_name = CLUSTER_DEFAULT extra_dimensions = '' required_keys = ('CollectTarget', 'Host', 'Port') opt_keys = ('Interval', 'CollectMode', 'ClusterName', 'Dimensions') bucket_specific_keys = ('CollectBucket', 'Username', 'Password') for val in config_values.children: if val.key in required_keys: plugin_config[val.key] = val.values[0] # Read optional parameters elif val.key in opt_keys and val.key == 'Interval' and val.values[0]: interval = val.values[0] elif val.key in opt_keys and val.key == 'CollectMode'\ and val.values[0]: collect_mode = val.values[0] # Read bucket specific parameters elif val.key in bucket_specific_keys and val.key == 'CollectBucket'\ and val.values[0]: collect_bucket = val.values[0] elif val.key in bucket_specific_keys and val.key == 'Username' and \ val.values[0]: username = val.values[0] elif val.key in bucket_specific_keys and val.key == 'Password' and \ val.values[0]: password = val.values[0] elif val.key == 'FieldLength' and val.values[0]: field_length = int(val.values[0]) elif val.key in opt_keys and val.key == 'ClusterName'\ and val.values[0]: cluster_name = val.values[0] elif val.key in opt_keys and val.key == 'Dimensions'\ and val.values[0]: extra_dimensions = val.values[0] # Make sure all required config settings are present, and log them collectd.info("Using config settings:") for key in required_keys: val = plugin_config.get(key) if val is None: raise ValueError("Missing required config setting: %s" % key) collectd.info("%s=%s" % (key, val)) # If CollectTarget is bucket, make sure collect_bucket is set if plugin_config.get("CollectTarget") == TARGET_NODE: pass elif plugin_config.get("CollectTarget") == TARGET_BUCKET: if collect_bucket is None: raise ValueError("Missing required config setting for bucket " + "CollectBucket") collectd.info("%s=%s" % ('CollectBucket', collect_bucket)) else: raise ValueError('Invalid CollectTarget parameter') # Populate the API URLs now that we have the config base_url = ("http://%s:%s" % (plugin_config['Host'], plugin_config['Port'])) auth = urllib2.HTTPPasswordMgrWithDefaultRealm() if username is None and password is None: username = password = '' collectd.info("Using username '%s' and password '%s' " % (username, password)) auth.add_password(None, user=username, passwd=password, uri=base_url) handler = urllib2.HTTPBasicAuthHandler(auth) opener = urllib2.build_opener(handler) # Log registered api urls for key in api_urls: val = api_urls.get(key) collectd.info("%s=%s" % (key, val)) module_config = { 'plugin_config': plugin_config, 'interval': interval, 'collect_mode': collect_mode, 'collect_bucket': collect_bucket, 'username': username, 'password': password, 'opener': opener, 'field_length': field_length, 'base_url': base_url, 'cluster_name': cluster_name, 'extra_dimensions': extra_dimensions, } # Prepare dimensions list module_config['dimensions'] = _build_dimensions(module_config) collectd.info("Using dimensions:") collectd.info(pprint.pformat(module_config['dimensions'])) if testing == "yes": # for testing purposes return module_config # register read callbacks if plugin_config['CollectTarget'] == TARGET_NODE: 
collectd.register_read(read_node_stats, interval, data=module_config, name='node_' + plugin_config['Host'] + ':' + plugin_config['Port']) else: collectd.register_read(read_bucket_stats, interval, data=module_config, name='bucket_' + collect_bucket + '_' + plugin_config['Host'] + ':' + plugin_config['Port'])
import urllib2
import re

auth = urllib2.HTTPBasicAuthHandler()
auth.add_password('pluses and minuses', 'www.pythonchallenge.com', 'butter', 'fly')
urllib2.install_opener(urllib2.build_opener(auth))

url = 'http://www.pythonchallenge.com/pc/hex/unreal.jpg'
req = urllib2.Request(url)
start = 2123456789
req.add_header('Range', '')
while start:
    req.headers['Range'] = 'bytes=%i-' % (start)
    resp = urllib2.urlopen(req)
    print(start, resp.read())
    start -= 1
def query(action=None, command=None, args=None, method='GET', data=None):
    '''
    Make a web call to a Parallels provider
    '''
    path = config.get_cloud_config_value(
        'url', get_configured_provider(), __opts__, search_global=False)

    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(
        realm='Parallels Instance Manager',
        uri=path,
        user=config.get_cloud_config_value(
            'user', get_configured_provider(), __opts__, search_global=False),
        passwd=config.get_cloud_config_value(
            'password', get_configured_provider(), __opts__,
            search_global=False))

    opener = urllib2.build_opener(auth_handler)
    urllib2.install_opener(opener)

    if action:
        path += action

    if command:
        path += '/{0}'.format(command)

    # `type(args, dict)` is not valid Python; isinstance() is the intended check
    if not isinstance(args, dict):
        args = {}

    kwargs = {'data': data}
    if isinstance(data, str) and '<?xml' in data:
        kwargs['headers'] = {
            'Content-type': 'application/xml',
        }

    if args:
        params = urllib.urlencode(args)
        req = urllib2.Request(url='{0}?{1}'.format(path, params), **kwargs)
    else:
        req = urllib2.Request(url=path, **kwargs)

    req.get_method = lambda: method

    log.debug('{0} {1}'.format(method, req.get_full_url()))
    if data:
        log.debug(data)

    try:
        result = urllib2.urlopen(req)
        log.debug('PARALLELS Response Status Code: {0}'.format(
            result.getcode()))

        if 'content-length' in result.headers:
            content = result.read()
            result.close()
            items = ET.fromstring(content)
            return items

        return {}
    except urllib2.URLError as exc:
        log.error('PARALLELS Response Status Code: {0} {1}'.format(
            exc.code, exc.msg))
        root = ET.fromstring(exc.read())
        log.error(root)
        return {'error': root}
"%.1f"%CC3_U, "%.2f"%CC3_I, CC3_P, CC3_T, CC3_S,\ "%.1f"%CA1_U, "%.2f"%CA1_I, CA1_P, CA1_T,\ "%.1f"%CA2_U, "%.2f"%CA2_I, CA2_P, CA2_T,\ "%.1f"%CA3_U, "%.2f"%CA3_I, CA3_P, CA3_T,\ CC_P, CA_P, EFF, \ CA_S, Status, TodayWh, TotalWh)) except: if Dbg or Verbose : print "Failed to insert realtime data into DB" pass if opt.ShowHistory: URL = 'http://%s/LogDaten.dat' % host PwdMan = urllib2.HTTPPasswordMgrWithDefaultRealm() PwdMan.add_password(None, URL, opt.InvUser, opt.InvPassword) AuthHandler = urllib2.HTTPBasicAuthHandler(PwdMan) Opener = urllib2.build_opener(AuthHandler) urllib2.install_opener(Opener) PageHandle = urllib2.urlopen(URL) Now=datetime.now() Page=PageHandle.read() Lines=Page.split("\n") Reader=csv.reader(Lines, delimiter='\t') for Row in Reader: if len(Row) == 2: if Row.pop(0)=='akt. Zeit:': TRef=GetHistInt(Row.pop(0)) if len(Row) >= 38: St=Row.pop(0).strip() if St.isdigit():
if __name__ == '__main__':
    import sys, thread
    from wsgiref.simple_server import make_server
    httpd = make_server('', 8000, rest.router(routes))

    # if unit test is desired, perform unit testing
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        import urllib2, cookielib
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        top_level_url = "localhost:8000"
        password_mgr.add_password(None, top_level_url, "*****@*****.**", "somepass")
        cj = cookielib.CookieJar()
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_mgr),
                                 urllib2.HTTPCookieProcessor(cj)))

        def urlopen(url, prefix="http://localhost:8000"):
            try:
                return urllib2.urlopen(prefix + url).read()
            except:
                return sys.exc_info()[1]

        def test():
            print urlopen('/config')
            print urlopen('/config?directory=rtclite/app')
            print urlopen('/config')
            print urlopen('/xml/files')
            print urlopen('/xml/files/web')
            print urlopen('/json/files')
def bingapif(search_terms): bingDict = {} # Specify the base root_url = 'https://api.datamarket.azure.com/Bing/Search/' source = 'Web' # Specify how many results we wish to be returned per page. # Offset specifies where in the results list to start from. # With results_per_page = 10 and offset = 11, this would start from page 2. results_per_page = 10 offset = 0 # Wrap quotes around our query terms as required by the Bing API. # The query we will then use is stored within variable query. query = "'{0}'".format(search_terms) query = urllib.quote(query) # Construct the latter part of our request's URL. # Sets the format of the response to JSON and sets other properties. search_url = "{0}{1}?$format=json&$top={2}&$skip={3}&Query={4}".format( root_url, source, results_per_page, offset, query) # Setup authentication with the Bing servers. # The username MUST be a blank string, and put in your API key! username = '' # Create a 'password manager' which handles authentication for us. password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() password_mgr.add_password(None, search_url, username, BING_API_KEY) # Create our results list which we'll populate. results = [] try: # Prepare for connecting to Bing's servers. handler = urllib2.HTTPBasicAuthHandler(password_mgr) opener = urllib2.build_opener(handler) urllib2.install_opener(opener) # Connect to the server and read the response generated. response = urllib2.urlopen(search_url).read() # Convert the string response to a Python dictionary object. json_response = json.loads(response) # Loop through each page returned, populating out results list. for result in json_response['d']['results']: results.append({ 'title': result['Title'], 'link': result['Url'], 'summary': result['Description'] }) # Catch a URLError exception - something went wrong when connecting! except urllib2.URLError as e: print "Error when querying the Bing API: ", e # construct the dictionary for result in results: url = str(result['link']) title = result['title'] summary = result['summary'] bingDict[url] = [title] # Reading ease score score = textstat.flesch_reading_ease(summary) scoreText = readingEaseScore(score) bingDict[url].append(str(score) + " (" + scoreText + ")") # Sentiment score blob = TextBlob(summary) sentimentPolarity = blob.sentiment.polarity sentimentSubjectivity = blob.sentiment.subjectivity sentimentScore = "polarity= %.3f (%s), subjectivity= %.3f (%s)" % ( sentimentPolarity, polarityScore(sentimentPolarity), sentimentSubjectivity, subjectivityScore(sentimentSubjectivity)) bingDict[url].append(sentimentScore) return bingDict
def getRegexParsed( regexs, url, cookieJar=None, forCookieJarOnly=False, recursiveCall=False, cachedPages={}, rawPost=False, cookie_jar_file=None): #0,1,2 = URL, regexOnly, CookieJarOnly #cachedPages = {} #print 'url',url doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url) # print 'doRegexs',doRegexs,regexs setresolved = True for k in doRegexs: if k in regexs: #print 'processing ' ,k m = regexs[k] #print m cookieJarParam = False if 'cookiejar' in m: # so either create or reuse existing jar #print 'cookiejar exists',m['cookiejar'] cookieJarParam = m['cookiejar'] if '$doregex' in cookieJarParam: cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar, True, True, cachedPages) cookieJarParam = True else: cookieJarParam = True #print 'm[cookiejar]',m['cookiejar'],cookieJar if cookieJarParam: if cookieJar == None: #print 'create cookie jar' cookie_jar_file = None if 'open[' in m['cookiejar']: cookie_jar_file = m['cookiejar'].split( 'open[')[1].split(']')[0] # print 'cookieJar from file name',cookie_jar_file cookieJar = getCookieJar(cookie_jar_file) # print 'cookieJar from file',cookieJar if cookie_jar_file: saveCookieJar(cookieJar, cookie_jar_file) #import cookielib #cookieJar = cookielib.LWPCookieJar() #print 'cookieJar new',cookieJar elif 'save[' in m['cookiejar']: cookie_jar_file = m['cookiejar'].split('save[')[1].split( ']')[0] complete_path = os.path.join(profile, cookie_jar_file) # print 'complete_path',complete_path saveCookieJar(cookieJar, cookie_jar_file) if m['page'] and '$doregex' in m['page']: pg = getRegexParsed(regexs, m['page'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if len(pg) == 0: pg = 'http://regexfailed' m['page'] = pg if 'setcookie' in m and m['setcookie'] and '$doregex' in m[ 'setcookie']: m['setcookie'] = getRegexParsed(regexs, m['setcookie'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m[ 'appendcookie']: m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'post' in m and '$doregex' in m['post']: m['post'] = getRegexParsed(regexs, m['post'], cookieJar, recursiveCall=True, cachedPages=cachedPages) # print 'post is now',m['post'] if 'rawpost' in m and '$doregex' in m['rawpost']: m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar, recursiveCall=True, cachedPages=cachedPages, rawPost=True) #print 'rawpost is now',m['rawpost'] if 'rawpost' in m and '$epoctime$' in m['rawpost']: m['rawpost'] = m['rawpost'].replace('$epoctime$', getEpocTime()) if 'rawpost' in m and '$epoctime2$' in m['rawpost']: m['rawpost'] = m['rawpost'].replace('$epoctime2$', getEpocTime2()) link = '' if m['page'] and m[ 'page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False: #print 'using cache page',m['page'] link = cachedPages[m['page']] else: if m['page'] and not m['page'] == '' and m['page'].startswith( 'http'): if '$epoctime$' in m['page']: m['page'] = m['page'].replace('$epoctime$', getEpocTime()) if '$epoctime2$' in m['page']: m['page'] = m['page'].replace('$epoctime2$', getEpocTime2()) #print 'Ingoring Cache',m['page'] page_split = m['page'].split('|') pageUrl = page_split[0] header_in_page = None if len(page_split) > 1: header_in_page = page_split[1] # if # proxy = urllib2.ProxyHandler({ ('https' ? 
proxytouse[:5]=="https":"http") : proxytouse}) # opener = urllib2.build_opener(proxy) # urllib2.install_opener(opener) # import urllib2 # print 'urllib2.getproxies',urllib2.getproxies() current_proxies = urllib2.ProxyHandler( urllib2.getproxies()) #print 'getting pageUrl',pageUrl req = urllib2.Request(pageUrl) if 'proxy' in m: proxytouse = m['proxy'] # print 'proxytouse',proxytouse # urllib2.getproxies= lambda: {} if pageUrl[:5] == "https": proxy = urllib2.ProxyHandler({'https': proxytouse}) #req.set_proxy(proxytouse, 'https') else: proxy = urllib2.ProxyHandler({'http': proxytouse}) #req.set_proxy(proxytouse, 'http') opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) req.add_header( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1' ) proxytouse = None if 'referer' in m: req.add_header('Referer', m['referer']) if 'accept' in m: req.add_header('Accept', m['accept']) if 'agent' in m: req.add_header('User-agent', m['agent']) if 'x-req' in m: req.add_header('X-Requested-With', m['x-req']) if 'x-addr' in m: req.add_header('x-addr', m['x-addr']) if 'x-forward' in m: req.add_header('X-Forwarded-For', m['x-forward']) if 'setcookie' in m: # print 'adding cookie',m['setcookie'] req.add_header('Cookie', m['setcookie']) if 'appendcookie' in m: # print 'appending cookie to cookiejar',m['appendcookie'] cookiestoApend = m['appendcookie'] cookiestoApend = cookiestoApend.split(';') for h in cookiestoApend: n, v = h.split('=') w, n = n.split(':') ck = cookielib.Cookie(version=0, name=n, value=v, port=None, port_specified=False, domain=w, domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False) cookieJar.set_cookie(ck) if 'origin' in m: req.add_header('Origin', m['origin']) if header_in_page: header_in_page = header_in_page.split('&') for h in header_in_page: n, v = h.split('=') req.add_header(n, v) if not cookieJar == None: # print 'cookieJarVal',cookieJar cookie_handler = urllib2.HTTPCookieProcessor(cookieJar) opener = urllib2.build_opener( cookie_handler, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) # print 'noredirect','noredirect' in m if 'noredirect' in m: opener = urllib2.build_opener( cookie_handler, NoRedirection, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) elif 'noredirect' in m: opener = urllib2.build_opener( NoRedirection, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) if 'connection' in m: # print '..........................connection//////.',m['connection'] from keepalive import HTTPHandler keepalive_handler = HTTPHandler() opener = urllib2.build_opener(keepalive_handler) urllib2.install_opener(opener) #print 'after cookie jar' post = None if 'post' in m: postData = m['post'] #if '$LiveStreamRecaptcha' in postData: # (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar) # if captcha_challenge: # postData=postData.replace('$LiveStreamRecaptcha','manual_recaptcha_challenge_field:'+captcha_challenge+',recaptcha_response_field:'+catpcha_word+',id:'+idfield) splitpost = postData.split(',') post = {} for p in splitpost: n = p.split(':')[0] v = p.split(':')[1] post[n] = v post = urllib.urlencode(post) if 'rawpost' in m: post = m['rawpost'] #if '$LiveStreamRecaptcha' in post: # 
(captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar) # if captcha_challenge: # post=post.replace('$LiveStreamRecaptcha','&manual_recaptcha_challenge_field='+captcha_challenge+'&recaptcha_response_field='+catpcha_word+'&id='+idfield) link = '' try: if post: response = urllib2.urlopen(req, post) else: response = urllib2.urlopen(req) if response.info().get('Content-Encoding') == 'gzip': from StringIO import StringIO import gzip buf = StringIO(response.read()) f = gzip.GzipFile(fileobj=buf) link = f.read() else: link = response.read() if 'proxy' in m and not current_proxies is None: urllib2.install_opener( urllib2.build_opener(current_proxies)) link = javascriptUnEscape(link) #print repr(link) #print link This just print whole webpage in LOG if 'includeheaders' in m: #link+=str(response.headers.get('Set-Cookie')) link += '$$HEADERS_START$$:' for b in response.headers: link += b + ':' + response.headers.get( b) + '\n' link += '$$HEADERS_END$$:' # print link response.close() except: pass cachedPages[m['page']] = link #print link #print 'store link for',m['page'],forCookieJarOnly if forCookieJarOnly: return cookieJar # do nothing elif m['page'] and not m['page'].startswith('http'): if m['page'].startswith('$pyFunction:'): val = doEval(m['page'].split('$pyFunction:')[1], '', cookieJar, m) if forCookieJarOnly: return cookieJar # do nothing link = val link = javascriptUnEscape(link) else: link = m['page'] if '$doregex' in m['expres']: m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if not m['expres'] == '': #print 'doing it ',m['expres'] if '$LiveStreamCaptcha' in m['expres']: val = askCaptcha(m, link, cookieJar) #print 'url and val',url,val url = url.replace("$doregex[" + k + "]", val) elif m['expres'].startswith( '$pyFunction:') or '#$pyFunction' in m['expres']: #print 'expeeeeeeeeeeeeeeeeeee',m['expres'] val = '' if m['expres'].startswith('$pyFunction:'): val = doEval(m['expres'].split('$pyFunction:')[1], link, cookieJar, m) else: val = doEvalFunction(m['expres'], link, cookieJar, m) if 'ActivateWindow' in m['expres']: return if forCookieJarOnly: return cookieJar # do nothing if 'listrepeat' in m: listrepeat = m['listrepeat'] return listrepeat, eval(val), m, regexs, cookieJar try: url = url.replace(u"$doregex[" + k + "]", val) except: url = url.replace("$doregex[" + k + "]", val.decode("utf-8")) else: if 'listrepeat' in m: listrepeat = m['listrepeat'] ret = re.findall(m['expres'], link) return listrepeat, ret, m, regexs val = '' if not link == '': #print 'link',link reg = re.compile(m['expres']).search(link) try: val = reg.group(1).strip() except: traceback.print_exc() elif m['page'] == '' or m['page'] == None: val = m['expres'] if rawPost: # print 'rawpost' val = urllib.quote_plus(val) if 'htmlunescape' in m: #val=urllib.unquote_plus(val) import HTMLParser val = HTMLParser.HTMLParser().unescape(val) try: url = url.replace("$doregex[" + k + "]", val) except: url = url.replace("$doregex[" + k + "]", val.decode("utf-8")) #print 'ur',url #return val else: url = url.replace("$doregex[" + k + "]", '') if '$epoctime$' in url: url = url.replace('$epoctime$', getEpocTime()) if '$epoctime2$' in url: url = url.replace('$epoctime2$', getEpocTime2()) if '$GUID$' in url: import uuid url = url.replace('$GUID$', str(uuid.uuid1()).upper()) if '$get_cookies$' in url: url = url.replace('$get_cookies$', getCookiesString(cookieJar)) if recursiveCall: return url #print 'final url',repr(url) if url == "": return else: return url, 
setresolved
def nsidc_icesat2_sync(ddir, PRODUCTS, RELEASE, VERSIONS, GRANULES, TRACKS, USER='', PASSWORD='', YEARS=None, SUBDIRECTORY=None, AUXILIARY=False, FLATTEN=False, LOG=False, LIST=False, MODE=None, CLOBBER=False): #-- check if directory exists and recursively create if not os.makedirs(ddir, MODE) if not os.path.exists(ddir) else None #-- output of synchronized files if LOG: #-- format: NSIDC_IceBridge_sync_2002-04-01.log today = time.strftime('%Y-%m-%d', time.localtime()) LOGFILE = 'NSIDC_IceSat-2_sync_{0}.log'.format(today) fid = open(os.path.join(ddir, LOGFILE), 'w') print('ICESat-2 Data Sync Log ({0})'.format(today), file=fid) else: #-- standard output (terminal output) fid = sys.stdout #-- https://docs.python.org/3/howto/urllib2.html#id5 #-- create a password manager password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() #-- Add the username and password for NASA Earthdata Login system password_mgr.add_password(None, 'https://urs.earthdata.nasa.gov', USER, PASSWORD) #-- Encode username/password for request authorization headers base64_string = base64.b64encode('{0}:{1}'.format(USER, PASSWORD).encode()) #-- compile HTML parser for lxml parser = lxml.etree.HTMLParser() #-- Create cookie jar for storing cookies. This is used to store and return #-- the session cookie given to use by the data server (otherwise will just #-- keep sending us back to Earthdata Login to authenticate). cookie_jar = CookieJar() #-- create "opener" (OpenerDirector instance) opener = urllib2.build_opener( urllib2.HTTPBasicAuthHandler(password_mgr), urllib2.HTTPSHandler(context=ssl.SSLContext()), urllib2.HTTPCookieProcessor(cookie_jar)) #-- add Authorization header to opener authorization_header = "Basic {0}".format(base64_string.decode()) opener.addheaders = [("Authorization", authorization_header)] #-- Now all calls to urllib2.urlopen use our opener. urllib2.install_opener(opener) #-- All calls to urllib2.urlopen will now use handler #-- Make sure not to include the protocol in with the URL, or #-- HTTPPasswordMgrWithDefaultRealm will be confused. 
#-- remote https server for ICESat-2 Data HOST = 'https://n5eil01u.ecs.nsidc.org' #-- regular expression operator for finding files of a particular granule #-- find ICESat-2 HDF5 files in the subdirectory for product and release regex_track = '|'.join(['{0:04d}'.format(T) for T in TRACKS]) regex_granule = '|'.join(['{0:02d}'.format(G) for G in GRANULES]) regex_version = '|'.join(['{0:02d}'.format(V) for V in VERSIONS]) regex_suffix = '(.*?)' if AUXILIARY else '(h5)' remote_regex_pattern = ( '{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})' '(\d{{2}})(\d{{2}})_({1})(\d{{2}})({2})_({3})_({4})(.*?).{5}$') #-- regular expression operator for finding subdirectories if SUBDIRECTORY: #-- Sync particular subdirectories for product R2 = re.compile('(' + '|'.join(SUBDIRECTORY) + ')', re.VERBOSE) elif YEARS: #-- Sync particular years for product regex_pattern = '|'.join('{0:d}'.format(y) for y in YEARS) R2 = re.compile('({0}).(\d+).(\d+)'.format(regex_pattern), re.VERBOSE) else: #-- Sync all available subdirectories for product R2 = re.compile('(\d+).(\d+).(\d+)', re.VERBOSE) #-- for each icesat2 product listed for p in PRODUCTS: print('PRODUCT={0}'.format(p), file=fid) #-- get directories from remote directory (* splat operator) remote_directories = ['ATLAS', '{0}.{1}'.format(p, RELEASE)] d = posixpath.join(HOST, *remote_directories) req = urllib2.Request(url=d) #-- compile regular expression operator for product, release and version args = (p, regex_track, regex_granule, RELEASE, regex_version, regex_suffix) R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE) #-- read and parse request for subdirectories (find column names) tree = lxml.etree.parse(urllib2.urlopen(req), parser) colnames = tree.xpath('//td[@class="indexcolname"]//a/@href') remote_sub = [sd for sd in colnames if R2.match(sd)] #-- for each remote subdirectory for sd in remote_sub: #-- local directory for product and subdirectory if FLATTEN: local_dir = os.path.expanduser(ddir) else: local_dir = os.path.join(ddir, '{0}.{1}'.format(p, RELEASE), sd) #-- check if data directory exists and recursively create if not os.makedirs(local_dir, MODE) if not os.path.exists(local_dir) else None #-- find ICESat-2 data files req = urllib2.Request(url=posixpath.join(d, sd)) #-- read and parse request for remote files (columns and dates) tree = lxml.etree.parse(urllib2.urlopen(req), parser) colnames = tree.xpath('//td[@class="indexcolname"]//a/@href') collastmod = tree.xpath('//td[@class="indexcollastmod"]/text()') #-- find matching files (for granule, release, version, track) remote_file_lines = [ i for i, f in enumerate(colnames) if R1.match(f) ] #-- sync each ICESat-2 data file for i in remote_file_lines: #-- remote and local versions of the file remote_file = posixpath.join(d, sd, colnames[i]) local_file = os.path.join(local_dir, colnames[i]) #-- get last modified date and convert into unix time LMD = time.strptime(collastmod[i].rstrip(), '%Y-%m-%d %H:%M') remote_mtime = calendar.timegm(LMD) #-- sync ICESat-2 files with NSIDC server http_pull_file(fid, remote_file, remote_mtime, local_file, LIST, CLOBBER, MODE) #-- close request req = None #-- close log file and set permissions level to MODE if LOG: fid.close() os.chmod(os.path.join(ddir, LOGFILE), MODE)
def open_url(url, data=None, headers=None, method=None, use_proxy=True, force=False, last_mod_time=None, timeout=10, validate_certs=True, url_username=None, url_password=None, http_agent=None, force_basic_auth=False, follow_redirects='urllib2'): ''' Fetches a file from an HTTP/FTP server using urllib2 ''' handlers = [] # FIXME: change the following to use the generic_urlparse function # to remove the indexed references for 'parsed' parsed = urlparse.urlparse(url) if parsed[0] == 'https' and validate_certs: if not HAS_SSL: raise NoSSLError( 'SSL validation is not available in your version of python. You can use validate_certs=False, however this is unsafe and not recommended' ) # do the cert validation netloc = parsed[1] if '@' in netloc: netloc = netloc.split('@', 1)[1] if ':' in netloc: hostname, port = netloc.split(':', 1) port = int(port) else: hostname = netloc port = 443 # create the SSL validation handler and # add it to the list of handlers ssl_handler = SSLValidationHandler(hostname, port) handlers.append(ssl_handler) if parsed[0] != 'ftp': username = url_username if username: password = url_password netloc = parsed[1] elif '@' in parsed[1]: credentials, netloc = parsed[1].split('@', 1) if ':' in credentials: username, password = credentials.split(':', 1) else: username = credentials password = '' parsed = list(parsed) parsed[1] = netloc # reconstruct url without credentials url = urlparse.urlunparse(parsed) if username and not force_basic_auth: passman = urllib2.HTTPPasswordMgrWithDefaultRealm() # this creates a password manager passman.add_password(None, netloc, username, password) # because we have put None at the start it will always # use this username/password combination for urls # for which `theurl` is a super-url authhandler = urllib2.HTTPBasicAuthHandler(passman) # create the AuthHandler handlers.append(authhandler) elif username and force_basic_auth: if headers is None: headers = {} headers["Authorization"] = "Basic %s" % base64.b64encode( "%s:%s" % (username, password)) if not use_proxy: proxyhandler = urllib2.ProxyHandler({}) handlers.append(proxyhandler) # pre-2.6 versions of python cannot use the custom https # handler, since the socket class is lacking create_connection. # Some python builds lack HTTPS support. 
if hasattr(socket, 'create_connection') and CustomHTTPSHandler: handlers.append(CustomHTTPSHandler) if follow_redirects != 'urllib2': handlers.append(RedirectHandlerFactory(follow_redirects)) opener = urllib2.build_opener(*handlers) urllib2.install_opener(opener) if method: if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE', 'CONNECT', 'PATCH'): raise ConnectionError('invalid HTTP request method; %s' % method.upper()) request = RequestWithMethod(url, method.upper(), data) else: request = urllib2.Request(url, data) # add the custom agent header, to help prevent issues # with sites that block the default urllib agent string request.add_header('User-agent', http_agent) # if we're ok with getting a 304, set the timestamp in the # header, otherwise make sure we don't get a cached copy if last_mod_time and not force: tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000') request.add_header('If-Modified-Since', tstamp) else: request.add_header('cache-control', 'no-cache') # user defined headers now, which may override things we've set above if headers: if not isinstance(headers, dict): raise ValueError("headers provided to fetch_url() must be a dict") for header in headers: request.add_header(header, headers[header]) urlopen_args = [request, None] if sys.version_info >= (2, 6, 0): # urlopen in python prior to 2.6.0 did not # have a timeout parameter urlopen_args.append(timeout) if HAS_SSLCONTEXT and not validate_certs: # In 2.7.9, the default context validates certificates context = SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 context.options |= ssl.OP_NO_SSLv3 context.verify_mode = ssl.CERT_NONE context.check_hostname = False urlopen_args += (None, None, None, context) r = urllib2.urlopen(*urlopen_args) return r
try:
    print '\n'
    # ''' Grab login information.
    # '''
    proxy_ip = raw_input('Proxy ip address: ')
    proxy_u = raw_input('Proxy Admin user name: ')
    proxy_p = getpass.getpass('Admin user password (hidden): ')

    ''' Create a db to user/password mapping. It will be used in auth requests.
        The 'uri' part of .add_password is composed by blue coat proxy ip addr
        and mgmt port 8082.
    '''
    pwd_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    pwd_mgr.add_password(None, proxy_ip + ':8082', proxy_u, proxy_p)
    urllib2.install_opener(
        urllib2.build_opener(urllib2.HTTPBasicAuthHandler(pwd_mgr)))

    ''' Call function to connect to Blue Coat proxy, test authentication,
        grab some information and handle possible errors.
    '''
    proxy_connect()

    ''' Call function to check WebFilter state.
    '''
    check_WebFilter()

except KeyboardInterrupt:
    print '\n<KeyboardInterrupt>: User interrupted, done!'
    exit()

while True:
    try:
import urllib2
import re
import cookielib
import sys
import config
import os

path = os.path.abspath(os.curdir)
question = path + "/XML/codec_question"

# storing the Authentication token in a file in the OS vs. leaving in script
username = config.codec_username
password = config.codec_password

f = open(question, 'r')
string = f.read()

location = str(sys.argv[1])
url = 'http://' + location + '/putxml'
param_data = string
#params = urllib.urlencode(param_data)

passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, username, password)
authhandler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(authhandler)
opener.addheaders = [('Content-Type', 'text/xml')]
urllib2.install_opener(opener)
urllib2.urlopen(url, param_data)
def pull(self, task, onconflict=None): """ Fetch updates from the peer repository and import them into the local database (active pull) @param task: the synchronization task (sync_task Row) @param onconflict: callback for automatic conflict resolution @return: tuple (error, mtime), with error=None if successful, else error=message, and mtime=modification timestamp of the youngest record sent """ repository = self.repository xml = current.xml config = repository.config resource_name = task.resource_name current.log.debug("S3Sync: pull %s from %s" % (resource_name, repository.url)) # Construct the URL url = "%s/sync/sync.xml?resource=%s&repository=%s" % \ (repository.url, resource_name, config.uuid) last_pull = task.last_pull if last_pull and task.update_policy not in ("THIS", "OTHER"): url += "&msince=%s" % s3_encode_iso_datetime(last_pull) url += "&include_deleted=True" # Send sync filters to peer filters = current.sync.get_filters(task.id) filter_string = None resource_name = task.resource_name for tablename in filters: prefix = "~" if not tablename or tablename == resource_name \ else tablename for k, v in filters[tablename].items(): urlfilter = "[%s]%s=%s" % (prefix, k, v) url += "&%s" % urlfilter # Figure out the protocol from the URL url_split = url.split("://", 1) if len(url_split) == 2: protocol, path = url_split else: protocol, path = "http", None # Create the request req = urllib2.Request(url=url) handlers = [] # Proxy handling proxy = repository.proxy or config.proxy or None if proxy: current.log.debug("S3Sync: pull, using proxy=%s" % proxy) proxy_handler = urllib2.ProxyHandler({protocol: proxy}) handlers.append(proxy_handler) # Authentication handling username = repository.username password = repository.password if username and password: # Send auth data unsolicitedly (the only way with Eden instances): import base64 base64string = base64.encodestring('%s:%s' % (username, password))[:-1] req.add_header("Authorization", "Basic %s" % base64string) # Just in case the peer does not accept that, add a 401 handler: passwd_manager = urllib2.HTTPPasswordMgrWithDefaultRealm() passwd_manager.add_password(realm=None, uri=url, user=username, passwd=password) auth_handler = urllib2.HTTPBasicAuthHandler(passwd_manager) handlers.append(auth_handler) # Install all handlers if handlers: opener = urllib2.build_opener(*handlers) urllib2.install_opener(opener) # Execute the request remote = False action = "fetch" response = None output = None log = repository.log try: f = urllib2.urlopen(req) except urllib2.HTTPError, e: result = log.ERROR remote = True # Peer error code = e.code message = e.read() try: # Sahana-Eden would send a JSON message, # try to extract the actual error message: message_json = json.loads(message) message = message_json.get("message", message) except: pass # Prefix as peer error and strip XML markup from the message # @todo: better method to do this? message = "<message>%s</message>" % message try: markup = etree.XML(message) message = markup.xpath(".//text()") if message: message = " ".join(message) else: message = "" except etree.XMLSyntaxError: pass output = xml.json_message(False, code, message, tree=None)
def _create_basic_auth_handler(self):
    password_man = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_man.add_password(None, self._get_url(), self._username,
                              self._password)
    auth_handler = urllib2.HTTPBasicAuthHandler(password_man)
    return auth_handler
def push(self, task): """ Extract new updates from the local database and send them to the peer repository (active push) @param task: the synchronization task (sync_task Row) @return: tuple (error, mtime), with error=None if successful, else error=message, and mtime=modification timestamp of the youngest record sent """ xml = current.xml repository = self.repository config = repository.config resource_name = task.resource_name _debug("S3SyncRepository.push(%s, %s)", repository.url, resource_name) # Construct the URL url = "%s/sync/sync.xml?resource=%s&repository=%s" % \ (repository.url, resource_name, config.uuid) strategy = task.strategy if strategy: url += "&strategy=%s" % ",".join(strategy) update_policy = task.update_policy if update_policy: url += "&update_policy=%s" % update_policy conflict_policy = task.conflict_policy if conflict_policy: url += "&conflict_policy=%s" % conflict_policy last_push = task.last_push if last_push and update_policy not in ("THIS", "OTHER"): url += "&msince=%s" % s3_encode_iso_datetime(last_push) else: last_push = None _debug("...push to URL %s", url) # Define the resource resource = current.s3db.resource(resource_name, include_deleted=True) # Apply sync filters for this task filters = current.sync.get_filters(task.id) # Export the resource as S3XML data = resource.export_xml(filters=filters, msince=last_push) count = resource.results or 0 mtime = resource.muntil # Transmit the data via HTTP remote = False output = None log = repository.log if data and count: # Find the protocol url_split = url.split("://", 1) if len(url_split) == 2: protocol, path = url_split else: protocol, path = "http", None # Generate the request import urllib2 req = urllib2.Request(url=url, data=data) req.add_header('Content-Type', "text/xml") handlers = [] # Proxy handling proxy = repository.proxy or config.proxy or None if proxy: _debug("using proxy=%s", proxy) proxy_handler = urllib2.ProxyHandler({protocol: proxy}) handlers.append(proxy_handler) # Authentication username = repository.username password = repository.password if username and password: # send auth credentials unsolicitedly import base64 base64string = base64.encodestring('%s:%s' % (username, password))[:-1] req.add_header("Authorization", "Basic %s" % base64string) # Just in case the peer does not accept that # => add a 401 handler: passwd_manager = urllib2.HTTPPasswordMgrWithDefaultRealm() passwd_manager.add_password(realm=None, uri=url, user=username, passwd=password) auth_handler = urllib2.HTTPBasicAuthHandler(passwd_manager) handlers.append(auth_handler) # Install all handlers if handlers: opener = urllib2.build_opener(*handlers) urllib2.install_opener(opener) # Execute the request try: f = urllib2.urlopen(req) except urllib2.HTTPError, e: result = log.FATAL remote = True # Peer error code = e.code message = e.read() try: # Sahana-Eden sends a JSON message, # try to extract the actual error message: message_json = json.loads(message) message = message_json.get("message", message) except: pass output = xml.json_message(False, code, message) except:
import urllib2, cookielib, urllib, bz2, xmlrpclib

auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password('inflate', 'www.pythonchallenge.com', 'huge', 'file')
jar = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(jar)
opener = urllib2.build_opener(auth_handler, cookie_handler)

print opener.open(
    'http://www.pythonchallenge.com/pc/def/linkedlist.php?busynothing=12345'
).read()
list(jar)  # cookies here show a hidden message

i = 0
message = []
busynothing = 12345
while i < 400:
    url = ('http://www.pythonchallenge.com/pc/def/linkedlist.php?busynothing='
           + str(busynothing))
    contents = opener.open(url).read()
    try:
        busynothing = int(contents.split('is')[-1])
        message.append(list(jar)[0].value)
        print message[-1], contents
    except:
        break

message2 = ''.join(message)
print message2
message3 = urllib.unquote(message2)
print message3
def register(self): """ Register this site at the peer repository @return: True to indicate success, otherwise False """ repository = self.repository if not repository.url: return True current.log.debug("S3Sync: register at %s" % (repository.url)) # Construct the URL config = repository.config url = "%s/sync/repository/register.xml?repository=%s" % \ (repository.url, config.uuid) current.log.debug("S3Sync: send registration to URL %s" % url) # Generate the request req = urllib2.Request(url=url) handlers = [] # Proxy handling proxy = repository.proxy or config.proxy or None if proxy: proxy_handler = urllib2.ProxyHandler({"http": proxy}) handlers.append(proxy_handler) # Authentication username = repository.username password = repository.password if username and password: import base64 base64string = base64.encodestring('%s:%s' % (username, password))[:-1] req.add_header("Authorization", "Basic %s" % base64string) passwd_manager = urllib2.HTTPPasswordMgrWithDefaultRealm() passwd_manager.add_password(realm=None, uri=url, user=username, passwd=password, ) auth_handler = urllib2.HTTPBasicAuthHandler(passwd_manager) handlers.append(auth_handler) # Install all handlers if handlers: opener = urllib2.build_opener(*handlers) urllib2.install_opener(opener) # Execute the request log = repository.log success = True remote = False try: f = urllib2.urlopen(req) except urllib2.HTTPError, e: result = log.FATAL remote = True # Peer error code = e.code message = e.read() success = False try: message_json = json.loads(message) message = message_json.get("message", message) except: pass
# HTTPPasswordMgrWithDefaultRealm(): stores the username/password for the private proxy
# ProxyBasicAuthHandler(): handles authentication against the proxy.
# If we have the client's username and password, we can access and crawl the
# target with the approach below.
import urllib
import urllib2

user = '******'
passwd = '123456'
webserver = 'http://192.168.199.107'

passwdmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
passwdmgr.add_password(None, webserver, user, passwd)

# Build an HTTPBasicAuthHandler for HTTP basic username/password authentication;
# its argument is the password manager created above.
httpauth_handler = urllib2.HTTPBasicAuthHandler(passwdmgr)

opener = urllib2.build_opener(httpauth_handler)  # more than one handler may be passed

# Optionally make this opener the global opener via install_opener()
urllib2.install_opener(opener)

request = urllib2.Request('http://192.168.199.107')
response = urllib2.urlopen(request)
print(response.read())
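# A minimal companion sketch, not part of the original snippet: the comments
# above also mention ProxyBasicAuthHandler, which authenticates against a
# proxy that demands credentials rather than against the web server itself.
# The proxy address and the account below are placeholder assumptions.
import urllib2

proxy_passwdmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
proxy_passwdmgr.add_password(None, '192.168.199.108:8888', 'proxyuser', 'proxypass')

proxyauth_handler = urllib2.ProxyBasicAuthHandler(proxy_passwdmgr)
proxy_handler = urllib2.ProxyHandler({'http': '192.168.199.108:8888'})

opener = urllib2.build_opener(proxy_handler, proxyauth_handler)
urllib2.install_opener(opener)

response = urllib2.urlopen('http://www.example.com/')
print(response.read())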
uwcicon = xbmc.translatePath(os.path.join(rootDir, 'icon.png'))
changelog = xbmc.translatePath(os.path.join(rootDir, 'changelog.txt'))
profileDir = addon.getAddonInfo('profile')
profileDir = xbmc.translatePath(profileDir).decode("utf-8")
cookiePath = os.path.join(profileDir, 'cookies.lwp')
kodiver = xbmc.getInfoLabel("System.BuildVersion").split(".")[0]

if not os.path.exists(profileDir):
    os.makedirs(profileDir)

urlopen = urllib2.urlopen
cj = cookielib.LWPCookieJar(xbmc.translatePath(cookiePath))
Request = urllib2.Request

handlers = [urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()]
if (2, 7, 8) < sys.version_info < (2, 7, 12):
    try:
        import ssl
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        handlers += [urllib2.HTTPSHandler(context=ssl_context)]
    except:
        pass

if cj != None:
    if os.path.isfile(xbmc.translatePath(cookiePath)):
        try:
            cj.load()
        except:
import urllib2
import shutil
import os
import popen2
import requests
from lxml import html
from bs4 import BeautifulSoup

password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(
    None, "https://dds.cr.usgs.gov/emodis/CONUS/historical/TERRA/",
    "*****@*****.**", "USGSwoodard14")
handler = urllib2.HTTPBasicAuthHandler(password_mgr)
opener = urllib2.build_opener(handler)
opener.open("https://dds.cr.usgs.gov/emodis/CONUS/historical/TERRA/")
urllib2.install_opener(opener)

#TODO: now loop over all year directories for *.US_eMTH_NDVI.YYYY.DDD-DDD.QKM.COMPRES...zip files
#for example .../TERRA/2014/US_eMTH_NDVI.2014.365-006.QKM.COMPRES.*
res = urllib2.urlopen("https://dds.cr.usgs.gov/emodis/CONUS/historical/TERRA/")
html = res.read()
Soup = BeautifulSoup(html)
tags = Soup('a')  # calling the soup object finds all <a> tags
print tags
def fetch_shims(): """ Download shim files from remote server """ import urllib2 attempts = 0 shims = iter(( "operaextensions_background.js", "operaextensions_popup.js", "operaextensions_injectedscript.js", )) shim_dir = os.path.join(shim_fs_path, shim_dirname) shim = next(shims) url = shim_fetch_from + shim while attempts < 10: attempts += 1 try: res = urllib2.urlopen(url) if res.code == 200: try: if not os.path.exists(shim_dir): os.mkdir(shim_dir) elif os.path.isdir(shim_dir): fh = open(os.path.join(shim_dir, shim), 'w') fh.write(res.read()) fh.close() except Exception as e: sys.exit("ERROR: Unable to fetch shim files from " + url + "\nException was :" + str(e)) else: if debug: print(('Response:', res.code)) try: shim = next(shims) except StopIteration: break url = shim_fetch_from + shim except urllib2.HTTPError as ex: if ex.code == 401: if debug: print(('HTTP Authentication required:', ex.code, ex.msg, ex.hdrs)) auth_type = ex.hdrs["WWW-Authenticate"].split()[0] realm = ex.hdrs["WWW-Authenticate"].split('=')[1] realm = realm.strip('"') if auth_type == "Basic": auth_handler = urllib2.HTTPBasicAuthHandler() print("Basic auth: Realm: ", realm) print("Enter username:"******"\n") print("Enter password:"******"\n") auth_handler.add_password(realm=realm, uri=shim_fetch_from, user=usr, passwd=pwd) opener = urllib2.build_opener(auth_handler) urllib2.install_opener(opener) continue else: print(('Threw :', ex, ' when fetching ', url))