def get_download_links(self, folderid=None, fields=""):
    download_links = dict()
    download_api_url = self.base_url[:]
    fields = (set(fields.split(",")) | set(["filename"])) - set([""])
    if len(fields) > 1:
        supported_fields = set(
            "downloads,lastdownload,filename,size,killcode,serverid,type,"
            "x,y,realfolder,bodtype,killdeadline,licids,uploadtime".split(","))
        for field in fields:
            if field not in supported_fields:
                raise HosterAPIError, "field: %s not supported" % field
    query = urllib.urlencode({"sub": "listfiles", "fields": ",".join(fields)})
    download_api_url[4] += "&" + query
    if folderid is not None:
        query = urllib.urlencode({"realfolder": folderid})
        download_api_url[4] += "&" + query
    url = urlunparse(download_api_url)
    lines = urllib2.urlopen(url).readlines()
    self._catch_error(lines, url)
    for line in lines:
        try:
            rows = line.split(",")
            fileid = rows[0]
            properties = dict(zip(fields, rows[1:]))
            properties["filename"] = properties["filename"].replace("\n", "")
            download_url = [self.scheme, "rapidshare.com",
                            "files/%s/%s" % (fileid, properties["filename"]),
                            "", "", ""]
            properties["url"] = urlunparse(download_url)
            download_links[fileid] = properties
        except (ValueError, KeyError):
            pass
    return download_links
def translate_git_url(git_url, commit_id):
    """Create a real git URL based on defined translations.

    :param git_url: The git URL as obtained from the backend.
    :param commit_id: The git SHA.
    :return: The base URL to create URLs, and the real commit URL.
    """
    base_url = None
    commit_url = None
    if git_url and commit_id:
        t_url = urlparse.urlparse(git_url)
        known_git_urls = CONFIG_GET("KNOWN_GIT_URLS")
        if t_url.netloc in known_git_urls.keys():
            known_git = known_git_urls.get(t_url.netloc)
            path = t_url.path
            for replace_rule in known_git[3]:
                path = path.replace(*replace_rule)
            base_url = urlparse.urlunparse((
                known_git[0], t_url.netloc, known_git[1] % path, "", "", ""))
            commit_url = urlparse.urlunparse((
                known_git[0], t_url.netloc,
                (known_git[2] % path) + commit_id, "", "", ""))
    return base_url, commit_url
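# A minimal usage sketch for translate_git_url().  The KNOWN_GIT_URLS entry
# below is illustrative, not the project's actual configuration: index 0 is
# the scheme, 1 and 2 are path templates, 3 holds (old, new) replace rules.
#
#   KNOWN_GIT_URLS = {
#       "github.com": ("https", "%s", "%s/commit/", [(".git", "")]),
#   }
#   base, commit = translate_git_url(
#       "https://github.com/torvalds/linux.git", "deadbeef")
#   # base   -> "https://github.com/torvalds/linux"
#   # commit -> "https://github.com/torvalds/linux/commit/deadbeef"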
def download_css(soup, url_parts, dst_folder, index_path):
    """ parse css src's and download css to dst_folder """
    tmp_url_parts = deepcopy(url_parts)
    for css in soup.findAll("link", {"rel": "stylesheet"}):
        if css.has_key("href"):
            file_name = css["href"].split("/")[-1]
            file_name = sanitize_file_name(file_name)
            logging.debug("Downloading css " + file_name + "...")
            new_src = create_directories(
                dst_folder, list(urlparse.urlparse(css["href"]))[2])
            full_path = os.path.join(dst_folder, new_src)
            outpath = os.path.join(full_path, file_name)
            if css["href"].lower().startswith("http"):
                tmp_url_parts = list(urlparse.urlparse(css["href"]))
                download_file(css["href"], outpath)
            else:
                tmp_url_parts[2] = css["href"]
                download_file(urlparse.urlunparse(tmp_url_parts), outpath)
            root_url = urlparse.urlunparse(tmp_url_parts)
            file_name_index = root_url.index(file_name)
            root_url = root_url[:file_name_index]
            download_css_imports(soup, list(urlparse.urlparse(css["href"])),
                                 root_url, new_src + "/" + file_name,
                                 dst_folder, index_path)
            css["href"] = css["href"].replace(
                css["href"], index_path + "/" + new_src + "/" + file_name)
def _add_utm_param(url, type, sourse, campaign, name, matching):
    url_parts = list(urlparse.urlparse(url))
    if not _url_param_safe_check(url_parts[4]):
        return urlparse.urlunparse(url_parts)
    query = dict(urlparse.parse_qsl(url_parts[4]))
    if type == 'banner':
        utm_medium = 'cpm_yottos'
    else:
        utm_medium = 'cpc_yottos'
    utm_source = str(sourse or 'other')
    utm_campaign = str(campaign)
    utm_content = str(name)
    if query.has_key('utm_source'):
        utm_term = str(sourse or 'other')
    else:
        utm_term = str(matching)
    if not query.has_key('utm_medium'):
        query.update({'utm_medium': utm_medium})
    if not query.has_key('utm_source'):
        query.update({'utm_source': utm_source})
    if not query.has_key('utm_campaign'):
        query.update({'utm_campaign': utm_campaign})
    if not query.has_key('utm_content'):
        query.update({'utm_content': utm_content})
    if not query.has_key('utm_term'):
        query.update({'utm_term': utm_term})
    url_parts[4] = urllib.urlencode(query)
    return urlparse.urlunparse(url_parts)
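# A usage sketch for _add_utm_param(), assuming _url_param_safe_check()
# accepts this query string.  The rebuilt query follows Python 2 dict
# ordering, so parameter order is not guaranteed:
#
#   _add_utm_param('http://shop.example/item?id=1',
#                  'banner', 'news', 'camp1', 'ad1', 'broad')
#   # -> 'http://shop.example/item?id=1&utm_medium=cpm_yottos'
#   #    '&utm_source=news&utm_campaign=camp1&utm_content=ad1&utm_term=broad'
#   #    (parameter order may differ)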
def _secure_request(self, url, method, data=None, files=None, headers=None,
                    raw=False, send_as_json=True, content_type=None,
                    **request_kwargs):
    full_url = self.build_url(url)
    # Add token (if it's not already there)
    if self._token:
        parsed = list(urlparse.urlparse(full_url))
        if not parsed[4]:  # query
            parsed[4] = 'token=%s' % self._token
            full_url = urlparse.urlunparse(parsed)
        elif 'token' not in urlparse.parse_qs(parsed[4]):
            parsed[4] += '&token=%s' % self._token
            full_url = urlparse.urlunparse(parsed)
    headers = headers or {}
    # If files are being sent, we cannot encode data as JSON
    if send_as_json and not files:
        headers['content-type'] = 'application/json'
        data = json.dumps(data or {})
    else:
        if content_type:
            headers['content-type'] = content_type
        data = data or ''
    method = getattr(requests, method, None)
    response = method(full_url, data=data, files=files, headers=headers,
                      **request_kwargs)
    self.check_for_errors(response)  # Raise exception if something failed
    if raw or not response.content:
        return response.content
    return json.loads(response.text)
def _stripSitePath(self, uri, parms):
    """ Strip off our site-host and site-path from 'uri'. """
    (scheme, netloc, path, url_parm, query, fragment) = urlparse.urlparse(uri)
    site_host = urlparse.urlunparse((scheme, netloc, '', '', '', ''))
    if scheme and parms.get('site_host') is None:
        parms['site_host'] = site_host
    if site_host != parms['site_host']:  # XXX foreign site!  Punt!
        return None, None
    if self._site_path and path.startswith(self._site_path):
        path = path[len(self._site_path):]
    uri = urlparse.urlunparse(('', '', path, url_parm, query, fragment))
    return uri, query
def _insert_links(data_dict, limit, offset):
    '''Adds link to the next/prev part (same limit, offset=offset+limit)
    and the resource page.'''
    data_dict['_links'] = {}
    # get the url from the request
    urlstring = toolkit.request.environ['CKAN_CURRENT_URL']
    # change the offset in the url
    parsed = list(urlparse.urlparse(urlstring))
    query = urllib2.unquote(parsed[4])
    arguments = dict(urlparse.parse_qsl(query))
    arguments_start = dict(arguments)
    arguments_prev = dict(arguments)
    arguments_next = dict(arguments)
    if 'offset' in arguments_start:
        arguments_start.pop('offset')
    arguments_next['offset'] = int(offset) + int(limit)
    arguments_prev['offset'] = int(offset) - int(limit)
    parsed_start = parsed[:]
    parsed_prev = parsed[:]
    parsed_next = parsed[:]
    parsed_start[4] = urllib.urlencode(arguments_start)
    parsed_next[4] = urllib.urlencode(arguments_next)
    parsed_prev[4] = urllib.urlencode(arguments_prev)
    # add the links to the data dict
    data_dict['_links']['start'] = urlparse.urlunparse(parsed_start)
    data_dict['_links']['next'] = urlparse.urlunparse(parsed_next)
    if int(offset) - int(limit) > 0:
        data_dict['_links']['prev'] = urlparse.urlunparse(parsed_prev)
def validate_ticket(self, ticket, request):
    service_name = self.service_name
    ticket_name = self.ticket_name
    this_url = self.get_url(request)
    p = urlparse.urlparse(this_url)
    qs_map = urlparse.parse_qs(p.query)
    if ticket_name in qs_map:
        del qs_map[ticket_name]
    param_str = urlencode(qs_map)
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
    service_url = urlparse.urlunparse(p)
    params = {
        service_name: service_url,
        ticket_name: ticket,
    }
    param_str = urlencode(params)
    p = urlparse.urlparse(self.cas_info['service_validate_url'])
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
    service_validate_url = urlparse.urlunparse(p)
    log.msg("[INFO] requesting URL '%s' ..." % service_validate_url)
    http_client = HTTPClient(self.agent)
    d = http_client.get(service_validate_url)
    d.addCallback(treq.content)
    d.addCallback(self.parse_sv_results, service_url, ticket, request)
    return d
def urlunjoin(base_url, url):
    if url is None:
        return str(base_url)
    else:
        url = str(url)
    if base_url is None:
        return str(url)
    else:
        base_url = str(base_url)
    if url.startswith('_:'):
        # you might expect that '_' would be parsed as a scheme by urlparse,
        # but it isn't
        return url
    else:
        o = urlparse.urlparse(url)
        if o.scheme == '' or o.scheme == 'http' or o.scheme == 'https':
            if o.netloc == '':
                # http(s) relative url
                if len(o.path) > 0 and o.path[0] == '/':
                    return url
                else:
                    abs_url = urlparse.urljoin(base_url, url)  # make it absolute first
                    o = list(urlparse.urlparse(abs_url))
                    o[0] = o[1] = ''  # blank out the scheme and the netloc
                    return urlparse.urlunparse(o)
            else:
                b = urlparse.urlparse(base_url)
                if o.netloc == b.netloc:
                    o = list(o)
                    o[0] = o[1] = ''  # blank out the scheme and the netloc
                    return urlparse.urlunparse(o)
                else:
                    return url
        else:
            return url
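# A few illustrative calls (hypothetical URLs; results follow from
# Python 2's urlparse/urlunparse semantics):
#
#   urlunjoin('http://a.example/x/y', 'http://a.example/z')  # -> '/z'
#       (same netloc: scheme and netloc are blanked out)
#   urlunjoin('http://a.example/x/y', 'z')                   # -> '/x/z'
#       (relative: joined against the base, then made host-relative)
#   urlunjoin('http://a.example/x/y', 'http://b.example/z')
#   # -> 'http://b.example/z' (different host: returned untouched)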
def _do_lastfm_query(self, type, method, **kwargs):
    args = {
        'method': method,
        'api_key': self.key,
    }
    for k, v in kwargs.items():
        args[k] = v.encode('utf8')
    s = ''
    for k in sorted(args.keys()):
        s += k + args[k]
    s += self.secret
    if 'sk' in args.keys() or 'token' in args.keys():
        args['api_sig'] = hashlib.md5(s).hexdigest()
    if type == 'GET':
        url = urlparse.urlunparse(('http', 'ws.audioscrobbler.com', '/2.0/',
                                   '', urllib.urlencode(args), ''))
        return self._do_raw_lastfm_query(url)
    elif type == 'POST':
        url = urlparse.urlunparse(('http', 'ws.audioscrobbler.com', '/2.0/',
                                   '', '', ''))
        self._do_lastfm_post(url, urllib.urlencode(args))
def __init__(self, merchant_code, secret_code, merchant_titular,
             merchant_name, terminal_number, return_url=None,
             transaction_type=None, lang=None, domain=None,
             domain_protocol="http", currency_code=None,
             redirect_url=None, **kwargs):
    self._merchant_code = merchant_code
    self._secret_code = secret_code
    self._merchant_titular = merchant_titular
    self._merchant_name = merchant_name
    self._terminal_number = terminal_number
    self._redirect_url = redirect_url
    self._lang = lang or self._lang
    self._transaction_type = transaction_type or self._transaction_type
    self._currency_code = currency_code or self._currency_code
    self._domain = domain or urlparse.urlunparse((
        domain_protocol, Site.objects.get_current().domain,
        '/', None, None, None))
    domain = urlparse.urlparse(self._domain)
    merchant_path = reverse('process_payment', args=[kwargs.get('variant')])
    self._merchant_url = urlparse.urlunparse((
        domain.scheme, domain.netloc, merchant_path, None, None, None))
    self._return_url = return_url
    return super(CaixaCatalunyaBaseProvider, self).__init__(**kwargs)
def process(self):
    parsed = urlparse.urlparse(self.uri)
    protocol = parsed[0]
    host = parsed[1]
    port = self.ports[protocol]
    if ':' in host:
        host, port = host.split(':')
        port = int(port)
    rest = urlparse.urlunparse(('', '') + parsed[2:])
    if not rest:
        rest = rest + '/'
    class_ = self.protocols[protocol]
    headers = self.getAllHeaders().copy()
    if 'host' not in headers:
        headers['host'] = host
        real_host = host
    else:
        real_host = headers['host']
    self.path = urlparse.urlunparse((protocol, real_host) + parsed[2:])
    self.content.seek(0, 0)
    s = self.content.read()
    clientFactory = class_(self.method, rest, self.clientproto, headers,
                           s, self)
    # The magic line for SSL support!
    if self.useSSL:
        self.reactor.connectSSL(host, port, clientFactory,
                                ssl.ClientContextFactory())
    else:
        self.reactor.connectTCP(host, port, clientFactory)
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""
    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None
    if auth:
        auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
        new_url = urlparse.urlunparse((scheme, host, path, params, query, frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)
    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)
    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlparse.urlunparse((s2, netloc, path2, param2,
                                          query2, frag2))
    return fp
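# The credential-stripping step in isolation (Python 2; no network needed).
# The host name is hypothetical:
#
#   import urllib2, urlparse
#   auth, host = urllib2.splituser('user:secret@pypi.example')
#   # auth -> 'user:secret', host -> 'pypi.example'
#   urlparse.urlunparse(('https', host, '/simple/', '', '', ''))
#   # -> 'https://pypi.example/simple/'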
def do_METHOD_Direct(self):
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(
        self.path, 'http')
    try:
        host, _, port = netloc.rpartition(':')
        port = int(port)
    except ValueError:
        host = netloc
        port = 80
    try:
        self.log_request()
        if not common.PROXY_ENABLE:
            sock = socket.create_connection((host, port))
            self.headers['connection'] = 'close'
            data = '%s %s %s\r\n' % (
                self.command,
                urlparse.urlunparse(('', '', path, params, query, '')),
                self.request_version)
            data += ''.join('%s: %s\r\n' % (k, self.headers[k])
                            for k in self.headers if not k.startswith('proxy-'))
            data += '\r\n'
        else:
            sock = socket.create_connection(
                (common.PROXY_HOST, common.PROXY_PORT))
            host = common.HOSTS_MAP.get(host, host)
            url = urlparse.urlunparse(
                (scheme, host + ('' if port == 80 else ':%d' % port),
                 path, params, query, ''))
            data = '%s %s %s\r\n' % (self.command, url, self.request_version)
            data += ''.join('%s: %s\r\n' % (k, self.headers[k])
                            for k in self.headers if k != 'host')
            data += 'Host: %s\r\n' % netloc
            if common.PROXY_USERNAME and not common.PROXY_NTLM:
                data += '%s\r\n' % proxy_auth_header(common.PROXY_USERNAME,
                                                     common.PROXY_PASSWROD)
            data += 'Proxy-connection: close\r\n'
            data += '\r\n'
        content_length = int(self.headers.get('content-length', 0))
        if content_length > 0:
            data += self.rfile.read(content_length)
        sock.sendall(data)
        socket_forward(self.connection, sock)
    except Exception, ex:
        logging.exception('GaeProxyHandler.do_GET Error, %s', ex)
def form_url(parenturl, url):
    url = url.strip()  # ran across an image with a space in the src.
                       # Browser handled it, so we'd better, too.
    if "//" in url or parenturl is None:
        returl = url
    else:
        parsedUrl = urlparse.urlparse(parenturl)
        if url.startswith("/"):
            returl = urlparse.urlunparse(
                (parsedUrl.scheme, parsedUrl.netloc, url, '', '', ''))
        else:
            toppath = ""
            if parsedUrl.path.endswith("/"):
                toppath = parsedUrl.path
            else:
                toppath = parsedUrl.path[:parsedUrl.path.rindex('/')]
            returl = urlparse.urlunparse(
                (parsedUrl.scheme, parsedUrl.netloc,
                 toppath + '/' + url, '', '', ''))
    return returl
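# Resolution examples for form_url() (hypothetical URLs):
#
#   form_url('http://example.com/dir/page.html', 'img/pic.png')
#   # -> 'http://example.com/dir/img/pic.png'
#   form_url('http://example.com/dir/page.html', '/top.png')
#   # -> 'http://example.com/top.png'
#   form_url('http://example.com/dir/', '//cdn.example/x.png')
#   # -> '//cdn.example/x.png' (anything containing '//' passes through)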
def canonURL(url, parent_domain):
    (scheme, netloc, path, parameters, query, fragment) = urlparse(url)
    parent = urlparse(parent_domain)
    if not netloc and not path:
        return ""
    if not netloc and path.startswith("."):
        new_url = urljoin(parent_domain, path)
        (scheme, netloc, path, parameters, query, fragment) = urlparse(new_url)
    elif not netloc and path:
        netloc = parent.netloc
    netloc_lower = netloc.lower()
    netloc = netloc_lower.split(":")[0]
    prev_path = path
    print prev_path
    while 1:
        path = collapse_url.sub('/', path, 1)
        print path
        if prev_path == path:
            break
        prev_path = path
    path = unquote(path)
    canon_url = urlunparse((scheme, netloc, path, "", "", ""))
    return canon_url
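# A sketch of the collapse loop above, assuming collapse_url is a compiled
# pattern such as re.compile(r'/\./|//') -- the actual pattern is defined
# elsewhere in the source.  Each pass rewrites one match until the path
# stops changing:
#
#   import re
#   collapse_url = re.compile(r'/\./|//')
#   path = '/a//b/./c'
#   # pass 1: '/a/b/./c'   pass 2: '/a/b/c'   pass 3: unchanged -> stop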
def doHarvest(self, fromDate, until):
    lrUrl = self.config['lr']['url']
    if not fromDate:
        fromDate = self.config['lr']['first_run_start_date']
    urlParts = urlparse.urlparse(lrUrl)
    params = {"until": until}
    if fromDate:
        params['from'] = fromDate
    newQuery = urllib.urlencode(params)
    lrUrl = urlparse.urlunparse((urlParts[0], urlParts[1],
                                 '/harvest/listrecords', urlParts[3],
                                 newQuery, urlParts[5]))
    resumption_token = self.harvestData(lrUrl)
    while resumption_token is not None:
        newQuery = urllib.urlencode({"resumption_token": resumption_token})
        lrUrl = urlparse.urlunparse((urlParts[0], urlParts[1],
                                     '/harvest/listrecords', urlParts[3],
                                     newQuery, urlParts[5]))
        resumption_token = self.harvestData(lrUrl)
def setUp(self):
    super(TestShotgunDownloadAndUnpack, self).setUp()
    zip_file_location = os.path.join(self.fixtures_root, "misc", "zip")
    # Identify the source file to "download"
    self.download_source = os.path.join(zip_file_location, "tank_core.zip")
    # store the expected contents of the zip, to ensure it's properly
    # extracted.
    self.expected_output_txt = os.path.join(zip_file_location, "tank_core.txt")
    self.expected_output = open(self.expected_output_txt).read().split("\n")
    # Construct URLs from the source file name
    # "file" will be used for the protocol, so this URL will look like
    # `file:///fixtures_root/misc/zip/tank_core.zip`
    self.good_zip_url = urlparse.urlunparse(
        ("file", None, self.download_source, None, None, None)
    )
    # NOTE: as written, this is identical to good_zip_url.
    self.bad_zip_url = urlparse.urlunparse(
        ("file", None, self.download_source, None, None, None)
    )
    # Temporary destination to unpack sources to.
    self.download_destination = os.path.join(
        self.tank_temp, self.short_test_name, "test_unpack"
    )
    os.makedirs(os.path.dirname(self.download_destination))
    if os.path.exists(self.download_destination):
        os.remove(self.download_destination)
    # Make sure mockgun is properly configured
    if self.mockgun.config.server is None:
        self.mockgun.config.server = "unit_test_mock_sg"
def validate_ticket(self, ticket, request):
    service_name = self.service_name
    ticket_name = self.ticket_name
    this_url = self.get_url(request)
    p = urlparse.urlparse(this_url)
    qs_map = urlparse.parse_qs(p.query)
    if ticket_name in qs_map:
        del qs_map[ticket_name]
    param_str = urlencode(qs_map, doseq=True)
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
    service_url = urlparse.urlunparse(p)
    params = {
        service_name: service_url,
        ticket_name: ticket,
    }
    param_str = urlencode(params, doseq=True)
    p = urlparse.urlparse(self.cas_info['service_validate_url'])
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str,) + p[5:]))
    service_validate_url = urlparse.urlunparse(p)
    self.log(
        "Requesting service-validate URL => '{0}' ...".format(
            service_validate_url))
    http_client = HTTPClient(self.cas_agent)
    d = http_client.get(service_validate_url)
    d.addCallback(treq.content)
    d.addCallback(self.parse_sv_results, service_url, ticket, request)
    return d
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None,
                                      quote_path=False):
    """
    Given an Invenio URL returns a tuple with two elements. The first is the
    canonical URL, that is the original URL with the CFG_SITE_URL prefix and
    the ln= argument stripped. The second element is a mapping,
    language code -> alternate URL.

    @param quote_path: if True, the path section of the given C{url}
        is quoted according to RFC 2396
    """
    dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = \
        urlparse(url)
    canonical_scheme, canonical_netloc = urlparse(CFG_SITE_URL)[0:2]
    parsed_query = washed_argd or parse_qsl(query)
    no_ln_parsed_query = [(key, value) for (key, value) in parsed_query
                          if key != "ln"]
    if drop_ln:
        canonical_parsed_query = no_ln_parsed_query
    else:
        canonical_parsed_query = parsed_query
    if quote_path:
        path = urllib.quote(path)
    canonical_query = urlencode(canonical_parsed_query)
    canonical_url = urlunparse((canonical_scheme, canonical_netloc, path,
                                dummy_params, canonical_query, fragment))
    alternate_urls = {}
    for ln in CFG_SITE_LANGS:
        alternate_query = urlencode(no_ln_parsed_query + [("ln", ln)])
        alternate_url = urlunparse((canonical_scheme, canonical_netloc, path,
                                    dummy_params, alternate_query, fragment))
        alternate_urls[ln] = alternate_url
    return canonical_url, alternate_urls
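# A usage sketch with assumed Invenio settings (CFG_SITE_URL and
# CFG_SITE_LANGS are site configuration values; illustrative here):
#
#   CFG_SITE_URL = 'http://cds.example'
#   CFG_SITE_LANGS = ['en', 'fr']
#   get_canonical_and_alternates_urls('/search?p=test&ln=fr')
#   # -> ('http://cds.example/search?p=test',
#   #     {'en': 'http://cds.example/search?p=test&ln=en',
#   #      'fr': 'http://cds.example/search?p=test&ln=fr'})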
def format_message(template, config, first_name=None, last_name=None,
                   uid=None, target_email=None):
    first_name = ('Alice' if not isinstance(first_name, (str, unicode))
                  else first_name)
    last_name = ('Liddle' if not isinstance(last_name, (str, unicode))
                 else last_name)
    target_email = ('*****@*****.**'
                    if not isinstance(target_email, (str, unicode))
                    else target_email)
    uid = (uid or config['server_config'].get('server.secret_id')
           or make_uid())
    template = template_environment.from_string(template)
    template_vars = {}
    template_vars['uid'] = uid
    template_vars['first_name'] = first_name
    template_vars['last_name'] = last_name
    template_vars['email_address'] = target_email
    template_vars['company_name'] = config.get('mailer.company_name', '')
    webserver_url = config.get('mailer.webserver_url', '')
    webserver_url = urlparse.urlparse(webserver_url)
    tracking_image = config['server_config']['server.tracking_image']
    template_vars['webserver'] = webserver_url.netloc
    tracking_url = urlparse.urlunparse((webserver_url.scheme,
                                        webserver_url.netloc, tracking_image,
                                        '', 'id=' + uid, ''))
    webserver_url = urlparse.urlunparse((webserver_url.scheme,
                                         webserver_url.netloc,
                                         webserver_url.path, '', '', ''))
    template_vars['tracking_dot_image_tag'] = \
        "<img src=\"{0}\" style=\"display:none\" />".format(tracking_url)
    template_vars_url = {}
    template_vars_url['rickroll'] = 'http://www.youtube.com/watch?v=oHg5SJYRHA0'
    template_vars_url['webserver'] = webserver_url + '?id=' + uid
    template_vars_url['webserver_raw'] = webserver_url
    template_vars_url['tracking_dot'] = tracking_url
    template_vars['url'] = template_vars_url
    template_vars.update(template_environment.standard_variables)
    return template.render(template_vars)
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""
    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
    # Double scheme does not raise on Mac OS X as revealed by a
    # failing test.  We would expect "nonnumeric port".  Refs #20.
    if netloc.endswith(':'):
        raise httplib.InvalidURL("nonnumeric port: ''")
    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None
    if auth:
        auth = "Basic " + _encode_auth(auth)
        new_url = urlparse.urlunparse((scheme, host, path, params, query, frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)
    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)
    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlparse.urlunparse((s2, netloc, path2, param2,
                                          query2, frag2))
    return fp
def doPrivmsg(self, irc, msg):
    if self.registryValue('urlmodify', msg.args[0]):
        toModify = {
            'what.cd': {'http': 'what.cd', 'https': 'ssl.what.cd'},
            'awesome-hd.net': {'http': 'awesome-hd.net',
                               'https': 'ssl.awesome-hd.net'},
        }
        tmp = []
        for name in toModify:
            if msg.args[1].find(name) != -1:
                for word in msg.args[1].split(' '):
                    if word.find(name) != -1:
                        url = urlparse(word)
                        if url[0] == 'http':
                            url = urlunparse(('https', toModify[name]['https'],
                                              url[2], url[3], url[4], url[5]))
                            tmp.append(url)
                        if url[0] == 'https':
                            url = urlunparse(('http', toModify[name]['http'],
                                              url[2], url[3], url[4], url[5]))
                            tmp.append(url)
        if len(tmp) > 0:
            tmp.reverse()
            reply = ' '.join(tmp)
            irc.queueMsg(ircmsgs.privmsg(msg.args[0], reply))
def get_favicon(url):
    try:
        html = requests.request('GET', url)
        soup = BeautifulSoup(html.text)
        icon = soup.find('link', rel='shortcut icon')
        if icon is None:
            icon = soup.find('link', type='image/x-icon')
        icon_href = None
        if hasattr(icon, 'href'):
            icon_href = str(icon['href'])
        if icon_href is None or icon_href.strip() == "":
            parsed_url = urlparse.urlparse(url)
            icon_href = urlparse.urlunparse(
                (parsed_url.scheme, parsed_url.netloc,
                 '', '', '', '')) + '/favicon.ico'
            last_try = requests.request('GET', icon_href)
            if last_try.status_code == 200:
                return icon_href
            else:
                return None
        if "http://" not in icon_href:
            parsed_url = urlparse.urlparse(url)
            icon_href = urlparse.urlunparse(
                (parsed_url.scheme, parsed_url.netloc,
                 '', '', '', '')) + icon_href
        last_try = requests.request('GET', icon_href)
        if last_try.status_code == 200:
            return icon_href
        else:
            return 'https://www.readbox.co/static/lightpng.png'
    except Exception:
        return 'https://www.readbox.co/static/lightpng.png'
def __init__(self, scheme=None, hostname=None, path="/", params="",
             query={}, fragment="", username=None, password=None, port=None):
    self.path = path
    self.params = params
    self.query = query
    self.fragment = fragment
    if hostname:
        # Absolute URL
        if username:
            if password:
                netloc = username + ':' + password + '@' + hostname
            else:
                netloc = username + '@' + hostname
        else:
            netloc = hostname
        if not scheme:
            scheme = DEFAULT_SCHEME
        host = hostname
        defport = DEFAULT_PORTS.get(scheme, None)
        if port:
            if port != defport:
                netloc = netloc + ':' + str(port)
                host = host + ':' + str(port)
        else:
            port = defport
        self.scheme = scheme
        self.netloc = netloc
        self.host = host
        self.hostname = hostname
        self.username = username
        self.password = password
        self.port = port
    else:
        # Relative URL
        self.scheme = ""
        self.netloc = ""
        self.host = ""
        self.hostname = ""
        self.username = None
        self.password = None
        self.port = None
    query_string = unparse_qs(self.query)
    quoted_path = urllib.quote(self.path)
    self.location = urlparse.urlunparse(
        ('', '', quoted_path, self.params, query_string, self.fragment))
    self.url = urlparse.urlunparse(
        (self.scheme, self.netloc, quoted_path, self.params,
         query_string, self.fragment))
def _starter1(self):
    # honestly, I don't know why this GET request needs to be made...
    headers = dict(self.headers)
    headers.update({
        "Referer": urlunparse((__PROTOCOL__, self.host, __STARTER_PATH__,
                               None, None, None))})
    url = urlunparse((__PROTOCOL__, self.host, __STARTER_PATH__,
                      None, None, None))
    log.debug(url)
    cookies = dict(
        DSSignInURL="/",
        DSLastAccess=self.DSLastAccess,
        DSID=self.DSID,
        DSFirstAccess=self.last_res.cookies['DSLastAccess'])
    params = dict(check="yes")
    res = requests.get(url, headers=headers, params=params,
                       cookies=cookies, verify=False)
    log.debug(res.text)
    res.raise_for_status()
def makeArchive(srcrepo, archive, pkgs):
    # pull is the pkgrecv(1) command
    import pull
    print "source directory:", srcrepo
    print "package list:", pkgs
    urlprefix = ['http://', 'https://', 'file://']
    if not True in [srcrepo.startswith(i) for i in urlprefix]:
        # We need the replace statement because the urllib.url2pathname
        # command used in pull.py will work correctly with '/' slash in
        # windows.
        srcrepo = urlunparse(("file",
                              os.path.abspath(srcrepo).replace('\\', '/'),
                              '', '', '', ''))
    destrepo = tempfile.mkdtemp()
    if not True in [destrepo.startswith(i) for i in urlprefix]:
        destrepo_url = urlunparse(("file",
                                   os.path.abspath(destrepo).replace('\\', '/'),
                                   '', '', '', ''))
    sys.argv = [sys.argv[0], '-m', 'all-timestamps', '-s', srcrepo,
                '-d', destrepo_url]
    sys.argv.extend(pkgs)
    rv = pull.main_func()
    # copy the cfg_cache to the archive
    if isinstance(archive, zipfile.ZipFile):
        for root, dirs, files in os.walk(destrepo, topdown=False):
            reldir = root[len(destrepo)+1:]
            for name in files:
                archive.write(os.path.join(root, name),
                              os.path.join(reldir, name))
    elif isinstance(archive, tarfile.TarFile):
        archive.add(destrepo, destrepo[len(destrepo):])
    # close the archive
    archive.close()
    return rv
def _do_lastfm_query(type, method, **kwargs):
    args = {
        "method": method,
        "api_key": key,
    }
    for k, v in kwargs.items():
        args[k] = v.encode("utf8")
    s = ""
    for k in sorted(args.keys()):
        s += k + args[k]
    s += secret
    if "sk" in args.keys() or "token" in args.keys():
        args["api_sig"] = hashlib.md5(s).hexdigest()
    if type == "GET":
        url = urlparse.urlunparse(('http', 'ws.audioscrobbler.com', '/2.0/',
                                   '', urllib.urlencode(args), ''))
        return _do_raw_lastfm_query(url)
    elif type == "POST":
        url = urlparse.urlunparse(('http', 'ws.audioscrobbler.com', '/2.0/',
                                   '', '', ''))
        _do_lastfm_post(url, urllib.urlencode(args))
def normalizeURL(url):
    url = url.lower()
    url = urlparse.urldefrag(url)[0]
    # split the URL
    link_parts = urlparse.urlparse(url)
    if link_parts.path == '/':
        temp = list(link_parts[:])
        temp[2] = ''
        url = urlparse.urlunparse(tuple(temp))
    # link has been updated, so resplitting is required
    link_parts = urlparse.urlparse(url)
    try:
        if link_parts.netloc.split(':')[1] == '80' or \
           link_parts.netloc.split(':')[1] == '443':
            temp = list(link_parts[:])
            temp[1] = temp[1].split(':')[0]
            url = urlparse.urlunparse(tuple(temp))
    except IndexError:
        pass
    url = url.decode('utf-8', 'ignore')
    return url
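# Normalization examples for normalizeURL() (hypothetical URLs):
#
#   normalizeURL('HTTP://Example.COM:80/#top')  # -> u'http://example.com'
#       (lowercased, fragment dropped, bare '/' path removed,
#        default port stripped)
#   normalizeURL('https://example.com:443/page')
#   # -> u'https://example.com/page'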
def handle_endtag(self, tag):
    if self._link_counter is not None:
        if self._link_counter < 1:
            if tag != 'a':
                logger.warn(u'Invalid HTML tags in %s', self._url)
            href = self._get_link_attr('href')
            # We discard anchors and empty href.
            if href and href[0] != '#':
                href_parts = urlparse.urlparse(href)
                # Convert absolute URL to absolute URI
                if href[0] == '/':
                    href = urlparse.urlunparse(self._base_uri + href_parts[2:])
                elif not is_remote_uri(href):
                    # Handle relative URL
                    href = urlparse.urlunparse(
                        self._base_uri +
                        ('/'.join((self._relative_path, href_parts[2])),) +
                        href_parts[3:])
                filename = os.path.basename(href_parts[2])
                # If the content of the link is empty, we use the last
                # part of path.
                if self._buffer:
                    name = ' '.join(self._buffer)
                else:
                    name = filename
                rel = self._get_link_attr('rel')
                self.links.append((href, filename, name, rel))
            self._link_counter = None
            self._link_attrs = None
            self._buffer = None
        else:
            self._link_counter -= 1
def solve_cf_challenge(self, resp, **original_kwargs):
    self.tries += 1
    timeout = int(re.compile("\}, ([\d]+)\);", re.MULTILINE).findall(
        resp.text)[0]) / 1000
    sleep(timeout)
    body = resp.text
    parsed_url = urlparse(resp.url)
    domain = parsed_url.netloc
    submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(
        parsed_url.scheme, domain)
    cloudflare_kwargs = deepcopy(original_kwargs)
    headers = cloudflare_kwargs.setdefault('headers', {'Referer': resp.url})
    try:
        params = cloudflare_kwargs.setdefault(
            'params', OrderedDict([
                ('s', re.search(r'name="s"\svalue="(?P<s_value>[^"]+)',
                                body).group('s_value')),
                ('jschl_vc', re.search(r'name="jschl_vc" value="(\w+)"',
                                       body).group(1)),
                ('pass', re.search(r'name="pass" value="(.+?)"',
                                   body).group(1)),
            ]))
        answer = self.get_answer(body, domain)
    except Exception as e:
        logging.error("Unable to parse Cloudflare anti-bots page. %s" % e)
        raise
    try:
        params["jschl_answer"] = str(answer)
    except:
        pass
    # Requests transforms any request into a GET after a redirect,
    # so the redirect has to be handled manually here to allow for
    # performing other types of requests even as the first request.
    method = resp.request.method
    cloudflare_kwargs['allow_redirects'] = False
    redirect = self.request(method, submit_url, **cloudflare_kwargs)
    redirect_location = urlparse(redirect.headers['Location'])
    if not redirect_location.netloc:
        redirect_url = urlunparse(
            (parsed_url.scheme, domain, redirect_location.path,
             redirect_location.params, redirect_location.query,
             redirect_location.fragment))
        return self.request(method, redirect_url, **original_kwargs)
    return self.request(method, redirect.headers['Location'],
                        **original_kwargs)
def make_absolute_url(path):
    # NOTE: We're using the default site as set by
    # settings.SITE_ID and the Sites framework
    site = get_current_site(None)
    return urlunparse(('https' if settings.USE_SSL else 'http',
                       site.domain, path, '', '', ''))
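# A sketch of the result, assuming the current Site's domain is
# 'example.com' and settings.USE_SSL is True:
#
#   make_absolute_url('/accounts/login/')
#   # -> 'https://example.com/accounts/login/'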
def _create_url(self, destination, path_bytes, param_bytes, query_bytes):
    return urlparse.urlunparse(
        ("matrix", destination, path_bytes, param_bytes, query_bytes, ""))
def _cached_http_get(self, url, base_url, timeout, params=None, data=None,
                     multipart_data=None, headers=None, cookies=None,
                     allow_redirect=True, method=None, require_debrid=False,
                     read_error=False, cache_limit=8):
    if require_debrid:
        if Scraper.debrid_resolvers is None:
            Scraper.debrid_resolvers = [
                resolver for resolver in resolveurl.relevant_resolvers()
                if resolver.isUniversal()
            ]
        if not Scraper.debrid_resolvers:
            logger.log('%s requires debrid: %s' %
                       (self.__module__, Scraper.debrid_resolvers),
                       log_utils.LOGDEBUG)
            return ''
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    if headers is None:
        headers = {}
    if url.startswith('//'):
        url = 'http:' + url
    referer = headers['Referer'] if 'Referer' in headers else base_url
    if params:
        if url == base_url and not url.endswith('/'):
            url += '/'
        parts = urlparse.urlparse(url)
        if parts.query:
            params.update(scraper_utils.parse_query(url))
            url = urlparse.urlunparse((parts.scheme, parts.netloc, parts.path,
                                       parts.params, '', parts.fragment))
        url += '?' + urllib.urlencode(params)
    logger.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
               (url, cookies, data, headers), log_utils.LOGDEBUG)
    if data is not None:
        if isinstance(data, basestring):
            data = data
        else:
            data = urllib.urlencode(data, True)
    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data
    _created, _res_header, html = self.db_connection().get_cached_url(
        url, data, cache_limit)
    if html:
        logger.log('Returning cached result for: %s' % (url),
                   log_utils.LOGDEBUG)
        return html
    try:
        self.cj = self._set_cookies(base_url, cookies)
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        request = urllib2.Request(url, data=data)
        headers = headers.copy()
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_header('Accept-Encoding', 'gzip')
        request.add_unredirected_header('Host', request.get_host())
        if referer:
            request.add_unredirected_header('Referer', referer)
        if 'Referer' in headers:
            del headers['Referer']
        if 'Host' in headers:
            del headers['Host']
        for key, value in headers.iteritems():
            request.add_header(key, value)
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)
        if method is not None:
            request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            logger.log('Response Cookies: %s - %s' %
                       (url, scraper_utils.cookies_as_str(self.cj)),
                       log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)
        if not allow_redirect and (
                response.getcode() in [301, 302, 303, 307]
                or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                redir_url = response.info().getheader('Location')
                if redir_url.startswith('='):
                    redir_url = redir_url[1:]
                return redir_url
        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            logger.log('Response exceeded allowed size. %s => %s / %s' %
                       (url, content_length, MAX_RESPONSE),
                       log_utils.LOGWARNING)
        if method == 'HEAD':
            return ''
        else:
            if response.info().get('Content-Encoding') == 'gzip':
                html = ungz(response.read(MAX_RESPONSE))
            else:
                html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.info().get('Content-Encoding') == 'gzip':
            html = ungz(e.read(MAX_RESPONSE))
        else:
            html = e.read(MAX_RESPONSE)
        if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
            html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(),
                                    self.get_name())
            if not html:
                return ''
        elif e.code == 503 and 'cf-browser-verification' in html:
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua(),
                                    extra_headers=headers)
            if not html:
                return ''
        else:
            logger.log('Error (%s) during scraper http get: %s' %
                       (str(e), url), log_utils.LOGWARNING)
            if not read_error:
                return ''
    except Exception as e:
        logger.log('Error (%s) during scraper http get: %s' % (str(e), url),
                   log_utils.LOGWARNING)
        return ''
    self.db_connection().cache_url(url, html, data)
    return html
if player.isPlayingVideo():
    player.position = player.getTime()
    player.totaltime = player.getTotalTime()
    try:
        position = player.position * 100 / player.totaltime
        #log("Playing %s at %s %%" % (params['title'], position))
        if position >= 95:
            log("Mark as viewed")
            # Python = terrible language, so
            # I split apart and rebuild my URL by hand
            urlparams = urlparse.urlparse(params['markviewed'])
            urlparams_copie = (urlparams[0],
                               urlparams.netloc.split('@')[1]) + urlparams[2:]
            url = urlparse.urlunparse(urlparams_copie)
            # Python = terrible language, so
            # I create a context to avoid getting hassled by SSL
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            # Python = terrible language, so
            # I build a request (maybe the least awful part of this code);
            # basic auth is still done by hand because it is too painful otherwise
            req = urllib2.Request(url, 'action=set')
            auth_cred = "%s:%s" % (urlparams.username, urlparams.password)
            auth_cred = auth_cred.encode('base64')
            auth_cred = auth_cred.replace("\n", "")
            auth_cred = auth_cred.rstrip()
def torrentAction(method, arguments):
    host = lazylibrarian.CONFIG['TRANSMISSION_HOST']
    port = check_int(lazylibrarian.CONFIG['TRANSMISSION_PORT'], 0)
    if not host or not port:
        logger.error('Invalid transmission host or port, check your config')
        return False
    username = lazylibrarian.CONFIG['TRANSMISSION_USER']
    password = lazylibrarian.CONFIG['TRANSMISSION_PASS']
    if not host.startswith("http://") and not host.startswith("https://"):
        host = 'http://' + host
    if host.endswith('/'):
        host = host[:-1]
    # Fix the URL. We assume that the user does not point to the RPC endpoint,
    # so add it if it is missing.
    parts = list(urlparse.urlparse(host))
    if parts[0] not in ("http", "https"):
        parts[0] = "http"
    if ':' not in parts[1]:
        parts[1] += ":%s" % port
    if not parts[2].endswith("/rpc"):
        parts[2] += "/transmission/rpc"
    host = urlparse.urlunparse(parts)
    # Retrieve session id
    auth = (username, password) if username and password else None
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    response = requests.get(host, auth=auth, proxies=proxies, timeout=timeout)
    if response is None:
        logger.error("Error getting Transmission session ID")
        return
    # Parse response
    session_id = ''
    if response.status_code == 401:
        if auth:
            logger.error("Username and/or password not accepted by "
                         "Transmission")
        else:
            logger.error("Transmission authorization required")
        return
    elif response.status_code == 409:
        session_id = response.headers['x-transmission-session-id']
    if not session_id:
        logger.error("Expected a Session ID from Transmission")
        return
    # Prepare next request
    headers = {'x-transmission-session-id': session_id}
    data = {'method': method, 'arguments': arguments}
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        response = requests.post(host, data=json.dumps(data), headers=headers,
                                 proxies=proxies, auth=auth, timeout=timeout)
        response = response.json()
    except Exception as e:
        logger.debug('Transmission %s: %s' % (type(e).__name__, str(e)))
        response = ''
    if not response:
        logger.error("Error sending torrent to Transmission")
        return
    return response
def fetch_via_oauth(url_to_fetch,
                    email_of_user_wanting_access=None,
                    password_of_user_wanting_access=None,
                    consumer_is_anointed=False,
                    method="GET"):
    """Fetches a given url (e.g. http://localhost:8080/api/v1/user) that
    requires oauth authentication, and returns the results.

    This function takes care of all the necessary oauth handshaking.
    The host at this url must accept remote-api calls via /_ah/remote_api.
    The intended use is for it to be a dev_appserver instance.

    **NOTE**: this function also will stub out all appengine calls so
    they go to the remote api server (using remote_api_stub)!  Be careful
    if you call appengine functions after calling this.
    TODO(csilvers): can we unstub at the end of this function?

    Arguments:
        url_to_fetch: The url to retrieve.  The host/port should be that
            of the local dev-appserver instance, probably localhost:8080.
            The protocol should probably be http://.
        email_of_user_wanting_access: This is who the oauth process will
            say is logging in (the 'resource provider').  This user must
            exist in the khan db.  You can create a user via
                user_data = user_models.UserData.insert_for(
                    'random_string', '*****@*****.**')
                user_data.set_password('password')
            If you pass in None, we will use an 'internal' user we create.
        password_of_user_wanting_access: The password corresponding to the
            user specified via email.  If you pass in None for
            email_of_user_wanting_access, the value here is ignored.
        consumer_is_anointed: oauth cares not only about the user it's
            retrieving information for, but also about the client
            (application) fetching the data.  Khan recognizes two classes
            of clients: anointed (like the ipad), and non-anointed (the
            default).  Anointed clients can perform some actions that
            non-anointed ones cannot.  This boolean specifies whether you
            wish the oauth request to seem to come from an anointed client
            or a non-anointed client.
        method: GET or POST are definitely supported.  PUT will probably
            work.

    Returns:
        The response from fetching the given url.  The HTTP response code
        is not returned.
    """
    user_pw_pair = (email_of_user_wanting_access,
                    password_of_user_wanting_access)
    scheme, hostname, path, params, query, fragment = \
        urlparse.urlparse(url_to_fetch)
    dev_appserver_url = urlparse.urlunparse((scheme, hostname,
                                             '', '', '', ''))
    # Do the stubbing if we haven't done it already.
    global _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER
    if not _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER:
        _stub_appengine_for_dev_appserver(hostname)
        _CALLED_STUB_APPENGINE_FOR_DEV_APPSERVER = True
    # If the caller doesn't care who the user is making the request,
    # we'll just use one that we make and keep around.
    if user_pw_pair == (None, None):
        (_, user, pw) = _create_user('test_user_for_oauth_fetch')
        user_pw_pair = (user, pw)
    # If we already have the tokens cached, don't refetch (it's expensive).
    # The key is exactly the set of arguments for _create_oauth_tokens.
    token_map_key = (dev_appserver_url, user_pw_pair, consumer_is_anointed)
    if token_map_key in _TOKEN_MAP:
        consumer, access_token = _TOKEN_MAP[token_map_key]
    else:
        consumer, access_token = _create_oauth_tokens(*token_map_key)
        _TOKEN_MAP[token_map_key] = (consumer, access_token)
    return _access_resource(url_to_fetch, consumer, access_token, method)
def __init__(self, data, encoding='UTF8'):
    """
    @param data: Either a string representing a URL or a 6-elems tuple
        representing the URL components:
        <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Simple generic test, more detailed tests in each method!

    >>> u = url_object('http://w3af.com/foo/bar.txt')
    >>> u.path
    '/foo/bar.txt'
    >>> u.scheme
    'http'
    >>> u.getFileName()
    'bar.txt'
    >>> u.getExtension()
    'txt'

    # http is the default protocol, we can provide URLs with no proto
    >>> u = url_object('w3af.com')
    >>> u.getDomain()
    'w3af.com'
    >>> u.getProtocol()
    'http'

    # But we can't specify a URL without a domain!
    >>> u = url_object('http://')
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Invalid URL "http://"

    >>> u = url_object(u'http://w3af.com/foo/bar.txt')
    >>> u.path
    u'/foo/bar.txt'

    >>> u = url_object('http://w3af.org/?foo=http://w3af.com')
    >>> u.netloc
    'w3af.org'

    >>> u = url_object('http://w3af.org/', encoding='x-euc-jp')
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Invalid encoding "x-euc-jp" when creating URL.
    """
    self._already_calculated_url = None
    self._querystr = None
    self._changed = True
    self._encoding = encoding
    if data is None:
        raise ValueError('Can not build a url_object from data=None.')
    # Verify that the encoding is a valid one. If we don't do it here,
    # things might get crazy afterwards.
    if isinstance(data, tuple):
        scheme, netloc, path, params, qs, fragment = data
    else:
        scheme, netloc, path, params, qs, fragment = \
            urlparse.urlparse(data)
    #
    # This is the case when someone creates a url_object like
    # this: url_object('www.w3af.com')
    #
    if scheme == netloc == '' and path:
        # By default we set the protocol to "http"
        scheme = 'http'
        netloc = path
        path = ''
    self.scheme = scheme or u''
    self.netloc = netloc or u''
    self.path = path or u''
    self.params = params or u''
    self.querystring = qs or u''
    self.fragment = fragment or u''
    if not self.netloc:
        # The URL is invalid, we don't have a netloc!
        if isinstance(data, tuple):
            invalid_url = urlparse.urlunparse(data)
        else:
            invalid_url = data
        raise ValueError, 'Invalid URL "%s"' % (invalid_url,)
def baseurl(self):
    # urlunparse() requires a six-element tuple, so pad with empty
    # params, query and fragment.
    return urlparse.urlunparse((self.scheme, self.netloc, self.path,
                                '', '', ''))
def __repr__(self):
    parsed_url = (self.scheme, self.netloc, self.path, self.params,
                  self.query, self.fragment)
    return urlparse.urlunparse(parsed_url)
def parse_addition_data(self, response, sku, js_data):
    meta = response.meta.copy()
    product = response.meta['product']
    reqs = meta.get('reqs', [])
    currency = response.xpath(
        '//meta[contains(@itemprop, "priceCurrency")]/@content').extract()
    if currency:
        meta['product']['price'] = Price(price=0.00, priceCurrency='USD')
    # if js_data['review']['count'] > 0:
    reqs.append(
        Request(url=self.REVIEW_URL.format(sku=sku), dont_filter=True,
                callback=self.parse_buyer_reviews, meta=meta))
    url = self.RELATED_PRODUCT.format(sku=sku)
    params = {
        'pType': 'product',
        'prodId': sku,
        'prodName': product['title'].encode('ascii', 'ignore'),
        'ref': '',
        'status': 'ok',
        'url': 'http://www.staples.com/product_%s' % sku,
        'userAgent': self.user_agent,
    }
    url_parts = list(urlparse.urlparse(url))
    url_parts[4] = urllib.urlencode(params)
    new_url = urlparse.urlunparse(url_parts)
    reqs.append(
        Request(url=new_url, dont_filter=True,
                callback=self.parse_related_product, meta=meta))
    # Get base product data and child "additionalProductsWarrantyServices"
    # variants, if any
    try:
        reqs.append(
            Request(
                url=self.PRICE_URL.format(
                    sku=sku,
                    metadata__coming_soon_flag=js_data['metadata']['coming_soon_flag'],
                    metadata__price_in_cart_flag=js_data['metadata']['price_in_cart_flag'],
                    prod_doc_key=js_data['prod_doc_key'],
                    metadata__product_type__id=js_data['metadata']['product_type']['id'],
                    metadata__preorder_flag=js_data['metadata']['preorder_flag'],
                    street_date=time.time(),
                    metadata__channel_availability_for__id=js_data['metadata']['channel_availability_for']['id'],
                    metadata__backorder_flag=js_data['metadata']['backorder_flag']),
                dont_filter=True,
                callback=self.get_price_and_stockstatus,
                meta=meta,
            ))
    except Exception as e:
        self.log(
            "Error while forming request for base product data: {}".format(e),
            WARNING)
    # Get real variants, if any
    # import pprint
    # pprint.pprint(response.meta['product']['variants'])
    if self.scrape_variants_with_extra_requests:
        for v in response.meta['product']['variants']:
            try:
                reqs.append(
                    Request(
                        url=self.PRICE_URL.format(
                            sku=v['partnumber'],
                            metadata__coming_soon_flag=js_data['metadata']['coming_soon_flag'],
                            metadata__price_in_cart_flag=js_data['metadata']['price_in_cart_flag'],
                            prod_doc_key=v['prod_doc_key'],
                            metadata__product_type__id=js_data['metadata']['product_type']['id'],
                            metadata__preorder_flag=js_data['metadata']['preorder_flag'],
                            street_date=time.time(),
                            metadata__channel_availability_for__id=js_data['metadata']['channel_availability_for']['id'],
                            metadata__backorder_flag=js_data['metadata']['backorder_flag']),
                        dont_filter=True,
                        callback=self.get_variant_price,
                        meta=meta,
                    ))
            except Exception as e:
                self.log(
                    "Error while forming request for variant: {}".format(e),
                    WARNING)
    if reqs:
        return self.send_next_request(reqs, response)
    else:
        return product
def add_login_token_to_redirect_url(self, url, token):
    url_parts = list(urlparse.urlparse(url))
    query = dict(urlparse.parse_qsl(url_parts[4]))
    query.update({"loginToken": token})
    url_parts[4] = urllib.urlencode(query)
    return urlparse.urlunparse(url_parts)
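# Example (hypothetical URL; a single parameter, so ordering is
# deterministic here):
#
#   self.add_login_token_to_redirect_url('https://app.example/cb', 'abc123')
#   # -> 'https://app.example/cb?loginToken=abc123'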
def get_logged_in_program_certificate_url(certificate_url):
    parsed_url = urlparse(certificate_url)
    query_string = 'next=' + parsed_url.path
    url_parts = (parsed_url.scheme, parsed_url.netloc, '/login/', '',
                 query_string, '')
    return urlunparse(url_parts)
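# Example (hypothetical URL):
#
#   get_logged_in_program_certificate_url(
#       'https://credentials.example/certificates/abc')
#   # -> 'https://credentials.example/login/?next=/certificates/abc'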
def _NormalizeUrl(url):
    """Returns a normalized URL with runs of slashes in the path collapsed."""
    parsed_url = list(urlparse.urlparse(url))
    parsed_url[2] = re.sub(r'/{2,}', r'/', parsed_url[2])
    return urlparse.urlunparse(parsed_url)
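# Example (hypothetical URL):
#
#   _NormalizeUrl('http://host//a///b?q=1')  # -> 'http://host/a/b?q=1'
#   # (only the path is rewritten; the query string is untouched)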
def includeme(config):
    """Install SyncServer application into the given Pyramid configurator."""
    # Set the umask so that files are created with secure permissions.
    # Necessary for e.g. created-on-demand sqlite database files.
    os.umask(0o077)

    # If PyOpenSSL is available, configure requests to use it.
    # This helps improve security on older python versions.
    if HAS_PYOPENSSL:
        requests.packages.urllib3.contrib.pyopenssl.inject_into_urllib3()

    settings = config.registry.settings
    import_settings_from_environment_variables(settings)

    # Sanity-check the deployment settings and provide sensible defaults.
    public_url = settings.get("syncserver.public_url")
    if public_url is None:
        raise RuntimeError("you must configure syncserver.public_url")
    public_url = public_url.rstrip("/")
    settings["syncserver.public_url"] = public_url

    secret = settings.get("syncserver.secret")
    if secret is None:
        secret = generate_random_hex_key(64)
    sqluri = settings.get("syncserver.sqluri")
    if sqluri is None:
        rootdir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        sqluri = "sqlite:///" + os.path.join(rootdir, "syncserver.db")

    # Automagically configure from IdP if one is given.
    idp = settings.get("syncserver.identity_provider")
    if idp is not None:
        r = requests.get(urljoin(idp, '/.well-known/fxa-client-configuration'))
        r.raise_for_status()
        idp_config = r.json()
        idp_issuer = urlparse(idp_config["auth_server_base_url"]).netloc

    # Configure app-specific defaults based on top-level configuration.
    settings.pop("config", None)
    if "tokenserver.backend" not in settings:
        # Default to our simple static node-assignment backend
        settings["tokenserver.backend"] = DEFAULT_TOKENSERVER_BACKEND
    if settings["tokenserver.backend"] == DEFAULT_TOKENSERVER_BACKEND:
        # Provide some additional defaults for the default backend,
        # unless overridden in the config.
        if "tokenserver.sqluri" not in settings:
            settings["tokenserver.sqluri"] = sqluri
        if "tokenserver.node_url" not in settings:
            settings["tokenserver.node_url"] = public_url
        if "endpoints.sync-1.5" not in settings:
            settings["endpoints.sync-1.5"] = "{node}/storage/1.5/{uid}"
    if "tokenserver.monkey_patch_gevent" not in settings:
        # Default to no gevent monkey-patching
        settings["tokenserver.monkey_patch_gevent"] = False
    if "tokenserver.applications" not in settings:
        # Default to just the sync-1.5 application
        settings["tokenserver.applications"] = "sync-1.5"
    if "tokenserver.secrets.backend" not in settings:
        # Default to a single fixed signing secret
        settings["tokenserver.secrets.backend"] = "mozsvc.secrets.FixedSecrets"
        settings["tokenserver.secrets.secrets"] = [secret]
    if "tokenserver.allow_new_users" not in settings:
        allow_new_users = settings.get("syncserver.allow_new_users")
        if allow_new_users is not None:
            settings["tokenserver.allow_new_users"] = allow_new_users
    if "hawkauth.secrets.backend" not in settings:
        # Default to the same secrets backend as the tokenserver
        for key in settings.keys():
            if key.startswith("tokenserver.secrets."):
                newkey = "hawkauth" + key[len("tokenserver"):]
                settings[newkey] = settings[key]
    if "storage.backend" not in settings:
        # Default to sql syncstorage backend
        settings["storage.backend"] = "syncstorage.storage.sql.SQLStorage"
        settings["storage.sqluri"] = sqluri
        settings["storage.create_tables"] = True
    if "storage.batch_upload_enabled" not in settings:
        settings["storage.batch_upload_enabled"] = True
    if "browserid.backend" not in settings:
        # Default to local verifier to reduce external dependencies,
        # unless an explicit verifier URL has been configured.
        verifier_url = settings.get("syncserver.browserid_verifier")
        if not verifier_url:
            settings["browserid.backend"] = \
                "tokenserver.verifiers.LocalBrowserIdVerifier"
        else:
            settings["browserid.backend"] = \
                "tokenserver.verifiers.RemoteBrowserIdVerifier"
            settings["browserid.verifier_url"] = verifier_url
        # Use base of public_url as only audience
        audience = urlunparse(urlparse(public_url)._replace(path=""))
        settings["browserid.audiences"] = audience
        # If an IdP was specified, allow it and only it as issuer.
        if idp is not None:
            settings["browserid.trusted_issuers"] = [idp_issuer]
            settings["browserid.allowed_issuers"] = [idp_issuer]
    if "oauth.backend" not in settings:
        settings["oauth.backend"] = "tokenserver.verifiers.RemoteOAuthVerifier"
        # If an IdP was specified, use it for oauth verification.
        if idp is not None:
            settings["oauth.server_url"] = idp_config["oauth_server_base_url"]
            settings["oauth.default_issuer"] = idp_issuer
    if "loggers" not in settings:
        # Default to basic logging config.
        root_logger = logging.getLogger("")
        if not root_logger.handlers:
            logging.basicConfig(level=logging.WARN)
    if "fxa.metrics_uid_secret_key" not in settings:
        # Default to a randomly-generated secret.
        # This setting isn't useful in a self-hosted setup
        # and setting a default avoids scary-sounding warnings.
        settings["fxa.metrics_uid_secret_key"] = generate_random_hex_key(32)

    # Include the relevant sub-packages.
    config.scan("syncserver", ignore=["syncserver.wsgi_app"])
    config.include("syncstorage", route_prefix="/storage")
    config.include("tokenserver", route_prefix="/token")

    # Add a top-level "it works!" view.
    def itworks(request):
        return Response("it works!")

    config.add_route('itworks', '/')
    config.add_view(itworks, route_name='itworks')
def main(cmdline):
    """main(cmdline) -- process cmdline as if it were sys.argv"""
    # parse options/files
    options = []
    optvalues = {}
    for opt in cmdline[1:]:
        if opt.startswith('-'):
            if ':' in opt:
                k, v = tuple(opt.split(':', 1))
                optvalues[k] = v
                options.append(k)
            else:
                options.append(opt)
        else:
            break
    files = cmdline[len(options)+1:]

    ### create converting object

    verbose = ('-v' in options)
    # load fontifier
    if '-marcs' in options:
        # use mxTextTool's tagging engine as fontifier
        from mx.TextTools import tag
        from mx.TextTools.Examples.Python import python_script
        tagfct = lambda text, tag=tag, pytable=python_script: tag(
            text, pytable)[1]
        print "Py2HTML: using Marc's tagging engine"
    else:
        # load Just's fontifier
        try:
            import PyFontify
            if PyFontify.__version__ < '0.3':
                raise ImportError
            tagfct = PyFontify.fontify
        except ImportError:
            print """
Sorry, but this script needs the PyFontify.py module version 0.3;
You can download it from Just's homepage at

   URL: http://starship.python.net/~just/
"""
            sys.exit()
    if '-format' in options:
        format = optvalues['-format']
    else:
        # use default
        format = 'html'
    if '-mode' in options:
        mode = optvalues['-mode']
    else:
        # use default
        mode = 'color'
    c = PrettyPrint(tagfct, format, mode)
    convert = c.file_filter

    ### start working

    if '-title' in options:
        c.title = optvalues['-title']
    if '-bgcolor' in options:
        c.bgcolor = optvalues['-bgcolor']
    if '-header' in options:
        try:
            f = open(optvalues['-header'])
            c.header = f.read()
            f.close()
        except IOError:
            if verbose:
                print 'IOError: header file not found'
    if '-footer' in options:
        try:
            f = open(optvalues['-footer'])
            c.footer = f.read()
            f.close()
        except IOError:
            if verbose:
                print 'IOError: footer file not found'
    if '-URL' in options:
        c.replace_URLs = True
    if '-' in options:
        convert(sys.stdin, sys.stdout)
        sys.exit()
    if '-h' in options:
        print __doc__
        sys.exit()
    if not files:
        # Turn URL processing on
        c.replace_URLs = True
        # Try CGI processing...
        import cgi, urllib, urlparse, os
        form = cgi.FieldStorage()
        if 'script' not in form:
            # Ok, then try pathinfo
            if 'PATH_INFO' not in os.environ:
                if INPUT_FORM:
                    redirect_to(INPUT_FORM)
                else:
                    sys.stdout.write('Content-Type: text/html\r\n\r\n')
                    write_html_error('Missing Parameter',
                                     'Missing script=URL field in request')
                sys.exit(1)
            url = os.environ['PATH_INFO'][1:]  # skip the leading slash
        else:
            url = form['script'].value
        sys.stdout.write('Content-Type: text/html\r\n\r\n')
        scheme, host, path, params, query, frag = urlparse.urlparse(url)
        if not host:
            scheme = 'http'
            host = os.environ.get('HTTP_HOST', 'localhost')
            url = urlparse.urlunparse((scheme, host, path, params,
                                       query, frag))
        #print url; sys.exit()
        network = urllib.URLopener()
        try:
            tempfile, headers = network.retrieve(url)
        except IOError as reason:
            write_html_error('Error opening "%s"' % url,
                             'The given URL could not be opened. '
                             'Reason: %s' % str(reason))
            sys.exit(1)
        f = open(tempfile, 'rb')
        c.title = url
        c.footer = __cgifooter__
        convert(f, sys.stdout)
        f.close()
        network.close()
        sys.exit()
    if '-stdout' in options:
        filebreak = '-'*72
        for f in files:
            try:
                if len(files) > 1:
                    print filebreak
                    print 'File:', f
                    print filebreak
                convert(f, sys.stdout)
            except IOError:
                pass
    else:
        if verbose:
            print 'Py2HTML: working on',
        for f in files:
            try:
                if verbose:
                    print f,
                convert(f, f+'.html')
            except IOError:
                if verbose:
                    print '(IOError!)',
        if verbose:
            print
            print 'Done.'
def remove_fragment(url):
    scheme, netloc, url, params, query, fragment = urlparse.urlparse(url)
    return urlparse.urlunparse((scheme, netloc, url, params, query, ''))
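# Example (hypothetical URL):
#
#   remove_fragment('http://example.com/page?x=1#section-2')
#   # -> 'http://example.com/page?x=1'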
def list_repos(compact=False):
    """
    List the Zoomdata repositories which are locally configured.

    compact : False
        Set ``True`` to get compact dictionary containing the Zoomdata
        repositories configuration

    CLI Example:

    .. code-block:: bash

        salt '*' zoomdata.list_repos
    """
    repo_config = {
        'base_url': None,
        'gpgkey': None,
        'release': None,
        'repositories': [],
        'components': [],
    }
    repos = {
        k: v for (k, v) in __salt__['pkg.list_repos']().items()  # pylint: disable=undefined-variable
        if k.startswith(ZOOMDATA)
    }
    if not compact:
        return repos
    for repo in repos:
        # Skip repository discovery if disabled
        if not int(repos[repo].get('enabled', 0)):
            continue
        url = urlparse.urlparse(repos[repo]['baseurl'].strip())
        if not repo_config['base_url']:
            repo_config['base_url'] = urlparse.urlunparse(
                (url.scheme, url.netloc, '', '', '', ''))
        try:
            if not repo_config['gpgkey'] and 'gpgkey' in repos[repo] and \
                    int(repos[repo].get('gpgcheck', '0')):
                repo_config['gpgkey'] = repos[repo]['gpgkey'].strip()
        except ValueError:
            pass
        repo_root = url.path.split('/')[1]
        log.debug("zoomdata.list_repos: Processing repo_root: %s" % repo_root)
        try:
            if repo_root == 'latest':
                repo_config['release'] = repo_root
            else:
                if not StrictVersion(repo_root):
                    raise ValueError
                # repo_root is a string like '5.8' or '5.10'
                if isinstance(repo_config['release'], type(None)):
                    repo_config['release'] = repo_root
                elif isinstance(repo_config['release'], str) and \
                        LooseVersion(repo_root) > \
                        LooseVersion(repo_config['release']):
                    repo_config['release'] = repo_root
        except ValueError:
            # Collect all other unique repos which are not release numbers,
            # such as ``tools`` for example.
            if repo_root not in repo_config['repositories']:
                repo_config['repositories'].append(repo_root)
        component = url.path.rstrip('/').rsplit('/')[-1]
        if component not in repo_config['components']:
            repo_config['components'].append(component)
    return repo_config
def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """
    (protocol, host, path, parameters, query, fragment) = \
        urlparse.urlparse(request.url())

    payload = ''
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
        method = 'GET'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
        method = 'POST'
        payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
        method = 'HEAD'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
        method = 'PUT'
        payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
        method = 'DELETE'
    else:
        logging.error('Invalid method: %s', request.method())
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR)

    if not (protocol == 'http' or protocol == 'https'):
        logging.error('Invalid protocol: %s', protocol)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    url = urlparse.urlunparse(('', '', path, parameters, query, fragment))
    logging.debug('Fetching URL: %s', url)

    headers = {
        'Content-Length': len(payload),
        'Host': host,
        'Accept': '*/*',
    }
    for header in request.header_list():
        headers[header.key()] = header.value()

    logging.debug('Making HTTP request: host = %s, '
                  'url = %s, payload = %s, headers = %s',
                  host, url, payload, headers)
    try:
        if protocol == 'http':
            connection = httplib.HTTPConnection(host)
        elif protocol == 'https':
            connection = httplib.HTTPSConnection(host)
        else:
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.INVALID_URL)
        try:
            connection.request(method, url, payload, headers)
            http_response = connection.getresponse()
            http_response_data = http_response.read()
        finally:
            connection.close()
    except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
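# A minimal illustration of the request-line rebuild above: urlunparse with
# blank scheme and netloc yields just the path-and-after portion that
# httplib's request() expects. The URL is made up for the example.
import urlparse
parts = urlparse.urlparse('http://example.com/a/b;p?q=1#frag')
request_url = urlparse.urlunparse(('', '', parts[2], parts[3], parts[4], parts[5]))
assert request_url == '/a/b;p?q=1#frag'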
def notify(cls, resource_id):
    """
    Asynchronous task to notify a subscriber about updates,
    runs a POST?format=msg request against the subscribed
    controller which extracts the data and renders and sends
    the notification message (see send()).

    @param resource_id: the pr_subscription_resource record ID
    """
    _debug = current.log.debug
    _debug("S3Notifications.notify(resource_id=%s)" % resource_id)

    db = current.db
    s3db = current.s3db

    stable = s3db.pr_subscription
    rtable = db.pr_subscription_resource
    ftable = s3db.pr_filter

    # Extract the subscription data
    join = stable.on(rtable.subscription_id == stable.id)
    left = ftable.on(ftable.id == stable.filter_id)
    # @todo: should not need rtable.resource here
    row = db(rtable.id == resource_id).select(stable.id,
                                              stable.pe_id,
                                              stable.frequency,
                                              stable.notify_on,
                                              stable.method,
                                              stable.email_format,
                                              stable.attachment,
                                              rtable.id,
                                              rtable.resource,
                                              rtable.url,
                                              rtable.last_check_time,
                                              ftable.query,
                                              join=join,
                                              left=left).first()
    if not row:
        return True

    s = getattr(row, "pr_subscription")
    r = getattr(row, "pr_subscription_resource")
    f = getattr(row, "pr_filter")

    # Create a temporary token to authorize the lookup request
    auth_token = str(uuid4())

    # Store the auth_token in the subscription record
    r.update_record(auth_token=auth_token)
    db.commit()

    # Construct the send-URL
    public_url = current.deployment_settings.get_base_public_url()
    lookup_url = "%s/%s/%s" % (public_url,
                               current.request.application,
                               r.url.lstrip("/"))

    # Break up the URL into its components
    purl = list(urlparse.urlparse(lookup_url))

    # Subscription parameters
    # Date (must ensure we pass to REST as tz-aware)
    last_check_time = s3_encode_iso_datetime(r.last_check_time)
    query = {"subscription": auth_token, "format": "msg"}
    if "upd" in s.notify_on:
        query["~.modified_on__ge"] = "%sZ" % last_check_time
    else:
        query["~.created_on__ge"] = "%sZ" % last_check_time

    # Filters
    if f.query:
        from s3filter import S3FilterString
        resource = s3db.resource(r.resource)
        fstring = S3FilterString(resource, f.query)
        for k, v in fstring.get_vars.iteritems():
            if v is not None:
                if k in query:
                    value = query[k]
                    if type(value) is list:
                        value.append(v)
                    else:
                        query[k] = [value, v]
                else:
                    query[k] = v
        query_nice = s3_unicode(fstring.represent())
    else:
        query_nice = None

    # Add subscription parameters and filters to the URL query, and
    # put the URL back together
    query = urlencode(query)
    if purl[4]:
        query = "&".join((purl[4], query))
    page_url = urlparse.urlunparse([purl[0],  # scheme
                                    purl[1],  # netloc
                                    purl[2],  # path
                                    purl[3],  # params
                                    query,    # query
                                    purl[5],  # fragment
                                    ])

    # Serialize data for send (avoid second lookup in send)
    data = json.dumps({"pe_id": s.pe_id,
                       "notify_on": s.notify_on,
                       "method": s.method,
                       "email_format": s.email_format,
                       "attachment": s.attachment,
                       "resource": r.resource,
                       "last_check_time": last_check_time,
                       "filter_query": query_nice,
                       "page_url": lookup_url,
                       "item_url": None,
                       })

    # Send the request
    _debug("Requesting %s" % page_url)
    req = urllib2.Request(page_url, data=data)
    req.add_header("Content-Type", "application/json")
    success = False
    try:
        response = json.loads(urllib2.urlopen(req).read())
        message = response["message"]
        if response["status"] == "success":
            success = True
    except urllib2.HTTPError, e:
        message = ("HTTP %s: %s" % (e.code, e.read()))
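# The query-merge idiom used above, in isolation: new parameters are
# urlencoded and appended after whatever query string is already on the URL.
# The URL and parameter values here are placeholders.
import urlparse
from urllib import urlencode
purl = list(urlparse.urlparse('http://host/app/resource?vars=1'))
extra = urlencode({'subscription': 'TOKEN', 'format': 'msg'})
purl[4] = "&".join((purl[4], extra)) if purl[4] else extra
page_url = urlparse.urlunparse(purl)
# e.g. 'http://host/app/resource?vars=1&subscription=TOKEN&format=msg'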
def _get_automatic_captions(self, video_id, webpage=None):
    sub_tracks = []
    if webpage is None:
        url = 'http://www.youtube.com/watch?v=%s&hl=%s&has_verified=1' % (
            video_id, GetDefaultLang())
        sts, webpage = self.cm.getPage(url)
        if not sts:
            return sub_tracks
    sts, data = self.cm.ph.getDataBeetwenMarkers(webpage, ';ytplayer.config =', '};', False)
    if not sts:
        return sub_tracks
    try:
        player_config = json_loads(data.strip() + '}')
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = urllib.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            sts, caption_list = self.cm.getPage(list_url)
            if not sts:
                return sub_tracks
            printDBG(caption_list)
            # The track list comes back as XML; parse it so the track and
            # target nodes below can be inspected
            from xml.etree import ElementTree
            caption_list = ElementTree.fromstring(caption_list)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                return []
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = urllib.urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        parsed_caption_url = urlparse(caption_url)
        caption_qs = compat_parse_qs(parsed_caption_url.query)

        sub_lang_list = {}
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(urllib.unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if not sub_lang:
                continue
            caption_qs.update({
                'tlang': [sub_lang],
                'fmt': ['vtt'],
            })
            sub_url = urlunparse(
                parsed_caption_url._replace(
                    query=urllib.urlencode(caption_qs, True)))
            sub_tracks.append({
                'title': lang_qs['n'][0].encode('utf-8'),
                'url': sub_url,
                'lang': sub_lang.encode('utf-8'),
                'ytid': len(sub_tracks),
                'format': 'vtt'
            })
    except Exception:
        printExc()
    return sub_tracks
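# The ParseResult._replace trick used above, standalone: swap only the query
# component of a parsed URL. The URL and parameter values are illustrative.
import urllib
from urlparse import urlparse, urlunparse, parse_qs
parsed = urlparse('http://host/api/timedtext?lang=en&fmt=srv1')
qs = parse_qs(parsed.query)
qs.update({'tlang': ['de'], 'fmt': ['vtt']})
new_url = urlunparse(parsed._replace(query=urllib.urlencode(qs, True)))
# e.g. 'http://host/api/timedtext?lang=en&tlang=de&fmt=vtt'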
def DeviceDetail(self, **unused_args):
    """Query for a specific device."""
    errormsg = None
    device_id = self.request.get('device_id')
    device = model.DeviceInfo.GetDeviceWithAcl(device_id)
    try:
        if not device:
            errormsg = 'Device %s not found' % device_id
            template_args = {
                'error': errormsg,
                'user': users.get_current_user().email(),
                'logout_link': users.create_logout_url('/')
            }
            self.response.out.write(
                template.render('templates/devicedetail.html', template_args))
            return

        # Get set of properties associated with this device
        query = device.deviceproperties_set
        query.order('-timestamp')
        properties = query.fetch(config.NUM_PROPERTIES_IN_LIST)

        # Get current tasks assigned to this device
        cur_schedule = [
            device_task.task for device_task in device.devicetask_set
        ]

        # Get measurements
        cursor = self.request.get('measurement_cursor')
        if self.request.get('all') == '1':
            query = db.GqlQuery('SELECT * FROM Measurement '
                                'WHERE ANCESTOR IS :1 '
                                'ORDER BY timestamp DESC', device.key())
        else:
            query = db.GqlQuery('SELECT * FROM Measurement '
                                'WHERE ANCESTOR IS :1 AND success = TRUE '
                                'ORDER BY timestamp DESC', device.key())
        if cursor:
            query.with_cursor(cursor)
        measurements = query.fetch(config.NUM_MEASUREMENTS_IN_LIST)

        # If there are more measurements to show, give the user a cursor
        if len(measurements) == config.NUM_MEASUREMENTS_IN_LIST:
            cursor = query.cursor()
            parsed_url = list(urlparse.urlparse(self.request.url))
            url_query_dict = {
                'device_id': device_id,
                'measurement_cursor': cursor,
                'all': self.request.get('all')
            }
            parsed_url[4] = urllib.urlencode(url_query_dict)
            more_measurements_link = urlparse.urlunparse(parsed_url)
        else:
            more_measurements_link = None

        template_args = {
            'error': errormsg,
            'device_id': device_id,
            'dev': device,
            'properties': properties,
            'measurements': measurements,
            'more_measurements_link': more_measurements_link,
            'schedule': cur_schedule,
            'user': users.get_current_user().email(),
            'logout_link': users.create_logout_url('/'),
        }
        self.response.out.write(
            template.render('templates/devicedetail.html', template_args))
    except:
        raise
def expander(self):
    # the json object to return
    data = {}
    # get the passed url
    url = self.request.get("url")
    # url has no scheme, default to http
    url = url if url_regex.match(url) is not None else "http://" + url

    # fix IDNA urls
    error = False
    try:
        # parse url into its components
        parsed = list(urlparse.urlparse(url))
        # loop each label in the domain and convert them to ascii
        parsed[1] = ".".join([
            encodings.idna.ToASCII(domain)
            for domain in parsed[1].split(".")
        ])
        url = urlparse.urlunparse(parsed)
    except Exception as e:
        data["status"] = "InternalError"
        error = True

    # put together the basic data
    data["urls"] = [url]
    data["start_url"] = url
    data["end_url"] = url

    if not error:
        # if the input URL still doesn't start with http:// or https://, discard it
        if not url.startswith("http://") and not url.startswith("https://"):
            data["status"] = "InvalidURL"
        else:
            requests = 0
            # follow redirects, max x times
            while requests < max_redirects:
                requests += 1
                try:
                    # fetch the url _without_ following redirects, we handle them manually
                    response = google.appengine.api.urlfetch.fetch(
                        url,
                        follow_redirects=False,
                        allow_truncated=True,
                        method="HEAD")
                except:
                    data["status"] = "InvalidURL"
                    break
                if response.status_code in (300, 301, 302, 303, 307):
                    if "location" in response.headers:
                        location = response.headers["location"]
                    elif "Location" in response.headers:
                        location = response.headers["Location"]
                    else:
                        data["status"] = "OK"
                        break
                    # check if the url is relative or absolute
                    if location.startswith('/'):
                        parsedloc = list(urlparse.urlparse(location))
                        parsedurl = list(urlparse.urlparse(url))
                        url = urlparse.urlunparse(parsedurl[:2] + parsedloc[2:])
                    else:
                        url = location
                else:
                    # no more redirects; we're done
                    data["status"] = "OK"
                    break
                # add the current url to the urls array in the output
                data["urls"].append(url)
            else:
                data["status"] = "TooManyRedirects"

    data["redirects"] = len(data["urls"]) - 1
    data["end_url"] = url

    # output in json
    self.response.out.write(json.dumps(data))
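# The relative-redirect handling above, sketched standalone with made-up
# URLs: the scheme and netloc of the current URL are kept, everything after
# them comes from the Location header. urlparse.urljoin would be the more
# general tool, but this mirrors the absolute-path case handled in expander().
import urlparse
url = 'http://example.com/old/page'
location = '/new/page?x=1'
parsedloc = list(urlparse.urlparse(location))
parsedurl = list(urlparse.urlparse(url))
assert urlparse.urlunparse(parsedurl[:2] + parsedloc[2:]) == \
    'http://example.com/new/page?x=1'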
def iriToUri(iri):
    parts = urlparse.urlparse(iri)
    return urlparse.urlunparse(
        part.encode('idna') if parti == 1 else urlEncodeNonAscii(part.encode('utf-8'))
        for parti, part in enumerate(parts))
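# Example use of iriToUri, assuming urlEncodeNonAscii (defined elsewhere in
# the same module) percent-encodes the non-ASCII bytes of its argument: the
# host label is IDNA-encoded, the path is percent-encoded.
#
#   iriToUri(u'http://www.a\u0131b.com/a\u0131b')
#   -> 'http://www.xn--ab-hpa.com/a%c4%b1b'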
def test_unparse_parse(self):
    for u in ['Python', './Python', 'x-newscheme://foo.com/stuff']:
        self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
        self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
def open_url(url, data=None, headers=None, method=None, use_proxy=True,
             force=False, last_mod_time=None, timeout=10, validate_certs=True,
             url_username=None, url_password=None, http_agent=None,
             force_basic_auth=False, follow_redirects='urllib2'):
    '''
    Fetches a file from an HTTP/FTP server using urllib2
    '''
    handlers = []
    ssl_handler = maybe_add_ssl_handler(url, validate_certs)
    if ssl_handler:
        handlers.append(ssl_handler)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse.urlparse(url)
    if parsed[0] != 'ftp':
        username = url_username

        if headers is None:
            headers = {}

        if username:
            password = url_password
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''

            parsed = list(parsed)
            parsed[1] = netloc

            # reconstruct url without credentials
            url = urlparse.urlunparse(parsed)

        if username and not force_basic_auth:
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()

            # this creates a password manager
            passman.add_password(None, netloc, username, password)

            # because we have put None at the start it will always
            # use this username/password combination for urls
            # for which `theurl` is a super-url
            authhandler = urllib2.HTTPBasicAuthHandler(passman)

            # create the AuthHandler
            handlers.append(authhandler)

        elif username and force_basic_auth:
            headers["Authorization"] = basic_auth_header(username, password)

        else:
            try:
                rc = netrc.netrc(os.environ.get('NETRC'))
                login = rc.authenticators(parsed[1])
            except IOError:
                login = None

            if login:
                username, _, password = login
                if username and password:
                    headers["Authorization"] = basic_auth_header(username, password)

    if not use_proxy:
        proxyhandler = urllib2.ProxyHandler({})
        handlers.append(proxyhandler)

    if HAS_SSLCONTEXT and not validate_certs:
        # In 2.7.9, the default context validates certificates
        context = SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.options |= ssl.OP_NO_SSLv3
        context.verify_mode = ssl.CERT_NONE
        context.check_hostname = False
        handlers.append(urllib2.HTTPSHandler(context=context))

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking create_connection.
    # Some python builds lack HTTPS support.
    if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
        handlers.append(CustomHTTPSHandler)

    handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs))

    opener = urllib2.build_opener(*handlers)
    urllib2.install_opener(opener)

    if method:
        if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                  'DELETE', 'TRACE', 'CONNECT', 'PATCH'):
            raise ConnectionError('invalid HTTP request method; %s' % method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib2.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    request.add_header('User-agent', http_agent)

    # if we're ok with getting a 304, set the timestamp in the
    # header, otherwise make sure we don't get a cached copy
    if last_mod_time and not force:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)
    else:
        request.add_header('cache-control', 'no-cache')

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            raise ValueError("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    urlopen_args = [request, None]
    if sys.version_info >= (2, 6, 0):
        # urlopen in python prior to 2.6.0 did not
        # have a timeout parameter
        urlopen_args.append(timeout)

    r = urllib2.urlopen(*urlopen_args)
    return r
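# The credential-stripping step of open_url in isolation: a user:pass@host
# URL is rebuilt without the credentials before the request goes out. The
# URL and credentials here are made up.
import urlparse
parsed = list(urlparse.urlparse('http://user:secret@example.com/path'))
credentials, netloc = parsed[1].split('@', 1)
username, password = credentials.split(':', 1)
parsed[1] = netloc
url = urlparse.urlunparse(parsed)
assert url == 'http://example.com/path'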
def get_repo_url(self):
    return urlparse.urlunparse(
        ("file", "", urllib.pathname2url(self.__dir), "", "", ""))
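# What get_repo_url produces for a POSIX directory, as a quick check with a
# placeholder path:
import urllib
import urlparse
print urlparse.urlunparse(
    ("file", "", urllib.pathname2url("/tmp/repo"), "", "", ""))
# -> file:/tmp/repo  (urlunparse only emits '//' when the netloc is non-empty)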
def _create_request(self, destination, method, path_bytes,
                    body_callback, headers_dict={}, param_bytes=b"",
                    query_bytes=b"", retry_on_dns_fail=True,
                    timeout=None, long_retries=False):
    """ Creates and sends a request to the given url
    """
    headers_dict[b"User-Agent"] = [self.version_string]
    headers_dict[b"Host"] = [destination]

    url_bytes = self._create_url(
        destination, path_bytes, param_bytes, query_bytes
    )

    txn_id = "%s-O-%s" % (method, self._next_id)
    self._next_id = (self._next_id + 1) % (sys.maxint - 1)

    outbound_logger.info(
        "{%s} [%s] Sending request: %s %s",
        txn_id, destination, method, url_bytes
    )

    # XXX: Would be much nicer to retry only at the transaction-layer
    # (once we have reliable transactions in place)
    if long_retries:
        retries_left = MAX_LONG_RETRIES
    else:
        retries_left = MAX_SHORT_RETRIES

    http_url_bytes = urlparse.urlunparse(
        ("", "", path_bytes, param_bytes, query_bytes, "")
    )

    log_result = None
    try:
        while True:
            producer = None
            if body_callback:
                producer = body_callback(method, http_url_bytes, headers_dict)

            try:
                def send_request():
                    request_deferred = preserve_context_over_fn(
                        self.agent.request,
                        method,
                        url_bytes,
                        Headers(headers_dict),
                        producer
                    )

                    return self.clock.time_bound_deferred(
                        request_deferred,
                        time_out=timeout / 1000. if timeout else 60,
                    )

                response = yield preserve_context_over_fn(send_request)

                log_result = "%d %s" % (response.code, response.phrase,)
                break
            except Exception as e:
                if not retry_on_dns_fail and isinstance(e, DNSLookupError):
                    logger.warn("DNS Lookup failed to %s with %s",
                                destination, e)
                    log_result = "DNS Lookup failed to %s with %s" % (
                        destination, e)
                    raise

                logger.warn(
                    "{%s} Sending request failed to %s: %s %s: %s - %s",
                    txn_id, destination, method, url_bytes,
                    type(e).__name__, _flatten_response_never_received(e),
                )

                log_result = "%s - %s" % (
                    type(e).__name__, _flatten_response_never_received(e),
                )

                if retries_left and not timeout:
                    if long_retries:
                        delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
                        delay = min(delay, 60)
                        delay *= random.uniform(0.8, 1.4)
                    else:
                        delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
                        delay = min(delay, 2)
                        delay *= random.uniform(0.8, 1.4)

                    yield sleep(delay)
                    retries_left -= 1
                else:
                    raise
    finally:
        outbound_logger.info(
            "{%s} [%s] Result: %s",
            txn_id, destination, log_result,
        )

    if 200 <= response.code < 300:
        pass
    else:
        # :'(
        # Update transactions table?
        body = yield preserve_context_over_fn(readBody, response)
        raise HttpResponseException(
            response.code, response.phrase, body
        )

    defer.returnValue(response)
def _create_oauth_tokens(dev_appserver_url,
                         email_and_password_of_user_wanting_access,
                         anointed):
    """Helper for _setup_dev_appserver_for_oauth, to create two token-sets."""
    if anointed:
        name = 'test_consumer_anointed'
    else:
        name = 'test_consumer_not_anointed'

    # First, we need to create a user.
    (user_data, _, _) = _create_user('test_user_for_oauth_token')

    # To start the oauth request, we need a consumer token.
    consumer_object = models_oauth.Consumer.get_or_insert(
        key_name=name,
        name=name,
        description=name,
        website='',
        user=user_data.user,
        status=consts.ACCEPTED,
        phone='',
        company='',
        anointed=anointed)
    if not consumer_object.secret:   # we just created it
        consumer_object.generate_random_codes()
    consumer = oauth.OAuthConsumer(consumer_object.key_, consumer_object.secret)

    # Next, create an oauth request token.
    request = oauth.OAuthRequest.from_consumer_and_token(
        consumer,
        http_url='%s/api/auth/request_token' % dev_appserver_url)
    request.sign_request(oauth.OAuthSignatureMethod_PLAINTEXT(), consumer, None)

    request_token_req = urllib2.urlopen(request.to_url())
    if request_token_req.code != 200:
        raise RuntimeError('Unable to get the request token, '
                           'instead got %d: "%s"'
                           % (request_token_req.code, request_token_req.read()))

    # Next, we need to register the request token in Khan Academy
    # oauth-map.  The response from the request-token fetch is the url
    # we need to hit to do this: probably /login/mobileoauth?<stuff>.
    # The only thing we need to add are the name and password of the
    # user who wants access (which can/will be different from the user
    # who created the consumer key, above).
    scheme, netloc, path, params, query, fragment = \
        urlparse.urlparse(request_token_req.geturl())
    query += ('&identifier=%s&password=%s'
              % email_and_password_of_user_wanting_access)
    # The query string is sent as POST data below, so rebuild the URL
    # with an empty query component.
    oauth_map_url = urlparse.urlunparse(
        (scheme, netloc, path, params, '', fragment))
    oauth_map_req = urllib2.urlopen(oauth_map_url, query)
    contents = oauth_map_req.read()
    if contents != 'OK':
        raise RuntimeError('Unable to get the access token, instead got: "%s"'
                           % contents)

    # The url that we end up with after going through
    # /login/mobileauth, which yields a *second* request token.
    # TODO(csilvers): figure out what's going on here.
    oauth_map_key_and_secret = urlparse.urlparse(oauth_map_req.geturl())[4]
    oauth_map_token = oauth.OAuthToken.from_string(oauth_map_key_and_secret)

    # Finally, we can get the access token from the previous request token.
    request = oauth.OAuthRequest.from_consumer_and_token(
        consumer,
        token=oauth_map_token,
        http_url="%s/api/auth/access_token" % dev_appserver_url)
    request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(),
                         consumer, oauth_map_token)

    access_token_req = urllib2.urlopen(request.to_url())
    if access_token_req.code != 200:
        raise RuntimeError('Unable to get the access token, '
                           'instead got %d: "%s"'
                           % (access_token_req.code, access_token_req.read()))
    access_key_and_secret_str = access_token_req.read()
    access_token = oauth.OAuthToken.from_string(access_key_and_secret_str)

    return (consumer, access_token)
def msg_search_subscription_notifications(frequency):
    """
        Send Notifications for all Subscriptions
    """
    s3db = current.s3db
    table = s3db.pr_saved_search

    if frequency not in dict(table.notification_frequency.requires.options()):
        return

    db = current.db
    searches = db(table.notification_frequency == frequency).select()
    if not searches:
        return

    import urlparse
    from urllib import urlencode
    from uuid import uuid4
    try:
        import json  # try stdlib (Python 2.6)
    except ImportError:
        try:
            import simplejson as json  # try external module
        except:
            import gluon.contrib.simplejson as json  # fallback to pure-Python module
    loads = json.loads

    from gluon.tools import fetch

    msg = current.msg
    settings = current.deployment_settings
    public_url = settings.get_base_public_url()
    system_name_short = settings.get_system_name_short()

    def send(search, message):
        if not message:
            return
        # Send the email
        msg.send_by_pe_id(search.pe_id,
                          subject="%s Search Notification %s" %
                                  (system_name_short, search.name),
                          message=message)

    for search in searches:
        # Fetch the latest records from the search
        # search.url has no host
        search_url = "%s%s" % (public_url, search.url)

        # Create a temporary token for this search
        # that will be used when impersonating users
        auth_token = uuid4()
        search.update_record(auth_token=auth_token)
        # Commit so that when we request via http, then we'll see the change
        db.commit()

        # Parsed URL, break up the URL into its components
        purl = list(urlparse.urlparse(search_url))

        if search.notification_batch:
            # Send all records in a single notification

            # query string parameters to be added to the search URL
            page_qs_parms = {
                "search_subscription": auth_token,
                "%s.modified_on__ge" % (search.resource_name): search.last_checked,
                "format": "email",
            }

            # Turn the parameter list into a URL query string
            page_qs = urlencode(page_qs_parms)

            # Put the URL back together
            page_url = urlparse.urlunparse([
                purl[0],  # scheme
                purl[1],  # netloc
                purl[2],  # path
                purl[3],  # params
                "&".join([purl[4], page_qs]),  # query
                purl[5],  # fragment
            ])

            message = fetch(page_url)

            # Send the email
            send(search, message)
        else:
            # Not batch

            # query string parameters to be added to the search URL
            page_qs_parms = {
                "search_subscription": auth_token,
                "%s.modified_on__ge" % (search.resource_name): search.last_checked,
                "format": "json",
            }

            # Turn the parameter list into a URL query string
            page_qs = urlencode(page_qs_parms)

            # Put the URL back together
            page_url = urlparse.urlunparse([
                purl[0],  # scheme
                purl[1],  # netloc
                purl[2],  # path
                purl[3],  # params
                "&".join([purl[4], page_qs]),  # query
                purl[5],  # fragment
            ])

            # Fetch the record list as json
            json_string = fetch(page_url)

            if json_string:
                records = loads(json_string)

                for record in records:
                    email_qs = urlencode({
                        "search_subscription": auth_token,
                        "format": "email",
                        "%s.id__eq" % search.resource_name: record["id"],
                    })
                    email_url = urlparse.urlunparse([
                        purl[0],  # scheme
                        purl[1],  # netloc
                        purl[2],  # path
                        purl[3],  # params
                        email_qs,  # query
                        purl[5],  # fragment
                    ])

                    message = fetch(email_url)

                    # Send the email
                    send(search, message)

    # Update the saved searches to indicate they've just been checked
    # & revoke the temporary token
    query = (table.notification_frequency == frequency) & \
            (table.deleted != True)

    db(query).update(last_checked=datetime.datetime.utcnow(),
                     auth_token=None)

    # Explicitly commit
    db.commit()