def func(m):
    """Return the replacement text for one regex match of a URL reference.

    The matched URL is expanded with the ``fullurl`` template, prefixed with
    a scheme, encoded with ``b64.uri_b64encode`` and substituted into the
    ``result`` template.  URLs that start with ``#`` or ``javascript``
    (compared case-insensitively) are emitted without encoding.

    NOTE(review): ``number``, ``mvalue``, ``fullurl``, ``result``,
    ``url_obj``, ``base_url``, ``accessed_dir`` and ``b64`` are not defined
    in this block; they are assumed to come from an enclosing scope that is
    not visible here -- confirm against the caller.

    Args:
        m: regex match object.  It must expose a ``url`` group and may
            expose ``scheme``, ``relative``, ``tag``, ``equals``, ``quote``
            and ``spacing`` groups.

    Returns:
        The ``result`` template filled in for this match.
    """
    groups = m.groupdict()
    url = m.group('url')

    # The pattern may have captured one trailing character (';' or the '/'
    # of a self-closing '/>') as part of the URL.  When that is the case the
    # character is cut off before encoding and re-appended afterwards.
    # NOTE(review): ``number`` presumably identifies the pattern that
    # produced this match -- confirm.
    closing_at = m.string.find('/>')
    endwith_slash_gt = (
        (url.endswith(";") and number in ["9", "6", "8"])
        or (url.endswith("/") and number in ["1", "3", "5"]
            and closing_at >= 0
            and mvalue(m, 'tag') == 'src'
            and m.end('url') == closing_at + 1)
    )

    last = url[-1:]
    url_in = url[:-1] if endwith_slash_gt else url

    # Bookmarks (fragments) and javascript: links must not be proxied.
    # The comparison is case-insensitive so that e.g. 'JavaScript:' is
    # recognised as well.
    no_b64_encoding = url.lstrip().lower().startswith(('#', 'javascript'))

    if no_b64_encoding:
        result_fullurl = fullurl % {
            "base": '',
            "accessed_dir": '',
            'url': url,
            'relative': '',
        }
        # Drop the first character produced by the template.
        result_fullurl = result_fullurl[1:]
    else:
        # Prefer the scheme captured by the pattern; fall back to the scheme
        # of the page being accessed when it is missing or empty.
        scheme = groups.get('scheme') or url_obj.scheme
        result_fullurl = '%s:/' % scheme.replace(":", "") + fullurl % {
            "base": base_url,
            "accessed_dir": (accessed_dir),
            'url': url_in,
            'relative': groups.get('relative') or '',
        }

    if no_b64_encoding and result_fullurl:
        final_url = result_fullurl
    else:
        final_url = (
            '/' + b64.uri_b64encode(result_fullurl)
            + (last if endwith_slash_gt else "")
        )

    return result % {
        'fullurl': final_url,
        'tag': groups.get('tag') or '',
        'equals': groups.get('equals') or '',
        'quote': groups.get('quote') or '',
        'spacing': groups.get('spacing') or '',
    }
def post(self):
    """Handle the URL form: redirect to the encoded path or show the form.

    When the request carries a non-empty ``url`` field the client is
    redirected to ``"/"`` followed by the ``b64.uri_b64encode`` form of that
    value.  Otherwise ``main.html`` is rendered with an empty context and
    written to the response.
    """
    requested = self.request.get("url")
    if not requested:
        # Nothing submitted: render the landing page.
        emit = self.response.out.write
        emit(unicode(template.render("main.html", {})))
        return None

    # The submitted value is encoded as-is, including any 'http://' prefix.
    return self.redirect("/" + b64.uri_b64encode(requested))
def post(self):
    """Handle the URL form: redirect to the encoded path or show the form.

    When the request carries a non-empty ``url`` field the client is
    redirected to ``"/"`` followed by the ``b64.uri_b64encode`` form of that
    value.  Otherwise ``main.html`` is rendered with ``change_url`` set to
    ``self.get_change_root()`` and written to the response.
    """
    submitted = self.request.get("url")
    if submitted:
        # The submitted value is encoded as-is, including any 'http://'
        # prefix.
        encoded_path = "/" + b64.uri_b64encode(submitted)
        return self.redirect(encoded_path)

    write = self.response.out.write
    page = template.render("main.html", {
        'change_url': self.get_change_root(),
    })
    write(page)
def uri_b64encode(url):
    """Encode ``url`` without its first character via ``b64.uri_b64encode``.

    NOTE(review): the dropped character is presumably a leading '/' --
    confirm against callers.
    """
    return b64.uri_b64encode(url[1:])
def _RunTransformTest(self, base_url, accessed_url, original, expected):
    """Check that ``original`` is rewritten to ``expected`` in many contexts.

    ``original`` is substituted into each markup/CSS template below, the
    resulting text is run through ``transform_content.TransformContent`` and
    the output is compared with the same template filled with the expected
    replacement.

    Args:
        base_url: passed through to ``TransformContent``.
        accessed_url: passed through to ``TransformContent``; its scheme is
            used for the expected URL unless ``original`` starts with
            ``https``.
        original: URL text as it appears in the source document.
        expected: un-encoded expected URL.  Unless it starts with ``#`` or
            ``javascript`` it is prefixed with ``<scheme>:/``, encoded with
            ``b64.uri_b64encode`` and prefixed with ``/`` before comparing.
    """
    tag_tests = [
        '<img src="%s"/>',
        "<img src='%s'/>",
        "<img src=%s/>",
        "<img src=\"%s'/>",
        "<img src='%s\"/>",
        "<img src \t= '%s'/>",
        "<img src \t= \t '%s'/>",
        "<img src = '%s'/>",
        '<a href="%s">',
        "<a href='%s'>",
        "<a href=%s>",
        "<a href=\"%s'>",
        "<a href='%s\">",
        "<a href \t = \t'%s'>",
        "<a href \t = '%s'>",
        "<a href = \t'%s'>",
        "<td background=%s>",
        "<td background='%s'>",
        '<td background="%s">',
        '<form action="%s">',
        "<form action='%s'>",
        "<form action=%s>",
        "<form action=\"%s'>",
        "<form action='%s\">",
        "<form action \t = \t'%s'>",
        "<form action \t = '%s'>",
        "<form action = \t'%s'>",
        "@import '%s';",
        "@import '%s'\nnext line here",
        "@import \t '%s';",
        "@import %s;",
        "@import %s",
        '@import "%s";',
        '@import "%s"\nnext line here',
        "@import url(%s)",
        "@import url('%s')",
        '@import url("%s")',
        "background: transparent url(%s) repeat-x left;",
        'background: transparent url("%s") repeat-x left;',
        "background: transparent url('%s') repeat-x left;",
        '<meta http-equiv="Refresh" content="0; URL=%s">',
        'url(%s)',
        'src="%s" ',
        'style="background:url(%s)',
    ]

    # Fragment and javascript: URLs are expected to pass through unencoded.
    skip_encoding = expected.lstrip().lower().startswith(('#', 'javascript'))
    if original.startswith('https'):
        scheme = 'https'
    else:
        scheme = urlparse.urlparse(accessed_url).scheme
    if not skip_encoding:
        expected = '/' + b64.uri_b64encode('%s:/' % scheme + expected)

    for tag in tag_tests:
        test = tag % original
        correct = tag % expected
        result = transform_content.TransformContent(base_url, accessed_url, test)
        logging.error("Test with\n"
                      "Accessed: %s\n"
                      "Input : %s\n"
                      "Received: %s\n"
                      "Expected: %s",
                      accessed_url, test, result, correct)
        if result != correct:
            logging.info("FAIL")
        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and absent from recent unittest releases.
        self.assertEqual(correct, result)
def to_uri_b64encode(url):
    """Return ``"/"`` plus the ``b64.uri_b64encode`` form of ``url[1:]``.

    The first character of ``url`` is discarded before encoding.
    NOTE(review): presumably that character is a leading '/' -- confirm.
    """
    return "/" + b64.uri_b64encode(url[1:])
def to_uri_b64encode(url):
    """Encode everything after the first character of ``url``.

    The remainder is passed to ``b64.uri_b64encode`` and the result is
    returned with a ``"/"`` in front.
    NOTE(review): the skipped character is presumably a leading '/' --
    confirm against callers.
    """
    remainder = url[1:]
    encoded = b64.uri_b64encode(remainder)
    return "/" + encoded