def redirect_sharp(url):
    # Split off the '#fragment', proxy the base URL, then re-append the fragment.
    url_part_o = re.search(r'(.+)(#.*)', url)
    if url_part_o is not None:
        url1 = '/proxy?url=' + util.my_quote_plus(url_part_o.group(1))
        if nojs is not None:
            url1 += '&nojs=1'
        url1 += url_part_o.group(2)
        my_direct(url1)  # 303 redirect
def redirect_refresh(url):
    # Follow a <meta http-equiv="refresh"> target found in the fetched page body.
    new_url = tco.find_meta_refresh_url(s2)
    if new_url:
        url1 = '/proxy?url=' + util.my_quote_plus(new_url)
        if nojs is not None:
            url1 += '&nojs=1'
        set_len_header()
        my_direct(url1)  # 303 redirect
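# A minimal, self-contained sketch of the fragment handling above, assuming
# util.my_quote_plus behaves like urllib.quote_plus. It only illustrates the
# redirect target that redirect_sharp builds; the handler itself does not use it.
import re
import urllib

def _example_sharp_target(url, nojs=None):
    m = re.search(r'(.+)(#.*)', url)
    if m is None:
        return None
    target = '/proxy?url=' + urllib.quote_plus(m.group(1))
    if nojs is not None:
        target += '&nojs=1'
    return target + m.group(2)

# _example_sharp_target('http://example.com/page#sec2')
# -> '/proxy?url=http%3A%2F%2Fexample.com%2Fpage#sec2'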
def convert_html_tag_url(s1, refer_url, base_url=None, nojs=None, charset='utf-8'):
    def get_current(url):
        # Strip everything after the last '/' to get the current "directory" URL.
        pos = url.rfind('/')
        return url if -1 == pos else url[:pos]

    def convert_rel_url(path1, ref_url):
        # Resolve a '../'-style relative path against ref_url.
        rv = ref_url
        pos = rv.rfind('/')
        if pos != -1:
            rv = rv[:pos]
        while 1:
            if path1.startswith('../'):
                path1 = path1[3:]
                rv = get_current(rv)
            else:
                break
        return rv + '/' + path1

    def convert_url(url):
        if url.startswith('#'):
            return url
        elif url.startswith('//'):
            # a link starting with '//' uses the same protocol as the current page
            url = 'http:' + url
        elif not url.startswith('http'):
            tmp_url = refer_url if base_url is None else base_url
            if url.startswith('/'):
                url = util.get_base_url(tmp_url) + url
            elif url.startswith('..'):
                url = convert_rel_url(url, tmp_url)
            elif url.startswith('./'):
                url1 = get_current(tmp_url)
                url2 = url.replace('.', '', 1)
                url = url1 + '/' + url2
            else:
                url1 = get_current(tmp_url)
                url = url1 + '/' + url
        f = lambda x: '' if x is None else '&nojs=1'
        url = util.get_chs_lnk(url, charset)
        return ''.join([pre1, util.my_quote_plus(url), f(nojs)])

    #p1 = r'''(?:<\s*\w+\s+[^>]*?(?:\shref|\ssrc)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    p1 = r'''(?:(?:\Whref|\Wsrc)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    p2 = r'''(?:url\s*[(]\s*['"]?([^)]+?)['"]?\s*[)])'''  # see dbanotes.net; TODO: p2 should require \s before 'url('
    p3 = r'''(?:[(]\s*src\s*=\s*["']?([^)]+?)["']?\s*[)])'''
    p4 = r'''(?:<\s*(?:object|param)\s+[^>]*?(?:data|value)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    # example: @import '1510_layout.css'; from http://my1510.cn/templates/skin3/css/1510.css
    p5 = r'''(?:@import\s+["']([^"']+?)["']\s*;)'''
    p6 = r'''(?:<\s*td\s[^>]*background\s*=\s*["']([^>]+)["'])'''
    pt = p1 + '|' + p2 + '|' + p3 + '|' + p4 + '|' + p5 + '|' + p6
    iter1 = re.finditer(pt, s1, flag1)
    new_str = ''
    pos = 0
    for each in iter1:
        try:
            # Determine which alternative of pt actually captured the URL.
            if each.group(1):
                url_index = 1
            elif each.group(2):
                url_index = 2
            elif each.group(3):
                url_index = 3
            elif each.group(4):
                url_index = 4
            elif each.group(5):
                url_index = 5
            else:
                url_index = 6
            before_pos = each.start(url_index)
            after_pos = each.end(url_index)
            new_url = each.group(url_index)
            if not new_url.startswith('data:'):
                new_url = convert_url(new_url)
                new_url += '&rf=' + util.my_quote_plus(refer_url)
            #print 'new_url is', new_url
            new_tag = (s1[each.start():before_pos] +
                       new_url +
                       s1[after_pos:each.end()])
            new_str += s1[pos:each.start()] + new_tag
            pos = each.end()
        except Exception, e:
            print str(e)
            continue
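# Hedged usage sketch for convert_html_tag_url. It assumes the module-level
# names used above are defined elsewhere in this file, e.g. pre1 = '/proxy?url='
# and flag1 = re.I | re.S (these exact values are an assumption, not shown in
# this excerpt), plus the project's util module.
#
#   html = '<a href="/about.html">x</a><img src="//cdn.example.com/a.png">'
#   out = convert_html_tag_url(html, 'http://example.com/index.html', nojs=1)
#   # each href/src/url(...)/@import/background target is rewritten to
#   #   /proxy?url=<quoted absolute url>&nojs=1&rf=<quoted refer_url>
#   # while 'data:' URLs are passed through unchanged.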