Example #1
0
 def redirect_sharp(url):
     """If *url* carries a '#fragment', 303-redirect it through the proxy,
     keeping the fragment outside the quoted query value."""
     match = re.search(r'(.+)(#.*)', url)
     if match is None:
         return
     target = '/proxy?url=' + util.my_quote_plus(match.group(1))
     if nojs is not None:
         target += '&nojs=1'
     # re-attach the raw fragment, then issue the 303 redirect
     my_direct(target + match.group(2))
Example #2
0
 def redirect_refresh(url):
     """Follow a <meta http-equiv="refresh"> target found in the page body
     (s2) by 303-redirecting through the proxy."""
     target = tco.find_meta_refresh_url(s2)
     if not target:
         return
     redirect = '/proxy?url=' + util.my_quote_plus(target)
     if nojs is not None:
         redirect += '&nojs=1'
     set_len_header()
     my_direct(redirect)  # 303
Example #3
0
 def convert_url(url):
     """Resolve *url* against the page's base/referer URL and return the
     proxied link (pre1 + quoted URL, plus '&nojs=1' when nojs is set).
     In-page anchors ('#...') are returned unchanged."""
     if url.startswith('#'):
         return url
     if url.startswith('//'):
         # protocol-relative link: same scheme as the page, default to http
         url = 'http:' + url
     elif not url.startswith('http'):
         base = base_url if base_url is not None else refer_url
         if url.startswith('/'):
             url = util.get_base_url(base) + url
         elif url.startswith('..'):
             url = convert_rel_url(url, base)
         elif url.startswith('./'):
             # drop the leading '.' only, then join onto the current directory
             url = get_current(base) + '/' + url.replace('.', '', 1)
         else:
             url = get_current(base) + '/' + url
     url = util.get_chs_lnk(url, charset)
     suffix = '' if nojs is None else '&nojs=1'
     return pre1 + util.my_quote_plus(url) + suffix
Example #4
0
def convert_html_tag_url(s1,refer_url,base_url=None,nojs=None,charset='utf-8'):
    """Rewrite every URL embedded in the HTML text *s1* to go through the proxy.

    Scans s1 for href/src attributes (p1), CSS ``url(...)`` (p2),
    ``(src=...)`` (p3), object/param data/value attributes (p4),
    ``@import '...';`` (p5) and ``<td background=...>`` (p6), converts each
    matched URL with convert_url() and appends a '&rf=' referer parameter.

    NOTE(review): the visible body ends inside the match loop — appending the
    final ``s1[pos:]`` tail and the return statement lie past this view.

    s1       -- HTML/CSS source text to rewrite.
    refer_url -- URL of the page s1 came from; used to resolve relative links
                 and emitted as the '&rf=' parameter.
    base_url -- overrides refer_url as the resolution base when not None.
    nojs     -- when not None, '&nojs=1' is appended to every proxied link.
    charset  -- passed to util.get_chs_lnk() for charset-aware link handling.
    """
    def get_current(url):
        # Strip the last path segment: 'a/b/c' -> 'a/b' (unchanged if no '/').
        pos = url.rfind('/')
        return url if -1 == pos else url[:pos]
    def convert_rel_url(path1,ref_url):
        # Resolve a '../'-prefixed relative path against ref_url.
        rv = ref_url
        pos = rv.rfind('/')
        if pos != -1:
            rv = rv[:pos]  # drop ref_url's file component first
        while 1:
            if path1.startswith('../'):
                path1 = path1[3:]
                rv = get_current(rv)  # pop one directory per '../'
            else:
                break
        return rv + '/' + path1
    def convert_url(url):
        # Turn one matched URL into a proxied link (pre1 + quoted URL).
        if url.startswith('#'):
            return url  # in-page anchor: leave untouched
        elif url.startswith('//'):  # protocol-relative: same scheme as the page, default to http
            url = 'http:' + url
        elif not url.startswith('http'):
            tmp_url = refer_url if base_url is None else base_url
            if url.startswith('/'):
                url = util.get_base_url(tmp_url) + url
            elif url.startswith('..'):
                url = convert_rel_url(url,tmp_url)
            elif url.startswith('./'):
                url1 = get_current(tmp_url)
                url2 = url.replace('.','',1)  # drop the leading '.' only
                url = url1 + '/' + url2
            else:
                url1 = get_current(tmp_url)
                url = url1 + '/' + url
        f = lambda x: '' if x is None else '&nojs=1'
        url = util.get_chs_lnk(url,charset)
        return ''.join([pre1,util.my_quote_plus(url),f(nojs)])
    # p1: earlier, stricter tag-anchored variant kept for reference.
    #p1 = r'''(?:<\s*\w+\s+[^>]*?(?:\shref|\ssrc)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    p1 = r'''(?:(?:\Whref|\Wsrc)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    p2 = r'''(?:url\s*[(]\s*['"]?([^)]+?)['"]?\s*[)])'''  # CSS url(...); see dbanotes.net
    # TODO: p2 should require \s (or similar) before 'url' to avoid false matches.
    p3 = r'''(?:[(]\s*src\s*=\s*["']?([^)]+?)["']?\s*[)])'''
    p4 = r'''(?:<\s*(?:object|param)\s+[^>]*?(?:data|value)\s*=\s*["']?\s*([^>'"\s]+)(?=\s*["']?[^>]*?>))'''
    # example: @import '1510_layout.css'; from http://my1510.cn/templates/skin3/css/1510.css
    p5 = r'''(?:@import\s+["']([^"']+?)["']\s*;)'''
    p6 = r'''(?:<\s*td\s[^>]*background\s*=\s*["']([^>]+)["'])'''
    # One alternation so a single pass over s1 finds all six URL kinds.
    pt = p1 + '|' +p2 + '|' + p3 + '|' + p4 + '|' + p5 + '|' + p6
    iter1 = re.finditer(pt,s1,flag1)  # flag1 defined elsewhere; presumably re.I — confirm
    new_str = ''
    pos = 0  # end of the last copied region of s1
    for each in iter1:
        try:
            # Exactly one of groups 1..6 matched; find which pattern hit.
            if each.group(1):
                url_index = 1
            elif each.group(2):
                url_index = 2 
            elif each.group(3):
                url_index = 3
            elif each.group(4):
                url_index = 4
            elif each.group(5):
                url_index = 5
            else:
                url_index = 6 
            before_pos = each.start(url_index)
            after_pos = each.end(url_index)
            new_url = each.group(url_index)
            # data: URIs are inlined content — leave them alone.
            if not new_url.startswith('data:'):
                new_url = convert_url(new_url)
                new_url += '&rf=' + util.my_quote_plus(refer_url)
            #print 'new_url is',new_url
            # Splice the rewritten URL back into the matched tag text.
            new_tag = s1[each.start():before_pos] +\
                new_url +\
                s1[after_pos:each.end()]
            new_str += s1[pos:each.start()] + new_tag
            pos = each.end()
        except Exception,e:  # Python 2 syntax; best-effort: skip any match that fails to rewrite
            print str(e)
            continue