예제 #1
0
def clean_url(url):
    """Return *url* with the ``ixid`` and ``s`` query parameters removed.

    The query string is parsed, the two parameters are filtered out, and the
    remaining pairs are re-encoded in their original order. All other URL
    components are carried over from the parsed URL.
    """
    parts = urlparse(url)

    kept = []
    for name, value in parse_qsl(parts.query):
        if name in ('ixid', 's'):
            continue
        kept.append((name, value))

    # Only the query component changes; rebuild the URL from the rest as-is.
    return urlunparse(parts._replace(query=urlencode(kept)))
예제 #2
0
def extract_doi(url):
    """Return the first match of the module-level ``regex`` found in *url*.

    The URL path is searched first, then each query-string value in order.
    *url* must expose ``path`` and ``query`` attributes (presumably a
    ``urlparse`` result -- confirm against the caller).

    Returns the matched text, or ``None`` when nothing matches.
    """
    def _candidates():
        # Lazily yield the path, then the query values, so the query string
        # is only parsed when the path itself did not match.
        yield url.path
        for _name, value in parse_qsl(url.query):
            yield value

    for text in _candidates():
        hit = regex.search(text)
        if hit:
            return hit.group(0)
    return None
예제 #3
0
def extract_doi(url):
    """Search *url* for text matching the module-level ``regex``.

    Looks at ``url.path`` first; if that does not match, each value of the
    parsed ``url.query`` is tried in order. Returns the text of the first
    match, or ``None`` if there is none.
    """
    in_path = regex.search(url.path)
    if in_path:
        return in_path.group(0)

    # Both generators are lazy: searching stops at the first matching value.
    values = (pair[1] for pair in parse_qsl(url.query))
    searches = (regex.search(value) for value in values)
    first_hit = next((hit for hit in searches if hit), None)

    return first_hit.group(0) if first_hit else None
예제 #4
0
파일: unsplash.py 프로젝트: asciimoo/searx
def clean_url(url):
    """Strip the ``ixid`` and ``s`` parameters from the query string of *url*.

    Every other component (scheme, host, path, path parameters, fragment) and
    the order of the remaining query parameters are kept.
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    dropped = ('ixid', 's')
    cleaned_query = urlencode(
        [pair for pair in parse_qsl(query) if pair[0] not in dropped]
    )

    return urlunparse((scheme, netloc, path, params, cleaned_query, fragment))
예제 #5
0
파일: google.py 프로젝트: asciimoo/searx
def parse_url(url_string, google_hostname):
    """Unwrap a redirect URL, returning the value of its ``q`` parameter.

    A URL counts as a redirect when its host is *google_hostname* (or it has
    no host at all) and its path equals the module-level ``redirect_path``.
    Any other URL, including ``None``, is returned unchanged.

    Raises:
        KeyError: if a redirect URL carries no ``q`` query parameter.
    """
    if url_string is None:
        return url_string

    parts = urlparse(url_string)
    host_matches = parts.netloc in (google_hostname, '')
    if not (host_matches and parts.path == redirect_path):
        return url_string

    # Redirect URL: the real target is stored in the ``q`` parameter.
    return dict(parse_qsl(parts.query))['q']
예제 #6
0
def parse_url(url_string, google_hostname):
    """Return the ``q`` parameter of a redirect URL, else the URL itself.

    The URL is treated as a redirect only when its netloc is either
    *google_hostname* or empty and its path equals the module-level
    ``redirect_path``. ``None`` and non-redirect URLs are passed through
    untouched. A redirect URL without a ``q`` parameter raises ``KeyError``.
    """
    if url_string is not None:
        pieces = urlparse(url_string)
        if pieces.netloc in (google_hostname, '') and pieces.path == redirect_path:
            params = dict(parse_qsl(pieces.query))
            return params['q']

    # Nothing to unwrap (None input or an ordinary URL).
    return url_string
def on_result(request, search, result):
    """Drop query parameters whose names match any pattern in ``regexes``.

    When at least one parameter is removed, ``result['parsed_url']`` is
    replaced with a copy carrying the re-encoded query and ``result['url']``
    is rebuilt from it. Results without a ``'parsed_url'`` entry, or with an
    empty query string, are left untouched. *request* and *search* are not
    used.

    Always returns ``True``.
    """
    if 'parsed_url' not in result:
        return True

    raw_query = result['parsed_url'].query
    if raw_query == "":
        return True

    pairs = parse_qsl(raw_query)
    kept = [
        (name, value) for name, value in pairs
        if not any(pattern.match(name) for pattern in regexes)
    ]

    # Rewrite the result only when something was actually filtered out.
    if len(kept) != len(pairs):
        cleaned = result['parsed_url']._replace(query=urlencode(kept))
        result['parsed_url'] = cleaned
        result['url'] = urlunparse(cleaned)

    return True
예제 #8
0
파일: bing_news.py 프로젝트: cy8aer/searx
def image_url_cleanup(url_string):
    """Rewrite a ``bing4.com`` thumbnail URL to its ``www.bing.com`` form.

    URLs whose host ends with ``bing4.com`` and whose path is ``/th`` are
    rewritten to ``https://www.bing.com/th?id=<id>``, keeping only the ``id``
    query parameter. Every other URL is returned unchanged.

    If such a thumbnail URL has no usable ``id`` parameter (missing or blank),
    the original URL is returned instead of raising ``TypeError`` from
    concatenating ``None``.
    """
    parsed_url = urlparse(url_string)
    if parsed_url.netloc.endswith('bing4.com') and parsed_url.path == '/th':
        query = dict(parse_qsl(parsed_url.query))
        image_id = query.get('id')
        if image_id is None:
            # Nothing to rebuild the thumbnail URL from; keep the input.
            return url_string
        # NOTE: parse_qsl percent-decodes the value and it is concatenated
        # here without being re-encoded.
        return "https://www.bing.com/th?id=" + image_id
    return url_string
예제 #9
0
파일: bing_news.py 프로젝트: cy8aer/searx
def url_cleanup(url_string):
    """Unwrap a ``www.bing.com/news/apiclick.aspx`` click-tracking URL.

    For such URLs the value of the ``url`` query parameter is returned, or
    ``None`` when that parameter is absent. Any other URL is returned
    unchanged.
    """
    parts = urlparse(url_string)
    is_click_redirect = (
        (parts.netloc, parts.path) == ('www.bing.com', '/news/apiclick.aspx')
    )
    if not is_click_redirect:
        return url_string

    return dict(parse_qsl(parts.query)).get('url')
예제 #10
0
파일: bing_news.py 프로젝트: xyz12810/searx
def image_url_cleanup(url_string):
    """Map a ``bing4.com/th`` thumbnail URL onto ``www.bing.com``.

    When the host ends with ``bing4.com`` and the path is ``/th``, the result
    is ``https://www.bing.com/th?id=<id>`` built from the ``id`` query
    parameter alone. All other URLs pass through unchanged.

    A thumbnail URL without an ``id`` parameter (or with a blank one, which
    ``parse_qsl`` discards) is also passed through unchanged; previously this
    raised ``TypeError`` because ``None`` was concatenated to a string.
    """
    parsed_url = urlparse(url_string)
    if not (parsed_url.netloc.endswith('bing4.com') and parsed_url.path == '/th'):
        return url_string

    image_id = dict(parse_qsl(parsed_url.query)).get('id')
    if image_id is None:
        # No id to carry over, so there is no canonical URL to build.
        return url_string

    # NOTE: the id was percent-decoded by parse_qsl and is appended as-is.
    return "https://www.bing.com/th?id=" + image_id
예제 #11
0
파일: bing_news.py 프로젝트: xyz12810/searx
def url_cleanup(url_string):
    """Return the target of a Bing news click-tracking link.

    Only URLs on host ``www.bing.com`` with path ``/news/apiclick.aspx`` are
    unwrapped: the result is their ``url`` query parameter, or ``None`` if
    it is missing. Other URLs come back as given.
    """
    target = urlparse(url_string)
    if target.netloc != 'www.bing.com' or target.path != '/news/apiclick.aspx':
        return url_string

    params = {}
    params.update(parse_qsl(target.query))
    return params.get('url', None)