Example #1
def image_proxy():
    url = request.args.get("url").encode()

    if not url:
        return "", 400

    h = new_hmac(settings["server"]["secret_key"], url.encode())

    if h != request.args.get("h"):
        return "", 400

    headers = dict_subset(request.headers,
                          {"If-Modified-Since", "If-None-Match"})
    headers["User-Agent"] = gen_useragent()

    resp = requests.get(
        url,
        stream=True,
        timeout=settings["outgoing"]["request_timeout"],
        headers=headers,
        proxies=get_global_proxies(),
    )

    if resp.status_code == 304:
        return "", resp.status_code

    if resp.status_code != 200:
        logger.debug("image-proxy: wrong response code: {0}".format(
            resp.status_code))
        if resp.status_code >= 400:
            return "", resp.status_code
        return "", 400

    if not resp.headers.get("content-type", "").startswith("image/"):
        logger.debug("image-proxy: wrong content-type: {0}".format(
            resp.headers.get("content-type")))
        return "", 400

    img = b""
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return "", 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(
        resp.headers,
        {
            "Content-Length", "Length", "Date", "Last-Modified", "Expires",
            "Etag"
        },
    )

    return Response(img,
                    mimetype=resp.headers["content-type"],
                    headers=headers)
Example #2
def image_proxy():
    url = request.args.get('url')

    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024

    try:
        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
        headers['User-Agent'] = gen_useragent()
        stream = http_stream(
            method='GET',
            url=url,
            headers=headers,
            timeout=settings['outgoing']['request_timeout'],
            allow_redirects=True,
            max_redirects=20)

        resp = next(stream)
        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code == 304:
            return '', resp.status_code

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('content-type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
            return '', 400

        headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

        total_length = 0

        def forward_chunk():
            nonlocal total_length
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
    except httpx.HTTPError:
        return '', 400
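This variant rejects oversized images via Content-Length and streams the body to the client instead of buffering it. `http_stream` is not defined in the example; a minimal sketch of a generator with the same calling convention (yield the response object first, then the body chunks), written directly on top of httpx as an assumption about how it behaves:

import httpx

def http_stream(method, url, headers=None, timeout=None,
                allow_redirects=True, max_redirects=20):
    # hypothetical sketch: the first item yielded is the response object, so the
    # caller can inspect status code and headers before consuming the body chunks
    with httpx.Client(follow_redirects=allow_redirects,
                      max_redirects=max_redirects,
                      timeout=timeout) as client:
        with client.stream(method, url, headers=headers) as resp:
            yield resp
            yield from resp.iter_bytes(64 * 1024)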
Example #3
def image_proxy():
    url = request.args.get('url')

    if not url:
        return '', 400

    h = hashlib.sha256(
        url.encode('utf-8') + settings['server']['secret_key'].encode('utf-8')).hexdigest()

    if h != request.args.get('h'):
        return '', 400

    headers = dict_subset(request.headers,
                          {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = requests.get(url,
                        stream=True,
                        timeout=settings['outgoing']['request_timeout'],
                        headers=headers,
                        proxies=outgoing_proxies)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(
            resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    if not resp.headers.get('content-type', '').startswith('image/'):
        logger.debug('image-proxy: wrong content-type: {0}'.format(
            resp.headers.get('content-type')))
        return '', 400

    img = b''
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {
        'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'
    })

    return Response(img,
                    mimetype=resp.headers['content-type'],
                    headers=headers)
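`dict_subset` is used in every variant to whitelist request and response headers. A plausible implementation (the project's actual helper may differ) is simply:

def dict_subset(dictionary, properties):
    # assumed behaviour: keep only the whitelisted keys that are actually present
    return {k: dictionary[k] for k in properties if k in dictionary}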
Example #4
def image_proxy():
    url = request.args.get('url')

    if not url:
        return '', 400

    h = hashlib.sha256(url.encode('utf-8') + settings['server']['secret_key'].encode('utf-8')).hexdigest()

    if h != request.args.get('h'):
        return '', 400

    headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = requests.get(url,
                        stream=True,
                        timeout=settings['outgoing']['request_timeout'],
                        headers=headers,
                        proxies=outgoing_proxies)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    if not resp.headers.get('content-type', '').startswith('image/'):
        logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
        return '', 400

    img = b''
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

    return Response(img, mimetype=resp.headers['content-type'], headers=headers)
Example #5
def image_proxy():
    url = request.args.get("url").encode("utf-8")

    if not url:
        return "", 400

    h = hashlib.sha256(url.encode("utf-8") + settings["server"]["secret_key"].encode("utf-8")).hexdigest()

    if h != request.args.get("h"):
        return "", 400

    headers = dict_subset(request.headers, {"If-Modified-Since", "If-None-Match"})
    headers["User-Agent"] = gen_useragent()

    resp = requests.get(
        url, stream=True, timeout=settings["outgoing"]["request_timeout"], headers=headers, proxies=outgoing_proxies
    )

    if resp.status_code == 304:
        return "", resp.status_code

    if resp.status_code != 200:
        logger.debug("image-proxy: wrong response code: {0}".format(resp.status_code))
        if resp.status_code >= 400:
            return "", resp.status_code
        return "", 400

    if not resp.headers.get("content-type", "").startswith("image/"):
        logger.debug("image-proxy: wrong content-type: {0}".format(resp.headers.get("content-type")))
        return "", 400

    img = ""
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return "", 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {"Content-Length", "Length", "Date", "Last-Modified", "Expires", "Etag"})

    return Response(img, mimetype=resp.headers["content-type"], headers=headers)
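Examples #3 to #5 verify `h` as a plain SHA-256 digest of the URL concatenated with the secret key rather than an HMAC, so a link generator for these variants has to sign the same way. A minimal sketch, mirroring the verification above:

import hashlib
from urllib.parse import urlencode

def signed_proxy_url_sha256(url, secret_key):
    # hypothetical helper matching the check in examples #3 to #5:
    # sha256(url + secret_key) as a hex digest
    h = hashlib.sha256(url.encode("utf-8") + secret_key.encode("utf-8")).hexdigest()
    return "image_proxy?" + urlencode({"url": url, "h": h})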
Example #6
def response(resp):

    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
    get(url_ping, headers=headers_ping)

    if resp.status_code == 303:
        return []

    results = []
    doc = fromstring(resp.text)

    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
    if len(result_table) < 3:
        # no more results
        return []
    result_table = result_table[2]

    tr_rows = eval_xpath(result_table, './/tr')

    # the last <tr> contains the form with the 'previous/next page' links
    tr_rows = tr_rows[:-1]

    len_tr_rows = len(tr_rows)
    offset = 0

    while len_tr_rows >= offset + 4:

        # assemble the table rows we need to scrape
        tr_title = tr_rows[offset]
        tr_content = tr_rows[offset + 1]
        offset += 4

        # ignore sponsored ads <tr class="result-sponsored">
        if tr_content.get('class') == 'result-sponsored':
            continue

        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
        if a_tag is None:
            continue

        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
        if td_content is None:
            continue

        results.append({
            'title': a_tag.text_content(),
            'content': extract_text(td_content),
            'url': a_tag.get('href'),
        })

    return results
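The row parsing relies on the `eval_xpath` and `eval_xpath_getindex` helpers, which are not shown. A minimal sketch of what they might look like on top of lxml (the project's own helpers may add caching and error reporting):

def eval_xpath(element, xpath_spec):
    # assumed behaviour: evaluate an XPath expression against an lxml element
    return element.xpath(xpath_spec)

def eval_xpath_getindex(element, xpath_spec, index, default=None):
    # assumed behaviour: return the index-th match, or the default when it is missing
    result = eval_xpath(element, xpath_spec)
    if -len(result) <= index < len(result):
        return result[index]
    return default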
Example #7
def response(resp):
    if resp.status_code == 303:
        return []

    # ping
    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
    get(url_ping, headers=headers_ping)

    # parse the response
    results = []
    doc = fromstring(resp.text)
    for i, r in enumerate(eval_xpath(doc, result_xpath)):
        if i >= 30:
            break
        try:
            res_url = eval_xpath(r, url_xpath)[-1]
        except IndexError:
            continue

        if not res_url:
            continue

        title = extract_text(eval_xpath(r, title_xpath))
        content = extract_text(eval_xpath(r, content_xpath))

        # append result
        results.append({'title': title,
                        'content': content,
                        'url': res_url})

    # parse correction
    for correction in eval_xpath(doc, correction_xpath):
        # append correction
        results.append({'correction': extract_text(correction)})

    # return results
    return results
Example #8
def image_proxy():
    # pylint: disable=too-many-return-statements, too-many-branches

    url = request.args.get('url')

    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024
    forward_resp = False
    resp = None
    try:
        request_headers = {
            'User-Agent': gen_useragent(),
            'Accept': 'image/webp,*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Sec-GPC': '1',
            'DNT': '1',
        }
        set_context_network_name('image_proxy')
        stream = http_stream(method='GET',
                             url=url,
                             headers=request_headers,
                             timeout=settings['outgoing']['request_timeout'],
                             follow_redirects=True,
                             max_redirects=20)

        resp = next(stream)
        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(
                resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('Content-Type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: %s',
                         resp.headers.get('Content-Type', ''))
            return '', 400

        forward_resp = True
    except httpx.HTTPError:
        logger.exception('HTTP error')
        return '', 400
    finally:
        if resp and not forward_resp:
            # the handler is about to return an HTTP error to the browser,
            # so close the response between searxng and the upstream HTTP server
            try:
                resp.close()
            except httpx.HTTPError:
                logger.exception('HTTP error on closing')

    try:
        headers = dict_subset(
            resp.headers,
            {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'})

        def forward_chunk():
            total_length = 0
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(),
                        mimetype=resp.headers['Content-Type'],
                        headers=headers)
    except httpx.HTTPError:
        return '', 400
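All of the `image_proxy` variants are Flask view functions, so wiring one of them into an application is just a matter of registering the route; the URL rule below is an assumption for illustration:

from flask import Flask

app = Flask(__name__)
# hypothetical wiring: expose the handler above under /image_proxy
app.add_url_rule("/image_proxy", "image_proxy", image_proxy, methods=["GET"])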