Example #1
def test_bytes(self):
    for secret_key in ['secret', b'secret', 1]:
        if secret_key == 1:
            with self.assertRaises(TypeError):
                webutils.new_hmac(secret_key, b'http://example.com')
            continue
        res = webutils.new_hmac(secret_key, b'http://example.com')
        self.assertEqual(
            res,
            '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819')
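The test above pins down what new_hmac is expected to do: accept the secret as either str or bytes, raise TypeError for anything else, and return a 64-character hex digest (i.e. SHA-256). A minimal sketch consistent with that contract, assumed rather than copied from the project, could look like this:

import hashlib
import hmac


def new_hmac(secret_key, url):
    # Accept str or bytes secrets; the test expects a TypeError for e.g. an int.
    if isinstance(secret_key, str):
        secret_key = secret_key.encode()
    if not isinstance(secret_key, bytes):
        raise TypeError('secret_key must be str or bytes')
    return hmac.new(secret_key, url, hashlib.sha256).hexdigest()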
Example #2
def image_proxify(url):

    if url.startswith('//'):
        url = 'https:' + url

    if not request.preferences.get_value('image_proxy'):
        return url

    if url.startswith('data:image/'):
        # 50 is an arbitrary number to get only the beginning of the image.
        partial_base64 = url[len('data:image/'):50].split(';')
        if len(partial_base64) == 2 \
           and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']\
           and partial_base64[1].startswith('base64,'):
            return url
        else:
            return None

    if settings.get('result_proxy'):
        return proxify(url)

    h = new_hmac(settings['server']['secret_key'], url.encode())

    return '{0}?{1}'.format(url_for('image_proxy'),
                            urlencode(dict(url=url.encode(), h=h)))
Example #3
def image_proxify(url):

    if url.startswith("//"):
        url = "https:" + url

    if not request.preferences.get_value("image_proxy"):
        return url

    if url.startswith("data:image/"):
        # 50 is an arbitrary number to get only the beginning of the image.
        partial_base64 = url[len("data:image/"):50].split(";")
        if (len(partial_base64) == 2 and partial_base64[0]
                in ["gif", "png", "jpeg", "pjpeg", "webp", "tiff", "bmp"]
                and partial_base64[1].startswith("base64,")):
            return url
        else:
            return None

    if settings.get("result_proxy"):
        return proxify(url)

    h = new_hmac(settings["server"]["secret_key"], url.encode())

    return "{0}?{1}".format(url_for("image_proxy"),
                            urlencode(dict(url=url.encode(), h=h)))
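image_proxify only builds the signed link; the matching image_proxy endpoint (examples below) has to recompute the same HMAC from the url query parameter and compare it with h. A tiny round-trip sketch of that contract, with a made-up secret standing in for settings['server']['secret_key']:

secret = 'change-me'  # hypothetical value for settings['server']['secret_key']
url = 'https://example.org/cat.png'

h = new_hmac(secret, url.encode())           # signing side (image_proxify)
assert h == new_hmac(secret, url.encode())   # verifying side (image_proxy)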
Example #4
def image_proxy():
    url = request.args.get("url").encode()

    if not url:
        return "", 400

    h = new_hmac(settings["server"]["secret_key"], url)

    if h != request.args.get("h"):
        return "", 400

    headers = dict_subset(request.headers,
                          {"If-Modified-Since", "If-None-Match"})
    headers["User-Agent"] = gen_useragent()

    resp = requests.get(
        url,
        stream=True,
        timeout=settings["outgoing"]["request_timeout"],
        headers=headers,
        proxies=get_global_proxies(),
    )

    if resp.status_code == 304:
        return "", resp.status_code

    if resp.status_code != 200:
        logger.debug("image-proxy: wrong response code: {0}".format(
            resp.status_code))
        if resp.status_code >= 400:
            return "", resp.status_code
        return "", 400

    if not resp.headers.get("content-type", "").startswith("image/"):
        logger.debug("image-proxy: wrong content-type: {0}".format(
            resp.headers.get("content-type")))
        return "", 400

    img = b""
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return "", 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(
        resp.headers,
        {
            "Content-Length", "Length", "Date", "Last-Modified", "Expires",
            "Etag"
        },
    )

    return Response(img,
                    mimetype=resp.headers["content-type"],
                    headers=headers)
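This and the following image_proxy variants copy a whitelist of headers through a dict_subset helper. Judging only from how it is called here, it presumably behaves like the following sketch (an assumption, not the project's actual code):

def dict_subset(dictionary, keys):
    # keep only the whitelisted keys, silently skipping the ones that are absent
    return {k: dictionary[k] for k in keys if k in dictionary}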
Example #5
def image_proxy():
    url = request.args.get('url')

    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024

    try:
        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
        headers['User-Agent'] = gen_useragent()
        stream = http_stream(
            method='GET',
            url=url,
            headers=headers,
            timeout=settings['outgoing']['request_timeout'],
            allow_redirects=True,
            max_redirects=20)

        resp = next(stream)
        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code == 304:
            return '', resp.status_code

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('content-type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
            return '', 400

        headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

        total_length = 0

        def forward_chunk():
            nonlocal total_length
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
    except httpx.HTTPError:
        return '', 400
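This streaming variant (and Example #8 below) pulls data through an http_stream helper whose first yielded item is the response object (resp = next(stream)) and whose later items are the body chunks (for chunk in stream). A rough httpx-based sketch of that protocol, assumed rather than taken from the project, might look like:

import httpx


def http_stream(method, url, headers=None, timeout=None):
    # Yield the response first, then the raw body chunks, matching the callers
    # above. The real helper presumably also handles redirects and proxies.
    with httpx.stream(method, url, headers=headers, timeout=timeout) as resp:
        yield resp
        yield from resp.iter_bytes()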
Example #6
def image_proxy():
    url = request.args.get('url')

    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    headers = dict_subset(request.headers,
                          {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = requests.get(url,
                        stream=True,
                        timeout=settings['outgoing']['request_timeout'],
                        headers=headers,
                        proxies=outgoing_proxies)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(
            resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    if not resp.headers.get('content-type', '').startswith('image/'):
        logger.debug('image-proxy: wrong content-type: {0}'.format(
            resp.headers.get('content-type')))
        return '', 400

    img = b''
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {
        'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'
    })

    return Response(img,
                    mimetype=resp.headers['content-type'],
                    headers=headers)
Example #7
def response(resp):
    results = []
    dom = html.fromstring(resp.text)

    try:
        results.append({'number_of_results': int(dom.xpath('//span[@class="nums_text"]/text()')[0]
                                                 .split(u'\u7ea6')[1].split(u'\u4e2a')[0].replace(',', ''))})
    except Exception as e:
        logger.debug('result error :\n%s', e)

    # parse results
    for result in dom.xpath('//div[@class="result c-container new-pmd"]'):
        title = extract_text(result.xpath('.//h3/a')[0])

        # when the search query is in Chinese
        try:
            url = result.xpath('.//h3[@class="t"]/a')[0].attrib.get('href')
            url = get_baidu_link_location(url)
            
            content = extract_text((result.xpath('.//div[@class="c-abstract"]') or
                                    result.xpath('.//div[@class="c-abstract c-abstract-en"]')))

            # append result
            results.append({'url': url, 'title': title, 'content': content})

        # when the search query is in English
        except Exception:
            try:
                url = result.xpath('.//h3[@class="t"]/a')[0].attrib.get('href')
                content = extract_text(result.xpath('.//div[@class="c-span18 c-span-last"]')[0])
                # generate the miji url from the baidu url via the result proxy
                url = settings['result_proxy'].get('server_name') + "/url_proxy?proxyurl=" + \
                    url + "&token=" + new_hmac(settings['result_proxy']['key'], url.encode("utf-8"))

                # append result
                results.append({'url': url,
                                'title': title,
                                'content': content})
            except Exception as e:
                logger.debug('result error :\n%s', e)

    # return results
    return results
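The Baidu parser signs the proxied link by appending token=new_hmac(key, url). On the /url_proxy side, verification would presumably just recompute that token from the incoming proxyurl parameter and compare the two values; a hedged sketch (is_valid_token is a hypothetical name):

import hmac


def is_valid_token(proxyurl, token):
    # recompute the expected token and compare it in constant time
    expected = new_hmac(settings['result_proxy']['key'], proxyurl.encode('utf-8'))
    return hmac.compare_digest(expected, token)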
Example #8
def image_proxy():
    # pylint: disable=too-many-return-statements, too-many-branches

    url = request.args.get('url')

    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024
    forward_resp = False
    resp = None
    try:
        request_headers = {
            'User-Agent': gen_useragent(),
            'Accept': 'image/webp,*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Sec-GPC': '1',
            'DNT': '1',
        }
        set_context_network_name('image_proxy')
        stream = http_stream(method='GET',
                             url=url,
                             headers=request_headers,
                             timeout=settings['outgoing']['request_timeout'],
                             follow_redirects=True,
                             max_redirects=20)

        resp = next(stream)
        content_length = resp.headers.get('Content-Length')
        if (content_length and content_length.isdigit()
                and int(content_length) > maximum_size):
            return 'Max size', 400

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(
                resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('Content-Type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: %s',
                         resp.headers.get('Content-Type', ''))
            return '', 400

        forward_resp = True
    except httpx.HTTPError:
        logger.exception('HTTP error')
        return '', 400
    finally:
        if resp and not forward_resp:
            # the code is about to return an HTTP error to the browser,
            # so make sure the response between searxng and the HTTP server is closed
            try:
                resp.close()
            except httpx.HTTPError:
                logger.exception('HTTP error on closing')

    try:
        headers = dict_subset(
            resp.headers,
            {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'})

        def forward_chunk():
            total_length = 0
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(),
                        mimetype=resp.headers['Content-Type'],
                        headers=headers)
    except httpx.HTTPError:
        return '', 400