Example #1
    def search_multiple_requests(self, requests):
        search_id = uuid4().__str__()

        for engine_name, query, request_params in requests:
            th = threading.Thread(
                target=processors[engine_name].search,
                args=(query, request_params, self.result_container,
                      self.start_time, self.actual_timeout),
                name=search_id,
            )
            th._timeout = False
            th._engine_name = engine_name
            th.start()

        for th in threading.enumerate():
            if th.name == search_id:
                remaining_time = max(
                    0.0, self.actual_timeout - (time() - self.start_time))
                th.join(remaining_time)
                if th.is_alive():
                    th._timeout = True
                    self.result_container.add_unresponsive_engine(
                        th._engine_name, 'timeout')
                    logger.warning('engine timeout: {0}'.format(
                        th._engine_name))
Example #2
def search_multiple_requests(requests, result_container, start_time, timeout_limit):
    search_id = uuid4().__str__()

    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(
                engine_name,
                query,
                request_params,
                result_container,
                start_time,
                timeout_limit,
            ),
            name=search_id,
        )
        th._timeout = False
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == search_id:
            remaining_time = max(0.0, timeout_limit - (time() - start_time))
            th.join(remaining_time)
            if th.is_alive():
                th._timeout = True
                result_container.add_unresponsive_engine(th._engine_name, "timeout")
                logger.warning("engine timeout: {0}".format(th._engine_name))
Example #3
def search_multiple_requests(requests, result_container, start_time,
                             timeout_limit):
    from searx.webapp import sentry
    search_id = uuid4().__str__()

    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(engine_name, query, request_params, result_container,
                  start_time, timeout_limit),
            name=search_id,
        )
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == search_id:
            remaining_time = max(0.0, timeout_limit - (time() - start_time))
            th.join(remaining_time)
            if th.isAlive():
                result_container.add_unresponsive_engine(
                    (th._engine_name, gettext('timeout')))
                logger.warning('engine timeout: {0}'.format(th._engine_name))
                sentry.captureMessage('engine timeout: {0}'.format(
                    th._engine_name))
Example #4
async def arequest(self, method, url, headers=None, stream=None, ext=None):
    retry = 2
    while retry > 0:
        retry -= 1
        try:
            return await super().arequest(method, url, headers, stream,
                                          ext)
        except (python_socks._errors.ProxyConnectionError,
                python_socks._errors.ProxyTimeoutError,
                python_socks._errors.ProxyError) as e:
            raise httpcore.ProxyError(e)
        except OSError as e:
            # socket.gaierror when DNS resolution fails
            raise httpcore.NetworkError(e)
        except httpcore.RemoteProtocolError as e:
            # in case of httpcore.RemoteProtocolError: Server disconnected
            await close_connections_for_url(self, url)
            logger.warning('httpcore.RemoteProtocolError: retry',
                           exc_info=e)
            # retry
        except (httpcore.NetworkError, httpcore.ProtocolError) as e:
            # httpcore.WriteError on an HTTP/2 connection leaves a newly opened stream;
            # each new request then creates a new stream and raises the same WriteError
            await close_connections_for_url(self, url)
            raise e
Example #5
def get_client_id():
    response = http_get("https://soundcloud.com")
    rx_namespace = {"re": "http://exslt.org/regular-expressions"}

    if response.ok:
        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]",
                                 namespaces=rx_namespace)
        app_js_urls = [
            script_tag.get('src') for script_tag in script_tags
            if script_tag is not None
        ]

        # extracts valid app_js urls from soundcloud.com content
        for app_js_url in app_js_urls:
            # gets app_js and searches for the clientid
            response = http_get(app_js_url)
            if response.ok:
                cids = re.search(r'client_id:"([^"]*)"', response.content,
                                 re.M | re.I)
                if cids is not None and len(cids.groups()):
                    return cids.groups()[0]
    logger.warning(
        "Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Example #6
async def arequest(self, method, url, headers=None, stream=None, ext=None):
    retry = 2
    while retry > 0:
        retry -= 1
        try:
            return await super().arequest(method, url, headers, stream,
                                          ext)
        except OSError as e:
            # socket.gaierror when DNS resolution fails
            raise httpcore.ConnectError(e)
        except httpcore.CloseError as e:
            # httpcore.CloseError: [Errno 104] Connection reset by peer
            # raised by _keepalive_sweep()
            #   from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # noqa
            await close_connections_for_url(self._pool, url)
            logger.warning('httpcore.CloseError: retry', exc_info=e)
            # retry
        except httpcore.RemoteProtocolError as e:
            # in case of httpcore.RemoteProtocolError: Server disconnected
            await close_connections_for_url(self._pool, url)
            logger.warning('httpcore.RemoteProtocolError: retry',
                           exc_info=e)
            # retry
        except (httpcore.ProtocolError, httpcore.NetworkError) as e:
            await close_connections_for_url(self._pool, url)
            raise e
Example #7
async def close_connections_for_url(
        connection_pool: httpcore.AsyncConnectionPool,
        url: httpcore._utils.URL):
    origin = httpcore._utils.url_to_origin(url)
    logger.debug('Drop connections for %r', origin)
    connections_to_close = connection_pool._connections_for_origin(origin)
    for connection in connections_to_close:
        await connection_pool._remove_from_pool(connection)
        try:
            await connection.aclose()
        except httpcore.NetworkError as e:
            logger.warning('Error closing an existing connection', exc_info=e)
Example #8
async def close_connections_for_url(
        connection_pool: httpcore.AsyncConnectionPool, url: httpx._models.URL):
    logger.debug('Drop connections for %r', url.host)
    connections_to_close = [
        conn for conn in connection_pool._pool if conn._origin == url.host
    ]
    for connection in connections_to_close:
        connection_pool._pool.remove(connection)
        try:
            await connection.aclose()
        except httpx.NetworkError as e:
            logger.warning('Error closing an existing connection', exc_info=e)
Example #9
def format_date_by_locale(date_string, locale_string):
    # strftime works only on dates after 1900
    parsed_date = dateutil.parser.parse(date_string)
    if parsed_date.year <= 1900:
        return parsed_date.isoformat().split('T')[0]

    orig_locale = locale.getlocale()[0]
    try:
        locale.setlocale(locale.LC_ALL, locale_string)
    except:
        logger.warning('cannot set locale: {0}'.format(locale_string))
    formatted_date = parsed_date.strftime(locale.nl_langinfo(locale.D_FMT))
    try:
        locale.setlocale(locale.LC_ALL, orig_locale)
    except:
        logger.warning('cannot set original locale: {0}'.format(orig_locale))
    return formatted_date
Example #10
def get_client_id():
    response = http_get("https://soundcloud.com")

    if response.ok:
        tree = html.fromstring(response.content)
        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
        app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]

        # extracts valid app_js urls from soundcloud.com content
        for app_js_url in app_js_urls:
            # gets app_js and searches for the clientid
            response = http_get(app_js_url)
            if response.ok:
                cids = cid_re.search(response.text)
                if cids is not None and len(cids.groups()):
                    return cids.groups()[0]
    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Example #11
def search_multiple_requests(requests, result_container, start_time, timeout_limit):
    search_id = uuid4().__str__()

    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(engine_name, query, request_params, result_container, start_time, timeout_limit),
            name=search_id,
        )
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == search_id:
            remaining_time = max(0.0, timeout_limit - (time() - start_time))
            th.join(remaining_time)
            if th.isAlive():
                logger.warning('engine timeout: {0}'.format(th._engine_name))
Example #12
File: search.py Project: XkaV/searx
def search_multiple_requests(requests, result_container, timeout_limit):
    start_time = time()
    search_id = uuid4().__str__()

    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request,
            args=(engine_name, query, request_params, result_container, timeout_limit),
            name=search_id,
        )
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == search_id:
            remaining_time = max(0.0, timeout_limit - (time() - start_time))
            th.join(remaining_time)
            if th.isAlive():
                logger.warning('engine timeout: {0}'.format(th._engine_name))
Example #13
def get_client_id():
    response = http_get("https://soundcloud.com")
    rx_namespace = {"re": "http://exslt.org/regular-expressions"}

    if response.ok:
        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
        app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]

        # extracts valid app_js urls from soundcloud.com content
        for app_js_url in app_js_urls:
            # gets app_js and searches for the clientid
            response = http_get(app_js_url)
            if response.ok:
                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
                if cids is not None and len(cids.groups()):
                    return cids.groups()[0]
    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Example #14
def threaded_requests(requests):
    timeout_limit = max(r[2]['timeout'] for r in requests)
    search_start = time()
    for fn, url, request_args, engine_name in requests:
        request_args['timeout'] = timeout_limit
        th = threading.Thread(
            target=search_request_wrapper,
            args=(fn, url, engine_name),
            kwargs=request_args,
            name='search_request',
        )
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == 'search_request':
            remaining_time = max(0.0, timeout_limit - (time() - search_start))
            th.join(remaining_time)
            if th.isAlive():
                logger.warning('engine timeout: {0}'.format(th._engine_name))
Example #16
async def handle_async_request(self, request: httpx.Request):
    retry = 2
    while retry > 0:
        retry -= 1
        try:
            return await super().handle_async_request(request)
        except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e:
            raise httpx.ProxyError(e)
        except OSError as e:
            # socket.gaierror when DNS resolution fails
            raise httpx.NetworkError(e)
        except httpx.RemoteProtocolError as e:
            # in case of httpx.RemoteProtocolError: Server disconnected
            await close_connections_for_url(self, request.url)
            logger.warning('httpx.RemoteProtocolError: retry', exc_info=e)
            # retry
        except (httpx.NetworkError, httpx.ProtocolError) as e:
            # httpx.WriteError on an HTTP/2 connection leaves a newly opened stream;
            # each new request then creates a new stream and raises the same WriteError
            await close_connections_for_url(self, request.url)
            raise e
Example #17
def pre_request():
    # merge GET, POST vars
    preferences = Preferences(themes, categories.keys(), engines, plugins)
    try:
        preferences.parse_cookies(request.cookies)
    except:
        # TODO throw error message to the user
        logger.warning('Invalid config')
    request.preferences = preferences

    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
Example #19
def response(resp):
    results = []
    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, results_xpath):

        single_result = {'template': template}

        for single_field in field_definition:
            single_field = {**default_field_settings, **single_field}
            try:
                if single_field['single_element']:
                    node = eval_xpath(result, single_field['xpath'])
                else:
                    node = eval_xpath_list(result, single_field['xpath'])

                if single_field.get('extract') == 'url':
                    value = extract_url(node, search_url)
                elif single_field.get('extract') == 'boolean':
                    value = (isinstance(node, list) and len(node) > 0)
                elif single_field.get('extract') == 'boolean_negate':
                    value = (isinstance(node, list) and len(node) < 1)
                else:
                    value = extract_text(node)

                single_result[single_field['field_name']] = value
            except Exception as e:
                logger.warning('error in resolving field %s:\n%s',
                               single_field['field_name'], e)
                single_result[single_field['field_name']] = unresolvable_value

        results.append(single_result)

    return results
Example #20
def get_client_id():
    response = http_get("https://soundcloud.com")

    if response.ok:
        tree = html.fromstring(response.content)
        # script_tags has been moved from /assets/app/ to /assets/ path.  I
        # found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
        script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
        app_js_urls = [
            script_tag.get('src') for script_tag in script_tags
            if script_tag is not None
        ]

        # extracts valid app_js urls from soundcloud.com content
        for app_js_url in app_js_urls[::-1]:
            # gets app_js and searches for the clientid
            response = http_get(app_js_url)
            if response.ok:
                cids = cid_re.search(response.content.decode())
                if cids is not None and len(cids.groups()):
                    return cids.groups()[0]
    logger.warning(
        "Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
Example #21
def get_proxy():
    try:
        args = settings['outgoing']['haipproxy_redis']
        fetcher = ProxyFetcher('zhihu', strategy='greedy', redis_args=args)
        proxy = fetcher.get_proxy()
        if proxy:
            return {'http': proxy}
        else:
            logger.warning('No available proxy fetched from the proxy pool.')
    except Exception:
        logger.warning('Exception in fetching proxy.')
        logger.warning(traceback.format_exc())
Example #22
try:
    import OpenSSL.SSL  # NOQA
except ImportError:
    logger.critical("The pyopenssl package has to be installed.\n"
                    "Some HTTPS connections will fail")

try:
    from cStringIO import StringIO
except:
    from io import StringIO


if sys.version_info[0] == 3:
    unicode = str
    PY3 = True
else:
    PY3 = False
    logger.warning('\033[1;31m *** Deprecation Warning ***\033[0m')
    logger.warning('\033[1;31m Python2 is deprecated\033[0m')

# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))

# about static
static_path = get_resources_directory(searx_dir, 'static', settings['ui']['static_path'])
logger.debug('static directory is %s', static_path)
static_files = get_static_files(static_path)

# about templates
default_theme = settings['ui']['default_theme']
templates_path = get_resources_directory(searx_dir, 'templates', settings['ui']['templates_path'])
logger.debug('templates directory is %s', templates_path)
Example #23
try:
    import OpenSSL.SSL  # NOQA
except ImportError:
    logger.critical("The pyopenssl package has to be installed.\n"
                    "Some HTTPS connections will fail")

try:
    from cStringIO import StringIO
except:
    from io import StringIO

if sys.version_info[0] == 3:
    unicode = str
    PY3 = True
else:
    logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
    exit(1)

# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get(
    'http_protocol_version', '1.0'))

# about static
static_path = get_resources_directory(searx_dir, 'static',
                                      settings['ui']['static_path'])
logger.debug('static directory is %s', static_path)
static_files = get_static_files(static_path)

# about templates
default_theme = settings['ui']['default_theme']