def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    engine = engines[engine_name]

    try:
        search_results = search_one_offline_request(engine, query, request_params)

        if search_results:
            result_container.extend(engine_name, search_results)

            engine_time = time() - start_time
            result_container.add_timing(engine_name, engine_time, engine_time)
            with threading.RLock():
                engine.stats['engine_time'] += engine_time
                engine.stats['engine_time_count'] += 1

    except ValueError as e:
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        logger.exception('engine {0} : invalid input : {1}'.format(engine_name, e))
    except Exception as e:
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        result_container.add_unresponsive_engine(engine_name, 'unexpected crash', str(e))
        logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
def search_one_offline_request_safe(
    engine_name, query, request_params, result_container, start_time, timeout_limit
):
    engine = engines[engine_name]

    try:
        search_results = search_one_offline_request(engine, query, request_params)

        if search_results:
            result_container.extend(engine_name, search_results)

            engine_time = time() - start_time
            result_container.add_timing(engine_name, engine_time, engine_time)
            with threading.RLock():
                engine.stats["engine_time"] += engine_time
                engine.stats["engine_time_count"] += 1

    except ValueError as e:
        record_exception(engine_name, e)
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        logger.exception("engine {0} : invalid input : {1}".format(engine_name, e))
    except Exception as e:
        record_exception(engine_name, e)
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        result_container.add_unresponsive_engine(
            engine_name, "unexpected crash", str(e)
        )
        logger.exception("engine {0} : exception : {1}".format(engine_name, e))
    else:
        if getattr(threading.current_thread(), "_timeout", False):
            record_error(engine_name, "Timeout")
def pre_request():
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
    request.preferences = preferences
    try:
        preferences.parse_dict(request.cookies)
    except Exception:
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:
            logger.exception('invalid settings')
            request.errors.append(gettext('Invalid settings'))

    # request.user_plugins
    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
def load_engine(engine_data):

    if '_' in engine_data['name']:
        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
        sys.exit(1)

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = map(
                    str.strip, engine_data['categories'].split(','))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.iteritems():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'page_load_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def engine_init(engine_name, init_fn):
    try:
        init_fn(get_engine_from_settings(engine_name))
    except Exception:
        logger.exception('%s engine: Failed to initialize', engine_name)
    else:
        logger.debug('%s engine: Initialized', engine_name)
def pre_request():
    request.start_time = time()
    request.timings = []
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines)
    request.preferences = preferences
    try:
        preferences.parse_dict(request.cookies)
    except:
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:
            logger.exception('invalid settings')
            request.errors.append(gettext('Invalid settings'))
def run():
    if not running.acquire(blocking=False):
        return
    try:
        logger.info('Starting checker')
        result = {'status': 'ok', 'engines': {}}
        for name, processor in processors.items():
            logger.debug('Checking %s engine', name)
            checker = Checker(processor)
            checker.run()
            if checker.test_results.succesfull:
                result['engines'][name] = {'success': True}
            else:
                result['engines'][name] = {
                    'success': False,
                    'errors': checker.test_results.errors
                }

        _set_result(result)
        logger.info('Check done')
    except Exception:
        _set_result({'status': 'error'})
        logger.exception('Error while running the checker')
    finally:
        running.release()
def pre_request():
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
    request.preferences = preferences
    try:
        preferences.parse_dict(request.cookies)
    except:
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:
            logger.exception('invalid settings')
            request.errors.append(gettext('Invalid settings'))

    # request.user_plugins
    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    engine = engines[engine_name]

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params, start_time, timeout_limit)

        # add results
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1

        return True

    except Exception as e:
        engine.stats['errors'] += 1

        search_duration = time() - start_time
        requests_exception = False

        if (issubclass(e.__class__, requests.exceptions.Timeout)):
            result_container.add_unresponsive_engine((engine_name, gettext('timeout')))
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
            result_container.add_unresponsive_engine((engine_name, gettext('request exception')))
            # other requests exception
            logger.exception("engine {0} : requests exception"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            result_container.add_unresponsive_engine((engine_name, gettext('unexpected crash')))
            # others errors
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

        # update continuous_errors / suspend_end_time
        if requests_exception:
            with threading.RLock():
                engine.continuous_errors += 1
                engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        #
        return False
def search(self, query, params, result_container, start_time, timeout_limit):
    try:
        search_results = self._search_basic(query, params)

        if search_results:
            result_container.extend(self.engine_name, search_results)

            engine_time = time() - start_time
            result_container.add_timing(self.engine_name, engine_time, engine_time)
            with threading.RLock():
                self.engine.stats['engine_time'] += engine_time
                self.engine.stats['engine_time_count'] += 1

    except ValueError as e:
        record_exception(self.engine_name, e)
        self._record_stats_on_error(result_container, start_time)
        logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
    except Exception as e:
        record_exception(self.engine_name, e)
        self._record_stats_on_error(result_container, start_time)
        result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash', str(e))
        logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
    else:
        if getattr(threading.current_thread(), '_timeout', False):
            record_error(self.engine_name, 'Timeout')
def load_engine(engine_data):
    """Load engine from ``engine_data``.

    :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
    3. update namespace with values from ``engine_data``

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace
    fails for one of the following reasons:

    - engine name contains underscore
    - engine name is not lowercase
    - required attribute is not set :py:func:`is_missing_required_attributes`
    """
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

    if engine_name.lower() != engine_name:
        logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    engine_module = engine_data['engine']
    try:
        engine = load_module(engine_module + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        logger.exception('Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except BaseException:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    update_engine_attributes(engine, engine_data)
    set_language_attributes(engine)
    update_attributes_for_tor(engine)

    if not is_engine_active(engine):
        return None

    if is_missing_required_attributes(engine):
        return None

    return engine
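# A minimal, hypothetical usage sketch for load_engine() as documented in the
# docstring above. The 'example' name, module and shortcut values are
# illustrative only; real entries come from the YAML ``settings:engines``
# section.
example_engine = load_engine({
    'name': 'example',    # must be lowercase and contain no underscore
    'engine': 'example',  # module name, loaded from example.py in ENGINE_DIR
    'shortcut': 'ex',
    'categories': 'general',
})
if example_engine is None:
    logger.error('engine "example" is inactive or failed to initialize')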
def load_engine(engine_data):
    if '_' in engine_data['name']:
        logger.error('Engine name contains underscore: "{}"'.format(
            engine_data['name']))
        sys.exit(1)

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py')
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = map(str.strip,
                                        engine_data['categories'].split(','))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.iteritems():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(
                engine.name, engine_attr))
            sys.exit(1)

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(
            engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def send_http_request(engine, request_params, timeout_limit):
    response = None
    try:
        # create dictionary which contains all
        # information about the request
        request_args = dict(headers=request_params['headers'],
                            cookies=request_params['cookies'],
                            timeout=timeout_limit,
                            verify=request_params['verify'])

        # specific type of request (GET or POST)
        if request_params['method'] == 'GET':
            req = requests_lib.get
        else:
            req = requests_lib.post
            request_args['data'] = request_params['data']

        # for page_load_time stats
        time_before_request = time()

        # send the request
        response = req(request_params['url'], **request_args)

        with threading.RLock():
            # no error : reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
            # update stats with current page-load-time
            # only the HTTP request
            engine.stats['page_load_time'] += time() - time_before_request
            engine.stats['page_load_count'] += 1

        # is there a timeout (no parsing in this case)
        timeout_overhead = 0.2  # seconds
        search_duration = time() - request_params['started']
        if search_duration > timeout_limit + timeout_overhead:
            logger.exception('engine timeout on HTTP request:'
                             '{0} (search duration : {1} s, time-out: {2})'
                             .format(engine.name, search_duration, timeout_limit))
            with threading.RLock():
                engine.stats['errors'] += 1
            return False

        # everything is ok : return the response
        return response

    except:
        # increase errors stats
        with threading.RLock():
            engine.stats['errors'] += 1
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine.name))
        return False
def send_http_request(engine, request_params, timeout_limit):
    response = None
    try:
        # create dictionary which contains all
        # information about the request
        request_args = dict(
            headers=request_params['headers'],
            cookies=request_params['cookies'],
            timeout=timeout_limit,
            verify=request_params['verify']
        )

        # specific type of request (GET or POST)
        if request_params['method'] == 'GET':
            req = requests_lib.get
        else:
            req = requests_lib.post
            request_args['data'] = request_params['data']

        # for page_load_time stats
        time_before_request = time()

        # send the request
        response = req(request_params['url'], **request_args)

        with threading.RLock():
            # no error : reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
            # update stats with current page-load-time
            # only the HTTP request
            engine.stats['page_load_time'] += time() - time_before_request
            engine.stats['page_load_count'] += 1

        # is there a timeout (no parsing in this case)
        timeout_overhead = 0.2  # seconds
        search_duration = time() - request_params['started']
        if search_duration > timeout_limit + timeout_overhead:
            logger.exception('engine timeout on HTTP request:'
                             '{0} (search duration : {1} s, time-out: {2})'
                             .format(engine.name, search_duration, timeout_limit))
            with threading.RLock():
                engine.stats['errors'] += 1
            return False

        # everything is ok : return the response
        return response

    except:
        # increase errors stats
        with threading.RLock():
            engine.stats['errors'] += 1
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine.name))
        return False
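# A hedged sketch (not part of the original sources) of the request_params
# dictionary that both send_http_request variants above read; the keys are
# taken from the accesses in the code, the values are illustrative only.
example_request_params = {
    'method': 'GET',          # chooses requests_lib.get vs requests_lib.post
    'url': 'https://example.org/search?q=test',
    'headers': {'User-Agent': 'Mozilla/5.0'},
    'cookies': {},
    'verify': True,           # TLS certificate verification flag
    'data': None,             # request body, only read for POST requests
    'started': time(),        # start timestamp used for the timeout check
}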
def search_request_wrapper(fn, url, engine_name, **kwargs):
    try:
        return fn(url, **kwargs)
    except:
        # increase errors stats
        engines[engine_name].stats['errors'] += 1

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine_name))
        return
def engine_init(engine_name, init_fn):
    try:
        init_fn(get_engine_from_settings(engine_name))
    except SearxEngineResponseException as exc:
        logger.warn('%s engine: Failed to initialize // %s', engine_name, exc)
    except Exception:
        logger.exception('%s engine: Failed to initialize', engine_name)
    else:
        logger.debug('%s engine: Initialized', engine_name)
def initialize(self):
    try:
        self.engine.init(get_engine_from_settings(self.engine_name))
    except SearxEngineResponseException as exc:
        logger.warn('%s engine: Failed to initialize // %s', self.engine_name, exc)
    except Exception:  # pylint: disable=broad-except
        logger.exception('%s engine: Failed to initialize', self.engine_name)
    else:
        logger.debug('%s engine: Initialized', self.engine_name)
def pre_request():
    request.start_time = default_timer()  # pylint: disable=assigning-non-slot
    request.render_time = 0  # pylint: disable=assigning-non-slot
    request.timings = []  # pylint: disable=assigning-non-slot
    request.errors = []  # pylint: disable=assigning-non-slot

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)  # pylint: disable=redefined-outer-name
    user_agent = request.headers.get('User-Agent', '').lower()
    if 'webkit' in user_agent and 'android' in user_agent:
        preferences.key_value_settings['method'].value = 'GET'
    request.preferences = preferences  # pylint: disable=assigning-non-slot

    try:
        preferences.parse_dict(request.cookies)
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())  # pylint: disable=assigning-non-slot
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:  # pylint: disable=broad-except
            logger.exception(e, exc_info=True)
            request.errors.append(gettext('Invalid settings'))

    # init search language and locale
    if not preferences.get_value("language"):
        preferences.parse_dict({"language": _get_browser_or_settings_language(request, LANGUAGE_CODES)})
    if not preferences.get_value("locale"):
        preferences.parse_dict({"locale": get_locale()})

    # request.user_plugins
    request.user_plugins = []  # pylint: disable=assigning-non-slot
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
def search(self, query, params, result_container, start_time, timeout_limit):
    try:
        search_results = self._search_basic(query, params)
        self.extend_container(result_container, start_time, search_results)
    except ValueError as e:
        # do not record the error
        logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
    except Exception as e:  # pylint: disable=broad-except
        self.handle_exception(result_container, e)
        logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
def pre_request():
    request.start_time = time()
    request.timings = []
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)

    user_agent = request.headers.get("User-Agent", "").lower()
    if "webkit" in user_agent and "android" in user_agent:
        preferences.key_value_settings["method"].value = "GET"

    request.preferences = preferences

    try:
        preferences.parse_dict(request.cookies)
    except:
        request.errors.append(gettext("Invalid settings, please edit your preferences"))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get("preferences"):
        preferences.parse_encoded_data(request.form["preferences"])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception:
            logger.exception("invalid settings")
            request.errors.append(gettext("Invalid settings"))

    # init search language and locale
    if not preferences.get_value("language"):
        preferences.parse_dict({"language": _get_browser_or_settings_language(request, LANGUAGE_CODES)})
    if not preferences.get_value("locale"):
        preferences.parse_dict({"locale": get_locale()})

    # request.user_plugins
    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if (plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins:
            request.user_plugins.append(plugin)
def load_engine(engine_data):
    if "_" in engine_data["name"]:
        logger.error('Engine name contains underscore: "{}"'.format(engine_data["name"]))
        sys.exit(1)

    engine_module = engine_data["engine"]

    try:
        engine = load_module(engine_module + ".py")
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == "engine":
            continue
        if param_name == "categories":
            if engine_data["categories"] == "none":
                engine.categories = []
            else:
                engine.categories = map(str.strip, engine_data["categories"].split(","))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.iteritems():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith("_"):
            continue
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr))
            sys.exit(1)

    engine.stats = {"result_count": 0, "search_count": 0, "page_load_time": 0, "score_count": 0, "errors": 0}

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error("Engine config error: ambiguous shortcut: {0}".format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def code_highlighter(codelines, language=None):
    if not language:
        language = 'text'

    try:
        # find lexer by programming language
        lexer = get_lexer_by_name(language, stripall=True)
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        # if lexer is not found, using default one
        lexer = get_lexer_by_name('text', stripall=True)

    html_code = ''
    tmp_code = ''
    last_line = None

    # parse lines
    for line, code in codelines:
        if not last_line:
            line_code_start = line

        # new codeblock is detected
        if last_line is not None and last_line + 1 != line:

            # highlight last codepart
            formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight")
            html_code = html_code + highlight(tmp_code, lexer, formatter)

            # reset conditions for next codepart
            tmp_code = ''
            line_code_start = line

        # add codepart
        tmp_code += code + '\n'

        # update line
        last_line = line

    # highlight last codepart
    formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight")
    html_code = html_code + highlight(tmp_code, lexer, formatter)

    return html_code
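# Usage sketch for code_highlighter(): ``codelines`` is an iterable of
# (line_number, text) pairs. The gap between line numbers 2 and 10 starts a
# new code block, so this hypothetical call yields two highlighted sections.
snippet_html = code_highlighter(
    [(1, 'def f():'), (2, '    return 1'), (10, 'print(f())')],
    language='python',
)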
def response(resp):
    dom = html.fromstring(resp.text)
    results = []

    for result in dom.xpath(results_xpath):
        try:
            res = {'url': result.xpath(url_xpath)[0],
                   'title': ''.join(result.xpath(title_xpath)),
                   'content': ''.join(result.xpath(content_xpath))}
        except:
            logger.exception('yandex parse crash')
            continue

        results.append(res)

    return results
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    engine = engines[engine_name]

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params, start_time, timeout_limit)

        # add results
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1

        return True

    except Exception as e:
        engine.stats['errors'] += 1

        search_duration = time() - start_time
        requests_exception = False

        if (issubclass(e.__class__, requests.exceptions.Timeout)):
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
            # other requests exception
            logger.exception("engine {0} : requests exception"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            # others errors
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

        # update continuous_errors / suspend_end_time
        if requests_exception:
            with threading.RLock():
                engine.continuous_errors += 1
                engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        #
        return False
def search(self, query, params, result_container, start_time, timeout_limit):
    # set timeout for all HTTP requests
    searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
    # reset the HTTP total time
    searx.network.reset_time_for_thread()
    # set the network
    searx.network.set_context_network_name(self.engine_name)

    try:
        # send requests and parse the results
        search_results = self._search_basic(query, params)
        self.extend_container(result_container, start_time, search_results)
    except (httpx.TimeoutException, asyncio.TimeoutError) as e:
        # requests timeout (connect or read)
        self.handle_exception(result_container, e, suspend=True)
        logger.error("engine {0} : HTTP requests timeout"
                     "(search duration : {1} s, timeout: {2} s) : {3}"
                     .format(self.engine_name, default_timer() - start_time,
                             timeout_limit, e.__class__.__name__))
    except (httpx.HTTPError, httpx.StreamError) as e:
        # other requests exception
        self.handle_exception(result_container, e, suspend=True)
        logger.exception("engine {0} : requests exception"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(self.engine_name, default_timer() - start_time,
                                 timeout_limit, e))
    except SearxEngineCaptchaException as e:
        self.handle_exception(result_container, e, suspend=True)
        logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
    except SearxEngineTooManyRequestsException as e:
        self.handle_exception(result_container, e, suspend=True)
        logger.exception('engine {0} : Too many requests'.format(self.engine_name))
    except SearxEngineAccessDeniedException as e:
        self.handle_exception(result_container, e, suspend=True)
        logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
    except Exception as e:  # pylint: disable=broad-except
        self.handle_exception(result_container, e)
        logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
def search_one_request(engine_name, query, request_params, result_container, timeout_limit):
    engine = engines[engine_name]

    # update request parameters dependent on
    # search-engine (contained in engines folder)
    engine.request(query, request_params)

    # TODO add support of offline engines
    if request_params['url'] is None:
        return False

    # ignoring empty urls
    if not request_params['url']:
        return False

    # send request
    response = send_http_request(engine, request_params, timeout_limit)

    # parse response
    success = None
    if response:
        # parse the response
        response.search_params = request_params
        try:
            search_results = engine.response(response)
        except:
            logger.exception('engine crash: {0}'.format(engine.name))
            search_results = []

        # add results
        for result in search_results:
            result['engine'] = engine.name

        result_container.extend(engine.name, search_results)

        success = True
    else:
        success = False

    with threading.RLock():
        # update stats : total time
        engine.stats['engine_time'] += time() - request_params['started']
        engine.stats['engine_time_count'] += 1

    return success
def pre_request():
    request.start_time = time()
    request.timings = []
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
    request.preferences = preferences
    try:
        preferences.parse_dict(request.cookies)
    except:
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:
            logger.exception('invalid settings')
            request.errors.append(gettext('Invalid settings'))

    # init search language and locale
    if not preferences.get_value("language"):
        preferences.parse_dict({"language": _get_browser_language(request, LANGUAGE_CODES)})
    if not preferences.get_value("locale"):
        preferences.parse_dict({"locale": get_locale()})

    # request.user_plugins
    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
def _is_url_image(image_url):
    if not isinstance(image_url, str):
        return False

    if image_url.startswith('//'):
        image_url = 'https:' + image_url

    if image_url.startswith('data:'):
        return image_url.startswith('data:image/')

    if not _is_url(image_url):
        return False

    retry = 2

    while retry > 0:
        a = time()
        try:
            network.set_timeout_for_thread(10.0, time())
            r = network.get(image_url, timeout=10.0, allow_redirects=True, headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US;q=0.5,en;q=0.3',
                'Accept-Encoding': 'gzip, deflate, br',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Sec-GPC': '1',
                'Cache-Control': 'max-age=0'
            })
            if r.headers["content-type"].startswith('image/'):
                return True
            return False
        except httpx.TimeoutException:
            logger.error('Timeout for %s: %i', image_url, int(time() - a))
            retry -= 1
        except httpx.HTTPError:
            logger.exception('Exception for %s', image_url)
            return False
def search_request_wrapper(fn, url, engine_name, **kwargs):
    ret = None
    engine = engines[engine_name]
    try:
        ret = fn(url, **kwargs)
        with threading.RLock():
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
    except:
        # increase errors stats
        with threading.RLock():
            engine.stats['errors'] += 1
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine_name))
    return ret
def response(resp):
    dom = html.fromstring(resp.text)
    results = []

    for result in dom.xpath(results_xpath):
        try:
            res = {
                'url': result.xpath(url_xpath)[0],
                'title': ''.join(result.xpath(title_xpath)),
                'content': ''.join(result.xpath(content_xpath))
            }
        except:
            logger.exception('yandex parse crash')
            continue

        results.append(res)

    return results
def pre_request():
    request.start_time = time()
    request.timings = []
    request.errors = []

    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
    request.preferences = preferences
    try:
        preferences.parse_dict(request.cookies)
    except:
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())
    for k, v in request.args.items():
        if k not in request.form:
            request.form[k] = v

    # TODO: caching mechanism
    # increment the cumulative count for this keyword
    kr.keyNum(request.form['q'])
    # check whether this form was already queried; if so, return the cached result directly
    if request.form.get('research') == '0' and kr.existsForm(json.dumps(request.form)):
        request.form['Result'] = kr.getResult(json.dumps(request.form))

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:
            logger.exception('invalid settings')
            request.errors.append(gettext('Invalid settings'))

    # request.user_plugins
    request.user_plugins = []
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if ((plugin.default_on and plugin.id not in disabled_plugins)
                or plugin.id in allowed_plugins):
            request.user_plugins.append(plugin)
def response(resp):
    dom = html.fromstring(resp.text)
    results = []

    for result in range(10):
        try:
            res = {
                'url': dom.xpath(url_xpath)[result],
                'title': ''.join(dom.xpath(title_xpath)[result]),
                'content': ''.join(dom.xpath(content_xpath)[result])
            }
        except:
            logger.exception('bidb parse crash')
            continue

        results.append(res)

    return results
def response(resp):
    resp_url = urlparse(resp.url)
    if resp_url.path.startswith('/showcaptcha'):
        raise SearxEngineCaptchaException()

    dom = html.fromstring(resp.text)
    results = []

    for result in dom.xpath(results_xpath):
        try:
            res = {'url': result.xpath(url_xpath)[0],
                   'title': ''.join(result.xpath(title_xpath)),
                   'content': ''.join(result.xpath(content_xpath))}
        except:
            logger.exception('yandex parse crash')
            continue

        results.append(res)

    return results
def load_engine(engine_data):

    if '_' in engine_data['name']:
        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
        sys.exit(1)

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in languages:
        setattr(engine, 'supported_languages', languages[engine_data['name']])

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'page_load_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def load_engine(engine_data):
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    if engine_name.lower() != engine_name:
        logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        logger.exception('Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name, param_value in engine_data.items():
        if param_name == 'engine':
            pass
        elif param_name == 'categories':
            if param_value == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, param_value.split(',')))
        else:
            setattr(engine, param_name, param_value)

    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # language_support
    setattr(engine, 'language_support', len(getattr(engine, 'supported_languages', [])) > 0)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3',  # bing needs a non-English language
        }
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)))

    # tor related settings
    if settings['outgoing'].get('using_tor_proxy'):
        # use onion url if using tor.
        if hasattr(engine, 'onion_url'):
            engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
    elif 'onions' in engine.categories:
        # exclude onion engines if not using tor.
        return None

    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get('q') is None:
        return render(
            'index.html',
        )

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext('search error'))
        logger.exception('search error')
        return render(
            'index.html',
        )

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get('advanced_search', None)
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(result['content'][:1024], search_query.query.encode('utf-8'))
            result['title'] = highlight_content(result['title'], search_query.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query,
                                    'number_of_results': number_of_results,
                                    'results': results}),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get("q") is None:
        return render("index.html")

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext("search error"))
        logger.exception("search error")
        return render("index.html")

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get("advanced_search", None)
    output_format = request.form.get("format", "html")
    if output_format not in ["html", "csv", "json", "rss"]:
        output_format = "html"

    # output
    for result in results:
        if output_format == "html":
            if "content" in result and result["content"]:
                result["content"] = highlight_content(
                    escape(result["content"][:1024]), search_query.query.encode("utf-8")
                )
            result["title"] = highlight_content(escape(result["title"] or u""), search_query.query.encode("utf-8"))
        else:
            if result.get("content"):
                result["content"] = html_to_text(result["content"]).strip()
            # removing html content and whitespace duplications
            result["title"] = " ".join(html_to_text(result["title"]).strip().split())

        result["pretty_url"] = prettify_url(result["url"])

        # TODO, check if timezone is calculated right
        if "publishedDate" in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result["pubdate"] = result["publishedDate"].strftime("%Y-%m-%d %H:%M:%S%z")
            except ValueError:
                result["publishedDate"] = None
            else:
                if result["publishedDate"].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result["publishedDate"].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result["publishedDate"] = gettext(u"{minutes} minute(s) ago").format(minutes=minutes)
                    else:
                        result["publishedDate"] = gettext(u"{hours} hour(s), {minutes} minute(s) ago").format(
                            hours=hours, minutes=minutes
                        )  # noqa
                else:
                    result["publishedDate"] = format_date(result["publishedDate"])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == "json":
        return Response(
            json.dumps(
                {
                    "query": search_query.query,
                    "number_of_results": number_of_results,
                    "results": results,
                    "answers": list(result_container.answers),
                    "infoboxes": result_container.infoboxes,
                    "suggestions": list(result_container.suggestions),
                }
            ),
            mimetype="application/json",
        )
    elif output_format == "csv":
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ("title", "url", "content", "host", "engine", "score")
        csv.writerow(keys)
        for row in results:
            row["host"] = row["parsed_url"].netloc
            csv.writerow([row.get(key, "") for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype="application/csv")
        cont_disp = "attachment;Filename=searx_-_{0}.csv".format(search_query.query.encode("utf-8"))
        response.headers.add("Content-Disposition", cont_disp)
        return response
    elif output_format == "rss":
        response_rss = render(
            "opensearch_response_rss.xml",
            results=results,
            q=request.form["q"],
            number_of_results=number_of_results,
            base_url=get_base_url(),
        )
        return Response(response_rss, mimetype="text/xml")

    return render(
        "results.html",
        results=results,
        q=request.form["q"],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
    )
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    # set timeout for all HTTP requests
    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
    # reset the HTTP total time
    requests_lib.reset_time_for_thread()

    #
    engine = engines[engine_name]

    # suppose everything will be alright
    requests_exception = False

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params)

        # add results
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1
            # update stats with the total HTTP time
            engine.stats['page_load_time'] += requests_lib.get_time_for_thread()
            engine.stats['page_load_count'] += 1

    except Exception as e:
        search_duration = time() - start_time

        with threading.RLock():
            engine.stats['errors'] += 1

        if (issubclass(e.__class__, requests.exceptions.Timeout)):
            result_container.add_unresponsive_engine((engine_name, gettext('timeout')))
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
            result_container.add_unresponsive_engine((engine_name, gettext('request exception')))
            # other requests exception
            logger.exception("engine {0} : requests exception"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            result_container.add_unresponsive_engine((
                engine_name,
                u'{0}: {1}'.format(gettext('unexpected crash'), e),
            ))
            # others errors
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

    # suspend or not the engine if there are HTTP errors
    with threading.RLock():
        if requests_exception:
            # update continuous_errors / suspend_end_time
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(60, engine.continuous_errors)
        else:
            # no HTTP error (perhaps an engine error)
            # anyway, reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines)},  # noqa
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(map(lambda suggestion: {
        'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
        'title': suggestion
    }, result_container.suggestions))

    correction_urls = list(map(lambda correction: {
        'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),
        'title': correction
    }, result_container.corrections))

    #
    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=suggestion_urls,
        answers=result_container.answers,
        corrections=correction_urls,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=__get_translated_errors(result_container.unresponsive_engines),
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=request.preferences.get_value("language")),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
        timeout_limit=request.form.get('timeout_limit', None)
    )