def request(query, params):
    """Build the Google search request: URL, abuse-exemption cookie, headers.

    Returns *params* with 'url', 'cookies', 'headers' and the engine-private
    'google_hostname' key filled in.
    """
    page_offset = (params['pageno'] - 1) * 10
    language = match_language(params['language'], supported_languages)
    lang_parts = language.split('-')

    # country: prefer the region of the raw request, then the matched locale,
    # then fall back to US
    if params['language'].find('-') > 0:
        country = params['language'].split('-')[1]
    elif len(lang_parts) == 2:
        country = lang_parts[1]
    else:
        country = 'US'

    # pick a country-specific google domain only when locale domains are enabled
    if use_locale_domain:
        hostname = country_to_hostname.get(country.upper(), default_hostname)
    else:
        hostname = default_hostname

    # original format: ID=3e2b6616cee08557:TM=5556667580:C=r:IP=4.1.12.5-:S=23ASdf0soFgF2d34dfgf-_22JJOmHdfgg
    params['cookies']['GOOGLE_ABUSE_EXEMPTION'] = 'x'

    params['url'] = search_url.format(
        offset=page_offset,
        query=urlencode({'q': query}),
        hostname=hostname,
        lang='lang_' + language,
        lang_short=language)
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_search.format(
            range=time_range_dict[params['time_range']])

    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    params['google_hostname'] = hostname
    return params
def request(query, params):
    """Fill in the request URL for a paginated, localized search."""
    locale = match_language(params['language'], supported_languages)
    query_args = urlencode({'search': query, 'localization': locale})
    params['url'] = search_url.format(query=query_args, pageno=params['pageno'])
    return params
def get_region_code(lang, lang_list=None):
    """Map a searx language code to the engine's region code.

    Returns ``None`` for the special value ``'all'``; otherwise matches *lang*
    against *lang_list* (defaulting to ``'wt-WT'`` when nothing matches) and
    returns the code with the country part first and lower-cased, e.g.
    ``'en-US'`` -> ``'us-en'``.
    """
    if lang == 'all':
        return None
    # BUGFIX: the default used to be a shared mutable list (``lang_list=[]``);
    # use None as the sentinel instead (backward compatible for all callers).
    if lang_list is None:
        lang_list = []
    lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
    lang_parts = lang_code.split('-')
    # country code goes first
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
def request(query, params):
    """Build the request URL unless an unsupported time range was requested."""
    time_range = params['time_range']
    # an unknown time filter leaves params untouched (no URL -> no request)
    if time_range and time_range not in time_range_dict:
        return params
    first_result = (params['pageno'] - 1) * 10 + 1
    language = match_language(params['language'], supported_languages, language_aliases)
    params['url'] = _get_url(query, language, first_result, time_range)
    return params
def request(query, params):
    """Prefix the query with a ``language:XX`` filter and build the URL."""
    first_result = (params['pageno'] - 1) * 10 + 1
    lang = match_language(params['language'], supported_languages, language_aliases)
    iso_code = lang.split('-')[0].upper()
    # query arrives as UTF-8 bytes (Python 2): decode, prepend the language
    # filter, then re-encode for urlencode
    filtered_query = u'language:{} {}'.format(iso_code, query.decode('utf-8')).encode('utf-8')
    path = search_string.format(query=urlencode({'q': filtered_query}), offset=first_result)
    params['url'] = base_url + path
    return params
def request(query, params):
    """Build a (possibly image-) search request URL for the engine."""
    region = match_language(params['language'], supported_languages)
    ui_language = region.split('-')[0]
    path = search_string.format(
        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
        page=params['pageno'])
    # image search query is something like 'image?{query}&page={page}'
    if params['category'] == 'images':
        path = 'image' + path
    params['url'] = base_url + path
    return params
def request(query, params):
    """Build the request URL and the language cookie."""
    first_result = (params['pageno'] - 1) * 10 + 1
    if params['language'] == 'all':
        language = 'en'
    else:
        matched = match_language(params['language'], supported_languages, language_aliases)
        language = matched.split('-')[0]
    params['url'] = search_url.format(
        offset=first_result,
        query=urlencode({'p': query}),
        lang=language)
    # TODO required?
    params['cookies']['sB'] = '"v=1&vm=p&fl=1&vl=lang_{lang}&sh=1&pn=10&rw=new'\
        .format(lang=language)
    return params
def response(resp):
    """Extract wikidata ids from the HTML page and fetch details for each.

    Collects the detail results of at most ``result_count`` ids.
    """
    results = []
    dom = fromstring(resp.text)
    wikidata_links = dom.xpath(wikidata_ids_xpath)
    language = match_language(resp.search_params['language'], supported_languages).split('-')[0]

    # TODO: make requests asynchronous to avoid timeout when result_count > 1
    for link in wikidata_links[:result_count]:
        wikidata_id = link.split('/')[-1]
        detail_url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
        detail_resp = get(detail_url)
        detail_json = loads(detail_resp.text)
        results += getDetail(detail_json, wikidata_id, language, resp.search_params['language'])

    return results
def request(query, params):
    """Build the request URL; normalize the language code for the engine."""
    time_range = params['time_range']
    if time_range and time_range not in time_range_dict:
        # unsupported time filter: leave params untouched
        return params
    first_result = (params['pageno'] - 1) * 10 + 1
    language = match_language(params['language'], supported_languages, language_aliases)
    # aliased codes are kept verbatim, everything else is reduced to its
    # primary subtag, then normalized to the engine's xx_yy form
    if language not in language_aliases.values():
        language = language.split('-')[0]
    language = language.replace('-', '_').lower()
    params['url'] = _get_url(query, first_result, language, time_range)
    # TODO required?
    params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
        .format(lang=language)
    return params
def request(query, params):
    """Build the request URL, mapping the engine category to a keyword."""
    offset = (params['pageno'] - 1) * 10
    if categories[0] and categories[0] in category_to_keyword:
        keyword = category_to_keyword[categories[0]]
    else:
        keyword = 'web'
    params['url'] = url.format(keyword=keyword,
                               query=urlencode({'q': query}),
                               offset=offset)
    # add language tag
    locale = match_language(params['language'], supported_languages)
    params['url'] += '&locale=' + locale.replace('-', '_').lower()
    return params
def request(query, params):
    """Build the Google request: URL, abuse-exemption cookie and headers.

    *params* is the searx request dict; it is returned with 'url', 'cookies',
    'headers' and the engine-private 'google_hostname' key filled in.
    """
    offset = (params['pageno'] - 1) * 10
    # 'all' and 'en-US' are both forced to en-GB — presumably to avoid a
    # US-specific Google variant; TODO confirm the motivation
    if params['language'] == 'all' or params['language'] == 'en-US':
        language = 'en-GB'
    else:
        language = match_language(params['language'], supported_languages, language_aliases)
    language_array = language.split('-')
    # country: prefer the region in the raw request, then the matched locale,
    # then fall back to US
    if params['language'].find('-') > 0:
        country = params['language'].split('-')[1]
    elif len(language_array) == 2:
        country = language_array[1]
    else:
        country = 'US'
    url_lang = 'lang_' + language
    # pick a country-specific google domain only when locale domains are enabled
    if use_locale_domain:
        google_hostname = country_to_hostname.get(country.upper(), default_hostname)
    else:
        google_hostname = default_hostname
    # original format: ID=3e2b6616cee08557:TM=5556667580:C=r:IP=4.1.12.5-:S=23ASdf0soFgF2d34dfgf-_22JJOmHdfgg
    params['cookies']['GOOGLE_ABUSE_EXEMPTION'] = 'x'
    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      hostname=google_hostname,
                                      lang=url_lang,
                                      lang_short=language)
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    # Force Internet Explorer 12 user agent to avoid loading the new UI that Searx can't parse
    params['headers']['User-Agent'] = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    params['google_hostname'] = google_hostname
    return params
def request(query, params):
    """Build a search request with paging, time-range, safesearch and language."""
    search_options = {'start': (params['pageno'] - 1) * number_of_results}

    if params['time_range'] in time_range_dict:
        search_options['tbs'] = time_range_attr.format(
            range=time_range_dict[params['time_range']])
    if safesearch and params['safesearch']:
        search_options['safe'] = 'on'

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      search_options=urlencode(search_options))
    language = match_language(params['language'], supported_languages).split('-')[0]
    if language:
        params['url'] += '&lr=lang_' + language
    return params
def request(query, params):
    """Build the request URL plus the safesearch and market cookies."""
    first_result = (params['pageno'] - 1) * 10 + 1
    path = search_string.format(query=urlencode({'q': query}), offset=first_result)
    market = match_language(params['language'], supported_languages, language_aliases).lower()

    # safesearch level is carried by the SRCHHPGUSR cookie
    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
    # market + UI language cookie
    params['cookies']['_EDGE_S'] = 'mkt=' + market +\
        '&ui=' + market + '&F=1'

    params['url'] = base_url + path
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_string.format(
            interval=time_range_dict[params['time_range']])
    return params
def request(query, params):
    """Build the request URL plus the safesearch and market cookies."""
    first_result = (params['pageno'] - 1) * 10 + 1

    # safesearch cookie
    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    # language cookie
    market = match_language(params['language'], supported_languages, language_aliases).lower()
    params['cookies']['_EDGE_S'] = 'mkt=' + market + '&F=1'

    # query and paging
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=first_result,
                                      number_of_results=number_of_results)

    # time range
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_string.format(
            interval=time_range_dict[params['time_range']])
    return params
def render(template_name, override_theme=None, **kwargs):
    """Render *template_name* under the current theme.

    Populates ``kwargs`` with everything the templates expect — request data,
    preferences, categories, i18n data, brand info, helper callables and
    plugin assets — then delegates to Flask's ``render_template``.  The time
    spent rendering is accumulated on ``request.render_time``.
    """
    # values from the HTTP requests
    kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors

    # values from the preferences
    kwargs['preferences'] = request.preferences
    kwargs['method'] = request.preferences.get_value('method')
    kwargs['autocomplete'] = request.preferences.get_value('autocomplete')
    kwargs['results_on_new_tab'] = request.preferences.get_value(
        'results_on_new_tab')
    kwargs['advanced_search'] = request.preferences.get_value(
        'advanced_search')
    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
    kwargs['theme'] = get_current_theme_name(override=override_theme)
    kwargs['all_categories'] = _get_ordered_categories()
    kwargs['categories'] = _get_enable_categories(kwargs['all_categories'])

    # i18n
    kwargs['language_codes'] = languages  # from searx.languages
    kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))

    locale = request.preferences.get_value('locale')
    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            request.preferences.get_value('language'), LANGUAGE_CODES)

    # values from settings
    kwargs['search_formats'] = [
        x for x in settings['search']['formats'] if x != 'html'
    ]

    # brand
    kwargs['instance_name'] = settings['general']['instance_name']
    kwargs['searx_version'] = VERSION_STRING
    kwargs['brand'] = brand

    # helpers to create links to other pages
    kwargs['url_for'] = url_for_theme  # override url_for function in templates
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None
    kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True)
    kwargs['get_result_template'] = get_result_template
    kwargs['opensearch_url'] = (
        url_for('opensearch')
        + '?'
        + urlencode({
            'method': kwargs['method'],
            'autocomplete': kwargs['autocomplete']
        }))

    # scripts from plugins
    kwargs['scripts'] = set()
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    # styles from plugins
    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    start_time = default_timer()
    result = render_template('{}/{}'.format(kwargs['theme'], template_name), **kwargs)
    request.render_time += default_timer() - start_time  # pylint: disable=assigning-non-slot
    return result
def request(query, params):
    """Build the request URL and set an Accept-Language header."""
    params['url'] = url.format(query=urlencode({'q': query}))
    matched = match_language(params['language'], supported_languages, language_aliases)
    params['headers']['Accept-Language'] = matched.split('-')[0]
    return params
def search():
    """Search query in q and return results.

    Supported outputs: html, json, csv, rss.
    """
    # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches
    # pylint: disable=too-many-statements

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in OUTPUT_FORMATS:
        output_format = 'html'
    if output_format not in settings['search']['formats']:
        # format is syntactically valid but disabled by the instance config
        flask.abort(403)

    # check if there is query (not None and not an empty string)
    if not request.form.get('q'):
        if output_format == 'html':
            return render(
                'index.html',
                selected_categories=get_selected_categories(
                    request.preferences, request.form),
            )
        return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query, _, _ = get_search_query_from_webapp(
            request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)  # pylint: disable=redefined-outer-name
        result_container = search.search()
    except SearxParameterException as e:
        # invalid user input -> client error
        logger.exception('search error: SearxParameterException')
        return index_error(output_format, e.message), 400
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # checkin for a external bang
    if result_container.redirect_url:
        return redirect(result_container.redirect_url)

    # Server-Timing header
    request.timings = result_container.get_timings()  # pylint: disable=assigning-non-slot

    # output: post-process each result for the chosen output format
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(
                    escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(
                    escape(result['title'] or ''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())
        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])
        # TODO, check if timezone is calculated right  # pylint: disable=fixme
        if result.get(
                'publishedDate'
        ):  # do not try to get a date from an empty string or a None type
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime(
                    '%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                # recent dates are shown as a relative "N minute(s) ago" text
                if result['publishedDate'].replace(
                        tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now(
                    ) - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(
                            '{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(
                            '{hours} hour(s), {minutes} minute(s) ago').format(
                                hours=hours, minutes=minutes)
                else:
                    result['publishedDate'] = format_date(
                        result['publishedDate'])

    if output_format == 'json':
        x = {
            'query': search_query.query,
            'number_of_results': number_of_results,
            'results': results,
            'answers': list(result_container.answers),
            'corrections': list(result_container.corrections),
            'infoboxes': result_container.infoboxes,
            'suggestions': list(result_container.suggestions),
            'unresponsive_engines':
            __get_translated_errors(result_container.unresponsive_engines)
        }
        # sets are not JSON-serializable; dump them as lists
        response = json.dumps(x,
                              default=lambda item: list(item)
                              if isinstance(item, set) else item)
        return Response(response, mimetype='application/json')

    if output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(
            search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response

    if output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(
        map(
            lambda suggestion: {
                'url': raw_text_query.changeQuery(suggestion).getFullQuery(),
                'title': suggestion
            }, result_container.suggestions))
    correction_urls = list(
        map(
            lambda correction: {
                'url': raw_text_query.changeQuery(correction).getFullQuery(),
                'title': correction
            }, result_container.corrections))

    return render('results.html',
                  results=results,
                  q=request.form['q'],
                  selected_categories=search_query.categories,
                  pageno=search_query.pageno,
                  time_range=search_query.time_range,
                  number_of_results=format_decimal(number_of_results),
                  suggestions=suggestion_urls,
                  answers=result_container.answers,
                  corrections=correction_urls,
                  infoboxes=result_container.infoboxes,
                  engine_data=result_container.engine_data,
                  paging=result_container.paging,
                  unresponsive_engines=__get_translated_errors(
                      result_container.unresponsive_engines),
                  current_language=match_language(
                      search_query.lang,
                      LANGUAGE_CODES,
                      fallback=request.preferences.get_value("language")),
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())],
                  timeout_limit=request.form.get('timeout_limit', None))
def url_lang(lang):
    """Return the bare ISO language code for *lang* (e.g. 'en-US' -> 'en')."""
    matched = match_language(lang, supported_languages)
    return matched.split('-')[0]
def _get_browser_language(request, lang_list):
    """Return the first Accept-Language entry matching *lang_list*, else None."""
    accepted = request.headers.get("Accept-Language", "en")
    for lang in accepted.split(","):
        locale = match_language(lang, lang_list, fallback=None)
        if locale is not None:
            return locale
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """
    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')
        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output: post-process each result for the chosen output format
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            # NOTE(review): unlike 'content', 'title' is accessed without a key
            # check here — verify every result dict carries a 'title'
            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                # recent dates are shown as a relative "N minute(s) ago" text
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        # sets are not JSON-serializable; dump them as lists
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
    """Derive language/country information for a Google-style request.

    Returns a dict with:
    - ``language``:  matched language code (en, fr, ...)
    - ``country``:   country code (US, AT, CA, ...)
    - ``subdomain``: the google host to query
    - ``params``:    URL parameters (``lr``, ``hl``, possibly ``source``)
    - ``headers``:   HTTP headers (``Accept-Language``)
    """
    ret_val = {}

    _lang = params['language']
    _any_language = _lang.lower() == 'all'
    if _any_language:
        _lang = 'en-US'

    language = match_language(_lang, lang_list, custom_aliases)
    ret_val['language'] = language

    # the requested language from params (en, en-US, de, de-AT, fr, fr-CA, ...)
    _l = _lang.split('-')

    # the country code (US, AT, CA)
    if len(_l) == 2:
        country = _l[1]
    else:
        country = _l[0].upper()
        if country == 'EN':
            country = 'US'
    ret_val['country'] = country

    # the combination (en-US, en-EN, de-DE, de-AU, fr-FR, fr-FR)
    lang_country = '%s-%s' % (language, country)

    # subdomain
    ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com')

    ret_val['params'] = {}
    ret_val['headers'] = {}

    if _any_language and supported_any_language:
        # based on whoogle
        ret_val['params']['source'] = 'lnt'
    else:
        # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
        # BUGFIX: the second entry used to end in ',' which, combined with the
        # ','.join below, emitted a doubled comma (an empty member) in the
        # header value.
        ret_val['headers']['Accept-Language'] = ','.join([
            lang_country,
            language + ';q=0.8',
            'en;q=0.6',
            '*;q=0.5',
        ])

        # lr parameter:
        # https://developers.google.com/custom-search/docs/xml_results#lrsp
        # Language Collection Values:
        # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
        ret_val['params'][
            'lr'] = "lang_" + lang_country if lang_country in lang_list else language

        # hl parameter:
        # https://developers.google.com/custom-search/docs/xml_results#hlsp The
        # Interface Language:
        # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
        ret_val['params'][
            'hl'] = lang_country if lang_country in lang_list else language

    return ret_val
def render(template_name, override_theme=None, **kwargs):
    """Render *template_name* with the full template context the themes expect.

    Fills ``kwargs`` with categories, preferences, i18n data, helper callables
    and plugin assets before delegating to Flask's ``render_template``.
    """
    disabled_engines = request.preferences.engines.get_disabled()

    # categories that still have at least one enabled engine
    enabled_categories = set(category for engine_name in engines
                             for category in engines[engine_name].categories
                             if (engine_name, category) not in disabled_engines)

    if 'categories' not in kwargs:
        kwargs['categories'] = [
            x for x in _get_ordered_categories() if x in enabled_categories
        ]

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = request.preferences.get_value('autocomplete')

    locale = request.preferences.get_value('locale')
    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING
    kwargs['method'] = request.preferences.get_value('method')
    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
    kwargs['language_codes'] = languages
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            request.preferences.get_value('language'), LANGUAGE_CODES)

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None
    kwargs['opensearch_url'] = url_for('opensearch') \
        + '?' \
        + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']})
    kwargs['get_result_template'] = get_result_template
    kwargs['theme'] = get_current_theme_name(override=override_theme)
    kwargs['template_name'] = template_name
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors
    kwargs['instance_name'] = settings['general']['instance_name']
    kwargs['results_on_new_tab'] = request.preferences.get_value(
        'results_on_new_tab')
    kwargs['preferences'] = request.preferences
    kwargs['brand'] = brand
    kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))

    # scripts and styles contributed by the user's plugins
    kwargs['scripts'] = set()
    kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    return render_template('{}/{}'.format(kwargs['theme'], template_name), **kwargs)
def load_engine(engine_data):
    """Instantiate an engine module from one settings.yml engine entry.

    Returns the configured engine module, or ``None`` when the module cannot
    be imported or the engine is marked ``inactive``.  Exits the process on
    fatal configuration errors (bad name, missing attribute, duplicate
    shortcut).
    """
    if '_' in engine_data['name']:
        # BUGFIX: typo in the error message ("conains" -> "contains")
        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
        sys.exit(1)

    engine_module = engine_data['engine']
    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except Exception:
        # BUGFIX: was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt; catch only real errors.
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    # apply the settings.yml parameters to the module
    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
            continue
        setattr(engine, param_name, engine_data[param_name])

    # fill in defaults for everything not configured
    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in languages:
        setattr(engine, 'supported_languages', languages[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine,
                'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'page_load_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        # BUGFIX: typo in the error message ("ambigious" -> "ambiguous")
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def _is_selected_language_supported(engine, preferences):
    """Truthy when the user's language is 'all' or the engine supports it.

    For a concrete language, the return value is whatever ``match_language``
    yields (the matched code, or None when unsupported).
    """
    language = preferences.get_value('language')
    if language == 'all':
        return True
    return match_language(language,
                          getattr(engine, 'supported_languages', []),
                          getattr(engine, 'language_aliases', {}),
                          None)
def url_lang(lang):
    """Map *lang* to the bare language code used in URLs, defaulting to 'en'."""
    primary = lang.split('-')[0]
    # wildcard or unknown primary subtags fall back to English
    if primary == 'all' or (primary not in supported_languages
                            and primary not in language_aliases):
        return 'en'
    return match_language(lang, supported_languages, language_aliases).split('-')[0]
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """
    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query = get_search_query_from_webapp(
            request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')
        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output: post-process each result for the chosen output format
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(
                    escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(
                    escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())
        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime(
                    '%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                # recent dates are shown as a relative "N minute(s) ago" text
                if result['publishedDate'].replace(
                        tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now(
                    ) - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(
                            u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(
                            u'{hours} hour(s), {minutes} minute(s) ago'
                        ).format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(
                        result['publishedDate'])

    if output_format == 'json':
        # sets are not JSON-serializable; dump them as lists
        return Response(json.dumps(
            {
                'query': search_query.query.decode('utf-8'),
                'number_of_results': number_of_results,
                'results': results,
                'answers': list(result_container.answers),
                'corrections': list(result_container.corrections),
                'infoboxes': result_container.infoboxes,
                'suggestions': list(result_container.suggestions),
                'unresponsive_engines': list(
                    result_container.unresponsive_engines)
            },
            default=lambda item: list(item)
            if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(
            search_query.query.decode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        # BUGFIX: removed a leftover debug ``print(results)`` that dumped
        # every result to stdout on each RSS request.
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(
        map(
            lambda suggestion: {
                'url': raw_text_query.changeSearchQuery(suggestion).
                getFullQuery(),
                'title': suggestion
            }, result_container.suggestions))
    correction_urls = list(
        map(
            lambda correction: {
                'url': raw_text_query.changeSearchQuery(correction).
                getFullQuery(),
                'title': correction
            }, result_container.corrections))

    return render('results.html',
                  results=results,
                  q=request.form['q'],
                  selected_categories=search_query.categories,
                  pageno=search_query.pageno,
                  time_range=search_query.time_range,
                  number_of_results=format_decimal(number_of_results),
                  advanced_search=advanced_search,
                  suggestions=suggestion_urls,
                  answers=result_container.answers,
                  corrections=correction_urls,
                  infoboxes=result_container.infoboxes,
                  paging=result_container.paging,
                  unresponsive_engines=result_container.unresponsive_engines,
                  current_language=match_language(
                      search_query.lang,
                      LANGUAGE_CODES,
                      fallback=request.preferences.get_value("language")),
                  base_url=get_base_url(),
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())],
                  timeout_limit=request.form.get('timeout_limit', None))
def render(template_name, override_theme=None, **kwargs):
    """Render a themed template after filling in the shared template context.

    Missing context entries (categories, selected categories, autocomplete,
    current language, ...) are derived from the user's preferences and the
    current request before delegating to Flask's render_template.
    """
    prefs = request.preferences
    disabled_engines = prefs.engines.get_disabled()

    # categories that still have at least one enabled engine
    enabled_categories = {cat
                          for engine_name in engines
                          for cat in engines[engine_name].categories
                          if (engine_name, cat) not in disabled_engines}

    if 'categories' not in kwargs:
        kwargs['categories'] = ['general'] + [
            cat for cat in sorted(categories.keys())
            if cat != 'general' and cat in enabled_categories]

    if 'all_categories' not in kwargs:
        kwargs['all_categories'] = ['general'] + [
            cat for cat in sorted(categories.keys()) if cat != 'general']

    if 'selected_categories' not in kwargs:
        kwargs['selected_categories'] = []
        for arg in request.args:
            if arg.startswith('category_'):
                candidate = arg.split('_', 1)[1]
                if candidate in categories:
                    kwargs['selected_categories'].append(candidate)

    # fall back to the categories stored in the preferences cookie
    if not kwargs['selected_categories']:
        for cookie_category in prefs.get_value('categories'):
            kwargs['selected_categories'].append(cookie_category)

    # last resort: the default category
    if not kwargs['selected_categories']:
        kwargs['selected_categories'] = ['general']

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = prefs.get_value('autocomplete')

    locale = prefs.get_value('locale')
    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING
    kwargs['method'] = prefs.get_value('method')
    kwargs['safesearch'] = str(prefs.get_value('safesearch'))
    kwargs['language_codes'] = languages

    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            prefs.get_value('language'), LANGUAGE_CODES)

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = (proxify
                         if settings.get('result_proxy', {}).get('url')
                         else None)
    kwargs['get_result_template'] = get_result_template
    kwargs['theme'] = get_current_theme_name(override=override_theme)
    kwargs['template_name'] = template_name
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors
    kwargs['instance_name'] = settings['general']['instance_name']
    kwargs['results_on_new_tab'] = prefs.get_value('results_on_new_tab')
    kwargs['unicode'] = unicode
    kwargs['preferences'] = prefs

    # assets requested by the active plugins
    kwargs['scripts'] = {script
                         for plugin in request.user_plugins
                         for script in plugin.js_dependencies}
    kwargs['styles'] = {css
                        for plugin in request.user_plugins
                        for css in plugin.css_dependencies}

    return render_template(
        '{}/{}'.format(kwargs['theme'], template_name), **kwargs)
def test_match_language(self):
    """Exercise utils.match_language: exact hits, fallback, country
    guessing and non-standard language-code aliases."""
    self.assertEqual(utils.match_language('es', ['es']), 'es')
    self.assertEqual(
        utils.match_language('es', [], fallback='fallback'), 'fallback')
    self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')

    aliases = {'en-GB': 'en-UK', 'he': 'iw'}

    # guess country when the exact locale is not offered
    for lang, choices, expected in (
            ('de-DE', ['de'], 'de'),
            ('de', ['de-DE'], 'de-DE'),
            ('es-CO', ['es-AR', 'es-ES', 'es-MX'], 'es-ES'),
            ('es-CO', ['es-MX'], 'es-MX'),
            ('en-UK', ['en-AU', 'en-GB', 'en-US'], 'en-GB')):
        self.assertEqual(utils.match_language(lang, choices), expected)
    self.assertEqual(
        utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases),
        'en-UK')

    # language aliases, checked in the original interleaved order
    for lang, choices, alias_map, expected in (
            ('iw', ['he'], None, 'he'),
            ('he', ['iw'], aliases, 'iw'),
            ('iw-IL', ['he'], None, 'he'),
            ('he-IL', ['iw'], aliases, 'iw'),
            ('iw', ['he-IL'], None, 'he-IL'),
            ('he', ['iw-IL'], aliases, 'iw-IL'),
            ('iw-IL', ['he-IL'], None, 'he-IL'),
            ('he-IL', ['iw-IL'], aliases, 'iw-IL')):
        if alias_map is None:
            self.assertEqual(utils.match_language(lang, choices), expected)
        else:
            self.assertEqual(
                utils.match_language(lang, choices, alias_map), expected)
def load_engine(engine_data):
    """Instantiate one search engine from its settings.yml entry.

    Returns the configured engine module, or None when the engine is
    marked inactive or its module fails to import.  Exits the process on
    fatal configuration errors (bad name, broken module, missing
    attribute, duplicate shortcut).
    """
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    if engine_name.lower() != engine_name:
        # fix: logger.warn is a deprecated alias of logger.warning
        logger.warning('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError,
            ImportError, RuntimeError):
        # these indicate a broken installation -> abort startup
        # (fix: dropped the unused "as e" binding)
        logger.exception('Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except:  # noqa: E722 -- any other failure only disables this one engine
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    # copy the yaml settings onto the module
    # (fix: iterate .items() instead of re-indexing the dict per key)
    for param_name, param_value in engine_data.items():
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if param_value == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, param_value.split(',')))
            continue
        setattr(engine, param_name, param_value)

    # fill in defaults for anything the yaml did not set
    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }
    if not engine.offline:
        engine.stats['page_load_time'] = 0
        engine.stats['page_load_count'] = 0

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        # fix: corrected "ambigious" typo in the error message
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def render(template_name, override_theme=None, **kwargs):
    """Render a themed template after filling in the shared template context.

    Context entries not supplied by the caller (categories, selected
    categories, autocomplete, current language, ...) are derived from the
    user's preferences and the current request before delegating to
    Flask's render_template.
    """
    prefs = request.preferences
    disabled_engines = prefs.engines.get_disabled()

    # categories that still have at least one enabled engine
    enabled_categories = {cat
                          for engine_name in engines
                          for cat in engines[engine_name].categories
                          if (engine_name, cat) not in disabled_engines}

    if 'categories' not in kwargs:
        kwargs['categories'] = ['general'] + [
            cat for cat in sorted(categories.keys())
            if cat != 'general' and cat in enabled_categories]

    if 'all_categories' not in kwargs:
        kwargs['all_categories'] = ['general'] + [
            cat for cat in sorted(categories.keys()) if cat != 'general']

    if 'selected_categories' not in kwargs:
        kwargs['selected_categories'] = []
        for arg in request.args:
            if arg.startswith('category_'):
                candidate = arg.split('_', 1)[1]
                if candidate in categories:
                    kwargs['selected_categories'].append(candidate)

    # fall back to the categories stored in the preferences cookie
    if not kwargs['selected_categories']:
        for cookie_category in prefs.get_value('categories'):
            kwargs['selected_categories'].append(cookie_category)

    # last resort: the default category
    if not kwargs['selected_categories']:
        kwargs['selected_categories'] = ['general']

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = prefs.get_value('autocomplete')

    if get_locale() in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING
    kwargs['method'] = prefs.get_value('method')
    kwargs['safesearch'] = str(prefs.get_value('safesearch'))
    kwargs['language_codes'] = languages

    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            prefs.get_value('language'), LANGUAGE_CODES,
            fallback=settings['search']['language'])

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = (proxify
                         if settings.get('result_proxy', {}).get('url')
                         else None)
    kwargs['get_result_template'] = get_result_template
    kwargs['theme'] = get_current_theme_name(override=override_theme)
    kwargs['template_name'] = template_name
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors
    kwargs['instance_name'] = settings['general']['instance_name']
    kwargs['results_on_new_tab'] = prefs.get_value('results_on_new_tab')
    kwargs['unicode'] = unicode
    kwargs['preferences'] = prefs

    # assets requested by the active plugins
    kwargs['scripts'] = {script
                         for plugin in request.user_plugins
                         for script in plugin.js_dependencies}
    kwargs['styles'] = {css
                        for plugin in request.user_plugins
                        for css in plugin.css_dependencies}

    return render_template(
        '{}/{}'.format(kwargs['theme'], template_name), **kwargs)
def test_match_language(self):
    """Exercise utils.match_language: exact hits, fallback, country
    guessing and non-standard language-code aliases."""
    self.assertEqual(utils.match_language('es', ['es']), 'es')
    self.assertEqual(
        utils.match_language('es', [], fallback='fallback'), 'fallback')
    self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')

    aliases = {'en-GB': 'en-UK', 'he': 'iw'}

    # guess country when the exact locale is not offered
    for lang, choices, expected in (
            ('de-DE', ['de'], 'de'),
            ('de', ['de-DE'], 'de-DE'),
            ('es-CO', ['es-AR', 'es-ES', 'es-MX'], 'es-ES'),
            ('es-CO', ['es-MX'], 'es-MX'),
            ('en-UK', ['en-AU', 'en-GB', 'en-US'], 'en-GB')):
        self.assertEqual(utils.match_language(lang, choices), expected)
    self.assertEqual(
        utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases),
        'en-UK')

    # language aliases, checked in the original interleaved order
    for lang, choices, alias_map, expected in (
            ('iw', ['he'], None, 'he'),
            ('he', ['iw'], aliases, 'iw'),
            ('iw-IL', ['he'], None, 'he'),
            ('he-IL', ['iw'], aliases, 'iw'),
            ('iw', ['he-IL'], None, 'he-IL'),
            ('he', ['iw-IL'], aliases, 'iw-IL'),
            ('iw-IL', ['he-IL'], None, 'he-IL'),
            ('he-IL', ['iw-IL'], aliases, 'iw-IL')):
        if alias_map is None:
            self.assertEqual(utils.match_language(lang, choices), expected)
        else:
            self.assertEqual(
                utils.match_language(lang, choices, alias_map), expected)
def _is_selected_language_supported(engine, preferences):
    """Tell whether the user's preferred language is handled by *engine*.

    'all' always counts as supported; otherwise the preference is matched
    against the engine's supported_languages / language_aliases (missing
    attributes default to empty).
    """
    lang = preferences.get_value('language')
    if lang == 'all':
        return True
    supported = getattr(engine, 'supported_languages', [])
    aliases = getattr(engine, 'language_aliases', {})
    # match_language yields a matched code (truthy) or None (falsy)
    return match_language(lang, supported, aliases, None)
def load_engine(engine_data):
    """Instantiate one search engine from its settings.yml entry.

    Returns the configured engine module, or None when the engine is
    marked inactive, is an onion engine while tor is disabled, or its
    module fails to import.  Exits the process on fatal configuration
    errors (bad name, broken module, missing attribute, duplicate
    shortcut).
    """
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error(
            'Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    if engine_name.lower() != engine_name:
        # fix: logger.warn is a deprecated alias of logger.warning
        logger.warning(
            'Engine name is not lowercase: "{}", converting to lowercase'.
            format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError,
            ImportError, RuntimeError):
        # these indicate a broken installation -> abort startup
        logger.exception(
            'Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except:  # noqa: E722 -- any other failure only disables this one engine
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    # copy the yaml settings onto the module
    for param_name, param_value in engine_data.items():
        if param_name == 'engine':
            pass
        elif param_name == 'categories':
            if param_value == 'none':
                engine.categories = []
            else:
                engine.categories = list(
                    map(str.strip, param_value.split(',')))
        elif param_name == 'proxies':
            engine.proxies = get_proxy_cycles(param_value)
        else:
            setattr(engine, param_name, param_value)

    # fill in defaults for anything the yaml did not set
    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(
                engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages',
                ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # language_support
    setattr(engine, 'language_support',
            len(getattr(engine, 'supported_languages', [])) > 0)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3',  # bing needs a non-English language
        }
        setattr(
            engine, 'fetch_supported_languages',
            lambda: engine._fetch_supported_languages(
                get(engine.supported_languages_url, headers=headers)))

    engine.stats = {
        'sent_search_count': 0,  # sent search
        'search_count': 0,  # successful search (fix: "succesful" typo)
        'result_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    engine_type = getattr(engine, 'engine_type', 'online')
    if engine_type != 'offline':
        engine.stats['page_load_time'] = 0
        engine.stats['page_load_count'] = 0

    # tor related settings
    if settings['outgoing'].get('using_tor_proxy'):
        # use onion url if using tor.
        if hasattr(engine, 'onion_url'):
            engine.search_url = engine.onion_url + getattr(
                engine, 'search_path', '')
    elif 'onions' in engine.categories:
        # exclude onion engines if not using tor.
        return None

    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        # fix: corrected "ambigious" typo in the error message
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(
            engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
def _validate_selection(self, selection):
    """Reject a non-empty selection that is not a recognised language code.

    The empty string is always accepted; anything else must match one of
    self.choices, otherwise a ValidationException is raised.
    """
    if selection == "":
        return
    # pylint: disable=no-member
    if not match_language(selection, self.choices, fallback=None):
        raise ValidationException(
            'Invalid language code: "{0}"'.format(selection))