def test_highlight_content(self):
    """Check ``webutils.highlight_content`` on inputs that must not be highlighted.

    Covers three groups of inputs:

    * falsy content (``0``, ``None``, ``''``, ``False``) yields ``None``;
    * content containing ``<`` is returned unchanged when the query is ``None``;
    * content that does not contain the query is returned unchanged.
    """
    # Falsy content: nothing to highlight, the function returns None.
    for empty in (0, None, '', False):
        self.assertIsNone(webutils.highlight_content(empty, None))

    # Each entry must be its own list element.  Without the separating comma
    # the adjacent literals are concatenated into a single string and only
    # one case is exercised.
    contents = [
        '<html></html>',
        'not<',
    ]
    for content in contents:
        self.assertEqual(webutils.highlight_content(content, None), content)

    # The query does not occur in the content: content comes back untouched.
    content = 'a'
    for query in ('test', 'a test'):
        self.assertEqual(webutils.highlight_content(content, query), content)
def test_highlight_content(self):
    """Check ``webutils.highlight_content`` with and without matches.

    Covers four groups of inputs:

    * falsy content (``0``, ``None``, ``''``, ``False``) yields ``None``;
    * content containing ``<`` is returned unchanged when the query is ``None``;
    * content that does not contain the query is returned unchanged;
    * a table of ``(query, content, expected)`` triples where matches are
      wrapped in ``<span class="highlight">…</span>``.
    """
    # Falsy content: nothing to highlight, the function returns None.
    for empty in (0, None, '', False):
        self.assertIsNone(webutils.highlight_content(empty, None))

    # Each entry must be its own list element.  Without the separating comma
    # the adjacent literals are concatenated into a single string and only
    # one case is exercised.
    contents = [
        '<html></html>',
        'not<',
    ]
    for content in contents:
        self.assertEqual(webutils.highlight_content(content, None), content)

    # The query does not occur in the content: content comes back untouched.
    content = 'a'
    for query in ('test', 'a test'):
        self.assertEqual(webutils.highlight_content(content, query), content)

    # (query, content, expected) triples.  The parenthesised adjacent string
    # literals below are intentionally concatenated to keep lines short.
    data = (
        (
            '" test "',
            'a test string',
            'a <span class="highlight">test</span> string',
        ),
        (
            '"a"',
            'this is a test string',
            'this is<span class="highlight"> a </span>test string',
        ),
        (
            'a test',
            'this is a test string that matches entire query',
            'this is <span class="highlight">a test</span> string that matches entire query',
        ),
        (
            'this a test',
            'this is a string to test.',
            (
                '<span class="highlight">this</span> is<span class="highlight"> a </span>'
                'string to <span class="highlight">test</span>.'
            ),
        ),
        (
            'match this "exact phrase"',
            'this string contains the exact phrase we want to match',
            (
                '<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
                ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'
            ),
        ),
    )
    for query, content, expected in data:
        self.assertEqual(webutils.highlight_content(content, query), expected)
def search():
    """Search query in q and return results.

    Supported outputs: html, json, csv, rss.

    The query (``q``) and the output format (``format``) are read from
    ``request.form``; an unknown format falls back to ``html``.

    Returns:
        The rendered response for the requested output format.  When no
        query is given, the index page is rendered for ``html`` and an
        error with status 400 is returned for the other formats.  A
        ``SearxParameterException`` raised while searching yields status
        400; any other exception yields status 500.  If the result
        container carries a ``redirect_url``, a redirect to it is returned.
    """
    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query (not None and not an empty string)
    if not request.form.get('q'):
        if output_format == 'html':
            return render(
                'index.html',
                advanced_search=request.preferences.get_value('advanced_search'),
                selected_categories=get_selected_categories(request.preferences, request.form),
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    try:
        search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
        searcher = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = searcher.search()
    except SearxParameterException as e:
        logger.exception('search error: SearxParameterException')
        return index_error(output_format, e.message), 400
    except Exception:
        # Top-level boundary of the request: log the traceback and answer
        # with a generic error instead of leaking details to the client.
        logger.exception('search error')
        return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        # The reported total is lower than the number of results actually
        # held, so it is not shown.
        number_of_results = 0

    # checkin for a external bang
    if result_container.redirect_url:
        return redirect(result_container.redirect_url)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output
    for result in results:
        if output_format == 'html':
            if result.get('content'):
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            if result.get('title'):
                result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        # Use a truthiness check rather than a key check: a None or empty
        # publishedDate has no strftime() and would raise AttributeError,
        # which the ValueError handler below does not catch.
        published = result.get('publishedDate')
        if published:
            try:
                # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = published.strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                now = datetime.now()
                published_naive = published.replace(tzinfo=None)
                if published_naive >= now - timedelta(days=1):
                    # Less than a day old: show a relative "... ago" text.
                    timedifference = now - published_naive
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes)
                else:
                    result['publishedDate'] = format_date(published)

    if output_format == 'json':
        payload = {
            'query': search_query.query,
            'number_of_results': number_of_results,
            'results': results,
            'answers': list(result_container.answers),
            'corrections': list(result_container.corrections),
            'infoboxes': result_container.infoboxes,
            'suggestions': list(result_container.suggestions),
            'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines),
        }
        # Sets are not JSON serialisable; emit them as lists.
        return Response(json.dumps(payload,
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv_writer = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv_writer.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv_writer.writerow([row.get(key, '') for key in keys])
        # Answers, suggestions and corrections are appended as rows that
        # only carry a title and a type.
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv_writer.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv_writer.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv_writer.writerow([row.get(key, '') for key in keys])
        csv_writer.stream.seek(0)
        response = Response(csv_writer.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = [
        {
            'url': raw_text_query.changeQuery(suggestion).getFullQuery(),
            'title': suggestion,
        }
        for suggestion in result_container.suggestions
    ]

    correction_urls = [
        {
            'url': raw_text_query.changeQuery(correction).getFullQuery(),
            'title': correction,
        }
        for correction in result_container.corrections
    ]

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        suggestions=suggestion_urls,
        answers=result_container.answers,
        corrections=correction_urls,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=__get_translated_errors(result_container.unresponsive_engines),
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=request.preferences.get_value("language")),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
        timeout_limit=request.form.get('timeout_limit', None)
    )