Example #1
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
        search = Search(search_query)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = map(lambda suggestion: {
                          'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
                          'title': suggestion
                          },
                          result_container.suggestions)

    correction_urls = list(map(lambda correction: {
                               'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),
                               'title': correction
                               },
                               result_container.corrections))
    #
    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=suggestion_urls,
        answers=result_container.answers,
        corrections=correction_urls,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
        timeout_limit=request.form.get('timeout_limit', None)
    )
Example #2
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html', )

    try:
        search = Search(request)
    except:
        return render('index.html', )

    if plugins.call('pre_search', request, locals()):
        search.search(request)

    plugins.call('post_search', request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call('on_result', request, locals())
        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(
                    result['content'], search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime(
                '%Y-%m-%d %H:%M:%S%z')
            if result['publishedDate'].replace(
                    tzinfo=None) >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now(
                ) - result['publishedDate'].replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(
                        u'{minutes} minute(s) ago').format(minutes=minutes)
                else:
                    result['publishedDate'] = gettext(
                        u'{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes)  # noqa
            else:
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({
            'query':
            search.query,
            'results':
            search.result_container.get_ordered_results()
        }),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.result_container.get_ordered_results(),
            q=search.request_data['q'],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url())
        return Response(response_rss, mimetype='text/xml')

    return render('results.html',
                  results=search.result_container.get_ordered_results(),
                  q=search.request_data['q'],
                  selected_categories=search.categories,
                  paging=search.paging,
                  pageno=search.pageno,
                  base_url=get_base_url(),
                  suggestions=search.result_container.suggestions,
                  answers=search.result_container.answers,
                  infoboxes=search.result_container.infoboxes,
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())])
Example #3
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html', )

    try:
        search = Search(request)
    except:
        return render('index.html', )

    # TODO moar refactor - do_search integration into Search class
    search.results, search.suggestions = do_search(search.query, request,
                                                   search.engines,
                                                   search.pageno, search.lang)

    for result in search.results:
        if not search.paging and engines[result['engine']].paging:
            search.paging = True
        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(
                    result['content'], search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            if result['publishedDate'].replace(tzinfo=None)\
               >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate']\
                    .replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(
                        u'{minutes} minute(s) ago').format(
                            minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = gettext(
                        u'{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes)  # noqa
            else:
                result['pubdate'] = result['publishedDate']\
                    .strftime('%a, %d %b %Y %H:%M:%S %z')
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({
            'query': search.query,
            'results': search.results
        }),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
            csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render('opensearch_response_rss.xml',
                              results=search.results,
                              q=search.request_data['q'],
                              number_of_results=len(search.results),
                              base_url=get_base_url())
        return Response(response_rss, mimetype='text/xml')

    return render('results.html',
                  results=search.results,
                  q=search.request_data['q'],
                  selected_categories=search.categories,
                  paging=search.paging,
                  pageno=search.pageno,
                  base_url=get_base_url(),
                  suggestions=search.suggestions)