コード例 #1
0
ファイル: test_utils.py プロジェクト: zwl1215/searx
    def test_highlight_content(self):
        self.assertEqual(utils.highlight_content(0, None), None)
        self.assertEqual(utils.highlight_content(None, None), None)
        self.assertEqual(utils.highlight_content('', None), None)
        self.assertEqual(utils.highlight_content(False, None), None)

        contents = ['<html></html>' 'not<']
        for content in contents:
            self.assertEqual(utils.highlight_content(content, None), content)

        content = 'a'
        query = b'test'
        self.assertEqual(utils.highlight_content(content, query), content)
        query = b'a test'
        self.assertEqual(utils.highlight_content(content, query), content)
コード例 #2
0
ファイル: test_utils.py プロジェクト: Acidburn0zzz/searx
    def test_highlight_content(self):
        self.assertEqual(utils.highlight_content(0, None), None)
        self.assertEqual(utils.highlight_content(None, None), None)
        self.assertEqual(utils.highlight_content('', None), None)
        self.assertEqual(utils.highlight_content(False, None), None)

        contents = [
            '<html></html>'
            'not<'
        ]
        for content in contents:
            self.assertEqual(utils.highlight_content(content, None), content)

        content = 'a'
        query = 'test'
        self.assertEqual(utils.highlight_content(content, query), content)
        query = 'a test'
        self.assertEqual(utils.highlight_content(content, query), content)
コード例 #3
0
ファイル: webapp.py プロジェクト: c-bug/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html', )

    try:
        search = Search(request)
    except:
        return render('index.html', )

    if plugins.call('pre_search', request, locals()):
        search.search(request)

    plugins.call('post_search', request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call('on_result', request, locals())
        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(
                    result['content'], search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime(
                '%Y-%m-%d %H:%M:%S%z')
            if result['publishedDate'].replace(
                    tzinfo=None) >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now(
                ) - result['publishedDate'].replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(
                        u'{minutes} minute(s) ago').format(minutes=minutes)
                else:
                    result['publishedDate'] = gettext(
                        u'{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes)  # noqa
            else:
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({
            'query':
            search.query,
            'results':
            search.result_container.get_ordered_results()
        }),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.result_container.get_ordered_results(),
            q=search.request_data['q'],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url())
        return Response(response_rss, mimetype='text/xml')

    return render('results.html',
                  results=search.result_container.get_ordered_results(),
                  q=search.request_data['q'],
                  selected_categories=search.categories,
                  paging=search.paging,
                  pageno=search.pageno,
                  base_url=get_base_url(),
                  suggestions=search.result_container.suggestions,
                  answers=search.result_container.answers,
                  infoboxes=search.result_container.infoboxes,
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())])
コード例 #4
0
ファイル: webapp.py プロジェクト: asciimoo/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
コード例 #5
0
ファイル: webapp.py プロジェクト: wflk/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get('q') is None:
        return render(
            'index.html',
        )

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext('search error'))
        logger.exception('search error')
        return render(
            'index.html',
        )

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get('advanced_search', None)
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]),
                                                      search_query.query.encode('utf-8'))
            result['title'] = highlight_content(escape(result['title'] or u''),
                                                search_query.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query,
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions)}),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
コード例 #6
0
ファイル: webapp.py プロジェクト: suksim/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render(
            'index.html',
        )

    try:
        search = Search(request)
    except:
        return render(
            'index.html',
        )

    search.results, search.suggestions,\
        search.answers, search.infoboxes = search.search(request)

    for result in search.results:

        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        # check if HTTPS rewrite is required
        if settings['server']['https_rewrite']\
           and result['parsed_url'].scheme == 'http':

            skip_https_rewrite = False

            # check if HTTPS rewrite is possible
            for target, rules, exclusions in https_rules:

                # check if target regex match with url
                if target.match(result['url']):
                    # process exclusions
                    for exclusion in exclusions:
                        # check if exclusion match with url
                        if exclusion.match(result['url']):
                            skip_https_rewrite = True
                            break

                    # skip https rewrite if required
                    if skip_https_rewrite:
                        break

                    # process rules
                    for rule in rules:
                        try:
                            # TODO, precompile rule
                            p = re.compile(rule[0])

                            # rewrite url if possible
                            new_result_url = p.sub(rule[1], result['url'])
                        except:
                            break

                        # parse new url
                        new_parsed_url = urlparse(new_result_url)

                        # continiue if nothing was rewritten
                        if result['url'] == new_result_url:
                            continue

                        # get domainname from result
                        # TODO, does only work correct with TLD's like
                        #  asdf.com, not for asdf.com.de
                        # TODO, using publicsuffix instead of this rewrite rule
                        old_result_domainname = '.'.join(
                            result['parsed_url'].hostname.split('.')[-2:])
                        new_result_domainname = '.'.join(
                            new_parsed_url.hostname.split('.')[-2:])

                        # check if rewritten hostname is the same,
                        # to protect against wrong or malicious rewrite rules
                        if old_result_domainname == new_result_domainname:
                            # set new url
                            result['url'] = new_result_url

                    # target has matched, do not search over the other rules
                    break

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())

        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            if result['publishedDate'].replace(tzinfo=None)\
               >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate']\
                    .replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
            else:
                result['pubdate'] = result['publishedDate']\
                    .strftime('%a, %d %b %Y %H:%M:%S %z')
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.results}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
            csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.results,
            q=search.request_data['q'],
            number_of_results=len(search.results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.results,
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.suggestions,
        answers=search.answers,
        infoboxes=search.infoboxes,
        theme=get_current_theme_name()
    )
コード例 #7
0
ファイル: searchAPI.py プロジェクト: moritan/searx
    def search(self, task):
        global number_of_searches

        # init vars
        requests = []
        results_queue = Queue()
        results = {}

        # increase number of searches
        number_of_searches += 1

        # set default useragent
        # user_agent = request.headers.get('User-Agent', '')
        user_agent = gen_useragent()

        # start search-reqest for all selected engines
        for selected_engine in self.engines:
            if selected_engine['name'] not in engines:
                continue

            engine = engines[selected_engine['name']]

            # if paging is not supported, skip
            if self.pageno > 1 and not engine.paging:
                continue

            # if search-language is set and engine does not
            # provide language-support, skip
            if self.lang != 'all' and not engine.language_support:
                continue

            # set default request parameters
            request_params = default_request_params()
            request_params['headers']['User-Agent'] = user_agent
            request_params['category'] = selected_engine['category']
            request_params['started'] = time()
            request_params['pageno'] = self.pageno

            if hasattr(engine, 'language') and engine.language:
                request_params['language'] = engine.language
            else:
                request_params['language'] = self.lang

                # try:
                # 0 = None, 1 = Moderate, 2 = Strict
                # request_params['safesearch'] = int(request.cookies.get('safesearch'))
                # except Exception:
            request_params['safesearch'] = settings['search']['safe_search']

            # update request parameters dependent on
            # search-engine (contained in engines folder)
            engine.request(task['query'].encode('utf-8'), request_params)

            # update request parameters dependent on
            # search-engine (contained in engines folder)
            if request_params['url'] is None:
                # TODO add support of offline engines
                pass

            # create a callback wrapper for the search engine results
            callback = make_callback(
                selected_engine['name'],
                results_queue,
                engine.response,
                request_params)

            # create dictionary which contain all
            # informations about the request
            request_args = dict(
                headers=request_params['headers'],
                hooks=dict(response=callback),
                cookies=request_params['cookies'],
                timeout=engine.timeout,
                verify=request_params['verify']
            )

            # specific type of request (GET or POST)
            if request_params['method'] == 'GET':
                req = requests_lib.get
            else:
                req = requests_lib.post
                request_args['data'] = request_params['data']

            # ignoring empty urls
            if not request_params['url']:
                continue

            # append request to list
            requests.append((req, request_params['url'],
                             request_args,
                             selected_engine['name']))

        if not requests:
            return self
        # send all search-request
        threaded_requests(requests)

        while not results_queue.empty():
            engine_name, engine_results = results_queue.get_nowait()

            # TODO type checks
            [self.suggestions.append(x['suggestion'])
             for x in list(engine_results)
             if 'suggestion' in x
             and engine_results.remove(x) is None]

            [self.answers.append(x['answer'])
             for x in list(engine_results)
             if 'answer' in x
             and engine_results.remove(x) is None]

            self.infoboxes.extend(x for x in list(engine_results)
                                  if 'infobox' in x
                                  and engine_results.remove(x) is None)

            results[engine_name] = engine_results

        # update engine-specific stats
        for engine_name, engine_results in results.items():
            engines[engine_name].stats['search_count'] += 1
            engines[engine_name].stats['result_count'] += len(engine_results)

        # score results and remove duplications
        self.results = score_results(results)

        # merge infoboxes according to their ids
        self.infoboxes = merge_infoboxes(self.infoboxes)

        # update engine stats, using calculated score
        for result in self.results:
            plugins.callAPI('on_result', self.plugins, locals())

            for res_engine in result['engines']:
                engines[result['engine']] \
                    .stats['score_count'] += result['score']

            result['pretty_url'] = prettify_url(result['url'])

            # TODO, check if timezone is calculated right
            if 'publishedDate' in result:
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')

            if not self.paging and engines[result['engine']].paging:
                self.paging = True

            if 'content' in result:
                result['content_html'] = highlight_content(result['content'],
                                                           self.query.encode('utf-8'))  # noqa
            result['title_html'] = highlight_content(result['title'],
                                                     self.query.encode('utf-8'))

            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

            # return results, suggestions, answers and infoboxes
        return self
コード例 #8
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None or len(request.form.get('q')) == 0:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(prettify_content(
                    escape(result['content'][:1024])), search_query.query)
            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        if result.__contains__('showurl'):
            result['pretty_url'] = prettify_url(result['showurl'])
        else:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range if search_query.time_range else "",
        number_of_results=format_decimal(round(number_of_results)),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
コード例 #9
0
ファイル: webapp.py プロジェクト: stef/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html')

    try:
        search = Search(request)
    except:
        return render('index.html')

    # TODO moar refactor - do_search integration into Search class
    search.results, search.suggestions = do_search(search.query,
                                                   request,
                                                   search.engines,
                                                   search.pageno,
                                                   search.lang)

    for result in search.results:
        if not search.paging and engines[result['engine']].paging:
            search.paging = True
        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            if result['publishedDate'].replace(tzinfo=None)\
               >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate']\
                    .replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
            else:
                result['pubdate'] = result['publishedDate']\
                    .strftime('%a, %d %b %Y %H:%M:%S %z')
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.results}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
            csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.results,
            q=search.request_data['q'],
            number_of_results=len(search.results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.results,
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.suggestions
    )
コード例 #10
0
ファイル: webapp.py プロジェクト: nschlemm/searx
def index():
    global categories

    if request.method == 'POST':
        request_data = request.form
    else:
        request_data = request.args
    if not request_data.get('q'):
        return render('index.html')

    selected_categories = []

    query, selected_engines = parse_query(request_data['q'].encode('utf-8'))

    if not len(selected_engines):
        for pd_name, pd in request_data.items():
            if pd_name.startswith('category_'):
                category = pd_name[9:]
                if not category in categories:
                    continue
                selected_categories.append(category)
        if not len(selected_categories):
            cookie_categories = request.cookies.get('categories', '')
            cookie_categories = cookie_categories.split(',')
            for ccateg in cookie_categories:
                if ccateg in categories:
                    selected_categories.append(ccateg)
        if not len(selected_categories):
            selected_categories = ['general']

        for categ in selected_categories:
            selected_engines.extend({'category': categ,
                                     'name': x.name}
                                    for x in categories[categ])

    results, suggestions = search(query, request, selected_engines)

    featured_results = []
    for result in results:
        if request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'], query)
            result['title'] = highlight_content(result['title'], query)
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            result['title'] = html_to_text(result['title']).strip()
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if request_data.get('format') == 'json':
        return Response(json.dumps({'query': query, 'results': results}),
                        mimetype='application/json')
    elif request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if len(results):
            csv.writerow(keys)
            for row in results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
        response.headers.add('Content-Disposition', content_disp)
        return response
    elif request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request_data['q'],
            number_of_results=len(results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request_data['q'],
        selected_categories=selected_categories,
        number_of_results=len(results) + len(featured_results),
        featured_results=featured_results,
        suggestions=suggestions
    )
コード例 #11
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html', )

    try:
        search = Search(request)
    except:
        return render('index.html', )

    # TODO moar refactor - do_search integration into Search class
    search.results, search.suggestions = do_search(search.query, request,
                                                   search.engines,
                                                   search.pageno, search.lang)

    for result in search.results:
        if not search.paging and engines[result['engine']].paging:
            search.paging = True
        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(
                    result['content'], search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            if result['publishedDate'].replace(tzinfo=None)\
               >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate']\
                    .replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(
                        u'{minutes} minute(s) ago').format(
                            minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = gettext(
                        u'{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes)  # noqa
            else:
                result['pubdate'] = result['publishedDate']\
                    .strftime('%a, %d %b %Y %H:%M:%S %z')
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({
            'query': search.query,
            'results': search.results
        }),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
            csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render('opensearch_response_rss.xml',
                              results=search.results,
                              q=search.request_data['q'],
                              number_of_results=len(search.results),
                              base_url=get_base_url())
        return Response(response_rss, mimetype='text/xml')

    return render('results.html',
                  results=search.results,
                  q=search.request_data['q'],
                  selected_categories=search.categories,
                  paging=search.paging,
                  pageno=search.pageno,
                  base_url=get_base_url(),
                  suggestions=search.suggestions)
コード例 #12
0
ファイル: webapp.py プロジェクト: jibe-b/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render("index.html")

    try:
        search = Search(request)
    except:
        return render("index.html")

    if plugins.call("pre_search", request, locals()):
        search.search(request)

    plugins.call("post_search", request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call("on_result", request, locals())
        if not search.paging and engines[result["engine"]].paging:
            search.paging = True

        if search.request_data.get("format", "html") == "html":
            if "content" in result:
                result["content"] = highlight_content(result["content"], search.query.encode("utf-8"))  # noqa
            result["title"] = highlight_content(result["title"], search.query.encode("utf-8"))
        else:
            if result.get("content"):
                result["content"] = html_to_text(result["content"]).strip()
            # removing html content and whitespace duplications
            result["title"] = " ".join(html_to_text(result["title"]).strip().split())

        result["pretty_url"] = prettify_url(result["url"])

        # TODO, check if timezone is calculated right
        if "publishedDate" in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result["pubdate"] = result["publishedDate"].strftime("%Y-%m-%d %H:%M:%S%z")
            except ValueError:
                result["publishedDate"] = None
            else:
                if result["publishedDate"].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result["publishedDate"].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result["publishedDate"] = gettext(u"{minutes} minute(s) ago").format(minutes=minutes)
                    else:
                        result["publishedDate"] = gettext(u"{hours} hour(s), {minutes} minute(s) ago").format(
                            hours=hours, minutes=minutes
                        )  # noqa
                else:
                    result["publishedDate"] = format_date(result["publishedDate"])

    if search.request_data.get("format") == "json":
        return Response(
            json.dumps({"query": search.query, "results": search.result_container.get_ordered_results()}),
            mimetype="application/json",
        )
    elif search.request_data.get("format") == "csv":
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ("title", "url", "content", "host", "engine", "score")
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row["host"] = row["parsed_url"].netloc
            csv.writerow([row.get(key, "") for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype="application/csv")
        cont_disp = "attachment;Filename=searx_-_{0}.csv".format(search.query.encode("utf-8"))
        response.headers.add("Content-Disposition", cont_disp)
        return response
    elif search.request_data.get("format") == "rss":
        response_rss = render(
            "opensearch_response_rss.xml",
            results=search.result_container.get_ordered_results(),
            q=search.request_data["q"],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url(),
        )
        return Response(response_rss, mimetype="text/xml")

    return render(
        "results.html",
        results=search.result_container.get_ordered_results(),
        q=search.request_data["q"],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.result_container.suggestions,
        answers=search.result_container.answers,
        infoboxes=search.result_container.infoboxes,
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
    )
コード例 #13
0
ファイル: webapp.py プロジェクト: Aigeruth/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """
    paging = False
    lang = 'all'

    if request.cookies.get('language')\
       and request.cookies['language'] in (x[0] for x in language_codes):
        lang = request.cookies['language']

    if request.method == 'POST':
        request_data = request.form
    else:
        request_data = request.args
    if not request_data.get('q'):
        return render('index.html')

    pageno_param = request_data.get('pageno', '1')
    if not pageno_param.isdigit() or int(pageno_param) < 1:
        return render('index.html')

    pageno = int(pageno_param)

    selected_categories = []

    query, selected_engines = parse_query(request_data['q'].encode('utf-8'))

    if len(selected_engines):
        selected_categories = list(set(engine['category']
                                       for engine in selected_engines))
    else:
        for pd_name, pd in request_data.items():
            if pd_name.startswith('category_'):
                category = pd_name[9:]
                if not category in categories:
                    continue
                selected_categories.append(category)
        if not len(selected_categories):
            cookie_categories = request.cookies.get('categories', '')
            cookie_categories = cookie_categories.split(',')
            for ccateg in cookie_categories:
                if ccateg in categories:
                    selected_categories.append(ccateg)
        if not len(selected_categories):
            selected_categories = ['general']

        for categ in selected_categories:
            selected_engines.extend({'category': categ,
                                     'name': x.name}
                                    for x in categories[categ])

    results, suggestions = search(query,
                                  request,
                                  selected_engines,
                                  pageno,
                                  lang)

    for result in results:
        if not paging and engines[result['engine']].paging:
            paging = True
        if request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'], query)
            result['title'] = highlight_content(result['title'], query)
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if request_data.get('format') == 'json':
        return Response(json.dumps({'query': query, 'results': results}),
                        mimetype='application/json')
    elif request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if len(results):
            csv.writerow(keys)
            for row in results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
        response.headers.add('Content-Disposition', content_disp)
        return response
    elif request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request_data['q'],
            number_of_results=len(results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request_data['q'],
        selected_categories=selected_categories,
        paging=paging,
        pageno=pageno,
        suggestions=suggestions
    )
コード例 #14
0
ファイル: webapp.py プロジェクト: kvch/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get("q") is None:
        return render("index.html")

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext("search error"))
        logger.exception("search error")
        return render("index.html")

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get("advanced_search", None)
    output_format = request.form.get("format", "html")
    if output_format not in ["html", "csv", "json", "rss"]:
        output_format = "html"

    # output
    for result in results:
        if output_format == "html":
            if "content" in result and result["content"]:
                result["content"] = highlight_content(
                    escape(result["content"][:1024]), search_query.query.encode("utf-8")
                )
            result["title"] = highlight_content(escape(result["title"] or u""), search_query.query.encode("utf-8"))
        else:
            if result.get("content"):
                result["content"] = html_to_text(result["content"]).strip()
            # removing html content and whitespace duplications
            result["title"] = " ".join(html_to_text(result["title"]).strip().split())

        result["pretty_url"] = prettify_url(result["url"])

        # TODO, check if timezone is calculated right
        if "publishedDate" in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result["pubdate"] = result["publishedDate"].strftime("%Y-%m-%d %H:%M:%S%z")
            except ValueError:
                result["publishedDate"] = None
            else:
                if result["publishedDate"].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result["publishedDate"].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result["publishedDate"] = gettext(u"{minutes} minute(s) ago").format(minutes=minutes)
                    else:
                        result["publishedDate"] = gettext(u"{hours} hour(s), {minutes} minute(s) ago").format(
                            hours=hours, minutes=minutes
                        )  # noqa
                else:
                    result["publishedDate"] = format_date(result["publishedDate"])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == "json":
        return Response(
            json.dumps(
                {
                    "query": search_query.query,
                    "number_of_results": number_of_results,
                    "results": results,
                    "answers": list(result_container.answers),
                    "infoboxes": result_container.infoboxes,
                    "suggestions": list(result_container.suggestions),
                }
            ),
            mimetype="application/json",
        )
    elif output_format == "csv":
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ("title", "url", "content", "host", "engine", "score")
        csv.writerow(keys)
        for row in results:
            row["host"] = row["parsed_url"].netloc
            csv.writerow([row.get(key, "") for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype="application/csv")
        cont_disp = "attachment;Filename=searx_-_{0}.csv".format(search_query.query.encode("utf-8"))
        response.headers.add("Content-Disposition", cont_disp)
        return response
    elif output_format == "rss":
        response_rss = render(
            "opensearch_response_rss.xml",
            results=results,
            q=request.form["q"],
            number_of_results=number_of_results,
            base_url=get_base_url(),
        )
        return Response(response_rss, mimetype="text/xml")

    return render(
        "results.html",
        results=results,
        q=request.form["q"],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
    )
コード例 #15
0
ファイル: webapp.py プロジェクト: LeNovalis/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines)},  # noqa
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(map(lambda suggestion: {
                               'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
                               'title': suggestion
                               },
                               result_container.suggestions))

    correction_urls = list(map(lambda correction: {
                               'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),
                               'title': correction
                               },
                               result_container.corrections))
    #
    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=suggestion_urls,
        answers=result_container.answers,
        corrections=correction_urls,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=__get_translated_errors(result_container.unresponsive_engines),
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=request.preferences.get_value("language")),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
        timeout_limit=request.form.get('timeout_limit', None)
    )
コード例 #16
0
ファイル: webapp.py プロジェクト: germc/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html')

    try:
        search = Search(request)
    except:
        return render('index.html')

    # TODO moar refactor - do_search integration into Search class
    search.results, search.suggestions = do_search(search.query,
                                                   request,
                                                   search.engines,
                                                   search.pageno,
                                                   search.lang)

    for result in search.results:
        if not search.paging and engines[result['engine']].paging:
            search.paging = True
        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())
        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.results}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
            csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.results,
            q=search.request_data['q'],
            number_of_results=len(search.results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.results,
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.suggestions
    )
コード例 #17
0
ファイル: webapp.py プロジェクト: GreenLunar/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render(
            'index.html',
        )

    try:
        search = Search(request)
    except:
        return render(
            'index.html',
        )

    if plugins.call('pre_search', request, locals()):
        search.search(request)

    plugins.call('post_search', request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call('on_result', request, locals())
        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                else:
                    result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
            else:
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.result_container.get_ordered_results()}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.result_container.get_ordered_results(),
            q=search.request_data['q'],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.result_container.get_ordered_results(),
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.result_container.suggestions,
        answers=search.result_container.answers,
        infoboxes=search.result_container.infoboxes,
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
コード例 #18
0
ファイル: webapp.py プロジェクト: NotoriousDev/searx
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get('q') is None:
        return render(
            'index.html',
        )

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext('search error'))
        logger.exception('search error')
        return render(
            'index.html',
        )

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get('advanced_search', None)
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(result['content'][:1024], search_query.query.encode('utf-8'))
            result['title'] = highlight_content(result['title'], search_query.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query,
                                    'number_of_results': number_of_results,
                                    'results': results}),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )