Code example #1
    def __init__(
        # pylint: disable=too-many-arguments
        self,
        enable_http=True,
        verify=True,
        enable_http2=False,
        max_connections=None,
        max_keepalive_connections=None,
        keepalive_expiry=None,
        proxies=None,
        using_tor_proxy=False,
        local_addresses=None,
        retries=0,
        retry_on_http_error=None,
        max_redirects=30,
        logger_name=None,
    ):

        self.enable_http = enable_http
        self.verify = verify
        self.enable_http2 = enable_http2
        self.max_connections = max_connections
        self.max_keepalive_connections = max_keepalive_connections
        self.keepalive_expiry = keepalive_expiry
        self.proxies = proxies
        self.using_tor_proxy = using_tor_proxy
        self.local_addresses = local_addresses
        self.retries = retries
        self.retry_on_http_error = retry_on_http_error
        self.max_redirects = max_redirects
        self._local_addresses_cycle = self.get_ipaddress_cycle()
        self._proxies_cycle = self.get_proxy_cycles()
        self._clients = {}
        self._logger = logger.getChild(logger_name) if logger_name else logger
        self.check_parameters()
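The constructor above only stores the settings; the two `*_cycle` attributes suggest round-robin selection over the configured proxies and local source addresses. A minimal sketch of that idea with `itertools.cycle` (an illustration under that assumption, not the project's actual helpers):

from itertools import cycle

def ipaddress_cycle(local_addresses):
    # Yield configured source addresses round-robin; yield None forever
    # when no local address is configured.
    if not local_addresses:
        while True:
            yield None
    else:
        yield from cycle(local_addresses)

addresses = ipaddress_cycle(['192.0.2.10', '192.0.2.11'])
print(next(addresses), next(addresses), next(addresses))  # 192.0.2.10 192.0.2.11 192.0.2.10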
Code example #2
File: yandex.py Project: med15060/oma
"""
 Yandex (Web)

 @website     https://yandex.ru/
 @provide-api ?
 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content
"""

from lxml import html
from searx import logger
from searx.url_utils import urlencode

logger = logger.getChild('yandex engine')

# engine dependent config
categories = ['general']
paging = True
language_support = True  # TODO

default_tld = 'com'
language_map = {'ru': 'ru', 'ua': 'ua', 'be': 'by', 'kk': 'kz', 'tr': 'com.tr'}

# search-url
base_url = 'https://yandex.{tld}/'
search_url = 'search/?{query}&p={page}'

results_xpath = '//li[@class="serp-item"]'
url_xpath = './/h2/a/@href'
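The excerpt stops at the XPath selectors, but the URL templates above already show how a request is meant to be assembled. A minimal sketch of a request() callback built from them (the `text` query parameter and zero-based page index are assumptions, not taken from this excerpt):

def request(query, params):
    # Sketch only: pick a TLD from language_map (falling back to the
    # default) and fill the URL templates declared above.
    tld = language_map.get(params['language'].split('-')[0], default_tld)
    params['url'] = base_url.format(tld=tld) + search_url.format(
        query=urlencode({'text': query}),  # 'text' as the query parameter is an assumption
        page=params['pageno'] - 1)         # zero-based paging is an assumption
    return params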
Code example #3
# SPDX-License-Identifier: AGPL-3.0-or-later

import threading
from time import time
from searx import logger
from searx.metrology.error_recorder import record_exception, record_error
from searx.search.processors.abstract import EngineProcessor

logger = logger.getChild('search.processor.offline')


class OfflineProcessor(EngineProcessor):

    engine_type = 'offline'

    def _record_stats_on_error(self, result_container, start_time):
        engine_time = time() - start_time
        result_container.add_timing(self.engine_name, engine_time, engine_time)

        with threading.RLock():
            self.engine.stats['errors'] += 1

    def _search_basic(self, query, params):
        return self.engine.search(query, params)

    def search(self, query, params, result_container, start_time,
               timeout_limit):
        try:
            search_results = self._search_basic(query, params)

            if search_results:
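One detail worth noting in `_record_stats_on_error` above: `with threading.RLock():` locks a lock object created on the spot, so concurrent threads never contend on the same lock and the `errors` update is effectively unsynchronized. The usual pattern is a single shared lock, roughly (a sketch, not the project's code):

import threading

ERRORS_LOCK = threading.Lock()      # created once, shared by every caller
stats = {'errors': 0}

def record_error():
    with ERRORS_LOCK:               # all threads contend on the same object
        stats['errors'] += 1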
Code example #4
File: search.py Project: erdoukki/searx
import re
import searx.poolrequests as requests_lib
from itertools import izip_longest, chain
from operator import itemgetter
from Queue import Queue
from time import time
from urlparse import urlparse, unquote
from searx.engines import (
    categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent, get_blocked_engines
from searx.query import Query
from searx import logger

logger = logger.getChild('search')

number_of_searches = 0


def search_request_wrapper(fn, url, engine_name, **kwargs):
    try:
        return fn(url, **kwargs)
    except:
        # increase errors stats
        engines[engine_name].stats['errors'] += 1

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine_name))
        return
Code example #5
File: google_news.py Project: soapy2018/searx
from searx.engines.google import (
    get_lang_country,
    filter_mapping,
)

# about
about = {
    "website": 'https://news.google.com',
    "wikidata_id": 'Q12020',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

logger = logger.getChild('google news')

# Compared to other Google engines, google-news handles time ranges
# differently: the range is included in the search term.
time_range_dict = {
    'day': 'when:1d',
    'week': 'when:7d',
    'month': 'when:1m',
    'year': 'when:1y',
}

# engine dependent config

categories = ['news']
paging = False
language_support = True
Code example #6
File: sqlite.py Project: searxng/searxng
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-function-docstring
"""SQLite database (Offline)

"""

import sqlite3
import contextlib

from searx import logger

logger = logger.getChild('SQLite engine')

engine_type = 'offline'
database = ""
query_str = ""
limit = 10
paging = True
result_template = 'key-value.html'


def init(engine_settings):
    if 'query_str' not in engine_settings:
        raise ValueError('query_str cannot be empty')

    if not engine_settings['query_str'].lower().startswith('select '):
        raise ValueError('only SELECT query is supported')


@contextlib.contextmanager
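# Editor's sketch (an assumption, not the project's code): the excerpt is cut
# off at the decorator above.  A context manager here would typically open the
# configured database read-only and yield a cursor, e.g.:
def sqlite_cursor():
    uri = 'file:{}?mode=ro'.format(database)
    with contextlib.closing(sqlite3.connect(uri, uri=True)) as connection:
        connection.row_factory = sqlite3.Row
        with contextlib.closing(connection.cursor()) as cursor:
            yield cursor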
Code example #7
    'OnlineProcessor',
    'OnlineDictionaryProcessor',
    'OnlineCurrencyProcessor',
    'processors',
]

from searx import logger
import searx.engines as engines

from .online import OnlineProcessor
from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor
from .online_currency import OnlineCurrencyProcessor
from .abstract import EngineProcessor

logger = logger.getChild('search.processors')
processors = {}
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""


def get_processor_class(engine_type):
    """Return processor class according to the ``engine_type``"""
    for c in [
            OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor,
            OnlineCurrencyProcessor
    ]:
        if c.engine_type == engine_type:
            return c
    return None
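The `processors` dict above is described as a per-engine cache; a minimal sketch of an initialization loop that could fill it from `get_processor_class` (a hypothetical `initialize`, and the constructor arguments are assumptions):

def initialize(engine_list):
    # Instantiate one processor per configured engine and cache it by name.
    for engine_name in engine_list:
        engine = engines.engines[engine_name]
        processor_class = get_processor_class(getattr(engine, 'engine_type', 'online'))
        if processor_class is None:
            logger.error('engine %s: unknown engine_type', engine_name)
            continue
        processors[engine_name] = processor_class(engine, engine_name)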

Code example #8
File: memucho.py Project: einsweniger/searx
from searx.engines import licensing
from searx import logger
from searx.url_utils import urlencode
categories = ['general']  # optional

log = logger.getChild('memucho')


def request(query, params):
    '''pre-request callback
    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {} # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1 # number of the requested page
    '''
    search = urlencode({'term': query})
    params['url'] = f'https://memucho.de/api/edusharing/search?{search}'

    return params


def response(resp):
    result = []

    json = resp.json()
    log.debug(f'{len(json["Items"])}')
    for item in json['Items']:
        licstr = str(item['Licence']).replace('_', '-').lower()
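The response parser is cut off by the excerpt; as a complement, here is what the documented request() callback produces for a sample query (the default `params` dict below simply mirrors the shape described in its docstring):

params = {'method': 'GET', 'headers': {}, 'data': {}, 'url': '',
          'category': 'general', 'pageno': 1}
request('free textbooks', params)
print(params['url'])   # https://memucho.de/api/edusharing/search?term=free+textbooks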
Code example #9
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <*****@*****.**>
'''

import re
from urlparse import urlparse
from lxml import etree
from os import listdir
from os.path import isfile, isdir, join
from searx import logger

logger = logger.getChild("https_rewrite")

# https://gitweb.torproject.org/\
# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules

# HTTPS rewrite rules
https_rules = []


# load single ruleset from a xml file
def load_single_https_ruleset(filepath):
    ruleset = ()

    # init parser
    parser = etree.XMLParser()
Code example #10
.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1

"""

import json
from urllib.parse import urlencode
from lxml import html

from searx import logger
from searx.data import WIKIDATA_UNITS
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom


logger = logger.getChild('duckduckgo_definitions')

URL = 'https://api.duckduckgo.com/'\
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

WIKIDATA_PREFIX = [
    'http://www.wikidata.org/entity/',
    'https://www.wikidata.org/entity/'
]

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


def is_broken_text(text):
    """ duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
Code example #11
import sys
from time import time
from itertools import cycle
from threading import local

import requests

from searx import settings
from searx import logger
from searx.raise_for_httperror import raise_for_httperror
from collections import OrderedDict

import ast

logger = logger.getChild('poolrequests')

try:
    import ssl
    if ssl.OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
        # https://github.com/certifi/python-certifi#1024-bit-root-certificates
        logger.critical(
            'You are using an old OpenSSL version ({0}), please upgrade to 1.0.2 or newer!'
            .format(ssl.OPENSSL_VERSION))
        sys.exit(1)
except ImportError:
    ssl = None
if not getattr(ssl, "HAS_SNI", False):
    try:
        import OpenSSL  # pylint: disable=unused-import
    except ImportError:
        logger.critical(
Code example #12
File: __init__.py Project: JASON0916/searx
(at your option) any later version.

searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2015 by Adam Tauber, <*****@*****.**>
'''
from sys import exit
from searx import logger

logger = logger.getChild('plugins')

from searx.plugins import (https_rewrite,
                           open_results_on_new_tab,
                           self_info,
                           search_on_category_select,
                           tracker_url_remover,
                           vim_hotkeys)

required_attrs = (('name', (str, unicode)),
                  ('description', (str, unicode)),
                  ('default_on', bool))

optional_attrs = (('js_dependencies', tuple),
                  ('css_dependencies', tuple))
Code example #13
File: webapp.py Project: jibe-b/searx
if __name__ == "__main__":
    from sys import path
    from os.path import realpath, dirname

    path.append(realpath(dirname(realpath(__file__)) + "/../"))

import json
import cStringIO
import os
import hashlib
import requests

from searx import logger

logger = logger.getChild("webapp")

try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
except:
    logger.critical("cannot import dependency: pygments")
    from sys import exit

    exit(1)

from datetime import datetime, timedelta
from urllib import urlencode
from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix
Code example #14
File: __init__.py Project: NotoriousDev/searx
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <*****@*****.**>
"""

from os.path import realpath, dirname, splitext, join
import sys
from imp import load_source
from flask_babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger


logger = logger.getChild("engines")

engine_dir = dirname(realpath(__file__))

engines = {}

categories = {"general": []}

engine_shortcuts = {}
engine_default_args = {
    "paging": False,
    "categories": ["general"],
    "language_support": True,
    "safesearch": False,
    "timeout": settings["outgoing"]["request_timeout"],
    "shortcut": "-",
Code example #15
File: wikidata.py Project: asciimoo/searx
 @results     JSON, HTML
 @stable      no (html can change)
 @parse       url, infobox
"""

from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import match_language

from json import loads
from lxml.html import fromstring

logger = logger.getChild('wikidata')
result_count = 1

# urls
wikidata_host = 'https://www.wikidata.org'
url_search = wikidata_host \
    + '/w/index.php?{query}'

wikidata_api = wikidata_host + '/w/api.php'
url_detail = wikidata_api\
    + '?action=parse&format=json&{query}'\
    + '&redirects=1&prop=text%7Cdisplaytitle%7Clanglinks%7Crevid'\
    + '&disableeditsection=1&disabletidy=1&preview=1&sectionpreview=1&disabletoc=1&utf8=1&formatversion=2'

url_map = 'https://www.openstreetmap.org/'\
    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
Code example #16
File: __init__.py Project: 3615pipou/searx
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <*****@*****.**>
'''

from os.path import realpath, dirname, splitext, join
import sys
from imp import load_source
from flask.ext.babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger


logger = logger.getChild('engines')

engine_dir = dirname(realpath(__file__))

engines = {}

categories = {'general': []}

engine_shortcuts = {}
engine_default_args = {'paging': False,
                       'categories': ['general'],
                       'language_support': True,
                       'safesearch': False,
                       'timeout': settings['outgoing']['request_timeout'],
                       'shortcut': '-',
                       'disabled': False,
Code example #17
    engine : xpath
    paging : True
    search_url : https://bitbucket.org/repo/all/{pageno}?name={query}
    url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href
    title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]
    content_xpath : //article[@class="repo-summary"]/p

"""

from urllib.parse import urlencode

from lxml import html
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
from searx import logger

logger = logger.getChild('XPath engine')

search_url = None
"""
Search URL of the engine. Example::

    https://example.org/?search={query}&page={pageno}{time_range}{safe_search}

Replacements are:

``{query}``:
  Search terms from user.

``{pageno}``:
  Page number if engine supports paging :py:obj:`paging`
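The docstring is only partly visible here, but the substitution scheme it describes is plain string formatting. A short illustration with made-up values (not the engine's actual code):

from urllib.parse import quote_plus

template = 'https://example.org/?search={query}&page={pageno}'
print(template.format(query=quote_plus('free software'), pageno=2))
# https://example.org/?search=free+software&page=2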
Code example #18
File: wikidata.py Project: 0xn3xus/neovo

from urllib.parse import urlencode
from json import loads

from dateutil.parser import isoparse
from babel.dates import format_datetime, format_date, format_time, get_datetime_format

from searx import logger
from searx.data import WIKIDATA_UNITS
from searx.network import post, get
from searx.utils import match_language, searx_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import

logger = logger.getChild('wikidata')

# about
about = {
    "website": 'https://wikidata.org/',
    "wikidata_id": 'Q2013',
    "official_api_documentation": 'https://query.wikidata.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
WIKIDATA_PROPERTIES = {
Code example #19
File: solidtorrents.py Project: searxng/searxng
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Solid Torrents

"""

# pylint: disable=missing-function-docstring

from json import loads
from urllib.parse import urlencode
from searx import logger

logger = logger.getChild('solidtor engine')

about = {
    "website": 'https://www.solidtorrents.net/',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['files']
paging = True

base_url = 'https://www.solidtorrents.net/'
search_url = base_url + 'api/v1/search?{query}'


def request(query, params):
Code example #20
File: utils.py Project: cyrilix/searx
import csv
import os
import re

from babel.dates import format_date
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from random import choice

from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
from searx import logger


logger = logger.getChild("utils")

ua_versions = ("40.0", "41.0", "42.0", "43.0", "44.0", "45.0", "46.0", "47.0")

ua_os = ("Windows NT 6.3; WOW64", "X11; Linux x86_64", "X11; Linux x86")

ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"

blocked_tags = ("script", "style")


def gen_useragent():
    # TODO
    return ua.format(os=choice(ua_os), version=choice(ua_versions))
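A quick usage sketch of the helper above; each call combines one random OS string with one random Firefox version:

print(gen_useragent())
# e.g. Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0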

Code example #21
from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

from searx.engines.yahoo import parse_url

# pylint: disable=unused-import
from searx.engines.yahoo import (
    _fetch_supported_languages,
    supported_languages_url,
)
# pylint: enable=unused-import

logger = logger.getChild('yahoo_news engine')

# about
about = {
    "website": 'https://news.yahoo.com',
    "wikidata_id": 'Q3044717',
    "official_api_documentation": 'https://developer.yahoo.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

language_support = False
time_range_support = False
safesearch = False
paging = True
Code example #22
File: google.py Project: asciimoo/searx
# @provide-api yes (https://developers.google.com/custom-search/)
#
# @using-api   no
# @results     HTML
# @stable      no (HTML can change)
# @parse       url, title, content, suggestion

import re
from flask_babel import gettext
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
from searx.utils import match_language

logger = logger.getChild('google engine')


# engine dependent config
categories = ['general']
paging = True
language_support = True
use_locale_domain = True
time_range_support = True

# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
default_hostname = 'www.google.com'

country_to_hostname = {
    'BG': 'www.google.bg',  # Bulgaria
    'CZ': 'www.google.cz',  # Czech Republic
Code example #23
import itertools
import threading
from time import time
from urllib.parse import urlparse

import re
from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException
import httpx

from searx import network, logger
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor

logger = logger.getChild('searx.search.checker')

HTML_TAGS = [
    'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math',
    'canvas', 'noscript', 'script', 'del', 'ins', 'area', 'audio', 'img',
    'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite',
    'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt',
    'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strong', 'sub', 'sup',
    'time', 'u', 'var', 'wbr', 'style', 'blockquote', 'dd', 'div', 'dl', 'dt',
    'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button',
    'datalist', 'fieldset', 'form', 'input', 'label', 'legend', 'meter',
    'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet',
    'frame', 'frameset'
]

Code example #24
File: utils.py Project: mmuman/searx
from babel.dates import format_date
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from imp import load_source
from os.path import splitext, join
from random import choice
import sys

from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
from searx import logger


logger = logger.getChild('utils')

ua_versions = ('40.0',
               '41.0',
               '42.0',
               '43.0',
               '44.0',
               '45.0',
               '46.0',
               '47.0')

ua_os = ('Windows NT 6.3; WOW64',
         'X11; Linux x86_64',
         'X11; Linux x86')

ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
Code example #25
"""Abstract base classes for engine request processores.

"""

import threading
from abc import abstractmethod, ABC
from timeit import default_timer

from searx import logger
from searx.engines import settings
from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.utils import get_engine_from_settings

logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS = {}

# pylint: disable=missing-function-docstring


class SuspendedStatus:
    """Class to handle suspend state."""

    __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'

    def __init__(self):
        self.lock = threading.Lock()
        self.continuous_errors = 0
        self.suspend_end_time = 0
        self.suspend_reason = None
Code example #26
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2015 by Adam Tauber, <*****@*****.**>
'''
from sys import exit, version_info
from searx import logger

if version_info[0] == 3:
    unicode = str

logger = logger.getChild('plugins')

from searx.plugins import (doai_rewrite, https_rewrite, infinite_scroll,
                           open_results_on_new_tab, self_info,
                           search_on_category_select, tracker_url_remover,
                           vim_hotkeys)

required_attrs = (('name', (str, unicode)), ('description', (str, unicode)),
                  ('default_on', bool))

optional_attrs = (('js_dependencies', tuple), ('css_dependencies', tuple))


class Plugin():
    default_on = False
    name = 'Default plugin'
Code example #27
File: online.py Project: searxng/searxng
from timeit import default_timer
import asyncio
import httpx

import searx.network
from searx import logger
from searx.utils import gen_useragent
from searx.exceptions import (
    SearxEngineAccessDeniedException,
    SearxEngineCaptchaException,
    SearxEngineTooManyRequestsException,
)
from searx.metrics.error_recorder import count_error
from .abstract import EngineProcessor

logger = logger.getChild('searx.search.processor.online')


def default_request_params():
    """Default request parameters for ``online`` engines."""
    return {
        'method': 'GET',
        'headers': {},
        'data': {},
        'url': '',
        'cookies': {},
        'verify': True,
        'auth': None
    }
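A short sketch of how these defaults are meant to be consumed: the processor builds the dict, adds per-request values, and then hands it to the engine's request() callback to fill in the URL (the extra keys below are assumptions for illustration):

params = default_request_params()
params['headers']['User-Agent'] = gen_useragent()   # imported further up in this module
params['pageno'] = 1                                # page requested by the caller (assumption)
# an engine module's request(query, params) would now set params['url'],
# params['method'], params['data'], ... before the HTTP request is sent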

Code example #28
'''

import sys
import threading
from os.path import realpath, dirname
from babel.localedata import locale_identifiers
from urllib.parse import urlparse
from operator import itemgetter
from searx import settings
from searx import logger
from searx.data import ENGINES_LANGUAGES
from searx.exceptions import SearxEngineResponseException
from searx.network import get, initialize as initialize_network, set_context_network_name
from searx.utils import load_module, match_language, get_engine_from_settings, gen_useragent

logger = logger.getChild('engines')

engine_dir = dirname(realpath(__file__))

engines = {}

categories = {'general': []}

babel_langs = [
    lang_parts[0] + '-' +
    lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
    for lang_parts in (lang_code.split('_')
                       for lang_code in locale_identifiers())
]

engine_shortcuts = {}
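The `babel_langs` comprehension above rewrites Babel locale identifiers into the hyphenated codes searx uses; a small worked example of that mapping:

for lang_code in ('en_US', 'zh_Hans_CN', 'fr'):
    parts = lang_code.split('_')
    print(parts[0] + '-' + parts[-1] if len(parts) > 1 else parts[0])
# en-US, zh-CN, fr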
Code example #29
if __name__ == "__main__":
    from os.path import realpath, dirname

    sys.path.append(realpath(dirname(realpath(__file__)) + "/../"))

import hashlib
import hmac
import json
import os

import requests

from searx import logger

logger = logger.getChild("webapp")

from datetime import datetime, timedelta
from time import time
from html import escape
from io import StringIO
from urllib.parse import urlencode, urljoin, urlparse

from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter  # pylint: disable=no-name-in-module

from werkzeug.middleware.proxy_fix import ProxyFix
from flask import (
    Flask,
    request,
Code example #30
File: search.py Project: med15060/oma
from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException

try:
    from thread import start_new_thread
except:
    from _thread import start_new_thread

if sys.version_info[0] == 3:
    unicode = str

logger = logger.getChild('search')

number_of_searches = 0


def send_http_request(engine, request_params):
    # create a dictionary which contains all
    # information about the request
    request_args = dict(
        headers=request_params['headers'],
        cookies=request_params['cookies'],
        verify=request_params['verify']
    )

    # specific type of request (GET or POST)
    if request_params['method'] == 'GET':
Code example #31
File: wordnik.py Project: zlsdzh001/searx
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Wordnik (general)

"""

from lxml.html import fromstring
from searx import logger
from searx.utils import extract_text
from searx.raise_for_httperror import raise_for_httperror

logger = logger.getChild('Wordnik engine')

# about
about = {
    "website": 'https://www.wordnik.com',
    "wikidata_id": 'Q8034401',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['general']
paging = False

URL = 'https://www.wordnik.com'
SEARCH_URL = URL + '/words/{query}'


def request(query, params):
    params['url'] = SEARCH_URL.format(query=query)
Code example #32
from os.path import splitext, join
from io import open, StringIO
from random import choice
from html.parser import HTMLParser
from lxml.etree import XPath
from babel.core import get_global
from babel.dates import format_date

from searx import settings
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
from searx import logger


logger = logger.getChild('utils')

blocked_tags = ('script',
                'style')

ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)

useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
                             + "/data/useragents.json", 'r', encoding='utf-8').read())

xpath_cache = dict()
lang_to_lc_cache = dict()


def searx_useragent():
Code example #33
# SPDX-License-Identifier: AGPL-3.0-or-later
"""CORE (science)

"""
# pylint: disable=missing-function-docstring

from json import loads
from datetime import datetime
from urllib.parse import urlencode

from searx import logger
from searx.exceptions import SearxEngineAPIException

logger = logger.getChild('CORE engine')

about = {
    "website": 'https://core.ac.uk',
    "wikidata_id": None,
    "official_api_documentation": 'https://core.ac.uk/documentation/api/',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

categories = ['science']
paging = True
nb_per_page = 10

api_key = 'unset'

Code example #34
File: webapp.py Project: LeNovalis/searx
if __name__ == '__main__':
    from sys import path
    from os.path import realpath, dirname
    path.append(realpath(dirname(realpath(__file__)) + '/../'))

import hashlib
import hmac
import json
import os
import sys

import requests

from searx import logger
logger = logger.getChild('webapp')

try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
except:
    logger.critical("cannot import dependency: pygments")
    from sys import exit
    exit(1)
try:
    from cgi import escape
except:
    from html import escape
from six import next
from datetime import datetime, timedelta
Code example #35
File: bing.py Project: gabriel0miranda/searx-tor
 @using-api   no (because of query limit)
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        publishedDate
"""

import re
from lxml import html
from searx import logger, utils
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import match_language, gen_useragent, eval_xpath

logger = logger.getChild('bing engine')

# engine dependent config
categories = ['general']
paging = True
language_support = True
supported_languages_url = 'https://www.bing.com/account/general'
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}

# search-url
base_url = 'https://www.bing.com/'
search_string = 'search?{query}&first={offset}'


def _get_offset_from_pageno(pageno):
    return (pageno - 1) * 10 + 1
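A quick check of the offset arithmetic used for the `first` parameter in `search_string`:

assert _get_offset_from_pageno(1) == 1    # first page starts at result 1
assert _get_offset_from_pageno(3) == 21   # ten results per page, 1-based offset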
Code example #36
File: webapp.py Project: GreenLunar/searx
(C) 2013- by Adam Tauber, <*****@*****.**>
'''

if __name__ == '__main__':
    from sys import path
    from os.path import realpath, dirname
    path.append(realpath(dirname(realpath(__file__)) + '/../'))

import json
import cStringIO
import os
import hashlib
import requests

from searx import logger
logger = logger.getChild('webapp')

try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
except:
    logger.critical("cannot import dependency: pygments")
    from sys import exit
    exit(1)

from datetime import datetime, timedelta
from urllib import urlencode
from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix
from flask import (
Code example #37
# @provide-api yes (https://developers.google.com/custom-search/)
#
# @using-api   no
# @results     HTML
# @stable      no (HTML can change)
# @parse       url, title, content, suggestion

import re
from flask_babel import gettext
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
from searx.utils import match_language

logger = logger.getChild('google engine')


# engine dependent config
categories = ['general']
paging = True
language_support = True
use_locale_domain = True
time_range_support = True

# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
default_hostname = 'www.google.com'

country_to_hostname = {
    'BG': 'www.google.bg',  # Bulgaria
    'CZ': 'www.google.cz',  # Czech Republic
Code example #38
    eval_xpath_getindex,
    extract_text,
)

from searx.engines.google import (
    get_lang_info,
    time_range_dict,
    detect_google_sorry,
)

# pylint: disable=unused-import
from searx.engines.google import (supported_languages_url,
                                  _fetch_supported_languages)
# pylint: enable=unused-import

logger = logger.getChild('google images')

# about
about = {
    "website": 'https://images.google.com',
    "wikidata_id": 'Q521550',
    "official_api_documentation":
    'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images']
paging = False
Code example #39
from lxml import html
from searx import logger
from searx.exceptions import SearxEngineCaptchaException
from searx.utils import extract_text, eval_xpath
from searx.engines.google import (
    _fetch_supported_languages,
    supported_languages_url,
)  # NOQA # pylint: disable=unused-import

from searx.engines.google import (
    get_lang_country,
    google_domains,
    time_range_dict,
)

logger = logger.getChild("google images")

# engine dependent config

categories = ["images"]
paging = False
language_support = True
use_locale_domain = True
time_range_support = True
safesearch = True

filter_mapping = {0: "images", 1: "active", 2: "active"}


def scrap_out_thumbs(dom):
    """Scrap out thumbnail data from <script> tags."""
Code example #40
File: google_scholar.py Project: MarcAbonce/searxng
    "wikidata_id": 'Q494817',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['science']
paging = True
language_support = True
use_locale_domain = True
time_range_support = True
safesearch = False

logger = logger.getChild('google scholar')

def time_range_url(params):
    """Returns a URL query component for a google-Scholar time range based on
    ``params['time_range']``.  Google Scholar only supports ranges in years.
    To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*)
    are mapped to *year*.  If no range is set, an empty string is returned.
    Example::

        &as_ylo=2019
    """
    # as_ylo=2016&as_yhi=2019
    ret_val = ''
    if params['time_range'] in time_range_dict:
        ret_val = urlencode({'as_ylo': datetime.now().year - 1})
    return '&' + ret_val
Code example #41
File: yandex.py Project: MrLpk/searx
"""
 Yandex (Web)

 @website     https://yandex.ru/
 @provide-api ?
 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content
"""

from lxml import html
from searx import logger
from searx.url_utils import urlencode

logger = logger.getChild('yandex engine')

# engine dependent config
categories = ['general']
paging = True
language_support = True  # TODO

default_tld = 'com'
language_map = {'ru': 'ru',
                'ua': 'ua',
                'be': 'by',
                'kk': 'kz',
                'tr': 'com.tr'}

# search-url
base_url = 'https://yandex.{tld}/'