def __init__(
        # pylint: disable=too-many-arguments
        self,
        enable_http=True,
        verify=True,
        enable_http2=False,
        max_connections=None,
        max_keepalive_connections=None,
        keepalive_expiry=None,
        proxies=None,
        using_tor_proxy=False,
        local_addresses=None,
        retries=0,
        retry_on_http_error=None,
        max_redirects=30,
        logger_name=None,
):
    self.enable_http = enable_http
    self.verify = verify
    self.enable_http2 = enable_http2
    self.max_connections = max_connections
    self.max_keepalive_connections = max_keepalive_connections
    self.keepalive_expiry = keepalive_expiry
    self.proxies = proxies
    self.using_tor_proxy = using_tor_proxy
    self.local_addresses = local_addresses
    self.retries = retries
    self.retry_on_http_error = retry_on_http_error
    self.max_redirects = max_redirects
    self._local_addresses_cycle = self.get_ipaddress_cycle()
    self._proxies_cycle = self.get_proxy_cycles()
    self._clients = {}
    self._logger = logger.getChild(logger_name) if logger_name else logger
    self.check_parameters()
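# A minimal usage sketch for the constructor above. The class name `Network`
# and its home in searx.network are assumptions inferred from the parameter
# names (proxies, retries, HTTP/2, Tor); this excerpt does not name the class:
#
#     network = Network(
#         enable_http2=True,
#         proxies='socks5://127.0.0.1:9050',
#         using_tor_proxy=True,
#         retries=2,
#         logger_name='example',
#     )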
""" Yahoo (Web) @website https://yandex.ru/ @provide-api ? @using-api no @results HTML (using search portal) @stable no (HTML can change) @parse url, title, content """ from lxml import html from searx import logger from searx.url_utils import urlencode logger = logger.getChild('yandex engine') # engine dependent config categories = ['general'] paging = True language_support = True # TODO default_tld = 'com' language_map = {'ru': 'ru', 'ua': 'ua', 'be': 'by', 'kk': 'kz', 'tr': 'com.tr'} # search-url base_url = 'https://yandex.{tld}/' search_url = 'search/?{query}&p={page}' results_xpath = '//li[@class="serp-item"]' url_xpath = './/h2/a/@href'
# SPDX-License-Identifier: AGPL-3.0-or-later

import threading
from time import time

from searx import logger
from searx.metrology.error_recorder import record_exception, record_error
from searx.search.processors.abstract import EngineProcessor

logger = logger.getChild('search.processor.offline')

# module-level lock shared by all threads: a lock created inline
# (``with threading.RLock():``) would be a fresh object on every call
# and would not serialize anything
errors_lock = threading.RLock()


class OfflineProcessor(EngineProcessor):

    engine_type = 'offline'

    def _record_stats_on_error(self, result_container, start_time):
        engine_time = time() - start_time
        result_container.add_timing(self.engine_name, engine_time, engine_time)

        with errors_lock:
            self.engine.stats['errors'] += 1

    def _search_basic(self, query, params):
        return self.engine.search(query, params)

    def search(self, query, params, result_container, start_time, timeout_limit):
        try:
            search_results = self._search_basic(query, params)

            if search_results:
import re
import searx.poolrequests as requests_lib
from itertools import izip_longest, chain
from operator import itemgetter
from Queue import Queue
from time import time
from urlparse import urlparse, unquote
from searx.engines import (
    categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent, get_blocked_engines
from searx.query import Query
from searx import logger

logger = logger.getChild('search')

number_of_searches = 0


def search_request_wrapper(fn, url, engine_name, **kwargs):
    try:
        return fn(url, **kwargs)
    except Exception:
        # increase errors stats
        engines[engine_name].stats['errors'] += 1

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine_name))
        return
from searx.engines.google import (
    get_lang_country,
    filter_mapping,
)

# about
about = {
    "website": 'https://news.google.com',
    "wikidata_id": 'Q12020',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

logger = logger.getChild('google news')

# compared to other google engines, google-news supports time ranges
# differently: the time range is included in the search term
time_range_dict = {
    'day': 'when:1d',
    'week': 'when:7d',
    'month': 'when:1m',
    'year': 'when:1y',
}

# engine dependent config
categories = ['news']
paging = False
language_support = True
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-function-docstring
"""SQLite database (Offline)

"""

import sqlite3
import contextlib

from searx import logger

logger = logger.getChild('SQLite engine')

engine_type = 'offline'
database = ""
query_str = ""
limit = 10
paging = True
result_template = 'key-value.html'


def init(engine_settings):
    if 'query_str' not in engine_settings:
        raise ValueError('query_str cannot be empty')

    if not engine_settings['query_str'].lower().startswith('select '):
        raise ValueError('only SELECT query is supported')


@contextlib.contextmanager
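# A configuration sketch for this engine in settings.yml. All values are
# made-up placeholders, and the :wildcard named parameter is assumed to be
# bound by the engine's search function, which is not part of this excerpt:
#
#   - name: my notes
#     engine: sqlite
#     shortcut: mn
#     database: my-database.db
#     query_str: 'SELECT url, title, content FROM notes WHERE title LIKE :wildcard'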
    'OnlineProcessor',
    'OnlineDictionaryProcessor',
    'OnlineCurrencyProcessor',
    'processors',
]

from searx import logger
import searx.engines as engines

from .online import OnlineProcessor
from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor
from .online_currency import OnlineCurrencyProcessor
from .abstract import EngineProcessor

logger = logger.getChild('search.processors')

processors = {}
"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)"""


def get_processor_class(engine_type):
    """Return processor class according to the ``engine_type``"""
    for c in [
        OnlineProcessor,
        OfflineProcessor,
        OnlineDictionaryProcessor,
        OnlineCurrencyProcessor,
    ]:
        if c.engine_type == engine_type:
            return c
    return None
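# Usage sketch for get_processor_class(). The engine object and the
# EngineProcessor constructor signature (engine, engine_name) are assumptions
# not confirmed by this excerpt:
#
#     klass = get_processor_class(engine.engine_type)
#     if klass is not None:
#         processors[engine.name] = klass(engine, engine.name)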
from searx.engines import licensing
from searx import logger
from searx.url_utils import urlencode

categories = ['general']  # optional
log = logger.getChild('memucho')


def request(query, params):
    '''pre-request callback

    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {}  # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1  # number of the requested page
    '''
    search = urlencode({'term': query})
    params['url'] = f'https://memucho.de/api/edusharing/search?{search}'
    return params


def response(resp):
    result = []
    json = resp.json()
    log.debug(f'{len(json["Items"])}')

    for item in json['Items']:
        licstr = str(item['Licence']).replace('_', '-').lower()
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with searx. If not, see < http://www.gnu.org/licenses/ >.

    (C) 2013- by Adam Tauber, <*****@*****.**>
'''

import re
from urlparse import urlparse
from lxml import etree
from os import listdir
from os.path import isfile, isdir, join
from searx import logger

logger = logger.getChild("https_rewrite")

# https://gitweb.torproject.org/\
# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules

# HTTPS rewrite rules
https_rules = []


# load a single ruleset from an XML file
def load_single_https_ruleset(filepath):
    ruleset = ()

    # init parser
    parser = etree.XMLParser()
.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1

"""

import json
from urllib.parse import urlencode

from lxml import html

from searx import logger
from searx.data import WIKIDATA_UNITS
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom

logger = logger.getChild('duckduckgo_definitions')

URL = 'https://api.duckduckgo.com/'\
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

WIKIDATA_PREFIX = [
    'http://www.wikidata.org/entity/',
    'https://www.wikidata.org/entity/'
]

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


def is_broken_text(text):
    """ duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
import sys
from time import time
from itertools import cycle
from threading import local
from collections import OrderedDict
import ast

import requests

from searx import settings
from searx import logger
from searx.raise_for_httperror import raise_for_httperror

logger = logger.getChild('poolrequests')

try:
    import ssl
    if ssl.OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
        # https://github.com/certifi/python-certifi#1024-bit-root-certificates
        logger.critical(
            'You are using an old openssl version ({0}), please upgrade to 1.0.2 or newer!'
            .format(ssl.OPENSSL_VERSION))
        sys.exit(1)
except ImportError:
    ssl = None

if not getattr(ssl, "HAS_SNI", False):
    try:
        import OpenSSL  # pylint: disable=unused-import
    except ImportError:
        logger.critical(
    (at your option) any later version.

    searx is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with searx. If not, see < http://www.gnu.org/licenses/ >.

    (C) 2015 by Adam Tauber, <*****@*****.**>
'''
from sys import exit
from searx import logger

logger = logger.getChild('plugins')

from searx.plugins import (https_rewrite,
                           open_results_on_new_tab,
                           self_info,
                           search_on_category_select,
                           tracker_url_remover,
                           vim_hotkeys)

required_attrs = (('name', (str, unicode)),
                  ('description', (str, unicode)),
                  ('default_on', bool))

optional_attrs = (('js_dependencies', tuple),
                  ('css_dependencies', tuple))
if __name__ == "__main__": from sys import path from os.path import realpath, dirname path.append(realpath(dirname(realpath(__file__)) + "/../")) import json import cStringIO import os import hashlib import requests from searx import logger logger = logger.getChild("webapp") try: from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter except: logger.critical("cannot import dependency: pygments") from sys import exit exit(1) from datetime import datetime, timedelta from urllib import urlencode from urlparse import urlparse, urljoin from werkzeug.contrib.fixers import ProxyFix
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <*****@*****.**>
"""

from os.path import realpath, dirname, splitext, join
import sys
from imp import load_source
from flask_babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger

logger = logger.getChild("engines")

engine_dir = dirname(realpath(__file__))

engines = {}

categories = {"general": []}

engine_shortcuts = {}
engine_default_args = {
    "paging": False,
    "categories": ["general"],
    "language_support": True,
    "safesearch": False,
    "timeout": settings["outgoing"]["request_timeout"],
    "shortcut": "-",
 @results       JSON, HTML
 @stable        no (html can change)
 @parse         url, infobox
"""

from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import match_language

from json import loads
from lxml.html import fromstring

logger = logger.getChild('wikidata')
result_count = 1

# urls
wikidata_host = 'https://www.wikidata.org'
url_search = wikidata_host \
    + '/w/index.php?{query}'

wikidata_api = wikidata_host + '/w/api.php'
url_detail = wikidata_api\
    + '?action=parse&format=json&{query}'\
    + '&redirects=1&prop=text%7Cdisplaytitle%7Clanglinks%7Crevid'\
    + '&disableeditsection=1&disabletidy=1&preview=1&sectionpreview=1&disabletoc=1&utf8=1&formatversion=2'

url_map = 'https://www.openstreetmap.org/'\
    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <*****@*****.**>
'''

from os.path import realpath, dirname, splitext, join
import sys
from imp import load_source
from flask.ext.babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger

logger = logger.getChild('engines')

engine_dir = dirname(realpath(__file__))

engines = {}

categories = {'general': []}

engine_shortcuts = {}
engine_default_args = {'paging': False,
                       'categories': ['general'],
                       'language_support': True,
                       'safesearch': False,
                       'timeout': settings['outgoing']['request_timeout'],
                       'shortcut': '-',
                       'disabled': False,
  engine : xpath
  paging : True
  search_url : https://bitbucket.org/repo/all/{pageno}?name={query}
  url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href
  title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]
  content_xpath : //article[@class="repo-summary"]/p

"""

from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
from searx import logger

logger = logger.getChild('XPath engine')

search_url = None
"""
Search URL of the engine.  Example::

    https://example.org/?search={query}&page={pageno}{time_range}{safe_search}

Replacements are:

``{query}``:
  Search terms from user.

``{pageno}``:
  Page number if engine supports paging :py:obj:`paging`
from urllib.parse import urlencode
from json import loads

from dateutil.parser import isoparse
from babel.dates import format_datetime, format_date, format_time, get_datetime_format

from searx import logger
from searx.data import WIKIDATA_UNITS
from searx.network import post, get
from searx.utils import match_language, searx_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import

logger = logger.getChild('wikidata')

# about
about = {
    "website": 'https://wikidata.org/',
    "wikidata_id": 'Q2013',
    "official_api_documentation": 'https://query.wikidata.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
WIKIDATA_PROPERTIES = {
# SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint """Solid Torrents """ # pylint: disable=missing-function-docstring from json import loads from urllib.parse import urlencode from searx import logger logger = logger.getChild('solidtor engine') about = { "website": 'https://www.solidtorrents.net/', "wikidata_id": None, "official_api_documentation": None, "use_official_api": True, "require_api_key": False, "results": 'JSON', } categories = ['files'] paging = True base_url = 'https://www.solidtorrents.net/' search_url = base_url + 'api/v1/search?{query}' def request(query, params):
import csv
import os
import re

from babel.dates import format_date
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from random import choice

from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
from searx import logger

logger = logger.getChild("utils")

ua_versions = ("40.0", "41.0", "42.0", "43.0", "44.0", "45.0", "46.0", "47.0")
ua_os = ("Windows NT 6.3; WOW64", "X11; Linux x86_64", "X11; Linux x86")
ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"

blocked_tags = ("script", "style")


def gen_useragent():
    # TODO
    return ua.format(os=choice(ua_os), version=choice(ua_versions))
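# gen_useragent() fills the template with one random OS string and one random
# version from the tuples above, yielding for example:
#   'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0'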
from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

from searx.engines.yahoo import parse_url

# pylint: disable=unused-import
from searx.engines.yahoo import (
    _fetch_supported_languages,
    supported_languages_url,
)
# pylint: enable=unused-import

logger = logger.getChild('yahoo_news engine')

# about
about = {
    "website": 'https://news.yahoo.com',
    "wikidata_id": 'Q3044717',
    "official_api_documentation": 'https://developer.yahoo.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

language_support = False
time_range_support = False
safesearch = False
paging = True
# @provide-api yes (https://developers.google.com/custom-search/)
#
# @using-api   no
# @results     HTML
# @stable      no (HTML can change)
# @parse       url, title, content, suggestion

import re
from flask_babel import gettext
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
from searx.utils import match_language

logger = logger.getChild('google engine')

# engine dependent config
categories = ['general']
paging = True
language_support = True
use_locale_domain = True
time_range_support = True

# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
default_hostname = 'www.google.com'

country_to_hostname = {
    'BG': 'www.google.bg',  # Bulgaria
    'CZ': 'www.google.cz',  # Czech Republic
import itertools
import threading
import re
from time import time
from urllib.parse import urlparse

from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException
import httpx

from searx import network, logger
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor

logger = logger.getChild('searx.search.checker')

HTML_TAGS = [
    'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script',
    'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite',
    'code', 'data', 'dfn', 'em', 'i', 'kbd', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small',
    'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'style', 'blockquote', 'dd', 'div', 'dl', 'dt',
    'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form',
    'input', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea',
    'applet', 'frame', 'frameset'
]
from babel.dates import format_date
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from imp import load_source
from os.path import splitext, join
from random import choice
import sys

from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
from searx import logger

logger = logger.getChild('utils')

ua_versions = ('40.0', '41.0', '42.0', '43.0', '44.0', '45.0', '46.0', '47.0')
ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', 'X11; Linux x86')
ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
"""Abstract base classes for engine request processores. """ import threading from abc import abstractmethod, ABC from timeit import default_timer from searx import logger from searx.engines import settings from searx.network import get_time_for_thread, get_network from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} # pylint: disable=missing-function-docstring class SuspendedStatus: """Class to handle suspend state.""" __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock' def __init__(self): self.lock = threading.Lock() self.continuous_errors = 0 self.suspend_end_time = 0 self.suspend_reason = None
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with searx. If not, see < http://www.gnu.org/licenses/ >.

    (C) 2015 by Adam Tauber, <*****@*****.**>
'''
from sys import exit, version_info
from searx import logger

if version_info[0] == 3:
    unicode = str

logger = logger.getChild('plugins')

from searx.plugins import (doai_rewrite,
                           https_rewrite,
                           infinite_scroll,
                           open_results_on_new_tab,
                           self_info,
                           search_on_category_select,
                           tracker_url_remover,
                           vim_hotkeys)

required_attrs = (('name', (str, unicode)),
                  ('description', (str, unicode)),
                  ('default_on', bool))

optional_attrs = (('js_dependencies', tuple),
                  ('css_dependencies', tuple))


class Plugin():
    default_on = False
    name = 'Default plugin'
from timeit import default_timer
import asyncio

import httpx

import searx.network
from searx import logger
from searx.utils import gen_useragent
from searx.exceptions import (
    SearxEngineAccessDeniedException,
    SearxEngineCaptchaException,
    SearxEngineTooManyRequestsException,
)
from searx.metrics.error_recorder import count_error

from .abstract import EngineProcessor

logger = logger.getChild('searx.search.processor.online')


def default_request_params():
    """Default request parameters for ``online`` engines."""
    return {
        'method': 'GET',
        'headers': {},
        'data': {},
        'url': '',
        'cookies': {},
        'verify': True,
        'auth': None
    }
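# A hedged sketch of how an engine's request() callback typically fills the
# dict returned by default_request_params(). The URL and header values are
# hypothetical; only the parameter keys come from the code above:
#
#     def request(query, params):
#         params['url'] = 'https://example.org/search?q=' + query
#         params['headers']['Accept-Language'] = 'en'
#         return params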
'''

import sys
import threading
from os.path import realpath, dirname
from babel.localedata import locale_identifiers
from urllib.parse import urlparse
from operator import itemgetter
from searx import settings
from searx import logger
from searx.data import ENGINES_LANGUAGES
from searx.exceptions import SearxEngineResponseException
from searx.network import get, initialize as initialize_network, set_context_network_name
from searx.utils import load_module, match_language, get_engine_from_settings, gen_useragent

logger = logger.getChild('engines')

engine_dir = dirname(realpath(__file__))
engines = {}
categories = {'general': []}

babel_langs = [
    lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
    for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
]

engine_shortcuts = {}
if __name__ == "__main__": from os.path import realpath, dirname sys.path.append(realpath(dirname(realpath(__file__)) + "/../")) import hashlib import hmac import json import os import requests from searx import logger logger = logger.getChild("webapp") from datetime import datetime, timedelta from time import time from html import escape from io import StringIO from urllib.parse import urlencode, urljoin, urlparse from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module from werkzeug.middleware.proxy_fix import ProxyFix from flask import ( Flask, request,
from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException

try:
    from thread import start_new_thread
except ImportError:
    from _thread import start_new_thread

if sys.version_info[0] == 3:
    unicode = str

logger = logger.getChild('search')

number_of_searches = 0


def send_http_request(engine, request_params):
    # create a dictionary which contains all
    # information about the request
    request_args = dict(
        headers=request_params['headers'],
        cookies=request_params['cookies'],
        verify=request_params['verify']
    )

    # specific type of request (GET or POST)
    if request_params['method'] == 'GET':
# SPDX-License-Identifier: AGPL-3.0-or-later """Wordnik (general) """ from lxml.html import fromstring from searx import logger from searx.utils import extract_text from searx.raise_for_httperror import raise_for_httperror logger = logger.getChild('Wordnik engine') # about about = { "website": 'https://www.wordnik.com', "wikidata_id": 'Q8034401', "official_api_documentation": None, "use_official_api": False, "require_api_key": False, "results": 'HTML', } categories = ['general'] paging = False URL = 'https://www.wordnik.com' SEARCH_URL = URL + '/words/{query}' def request(query, params): params['url'] = SEARCH_URL.format(query=query)
from os.path import splitext, join
from io import open, StringIO
from random import choice
from html.parser import HTMLParser
from lxml.etree import XPath
from babel.core import get_global
from babel.dates import format_date

from searx import settings
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import logger

logger = logger.getChild('utils')

blocked_tags = ('script', 'style')

ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)

useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
                             + "/data/useragents.json", 'r', encoding='utf-8').read())

xpath_cache = dict()
lang_to_lc_cache = dict()


def searx_useragent():
# SPDX-License-Identifier: AGPL-3.0-or-later """CORE (science) """ # pylint: disable=missing-function-docstring from json import loads from datetime import datetime from urllib.parse import urlencode from searx import logger from searx.exceptions import SearxEngineAPIException logger = logger.getChild('CORE engine') about = { "website": 'https://core.ac.uk', "wikidata_id": None, "official_api_documentation": 'https://core.ac.uk/documentation/api/', "use_official_api": True, "require_api_key": True, "results": 'JSON', } categories = ['science'] paging = True nb_per_page = 10 api_key = 'unset' logger = logger.getChild('CORE engine')
if __name__ == '__main__':
    from sys import path
    from os.path import realpath, dirname
    path.append(realpath(dirname(realpath(__file__)) + '/../'))

import hashlib
import hmac
import json
import os
import sys

import requests

from searx import logger

logger = logger.getChild('webapp')

try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
except ImportError:
    logger.critical("cannot import dependency: pygments")
    from sys import exit
    exit(1)

try:
    from cgi import escape
except ImportError:
    from html import escape

from six import next
from datetime import datetime, timedelta
 @using-api   no (because of query limit)
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        publishedDate
"""

import re
from lxml import html
from searx import logger, utils
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import match_language, gen_useragent, eval_xpath

logger = logger.getChild('bing engine')

# engine dependent config
categories = ['general']
paging = True
language_support = True
supported_languages_url = 'https://www.bing.com/account/general'
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}

# search-url
base_url = 'https://www.bing.com/'
search_string = 'search?{query}&first={offset}'


def _get_offset_from_pageno(pageno):
    return (pageno - 1) * 10 + 1
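# Worked example of the offset math above: Bing counts results from 1 and
# serves 10 per page, so page 1 -> first=1, page 2 -> first=11, page 3 -> first=21.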
    (C) 2013- by Adam Tauber, <*****@*****.**>
'''

if __name__ == '__main__':
    from sys import path
    from os.path import realpath, dirname
    path.append(realpath(dirname(realpath(__file__)) + '/../'))

import json
import cStringIO
import os
import hashlib

import requests

from searx import logger

logger = logger.getChild('webapp')

try:
    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter
except ImportError:
    logger.critical("cannot import dependency: pygments")
    from sys import exit
    exit(1)

from datetime import datetime, timedelta
from urllib import urlencode
from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix
from flask import (
    eval_xpath_getindex,
    extract_text,
)

from searx.engines.google import (
    get_lang_info,
    time_range_dict,
    detect_google_sorry,
)

# pylint: disable=unused-import
from searx.engines.google import (supported_languages_url, _fetch_supported_languages)
# pylint: enable=unused-import

logger = logger.getChild('google images')

# about
about = {
    "website": 'https://images.google.com',
    "wikidata_id": 'Q521550',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images']
paging = False
from lxml import html

from searx import logger
from searx.exceptions import SearxEngineCaptchaException
from searx.utils import extract_text, eval_xpath

from searx.engines.google import (
    _fetch_supported_languages,
    supported_languages_url,
)  # NOQA # pylint: disable=unused-import

from searx.engines.google import (
    get_lang_country,
    google_domains,
    time_range_dict,
)

logger = logger.getChild("google images")

# engine dependent config
categories = ["images"]
paging = False
language_support = True
use_locale_domain = True
time_range_support = True
safesearch = True

filter_mapping = {0: "images", 1: "active", 2: "active"}


def scrap_out_thumbs(dom):
    """Scrape out thumbnail data from <script> tags."""
"wikidata_id": 'Q494817', "official_api_documentation": 'https://developers.google.com/custom-search', "use_official_api": False, "require_api_key": False, "results": 'HTML', } # engine dependent config categories = ['science'] paging = True language_support = True use_locale_domain = True time_range_support = True safesearch = False logger = logger.getChild('google scholar') def time_range_url(params): """Returns a URL query component for a google-Scholar time range based on ``params['time_range']``. Google-Scholar does only support ranges in years. To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range is set, an empty string is returned. Example:: &as_ylo=2019 """ # as_ylo=2016&as_yhi=2019 ret_val = '' if params['time_range'] in time_range_dict: ret_val= urlencode({'as_ylo': datetime.now().year -1 }) return '&' + ret_val
""" Yahoo (Web) @website https://yandex.ru/ @provide-api ? @using-api no @results HTML (using search portal) @stable no (HTML can change) @parse url, title, content """ from lxml import html from searx import logger from searx.url_utils import urlencode logger = logger.getChild('yandex engine') # engine dependent config categories = ['general'] paging = True language_support = True # TODO default_tld = 'com' language_map = {'ru': 'ru', 'ua': 'ua', 'be': 'by', 'kk': 'kz', 'tr': 'com.tr'} # search-url base_url = 'https://yandex.{tld}/'