Example #1
0
async def fetch_one(instance: str) -> dict:
    """Probe one searx instance and collect response-time statistics.

    Runs three checks over a single HTTP client: the index page, the
    wikipedia bang search and the google bang search.  Returns the timing
    dict; on unexpected failure it carries an ``error`` entry.
    """
    timings = {}
    try:
        limits = httpx.PoolLimits(soft_limit=10, hard_limit=300)
        network_type = get_network_type(instance)
        async with new_client(pool_limits=limits, network_type=network_type) as client:
            # index page, new connection each time
            print('🏠 ' + instance)
            await request_stat_with_exception(timings, 'index',
                                              client, instance,
                                              REQUEST_COUNT, 20, 40, None)
            # wikipedia engine, new connection each time
            print('🔎 ' + instance)
            await request_stat_with_exception(timings, 'search_wp',
                                              client, instance,
                                              REQUEST_COUNT, 30, 60, check_wikipedia_result,
                                              params={'q': '!wp time'})
            # google engine, new connection each time
            print('🔍 ' + instance)
            await request_stat_with_exception(timings, 'search_go',
                                              client, instance,
                                              2, 60, 80, check_google_result,
                                              params={'q': '!google time'})
    except RequestErrorException as ex:
        # expected failure mode: report and return whatever was measured
        print('❌ {0}: {1}'.format(str(instance), str(ex)))
    except Exception as ex:
        # anything else is a bug or an unusual instance: record and dump it
        print('❌❌ {0}: unexpected {1} {2}'.format(str(instance), type(ex), str(ex)))
        timings['error'] = exception_to_str(ex)
        traceback.print_exc(file=sys.stdout)
    else:
        print('🏁 {0}'.format(str(instance)))
    return timings
Example #2
0
async def fetch_one(searx_stats_result: SearxStatisticsResult, url: str,
                    detail):
    """Fetch status and configuration of one instance and merge it in place.

    Updates ``detail['engines']`` with the per-instance engine data and adds
    any new engines / categories to ``searx_stats_result``.
    """
    network_type = get_network_type(url)
    async with new_client(network_type=network_type) as session:
        # fetch status and config from the instance
        result_status = await get_status(session, url)
        result_config, result_instance = await get_config(session, url)
        if result_status is None:
            # no direct status available: derive it from the stats pages
            result_stats = await get_stats_multi(session, url)
            result_status = get_status_from_stats(result_stats)

        # update config and status for the instance
        detail_engines = detail.setdefault('engines', dict())
        if result_instance is not None:
            dict_merge(detail_engines, result_instance)
        if result_status is not None:
            dict_merge(detail_engines, result_status)

        # update existing engine and category list
        if result_config is not None:
            # engines
            searx_stats_result.engines.update(result_config['engines'])
            # categories (preserve first-seen order, skip duplicates)
            for category in result_config['categories']:
                if category not in searx_stats_result.categories:
                    searx_stats_result.categories.append(category)
        print('💡 {0:30}'.format(url))
Example #3
0
 async def get_ip(url):
     """Fetch *url* and return ``(body_text, None)``, or ``(False, error)`` on failure."""
     async with new_client() as client:
         resp, err = await get(client, url, timeout=10.0)
     if err is not None:
         return False, err
     return resp.text, None
Example #4
0
async def get_instance_urls():
    """Scrape the instance list from the .rst source and return it.

    Only the section after 'Alive and running' is considered; links are
    normalized, de-duplicated and returned as a sorted list.
    """
    # fetch the .rst source
    async with new_client() as session:
        response = await session.get(SEARX_INSTANCES_URL, timeout=10)

    urls = set()
    # keep only the source after 'Alive and running'
    match = re.search(AFTER_ALIVE_AND_RUNNING, response.text)
    if match:
        # every list item may carry several links
        for item in re.findall(ITEM_RE, match.group(0)):
            for link in re.findall(LINK_RE, item):
                normalized = normalize_url(link[1])
                if normalized:
                    urls.add(normalized)

    # unique, sorted
    return sorted(urls)
Example #5
0
async def analyze(host):
    """Query cryptcheck.fr for *host* and return ``(ranks, user_url)``.

    ``ranks`` is a comma-separated list of TLS grades (worst first), or
    ``'?'`` when no result could be obtained.
    """
    user_url = USER_ENDPOINT.format(host)
    try:
        # get the result from cryptcheck.fr
        async with new_client() as session:
            result, pending = await get_existing_result(session, host, CACHE_EXPIRE_TIME)
            if result is None:
                # no existing result or too old
                if not pending:
                    # ask for refresh
                    await refresh_result(session, host)
                # pool the response
                result = await pool_result(session, host)

        if result is None or result.get('result') is None:
            return ('?', user_url)

        # one grade per IP; a set removes duplicates
        grades = {entry.get('grade', '?') for entry in result['result']}
        # worst grade first, all on one line
        return (', '.join(sorted(grades, reverse=True)), user_url)
    except Exception as ex:
        print(host, exception_to_str(ex))
        return ('?', user_url)
Example #6
0
async def test_do_get_404(httpserver: pytest_httpserver.HTTPServer):
    """A 404 answer still yields the body text, plus an HTTP-status error."""
    handler = httpserver.expect_request('/404.html')
    handler.respond_with_data('Not Found', content_type='text/html', status=404)

    async with http.new_client() as client:
        response, error = await http.get(
            client, httpserver.url_for('/404.html'))

    assert response.text == 'Not Found'
    assert error == 'HTTP status code 404'
Example #7
0
async def test_do_get_ok(httpserver: pytest_httpserver.HTTPServer):
    """A plain 200 answer yields the body text and no error."""
    handler = httpserver.expect_request('/index.html')
    handler.respond_with_data('OK', content_type='text/html')

    async with http.new_client() as client:
        response, error = await http.get(
            client, httpserver.url_for('/index.html'))

    assert response.text == 'OK'
    assert error is None
Example #8
0
async def fetch_one(instance_url):
    """Fetch status, version and timing information for one searx instance.

    Returns an ``(instance_url, detail)`` tuple; ``instance_url`` may be
    replaced by the redirect target.  ``detail`` always carries at least the
    network type and HTTP outcome, and ``error`` when something went wrong.
    """
    detail = dict()
    # BUG FIX: `error` is normally assigned by get(); pre-initialize it so the
    # `if error is not None` check below does not raise UnboundLocalError when
    # new_client() times out before get() ever runs.
    error = None
    # no cookie ( cookies=DEFAULT_COOKIES,  )
    try:
        network_type = get_network_type(instance_url)
        async with new_client(network_type=network_type) as session:
            response, error = await get(session,
                                        instance_url,
                                        headers=DEFAULT_HEADERS,
                                        timeout=10)
            if response is not None:
                version = await get_searx_version(response)
                detail = {
                    'network_type': network_type.name.lower(),
                    'http': {
                        'status_code': response.status_code,
                        'error': error
                    },
                    'version': version,
                    'timing': {
                        'initial': response.elapsed.total_seconds()
                    },
                    'alternativeUrls': {},
                }
                response_url = str(response.url)
                # normalize with a trailing slash so the URLs compare equal
                if not response_url.endswith('/'):
                    response_url = response_url + '/'
                # redirect: remember the original URL, adopt the final one
                if response_url != instance_url:
                    if 'redirect_from' not in detail:
                        # NOTE(review): this list is created but never filled;
                        # the redirect source is recorded in alternativeUrls.
                        detail['redirect_from'] = []
                    detail['alternativeUrls'][instance_url] = 'redirect'
                    instance_url = response_url
            else:
                # no response at all: record the error with empty timings
                detail = {
                    'network_type': network_type.name.lower(),
                    'http': {
                        'status_code': None,
                        'error': error
                    },
                    'version': None,
                    'timing': {},
                    'alternativeUrls': {},
                }
    except concurrent.futures.TimeoutError:
        # This exception occurs on new_client()
        detail['error'] = 'Timeout error'

    if error is not None:
        detail['error'] = error

    # TLS details only make sense on the clear web
    if network_type == NetworkType.NORMAL:
        detail['tls'] = get_ssl_info(get_host(instance_url))
    return instance_url, detail
Example #9
0
async def fetch_one(instance_url: str, private: bool) -> tuple:
    """Fetch HTTP status, searx version and TLS info for one instance.

    Returns an ``(instance_url, detail)`` tuple; ``instance_url`` may be
    replaced by the redirect target.  ``detail`` always has the keys
    ``network_type``, ``http`` and ``version``, plus ``error`` on failure.
    """
    # no cookie ( cookies=DEFAULT_COOKIES,  )
    network_type = get_network_type(instance_url)
    detail = {
        'network_type': network_type.name.lower(),
        'http': {},
        'version': None,
    }
    try:
        async with new_client(network_type=network_type) as session:
            response, error = await get(session,
                                        instance_url,
                                        headers=DEFAULT_HEADERS,
                                        timeout=10)
            status_code = response.status_code if response is not None else None
            detail['http'] = {
                'status_code': status_code,
                'error': error,
            }
            if response is not None:
                response_url = str(response.url)
                # add trailing slash so the URLs compare equal
                if not response_url.endswith('/'):
                    response_url = response_url + '/'
                # redirect: remember the original URL, adopt the final one
                if 'alternativeUrls' not in detail:
                    detail['alternativeUrls'] = dict()
                if response_url != instance_url:
                    detail['alternativeUrls'][instance_url] = 'redirect from'
                    instance_url = response_url

                # get the searx version (small pause to be polite to the host)
                if error is None:
                    await asyncio.sleep(0.5)
                    await set_searx_version(detail, session, response_url,
                                            response)

                # set initial response time
                detail['timing'] = {}
                response_time_stats = ResponseTimeStats()
                response_time_stats.add_response(response)
                detail['timing']['initial'] = response_time_stats.get()
    except concurrent.futures.TimeoutError:
        # This exception occurs on new_client()
        error = 'Timeout error'

    # TLS details: only on the clear web, and only when the instance looks
    # like a real searx (version detected) or was explicitly listed (private)
    if (detail['version'] is not None
            or private) and network_type == NetworkType.NORMAL:
        detail['tls'] = get_ssl_info(get_host(instance_url))

    if error is not None:
        detail['http']['error'] = error
        detail['error'] = error

    return instance_url, detail
Example #10
0
async def test_do_get_connection_refused(
        httpserver: pytest_httpserver.HTTPServer):
    """http.get reports 'Connection refused' when the server is down."""
    handler = httpserver.expect_request('/index.html')
    handler.respond_with_data('Not Found', content_type='text/html', status=404)
    # stop the HTTP server on purpose so the connection is refused
    httpserver.stop()
    try:
        async with http.new_client() as client:
            response, error = await http.get(
                client, httpserver.url_for('/index.html'))
    finally:
        # restart it so later tests are unaffected
        httpserver.start()

    assert response is None
    assert error == 'Connection refused'
Example #11
0
async def get_instance_urls():
    """Scrape the instance list from the wiki page and return it sorted.

    Strips everything before the section marker, skips non-instance links
    and .i2p hosts, normalizes trailing slashes, and de-duplicates.
    """
    # fetch html page
    async with new_client() as session:
        response = await session.get(SEARX_INSTANCES_URL,
                                     headers=DEFAULT_HEADERS,
                                     cookies=DEFAULT_COOKIES,
                                     timeout=10)
    html = await html_fromstring(response.text)

    # drop content before the section marker
    for element in MARKDOWN_ELEMENTS_XPATH(html)[0].getchildren():
        if stringify_children(element).lower().find(REMOVE_BEFORE_LOWER_CASE) >= 0:
            break
        element.clear()

    # links that are clearly not instance URLs
    skip_prefixes = ('https://www.ssllabs.com/',
                     'https://hstspreload.org/',
                     'https://geti2p.net/')

    urls = []
    for anchor in INSTANCES_XPATH(html):
        href = anchor.get('href')
        if href.startswith(skip_prefixes) or href.endswith('/cert/'):
            continue
        # normalize: strip trailing slash and '/search' suffix
        if href.endswith('/'):
            href = href[:-1]
        if href.endswith('/search'):
            href = href[:-7]
        # Remove .i2p (keep .onion URL)
        if get_host(href).endswith('.i2p'):
            continue
        urls.append(href + '/')

    # unique, sorted
    return sorted(set(urls))
Example #12
0
async def fetch_one(instance: str) -> dict:
    """Run the search checks against one instance and return its timings.

    Checks the default search, the wikipedia engine and the google engine
    over a single HTTP client; any failure is recorded in the returned
    dict under ``error``.
    """
    timings = {}
    try:
        network_type = get_network_type(instance)
        # onion/i2p networks are slower: give them a longer timeout
        timeout = 15 if network_type == NetworkType.NORMAL else 30
        async with new_client(timeout=timeout, network_type=network_type) as session:
            # probe cookie support; side effect: warms up one pooled connection
            cookies = await get_cookie_settings(session, instance)

            # default engines
            print('🔎 ' + instance)
            await request_stat_with_log(instance, timings, 'search',
                                        session, instance,
                                        3, 120, 160, check_search_result,
                                        params={'q': 'time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)

            # wikipedia engine
            print('🐘 ' + instance)
            await request_stat_with_log(instance, timings, 'search_wp',
                                        session, instance,
                                        2, 60, 160, check_wikipedia_result,
                                        params={'q': '!wp time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)

            # google engine — checked after wikipedia, since generic search
            # results may already include google hits
            print('🔍 ' + instance)
            await request_stat_with_log(instance, timings, 'search_go',
                                        session, instance,
                                        2, 60, 160, check_google_result,
                                        params={'q': '!google time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)
    except Exception as ex:
        print('❌❌ {0}: unexpected {1} {2}'.format(str(instance), type(ex), str(ex)))
        timings['error'] = exception_to_str(ex)
        traceback.print_exc(file=sys.stdout)
    else:
        print('🏁 {0}'.format(str(instance)))
    return timings
Example #13
0
async def analyze(host):
    """Trigger a scan for *host* and poll until it finishes.

    Returns ``(grade, grade_url)`` — grade is ``None`` on failure/timeout —
    or ``False`` when the API refuses a rescan because one ran too recently.
    """
    grade_url = USER_ENDPOINT.format(host)
    try:
        async with new_client() as session:
            # start a new scan
            response = await session.post(API_NEW.format(host))
            # renamed from `json` to avoid shadowing the stdlib module name
            data = response.json()
            if data.get('error') == 'rescan-attempt-too-soon':
                return False

            finished = False
            grade = None
            remaining_tries = MAX_RETRY
            while not finished:
                await asyncio.sleep(TIME_BETWEEN_RETRY)
                response = await session.get(API_GET.format(host), timeout=5)
                data = response.json()
                state = data.get('state', '')
                if state == 'FINISHED':
                    finished = True
                    grade = data.get('grade')
                elif state in ['ABORTED', 'FAILED']:
                    finished = True
                    grade = None
                elif state not in ['PENDING', 'STARTING', 'RUNNING']:
                    # typo fixed: message used to read 'unknow state'
                    print(host, 'unknown state ', state)
                    finished = True
                    grade = None
                # stop polling once the retry budget is exhausted
                if remaining_tries == 0:
                    finished = True
                    grade = None
                else:
                    remaining_tries = remaining_tries - 1
    except Exception as ex:
        print(host, exception_to_str(ex))
        grade = None
    return (grade, grade_url)
Example #14
0
async def test_new_client():
    """new_client() exposes a cookie jar on the session."""
    async with http.new_client() as client:
        jar = client.cookies
    assert jar is not None