async def fetch_one(instance: str) -> dict:
    """Measure the response times of one instance.

    Checks the index page, then the wikipedia engine, then the google
    engine. Returns a dict of timing data; on an unexpected error it also
    contains an 'error' key with a human readable description.
    """
    timings = {}
    try:
        pool_limits = httpx.PoolLimits(soft_limit=10, hard_limit=300)
        net_type = get_network_type(instance)
        async with new_client(pool_limits=pool_limits, network_type=net_type) as session:
            # check index with a new connection each time
            print('🏠 ' + instance)
            await request_stat_with_exception(timings, 'index', session, instance,
                                              REQUEST_COUNT, 20, 40, None)
            # check wikipedia engine with a new connection each time
            print('🔎 ' + instance)
            await request_stat_with_exception(timings, 'search_wp', session, instance,
                                              REQUEST_COUNT, 30, 60, check_wikipedia_result,
                                              params={'q': '!wp time'})
            # check google engine with a new connection each time
            print('🔍 ' + instance)
            await request_stat_with_exception(timings, 'search_go', session, instance,
                                              2, 60, 80, check_google_result,
                                              params={'q': '!google time'})
    except RequestErrorException as ex:
        print('❌ {0}: {1}'.format(str(instance), str(ex)))
    except Exception as ex:
        print('❌❌ {0}: unexpected {1} {2}'.format(str(instance), type(ex), str(ex)))
        timings['error'] = exception_to_str(ex)
        traceback.print_exc(file=sys.stdout)
    else:
        print('🏁 {0}'.format(str(instance)))
    return timings
def dns_query(qname, field):
    """Resolve `qname` for the DNS record type `field`.

    Returns a tuple (dns_answers, dns_error):
    * dns_answers: the resolver answer, or None when there is no answer.
    * dns_error: a human readable error description, or None when the
      missing answer is expected and deliberately ignored
      (NXDOMAIN / NoAnswer / NoNameservers).
    """
    dns_answers = None
    dns_error = None
    try:
        dns_answers = dns.resolver.query(qname, field)
    except (dns.resolver.NXDOMAIN,        # the DNS query name does not exist
            dns.resolver.NoAnswer,        # the response contains no answer to the question
            dns.resolver.NoNameservers):  # all nameservers failed to answer the query
        # ignore all three: no answer, but not an error worth reporting
        dns_answers = None
        dns_error = None
    except dns.exception.Timeout:
        # the DNS operation timed out
        dns_answers = None
        dns_error = 'Timeout'
    except dns.resolver.YXDOMAIN:
        # The DNS query name is too long after DNAME substitution.
        # (the previous message wrongly labelled this a 'Timeout')
        dns_answers = None
        dns_error = 'Name too long after DNAME substitution'
    except Exception as ex:
        dns_answers = None
        dns_error = exception_to_str(ex)
    return dns_answers, dns_error
def check_https_port(address: str):
    """Probe the HTTPS port of `address` with a 5 second timeout.

    Returns (True, None) when a TCP connection succeeds,
    otherwise (False, <error description>).
    """
    try:
        connection = socket.create_connection((address, HTTPS_PORT), 5)
        connection.close()
    except Exception as ex:
        return False, exception_to_str(ex)
    return True, None
async def analyze(host):
    """Get the cryptcheck.fr grades for `host`.

    Returns (ranks, user_url) where ranks is a comma separated list of
    grades, worst grade first, or '?' when no result is available.
    """
    user_url = USER_ENDPOINT.format(host)
    api_result = None
    try:
        # get the result from cryptcheck.fr
        async with new_client() as session:
            api_result, pending = await get_existing_result(session, host, CACHE_EXPIRE_TIME)
            if api_result is None:
                # no existing result or it is too old
                if not pending:
                    # ask for refresh
                    await refresh_result(session, host)
                # pool the response
                api_result = await pool_result(session, host)
        # get the ranks from the result
        if api_result is None or api_result.get('result') is None:
            return ('?', user_url)
        # one grade per IP; the set removes the duplicates
        grades = {entry.get('grade', '?') for entry in api_result['result']}
        # concat all the grades in one line, worse grade in front
        ranks = sorted(grades, reverse=True)
        return (', '.join(ranks), user_url)
    except Exception as ex:
        print(host, exception_to_str(ex))
        return ('?', user_url)
def get_whois(address: str):
    """Look up RDAP/whois information for one IP address.

    Returns (result, whois_error): result is a dict of ASN and network
    fields (plus 'asn_privacy' when the ASN privacy status is known), or
    None when the lookup failed — whois_error then holds a description.
    """
    whois_error = None
    info = None
    try:
        rdap = ipwhois.IPWhois(address).lookup_rdap(depth=1)
    except Exception as ex:
        # should be ipwhois.exceptions.BaseIpwhoisException
        # but ipwhois can raise AttributeError: 'NoneType' object has no attribute 'strip'
        whois_error = exception_to_str(ex)
    else:
        network = rdap.get('network', {})
        info = {
            'asn': rdap.get('asn', ''),
            'asn_cidr': rdap.get('asn_cidr', ''),
            'asn_description': rdap.get('asn_description', ''),
            'asn_country_code': safe_upper(rdap.get('asn_country_code')),
            'network_name': network.get('name', ''),
            'network_country': safe_upper(network.get('country', '')),
        }
        asn_privacy = ASN_PRIVACY.get(info['asn'], AsnPrivacy.UNKNOWN)
        if asn_privacy is not None:
            info['asn_privacy'] = asn_privacy.value
    return info, whois_error
def analyze(url: str):
    """Scan `url` and return (grade, grade_url).

    grade is None when the scan failed or returned no grade.
    """
    parsed = urlparse(url)
    grade_url = USER_ENDPOINT.format(parsed.hostname)
    try:
        scan_result = scan(str(parsed.hostname), path=str(parsed.path), headers=DEFAULT_HEADERS)
        grade = scan_result.get('scan', {}).get('grade', None)
    except Exception as ex:
        print(url, exception_to_str(ex))
        grade = None
    return (grade, grade_url)
def get_address_info(searx_stats_result: SearxStatisticsResult, address: str, field_type: str, https_port: bool):
    """Gather reverse DNS, whois and (optionally) HTTPS port information
    for one IP address.

    The per-CIDR whois information is stored in searx_stats_result.cidrs;
    the returned dict contains the reverse DNS name, the field type, the
    CIDR, and error descriptions for whatever failed.
    """
    reverse_name, reverse_error = dns_query_reverse(address)
    whois_info, whois_error = get_whois(address)
    result = {
        'reverse': reverse_name,
        'field_type': field_type,
    }
    if whois_info is not None:
        # the CIDR goes into result, not into the shared per-CIDR info
        cidr = whois_info.pop('asn_cidr')
        # fall back to the network name when there is no ASN description
        if whois_info['asn_description'] is None:
            whois_info['asn_description'] = whois_info['network_name']
        del whois_info['network_name']
        # overwrite the network_country with ip2location
        if MMDB_DATABASE:
            try:
                mmdb_country = MMDB_DATABASE.country(address)
                whois_info['network_country'] = mmdb_country.country.iso_code
            except (ValueError, geoip2.errors.AddressNotFoundError):
                pass
            except Exception as ex:
                print('MMDB Error', exception_to_str(ex))
        result['asn_cidr'] = cidr
        if cidr not in searx_stats_result.cidrs:
            searx_stats_result.cidrs[cidr] = whois_info
        elif whois_info != searx_stats_result.cidrs[cidr]:
            print('different asn info\n', whois_info, '\n', searx_stats_result.cidrs[cidr])
    if reverse_error is not None:
        result['reverse_error'] = reverse_error
    if whois_error is not None:
        result['whois_error'] = whois_error
    # check https ports
    if https_port:
        port_reachable, port_error = check_https_port(address)
        result['https_port'] = port_reachable
        if port_error is not None:
            result['https_port_error'] = port_error
    return result
async def fetch(searx_stats_result: SearxStatisticsResult):
    """Fetch commit hashes for every distinct git repository used by the
    valid instances, recording each repository in searx_stats_result.forks.

    Fetch errors are printed and the URL is not marked as seen, so a later
    instance with the same repository retries it.
    """
    fetched_urls = set()
    for _, detail in searx_stats_result.iter_instances(only_valid=True):
        git_url = normalize_git_url(detail['git_url'])
        if not git_url or git_url in fetched_urls:
            continue
        try:
            await fetch_hashes_from_url(git_url)
        except Exception as ex:
            print(exception_to_str(ex))
        else:
            if git_url not in searx_stats_result.forks:
                searx_stats_result.forks.append(git_url)
            fetched_urls.add(git_url)
async def fetch_one(instance: str) -> dict:
    """Measure the search response times of one instance.

    Returns a dict of timing data; on an unexpected error it also
    contains an 'error' key with a human readable description.
    """
    timings = {}
    try:
        network_type = get_network_type(instance)
        # non-NORMAL networks (presumably slower, e.g. onion) get a longer
        # timeout — TODO confirm against get_network_type
        timeout = 30 if network_type != NetworkType.NORMAL else 15
        async with new_client(timeout=timeout, network_type=network_type) as client:
            # check if cookie settings is supported
            # intended side effect: add one HTTP connection to the pool
            cookies = await get_cookie_settings(client, instance)
            # check the default engines
            print('🔎 ' + instance)
            await request_stat_with_log(instance, timings, 'search', client, instance,
                                        3, 120, 160, check_search_result,
                                        params={'q': 'time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)
            # check the wikipedia engine
            print('🐘 ' + instance)
            await request_stat_with_log(instance, timings, 'search_wp', client, instance,
                                        2, 60, 160, check_wikipedia_result,
                                        params={'q': '!wp time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)
            # check the google engine
            # may include google results too, so wikipedia engine check before
            print('🔍 ' + instance)
            await request_stat_with_log(instance, timings, 'search_go', client, instance,
                                        2, 60, 160, check_google_result,
                                        params={'q': '!google time'},
                                        cookies=cookies, headers=DEFAULT_HEADERS)
    except Exception as ex:
        print('❌❌ {0}: unexpected {1} {2}'.format(str(instance), type(ex), str(ex)))
        timings['error'] = exception_to_str(ex)
        traceback.print_exc(file=sys.stdout)
    else:
        print('🏁 {0}'.format(str(instance)))
    return timings
def get_whois(address: str):
    """Look up RDAP/whois information for one IP address.

    Returns (result, whois_error): result is a dict of ASN and network
    fields (plus 'asn_privacy' when the ASN privacy status is known), or
    None when the lookup failed — whois_error then holds a description.
    """
    whois_error = None
    result = None
    try:
        # the constructor is inside the try block: IPWhois(address)
        # validates the address and can itself raise a
        # BaseIpwhoisException (e.g. IPDefinedError for private /
        # reserved addresses), which previously escaped uncaught
        obj = ipwhois.IPWhois(address)
        rdap_answer = obj.lookup_rdap(depth=1)
    except ipwhois.exceptions.BaseIpwhoisException as ex:
        whois_error = exception_to_str(ex)
    else:
        network = rdap_answer.get('network', {})
        result = {
            'asn': rdap_answer.get('asn', ''),
            'asn_description': rdap_answer.get('asn_description', ''),
            'asn_country_code': safe_upper(rdap_answer.get('asn_country_code')),
            'asn_registry': rdap_answer.get('asn_registry', ''),
            'network_name': network.get('name', ''),
            'network_country': safe_upper(network.get('country', '')),
            'network_type': network.get('type', ''),
        }
        asn_privacy = ASN_PRIVACY.get(result['asn'], AsnPrivacy.UNKNOWN)
        if asn_privacy is not None:
            result['asn_privacy'] = asn_privacy.value
    return result, whois_error
async def analyze(host):
    """Trigger a new scan of `host` and poll until it finishes.

    Returns (grade, grade_url); grade is None when the scan failed,
    was aborted, reached an unknown state, or ran out of retries.

    NOTE(review): when the API answers 'rescan-attempt-too-soon' this
    function returns the bare value False instead of a (grade, grade_url)
    tuple — kept as-is because callers may test for it, but worth unifying.
    """
    grade_url = USER_ENDPOINT.format(host)
    try:
        async with new_client() as session:
            # ask for a new scan
            response = await session.post(API_NEW.format(host))
            api_json = response.json()  # renamed: `json` shadowed the builtin module name
            if api_json.get('error') == 'rescan-attempt-too-soon':
                return False
            finished = False
            grade = None
            remaining_tries = MAX_RETRY
            while not finished:
                await asyncio.sleep(TIME_BETWEEN_RETRY)
                response = await session.get(API_GET.format(host), timeout=5)
                api_json = response.json()
                state = api_json.get('state', '')
                if state == 'FINISHED':
                    finished = True
                    grade = api_json.get('grade')
                elif state in ['ABORTED', 'FAILED']:
                    finished = True
                    grade = None
                elif state not in ['PENDING', 'STARTING', 'RUNNING']:
                    # typo fixed: message previously read 'unknow state '
                    print(host, 'unknown state ', state)
                    finished = True
                    grade = None
                # give up after MAX_RETRY polling attempts
                if remaining_tries == 0:
                    finished = True
                    grade = None
                else:
                    remaining_tries = remaining_tries - 1
    except Exception as ex:
        print(host, exception_to_str(ex))
        grade = None
    return (grade, grade_url)
def test_exception_to_str():
    """exception_to_str: plain message, prefix stripping, empty-message fallback."""
    # the exception message is returned unchanged
    assert utils.exception_to_str(ValueError('test')) == 'test'
    # the ERROR_REMOVE_PREFIX marker is stripped from the message
    prefixed = ValueError(utils.ERROR_REMOVE_PREFIX + 'test')
    assert utils.exception_to_str(prefixed) == 'test'
    # an empty message falls back to the exception class name
    assert utils.exception_to_str(ValueError('')) == 'ValueError'