class InvestigateApi(object):

    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = u'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        """Sets up the authenticated request helper and the optional cache.

        Args:
            api_key: String sent as the Bearer token in the Authorization header.
            cache_file_name: String file name of cache. No cache is used when it is falsy.
        """
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        """Prefixes `url_path` with BASE_URL.

        Any formatting error is logged and then re-raised unchanged.
        """
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            # Bare `raise` keeps the original traceback (`raise e` resets it on Python 2).
            raise

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        """Builds one full URL per argument.

        Args:
            fmt_url_path: format string with a single `{0}` placeholder
            url_path_args: An enumerable of values substituted into the placeholder
        Returns:
            A list of URLs, in the same order as `url_path_args`
        """
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-categorization')
    def categorization(self, domains):
        """Calls categorization end point.

        All domains are sent in a single JSON-encoded POST body.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = u'domains/categorization/?showLabels'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-domain_score')
    def domain_score(self, domains):
        """Calls domain scores end point.

        All domains are sent in a single JSON-encoded POST body.

        Args:
            domains: An enumerable of domains
        Returns:
            The first element of the multi_post() result
        """
        url_path = 'domains/score/'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)

        # Only request what the cache could not answer. Membership is tested on
        # the dict itself; `.keys()` is a list scan per lookup on Python 2.
        pending = [key for key in url_params if key not in all_responses]

        if pending:
            urls = self._to_urls(fmt_url_path, pending)
            responses = self._requests.multi_get(urls)
            # Responses are paired with their parameter by position.
            for url_param, response in zip(pending, responses):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls security end point.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = u'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = u'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def domain_tag(self, domains):
        """Get the data range when a domain is part of OpenDNS block list.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}; per the endpoint each result
            carries period, category, and url
        """
        api_name = 'opendns-domain_tag'
        fmt_url_path = u'domains/{0}/latest_tags'
        return self._multi_get(api_name, fmt_url_path, domains)

    def related_domains(self, domains):
        """Get list of domain names that have been seen requested around the
        same time (up to 60 seconds before or after) to the given domain name.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-related_domains'
        fmt_url_path = u'links/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = u'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def dns_rr(self, ips):
        """Get the resource records related to input domains.

        Args:
            ips: an enumerable of strings. Despite the parameter name, values
                are substituted into the `dnsdb/name/a/{0}.json` path, so they
                are presumably domain names.
        Returns:
            A dict of {name: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-dns_rr'
        fmt_url_path = u'dnsdb/name/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def latest_malicious(self, ips):
        """Get a list of malicious domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-latest_malicious'
        fmt_url_path = u'ips/{0}/latest_domains'
        return self._multi_get(api_name, fmt_url_path, ips)
class ShadowServerApi(object):

    """Looks up file hashes against the ShadowServer bin-test service."""

    BINTEST_URL = u'http://bin-test.shadowserver.org/api'

    def __init__(self, cache_file_name=None, update_cache=True, req_timeout=90.0):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache.
            update_cache: Determines whether cache should be written out back to the disk when closing it.
                          Default is `True`.
            req_timeout: Maximum number of seconds to wait without reading a response byte before deciding an error has occurred.
                         Default is 90.0 seconds.
        """

        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=req_timeout)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @MultiRequest.error_handling
    def get_bin_test(self, hashes):
        """Test hashes against a list of known software applications.

        Known hashes will return a dictionary of information.
        Unknown hashes will return nothing.

        Args:
            hashes: list of string hashes.
        Returns:
            A dict with the hash as key and the shadowserver report as value.
        """
        # Bound unconditionally so later uses never depend on the cache branch.
        api_name = 'shadowserver-bin-test'
        all_responses = {}

        if self._cache:
            cached = self._cache.bulk_lookup(api_name, hashes)
            # Every cached hash is skipped, including ones cached as unknown...
            hashes = [key for key in hashes if key not in cached]
            # ...but only cached reports with at least two entries are returned.
            all_responses = {key: val for key, val in cached.items() if len(val) >= 2}

        HASHES_PER_REQ = 25
        # `range` (not `xrange`) keeps this working on both Python 2 and 3.
        hash_chunks = [
            '\n'.join(hashes[pos:pos + HASHES_PER_REQ])
            for pos in range(0, len(hashes), HASHES_PER_REQ)
        ]

        responses = self._requests.multi_post(self.BINTEST_URL, data=hash_chunks, to_json=False, send_as_file=True)
        for response in responses:
            if response is None or 200 != response.status_code:
                continue

            for line in response.text.split('\n'):
                # Skip blank lines (e.g. after a trailing newline) so that an
                # empty key is never cached; strip also drops a stray '\r'.
                line = line.strip()
                if not line:
                    continue

                # Set an initial val.
                val = {}

                # A line is "<hash>" or "<hash> <json>". When there is just a
                # key and no value, the hash was unknown to ShadowServer.
                key, _, json_text = line.partition(' ')
                key = key.lower()

                # The response only has a JSON body if the hash was known.
                if json_text:
                    try:
                        val = simplejson.loads(json_text)
                        # A very short response indicates an error?
                        if len(val.keys()) >= 2:
                            all_responses[key] = val
                    except ValueError:
                        # Sometimes ShadowServer returns invalid data. Silently skip it.
                        pass

                # Unknown hashes are cached too (as {}), so they are not re-queried.
                if self._cache:
                    self._cache.cache_value(api_name, key, val)

        return all_responses
class InvestigateApi(object):

    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = u'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        """Sets up the authenticated request helper and the optional cache.

        Args:
            api_key: String sent as the Bearer token in the Authorization header.
            cache_file_name: String file name of cache. No cache is used when it is falsy.
        """
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        """Returns `url_path` appended to BASE_URL.

        A formatting failure is logged and the same exception propagates.
        """
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            # Re-raise without rebinding so the original traceback survives.
            raise

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        """Expands a path template into full URLs.

        Args:
            fmt_url_path: format string with a single `{0}` placeholder
            url_path_args: An enumerable of values substituted into the placeholder
        Returns:
            A list of URLs, in the same order as `url_path_args`
        """
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-categorization')
    def categorization(self, domains):
        """Calls categorization end point.

        The whole domain collection is JSON-encoded into one POST body.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = u'domains/categorization/?showLabels'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-domain_score')
    def domain_score(self, domains):
        """Calls domain scores end point.

        The whole domain collection is JSON-encoded into one POST body.

        Args:
            domains: An enumerable of domains
        Returns:
            The first element of the multi_post() result
        """
        url_path = 'domains/score/'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)

        # Keep only the parameters the cache did not resolve. Testing against
        # the dict avoids the per-key list scan of `.keys()` on Python 2.
        uncached = [key for key in url_params if key not in all_responses]

        if uncached:
            urls = self._to_urls(fmt_url_path, uncached)
            fetched = self._requests.multi_get(urls)
            # multi_get() results line up positionally with the requested URLs.
            for url_param, response in zip(uncached, fetched):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls security end point.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = u'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = u'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def domain_tag(self, domains):
        """Get the data range when a domain is part of OpenDNS block list.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}; per the endpoint each result
            carries period, category, and url
        """
        api_name = 'opendns-domain_tag'
        fmt_url_path = u'domains/{0}/latest_tags'
        return self._multi_get(api_name, fmt_url_path, domains)

    def related_domains(self, domains):
        """Get list of domain names that have been seen requested around the
        same time (up to 60 seconds before or after) to the given domain name.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-related_domains'
        fmt_url_path = u'links/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = u'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def dns_rr(self, ips):
        """Get the resource records related to input domains.

        Args:
            ips: an enumerable of strings. Despite the parameter name, values
                are substituted into the `dnsdb/name/a/{0}.json` path, so they
                are presumably domain names.
        Returns:
            A dict of {name: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-dns_rr'
        fmt_url_path = u'dnsdb/name/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def latest_malicious(self, ips):
        """Get a list of malicious domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-latest_malicious'
        fmt_url_path = u'ips/{0}/latest_domains'
        return self._multi_get(api_name, fmt_url_path, ips)
class InvestigateApi(object):

    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = u'https://investigate.api.opendns.com/'

    # TODO: consider moving this to a config file
    # Upper bound on how many domains are JSON-encoded into one POST body.
    MAX_DOMAINS_IN_POST = 1000

    def __init__(self, api_key, cache_file_name=None, update_cache=True, req_timeout=None):
        """Sets up the authenticated request helper and the optional cache.

        Args:
            api_key: String sent as the Bearer token in the Authorization header.
            cache_file_name: String file name of cache. No cache is used when it is falsy.
            update_cache: Passed through to ApiCache. Default is `True`.
            req_timeout: Passed through to MultiRequest as its request timeout.
        """
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(
            default_headers=auth_header, max_requests=12, rate_limit=30,
            req_timeout=req_timeout, drop_404s=True,
        )

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        """Prefixes `url_path` with BASE_URL.

        Any formatting error is logged and then re-raised unchanged.
        """
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            # Bare `raise` keeps the original traceback (`raise e` resets it on Python 2).
            raise

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        """Builds one full URL per argument.

        Args:
            fmt_url_path: format string with a single `{0}` placeholder
            url_path_args: An enumerable of values substituted into the placeholder
        Returns:
            A list of URLs, in the same order as `url_path_args`
        """
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    def _multi_post(self, url_path, domains):
        """POSTs `domains` to an endpoint in chunks and merges the results.

        Args:
            url_path: endpoint path relative to BASE_URL
            domains: a sliceable sequence of domains; it is split into chunks
                of at most MAX_DOMAINS_IN_POST entries
        Returns:
            A single dict merged from the per-chunk response dicts
        """
        data = [
            simplejson.dumps(domains[pos:pos + self.MAX_DOMAINS_IN_POST])
            for pos in range(0, len(domains), self.MAX_DOMAINS_IN_POST)
        ]
        # multi_post() returns list of dictionaries, so they need to be merged into one dict
        all_responses = self._requests.multi_post(self._to_url(url_path), data=data)
        responses = {}
        for r in all_responses:
            # NOTE(review): a failed chunk presumably comes back as None;
            # skip it instead of crashing in dict.update() and losing the
            # chunks that did succeed.
            if r:
                responses.update(r)
        return responses

    @_cached_by_domain(api_name='opendns-categorization')
    def categorization(self, domains):
        """Calls categorization end point.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = u'domains/categorization/?showLabels'
        return self._multi_post(url_path, domains)

    @_cached_by_domain(api_name='opendns-domain_score')
    def domain_score(self, domains):
        """Calls domain scores endpoint.

        This method is deprecated since OpenDNS Investigate API
        endpoint is also deprecated.
        """
        warn(
            'OpenDNS Domain Scores endpoint is deprecated. Use '
            'InvestigateApi.categorization() instead', DeprecationWarning,
        )
        url_path = 'domains/score/'
        return self._multi_post(url_path, domains)

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params, query_params=None):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
            query_params - None / dict / list of dicts containing query params
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)

        # Only request what the cache could not answer. Membership is tested on
        # the dict itself; `.keys()` is a list scan per lookup on Python 2.
        pending = [key for key in url_params if key not in all_responses]

        if pending:
            urls = self._to_urls(fmt_url_path, pending)
            responses = self._requests.multi_get(urls, query_params)
            # Responses are paired with their parameter by position.
            for url_param, response in zip(pending, responses):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls security end point.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = u'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def whois_emails(self, emails):
        """Calls WHOIS Email end point

        Args:
            emails: An enumerable of string Emails
        Returns:
            A dict of {email: domain_result}
        """
        api_name = 'opendns-whois-emails'
        fmt_url_path = u'whois/emails/{0}'
        return self._multi_get(api_name, fmt_url_path, emails)

    def whois_nameservers(self, nameservers):
        """Calls WHOIS Nameserver end point

        Args:
            nameservers: An enumerable of nameservers
        Returns:
            A dict of {nameserver: domain_result}
        """
        api_name = 'opendns-whois-nameservers'
        fmt_url_path = u'whois/nameservers/{0}'
        return self._multi_get(api_name, fmt_url_path, nameservers)

    def whois_domains(self, domains):
        """Calls WHOIS domain end point

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: domain_result}
        """
        api_name = 'opendns-whois-domain'
        fmt_url_path = u'whois/{0}'
        return self._multi_get(api_name, fmt_url_path, domains)

    def whois_domains_history(self, domains):
        """Calls WHOIS domain history end point

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: domain_history_result}
        """
        api_name = 'opendns-whois-domain-history'
        fmt_url_path = u'whois/{0}/history'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = u'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def domain_tag(self, domains):
        """Get the data range when a domain is part of OpenDNS block list.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}; per the endpoint each result
            carries period, category, and url
        """
        api_name = 'opendns-domain_tag'
        fmt_url_path = u'domains/{0}/latest_tags'
        return self._multi_get(api_name, fmt_url_path, domains)

    def related_domains(self, domains):
        """Get list of domain names that have been seen requested around the
        same time (up to 60 seconds before or after) to the given domain name.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-related_domains'
        fmt_url_path = u'links/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = u'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def dns_rr(self, ips):
        """Get the resource records related to input domains.

        Args:
            ips: an enumerable of strings. Despite the parameter name, values
                are substituted into the `dnsdb/name/a/{0}.json` path, so they
                are presumably domain names.
        Returns:
            A dict of {name: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-dns_rr'
        fmt_url_path = u'dnsdb/name/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def latest_malicious(self, ips):
        """Get a list of malicious domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-latest_malicious'
        fmt_url_path = u'ips/{0}/latest_domains'
        return self._multi_get(api_name, fmt_url_path, ips)

    def sample(self, hashes):
        """Get the information about a sample based on its hash.

        Args:
            hashes: an enumerable of strings as hashes
        Returns:
            A dict of {hash: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-sample'
        fmt_url_path = u'sample/{0}'
        return self._multi_get(api_name, fmt_url_path, hashes)

    def search(self, patterns, start=30, limit=1000, include_category=False):
        """Performs pattern searches against the Investigate database.

        Args:
            patterns: An enumerable of RegEx domain patterns to search for
            start: How far back results extend from in days (max is 30)
            limit: Number of results to show (max is 1000)
            include_category: Include OpenDNS security categories
        Returns:
            A dict of {pattern: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-patterns'
        fmt_url_path = u'search/{0}'
        # The query string carries a relative start such as '-30days' and a
        # lowercase 'true'/'false' flag.
        start = '-{0}days'.format(start)
        include_category = str(include_category).lower()
        query_params = {
            'start': start,
            'limit': limit,
            'includecategory': include_category,
        }
        return self._multi_get(api_name, fmt_url_path, patterns, query_params)

    def risk_score(self, domains):
        """Performs Umbrella risk score analysis on the input domains

        Args:
            domains: an enumerable of domains
        Returns:
            A dict of {domain: api_result}, as built by _multi_get()
        """
        api_name = 'opendns-risk_score'
        fmt_url_path = u'domains/risk-score/{0}'
        return self._multi_get(api_name, fmt_url_path, domains)
class ShadowServerApi(object):

    """Looks up file hashes against the ShadowServer bin-test service."""

    BINTEST_URL = u'http://bin-test.shadowserver.org/api'

    def __init__(self, cache_file_name=None, update_cache=True, req_timeout=90.0):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache.
            update_cache: Passed through to ApiCache. Default is `True`.
            req_timeout: Request timeout in seconds handed to MultiRequest.
                         Default is 90.0 seconds, the value that was previously hard-coded.
        """

        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=req_timeout)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @MultiRequest.error_handling
    def get_bin_test(self, hashes):
        """Test hashes against a list of known software applications.

        Known hashes will return a dictionary of information.
        Unknown hashes will return nothing.

        Args:
            hashes: list of string hashes.
        Returns:
            A dict with the hash as key and the shadowserver report as value.
        """
        # Bound unconditionally so later uses never depend on the cache branch.
        api_name = 'shadowserver-bin-test'
        all_responses = {}

        if self._cache:
            cached = self._cache.bulk_lookup(api_name, hashes)
            # Every cached hash is skipped, including ones cached as unknown...
            hashes = [key for key in hashes if key not in cached]
            # ...but only cached reports with at least two entries are returned.
            all_responses = {key: val for key, val in cached.items() if len(val) >= 2}

        HASHES_PER_REQ = 25
        # `range` (not `xrange`) keeps this working on both Python 2 and 3.
        hash_chunks = [
            '\n'.join(hashes[pos:pos + HASHES_PER_REQ])
            for pos in range(0, len(hashes), HASHES_PER_REQ)
        ]

        responses = self._requests.multi_post(self.BINTEST_URL, data=hash_chunks, to_json=False, send_as_file=True)
        for response in responses:
            # NOTE(review): a failed request presumably yields None here;
            # guard before touching status_code instead of raising AttributeError.
            if response is None or 200 != response.status_code:
                continue

            for line in response.text.split('\n'):
                # Skip blank lines (e.g. after a trailing newline) so that an
                # empty key is never cached; strip also drops a stray '\r'.
                line = line.strip()
                if not line:
                    continue

                # Set an initial val.
                val = {}

                # A line is "<hash>" or "<hash> <json>". When there is just a
                # key and no value, the hash was unknown to ShadowServer.
                key, _, json_text = line.partition(' ')
                key = key.lower()

                # The response only has a JSON body if the hash was known.
                if json_text:
                    try:
                        val = simplejson.loads(json_text)
                        # A very short response indicates an error?
                        if len(val.keys()) >= 2:
                            all_responses[key] = val
                    except ValueError:
                        # Sometimes ShadowServer returns invalid data. Silently skip it.
                        pass

                # Unknown hashes are cached too (as {}), so they are not re-queried.
                if self._cache:
                    self._cache.cache_value(api_name, key, val)

        return all_responses