class ShadowServerApi(object):
    """Client for the ShadowServer bin-test API with optional result caching."""

    BINTEST_URL = 'http://bin-test.shadowserver.org/api'

    def __init__(self, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache. When falsy, no cache is used.
        """
        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=90.0)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @MultiRequest.error_handling
    def get_bin_test(self, hashes):
        """Test hashes against a list of known software applications.

        Known hashes will return a dictionary of information.
        Unknown hashes will return nothing.

        Args:
            hashes: an iterable of string hashes.
        Returns:
            A dict with the hash as key and the shadowserver report as value.
            Keys parsed from a response are lower-cased; keys served from the
            cache are returned exactly as they were passed in.
        """
        api_name = 'shadowserver-bin-test'

        # Materialize once so that slicing below works for any iterable input.
        hashes = list(hashes)

        all_responses = {}
        if self._cache:
            cached = self._cache.bulk_lookup(api_name, hashes)
            # Anything present in the cache (even an "unknown hash" marker) is not re-requested.
            hashes = [key for key in hashes if key not in cached]
            # Only entries with at least two fields are real reports; shorter ones
            # are cached placeholders for unknown hashes or error responses.
            all_responses = {key: val for key, val in cached.items() if len(val) >= 2}

        HASHES_PER_REQ = 25
        hash_chunks = ['\n'.join(hashes[pos:pos + HASHES_PER_REQ])
                       for pos in range(0, len(hashes), HASHES_PER_REQ)]

        # Everything was answered from the cache (or nothing was asked): skip the network.
        if not hash_chunks:
            return all_responses

        responses = self._requests.multi_post(self.BINTEST_URL, data=hash_chunks, to_json=False, send_as_file=True)
        for response in responses:
            if 200 != response.status_code:
                continue

            for line in response.text.split('\n'):
                line = line.strip()
                # Blank lines (e.g. after a trailing newline) carry no hash; do not cache them.
                if not line:
                    continue

                # Each line is "<hash>" or "<hash> <json>". A bare hash means it was
                # unknown to ShadowServer.
                key, _, json_text = line.partition(' ')
                key = key.lower()

                # Default for unknown hashes and for unparseable bodies.
                val = {}
                if json_text:
                    try:
                        parsed = simplejson.loads(json_text)
                    except ValueError:
                        # Sometimes ShadowServer returns invalid data. Silently skip it.
                        parsed = None

                    # Only JSON objects are valid reports; anything else is treated as invalid data.
                    if isinstance(parsed, dict):
                        val = parsed
                        # A very short response indicates an error?
                        if len(val) >= 2:
                            all_responses[key] = val

                if self._cache:
                    self._cache.cache_value(api_name, key, val)

        return all_responses
class InvestigateApi(object):
    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = 'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        """Sets up authenticated HTTP params and optionally loads a cache.

        Args:
            api_key: API key sent as a Bearer token in the Authorization header.
            cache_file_name: String file name of cache. When falsy, no cache is used.
        """
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        """Builds a full URL by appending url_path to BASE_URL.

        Args:
            url_path: path (and query string) relative to BASE_URL.
        Returns:
            A unicode URL string.
        Raises:
            Any exception raised while formatting; it is reported and re-raised.
        """
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            # Bare raise keeps the original traceback intact.
            raise

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        """Builds one full URL per argument.

        Args:
            fmt_url_path: format string with a single positional placeholder.
            url_path_args: an enumerable of values substituted into fmt_url_path.
        Returns:
            A list of URL strings, in the order of url_path_args.
        Raises:
            Any exception raised while formatting; it is reported and re-raised.
        """
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                # Bare raise keeps the original traceback intact.
                raise

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    def categorization(self, domains):
        """Calls the categorization end point for all uncached domains in one POST.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        Raises:
            Exception: if the API returned no usable response.
        """
        url_path = 'domains/categorization/?showLabels'
        api_name = 'opendns-categorization'

        # Materialize once: the domains are measured, filtered and JSON-serialized below.
        domains = list(domains)

        all_responses = {}
        if self._cache:
            all_responses = self._cache.bulk_lookup(api_name, domains)
            domains = [key for key in domains if key not in all_responses]

        if domains:
            responses = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
            # A single POST was issued, so only the first response is relevant.
            response = responses[0] if responses else None

            # TODO: Some better more expressive exception
            if not response:
                raise Exception('OpenDNS categorization request returned no data')

            for domain, result in response.items():
                if self._cache:
                    self._cache.cache_value(api_name, domain, result)
                all_responses[domain] = result

        return all_responses

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        # Materialize once: the params are iterated several times below.
        url_params = list(url_params)

        all_responses = {}
        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)
            url_params = [key for key in url_params if key not in all_responses]

        if url_params:
            urls = self._to_urls(fmt_url_path, url_params)
            responses = self._requests.multi_get(urls)
            # Responses are paired with params positionally
            # (assumes multi_get preserves request order -- TODO confirm).
            for url_param, response in zip(url_params, responses):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls the security end point once per domain.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = 'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Calls the recommendations end point once per domain.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            A dict of {domain: cooccurrences_result}
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = 'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Calls the DNS resource-record history end point once per ip.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            A dict of {ip: rr_history_result}
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = 'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)