def __init__(self, api_key, cache_file_name=None):
    """Sets up rate-limited, bearer-authenticated HTTP access and an optional cache.

    Args:
        api_key: API key placed in the 'Authorization: Bearer' header.
        cache_file_name: String file name of cache.
    """
    bearer_token = 'Bearer {0}'.format(api_key)
    self._requests = MultiRequest(
        default_headers={'Authorization': bearer_token},
        max_requests=12,
        rate_limit=30,
    )

    # A cache is only attached when a backing file name was supplied.
    if cache_file_name:
        self._cache = ApiCache(cache_file_name)
    else:
        self._cache = None
def __init__(self, api_key, cache_file_name=None):
    """Establishes basic HTTP params and loads a cache.

    Args:
        api_key: VirusTotal API key
        cache_file_name: String file name of cache.
    """
    self._api_key = api_key
    self._requests = MultiRequest()

    # A cache is only attached when a backing file name was supplied.
    if cache_file_name:
        self._cache = ApiCache(cache_file_name)
    else:
        self._cache = None
def __init__(self, cache_file_name=None):
    """Establishes basic HTTP params and loads a cache.

    Args:
        cache_file_name: String file name of cache.
    """
    # TODO - lookup request rate limit
    # ShadowServer has been observed to be quite slow; a generous 90 second
    # timeout avoids spurious request failures.
    self._requests = MultiRequest(max_requests=2, req_timeout=90.0)

    # A cache is only attached when a backing file name was supplied.
    self._cache = None
    if cache_file_name:
        self._cache = ApiCache(cache_file_name)
class VirusTotalApi(object):
    """Client for the VirusTotal v2 API with optional response caching."""

    BASE_DOMAIN = 'https://www.virustotal.com/vtapi/v2/'

    def __init__(self, api_key, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            api_key: VirusTotal API key
            cache_file_name: String file name of cache.
        """
        self._api_key = api_key
        self._requests = MultiRequest()

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @MultiRequest.error_handling
    def get_file_reports(self, resources):
        """Retrieves the most recent reports for a set of md5, sha1, and/or sha2 hashes.

        Args:
            resources: list of string hashes.
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        api_name = 'virustotal-file-reports'

        all_responses, resources = self._bulk_cache_lookup(api_name, resources)
        resource_chunks = self._prepare_resource_chunks(resources)
        response_chunks = self._request_reports('resource', resource_chunks, 'file/report')
        self._extract_response_chunks(all_responses, response_chunks, api_name)

        return all_responses

    @MultiRequest.error_handling
    def get_domain_reports(self, domains):
        """Retrieves the most recent VT info for a set of domains.

        Args:
            domains: list of string domains.
        Returns:
            A dict with the domain as key and the VT report as value.
        """
        api_name = 'virustotal-domain-reports'

        (all_responses, domains) = self._bulk_cache_lookup(api_name, domains)
        # The domain endpoint takes one domain per request, so no chunking here.
        responses = self._request_reports('domain', domains, 'domain/report')

        for domain, response in zip(domains, responses):
            # Skip failed lookups so empty results are neither cached nor
            # returned; this mirrors the falsy-response handling in
            # _extract_response_chunks.
            if not response:
                continue
            if self._cache:
                self._cache.cache_value(api_name, domain, response)
            all_responses[domain] = response

        return all_responses

    @MultiRequest.error_handling
    def get_url_reports(self, resources):
        """Retrieves a scan report on a given URL.

        Args:
            resources: list of URLs.
        Returns:
            A dict with the URL as key and the VT report as value.
        """
        api_name = 'virustotal-url-reports'

        (all_responses, resources) = self._bulk_cache_lookup(api_name, resources)
        # The url/report endpoint expects newline-separated URLs per request.
        resource_chunks = self._prepare_resource_chunks(resources, '\n')
        response_chunks = self._request_reports('resource', resource_chunks, 'url/report')
        self._extract_response_chunks(all_responses, response_chunks, api_name)

        return all_responses

    def _bulk_cache_lookup(self, api_name, keys):
        """Performes a bulk cache lookup and returns a tuple with the results
        found and the keys missing in the cache. If cached is not configured
        it will return an empty dictionary of found results and the initial
        list of keys.

        Args:
            api_name: a string name of the API.
            keys: an enumerable of string keys.
        Returns:
            A tuple: (responses found, missing keys).
        """
        if self._cache:
            responses = self._cache.bulk_lookup(api_name, keys)
            # Membership test against the dict directly; `.keys()` would build
            # a list and make each lookup O(n) on Python 2.
            missing_keys = [key for key in keys if key not in responses]
            return (responses, missing_keys)

        return ({}, keys)

    def _prepare_resource_chunks(self, resources, resource_delim=','):
        """As in some VirusTotal API methods the call can be made for multiple
        resources at once this method prepares a list of concatenated resources
        according to the maximum number of resources per requests.

        Args:
            resources: a list of the resources.
            resource_delim: a string used to separate the resources.
                Default value is a comma.
        Returns:
            A list of the concatenated resources.
        """
        resources_per_req = config_get_deep('virustotal.resources_per_req', 25)
        return [
            resource_delim.join(resources[pos:pos + resources_per_req])
            for pos in xrange(0, len(resources), resources_per_req)
        ]

    def _request_reports(self, resource_param_name, resources, endpoint_name):
        """Sends multiples requests for the resources to a particular endpoint.

        Args:
            resource_param_name: a string name of the resource parameter.
            resources: list of of the resources.
            endpoint_name: VirusTotal endpoint URL suffix.
        Returns:
            A list of the responses.
        """
        params = [
            {resource_param_name: resource, 'apikey': self._api_key}
            for resource in resources
        ]
        return self._requests.multi_get(self.BASE_DOMAIN + endpoint_name, query_params=params)

    def _extract_response_chunks(self, all_responses, response_chunks, api_name):
        """Extracts and caches the responses from the response chunks in case
        of the responses for the requests containing multiple concatenated
        resources. Extracted responses are added to the already cached
        responses passed in the all_responses parameter.

        Args:
            all_responses: a dict containing already cached responses.
            response_chunks: a list with response chunks.
            api_name: a string name of the API.
        """
        for response_chunk in response_chunks:
            # A single-resource request returns a dict; normalize to a list.
            if not isinstance(response_chunk, list):
                response_chunk = [response_chunk]
            for response in response_chunk:
                # Skip failed lookups entirely.
                if not response:
                    continue

                if self._cache:
                    self._cache.cache_value(api_name, response['resource'], response)
                all_responses[response['resource']] = response
class ShadowServerApi(object):
    """Client for ShadowServer's bin-test hash lookup service, with optional caching."""

    BINTEST_URL = 'http://bin-test.shadowserver.org/api'

    def __init__(self, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache.
        """
        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=90.0)

        # Only build a cache when a backing file was requested.
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @MultiRequest.error_handling
    def get_bin_test(self, hashes):
        """Test hashes against a list of known software applications.

        Known hashes will return a dictionary of information.
        Unknown hashes will return nothing.

        Args:
            hashes: list of string hashes.
        Returns:
            A dict with the hash as key and the shadowserver report as value.
        """
        all_responses = {}

        if self._cache:
            api_name = 'shadowserver-bin-test'
            all_responses = self._cache.bulk_lookup(api_name, hashes)
            hashes = [h for h in hashes if h not in all_responses.keys()]
            # Cached entries with fewer than two fields are "unknown hash"
            # markers; drop them from the returned results.
            all_responses = {
                h: report for h, report in all_responses.iteritems() if len(report) >= 2
            }

        HASHES_PER_REQ = 25
        hash_chunks = [
            '\n'.join(hashes[pos:pos + HASHES_PER_REQ])
            for pos in xrange(0, len(hashes), HASHES_PER_REQ)
        ]

        responses = self._requests.multi_post(
            self.BINTEST_URL, data=hash_chunks, to_json=False, send_as_file=True,
        )
        for response in responses:
            if response.status_code != 200:
                continue
            for line in response.text.split('\n'):
                # Each line looks like "<hash> <json>". The JSON body is only
                # present when the hash was known to ShadowServer.
                head, _, payload = line.partition(' ')
                key = head.lower()
                val = {}
                if payload:
                    try:
                        val = simplejson.loads(payload)
                        # A very short response indicates an error?
                        if len(val.keys()) >= 2:
                            all_responses[key] = val
                    except ValueError:
                        # Sometimes ShadowServer returns invalid data. Silently skip it.
                        pass
                if self._cache:
                    self._cache.cache_value(api_name, key, val)

        return all_responses
class InvestigateApi(object):
    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = 'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        """Establishes rate-limited, authenticated HTTP access and loads a cache.

        Args:
            api_key: OpenDNS Investigate API key, sent as a bearer token.
            cache_file_name: String file name of cache.
        """
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        """Prepends BASE_URL to a URL path fragment.

        Args:
            url_path: string path portion of a URL.
        Returns:
            A unicode full URL.
        """
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            # Bare `raise` preserves the original traceback;
            # `raise e` would reset it on Python 2.
            raise

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        """Formats a URL path template with each argument and builds full URLs.

        Args:
            fmt_url_path: format string for a URL path.
            url_path_args: an enumerable of format arguments, one per URL.
        Returns:
            A list of unicode full URLs.
        """
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                # Bare `raise` preserves the original traceback.
                raise

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    def categorization(self, domains):
        """Calls categorization end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = 'domains/categorization/?showLabels'

        all_responses = {}
        if self._cache:
            api_name = 'opendns-categorization'
            all_responses = self._cache.bulk_lookup(api_name, domains)
            # Test membership against the dict directly; `.keys()` builds a
            # list and makes each lookup O(n) on Python 2.
            domains = [key for key in domains if key not in all_responses]

        if domains:
            response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
            # The categorization endpoint answers all domains in one response.
            response = response[0]
            # TODO: Some better more expressive exception
            if not response:
                raise Exception('dang')

            for domain in response.keys():
                if self._cache:
                    self._cache.cache_value(api_name, domain, response[domain])
                all_responses[domain] = response[domain]

        return all_responses

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)
            url_params = [key for key in url_params if key not in all_responses]

        if url_params:
            urls = self._to_urls(fmt_url_path, url_params)
            responses = self._requests.multi_get(urls)
            # Responses come back in request order; pair them with their params
            # directly instead of building an intermediate dict.
            for url_param, response in zip(url_params, responses):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls security end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = 'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = 'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = 'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)