class VirusTotalApi(object):
    BASE_DOMAIN = u'https://www.virustotal.com/vtapi/v2/'

    def __init__(self, api_key, resources_per_req=25, cache_file_name=None,
                 update_cache=True, req_timeout=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            api_key: VirusTotal API key
            resources_per_req: Maximum number of resources (hashes, URLs)
                to be sent in a single request
            cache_file_name: String file name of cache.
            update_cache: Determines whether cache should be written out back to the
                disk when closing it. Default is `True`.
            req_timeout: Maximum number of seconds to wait without reading a response
                byte before deciding an error has occurred. Default is None.
        """
        self._api_key = api_key
        self._resources_per_req = resources_per_req
        self._requests = MultiRequest(req_timeout=req_timeout)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @MultiRequest.error_handling
    def get_file_reports(self, resources):
        """Retrieves the most recent reports for a set of md5, sha1, and/or sha2 hashes.

        Args:
            resources: list of string hashes.
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        api_name = 'virustotal-file-reports'

        all_responses, resources = self._bulk_cache_lookup(api_name, resources)
        resource_chunks = self._prepare_resource_chunks(resources)
        response_chunks = self._request_reports("resource", resource_chunks, 'file/report')

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    def _extract_all_responses(self, resources, api_endpoint, api_name):
        """Aux function to extract all the API endpoint responses.

        Args:
            resources: list of string hashes.
            api_endpoint: endpoint path
            api_name: endpoint name
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        all_responses, resources = self._bulk_cache_lookup(api_name, resources)
        resource_chunks = self._prepare_resource_chunks(resources)
        response_chunks = self._request_reports("resource", resource_chunks, api_endpoint)

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_behaviour(self, resources):
        """Retrieves a report about the behaviour of a md5, sha1, and/or sha2 hash of
        a file when executed in a sandboxed environment (Cuckoo sandbox).

        Args:
            resources: list of string hashes.
        """
        api_name = 'virustotal-file-behaviour'
        api_endpoint = 'file/behaviour'
        return self._extract_all_responses(resources, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_download(self, resources):
        """Retrieves a file from its md5, sha1, and/or sha2 hash.

        Args:
            resources: list of string hashes.
        Returns:
            a file download
        """
        api_name = 'virustotal-file-download'
        api_endpoint = 'file/download'
        return self._extract_all_responses(resources, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_network_traffic(self, resources):
        """Retrieves a report about the network traffic of a md5, sha1, and/or sha2
        hash of a file, when it is executed.

        Args:
            resources: list of string hashes.
        """
        api_name = 'virustotal-file-network-traffic'
        api_endpoint = 'file/network-traffic'
        return self._extract_all_responses(resources, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_domain_reports(self, domains):
        """Retrieves the most recent VT info for a set of domains.

        Args:
            domains: list of string domains.
        Returns:
            A dict with the domain as key and the VT report as value.
        """
        api_name = 'virustotal-domain-reports'

        (all_responses, domains) = self._bulk_cache_lookup(api_name, domains)
        responses = self._request_reports("domain", domains, 'domain/report')

        for domain, response in zip(domains, responses):
            if self._cache:
                self._cache.cache_value(api_name, domain, response)
            all_responses[domain] = response

        return all_responses

    @MultiRequest.error_handling
    def get_url_distribution(self, params=None):
        """Retrieves a live feed with the latest URLs submitted to VT.

        Args:
            params: a dictionary with name and value for optional arguments
        Returns:
            A dict with the VT report.
        """
        params = params or {}
        all_responses = {}
        api_name = 'virustotal-url-distribution'

        response_chunks = self._request_reports(params.keys(), params.values(), 'url/distribution')
        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_distribution(self, params=None):
        """Retrieves a live feed with the latest hashes submitted to VT.

        Args:
            params: a dictionary with name and values for optional arguments, such as:
                before (timestamp), after (timestamp), reports (boolean),
                limit (retrieve limit file items).
                Example: 'reports': 'true'
        Returns:
            A dict with the VT report.
        """
        params = params or {}
        all_responses = {}
        api_name = 'virustotal-file-distribution'

        response_chunks = self._request_reports(params.keys(), params.values(), 'file/distribution')
        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_url_reports(self, resources):
        """Retrieves a scan report on a given URL.

        Args:
            resources: list of URLs.
        Returns:
            A dict with the URL as key and the VT report as value.
        """
        api_name = 'virustotal-url-reports'

        (all_responses, resources) = self._bulk_cache_lookup(api_name, resources)
        resource_chunks = self._prepare_resource_chunks(resources, '\n')
        response_chunks = self._request_reports("resource", resource_chunks, 'url/report')

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_ip_reports(self, ips):
        """Retrieves the most recent VT info for a set of ips.

        Args:
            ips: list of IPs.
        Returns:
            A dict with the IP as key and the VT report as value.
        """
        api_name = 'virustotal-ip-address-reports'

        (all_responses, ips) = self._bulk_cache_lookup(api_name, ips)
        responses = self._request_reports("ip", ips, 'ip-address/report')

        for ip, response in zip(ips, responses):
            if self._cache:
                self._cache.cache_value(api_name, ip, response)
            all_responses[ip] = response

        return all_responses

    @MultiRequest.error_handling
    def get_file_search(self, query):
        """Performs advanced search on samples, matching certain binary/
        metadata/detection criteria.

        Possible queries: file size, file type, first or last submission to
        VT, number of positives, binary content, etc.

        Args:
            query: dictionary with search arguments
                Example: 'query': 'type:peexe size:90kb+ positives:5+ behaviour:"taskkill"'
        Returns:
            A dict with the VT report.
        """
        api_name = 'virustotal-file-search'

        (all_responses, query) = self._bulk_cache_lookup(api_name, query)
        response_chunks = self._request_reports("query", query, 'file/search')

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_clusters(self, date):
        """Retrieves file similarity clusters for a given time frame.

        Args:
            date: the specific date for which we want the clustering details.
                Example: 'date': '2013-09-10'
        Returns:
            A dict with the VT report.
        """
        api_name = 'virustotal-file-clusters'

        (all_responses, resources) = self._bulk_cache_lookup(api_name, date)
        response = self._request_reports("date", date, 'file/clusters')

        self._extract_response_chunks(all_responses, response, api_name)
        return all_responses

    def _bulk_cache_lookup(self, api_name, keys):
        """Performs a bulk cache lookup and returns a tuple with the results
        found and the keys missing in the cache. If the cache is not configured
        it will return an empty dictionary of found results and the initial
        list of keys.

        Args:
            api_name: a string name of the API.
            keys: an enumerable of string keys.
        Returns:
            A tuple: (responses found, missing keys).
        """
        if self._cache:
            responses = self._cache.bulk_lookup(api_name, keys)
            missing_keys = [key for key in keys if key not in responses.keys()]
            return (responses, missing_keys)

        return ({}, keys)

    def _prepare_resource_chunks(self, resources, resource_delim=','):
        """Some VirusTotal API methods accept multiple resources in a single
        call. This method prepares a list of concatenated resources according
        to the maximum number of resources per request.

        Args:
            resources: a list of the resources.
            resource_delim: a string used to separate the resources.
                Default value is a comma.
        Returns:
            A list of the concatenated resources.
        """
        return [self._prepare_resource_chunk(resources, resource_delim, pos)
                for pos in xrange(0, len(resources), self._resources_per_req)]

    def _prepare_resource_chunk(self, resources, resource_delim, pos):
        return resource_delim.join(
            resources[pos:pos + self._resources_per_req])

    def _request_reports(self, resource_param_name, resources, endpoint_name):
        """Sends multiple requests for the resources to a particular endpoint.

        Args:
            resource_param_name: a string name of the resource parameter.
            resources: list of the resources.
            endpoint_name: VirusTotal endpoint URL suffix.
        Returns:
            A list of the responses.
        """
        params = [{resource_param_name: resource, 'apikey': self._api_key} for resource in resources]
        return self._requests.multi_get(self.BASE_DOMAIN + endpoint_name, query_params=params)

    def _extract_response_chunks(self, all_responses, response_chunks, api_name):
        """Extracts and caches the responses from the response chunks in case
        of requests containing multiple concatenated resources. Extracted
        responses are added to the already cached responses passed in the
        all_responses parameter.

        Args:
            all_responses: a dict containing already cached responses.
            response_chunks: a list with response chunks.
            api_name: a string name of the API.
        """
        for response_chunk in response_chunks:
            if not isinstance(response_chunk, list):
                response_chunk = [response_chunk]
            for response in response_chunk:
                if not response:
                    continue

                if self._cache:
                    self._cache.cache_value(api_name, response['resource'], response)
                all_responses[response['resource']] = response
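

# Illustrative usage sketch (not part of the original module). It assumes the
# VirusTotalApi class above is importable together with its MultiRequest and
# ApiCache dependencies, and that a valid VirusTotal v2 API key is supplied;
# the cache path and hash values are placeholders.
def _example_virustotal_v2_usage(api_key):
    """Shows how hash lookups are batched and cached by VirusTotalApi (v2)."""
    vt = VirusTotalApi(api_key, resources_per_req=25, cache_file_name='/tmp/vt_cache.json')

    # Up to 25 hashes are joined into a single comma-separated 'resource'
    # parameter per request by _prepare_resource_chunks().
    reports = vt.get_file_reports([
        '99017f6eebbac24f351415dd410d522d',
        'd41d8cd98f00b204e9800998ecf8427e',
    ])
    for file_hash, report in reports.items():
        # 'positives' is only present when VirusTotal knows the hash.
        print(file_hash, report.get('positives'))

    # Because _bulk_cache_lookup() runs before any HTTP request, hashes already
    # present in the cache file are returned without consuming API quota.
    return reports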
class InvestigateApi(object):
    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = u'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            raise e

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise e

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-categorization')
    def categorization(self, domains):
        """Calls categorization end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = u'domains/categorization/?showLabels'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    @_cached_by_domain(api_name='opendns-domain_score')
    def domain_score(self, domains):
        url_path = 'domains/score/'
        response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
        return response[0]

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)
            url_params = [key for key in url_params if key not in all_responses.keys()]

        if len(url_params):
            urls = self._to_urls(fmt_url_path, url_params)
            responses = self._requests.multi_get(urls)
            responses = dict(zip(url_params, responses))
            for url_param in responses.keys():
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, responses[url_param])
                all_responses[url_param] = responses[url_param]

        return all_responses

    def security(self, domains):
        """Calls security end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = u'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = u'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def domain_tag(self, domains):
        """Get the date range when a domain was part of the OpenDNS block list.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of strings with period, category, and url
        """
        api_name = 'opendns-domain_tag'
        fmt_url_path = u'domains/{0}/latest_tags'
        return self._multi_get(api_name, fmt_url_path, domains)

    def related_domains(self, domains):
        """Get list of domain names that have been seen requested around the
        same time (up to 60 seconds before or after) as the given domain name.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of [domain name, scores]
        """
        api_name = 'opendns-related_domains'
        fmt_url_path = u'links/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of resource records and features
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = u'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def dns_rr(self, ips):
        """Get the resource records related to input domains.

        Args:
            ips: an enumerable of strings as domain names
        Returns:
            An enumerable of resource records and features
        """
        api_name = 'opendns-dns_rr'
        fmt_url_path = u'dnsdb/name/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def latest_malicious(self, ips):
        """Get a list of malicious domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of strings for the malicious domains
        """
        api_name = 'opendns-latest_malicious'
        fmt_url_path = u'ips/{0}/latest_domains'
        return self._multi_get(api_name, fmt_url_path, ips)
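

# Illustrative usage sketch (not part of the original module). It assumes a
# valid OpenDNS Investigate API token and the MultiRequest/ApiCache helpers
# used above; the cache path and domains are placeholders.
def _example_investigate_usage(api_key):
    """Shows how domain categorization and security lookups are combined."""
    investigate = InvestigateApi(api_key, cache_file_name='/tmp/opendns_cache.json')

    domains = ['example.com', 'example.org']
    # categorization() POSTs the whole domain list at once, while security()
    # issues one GET per domain (cache misses only) through _multi_get().
    categories = investigate.categorization(domains)
    security = investigate.security(domains)
    return {
        domain: {
            'categorization': categories.get(domain),
            'security': security.get(domain),
        }
        for domain in domains
    }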
class InvestigateApi(object):
    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = u'https://investigate.api.opendns.com/'

    # TODO: consider moving this to a config file
    MAX_DOMAINS_IN_POST = 1000

    def __init__(self, api_key, cache_file_name=None, update_cache=True, req_timeout=None):
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(
            default_headers=auth_header,
            max_requests=12,
            rate_limit=30,
            req_timeout=req_timeout,
            drop_404s=True,
        )

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            raise e

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise e

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    def _multi_post(self, url_path, domains):
        data = [
            simplejson.dumps(domains[pos:pos + self.MAX_DOMAINS_IN_POST])
            for pos in range(0, len(domains), self.MAX_DOMAINS_IN_POST)
        ]
        # multi_post() returns a list of dictionaries, so they need to be merged into one dict
        all_responses = self._requests.multi_post(self._to_url(url_path), data=data)
        responses = {}
        for r in all_responses:
            responses.update(r)
        return responses

    @_cached_by_domain(api_name='opendns-categorization')
    def categorization(self, domains):
        """Calls categorization end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = u'domains/categorization/?showLabels'
        return self._multi_post(url_path, domains)

    @_cached_by_domain(api_name='opendns-domain_score')
    def domain_score(self, domains):
        """Calls domain scores endpoint.

        This method is deprecated since the OpenDNS Investigate API endpoint is
        also deprecated.
        """
        warn(
            'OpenDNS Domain Scores endpoint is deprecated. Use '
            'InvestigateApi.categorization() instead',
            DeprecationWarning,
        )
        url_path = 'domains/score/'
        return self._multi_post(url_path, domains)

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params, query_params=None):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
            query_params: None / dict / list of dicts containing query params
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)
            url_params = [
                key for key in url_params
                if key not in all_responses.keys()
            ]

        if len(url_params):
            urls = self._to_urls(fmt_url_path, url_params)
            responses = self._requests.multi_get(urls, query_params)
            for url_param, response in zip(url_params, responses):
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, response)
                all_responses[url_param] = response

        return all_responses

    def security(self, domains):
        """Calls security end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = u'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def whois_emails(self, emails):
        """Calls WHOIS Email end point

        Args:
            emails: An enumerable of string Emails
        Returns:
            A dict of {email: domain_result}
        """
        api_name = 'opendns-whois-emails'
        fmt_url_path = u'whois/emails/{0}'
        return self._multi_get(api_name, fmt_url_path, emails)

    def whois_nameservers(self, nameservers):
        """Calls WHOIS Nameserver end point

        Args:
            nameservers: An enumerable of nameservers
        Returns:
            A dict of {nameserver: domain_result}
        """
        api_name = 'opendns-whois-nameservers'
        fmt_url_path = u'whois/nameservers/{0}'
        return self._multi_get(api_name, fmt_url_path, nameservers)

    def whois_domains(self, domains):
        """Calls WHOIS domain end point

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: domain_result}
        """
        api_name = 'opendns-whois-domain'
        fmt_url_path = u'whois/{0}'
        return self._multi_get(api_name, fmt_url_path, domains)

    def whois_domains_history(self, domains):
        """Calls WHOIS domain history end point

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: domain_history_result}
        """
        api_name = 'opendns-whois-domain-history'
        fmt_url_path = u'whois/{0}/history'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = u'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def domain_tag(self, domains):
        """Get the date range when a domain was part of the OpenDNS block list.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of strings with period, category, and url
        """
        api_name = 'opendns-domain_tag'
        fmt_url_path = u'domains/{0}/latest_tags'
        return self._multi_get(api_name, fmt_url_path, domains)

    def related_domains(self, domains):
        """Get list of domain names that have been seen requested around the
        same time (up to 60 seconds before or after) as the given domain name.

        Args:
            domains: an enumerable of string domain names
        Returns:
            An enumerable of [domain name, scores]
        """
        api_name = 'opendns-related_domains'
        fmt_url_path = u'links/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of resource records and features
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = u'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def dns_rr(self, ips):
        """Get the resource records related to input domains.

        Args:
            ips: an enumerable of strings as domain names
        Returns:
            An enumerable of resource records and features
        """
        api_name = 'opendns-dns_rr'
        fmt_url_path = u'dnsdb/name/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)

    def latest_malicious(self, ips):
        """Get a list of malicious domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of strings for the malicious domains
        """
        api_name = 'opendns-latest_malicious'
        fmt_url_path = u'ips/{0}/latest_domains'
        return self._multi_get(api_name, fmt_url_path, ips)

    def sample(self, hashes):
        """Get the information about a sample based on its hash.

        Args:
            hashes: an enumerable of strings as hashes
        Returns:
            An enumerable of arrays which contain the information about the
            original samples
        """
        api_name = 'opendns-sample'
        fmt_url_path = u'sample/{0}'
        return self._multi_get(api_name, fmt_url_path, hashes)

    def search(self, patterns, start=30, limit=1000, include_category=False):
        """Performs pattern searches against the Investigate database.

        Args:
            patterns: An enumerable of RegEx domain patterns to search for
            start: How far back results extend from in days (max is 30)
            limit: Number of results to show (max is 1000)
            include_category: Include OpenDNS security categories
        Returns:
            An enumerable of matching domain strings
        """
        api_name = 'opendns-patterns'
        fmt_url_path = u'search/{0}'
        start = '-{0}days'.format(start)
        include_category = str(include_category).lower()
        query_params = {
            'start': start,
            'limit': limit,
            'includecategory': include_category,
        }
        return self._multi_get(api_name, fmt_url_path, patterns, query_params)

    def risk_score(self, domains):
        """Performs Umbrella risk score analysis on the input domains.

        Args:
            domains: an enumerable of domains
        Returns:
            An enumerable of associated domain risk scores
        """
        api_name = 'opendns-risk_score'
        fmt_url_path = u'domains/risk-score/{0}'
        return self._multi_get(api_name, fmt_url_path, domains)
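

# Illustrative usage sketch (not part of the original module). It assumes a
# valid Investigate API token; the pattern, domains, and cache path are
# placeholders.
def _example_investigate_search_usage(api_key):
    """Shows the batched POST endpoints and the query-parameter GET endpoints."""
    investigate = InvestigateApi(api_key, cache_file_name='/tmp/opendns_cache.json')

    # categorization() splits the domain list into chunks of
    # MAX_DOMAINS_IN_POST and merges the per-chunk responses into one dict.
    categories = investigate.categorization(['example.com', 'example.net'])

    # search() builds '?start=-30days&limit=1000&includecategory=false' query
    # parameters and issues one GET per RegEx pattern.
    matches = investigate.search(['example.*'], start=30, limit=1000)

    # whois_domains() and risk_score() follow the same one-GET-per-item shape.
    whois = investigate.whois_domains(['example.com'])
    return categories, matches, whois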
class VirusTotalApi(object):
    BASE_DOMAIN = u'https://www.virustotal.com/api/v3/'

    def __init__(self, api_key, cache_file_name=None, update_cache=True, req_timeout=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            api_key: VirusTotal API key
            cache_file_name: String file name of cache.
            update_cache: Determines whether cache should be written out back to the
                disk when closing it. Default is `True`.
            req_timeout: Maximum number of seconds to wait without reading a response
                byte before deciding an error has occurred. Default is None.
        """
        self._requests = MultiRequest(
            req_timeout=req_timeout, default_headers={'x-apikey': api_key}, drop_404s=True)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @MultiRequest.error_handling
    def get_file_reports(self, file_hash_list):
        """Retrieves the most recent reports for a set of md5, sha1, and/or sha2 hashes.

        Args:
            file_hash_list: list of string hashes.
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        api_name = 'virustotal-file-reports'
        api_endpoint = 'files/{}'

        all_responses, file_hash_list = self._bulk_cache_lookup(api_name, file_hash_list)
        response_chunks = self._request_reports(file_hash_list, api_endpoint)

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_behaviour(self, file_hash_list):
        """Retrieves a report about the behaviour of a md5, sha1, and/or sha2 hash of
        a file when executed in a sandboxed environment (Cuckoo sandbox).

        Args:
            file_hash_list: list of string hashes.
        """
        api_name = 'virustotal-file-behaviour'
        api_endpoint = 'files/{}/behaviours'

        all_responses, file_hash_list = self._bulk_cache_lookup(api_name, file_hash_list)
        response_chunks = self._request_reports(file_hash_list, api_endpoint)

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_download(self, file_hash_list):
        """Retrieves a file from its md5, sha1, and/or sha2 hash.

        Args:
            file_hash_list: list of string hashes.
        Returns:
            a base64-encoded string of the file
        """
        api_name = 'virustotal-file-download'
        api_endpoint = 'files/{}/download'
        return self._extract_all_responses(file_hash_list, api_endpoint, api_name, file_download=True)

    @MultiRequest.error_handling
    def get_file_contacted_domains(self, file_hash_list):
        """Retrieves a report about the contacted domains of a md5, sha1, and/or sha2
        hash of a file, when it is executed.

        Args:
            file_hash_list: list of string hashes.
        """
        api_name = 'virustotal-file-contacted-domains'
        api_endpoint = 'files/{}/contacted_domains'
        return self._extract_all_responses(file_hash_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_contacted_ips(self, file_hash_list):
        """Retrieves a report about the contacted IP addresses of a md5, sha1, and/or
        sha2 hash of a file, when it is executed.

        Args:
            file_hash_list: list of string hashes.
        """
        api_name = 'virustotal-file-contacted-ips'
        api_endpoint = 'files/{}/contacted_ips'
        return self._extract_all_responses(file_hash_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_contacted_urls(self, file_hash_list):
        """Retrieves a report about the contacted URLs of a md5, sha1, and/or sha2
        hash of a file, when it is executed.

        Args:
            file_hash_list: list of string hashes.
        """
        api_name = 'virustotal-file-contacted-urls'
        api_endpoint = 'files/{}/contacted_urls'
        return self._extract_all_responses(file_hash_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_itw_urls(self, file_hash_list):
        """Retrieves a report about the in-the-wild URLs from where the file with the
        hash has been downloaded.

        Args:
            file_hash_list: list of string hashes.
        """
        api_name = 'virustotal-file-itw-urls'
        api_endpoint = 'files/{}/itw_urls'
        return self._extract_all_responses(file_hash_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_domain_communicating_files(self, domain_list):
        """Retrieves a report about the files that communicate with this internet domain.

        Args:
            domain_list: list of string domains.
        """
        api_name = 'virustotal-domain-communicating-files'
        api_endpoint = 'domains/{}/communicating_files'
        return self._extract_all_responses(domain_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_domain_referrer_files(self, domain_list):
        """Retrieves a report about the files containing the internet domain.

        Args:
            domain_list: list of string domains.
        """
        api_name = 'virustotal-domain-referrer-files'
        api_endpoint = 'domains/{}/referrer_files'
        return self._extract_all_responses(domain_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_domain_reports(self, domain_list):
        """Retrieves the most recent VT info for a set of domains.

        Args:
            domain_list: list of string domains.
        Returns:
            A dict with the domain as key and the VT report as value.
        """
        api_name = 'virustotal-domain-reports'

        (all_responses, domain_list) = self._bulk_cache_lookup(api_name, domain_list)
        responses = self._request_reports(domain_list, 'domains/{}')

        for domain, response in zip(domain_list, responses):
            if self._cache:
                self._cache.cache_value(api_name, domain, response)
            all_responses[domain] = response

        return all_responses

    @MultiRequest.error_handling
    def get_feeds_url(self, time_frame):
        """Retrieves a live feed with the latest URLs submitted to VT.

        Args:
            time_frame: a list of timeframe strings in date format YYYYMMDDhhmm.
        Returns:
            A base64-encoded, bzip2-compressed UTF-8 text file containing one JSON
            structure per line.
        """
        api_name = 'virustotal-url-distribution'
        all_responses = {}

        response = self._request_reports(time_frame, 'feeds/urls/{}', file_download=True)
        self._extract_response_chunks(all_responses, response, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_file_distribution(self, time_frame):
        """Retrieves a live feed with the latest hashes submitted to VT.

        Args:
            time_frame: A list of strings in format YYYYMMDDhhmm.
        Returns:
            A dict with the VT report.
        """
        all_responses = {}
        api_name = 'virustotal-file-distribution'

        response = self._request_reports(time_frame, 'feeds/files/{}')
        self._extract_response_chunks(all_responses, response, api_name)
        return all_responses

    @MultiRequest.error_handling
    def get_url_reports(self, url_hash_list):
        """Retrieves a scan report on a given URL.

        Args:
            url_hash_list: list of sha256-hashed URLs.
        Returns:
            A dict with the URL hash as key and the VT report as value.
        """
        api_name = 'virustotal-url-reports'
        api_endpoint = 'urls/{}'
        return self._extract_all_responses(url_hash_list, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_ip_reports(self, ips):
        """Retrieves the most recent VT info for a set of ips.

        Args:
            ips: list of IPs.
        Returns:
            A dict with the IP as key and the VT report as value.
        """
        api_name = 'virustotal-ip-address-reports'

        (all_responses, ips) = self._bulk_cache_lookup(api_name, ips)
        responses = self._request_reports(ips, 'ip_addresses/{}')

        for ip, response in zip(ips, responses):
            if self._cache:
                self._cache.cache_value(api_name, ip, response)
            all_responses[ip] = response

        return all_responses

    @MultiRequest.error_handling
    def get_file_search(self, query):
        """Performs advanced search on samples, matching certain binary/
        metadata/detection criteria.

        Possible queries: file size, file type, first or last submission to
        VT, number of positives, binary content, etc.

        Args:
            query: dictionary with search arguments
                Example: 'query': 'type:peexe size:90kb+ positives:5+ behaviour:"taskkill"'
        Returns:
            A dict with the VT report.
        """
        api_name = 'virustotal-file-search'
        api_endpoint = 'intelligence/search?query={}'
        return self._extract_all_responses(query, api_endpoint, api_name)

    @MultiRequest.error_handling
    def get_file_clusters(self, time_frame):
        """Retrieves file similarity clusters for a given time frame.

        Args:
            time_frame: a list of time frames for which we want the clustering details
                in YYYYMMDDhhmm format.
        Returns:
            A dict with the VT report.
        """
        api_name = 'virustotal-file-clusters'
        api_endpoint = 'feeds/file-behaviours/{}'
        return self._extract_all_responses(time_frame, api_endpoint, api_name)

    def _bulk_cache_lookup(self, api_name, keys):
        """Performs a bulk cache lookup and returns a tuple with the results
        found and the keys missing in the cache. If the cache is not configured
        it will return an empty dictionary of found results and the initial
        list of keys.

        Args:
            api_name: a string name of the API.
            keys: an enumerable of string keys.
        Returns:
            A tuple: (responses found, missing keys).
        """
        if self._cache:
            responses = self._cache.bulk_lookup(api_name, keys)
            missing_keys = [key for key in keys if key not in responses.keys()]
            return (responses, missing_keys)

        return ({}, keys)

    def _request_reports(self, ids, endpoint_name, file_download=False):
        """Sends multiple requests for the resources to a particular endpoint.

        Args:
            ids: list of identifiers (hashes, domains, IPs, time frames)
                substituted into the endpoint URL.
            endpoint_name: VirusTotal endpoint URL suffix.
            file_download: boolean, whether a file download is expected
        Returns:
            A list of the responses.
        """
        urls = ['{}{}'.format(self.BASE_DOMAIN, endpoint_name.format(id)) for id in ids]
        return self._requests.multi_get(urls, file_download=file_download) if urls else []

    def _extract_cache_id(self, response):
        """Extracts the object hash from the response to be used to uniquely
        identify the result.

        Args:
            response: response object.
        Returns:
            A hash that uniquely identifies the result.
        """
        cache_id = None
        if isinstance(response['data'], list):
            if response['data']:
                # gets the first data item's id
                cache_id = response['data'][0]['id']
        else:
            cache_id = response['data']['id']
        # sandbox id output has an underscore as the separator
        if cache_id and '_' in cache_id:
            cache_id = cache_id.split('_')[0]
        return cache_id

    def _extract_all_responses(self, resources, api_endpoint, api_name, file_download=False):
        """Aux function to extract all the API endpoint responses.

        Args:
            resources: list of string hashes.
            api_endpoint: endpoint path
            api_name: endpoint name
            file_download: boolean, whether a file download is expected
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        all_responses, resources = self._bulk_cache_lookup(api_name, resources)
        response_chunks = self._request_reports(resources, api_endpoint, file_download)

        self._extract_response_chunks(all_responses, response_chunks, api_name)
        return all_responses

    def _extract_response_chunks(self, all_responses, response_chunks, api_name):
        """Extracts and caches the responses from the response chunks in case
        of requests containing multiple concatenated resources. Extracted
        responses are added to the already cached responses passed in the
        all_responses parameter.

        Args:
            all_responses: a dict containing already cached responses.
            response_chunks: a list with response chunks.
            api_name: a string name of the API.
        """
        for response_chunk in response_chunks:
            if not isinstance(response_chunk, list):
                response_chunk = [response_chunk]
            for response in response_chunk:
                if not response:
                    continue

                cache_id = self._extract_cache_id(response)
                if cache_id:
                    if self._cache:
                        self._cache.cache_value(api_name, cache_id, response)
                    all_responses[cache_id] = response
class AlexaRankingApi(object):

    BASE_URL = u'https://data.alexa.com/data?cli=10'

    def __init__(self, resources_per_req=10, cache_file_name=None,
                 update_cache=True, req_timeout=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            resources_per_req: Maximum number of resources (hashes, URLs)
                to be sent in a single request
            cache_file_name: String file name of cache.
            update_cache: Determines whether cache should be written out back to the
                disk when closing it. Default is `True`.
            req_timeout: Maximum number of seconds to wait without reading a response
                byte before deciding an error has occurred. Default is None.
        """
        self._resources_per_req = resources_per_req
        self._requests = MultiRequest(req_timeout=req_timeout)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None

    @MultiRequest.error_handling
    def get_alexa_rankings(self, domains):
        """Retrieves the most recent Alexa ranking info for a set of domains.

        Args:
            domains: list of string domains.
        Returns:
            A dict with the domain as key and the Alexa ranking report as value.
        """
        api_name = 'alexa_rankings'

        (all_responses, domains) = self._bulk_cache_lookup(api_name, domains)
        responses = self._request_reports(domains)

        for domain, response in zip(domains, responses):
            xml_response = self._extract_response_xml(domain, response)
            if self._cache:
                self._cache.cache_value(api_name, domain, response)
            all_responses[domain] = xml_response

        return all_responses

    def _request_reports(self, domains):
        """Sends multiple requests for the domains to the Alexa ranking endpoint.

        Args:
            domains: list of string domains.
        Returns:
            A list of the responses.
        """
        params = [{'url': domain} for domain in domains]
        responses = self._requests.multi_get(self.BASE_URL, query_params=params, to_json=False)
        return responses

    def _extract_response_xml(self, domain, response):
        """Extracts the XML content of an HTTP response into dictionary format.

        Args:
            domain: string domain the response corresponds to.
            response: HTTP response object.
        Returns:
            A dictionary: {alexa-ranking key: alexa-ranking value}.
        """
        attributes = {}
        alexa_keys = {'POPULARITY': 'TEXT', 'REACH': 'RANK', 'RANK': 'DELTA'}
        try:
            xml_root = ET.fromstring(response._content)
            for xml_child in xml_root.findall('SD//'):
                if xml_child.tag in alexa_keys and \
                        alexa_keys[xml_child.tag] in xml_child.attrib:
                    attributes[xml_child.tag.lower()] = xml_child.attrib[
                        alexa_keys[xml_child.tag]]
        except ParseError:
            # Skip ill-formatted XML and return no Alexa attributes
            pass
        attributes['domain'] = domain
        return {'attributes': attributes}

    def _bulk_cache_lookup(self, api_name, keys):
        """Performs a bulk cache lookup and returns a tuple with the results
        found and the keys missing in the cache. If the cache is not configured
        it will return an empty dictionary of found results and the initial
        list of keys.

        Args:
            api_name: a string name of the API.
            keys: an enumerable of string keys.
        Returns:
            A tuple: (responses found, missing keys).
        """
        if self._cache:
            responses = self._cache.bulk_lookup(api_name, keys)
            missing_keys = [key for key in keys if key not in responses.keys()]
            return (responses, missing_keys)

        return ({}, keys)
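

# Illustrative usage sketch (not part of the original module). The domains and
# cache path are placeholders; the public data.alexa.com endpoint needs no API key.
def _example_alexa_ranking_usage():
    """Shows how XML responses are reduced to a small attributes dict."""
    alexa = AlexaRankingApi(cache_file_name='/tmp/alexa_cache.json')

    rankings = alexa.get_alexa_rankings(['example.com', 'example.org'])
    for domain, report in rankings.items():
        # Fresh lookups are reduced by _extract_response_xml() to
        # {'attributes': {'domain': ..., 'popularity': ..., 'reach': ..., 'rank': ...}};
        # ill-formed XML leaves only the 'domain' key.
        print(domain, report)
    return rankings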