예제 #1
0
    def __init__(self, api_key, cache_file_name=None):
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header,
                                      max_requests=12,
                                      rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None
예제 #2
0
    def __init__(self, api_key, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            api_key: VirusTotal API key
            cache_file_name: String file name of cache.
        """
        self._api_key = api_key
        self._requests = MultiRequest()

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None
예제 #3
0
    def __init__(self, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache.
        """

        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=90.0)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None
예제 #4
0
class VirusTotalApi(object):
    BASE_DOMAIN = 'https://www.virustotal.com/vtapi/v2/'

    def __init__(self, api_key, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            api_key: VirusTotal API key
            cache_file_name: String file name of cache.
        """
        self._api_key = api_key
        self._requests = MultiRequest()

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @MultiRequest.error_handling
    def get_file_reports(self, resources):
        """Retrieves the most recent reports for a set of md5, sha1, and/or sha2 hashes.

        Args:
            resources: list of string hashes.
        Returns:
            A dict with the hash as key and the VT report as value.
        """
        api_name = 'virustotal-file-reports'
        all_responses, resources = self._bulk_cache_lookup(api_name, resources)

        resource_chunks = self._prepare_resource_chunks(resources)
        response_chunks = self._request_reports("resource", resource_chunks, 'file/report')

        self._extract_response_chunks(all_responses, response_chunks, api_name)

        return all_responses

    @MultiRequest.error_handling
    def get_domain_reports(self, domains):
        """Retrieves the most recent VT info for a set of domains.

        Args:
            domains: list of string domains.
        Returns:
            A dict with the domain as key and the VT report as value.
        """
        api_name = 'virustotal-domain-reports'
        (all_responses, domains) = self._bulk_cache_lookup(api_name, domains)

        responses = self._request_reports("domain", domains, 'domain/report')

        for domain, response in zip(domains, responses):
            if self._cache:
                self._cache.cache_value(api_name, domain, response)
            all_responses[domain] = response

        return all_responses

    @MultiRequest.error_handling
    def get_url_reports(self, resources):
        """Retrieves a scan report on a given URL.

        Args:
            resources: list of URLs.
        Returns:
            A dict with the URL as key and the VT report as value.
        """
        api_name = 'virustotal-url-reports'
        (all_responses, resources) = self._bulk_cache_lookup(api_name, resources)

        resource_chunks = self._prepare_resource_chunks(resources, '\n')
        response_chunks = self._request_reports("resource", resource_chunks, 'url/report')

        self._extract_response_chunks(all_responses, response_chunks, api_name)

        return all_responses

    def _bulk_cache_lookup(self, api_name, keys):
        """Performes a bulk cache lookup and returns a tuple with the results
        found and the keys missing in the cache. If cached is not configured
        it will return an empty dictionary of found results and the initial
        list of keys.

        Args:
            api_name: a string name of the API.
            keys: an enumerable of string keys.
        Returns:
            A tuple: (responses found, missing keys).
        """
        if self._cache:
            responses = self._cache.bulk_lookup(api_name, keys)
            missing_keys = [key for key in keys if key not in responses.keys()]
            return (responses, missing_keys)

        return ({}, keys)

    def _prepare_resource_chunks(self, resources, resource_delim=','):
        """As in some VirusTotal API methods the call can be made for multiple
        resources at once this method prepares a list of concatenated resources
        according to the maximum number of resources per requests.

        Args:
            resources: a list of the resources.
            resource_delim: a string used to separate the resources.
              Default value is a comma.
        Returns:
            A list of the concatenated resources.
        """
        resources_per_req = config_get_deep('virustotal.resources_per_req', 25)
        return [resource_delim.join(resources[pos:pos + resources_per_req]) for pos in xrange(0, len(resources), resources_per_req)]

    def _request_reports(self, resource_param_name, resources, endpoint_name):
        """Sends multiples requests for the resources to a particular endpoint.

        Args:
            resource_param_name: a string name of the resource parameter.
            resources: list of of the resources.
            endpoint_name: VirusTotal endpoint URL suffix.
        Returns:
            A list of the responses.
        """
        params = [{resource_param_name: resource, 'apikey': self._api_key} for resource in resources]
        return self._requests.multi_get(self.BASE_DOMAIN + endpoint_name, query_params=params)

    def _extract_response_chunks(self, all_responses, response_chunks, api_name):
        """Extracts and caches the responses from the response chunks in case
        of the responses for the requests containing multiple concatenated
        resources. Extracted responses are added to the already cached
        responses passed in the all_responses parameter.

        Args:
            all_responses: a list containing already cached responses.
            response_chunks: a list with response chunks.
            api_name: a string name of the API.
        """
        for response_chunk in response_chunks:
            if not isinstance(response_chunk, list):
                response_chunk = [response_chunk]
            for response in response_chunk:
                if not response:
                    continue

                if self._cache:
                    self._cache.cache_value(api_name, response['resource'], response)
                all_responses[response['resource']] = response
예제 #5
0
class ShadowServerApi(object):
    BINTEST_URL = 'http://bin-test.shadowserver.org/api'

    def __init__(self, cache_file_name=None):
        """Establishes basic HTTP params and loads a cache.

        Args:
            cache_file_name: String file name of cache.
        """

        # TODO - lookup request rate limit
        # By observation, ShadowServer can be quite slow, so give it 90 seconds before it times out.
        self._requests = MultiRequest(max_requests=2, req_timeout=90.0)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @MultiRequest.error_handling
    def get_bin_test(self, hashes):
        """Test hashes against a list of known software applications.

        Known hashes will return a dictionary of information.
        Unknown hashes will return nothing.

        Args:
            hashes: list of string hashes.
        Returns:
            A dict with the hash as key and the shadowserver report as value.
        """
        all_responses = {}

        if self._cache:
            api_name = 'shadowserver-bin-test'
            all_responses = self._cache.bulk_lookup(api_name, hashes)
            hashes = [key for key in hashes if key not in all_responses.keys()]
            all_responses = {key: val for key, val in all_responses.iteritems() if len(val) >= 2}

        HASHES_PER_REQ = 25
        hash_chunks = ['\n'.join(hashes[pos:pos + HASHES_PER_REQ]) for pos in xrange(0, len(hashes), HASHES_PER_REQ)]

        responses = self._requests.multi_post(self.BINTEST_URL, data=hash_chunks, to_json=False, send_as_file=True)
        for response in responses:
            if 200 == response.status_code:
                response_lines = response.text.split('\n')
                for line in response_lines:
                    # Set an initial val.
                    val = {}

                    # There is just a key, no value. This means the hash was unknown to ShadowServer.
                    index_of_first_space = line.find(' ')
                    if -1 == index_of_first_space:
                        index_of_first_space = len(line)
                    key = line[:index_of_first_space].lower()

                    # The response only has a JSON body if the hash was known.
                    json_text = line[index_of_first_space + 1:]
                    if len(json_text):
                        try:
                            val = simplejson.loads(json_text)
                            # A very short response indicates an error?
                            if len(val.keys()) >= 2:
                                all_responses[key] = val

                        except ValueError:
                            # Sometimes ShadowServer returns invalid data. Silently skip it.
                            pass

                    if self._cache:
                        self._cache.cache_value(api_name, key, val)

        return all_responses
예제 #6
0
class InvestigateApi(object):

    """Calls the OpenDNS investigate API.

    Applies rate limits and issues parallel requests.
    """

    BASE_URL = 'https://investigate.api.opendns.com/'

    def __init__(self, api_key, cache_file_name=None):
        auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30)

        # Create an ApiCache if instructed to
        self._cache = ApiCache(cache_file_name) if cache_file_name else None

    @classmethod
    def _to_url(cls, url_path):
        try:
            return u'{0}{1}'.format(cls.BASE_URL, url_path)
        except Exception as e:
            write_error_message(url_path)
            write_exception(e)
            raise e

    @classmethod
    def _to_urls(cls, fmt_url_path, url_path_args):
        url_paths = []
        for path_arg in url_path_args:
            try:
                url_paths.append(fmt_url_path.format(path_arg))
            except Exception as e:
                write_error_message(path_arg)
                write_exception(e)
                raise e

        return [cls._to_url(url_path) for url_path in url_paths]

    @MultiRequest.error_handling
    def categorization(self, domains):
        """Calls categorization end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of domains
        Returns:
            A dict of {domain: categorization_result}
        """
        url_path = 'domains/categorization/?showLabels'
        all_responses = {}

        if self._cache:
            api_name = 'opendns-categorization'
            all_responses = self._cache.bulk_lookup(api_name, domains)
            domains = [key for key in domains if key not in all_responses.keys()]

        if len(domains):
            response = self._requests.multi_post(self._to_url(url_path), data=simplejson.dumps(domains))
            response = response[0]

            # TODO: Some better more expressive exception
            if not response:
                raise Exception('dang')

            for domain in response.keys():
                if self._cache:
                    self._cache.cache_value(api_name, domain, response[domain])
                all_responses[domain] = response[domain]

        return all_responses

    @MultiRequest.error_handling
    def _multi_get(self, cache_api_name, fmt_url_path, url_params):
        """Makes multiple GETs to an OpenDNS endpoint.

        Args:
            cache_api_name: string api_name for caching
            fmt_url_path: format string for building URL paths
            url_params: An enumerable of strings used in building URLs
        Returns:
            A dict of {url_param: api_result}
        """
        all_responses = {}

        if self._cache:
            all_responses = self._cache.bulk_lookup(cache_api_name, url_params)
            url_params = [key for key in url_params if key not in all_responses.keys()]

        if len(url_params):
            urls = self._to_urls(fmt_url_path, url_params)
            responses = self._requests.multi_get(urls)
            responses = dict(zip(url_params, responses))
            for url_param in responses.keys():
                if self._cache:
                    self._cache.cache_value(cache_api_name, url_param, responses[url_param])
                all_responses[url_param] = responses[url_param]

        return all_responses

    def security(self, domains):
        """Calls security end point and adds an 'is_suspicious' key to each response.

        Args:
            domains: An enumerable of strings
        Returns:
            A dict of {domain: security_result}
        """
        api_name = 'opendns-security'
        fmt_url_path = 'security/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def cooccurrences(self, domains):
        """Get the domains related to input domains.

        Args:
            domains: an enumerable of strings domain names
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-cooccurrences'
        fmt_url_path = 'recommendations/name/{0}.json'
        return self._multi_get(api_name, fmt_url_path, domains)

    def rr_history(self, ips):
        """Get the domains related to input ips.

        Args:
            ips: an enumerable of strings as ips
        Returns:
            An enumerable of string domain names
        """
        api_name = 'opendns-rr_history'
        fmt_url_path = 'dnsdb/ip/a/{0}.json'
        return self._multi_get(api_name, fmt_url_path, ips)