def authenticate(self, api_username="", api_key="", **kwargs):
    """
    Build the DomainTools API handle from explicit credentials.

    Calling this directly is fine, but (per the original note) doing so
    skips the ~/.huntlibrc default-credential lookup.

    :param api_username: The API authentication username
    :type api_username: str
    :param api_key: The API authentication key
    :type api_key: str
    :param `**kwargs`: Extra keyword arguments forwarded unchanged to ``API``

    :Exceptions:
    Raises ValueError if either credential is empty/missing or is not
    a string. The username is validated before the key.
    """
    # Validate the username first; an empty value is reported before a
    # wrong-typed one.
    if not api_username:
        raise ValueError("You must supply a value for 'api_username'.")
    if not isinstance(api_username, str):
        raise ValueError("The 'api_username' field must be a string.")

    # Same two checks for the key.
    if not api_key:
        raise ValueError("You must supply a value for 'api_key'.")
    if not isinstance(api_key, str):
        raise ValueError("The 'api_key' field must be a string.")

    # Both credentials look sane: create and remember the API handle.
    handle = API(api_username, api_key, **kwargs)
    self._handle = handle
def connect(self, params={}):
    """
    Create the DomainTools client, verify the credentials and cache the
    PhishEye term names.

    :param params: Connection parameters; the username is read from
        ``Input.USERNAME`` and the key from the ``secretKey`` entry of
        ``Input.API_KEY``.

    Raises ConnectionTestException when the account-information probe
    fails, with a dedicated message for rejected credentials.
    """
    self.logger.info("Connect: Connecting..")

    user_name = params.get(Input.USERNAME)
    secret = params.get(Input.API_KEY).get("secretKey")
    self.api = API(user_name, secret)

    # Probe the account endpoint; .data() is what surfaces API errors.
    try:
        self.api.account_information().data()
    except NotAuthorizedException:
        raise ConnectionTestException(
            cause="Authorization failed.",
            assistance="Double-check that your credentials configured in your connection are correct and try again.",
        )
    except Exception as error:
        raise ConnectionTestException(
            cause="Unable to connect to DomainTools.",
            assistance=f"Exception was: {error}",
        )

    # Fetch the PhishEye term list once and keep only the term names.
    term_listing = Helper.make_request(self.api.phisheye_term_list, self.logger)
    self.terms = []
    self.terms.extend(
        entry.get("term")
        for entry in term_listing.get("response").get("terms")
    )
def test_exception_handling():
    """Check that API failures surface as the expected exception types."""
    # A malformed reverse-IP query reports 400 and raises on .data().
    bad_call = api.reverse_ip('ss')
    assert bad_call.status == 400

    try:
        bad_call.data()
    except Exception as error:
        raised = error
    else:
        raised = None

    assert raised
    assert raised.code == 400
    assert 'not understand' in raised.reason['error']['message']

    # Unknown product / path -> NotFoundException.
    with pytest.raises(exceptions.NotFoundException):
        missing = api._results(
            'i_made_this_product_up',
            '/v1/steianrstierstnrsiatiarstnsto.com/whois')
        missing.data()

    # Bad credentials -> NotAuthorizedException.
    with pytest.raises(exceptions.NotAuthorizedException):
        unauthorised = API('notauser', 'notakey')
        unauthorised.domain_search('amazon').data()

    # An unsupported signing hash is rejected with a ValueError.
    expected_message = r"Invalid value 'notahash' for 'key_sign_hash'. Values available are sha1,sha256,md5"
    with pytest.raises(ValueError, match=expected_message):
        API('notauser',
            'notakey',
            always_sign_api_key=True,
            key_sign_hash='notahash').domain_search('amazon')
def domaintools(self, data):
    """
    Run the configured Iris Investigate query for ``data``.

    * ``investigate-domain`` on a ``domain`` observable: looks the domain
      up and, when ``results_count`` is truthy, returns the first result
      passed through ``self.format_single_domain``; otherwise the raw
      response is returned.
    * ``pivot`` on a ``hash``/``ip``/``mail`` observable: searches by the
      matching Iris field and returns the output of
      ``DomainToolsAnalyzer.format_pivot_domains``.

    :param data: The observable value to look up.
    :return: The (formatted) response, or None when the service/data
        type combination is not handled here.
    """
    client = API(self.get_param("config.username"),
                 self.get_param("config.key"))
    # Identification parameters sent with every Iris request.
    app_parameters = {
        "app_partner": "cortex",
        "app_name": "Iris",
        "app_version": 1
    }

    if self.service == "investigate-domain" and self.data_type in ["domain"]:
        investigation = client.iris_investigate(data, **app_parameters).response()
        if not investigation["results_count"]:
            return investigation
        return self.format_single_domain(investigation.get("results")[0])

    if self.service == "pivot" and self.data_type in ["hash", "ip", "mail"]:
        # Observable type -> Iris Investigate search field.
        pivot_fields = {
            "ip": "ip",
            "mail": "email",
            "hash": "ssl_hash",
        }
        pivot_field = pivot_fields[self.data_type]
        app_parameters[pivot_field] = data
        investigation = client.iris_investigate(**app_parameters).response()
        return DomainToolsAnalyzer.format_pivot_domains(
            investigation.get("results"), pivot_field, data)

    return None
def test_no_https():
    """A client built with ``https=False`` can still run a domain search.

    Rejected credentials (NotAuthorizedException) are tolerated so the
    test does not depend on a valid account.
    """
    user = environ.get('TEST_USER', 'test_user')
    key = environ.get('TEST_KEY', 'test_key')
    try:
        plain_http_api = API(user, key, https=False)
        search = plain_http_api.domain_search('google')
        assert search.data()
    except exceptions.NotAuthorizedException:
        # Placeholder credentials are rejected by the API; that is fine here.
        pass
def domaintools(self, data):
    """
    Dispatch ``data`` to the DomainTools endpoint selected by
    ``self.service`` and ``self.data_type``.

    A ``reverse-ip`` request for an ``ip`` observable is first remapped
    to ``host-domains`` (this rewrites ``self.service``).

    :param data: The observable value (domain, IP, ...) to query.
    :return: The ``.response()`` of the matching API call, or None when
        no branch handles the service/data-type combination.
    """
    # Previously left unassigned, so an unsupported combination crashed
    # with UnboundLocalError at the final return.
    response = None

    if (self.service == 'reverse-ip' and self.data_type == 'ip'):
        self.service = 'host-domains'

    api = API(self.get_param('config.username'),
              self.get_param('config.key'))

    if self.service == 'reverse-ip' and self.data_type in ['domain', 'ip', 'fqdn']:
        response = api.reverse_ip(data).response()

    elif self.service == 'host-domains' and self.data_type == 'ip':
        response = api.host_domains(data).response()

    elif self.service == 'name-server-domains' and self.data_type == 'domain':
        response = api.reverse_name_server(data).response()

    elif self.service == 'whois/history' and self.data_type == 'domain':
        response = api.whois_history(data).response()

    elif self.service == 'whois/parsed' and self.data_type == 'domain':
        response = api.parsed_whois(data).response()

    elif self.service == 'reverse-whois':
        response = api.reverse_whois(data, mode='purchase').response()

    elif self.service == 'whois' and self.data_type == 'ip':
        response = api.whois(data).response()

    return response
def on_load_configuration(self, config):
    """
    Invoked after the application-specific configuration has been loaded.

    Reads the DomainTools API key and API user from the general
    configuration section and creates the API client from them.

    :param config: The application configuration
    :raises Exception: if the key or the user is still unset/empty after
        the lookup (the key is checked first).
    """
    logger.info("On 'load configuration' callback.")

    # API Key -- lookup failures are ignored here; the emptiness check
    # below reports the problem.
    try:
        self._api_key = config.get(self.GENERAL_CONFIG_SECTION,
                                   self.GENERAL_API_KEY_CONFIG_PROP)
    except Exception:
        pass
    if not self._api_key:
        key_error = "DomainTools API Key not found in configuration file: {0}".format(
            self._app_config_path)
        raise Exception(key_error)

    # API User -- same pattern as the key.
    try:
        self._api_user = config.get(self.GENERAL_CONFIG_SECTION,
                                    self.GENERAL_API_USER_CONFIG_PROP)
    except Exception:
        pass
    if not self._api_user:
        user_error = "DomainTools API User not found in configuration file: {0}".format(
            self._app_config_path)
        raise Exception(user_error)

    self._api = API(self._api_user, self._api_key)
def _api(self):
    """Create a DomainTools ``API`` client from ``self.api_key``.

    ``self.api_key`` is split on ``":"``; the first piece is passed as
    the username and the second as the key.
    """
    parts = self.api_key.split(":")
    username = parts[0]
    secret = parts[1]
    return API(username, secret)
def handler(q=False):
    """
    Module entry point: run the requested DomainTools services for the
    first supported attribute found in the JSON request ``q``.

    Returns ``q`` unchanged when it is falsy, the shared ``misperrors``
    dict (with ``'error'`` set) on input/credential problems, otherwise
    ``{'results': ...}`` built from the collected values.
    """
    if not q:
        return q

    request = json.loads(q)

    # First truthy value among the supported input attribute types.
    candidates = (request.get(kind) for kind in mispattributes['input'])
    to_query = next((value for value in candidates if value), None)
    if not to_query:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors

    if not request.get('config'):
        misperrors['error'] = 'DomainTools authentication is missing'
        return misperrors

    if (request['config'].get('username') is None) or (request['config'].get('api_key') is None):
        misperrors['error'] = 'DomainTools authentication is incomplete'
        return misperrors

    domtools = API(request['config'].get('username'),
                   request['config'].get('api_key'))

    values = DomainTools()
    services = get_services(request)
    if services:
        # Each service name is resolved to a module-level function and
        # called in order; the first failure stops the remaining ones and
        # is only printed (best effort).
        try:
            for service_name in services:
                globals()[service_name](domtools, to_query, values)
        except Exception as e:
            print(to_query, type(e), e)

    return {'results': values.dump()}
def connect(self, params={}):
    """
    Create a DomainTools client and verify the credentials.

    The client is only stored on ``self.api`` after the
    account-information probe succeeded.

    :param params: Connection parameters; ``username`` and the
        ``secretKey`` entry of ``api_key`` are read from it.
    :raises Exception: when authorization is rejected or the probe fails
        for any other reason.
    """
    self.logger.info("Connect: Connecting..")

    username = params.get("username")
    key = params.get("api_key").get("secretKey")
    api = API(username, key)

    try:
        response = api.account_information()
        # .data() is what actually surfaces API errors.
        response.data()
    except NotAuthorizedException as e:
        # The format string used to have a "%s" placeholder with no
        # argument, so the log line never contained the actual error.
        self.logger.error("DomainTools: Connect: error %s", str(e))
        raise Exception(
            "DomainTools: Connect: Authorization failed. Please try again")
    except Exception as e:
        self.logger.error("DomainTools: Connect: error %s", str(e))
        raise Exception(
            "DomainTools: Connect: Failed to connect to server {}".format(e))

    self.api = api
def domaintools(self, data):
    """
    Dispatch ``data`` to the DomainTools endpoint selected by
    ``self.service`` and ``self.data_type``.

    A ``reverse-ip`` request for an ``ip`` observable is first remapped
    to ``host-domains`` (this rewrites ``self.service``).

    :param data: The observable value (domain, FQDN, IP, ...) to query.
    :return: The ``.response()`` of the matching API call, or None when
        no branch handles the service/data-type combination.
    """
    # Previously left unassigned, so an unsupported combination crashed
    # with UnboundLocalError at the final return.
    response = None

    if (self.service == 'reverse-ip' and self.data_type == 'ip'):
        self.service = 'host-domains'

    api = API(self.get_param('config.username'),
              self.get_param('config.key'))

    if self.service == 'reverse-ip' and self.data_type in ['domain', 'ip', 'fqdn']:
        response = api.reverse_ip(data).response()

    elif self.service == 'host-domains' and self.data_type == 'ip':
        response = api.host_domains(data).response()

    elif self.service == 'name-server-domains' and self.data_type == 'domain':
        response = api.reverse_name_server(data).response()

    elif self.service == 'whois/history' and self.data_type == 'domain':
        response = api.whois_history(data).response()

    elif self.service == 'whois/parsed' and self.data_type == 'domain':
        response = api.parsed_whois(data).response()

    elif self.service == 'risk_evidence' and self.data_type in ['domain', 'fqdn']:
        response = api.risk_evidence(data).response()

    elif self.service == 'reputation' and self.data_type in ['domain', 'fqdn']:
        response = api.reputation(data, include_reasons=True).response()

    elif self.service == 'reverse-whois':
        response = api.reverse_whois(data, mode='purchase').response()

    elif self.service == 'whois' and self.data_type == 'ip':
        response = api.whois(data).response()

    return response
def run():  # pragma: no cover
    """Defines how to start the CLI for the DomainTools API.

    Parses the command line, exits with status 1 when credentials are
    missing, performs the selected API call with the remaining arguments
    and writes the result (newline-terminated) to the output file.
    """
    out_file, out_format, arguments = parse()

    user = arguments.pop('user', None)
    key = arguments.pop('key', None)
    if not (user and key):
        sys.stderr.write('Credentials are required to perform API calls.\n')
        sys.exit(1)

    # Client options are removed from ``arguments`` so that only the API
    # call's own parameters remain.
    api = API(user,
              key,
              https=arguments.pop('https'),
              verify_ssl=arguments.pop('verify_ssl'),
              rate_limit=arguments.pop('rate_limit'))

    api_method = getattr(api, arguments.pop('api_call'))
    response = api_method(**arguments)

    # 'list' uses as_list(); every other format is an attribute of the
    # response named after the format.
    if out_format != 'list':
        rendered = getattr(response, out_format)
    else:
        rendered = response.as_list()
    output = str(rendered)

    if not output.endswith('\n'):
        output = output + '\n'
    out_file.write(output)
def test_exception_handling():
    """Check that API failures surface as the expected exception types."""
    # A malformed reverse-IP query reports 400 and raises on .data().
    bad_call = api.reverse_ip('ss')
    assert bad_call.status == 400

    try:
        bad_call.data()
    except Exception as error:
        raised = error
    else:
        raised = None

    assert raised
    assert raised.code == 400
    assert 'not understand' in raised.reason['error']['message']

    # Unknown product / path -> NotFoundException.
    with pytest.raises(exceptions.NotFoundException):
        missing = api._results(
            'i_made_this_product_up',
            '/v1/steianrstierstnrsiatiarstnsto.com/whois')
        missing.data()

    # Bad credentials -> NotAuthorizedException.
    with pytest.raises(exceptions.NotAuthorizedException):
        unauthorised = API('notauser', 'notakey')
        unauthorised.domain_search('amazon').data()
"""Defines all test wide settings and variables"""
from os import environ

from domaintools import API
from vcr import VCR


def remove_server(response):
    """Drop the ``server`` header from a response before it is recorded.

    The response dict is modified in place and returned.
    """
    recorded_headers = response.get('headers', {})
    recorded_headers.pop('server', None)
    return response


# Shared recorder: credentials are filtered out of recorded query strings
# and cassettes are stored as .yaml files under tests/fixtures/vcr/.
vcr = VCR(
    before_record_response=remove_server,
    filter_query_parameters=['api_key', 'api_username'],
    cassette_library_dir='tests/fixtures/vcr/',
    path_transformer=VCR.ensure_suffix('.yaml'),
    record_mode='new_episodes',
)

# Shared API client, created under the 'init_user_account' cassette;
# credentials come from TEST_USER / TEST_KEY with placeholder defaults.
with vcr.use_cassette('init_user_account'):
    api = API(
        environ.get('TEST_USER', 'test_user'),
        environ.get('TEST_KEY', 'test_key'),
    )
def handler(q=False):
    """
    Module entry point: look up the first supported attribute of the JSON
    request ``q`` with DomainTools (parsed WHOIS, domain profile and
    reputation) and collect the findings.

    Returns ``q`` unchanged when it is falsy, the shared ``misperrors``
    dict (with ``'error'`` set) on input, credential or lookup errors,
    otherwise ``{'results': values.dump()}``.
    """
    if not q:
        return q
    request = json.loads(q)

    # Pick the first supported input attribute that carries a value.
    to_query = None
    for t in mispattributes['input']:
        to_query = request.get(t)
        if to_query:
            break
    if not to_query:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors

    # Both username and api_key must be present in the request config.
    if request.get('config'):
        if (request['config'].get('username') is None) or (request['config'].get('api_key') is None):
            misperrors['error'] = 'DomainTools authentication is incomplete'
            return misperrors
        else:
            domtools = API(request['config'].get('username'), request['config'].get('api_key'))
    else:
        misperrors['error'] = 'DomainTools authentication is missing'
        return misperrors

    whois_entry = domtools.parsed_whois(to_query)
    profile = domtools.domain_profile(to_query)
    # NOTE: profile['website_data']['response_code'] could be used to see if the host is still up. Maybe set a tag.
    reputation = domtools.reputation(to_query, include_reasons=True)
    # NOTE: use that value in a tag when we will have attribute level tagging

    values = DomainTools()

    # Errors from the WHOIS or profile lookup abort the whole request;
    # a reputation error only skips the risk entry below.
    if whois_entry.get('error'):
        misperrors['error'] = whois_entry['error']['message']
        return misperrors

    if profile.get('error'):
        misperrors['error'] = profile['error']['message']
        return misperrors

    if reputation and not reputation.get('error'):
        reasons = ', '.join(reputation['reasons'])
        values.risk = [
            reputation['risk_score'],
            'Risk value of {} (via Domain Tools), Reasons: {}'.format(
                to_query, reasons)
        ]

    # Registrant names from both sources.
    if whois_entry.get('registrant'):
        values.add_name(whois_entry['registrant'], 'Parsed registrant')

    if profile.get('registrant'):
        values.add_name(profile['registrant']['name'], 'Profile registrant')

    # Hosting server IP, annotated with the number of co-hosted domains.
    if profile.get('server'):
        other_domains = profile['server']['other_domains']
        values.add_ip(
            profile['server']['ip_address'],
            'IP of {} (via DomainTools). Has {} other domains.'.format(
                to_query, other_domains))

    # Registration details from the domain profile.
    if profile.get('registration'):
        if profile['registration'].get('created'):
            values.add_creation_date(profile['registration']['created'],
                                     'created')
        if profile['registration'].get('updated'):
            values.add_creation_date(profile['registration']['updated'],
                                     'updated')
        if profile['registration'].get('registrar'):
            values.add_registrar(profile['registration']['registrar'],
                                 'name')

    if whois_entry.get('registration'):
        values.add_creation_date(whois_entry['registration']['created'],
                                 'timestamp')

    # Raw WHOIS record text.
    if whois_entry.get('whois'):
        values.freetext = whois_entry['whois']['record']

    # Structured WHOIS: dates, registrar details and per-role contacts.
    if whois_entry.get('parsed_whois'):
        if whois_entry['parsed_whois']['created_date']:
            values.add_creation_date(
                whois_entry['parsed_whois']['created_date'], 'created')
        if whois_entry['parsed_whois']['registrar']['name']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['name'], 'name')
        if whois_entry['parsed_whois']['registrar']['url']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['url'], 'url')
        if whois_entry['parsed_whois']['registrar']['iana_id']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['iana_id'],
                'iana_id')
        # ``key`` is the contact's key in the 'contacts' mapping and is
        # passed along as the comment for each value.
        for key, entry in whois_entry['parsed_whois']['contacts'].items():
            if entry['email']:
                values.add_mail(entry['email'], key)
            if entry['phone']:
                values.add_phone(entry['phone'], key)
            if entry['name']:
                values.add_name(entry['name'], key)

    # Any remaining e-mail addresses not already recorded in reg_mail.
    if whois_entry.emails():
        for mail in whois_entry.emails():
            if mail not in values.reg_mail.keys():
                values.add_mail(mail, 'Maybe registrar')

    return {'results': values.dump()}
"""Defines all test wide settings and variables"""
from os import environ

from domaintools import API
from vcr import VCR


def remove_server(response):
    """Scrub a response before it is recorded.

    Drops the ``server`` header and, when the response carries a ``url``,
    rewrites its ``api_username`` / ``api_key`` query values with
    placeholders. The response dict is modified in place and returned.
    """
    recorded_headers = response.get('headers', {})
    recorded_headers.pop('server', None)
    if 'url' in response:
        scrubbed_url = response['url'].update_query(api_username='******',
                                                    api_key='test')
        response['url'] = scrubbed_url
    return response


# Shared recorder: signing-related parameters are filtered from both the
# query string and POST data; cassettes are stored as .yaml files under
# tests/fixtures/vcr/.
vcr = VCR(
    before_record_response=remove_server,
    filter_query_parameters=['timestamp', 'signature', 'api_username'],
    filter_post_data_parameters=['timestamp', 'signature', 'api_username'],
    cassette_library_dir='tests/fixtures/vcr/',
    path_transformer=VCR.ensure_suffix('.yaml'),
    record_mode='new_episodes',
)

# Shared API client (always signing the API key), created under the
# 'init_user_account' cassette.
with vcr.use_cassette('init_user_account'):
    api = API(
        environ.get('TEST_USER', 'test'),
        environ.get('TEST_KEY', 'test'),
        always_sign_api_key=True,
    )
class DomainTools(object):
    '''
    The DomainTools class allows you to easily perform some common types of
    calls to the DomainTools API. It uses their official `domaintools_api`
    Python module to do most of the work but is not a complete replacement
    for that module. In particular, this class concentrates on a few calls
    that are most relevant for data analytic style threat hunting (risk &
    reputation scores, WHOIS info, etc).

    Most methods pass through any kwargs to the underlying domaintools
    methods, with one important exception: the class consults the user's
    ~/.huntlibrc (if present) to determine the API username and key so you
    don't always have to provide them during authentication.

    :param api_username: The API authentication username (OPTIONAL)
    :type api_username: str
    :param api_key: The API authentication key (OPTIONAL)
    :type api_key: str
    :param `**kwargs`: Additional keyword args are passed to the underlying domaintools module init
    '''

    _DEFAULT_CONFIG_FILE = os.path.expanduser("~/.huntlibrc")

    _handle = None  # API Handle
    _account_information = None  # Cached information about the API user's account
    _available_api_calls = None  # Cached list of API endpoints the account has access to

    def __init__(self, *args, **kwargs):
        # Read our config file, if it exists
        config = ConfigParser()
        config.read(self._DEFAULT_CONFIG_FILE)

        api_username = ""
        api_key = ""

        # Explicit keyword arguments win over the config file.
        if 'api_username' in kwargs:
            api_username = kwargs['api_username']
        elif config.has_option('domaintools', 'api_username'):
            api_username = config.get('domaintools', 'api_username')

        if 'api_key' in kwargs:
            api_key = kwargs['api_key']
        elif config.has_option('domaintools', 'api_key'):
            api_key = config.get('domaintools', 'api_key')

        # Remove these from kwargs, so we don't have duplicate args.
        kwargs.pop('api_username', None)
        kwargs.pop('api_key', None)

        self.authenticate(
            api_username=api_username,
            api_key=api_key,
            **kwargs
        )

        # Prime both caches right away.
        self._account_information = self.account_information(force_refresh=True)
        self._available_api_calls = self.available_api_calls(force_refresh=True)

    @retry()
    def authenticate(self, api_username="", api_key="", **kwargs):
        """
        Authenticate to the DomainTools API. Calling this function directly
        is OK, but you won't get the benefit of consulting ~/.huntlibrc for
        default creds if you do.

        :param api_username: The API authentication username
        :type api_username: str
        :param api_key: The API authentication key
        :type api_key: str
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Exceptions:
        Will raise ValueError if the api_username or api_key are not provided or
        are of the wrong type.
        """
        # Sanity check the provided authentication info
        if not api_username:
            raise ValueError("You must supply a value for 'api_username'.")
        elif not isinstance(api_username, str):
            raise ValueError("The 'api_username' field must be a string.")

        if not api_key:
            raise ValueError("You must supply a value for 'api_key'.")
        elif not isinstance(api_key, str):
            raise ValueError("The 'api_key' field must be a string.")

        # Actually authenticate now
        self._handle = API(
            api_username,
            api_key,
            **kwargs
        )

    @retry()
    def account_information(self, force_refresh=False, **kwargs):
        '''
        Return a dict containing information about limits and usage of the
        various domaintools API calls for the authenticated API user.

        :param force_refresh: A boolean controlling whether or not to refresh the cached info
        :type force_refresh: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A single dict, where each key is the name of an endpoint from the
        underlying domaintools API, and the values are dicts containing
        details about that endpoint. For example:

            {
                'domain-profile': {
                    'per_month_limit': None,
                    'per_minute_limit': '180',
                    'absolute_limit': None,
                    'usage': {
                        'today': '0',
                        'month': '100'
                    },
                    'expiration_date': '2020-12-31'
                },
                'whois': {
                    'per_month_limit': None,
                    'per_minute_limit': '180',
                    'absolute_limit': None,
                    'usage': {
                        'today': '9',
                        'month': '997'
                    },
                    'expiration_date': '2020-12-31'
                }
            }
        '''
        if self._account_information and not force_refresh:
            return self._account_information

        res = self._handle.account_information(**kwargs)

        if res:
            info = dict()
            for item in res:
                # Work on a copy so the API response items are not mutated,
                # and avoid shadowing the builtin ``id``.
                details = dict(item)
                endpoint = details.pop('id')
                info[endpoint] = details
            self._account_information = info

        return self._account_information

    @retry()
    def available_api_calls(self, force_refresh=False, **kwargs):
        '''
        Returns a list of endpoints available to the authenticated API user.

        :param force_refresh: A boolean controlling whether or not to refresh the cached info
        :type force_refresh: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A list of strings containing the API endpoint names. For example:

            [
                'domain_profile',
                'whois',
                'whois_history',
                'reverse_ip',
                ...
            ]
        '''
        if self._available_api_calls and not force_refresh:
            return self._available_api_calls

        res = self._handle.available_api_calls(**kwargs)

        if res:
            self._available_api_calls = list(res)

        return self._available_api_calls

    @retry()
    def whois(self, query=None, **kwargs):
        '''
        Return basic WHOIS info for a given domain or IP address.

        :param query: A domain or IP address
        :type query: string
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A dict containing basic WHOIS information (empty if the API reports
        a bad request or an unknown name). For example:

            {
                'registrant': 'Google LLC',
                'registration': {
                    'created': '1997-09-15',
                    'expires': '2028-09-14',
                    'updated': '2019-09-09',
                    'registrar': 'MarkMonitor Inc.',
                    'statuses': [
                        'clientDeleteProhibited',
                        'clientTransferProhibited',
                        'clientUpdateProhibited',
                        'serverDeleteProhibited',
                        'serverTransferProhibited',
                        'serverUpdateProhibited'
                    ]
                },
                'name_servers': [
                    'NS1.GOOGLE.COM',
                    'NS2.GOOGLE.COM',
                    'NS3.GOOGLE.COM',
                    'NS4.GOOGLE.COM'
                ],
                'whois': {
                    'date': '2020-07-12',
                    'record': 'Domain Name: google.com\nRegistry Domain ID: 2138514_DOMAIN_COM-VRSN\n...'
                },
                'record_source': 'google.com'
            }

        :Exceptions:
        Will raise ValueError if no query is supplied, or if the query is not a string.
        '''
        if not query:
            raise ValueError("You must supply either a domain or an IP address.")
        elif not isinstance(query, str):
            raise ValueError("The query parameter must be a string.")

        try:
            whois_info = dict(
                list(
                    self._handle.whois(query, **kwargs)
                )
            )
        except (BadRequestException, NotFoundException):
            return dict()

        return whois_info

    @retry()
    def parsed_whois(self, query=None, flatten=False, **kwargs):
        '''
        Return extended WHOIS info for a given domain or IP address.

        :param query: A domain or IP address
        :type query: string
        :param flatten: A boolean controlling whether to attempt to normalize the nested dicts or lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A dict containing basic and extended WHOIS information (empty if the
        API reports a bad request or an unknown name). For example:

            {
                ...
                'parsed_whois': {
                    'domain': 'google.com',
                    'created_date': '1997-09-15T00:00:00-07:00',
                    'updated_date': '2019-09-09T08:39:04-07:00',
                    'expired_date': '2028-09-13T00:00:00-07:00',
                    'statuses': [
                        'clientDeleteProhibited',
                        'clientTransferProhibited',
                        'clientUpdateProhibited',
                        'serverDeleteProhibited',
                        'serverTransferProhibited',
                        'serverUpdateProhibited'
                    ],
                    'name_servers': [
                        'ns1.google.com',
                        'ns2.google.com',
                        'ns3.google.com',
                        'ns4.google.com'
                    ],
                    'registrar': {
                        'name': 'MarkMonitor, Inc. MarkMonitor Inc.',
                        'abuse_contact_phone': '12083895770',
                        'abuse_contact_email': '*****@*****.**',
                        'iana_id': '292',
                        'url': 'http://www.markmonitor.com',
                        'whois_server': 'whois.markmonitor.com'
                    },
                    'contacts': {
                        'registrant': {
                            'name': '',
                            'org': 'Google LLC',
                            'street': [],
                            'city': '',
                            'state': 'CA',
                            'postal': '',
                            'country': 'us',
                            'phone': '',
                            'fax': '',
                            'email': 'REDACTED FOR PRIVACY (DT)'
                        }
                        ...
                    }
                }
                ...
            }

        :Exceptions:
        Will raise ValueError if no query is supplied, or if the query is not a string.
        '''
        if not query:
            raise ValueError(
                "You must supply either a domain or an IP address.")
        elif not isinstance(query, str):
            raise ValueError(
                "The query parameter must be a string.")

        try:
            whois_info = dict(
                list(
                    self._handle.parsed_whois(query, **kwargs)
                )
            )
        except (BadRequestException, NotFoundException):
            return dict()

        if flatten:
            # Normalize the nested dictionary keys into a single level.
            whois_info = huntlib_data_flatten(whois_info)

        return whois_info

    @retry()
    def brand_monitor(self, query=None, **kwargs):
        '''
        Given a query string containing one or more search terms (separated by '|'),
        return a list of any newly-active or pending domain registrations
        containing ALL of the terms.

        :param query: A string containing one or more search terms (separated by '|'), or a list of terms (joined with '|' for you)
        :type query: string or list of strings
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A list of dicts, with each dict containing a result. For example:

            [
                {
                    'domain': '54google.com',
                    'status': 'new'
                },
                {
                    'domain': 'aboutmicrosoftandgoogleapps.com',
                    'status': 'on-hold'
                },
                ...
            ]

        :Exceptions:
        Will raise ValueError if no query is supplied or if the query is
        neither a string nor a list.
        '''
        if not query:
            raise ValueError("You must specify a query pattern.")
        elif isinstance(query, list):
            query = "|".join(query)
        elif not isinstance(query, str):
            raise ValueError("The 'query' parameter must be either a string or a list of strings.")

        return list(self._handle.brand_monitor(query, **kwargs))

    @retry()
    def domain_profile(self, query=None, flatten=False, **kwargs):
        '''
        Look up basic information about a domain, including DNS, WHOIS, history
        and web site info along with pointers to more detailed info.

        :param query: The domain to look up
        :type query: string
        :param flatten: A boolean controlling whether to attempt to normalize the nested dicts/lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A dict containing the various pieces of info. For example:

            {
                'registrant': {
                    'name': 'Google LLC',
                    'domains': 18696,
                    'product_url': 'https://reversewhois.domaintools.com/?all[]=Google+LLC&none[]='
                },
                'server': {
                    'ip_address': '172.217.14.196',
                    'other_domains': 151,
                    'product_url': 'https://reverseip.domaintools.com/search/?q=google.com'
                },
                'registration': {
                    'created': '1997-09-15',
                    'expires': '2028-09-14',
                    'updated': '2019-09-09',
                    'registrar': 'MarkMonitor Inc.',
                    'statuses': [
                        'clientDeleteProhibited',
                        'clientTransferProhibited',
                        'clientUpdateProhibited',
                        'serverDeleteProhibited',
                        'serverTransferProhibited',
                        'serverUpdateProhibited'
                    ]
                },
                'name_servers': [
                    {
                        'server': 'NS1.GOOGLE.COM',
                        'product_url': 'https://reversens.domaintools.com/search/?q=NS1.GOOGLE.COM'
                    },
                    {
                        'server': 'NS2.GOOGLE.COM',
                        'product_url': 'https://reversens.domaintools.com/search/?q=NS2.GOOGLE.COM'
                    },
                    {
                        'server': 'NS3.GOOGLE.COM',
                        'product_url': 'https://reversens.domaintools.com/search/?q=NS3.GOOGLE.COM'
                    },
                    {
                        'server': 'NS4.GOOGLE.COM',
                        'product_url': 'https://reversens.domaintools.com/search/?q=NS4.GOOGLE.COM'
                    }
                ],
                ...
            }

        :Exceptions:
        Raises ValueError if no query is supplied or if it is not a string.
        '''
        if not query:
            raise ValueError("You must specify a query domain.")
        elif not isinstance(query, str):
            raise ValueError("query parameter must be a string.")

        profile = dict(
            list(
                self._handle.domain_profile(query, **kwargs)
            )
        )

        if flatten:
            # Normalize the nested dictionary keys into a single level.
            profile = huntlib_data_flatten(profile)

        return profile

    @retry()
    def domain_reputation(self, domain=None, reasons=False, **kwargs):
        '''
        Return a risk score based on the reputation of the given domain, with an
        optional list of reasons contributing to the score.

        :param domain: The domain for which to retrieve the score
        :type domain: string
        :param reasons: Determines whether or not to include a list of reasons for the score (DEFAULT False)
        :type reasons: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A Dict containing the requested information. If the domain cannot be
        found, return an empty dict. For example:

            {
                'domain': 'domaintools.xyz',
                'risk_score': 18.69,
                'reasons': [
                    'registrant'
                ]
            }

        :Exceptions:
        Raises ValueError if no domain is provided, or if the domain is not a string.
        '''
        if not domain:
            raise ValueError("You must specify a query domain.")
        elif not isinstance(domain, str):
            raise ValueError("The domain parameter must be a string.")

        try:
            reputation = dict(
                list(
                    self._handle.reputation(domain, reasons, **kwargs)
                )
            )
        except (BadRequestException, NotFoundException):
            return dict()

        return reputation

    @retry()
    def risk(self, domain=None, **kwargs):
        '''
        Return risk scores for a domain with respect to individual risk factors.

        :param domain: The domain for which to retrieve the score
        :type domain: string
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A Dict containing the requested information. If the domain cannot be
        found, return an empty dict. For example:

            {
                'proximity': 18,
                'threat_profile': 36,
                'threat_profile_phishing': 36,
                'threat_profile_malware': 17,
                'threat_profile_spam': 2
            }

        :Exceptions:
        Raises ValueError if no domain is provided, or if the domain is not a string.
        '''
        if not domain:
            raise ValueError("You must specify a query domain.")
        elif not isinstance(domain, str):
            raise ValueError("The domain parameter must be a string.")

        try:
            risk = self._handle.risk(domain, **kwargs)
            # Turn the list of individual dictionaries (with duplicate
            # keys) into a single dictionary.
            risk = {x['name']: x['risk_score'] for x in risk}
        except (BadRequestException, NotFoundException):
            return dict()

        return risk

    @retry()
    def iris_enrich(self, query=None, flatten=False, asframe=False, **kwargs):
        '''
        Bulk enrichment for lists of domains against the DomainTools IRIS
        database. This will do basic deduplication (e.g., 'google.com' will
        only be looked up once no matter how many times it appears in the
        input list, but 'google.com', 'www.google.com' and 'drive.google.com'
        are not considered duplicates).

        :param query: The domain(s) to enrich
        :type query: string, list or pandas Series object
        :param flatten: A boolean controlling whether to attempt to normalize the nested dicts/lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param asframe: Return the enriched data as a pandas DataFrame instead of a dict (DEFAULT False)
        :type asframe: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        Returns a dict where each key is an enriched domain and the
        corresponding value is a dict with the enrichment data for that
        domain. For example:

            {
                'google.com': {
                    'whois_url': 'https://whois.domaintools.com/google.com',
                    'active': True,
                    [...]
                },
                'microsoft.com': {
                    'whois_url': 'https://whois.domaintools.com/microsoft.com',
                    'active': True,
                    [...]
                }
            }

        If `asframe` is True, the result is returned instead as a pandas
        DataFrame object, where the 'domain' column contains the enriched
        domains, with their enrichment data flattened into columns, like so:

                      domain                                    whois_url  active  [...]
            0     google.com     https://whois.domaintools.com/google.com    True  [...]
            1  microsoft.com  https://whois.domaintools.com/microsoft.com    True  [...]

        If the API reports a bad request or an unknown name, an empty dict
        is returned regardless of `asframe`.

        :Exceptions:
        Raises ValueError if no query is supplied or if it is not a string,
        list or pandas Series.
        '''
        if query is None:
            raise ValueError("You must specify a domain or list of domains to query.")

        if isinstance(query, (list, pd.core.series.Series)):
            # Convert a list of strings to a single comma-separated string
            query = ','.join(query)
        elif not isinstance(query, str):
            raise ValueError("The query must be either a string or a list of strings.")

        try:
            enrich = list(self._handle.iris_enrich(query, **kwargs))
        except (BadRequestException, NotFoundException):
            return dict()

        # Key each result by its domain; results without one are skipped.
        data = dict()
        for i in enrich:
            if 'domain' in i:
                domain = i.pop('domain')
                if flatten:
                    data[domain] = huntlib_data_flatten(i)
                else:
                    data[domain] = i

        if asframe:
            return pd.DataFrame(data).transpose().reset_index().rename(columns={'index': 'domain'})
        else:
            return data

    def enrich(self, df=None, column=None, prefix='dt_enrich.', progress_bar=False, fields=None, batch_size=100):
        '''
        Enrich a pandas DataFrame object with information from DomainTools.
        Note that the original DataFrame is not modified, so you must assign
        the return value to a variable if you want to keep it. e.g.
        `df = dt.enrich(df, column='domains')`.

        :param df: The DataFrame to enrich
        :type df: pandas.DataFrame
        :param column: The name of the column containing domains and/or IPs to enrich (as strings)
        :type column: string
        :param prefix: Naming prefix for the newly-added columns (DEFAULT 'dt_enrich.')
        :type prefix: string
        :param progress_bar: If True, attempt to show enrichment progress (DEFAULT False)
        :type progress_bar: bool
        :param fields: A list of specific enrichment column names (including the prefix) to add (DEFAULT add all fields). The list is not modified.
        :type fields: list of strings
        :param batch_size: The number of domains/IPs to enrich in one "batch" (DEFAULT 100)
        :type batch_size: integer

        :Return Value:
        A pandas DataFrame object containing all of the original information
        plus many additional enrichment columns. If no enrichment data could
        be retrieved at all (e.g. an empty input frame), an unmodified copy
        of the input frame is returned.

        :Exceptions:
        Raises ValueError if the required options are not present or are
        of the wrong type.
        '''
        if df is None:
            raise ValueError(
                "You must supply a pandas DataFrame in the 'df' parameter.")
        elif not isinstance(df, pd.core.frame.DataFrame):
            raise ValueError(
                "The argument for the 'df' parameter must be a pandas DataFrame.")

        if not column:
            raise ValueError("You must supply a column name to enrich.")
        elif not isinstance(column, str):
            raise ValueError("The column name must be a 'str'.")
        elif column not in df.columns:
            raise ValueError(
                f"The column '{column}' does not exist in the frame.")

        if prefix is None or not isinstance(prefix, str):
            raise ValueError("The column name prefix must be a 'str'.")

        if fields is not None and not isinstance(fields, list):
            raise ValueError(
                "The 'fields' parameter must be a list of strings.")

        # Attempt some basic deduplication to save API calls
        unique_domains = pd.Series(
            df[column].unique(),
            dtype='object'
        )

        if progress_bar:
            tqdm.pandas(desc='Enriching')

        merge_column = f'{prefix}domain'

        # Collect the per-batch frames and concatenate once at the end.
        # (DataFrame.append was removed in pandas 2.0, and appending inside
        # the loop re-copied the accumulated frame on every batch.)
        batch_frames = []
        with tqdm(desc="Enriching", total=unique_domains.size, disable=not progress_bar) as pbar:
            for start in range(0, unique_domains.size, batch_size):
                batch = unique_domains[start:start + batch_size]
                res = self.iris_enrich(
                    batch,
                    flatten=True,
                    asframe=True
                )
                # We have to do this the hard way, instead of just DataFrame(res)
                # because some of the items in res contain lists of unequal length,
                # which causes pandas to throw an exception.
                results = pd.DataFrame(
                    {k: pd.Series(v, dtype='object') for k, v in res.items()}
                )
                if not results.empty:
                    batch_frames.append(results)
                pbar.update(batch.size)

        if batch_frames:
            enrichment_df = pd.concat(batch_frames, ignore_index=True)
        else:
            enrichment_df = pd.DataFrame()

        enrichment_df = enrichment_df.add_prefix(prefix)

        # Nothing came back at all: there is no column to merge on, so hand
        # back the caller's data untouched instead of failing with an
        # opaque KeyError inside merge().
        if merge_column not in enrichment_df.columns:
            return df.copy()

        # If we asked for only certain fields, filter for those
        if fields:
            # Work on a copy so the caller's list is not modified.
            wanted = list(fields)
            if merge_column not in wanted:
                # Make sure this is in the final fields list no matter what,
                # because we rely on it as a merge column below
                wanted.append(merge_column)
            enrichment_df = enrichment_df[wanted]

        df = pd.merge(
            df,
            enrichment_df,
            how='left',
            left_on=column,
            right_on=merge_column
        )

        df = df.drop(merge_column, axis='columns')

        return df
def domaintools(self, data):
    """
    Query the DomainTools endpoint selected by ``self.service`` for ``data``.

    A 'reverse-ip' request for an 'ip' observable is rewritten to
    'host-domains' first; the rewritten value is stored back on
    ``self.service``.

    :param data: The observable value handed to the API call as its query.
    :return: Whatever ``.response()`` of the selected API call returns.
    :raises ValueError: If ``self.service`` is not a known service or does
        not accept ``self.data_type``. (Previously this case surfaced as an
        UnboundLocalError on the final ``return``.)
    """
    if self.service == 'reverse-ip' and self.data_type == 'ip':
        self.service = 'host-domains'

    # service name -> (accepted data types, or None for "any", query builder).
    # The builders are lazy so that nothing is sent (and no client is built)
    # until the service / data type combination has been validated.
    handlers = {
        'reverse-ip': (
            ('domain', 'ip', 'fqdn'),
            lambda api: api.reverse_ip(data)),
        'host-domains': (
            ('ip',),
            lambda api: api.host_domains(data)),
        'name-server-domains': (
            ('domain',),
            lambda api: api.reverse_name_server(data)),
        'whois/history': (
            ('domain',),
            lambda api: api.whois_history(data)),
        'whois/parsed': (
            ('domain', 'ip'),
            lambda api: api.parsed_whois(data)),
        'hosting-history': (
            ('domain',),
            lambda api: api.hosting_history(data)),
        'risk_evidence': (
            ('domain', 'fqdn'),
            lambda api: api.risk_evidence(data)),
        'reputation': (
            ('domain', 'fqdn'),
            lambda api: api.reputation(data, include_reasons=True)),
        'reverse-whois': (
            None,
            lambda api: api.reverse_whois(
                data,
                mode='purchase',
                scope=self.get_param('parameters.scope', 'current', None))),
        'reverse-ip-whois': (
            None,
            lambda api: api.reverse_ip_whois(data)),
        'whois': (
            ('domain', 'ip'),
            lambda api: api.whois(data)),
    }

    accepted_types, build_query = handlers.get(self.service, ((), None))
    type_ok = accepted_types is None or self.data_type in accepted_types
    if build_query is None or not type_ok:
        raise ValueError(
            "Unsupported service '{0}' for data type '{1}'.".format(
                self.service, self.data_type))

    api = API(self.get_param('config.username'),
              self.get_param('config.key'))
    return build_query(api).response()
def test_no_https():
    """A client built with ``https=False`` must still return truthy search data."""
    # Credentials come from the environment, with placeholder fallbacks.
    user = environ.get('TEST_USER', 'test_user')
    secret = environ.get('TEST_KEY', 'test_key')
    no_https_api = API(user, secret, https=False)

    search = no_https_api.domain_search('google')
    assert search.data()
def run(api=None, args=None):
    """Defines how to start the CLI for the DomainTools API

    Builds an argument parser with one sub-command per entry in API_CALLS
    (positional arguments for parameters without defaults, ``--options`` for
    the rest), resolves credentials, performs the selected call and writes
    the result to the chosen output file.

    :param api: Optional pre-built API client; when omitted one is created
        from the parsed credentials and connection options.
    :param args: Optional list of command line arguments; when falsy,
        ``sys.argv`` is parsed instead.

    Exits with status 1 (after writing a message to stderr) when no usable
    credentials are found on the command line or in the credentials file.
    """
    parser = argparse.ArgumentParser(
        description='The DomainTools CLI API Client')
    parser.add_argument('-u', '--username', dest='user', default='',
                        help='API Username')
    parser.add_argument('-k', '--key', dest='key', default='',
                        help='API Key')
    parser.add_argument(
        '-c', '--credfile', dest='credentials',
        default=os.path.expanduser('~/.dtapi'),
        help='Optional file with API username and API key, one per line.')
    # store_true: with store_false + default=False the flag could never
    # actually switch rate limiting on.
    parser.add_argument(
        '-l', '--rate-limit', dest='rate_limit', action='store_true',
        default=False,
        help='Rate limit API calls against the API based on per minute limits.'
    )
    parser.add_argument('-f', '--format', dest='format',
                        choices=['list', 'json', 'xml', 'html'],
                        default='json')
    # Plain text mode: the output written below is a str, and 'U' cannot be
    # combined with 'w' in open().
    parser.add_argument('-o', '--outfile', dest='out_file',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help='Output file (defaults to stdout)')
    parser.add_argument(
        '-v', '--version', action='version',
        version='DomainTools CLI API Client {0}'.format(version))
    parser.add_argument('--no-https', dest='https', action='store_false',
                        default=True, help='Use HTTP instead of HTTPS.')
    parser.add_argument(
        '--no-verify-ssl', dest='verify_ssl', action='store_false',
        default=True,
        help='Skip verification of SSL certificate when making HTTPs API calls'
    )

    subparsers = parser.add_subparsers(
        help=
        'The name of the API call you wish to perform (`whois` for example)',
        dest='api_call')
    subparsers.required = True

    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec
    # and only fall back on interpreters that lack it.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    # Unique marker so that no real default value can be mistaken for
    # "this argument has no default".
    no_default = object()

    for api_call in API_CALLS:
        api_method = getattr(API, api_call)
        subparser = subparsers.add_parser(api_call, help=api_method.__name__)
        spec = get_spec(api_method)
        # Defaults belong to the trailing arguments, so pair them up from the
        # right and then restore the declared order.
        paired = list(
            zip_longest(reversed(spec.args or []),
                        reversed(spec.defaults or []),
                        fillvalue=no_default))
        for argument_name, default in reversed(paired):
            if argument_name == 'self':
                continue
            if default is no_default:
                subparser.add_argument(argument_name)
            else:
                subparser.add_argument(
                    '--{0}'.format(argument_name.replace('_', '-')),
                    dest=argument_name,
                    default=default,
                    nargs='*')

    arguments = vars(parser.parse_args(args) if args else parser.parse_args())
    out_file = arguments.pop('out_file')
    out_format = arguments.pop('format')
    user, api_key = arguments.pop('user', None), arguments.pop('key', None)
    # Always remove the client-level options so they can never leak into the
    # keyword arguments of the API call itself.
    credentials_path = arguments.pop('credentials')
    https = arguments.pop('https')
    verify_ssl = arguments.pop('verify_ssl')
    rate_limit = arguments.pop('rate_limit')

    if not user or not api_key:
        try:
            with open(credentials_path) as credentials:
                user = credentials.readline().strip()
                api_key = credentials.readline().strip()
        except Exception:
            # Unreadable or missing file: treated the same as "no credentials".
            user = api_key = None
        if not user or not api_key:
            sys.stderr.write(
                'Credentials are required to perform API calls.\n')
            sys.exit(1)

    if not api:  # pragma: no cover
        api = API(user, api_key, https=https, verify_ssl=verify_ssl,
                  rate_limit=rate_limit)
    command = getattr(api, arguments.pop('api_call'))

    # Normalise nargs='*' results; only existing keys are reassigned.
    for name, value in list(arguments.items()):
        if value in ('-', ['-']):
            # '-' means "read the values from stdin, one per line".
            arguments[name] = [line.strip() for line in sys.stdin.readlines()]
        elif value == []:
            # Option given without a value acts as a boolean switch.
            arguments[name] = True
        elif isinstance(value, list) and len(value) == 1:
            arguments[name] = value[0]

    response = command(**arguments)
    if out_format == 'list':
        result = response.as_list()
    else:
        result = getattr(response, out_format)
    output = str(result)
    out_file.write(output if output.endswith('\n') else output + '\n')
    # Release a file opened through --outfile; never close stdout.
    if out_file is not sys.stdout:
        out_file.close()