Esempio n. 1
0
    def authenticate(self, api_username="", api_key="", **kwargs):
        """
        Build the DomainTools API handle from the supplied credentials.

        Calling this function directly is OK, but you won't get the benefit
        of consulting ~/.huntlibrc for default creds if you do.

        :param api_username: The API authentication username
        :type api_username: str
        :param api_key: The API authentication key
        :type api_key: str
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Exceptions:
          Raises ValueError if api_username or api_key is missing/empty or is
          not a string.
        """
        # Validate each credential in turn: emptiness is reported before type.
        if not api_username:
            raise ValueError("You must supply a value for 'api_username'.")
        if not isinstance(api_username, str):
            raise ValueError("The 'api_username' field must be a string.")

        if not api_key:
            raise ValueError("You must supply a value for 'api_key'.")
        if not isinstance(api_key, str):
            raise ValueError("The 'api_key' field must be a string.")

        # Credentials passed validation: create the client handle.
        self._handle = API(api_username, api_key, **kwargs)
Esempio n. 2
0
    def connect(self, params={}):
        """
        Create the DomainTools client, verify the credentials and load the
        PhishEye term list.

        The client is stored on ``self.api``; the ``term`` value of every
        entry returned by the PhishEye term list request is collected in
        ``self.terms``.

        :param params: Connection parameters, read via ``Input.USERNAME`` and
            ``Input.API_KEY`` (the latter must provide a ``secretKey`` entry)
        :raises ConnectionTestException: If the account information request
            is rejected or fails for any other reason
        """
        self.logger.info("Connect: Connecting..")
        self.api = API(
            params.get(Input.USERNAME),
            params.get(Input.API_KEY).get("secretKey"),
        )

        # Probe the account endpoint so bad credentials surface immediately.
        try:
            self.api.account_information().data()
        except NotAuthorizedException:
            raise ConnectionTestException(
                cause="Authorization failed.",
                assistance="Double-check that your credentials configured in your connection are correct and try again.",
            )
        except Exception as e:
            raise ConnectionTestException(
                cause="Unable to connect to DomainTools.",
                assistance=f"Exception was: {e}",
            )

        term_listing = Helper.make_request(self.api.phisheye_term_list,
                                           self.logger)
        self.terms = []
        self.terms.extend(
            entry.get("term")
            for entry in term_listing.get("response").get("terms"))
Esempio n. 3
0
def test_exception_handling():
    """Check that API error conditions surface as the expected exceptions."""
    bad_request = api.reverse_ip('ss')
    assert bad_request.status == 400

    # Capture whatever .data() raises so its attributes can be inspected.
    caught = None
    try:
        bad_request.data()
    except Exception as error:
        caught = error

    assert caught
    assert caught.code == 400
    assert 'not understand' in caught.reason['error']['message']

    # Unknown product / path.
    with pytest.raises(exceptions.NotFoundException):
        api._results(
            'i_made_this_product_up',
            '/v1/steianrstierstnrsiatiarstnsto.com/whois',
        ).data()

    # Invalid credentials.
    with pytest.raises(exceptions.NotAuthorizedException):
        API('notauser', 'notakey').domain_search('amazon').data()

    # Unsupported signing hash.
    expected_message = r"Invalid value 'notahash' for 'key_sign_hash'. Values available are sha1,sha256,md5"
    with pytest.raises(ValueError, match=expected_message):
        API(
            'notauser',
            'notakey',
            always_sign_api_key=True,
            key_sign_hash='notahash',
        ).domain_search('amazon')
Esempio n. 4
0
    def domaintools(self, data):
        """
        Run an Iris Investigate query for the configured service.

        * ``investigate-domain`` on a ``domain``: investigates ``data`` and,
          when the response reports a non-zero ``results_count``, returns the
          first result passed through ``format_single_domain``; otherwise the
          raw response is returned.
        * ``pivot`` on a ``hash``, ``ip`` or ``mail``: searches by the
          matching Iris field and returns the results passed through
          ``DomainToolsAnalyzer.format_pivot_domains``.

        :param data: The observable value to look up
        :return: The (possibly formatted) response, or None when the service
            and data type combination is not handled
        """
        client = API(self.get_param("config.username"),
                     self.get_param("config.key"))

        # Identification parameters sent along with every Iris request.
        request_params = {
            "app_partner": "cortex",
            "app_name": "Iris",
            "app_version": 1
        }

        if self.service == "investigate-domain" and self.data_type == "domain":
            investigation = client.iris_investigate(
                data, **request_params).response()
            if not investigation["results_count"]:
                return investigation
            return self.format_single_domain(investigation.get("results")[0])

        if self.service == "pivot" and self.data_type in ("hash", "ip",
                                                          "mail"):
            # Observable data type -> Iris Investigate search argument.
            search_field = {
                "ip": "ip",
                "mail": "email",
                "hash": "ssl_hash",
            }[self.data_type]
            request_params[search_field] = data
            pivot = client.iris_investigate(**request_params).response()
            return DomainToolsAnalyzer.format_pivot_domains(
                pivot.get("results"), search_field, data)

        return None
Esempio n. 5
0
def test_no_https():
    """A client created with https=False must still return search data.

    A NotAuthorizedException is tolerated.
    """
    try:
        user = environ.get('TEST_USER', 'test_user')
        key = environ.get('TEST_KEY', 'test_key')
        plain_http_api = API(user, key, https=False)
        search_data = plain_http_api.domain_search('google').data()
        assert search_data
    except exceptions.NotAuthorizedException:
        pass
    def domaintools(self, data):
        """
        Query the DomainTools endpoint selected by ``self.service``.

        A 'reverse-ip' request for an 'ip' observable is first rewritten to
        'host-domains' (``self.service`` itself is updated).

        :param data: The observable value passed to the selected API call
        :return: The ``.response()`` of the matching API call, or None when
            the service / data type combination is not supported
        """
        if (self.service == 'reverse-ip' and self.data_type == 'ip'):
            self.service = 'host-domains'

        api = API(self.get_param('config.username'), self.get_param('config.key'))

        # Default result: without it an unsupported service / data type
        # combination would raise UnboundLocalError at the return below.
        response = None

        # NOTE: 'ip' can no longer reach this branch because of the rewrite
        # above; it is kept in the list to leave the accepted types unchanged.
        if self.service == 'reverse-ip' and self.data_type in ['domain', 'ip', 'fqdn']:
            response = api.reverse_ip(data).response()

        elif self.service == 'host-domains' and self.data_type == 'ip':
            response = api.host_domains(data).response()

        elif self.service == 'name-server-domains' and self.data_type == 'domain':
            response = api.reverse_name_server(data).response()

        elif self.service == 'whois/history' and self.data_type == 'domain':
            response = api.whois_history(data).response()

        elif self.service == 'whois/parsed' and self.data_type == 'domain':
            response = api.parsed_whois(data).response()

        elif self.service == 'reverse-whois':
            response = api.reverse_whois(data, mode='purchase').response()

        elif self.service == 'whois' and self.data_type == 'ip':
            response = api.whois(data).response()

        return response
Esempio n. 7
0
    def on_load_configuration(self, config):
        """
        Invoked after the application-specific configuration has been loaded

        This callback provides the opportunity for the application to parse
        additional configuration properties. Here it reads the DomainTools
        API key and API user from the general configuration section and
        creates the API client from them.

        :param config: The application configuration
        :raises Exception: If the API key or the API user is missing or empty
        """
        logger.info("On 'load configuration' callback.")

        # API Key
        try:
            self._api_key = config.get(self.GENERAL_CONFIG_SECTION,
                                       self.GENERAL_API_KEY_CONFIG_PROP)
        except Exception:
            # Still best effort (the check below reports the missing value),
            # but keep the underlying cause available for troubleshooting
            # instead of discarding it silently.
            logger.debug(
                "Unable to read the DomainTools API key from the configuration",
                exc_info=True)
        if not self._api_key:
            raise Exception(
                "DomainTools API Key not found in configuration file: {0}".
                format(self._app_config_path))

        # API User
        try:
            self._api_user = config.get(self.GENERAL_CONFIG_SECTION,
                                        self.GENERAL_API_USER_CONFIG_PROP)
        except Exception:
            # Same best-effort handling as for the API key above.
            logger.debug(
                "Unable to read the DomainTools API user from the configuration",
                exc_info=True)
        if not self._api_user:
            raise Exception(
                "DomainTools API User not found in configuration file: {0}".
                format(self._app_config_path))

        self._api = API(self._api_user, self._api_key)
Esempio n. 8
0
    def _api(self):
        """Build a DomainTools API client from ``self.api_key``.

        ``self.api_key`` is split on ':'; the first piece is passed as the
        first ``API`` argument and the second piece as the second argument.
        """
        parts = self.api_key.split(":")
        return API(parts[0], parts[1])
Esempio n. 9
0
def handler(q=False):
    """Handle a JSON encoded module request.

    :param q: The JSON request string; a falsy value is returned unchanged
    :return: ``misperrors`` (with its 'error' entry set) when no supported
        attribute is present or the DomainTools credentials are missing or
        incomplete, otherwise ``{'results': ...}`` with the dumped values
    """
    if not q:
        return q

    request = json.loads(q)

    # Pick the first supported attribute carrying a non-empty value.
    for attribute in mispattributes['input']:
        to_query = request.get(attribute)
        if to_query:
            break
    else:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors

    config = request.get('config')
    if not config:
        misperrors['error'] = 'DomainTools authentication is missing'
        return misperrors

    username = config.get('username')
    api_key = config.get('api_key')
    if username is None or api_key is None:
        misperrors['error'] = 'DomainTools authentication is incomplete'
        return misperrors
    domtools = API(username, api_key)

    values = DomainTools()
    services = get_services(request)
    if services:
        # Each service name is resolved to a module-level callable.
        try:
            for service_name in services:
                globals()[service_name](domtools, to_query, values)
        except Exception as e:
            print(to_query, type(e), e)

    return {'results': values.dump()}
Esempio n. 10
0
    def connect(self, params={}):
        """
        Create the DomainTools client and verify the credentials.

        The client is stored on ``self.api`` only after the account
        information request succeeded.

        :param params: Connection parameters; ``username`` and
            ``api_key`` (which must provide a ``secretKey`` entry) are read.
            The default dict is only read, never modified.
        :raises Exception: If authorization fails or the request fails for
            any other reason
        """
        self.logger.info("Connect: Connecting..")
        username = params.get("username")
        key = params.get("api_key").get("secretKey")
        api = API(username, key)
        try:
            response = api.account_information()
            response.data()
        except NotAuthorizedException as e:
            # The "%s" placeholder needs its argument; without it the literal
            # "%s" is logged and the error detail is lost.
            self.logger.error("DomainTools: Connect: error %s", str(e))
            raise Exception(
                "DomainTools: Connect: Authorization failed. Please try again")
        except Exception as e:
            self.logger.error("DomainTools: Connect: error %s", str(e))
            raise Exception(
                "DomainTools: Connect: Failed to connect to server {}".format(
                    e))

        self.api = api
    def domaintools(self, data):
        """
        Query the DomainTools endpoint selected by ``self.service``.

        A 'reverse-ip' request for an 'ip' observable is first rewritten to
        'host-domains' (``self.service`` itself is updated).

        :param data: The observable value passed to the selected API call
        :return: The ``.response()`` of the matching API call, or None when
            the service / data type combination is not supported
        """
        if (self.service == 'reverse-ip' and self.data_type == 'ip'):
            self.service = 'host-domains'

        api = API(self.get_param('config.username'), self.get_param('config.key'))

        # Default result: without it an unsupported service / data type
        # combination would raise UnboundLocalError at the return below.
        response = None

        # NOTE: 'ip' can no longer reach this branch because of the rewrite
        # above; it is kept in the list to leave the accepted types unchanged.
        if self.service == 'reverse-ip' and self.data_type in ['domain', 'ip', 'fqdn']:
            response = api.reverse_ip(data).response()

        elif self.service == 'host-domains' and self.data_type == 'ip':
            response = api.host_domains(data).response()

        elif self.service == 'name-server-domains' and self.data_type == 'domain':
            response = api.reverse_name_server(data).response()

        elif self.service == 'whois/history' and self.data_type == 'domain':
            response = api.whois_history(data).response()

        elif self.service == 'whois/parsed' and self.data_type == 'domain':
            response = api.parsed_whois(data).response()

        elif self.service == 'risk_evidence' and self.data_type in ['domain', 'fqdn']:
            response = api.risk_evidence(data).response()

        elif self.service == 'reputation' and self.data_type in ['domain', 'fqdn']:
            response = api.reputation(data, include_reasons=True).response()

        elif self.service == 'reverse-whois':
            response = api.reverse_whois(data, mode='purchase').response()

        elif self.service == 'whois' and self.data_type == 'ip':
            response = api.whois(data).response()

        return response
Esempio n. 12
0
def run():  # pragma: no cover
    """Defines how to start the CLI for the DomainTools API

    Parses the command line, requires both credentials (exiting with status 1
    otherwise), performs the requested API call and writes the result in the
    chosen output format, always terminated by a newline.
    """
    out_file, out_format, arguments = parse()
    user = arguments.pop('user', None)
    key = arguments.pop('key', None)
    if not (user and key):
        sys.stderr.write('Credentials are required to perform API calls.\n')
        sys.exit(1)

    api = API(
        user,
        key,
        https=arguments.pop('https'),
        verify_ssl=arguments.pop('verify_ssl'),
        rate_limit=arguments.pop('rate_limit'),
    )

    # Whatever is left in `arguments` after the pops is passed to the call.
    api_method = getattr(api, arguments.pop('api_call'))
    response = api_method(**arguments)

    if out_format != 'list':
        rendered = getattr(response, out_format)
    else:
        rendered = response.as_list()

    text = str(rendered)
    if not text.endswith('\n'):
        text = text + '\n'
    out_file.write(text)
Esempio n. 13
0
def test_exception_handling():
    """Check that API error conditions surface as the expected exceptions."""
    bad_request = api.reverse_ip('ss')
    assert bad_request.status == 400

    # Capture whatever .data() raises so its attributes can be inspected.
    caught = None
    try:
        bad_request.data()
    except Exception as error:
        caught = error

    assert caught
    assert caught.code == 400
    assert 'not understand' in caught.reason['error']['message']

    # Unknown product / path.
    with pytest.raises(exceptions.NotFoundException):
        api._results(
            'i_made_this_product_up',
            '/v1/steianrstierstnrsiatiarstnsto.com/whois',
        ).data()

    # Invalid credentials.
    with pytest.raises(exceptions.NotAuthorizedException):
        API('notauser', 'notakey').domain_search('amazon').data()
Esempio n. 14
0
"""Defines all test wide settings and variables"""
from os import environ

from domaintools import API
from vcr import VCR


def remove_server(response):
    """Drop the 'server' entry from the response headers, if present.

    The response is modified in place and returned.
    """
    headers = response.get('headers', {})
    headers.pop('server', None)
    return response


# Shared recorder: the 'server' header is removed from recorded responses and
# the credential query parameters are filtered out.
vcr = VCR(
    before_record_response=remove_server,
    filter_query_parameters=['api_key', 'api_username'],
    cassette_library_dir='tests/fixtures/vcr/',
    path_transformer=VCR.ensure_suffix('.yaml'),
    record_mode='new_episodes',
)

# Shared API client, created under the 'init_user_account' cassette.
with vcr.use_cassette('init_user_account'):
    api = API(
        environ.get('TEST_USER', 'test_user'),
        environ.get('TEST_KEY', 'test_key'),
    )
Esempio n. 15
0
def handler(q=False):
    """Handle a JSON encoded module request using DomainTools lookups.

    Runs the parsed whois, domain profile and reputation calls for the first
    supported attribute found in the request and collects the values of
    interest in a ``DomainTools`` container.

    :param q: The JSON request string; a falsy value is returned unchanged
    :return: ``misperrors`` (with its 'error' entry set) when no supported
        attribute is present, the credentials are missing or incomplete, or
        the whois / profile lookup reports an error; otherwise
        ``{'results': ...}`` with the dumped values
    """
    if not q:
        return q

    request = json.loads(q)
    # Pick the first supported attribute carrying a non-empty value.
    to_query = None
    for t in mispattributes['input']:
        to_query = request.get(t)
        if to_query:
            break
    if not to_query:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors

    # Both the username and the API key are required to build the client.
    if request.get('config'):
        if (request['config'].get('username') is
                None) or (request['config'].get('api_key') is None):
            misperrors['error'] = 'DomainTools authentication is incomplete'
            return misperrors
        else:
            domtools = API(request['config'].get('username'),
                           request['config'].get('api_key'))
    else:
        misperrors['error'] = 'DomainTools authentication is missing'
        return misperrors

    whois_entry = domtools.parsed_whois(to_query)
    profile = domtools.domain_profile(to_query)
    # NOTE: profile['website_data']['response_code'] could be used to see if the host is still up. Maybe set a tag.
    reputation = domtools.reputation(to_query, include_reasons=True)
    # NOTE: use that value in a tag when we will have attribute level tagging
    values = DomainTools()

    # An error in the whois or profile lookup aborts the whole request.
    if whois_entry.get('error'):
        misperrors['error'] = whois_entry['error']['message']
        return misperrors

    if profile.get('error'):
        misperrors['error'] = profile['error']['message']
        return misperrors

    # A reputation error is tolerated: the risk value is simply not set.
    if reputation and not reputation.get('error'):
        reasons = ', '.join(reputation['reasons'])
        values.risk = [
            reputation['risk_score'],
            'Risk value of {} (via Domain Tools), Reasons: {}'.format(
                to_query, reasons)
        ]

    # Registrant names from both sources.
    if whois_entry.get('registrant'):
        values.add_name(whois_entry['registrant'], 'Parsed registrant')
    if profile.get('registrant'):
        values.add_name(profile['registrant']['name'], 'Profile registrant')

    # Hosting IP together with the number of other domains reported for it.
    if profile.get('server'):
        other_domains = profile['server']['other_domains']
        values.add_ip(
            profile['server']['ip_address'],
            'IP of {} (via DomainTools). Has {} other domains.'.format(
                to_query, other_domains))

    # Registration details from the profile; each field is optional.
    if profile.get('registration'):
        if profile['registration'].get('created'):
            values.add_creation_date(profile['registration']['created'],
                                     'created')
        if profile['registration'].get('updated'):
            values.add_creation_date(profile['registration']['updated'],
                                     'updated')
        if profile['registration'].get('registrar'):
            values.add_registrar(profile['registration']['registrar'], 'name')

    # NOTE(review): unlike the profile branch above, 'created' is read
    # without a .get() guard here - confirm it is always present.
    if whois_entry.get('registration'):
        values.add_creation_date(whois_entry['registration']['created'],
                                 'timestamp')

    # Raw whois record as free text.
    if whois_entry.get('whois'):
        values.freetext = whois_entry['whois']['record']
    # Parsed whois: creation date, registrar details and per-contact data.
    if whois_entry.get('parsed_whois'):
        if whois_entry['parsed_whois']['created_date']:
            values.add_creation_date(
                whois_entry['parsed_whois']['created_date'], 'created')
        if whois_entry['parsed_whois']['registrar']['name']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['name'], 'name')
        if whois_entry['parsed_whois']['registrar']['url']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['url'], 'url')
        if whois_entry['parsed_whois']['registrar']['iana_id']:
            values.add_registrar(
                whois_entry['parsed_whois']['registrar']['iana_id'], 'iana_id')
        # The contact key is passed along as the label of each value.
        for key, entry in whois_entry['parsed_whois']['contacts'].items():
            if entry['email']:
                values.add_mail(entry['email'], key)
            if entry['phone']:
                values.add_phone(entry['phone'], key)
            if entry['name']:
                values.add_name(entry['name'], key)
    # Add the addresses from whois_entry.emails() that are not yet recorded.
    if whois_entry.emails():
        for mail in whois_entry.emails():
            if mail not in values.reg_mail.keys():
                values.add_mail(mail, 'Maybe registrar')
    return {'results': values.dump()}
Esempio n. 16
0
"""Defines all test wide settings and variables"""
from os import environ

from domaintools import API
from vcr import VCR


def remove_server(response):
    """Scrub a recorded response before it is stored.

    Drops the 'server' header (if present) and, when the response carries a
    'url', replaces it with the result of its ``update_query`` call using
    placeholder ``api_username`` / ``api_key`` values. The response is
    modified in place and returned.
    """
    headers = response.get('headers', {})
    headers.pop('server', None)
    if 'url' in response:
        scrubbed_url = response['url'].update_query(api_username='******',
                                                    api_key='test')
        response['url'] = scrubbed_url
    return response


# Parameters filtered from both the query string and POST data of recordings.
_FILTERED_PARAMETERS = ['timestamp', 'signature', 'api_username']

# Shared recorder: responses are passed through remove_server before being
# stored.
vcr = VCR(
    before_record_response=remove_server,
    filter_query_parameters=list(_FILTERED_PARAMETERS),
    filter_post_data_parameters=list(_FILTERED_PARAMETERS),
    cassette_library_dir='tests/fixtures/vcr/',
    path_transformer=VCR.ensure_suffix('.yaml'),
    record_mode='new_episodes',
)

# Shared API client, created under the 'init_user_account' cassette.
with vcr.use_cassette('init_user_account'):
    api = API(
        environ.get('TEST_USER', 'test'),
        environ.get('TEST_KEY', 'test'),
        always_sign_api_key=True,
    )
Esempio n. 17
0
class DomainTools(object):
    '''
    The DomainTools class allows you to easily perform some common types of calls
    to the DomainTools API.  It uses their official `domaintools_api` Python module
    to do most of the work but is not a complete replacement for that module. In
    particular, this class concentrates on a few calls that are most relevant for
    data analytic style threat hunting (risk & reputation scores, WHOIS info, etc).

    Most methods pass through any kwargs to the underlying domaintools methods,
    with one important exception: the class consults the user's ~/.huntlibrc
    (if present) to determine the API username and key so you don't always have to provide
    them during authentication.

    :param api_username: The API authentication username (OPTIONAL)
    :type api_username: str
    :param api_key: The API authentication key (OPTIONAL)
    :type api_key: str
    :param `**kwargs`: Additional keyword args are passed to the underlying domaintools module init
    '''

    _DEFAULT_CONFIG_FILE = os.path.expanduser("~/.huntlibrc")

    _handle = None # API Handle
    _account_information = None # Cached information about the API user's account
    _available_api_calls = None # Cached list of API endpoints the account has access to

    def __init__(self, *args, **kwargs):
        """
        Resolve the API credentials, authenticate and prime the caches.

        ``api_username`` and ``api_key`` are taken from the keyword arguments
        when given, otherwise from the ``domaintools`` section of the config
        file, otherwise they default to an empty string. All remaining
        keyword arguments are passed on to ``authenticate``.
        """
        # Read our config file, if it exists
        config = ConfigParser()
        config.read(self._DEFAULT_CONFIG_FILE)

        credentials = {}
        for option in ('api_username', 'api_key'):
            if option in kwargs:
                # Popping also keeps the credential out of the kwargs that
                # are forwarded below, so it is never passed twice.
                credentials[option] = kwargs.pop(option)
            elif config.has_option('domaintools', option):
                credentials[option] = config.get('domaintools', option)
            else:
                credentials[option] = ""

        self.authenticate(
            api_username=credentials['api_username'],
            api_key=credentials['api_key'],
            **kwargs
        )

        # Populate the cached account details right away.
        self._account_information = self.account_information(force_refresh=True)
        self._available_api_calls = self.available_api_calls(force_refresh=True)

    @retry()
    def authenticate(self, api_username="", api_key="", **kwargs):
        """
        Build the DomainTools API handle from the supplied credentials.

        Calling this function directly is OK, but you won't get the benefit
        of consulting ~/.huntlibrc for default creds if you do.

        :param api_username: The API authentication username
        :type api_username: str
        :param api_key: The API authentication key
        :type api_key: str
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Exceptions:
          Raises ValueError if api_username or api_key is missing/empty or is
          not a string.
        """
        # Validate each credential in turn: emptiness is reported before type.
        if not api_username:
            raise ValueError("You must supply a value for 'api_username'.")
        if not isinstance(api_username, str):
            raise ValueError("The 'api_username' field must be a string.")

        if not api_key:
            raise ValueError("You must supply a value for 'api_key'.")
        if not isinstance(api_key, str):
            raise ValueError("The 'api_key' field must be a string.")

        # Credentials passed validation: create the client handle.
        self._handle = API(api_username, api_key, **kwargs)

    @retry()
    def account_information(self, force_refresh=False, **kwargs):
        '''
        Return limits and usage of the various domaintools API calls for the
        authenticated API user.

        The result is cached; the cached copy is returned unless it is empty
        or ``force_refresh`` is set.

        :param force_refresh: A boolean controlling whether or not to refresh the cached info
        :type force_refresh: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A single dict, keyed by the ``id`` of each returned item, whose values
        are dicts holding the remaining details of that item.

        For example:

        {
            'domain-profile': {
                'per_month_limit': None,
                'per_minute_limit': '180',
                'absolute_limit': None,
                'usage': {
                    'today': '0',
                    'month': '100'
                },
                'expiration_date': '2020-12-31'
            },
            'whois': {
                'per_month_limit': None,
                'per_minute_limit': '180',
                'absolute_limit': None,
                'usage': {
                    'today': '9',
                    'month': '997'
                },
                'expiration_date': '2020-12-31'
            }
        }
        '''

        if not force_refresh and self._account_information:
            return self._account_information

        response = self._handle.account_information(**kwargs)

        if response:
            by_endpoint = {}
            for entry in response:
                # The 'id' is removed from the entry and used as the key.
                endpoint = entry.pop('id')
                by_endpoint[endpoint] = dict(entry)
            self._account_information = by_endpoint

        return self._account_information

    @retry()
    def available_api_calls(self, force_refresh=False, **kwargs):
        '''
        Return the list of endpoints available to the authenticated API user.

        The result is cached; the cached copy is returned unless it is empty
        or ``force_refresh`` is set.

        :param force_refresh: A boolean controlling whether or not to refresh the cached info
        :type force_refresh: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        A list of strings containing the API endpoint names.

        For example:

        [
            'domain_profile',
            'whois',
            'whois_history',
            'reverse_ip',
            ...
        ]
        '''

        if not force_refresh and self._available_api_calls:
            return self._available_api_calls

        endpoints = self._handle.available_api_calls(**kwargs)

        # An empty answer leaves the previously cached list untouched.
        if endpoints:
            self._available_api_calls = list(endpoints)

        return self._available_api_calls

    @retry()
    def whois(self, query=None, **kwargs):
        '''
        Return basic WHOIS info for a given domain or IP address.

        :param query: A domain or IP address
        :type query: string
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A dict containing basic WHOIS information, or an empty dict when the
        lookup raises BadRequestException or NotFoundException.

        For example:

        {
            'registrant': 'Google LLC',
            'registration': {
                'created': '1997-09-15',
                'expires': '2028-09-14',
                'updated': '2019-09-09',
                'registrar': 'MarkMonitor Inc.',
                'statuses': [
                    'clientDeleteProhibited',
                    'clientTransferProhibited',
                    'clientUpdateProhibited',
                    'serverDeleteProhibited',
                    'serverTransferProhibited',
                    'serverUpdateProhibited'
                ]
            },
            'name_servers': [
                'NS1.GOOGLE.COM',
                'NS2.GOOGLE.COM',
                'NS3.GOOGLE.COM',
                'NS4.GOOGLE.COM'
            ],
            'whois': {
                'date': '2020-07-12',
                'record': 'Domain Name: google.com\nRegistry Domain ID: 2138514_DOMAIN_COM-VRSN\n...'
            },
            'record_source': 'google.com'
        }

        :Exceptions:
        Will raise ValueError if no query is supplied, or if the query is not a string.

        '''

        if not query:
            raise ValueError("You must supply either a domain or an IP address.")
        if not isinstance(query, str):
            raise ValueError("The query parameter must be a string.")

        try:
            # Materialise the result's items before building the dict.
            pairs = list(self._handle.whois(query, **kwargs))
            return dict(pairs)
        except (BadRequestException, NotFoundException):
            return {}

    @retry()
    def parsed_whois(self, query=None, flatten=False, **kwargs):
        '''
        Return extended WHOIS info for a given domain or IP address.

        :param query: A domain or IP address
        :type query: string
        :param flatten: A boolean controlling whether to attempt to normalize the nested dicts or lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:
        A dict containing basic and extended WHOIS information, or an empty
        dict when the lookup raises BadRequestException or NotFoundException.

        For example:

        {
            ...
            'parsed_whois': {
                'domain': 'google.com',
                'created_date': '1997-09-15T00:00:00-07:00',
                'updated_date': '2019-09-09T08:39:04-07:00',
                'expired_date': '2028-09-13T00:00:00-07:00',
                'statuses': [
                    'clientDeleteProhibited',
                    'clientTransferProhibited',
                    'clientUpdateProhibited',
                    'serverDeleteProhibited',
                    'serverTransferProhibited',
                    'serverUpdateProhibited'
                ],
                'name_servers': [
                    'ns1.google.com',
                    'ns2.google.com',
                    'ns3.google.com',
                    'ns4.google.com'
                ],
                'registrar': {
                    'name': 'MarkMonitor, Inc. MarkMonitor Inc.',
                    'abuse_contact_phone': '12083895770',
                    'abuse_contact_email': '*****@*****.**',
                    'iana_id': '292',
                    'url': 'http://www.markmonitor.com',
                    'whois_server': 'whois.markmonitor.com'
                },
                'contacts': {
                    'registrant': {
                        'name': '',
                        'org': 'Google LLC',
                        'street': [],
                        'city': '',
                        'state': 'CA',
                        'postal': '',
                        'country': 'us',
                        'phone': '',
                        'fax': '',
                        'email': 'REDACTED FOR PRIVACY (DT)'
                    }
                    ...
                }
            }
            ...
        }

        :Exceptions:
        Will raise ValueError if no query is supplied, or if the query is not a string.

        '''

        if not query:
            raise ValueError(
                "You must supply either a domain or an IP address.")
        if not isinstance(query, str):
            raise ValueError(
                "The query parameter must be a string.")

        try:
            # Materialise the result's items before building the dict.
            pairs = list(self._handle.parsed_whois(query, **kwargs))
            record = dict(pairs)
        except (BadRequestException, NotFoundException):
            return {}

        if not flatten:
            return record

        # Normalize the nested dictionary keys into a single level.
        return huntlib_data_flatten(record)

    @retry()
    def brand_monitor(self, query=None, **kwargs):
        '''
        Given a query containing one or more search terms, return a list of
        any newly-active or pending domain registrations containing ALL of
        the terms.

        :param query: A string containing one or more search terms (separated by '|'), or a list of terms, which is joined with '|'
        :type query: string or list of strings
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        A list of dicts, with each dict containing a result.

        For example:

        [
            {
                'domain': '54google.com',
                'status': 'new'
            },
            {
                'domain': 'aboutmicrosoftandgoogleapps.com',
                 'status': 'on-hold'
            },
            ...
        ]

        :Exceptions:

        Will raise ValueError if no query is supplied or if the query is
        neither a string nor a list.
        '''

        if not query:
            raise ValueError("You must specify a query pattern.")

        if isinstance(query, list):
            # Multiple terms are sent as a single '|' separated pattern.
            query = "|".join(query)
        elif not isinstance(query, str):
            raise ValueError("The 'query' parameter must be either a string or a list of strings.")

        matches = self._handle.brand_monitor(query, **kwargs)
        return list(matches)

    @retry()
    def domain_profile(self, query=None, flatten=False, **kwargs):
        '''
        Fetch the basic profile of a domain: DNS, WHOIS, history and web site
        information, along with pointers to more detailed info.

        :param query: The domain to look up
        :type query: string
        :param flatten: When True, pass the result through huntlib_data_flatten() to collapse the nested dicts/lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        A dict containing the various pieces of info.

        For example:

        {
            'registrant': {
                'name': 'Google LLC',
                'domains': 18696,
                'product_url': 'https://reversewhois.domaintools.com/?all[]=Google+LLC&none[]='
            },
            'server': {
                'ip_address': '172.217.14.196',
                'other_domains': 151,
                'product_url': 'https://reverseip.domaintools.com/search/?q=google.com'
            },
            'registration': {
                'created': '1997-09-15',
                'expires': '2028-09-14',
                'updated': '2019-09-09',
                'registrar': 'MarkMonitor Inc.',
                'statuses': [
                    'clientDeleteProhibited',
                    'clientTransferProhibited',
                    ...
                ]
            },
            'name_servers': [
                {
                    'server': 'NS1.GOOGLE.COM',
                    'product_url': 'https://reversens.domaintools.com/search/?q=NS1.GOOGLE.COM'
                },
                ...
            ],
            ...
        }

        :Exceptions:

        Raises ValueError if no query is supplied or if it is not a string.
        '''
        if not query:
            raise ValueError("You must specify a query domain.")
        if not isinstance(query, str):
            raise ValueError("query parameter must be a string.")

        # Materialize the response first, then build the dict from its items.
        response = self._handle.domain_profile(query, **kwargs)
        profile = dict(list(response))

        # Only collapse the nested structure when the caller asked for it.
        return huntlib_data_flatten(profile) if flatten else profile

    @retry()
    def domain_reputation(self, domain=None, reasons=False, **kwargs):
        '''
        Look up the reputation-based risk score of a domain, optionally with
        the list of reasons that contributed to the score.

        :param domain: The domain for which to retrieve the score
        :type domain: string
        :param reasons: Whether to ask for the list of reasons behind the score (DEFAULT False)
        :type reasons: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        A dict containing the requested information.  If the lookup raises
        BadRequestException or NotFoundException, an empty dict is returned
        instead.

        For example:

        {
            'domain': 'domaintools.xyz',
            'risk_score': 18.69,
            'reasons': [
                'registrant'
            ]
        }

        :Exceptions:

        Raises ValueError if no domain is provided, or if the domain is not a string.

        '''
        if not domain:
            raise ValueError("You must specify a query domain.")
        if not isinstance(domain, str):
            raise ValueError("The domain parameter must be a string.")

        try:
            # Consuming the response stays inside the try so that errors
            # raised while reading it are handled the same way.
            response = self._handle.reputation(domain, reasons, **kwargs)
            return dict(list(response))
        except (BadRequestException, NotFoundException):
            return dict()

    @retry()
    def risk(self, domain=None,  **kwargs):
        '''
        Look up the risk scores of a domain, broken down by individual risk factor.

        :param domain: The domain for which to retrieve the scores
        :type domain: string
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        A dict mapping each risk factor's 'name' to its 'risk_score'.  If the
        lookup raises BadRequestException or NotFoundException, an empty dict
        is returned instead.

        For example:

        {
            'proximity': 18,
            'threat_profile': 36,
            'threat_profile_phishing': 36,
            'threat_profile_malware': 17,
            'threat_profile_spam': 2
        }

        :Exceptions:

        Raises ValueError if no domain is provided, or if the domain is not a string.

        '''
        if not domain:
            raise ValueError("You must specify a query domain.")
        if not isinstance(domain, str):
            raise ValueError("The domain parameter must be a string.")

        scores = {}
        try:
            # The response is a sequence of per-factor dicts that all share
            # the same keys; fold them into one name -> score mapping.
            for component in self._handle.risk(domain, **kwargs):
                scores[component['name']] = component['risk_score']
        except (BadRequestException, NotFoundException):
            return dict()

        return scores
    
    @retry() 
    def iris_enrich(self, query=None, flatten=False, asframe=False, **kwargs):
        '''
        Bulk enrichment of domains against the DomainTools IRIS database.

        Results are keyed by domain, so a domain that shows up more than once
        in the response produces a single entry.  'google.com',
        'www.google.com' and 'drive.google.com' are different keys.
        NOTE(review): earlier documentation says exact duplicates are only
        looked up once; nothing in this method deduplicates the query, so
        presumably the service does -- confirm.

        :param query: The domain(s) to enrich.  A list or Series is joined into one comma-separated string; a string is passed through unchanged.
        :type query: string, list or pandas Series object
        :param flatten: When True, pass each domain's data through huntlib_data_flatten() to collapse the nested dicts/lists into a single flat dict (DEFAULT False)
        :type flatten: bool
        :param asframe: Return the enriched data as a pandas DataFrame instead of a dict (DEFAULT False)
        :type asframe: bool
        :param `**kwargs`: Additional arguments to pass to the underlying domaintools module

        :Return Value:

        Returns a dict where each key is an enriched domain and the corresponding
        value is a dict with the enrichment data for that domain.  For example:

        {
            'google.com': {
                'whois_url': 'https://whois.domaintools.com/google.com',
                'active': True,
                [...]
            },
            'microsoft.com': {
                'whois_url': 'https://whois.domaintools.com/microsoft.com',
                'active': True,
                [...]
            }
        }

        If `asframe` is True, the result is returned instead as a pandas DataFrame
        object, where the 'domain' column contains the enriched domains, with their
        enrichment data in the remaining columns, like so:

                domain          whois_url                                   active  [...]
            0   google.com      https://whois.domaintools.com/google.com    True    [...]
            1   microsoft.com   https://whois.domaintools.com/microsoft.com True    [...]

        If the lookup raises BadRequestException or NotFoundException, an empty
        dict is returned (also when `asframe` is True).

        :Exceptions:

        Raises ValueError if no query is supplied or if it is not a string,
        list or pandas Series.
        '''
        if query is None:
            raise ValueError("You must specify a domain or list of domains to query.")

        if isinstance(query, (list, pd.core.series.Series)):
            # The underlying call takes a single comma-separated string.
            query = ','.join(query)
        elif not isinstance(query, str):
            raise ValueError("The query must be either a string or a list of strings.")

        try:
            records = list(self._handle.iris_enrich(query, **kwargs))
        except (BadRequestException, NotFoundException):
            return dict()

        enriched = dict()
        for record in records:
            # Records without a 'domain' key cannot be indexed; skip them.
            if 'domain' not in record:
                continue
            name = record.pop('domain')
            enriched[name] = huntlib_data_flatten(record) if flatten else record

        if not asframe:
            return enriched

        # One row per domain: the domains start out as column labels, so
        # transpose, then turn the resulting index into a 'domain' column.
        frame = pd.DataFrame(enriched).transpose()
        frame = frame.reset_index()
        return frame.rename(columns={'index': 'domain'})
    
    def enrich(self, df=None, column=None, prefix='dt_enrich.', progress_bar=False, fields=None, batch_size=100):
        '''
        Enrich a pandas DataFrame object with information from DomainTools.  Note that the
        original DataFrame is not modified, so you must assign the return value to a variable
        if you want to keep it.  e.g. `df = dt.enrich(df, column='domains')`.

        The unique, non-null values of `column` are looked up in batches via
        `iris_enrich()` and the results are left-merged back onto `df`.

        :param df: The DataFrame to enrich
        :type df: pandas.DataFrame
        :param column: The name of the column containing domains and/or IPs to enrich (as strings)
        :type column: string
        :param prefix: Naming prefix for the newly-added columns (DEFAULT 'dt_enrich.')
        :type prefix: string
        :param progress_bar: If True, attempt to show enrichment progress (DEFAULT False)
        :type progress_bar: bool
        :param fields: A list of specific enrichment field names (including the prefix) to add (DEFAULT add all fields).  The list itself is not modified.
        :type fields: list of strings
        :param batch_size: The number of domains/IPs to enrich in one "batch" (DEFAULT 100)
        :type batch_size: integer

        :Return Value:

        A pandas DataFrame object containing all of the original information plus the
        additional enrichment columns.  If no enrichment data came back at all, a copy
        of the original frame is returned without extra columns.

        :Exceptions:
        Raises ValueError if the required options are not present or are of the wrong type
        (including a `batch_size` that is not a positive integer).
        Raises KeyError if `fields` names a column that is not in the enrichment data.

        '''
        from numbers import Integral

        if df is None:
            raise ValueError(
                "You must supply a pandas DataFrame in the 'df' parameter.")
        elif not isinstance(df, pd.core.frame.DataFrame):
            raise ValueError(
                "The argument for the 'df' parameter must be a pandas DataFrame.")

        if not column:
            raise ValueError("You must supply a column name to enrich.")
        elif not isinstance(column, str):
            raise ValueError("The column name must be a 'str'.")
        elif column not in df.columns:
            raise ValueError(
                f"The column '{column}' does not exist in the frame.")

        if not isinstance(prefix, str):
            raise ValueError("The column name prefix must be a 'str'.")

        if fields is not None and not isinstance(fields, list):
            raise ValueError(
                "The 'fields' parameter must be a list of strings.")

        # A non-positive batch size would produce no batches at all (or make
        # range() fail with an unrelated message), so reject it up front.
        if not isinstance(batch_size, Integral) or batch_size < 1:
            raise ValueError(
                "The 'batch_size' parameter must be a positive integer.")

        merge_key = f'{prefix}domain'

        # Basic deduplication to save API calls.  Null values are dropped as
        # well: they cannot be joined into the comma-separated query string and
        # simply stay unmatched in the left merge below.
        unique_domains = pd.Series(
            df[column].dropna().unique(),
            dtype='object'
        )

        if progress_bar:
            # NOTE(review): nothing below uses tqdm's pandas integration; the
            # call is kept so existing behavior is unchanged.
            tqdm.pandas(desc='Enriching')

        batch_frames = []

        with tqdm(desc="Enriching", total=unique_domains.size, disable=not progress_bar) as pbar:

            for start in range(0, unique_domains.size, batch_size):
                batch = unique_domains.iloc[start:start + batch_size]

                res = self.iris_enrich(
                    batch,
                    flatten=True,
                    asframe=True
                )

                # We have to do this the hard way, instead of just DataFrame(res)
                # because some of the items in res contain lists of unequal length,
                # which causes pandas to throw an exception.
                results = pd.DataFrame(
                    {k: pd.Series(v, dtype='object') for k, v in res.items()}
                )

                if not results.empty:
                    batch_frames.append(results)

                pbar.update(batch.size)

        # DataFrame.append() no longer exists in pandas 2.x; collect the
        # per-batch frames and concatenate them once instead.
        if batch_frames:
            enrichment_df = pd.concat(batch_frames, ignore_index=True)
        else:
            enrichment_df = pd.DataFrame()

        enrichment_df = enrichment_df.add_prefix(prefix)

        if merge_key not in enrichment_df.columns:
            # Nothing usable came back (empty input, or every lookup came back
            # empty), so there is nothing to merge on.
            return df.copy()

        # If we asked for only certain fields, filter for those
        if fields:
            # Work on a copy so the caller's list is left untouched.
            wanted = list(fields)
            if merge_key not in wanted:
                # Make sure this is in the final fields list no matter what,
                # because we rely on it as a merge column below
                wanted.append(merge_key)
            enrichment_df = enrichment_df[wanted]

        merged = pd.merge(
            df,
            enrichment_df,
            how='left',
            left_on=column,
            right_on=merge_key
        )

        # The merge key duplicates `column`, so it is dropped from the result.
        return merged.drop(merge_key, axis='columns')
    def domaintools(self, data):
        """
        Run the configured DomainTools service against one observable.

        The API call is chosen from ``self.service`` and ``self.data_type``.
        A 'reverse-ip' request for an 'ip' observable is rewritten to
        'host-domains' first; note that this updates ``self.service``.

        :param data: The observable value handed to the DomainTools API call
        :return: The value of ``.response()`` on the result of the API call
        :raises ValueError: If the service / data type combination is not
            supported.  This is checked before the API client is created.
        """
        if self.service == 'reverse-ip' and self.data_type == 'ip':
            self.service = 'host-domains'

        # service -> (API method name,
        #             accepted data types, or None when any type is accepted,
        #             extra keyword arguments for the call)
        routes = {
            # 'ip' never reaches 'reverse-ip': it is rewritten above.
            'reverse-ip': ('reverse_ip', ('domain', 'fqdn'), {}),
            'host-domains': ('host_domains', ('ip',), {}),
            'name-server-domains': ('reverse_name_server', ('domain',), {}),
            'whois/history': ('whois_history', ('domain',), {}),
            'whois/parsed': ('parsed_whois', ('domain', 'ip'), {}),
            'hosting-history': ('hosting_history', ('domain',), {}),
            'risk_evidence': ('risk_evidence', ('domain', 'fqdn'), {}),
            'reputation': ('reputation', ('domain', 'fqdn'),
                           {'include_reasons': True}),
            'reverse-whois': ('reverse_whois', None, {'mode': 'purchase'}),
            'reverse-ip-whois': ('reverse_ip_whois', None, {}),
            'whois': ('whois', ('domain', 'ip'), {}),
        }

        route = routes.get(self.service)
        if route is None or (route[1] is not None
                             and self.data_type not in route[1]):
            # Without this check the method would fall through every branch
            # and fail on an unassigned result.
            raise ValueError(
                "Unsupported DomainTools service '{0}' for data type '{1}'."
                .format(self.service, self.data_type))

        method_name, _, extra = route
        if self.service == 'reverse-whois':
            # The search scope is configurable; it defaults to 'current'.
            scope = self.get_param('parameters.scope', 'current', None)
            extra = dict(extra, scope=scope)

        api = API(self.get_param('config.username'),
                  self.get_param('config.key'))

        return getattr(api, method_name)(data, **extra).response()
Esempio n. 19
0
def test_no_https():
    """Check that a domain search returns data when the client is built with https=False."""
    # Credentials come from the environment, with placeholder fallbacks.
    username = environ.get('TEST_USER', 'test_user')
    api_key = environ.get('TEST_KEY', 'test_key')
    client = API(username, api_key, https=False)
    assert client.domain_search('google').data()
Esempio n. 20
0
def run(api=None, args=None):
    """Run the DomainTools CLI: parse arguments, make one API call, write the result.

    A sub-command is generated for every name in ``API_CALLS``; the arguments
    of the matching ``API`` method become its positional arguments (no
    default) or ``--options`` (with a default).

    :param api: An already-constructed API client to use.  When omitted, one
        is built from the credentials and connection options on the command
        line.
    :param args: Argument list to parse.  When empty or None, ``sys.argv`` is
        parsed instead.

    Credentials come from ``--username`` / ``--key`` or, if either is missing,
    from the first two lines of the credentials file.  If no usable
    credentials are found, a message is written to stderr and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='The DomainTools CLI API Client')
    parser.add_argument('-u',
                        '--username',
                        dest='user',
                        default='',
                        help='API Username')
    parser.add_argument('-k', '--key', dest='key', default='', help='API Key')
    parser.add_argument(
        '-c',
        '--credfile',
        dest='credentials',
        default=os.path.expanduser('~/.dtapi'),
        help='Optional file with API username and API key, one per line.')
    # 'store_true' so that passing the flag actually switches rate limiting
    # on; with 'store_false' and a False default it could never be enabled.
    parser.add_argument(
        '-l',
        '--rate-limit',
        dest='rate_limit',
        action='store_true',
        default=False,
        help='Rate limit API calls against the API based on per minute limits.'
    )
    parser.add_argument('-f',
                        '--format',
                        dest='format',
                        choices=['list', 'json', 'xml', 'html'],
                        default='json')
    # Text mode: the output written below is a str, and sys.stdout (the
    # default) is a text stream as well.
    parser.add_argument('-o',
                        '--outfile',
                        dest='out_file',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help='Output file (defaults to stdout)')
    parser.add_argument(
        '-v',
        '--version',
        action='version',
        version='DomainTools CLI API Client {0}'.format(version))
    parser.add_argument('--no-https',
                        dest='https',
                        action='store_false',
                        default=True,
                        help='Use HTTP instead of HTTPS.')
    parser.add_argument(
        '--no-verify-ssl',
        dest='verify_ssl',
        action='store_false',
        default=True,
        help='Skip verification of SSL certificate when making HTTPs API calls'
    )

    subparsers = parser.add_subparsers(
        help=
        'The name of the API call you wish to perform (`whois` for example)',
        dest='api_call')
    subparsers.required = True

    # inspect.getargspec() is gone in Python 3.11+; prefer getfullargspec()
    # and fall back to the legacy name only where that is all there is.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    no_default = object()

    for api_call in API_CALLS:
        api_method = getattr(API, api_call)
        subparser = subparsers.add_parser(api_call, help=api_method.__name__)
        spec = get_spec(api_method)

        # Defaults belong to the LAST len(defaults) arguments, so pair them up
        # from the right and then restore the declaration order.
        paired = list(
            zip_longest(reversed(spec.args or []),
                        reversed(spec.defaults or []),
                        fillvalue=no_default))
        for argument_name, default in reversed(paired):
            if argument_name == 'self':
                continue
            elif default is no_default:
                subparser.add_argument(argument_name)
            else:
                subparser.add_argument('--{0}'.format(
                    argument_name.replace('_', '-')),
                                       dest=argument_name,
                                       default=default,
                                       nargs='*')

    arguments = vars(parser.parse_args(args) if args else parser.parse_args())
    out_file = arguments.pop('out_file')
    out_format = arguments.pop('format')

    # Everything popped here configures the client or the credential lookup.
    # Always remove these entries so that none of them is forwarded to the
    # API call as a keyword argument.
    user = arguments.pop('user', None)
    api_key = arguments.pop('key', None)
    credentials_path = arguments.pop('credentials')
    https = arguments.pop('https')
    verify_ssl = arguments.pop('verify_ssl')
    rate_limit = arguments.pop('rate_limit')

    if not user or not api_key:
        try:
            with open(credentials_path) as credentials:
                user = credentials.readline().strip()
                api_key = credentials.readline().strip()
        except (IOError, OSError, ValueError):
            user = api_key = None
        # Also covers a credentials file that exists but is empty/incomplete.
        if not user or not api_key:
            sys.stderr.write(
                'Credentials are required to perform API calls.\n')
            sys.exit(1)

    if not api:  # pragma: no cover
        api = API(user,
                  api_key,
                  https=https,
                  verify_ssl=verify_ssl,
                  rate_limit=rate_limit)

    command = getattr(api, arguments.pop('api_call'))
    for name, value in list(arguments.items()):
        if value in ('-', ['-']):
            # '-' means "read the values from standard input", one per line.
            arguments[name] = [line.strip() for line in sys.stdin]
        elif value == []:
            # An option given without a value acts as a boolean flag.
            arguments[name] = True
        elif isinstance(value, list) and len(value) == 1:
            arguments[name] = value[0]

    response = command(**arguments)
    if out_format == 'list':
        output = str(response.as_list())
    else:
        output = str(getattr(response, out_format))
    if not output.endswith('\n'):
        output += '\n'
    out_file.write(output)

    # Close a file opened for --outfile; leave the standard streams alone.
    if out_file not in (sys.stdout, sys.stderr):
        out_file.close()