def ingest_iam(landing_table, aws_access_key=None, aws_secret_key=None, session=None, account=None):
    users = get_iam_users(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )
    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[(
            row,
            monitor_time,
            row['Path'],
            row['UserName'],
            row['UserId'],
            row.get('Arn'),
            row['CreateDate'],
            row.get('PasswordLastUsed'),
            row.get('Account', {}).get('ACCOUNT_ID'),
        ) for row in users],
        select=db.derive_insert_select(LANDING_TABLES_COLUMNS['IAM']),
        columns=db.derive_insert_columns(LANDING_TABLES_COLUMNS['IAM']),
    )
    return len(users)
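# A minimal sketch of the get_iam_users helper called above, assuming it wraps
# boto3's IAM ListUsers paginator and annotates each record so the
# row.get('Account', {}).get('ACCOUNT_ID') lookup in ingest_iam works. The
# helper name exists in the source; everything else here is an assumption.
import boto3

def get_iam_users_sketch(aws_access_key=None, aws_secret_key=None, session=None, account=None):
    # Reuse a provided boto3 session if there is one; otherwise build a client
    # from the static credentials (None values fall back to the default chain).
    client = (session or boto3).client(
        'iam',
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )
    users = []
    for page in client.get_paginator('list_users').paginate():
        for user in page['Users']:
            user['Account'] = account  # assumed shape, e.g. {'ACCOUNT_ID': '123456789012'}
            users.append(user)
    return users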
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    token = options['token']
    asset_entity_id = options['asset_entity_id']
    general_url = f"https://api.assetpanda.com:443/v2/entities/{asset_entity_id}/objects"
    fields_url = f"https://api.assetpanda.com:443/v2/entities/{asset_entity_id}"
    params = {"offset": 0, "limit": PAGE_SIZE}
    total_object_count = 0
    insert_time = datetime.utcnow()

    while params['offset'] <= total_object_count:
        log.debug(f"total_object_count: {total_object_count}")
        assets = get_data(token=token, url=general_url, params=params)
        list_object, total_object_count = get_list_objects_and_total_from_get_object(assets)
        dict_fields = get_data(token, fields_url, params=params)
        list_field = dict_fields["fields"]

        # Strip the field metadata down to the only part we care about: a
        # mapping like {"field_140": "MAC_Address", "field_135": "IP"}.
        clear_fields: dict = reduce(reduce_fields, list_field, {})

        # Replace every "field_NO" key with the value of clear_fields["field_NO"].
        list_object_without_field_id = replace_device_key(list_object, clear_fields)

        db.insert(
            landing_table,
            values=[
                (entry, entry.get('id'), insert_time)
                for entry in list_object_without_field_id
            ],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )
        log.info(f'Inserted {len(list_object_without_field_id)} rows ({landing_table}).')
        yield len(list_object_without_field_id)

        # Advance the offset so each iteration of the while loop fetches new entries.
        params["offset"] += PAGE_SIZE
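# Hedged sketches of the reduce_fields and replace_device_key helpers used
# above, inferred only from the comments in ingest; the AssetPanda field-entry
# shape ('key'/'name') is an assumption.
def reduce_fields_sketch(acc: dict, field: dict) -> dict:
    # Fold one metadata entry into the {"field_NO": "Readable Name"} mapping.
    acc[field['key']] = field['name']
    return acc

def replace_device_key_sketch(objects: list, clear_fields: dict) -> list:
    # Rename "field_NO" keys to their readable names; leave other keys alone.
    return [
        {clear_fields.get(k, k): v for k, v in obj.items()}
        for obj in objects
    ]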
def ingest_ami(
    landing_table, aws_access_key=None, aws_secret_key=None, session=None, account=None
):
    images = get_images(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )
    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[
            (
                row,
                monitor_time,
                row.get('VirtualizationType'),
                row.get('Description'),
                row.get('Tags'),
                row.get('Hypervisor'),
                row.get('EnaSupport'),
                row.get('SriovNetSupport'),
                row.get('ImageId'),
                row.get('State'),
                row.get('BlockDeviceMappings'),
                row.get('Architecture'),
                row.get('ImageLocation'),
                row.get('RootDeviceType'),
                row.get('RootDeviceName'),
                row.get('OwnerId'),
                row.get('CreationDate'),
                row.get('Public'),
                row.get('ImageType'),
                row.get('Name'),
                row.get('Account', {}).get('ACCOUNT_ID'),
                row['Region']['RegionName'],
            )
            for row in images
        ],
        select=db.derive_insert_select(LANDING_TABLES_COLUMNS['AMI']),
        columns=db.derive_insert_columns(LANDING_TABLES_COLUMNS['AMI']),
    )
    return len(images)
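# A minimal sketch of the get_images helper, assuming it runs EC2
# DescribeImages in every region and annotates each image with the Region
# record and Account, which is what the row['Region']['RegionName'] and
# row.get('Account') lookups above require. The annotation shape is an
# assumption, not the confirmed implementation.
import boto3

def get_images_sketch(aws_access_key=None, aws_secret_key=None, session=None, account=None):
    sess = session or boto3.Session(
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )
    images = []
    # A fixed region just to enumerate all regions.
    regions = sess.client('ec2', region_name='us-east-1').describe_regions()['Regions']
    for region in regions:
        ec2 = sess.client('ec2', region_name=region['RegionName'])
        for image in ec2.describe_images(Owners=['self'])['Images']:
            image['Region'] = region    # provides Region.RegionName
            image['Account'] = account  # assumed shape, e.g. {'ACCOUNT_ID': '...'}
            images.append(image)
    return images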
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(f'SELECT MAX(export_at) as time FROM data.{table_name}')
    )['TIME']
    timestamp = datetime.now(timezone.utc)

    if (
        last_export_time is None
        or (timestamp - last_export_time).total_seconds() > 86400
    ):
        # Deduplicate agents by uuid, keeping the last record seen for each.
        agents = {a['uuid']: a for a in get_agent_data()}.values()
        for page in groups_of(10000, agents):
            db.insert(
                table=f'data.{table_name}',
                values=[(agent, timestamp) for agent in page],
                select=db.derive_insert_select(AGENT_LANDING_TABLE),
                columns=db.derive_insert_columns(AGENT_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable Agents')
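# A minimal sketch of the groups_of helper used above and in ingest_vulns,
# assuming it chunks any iterable into lists of at most n items so inserts
# happen in bounded batches; the implementation is an assumption.
from itertools import islice

def groups_of_sketch(n, iterable):
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk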
def test_db_derive_insert_select():
    tests = [
        {
            "test": [('A', 'AUTOINCREMENT')],
            "expected": "",
        },
        {
            "test": [
                ('A', 'AUTOINCREMENT'),
                ('V', 'VARIANT'),
                ('T', 'TIMESTAMP'),
                ('N', 'NUMBER'),
            ],
            "expected": "PARSE_JSON(column1),TRY_TO_TIMESTAMP(column2),column3",
        },
    ]
    for test in tests:
        assert test['expected'] == db.derive_insert_select(test['test'])
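# A sketch of what db.derive_insert_select might do, inferred only from the
# test above: AUTOINCREMENT columns are skipped entirely, VARIANT columns are
# wrapped in PARSE_JSON, TIMESTAMP columns in TRY_TO_TIMESTAMP, and the
# columnN positions count only the non-AUTOINCREMENT columns. This is an
# assumption, not the library's confirmed implementation.
def derive_insert_select_sketch(columns):
    exprs = []
    i = 0
    for _name, ctype in columns:
        if ctype == 'AUTOINCREMENT':
            continue  # Snowflake populates these; they take no VALUES slot.
        i += 1
        if ctype == 'VARIANT':
            exprs.append(f'PARSE_JSON(column{i})')
        elif ctype == 'TIMESTAMP':
            exprs.append(f'TRY_TO_TIMESTAMP(column{i})')
        else:
            exprs.append(f'column{i}')
    return ','.join(exprs)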
def ingest_vulns(table_name):
    last_export_time = next(
        db.fetch(f'SELECT MAX(export_at) as time FROM data.{table_name}')
    )['TIME']
    timestamp = datetime.now(timezone.utc)

    if (
        last_export_time is None
        or (timestamp - last_export_time).total_seconds() > 86400
    ):
        log.info("Exporting vulnerabilities...")
        vulns = TIO.exports.vulns()
        for page in groups_of(10000, vulns):
            db.insert(
                table=f'data.{table_name}',
                values=[(vuln, timestamp) for vuln in page],
                select=db.derive_insert_select(VULN_LANDING_TABLE),
                columns=db.derive_insert_columns(VULN_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable vulnerabilities yet')
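# The module-level TIO client is assumed to be pyTenable's TenableIO, whose
# exports.vulns() iterator matches the usage above; the placeholder key
# strings are hypothetical stand-ins for wherever the options store them.
from tenable.io import TenableIO

TIO = TenableIO('TIO_ACCESS_KEY', 'TIO_SECRET_KEY')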
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    timestamp = datetime.utcnow()
    organization_id = options['organization_id']
    api_secret = options['api_secret']
    api_key = options['api_key']

    params: dict = {"limit": PAGE_SIZE, "page": 1}  # API pages start at 1

    while True:
        devices: list = get_data(organization_id, api_key, api_secret, params)
        params["page"] += 1
        if len(devices) == 0:
            break

        db.insert(
            landing_table,
            values=[(
                timestamp,
                device,
                device.get('deviceId'),
                device.get('osVersionName'),
                device.get('lastSyncStatus'),
                device.get('type'),
                device.get('version'),
                device.get('lastSync'),
                device.get('osVersion'),
                device.get('name'),
                device.get('status'),
                device.get('originId'),
                device.get('appliedBundle'),
                device.get('hasIpBlocking'),
            ) for device in devices],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )
        log.info(f'Inserted {len(devices)} rows.')
        yield len(devices)
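# A hedged sketch of this connector's get_data helper, assuming a Cisco
# Umbrella-style management API authenticated with HTTP Basic auth over the
# api_key and api_secret; the endpoint URL is an assumption, not confirmed by
# the source.
import requests
from requests.auth import HTTPBasicAuth

def get_data_sketch(organization_id, api_key, api_secret, params):
    response = requests.get(
        f'https://management.api.umbrella.com/v1/organizations/{organization_id}/roamingcomputers',
        auth=HTTPBasicAuth(api_key, api_secret),
        params=params,  # {'limit': ..., 'page': ...}
    )
    response.raise_for_status()
    return response.json()  # a list of device records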
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    timestamp = datetime.utcnow()
    client_id = options['client_id']
    client_secret = options['client_secret']

    # Call the authorization endpoint so subsequent API calls can carry an auth token.
    token: str = get_token_basic(client_id, client_secret)

    offset = ""
    params_get_id_devices: dict = {"limit": PAGE_SIZE, "offset": offset}

    while True:
        dict_id_devices: dict = get_data(
            token, CROWDSTRIKE_DEVICES_BY_ID_URL, params_get_id_devices
        )
        resources: list = dict_id_devices["resources"]
        params_get_id_devices["offset"] = get_offset_from_devices_results(dict_id_devices)

        if len(resources) == 0:
            break

        device_details_url_and_params: str = create_url_params_get_devices(
            CROWDSTRIKE_DEVICE_DETAILS_URL, resources
        )
        dict_devices: dict = get_data(token, device_details_url_and_params)
        devices = dict_devices["resources"]

        db.insert(
            landing_table,
            values=[(
                timestamp,
                device,
                device.get('device_id'),
                device.get('first_seen'),
                device.get('system_manufacturer'),
                device.get('config_id_base'),
                device.get('last_seen'),
                device.get('policies'),
                device.get('slow_changing_modified_timestamp'),
                device.get('minor_version'),
                device.get('system_product_name'),
                device.get('hostname'),
                device.get('mac_address'),
                device.get('product_type_desc'),
                device.get('platform_name'),
                device.get('external_ip'),
                device.get('agent_load_flags'),
                device.get('group_hash'),
                device.get('provision_status'),
                device.get('os_version'),
                device.get('groups'),
                device.get('bios_version'),
                device.get('modified_timestamp'),
                device.get('local_ip'),
                device.get('agent_version'),
                device.get('major_version'),
                device.get('meta'),
                device.get('agent_local_time'),
                device.get('bios_manufacturer'),
                device.get('platform_id'),
                device.get('device_policies'),
                device.get('config_id_build'),
                device.get('config_id_platform'),
                device.get('cid'),
                device.get('status'),
                device.get('service_pack_minor'),
                device.get('product_type'),
                device.get('service_pack_major'),
                device.get('build_number'),
                device.get('pointer_size'),
                device.get('site_name'),
                device.get('machine_domain'),
                device.get('ou'),
            ) for device in devices],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )
        log.info(f'Inserted {len(devices)} rows.')
        yield len(devices)
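# Hedged sketches of the two pagination helpers used above. The CrowdStrike
# query API reports its continuation token under meta.pagination, and the
# device-details endpoint accepts repeated ids query parameters; both shapes
# here are assumptions inferred from the call sites, not confirmed code.
def get_offset_from_devices_results_sketch(dict_id_devices: dict) -> str:
    return dict_id_devices.get('meta', {}).get('pagination', {}).get('offset', '')

def create_url_params_get_devices_sketch(details_url: str, resources: list) -> str:
    # e.g. '.../devices/entities/devices/v1?ids=aaa&ids=bbb'
    return details_url + '?' + '&'.join(f'ids={device_id}' for device_id in resources)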
def ingest(table_name, options):
    host_airwatch = options['host_airwatch']
    api_key = options['api_key']
    device_auth = options['device_auth']
    custom_attributes_auth = options['custom_attributes_auth']

    ingest_type = (
        'device' if table_name.endswith('_DEVICE_CONNECTION') else 'custom_attributes'
    )
    timestamp = datetime.utcnow()
    landing_table = f'data.{table_name}'

    if ingest_type == 'device':
        device_params: dict = {'PageSize': PAGE_SIZE, 'Page': 0}
        url = f'https://{host_airwatch}/api/mdm/devices/search'

        while True:
            result: dict = get_data(url, device_auth, api_key, device_params)
            devices = result['Devices']

            db.insert(
                landing_table,
                values=[
                    (
                        timestamp,
                        device,
                        device.get('EasIds'),
                        device.get('Udid'),
                        device.get('SerialNumber'),
                        device.get('MacAddress'),
                        device.get('Imei'),
                        device.get('EasId'),
                        device.get('AssetNumber'),
                        device.get('DeviceFriendlyName'),
                        device.get('LocationGroupId'),
                        device.get('LocationGroupName'),
                        device.get('UserId'),
                        device.get('UserName'),
                        device.get('DataProtectionStatus'),
                        device.get('UserEmailAddress'),
                        device.get('Ownership'),
                        device.get('PlatformId'),
                        device.get('Platform'),
                        device.get('ModelId'),
                        device.get('Model'),
                        device.get('OperatingSystem'),
                        device.get('PhoneNumber'),
                        device.get('LastSeen'),
                        device.get('EnrollmentStatus'),
                        device.get('ComplianceStatus'),
                        device.get('CompromisedStatus'),
                        device.get('LastEnrolledOn'),
                        device.get('LastComplianceCheckOn'),
                        device.get('LastCompromisedCheckOn'),
                        device.get('IsSupervised'),
                        device.get('VirtualMemory'),
                        device.get('DeviceCapacity'),
                        device.get('AvailableDeviceCapacity'),
                        device.get('IsDeviceDNDEnabled'),
                        device.get('IsDeviceLocatorEnabled'),
                        device.get('IsCloudBackupEnabled'),
                        device.get('IsActivationLockEnabled'),
                        device.get('IsNetworkTethered'),
                        device.get('BatteryLevel'),
                        device.get('IsRoaming'),
                        device.get('SystemIntegrityProtectionEnabled'),
                        device.get('ProcessorArchitecture'),
                        device.get('TotalPhysicalMemory'),
                        device.get('AvailablePhysicalMemory'),
                        device.get('DeviceCellularNetworkInfo'),
                        device.get('EnrollmentUserUuid'),
                        device.get('Id'),
                        device.get('Uuid'),
                    )
                    for device in devices
                ],
                select=db.derive_insert_select(LANDING_TABLE_COLUMNS_DEVICE),
                columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS_DEVICE),
            )
            log.info(f'Inserted {len(devices)} rows ({landing_table}).')
            yield len(devices)

            processed_total = (result['Page'] + 1) * result['PageSize']
            if processed_total >= result['Total']:
                break
            device_params['Page'] += 1
    else:
        custom_device_params: dict = {'PageSize': PAGE_SIZE, 'Page': 0}
        url = f'https://{host_airwatch}/api/mdm/devices/customattribute/search'

        while True:
            result: dict = get_data(
                url, custom_attributes_auth, api_key, custom_device_params
            )
            device_attributes = result['Devices']

            db.insert(
                landing_table,
                values=[
                    (
                        timestamp,
                        device_attr,
                        device_attr.get('DeviceId'),
                        device_attr.get('Udid'),
                        device_attr.get('SerialNumber'),
                        device_attr.get('EnrollmentUserName'),
                        device_attr.get('AssetNumber'),
                        device_attr.get('CustomAttributes'),
                    )
                    for device_attr in device_attributes
                ],
                select=db.derive_insert_select(LANDING_TABLE_COLUMNS_CUSTOM_ATTRIBUTES),
                columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS_CUSTOM_ATTRIBUTES),
            )
            log.info(f'Inserted {len(device_attributes)} rows ({landing_table}).')
            yield len(device_attributes)

            processed_total = (result['Page'] + 1) * result['PageSize']
            if processed_total >= result['Total']:
                break
            custom_device_params['Page'] += 1
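# A hedged sketch of the AirWatch get_data helper, assuming HTTP Basic auth
# plus the aw-tenant-code API-key header that the AirWatch REST API expects.
# The 'user:password' format of the auth argument is an assumption inferred
# from the device_auth / custom_attributes_auth options.
import requests

def get_data_sketch(url: str, auth: str, api_key: str, params: dict) -> dict:
    user, password = auth.split(':', 1)
    response = requests.get(
        url,
        auth=(user, password),
        headers={'aw-tenant-code': api_key, 'Accept': 'application/json'},
        params=params,
    )
    response.raise_for_status()
    return response.json()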
def ingest(table_name, options):
    ingest_type = 'client' if table_name.endswith('_CLIENT_CONNECTION') else 'device'
    landing_table = f'data.{table_name}'
    timestamp = datetime.utcnow()
    api_token = options['api_token']
    whitelist = set(options['network_id_whitelist'])

    organizations = get_data("https://api.meraki.com/api/v0/organizations", api_token)
    for organization in organizations:
        organization_id = organization.get('id')
        log.debug(f'Processing Meraki organization id {organization_id}')
        if not organization_id:
            continue

        networks = get_data(
            f"https://api.meraki.com/api/v0/organizations/{organization_id}/networks",
            api_token,
        )
        network_ids = {network.get('id') for network in networks}
        if whitelist:
            network_ids = network_ids.intersection(whitelist)

        for network in network_ids:
            log.debug(f'Processing Meraki network {network}')
            try:
                devices = get_data(
                    f"https://api.meraki.com/api/v0/networks/{network}/devices",
                    api_token,
                )
            except requests.exceptions.HTTPError as e:
                log.error(f"network {network} not accessible")
                log.error(e)
                continue

            if ingest_type == 'device':
                db.insert(
                    landing_table,
                    values=[(
                        timestamp,
                        device,
                        device.get('serial'),
                        device.get('address'),
                        device.get('name'),
                        device.get('networkId'),
                        device.get('model'),
                        device.get('mac'),
                        device.get('lanIp'),
                        device.get('wan1Ip'),
                        device.get('wan2Ip'),
                        device.get('tags'),
                        device.get('lng'),
                        device.get('lat'),
                    ) for device in devices],
                    select=db.derive_insert_select(LANDING_TABLE_COLUMNS_DEVICE),
                    columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS_DEVICE),
                )
                log.info(f'Inserted {len(devices)} rows ({landing_table}).')
                yield len(devices)
            else:
                for device in devices:
                    serial_number = device['serial']
                    try:
                        clients = get_data(
                            f"https://api.meraki.com/api/v0/devices/{serial_number}/clients",
                            api_token,
                        )
                    except requests.exceptions.HTTPError as e:
                        log.error(f"device {serial_number} not accessible")
                        log.error(e)
                        continue

                    db.insert(
                        landing_table,
                        values=[
                            (
                                timestamp,
                                client,
                                client.get('id'),
                                client.get('mac'),
                                client.get('description'),
                                client.get('mdnsName'),
                                client.get('dhcpHostname'),
                                client.get('ip'),
                                client.get('switchport'),
                                client.get('vlan') or None,  # vlan is sometimes ''
                                client.get('usage', {}).get('sent') or None,
                                client.get('usage', {}).get('recv') or None,
                                serial_number,
                            )
                            for client in clients
                        ],
                        select=db.derive_insert_select(LANDING_TABLE_COLUMNS_CLIENT),
                        columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS_CLIENT),
                    )
                    log.info(f'Inserted {len(clients)} rows ({landing_table}).')
                    yield len(clients)
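# A hedged sketch of the Meraki get_data helper, assuming the v0 dashboard
# API's X-Cisco-Meraki-API-Key header; raise_for_status is what lets the
# callers above catch requests.exceptions.HTTPError for inaccessible
# networks and devices.
import requests

def get_data_sketch(url: str, api_token: str):
    response = requests.get(url, headers={'X-Cisco-Meraki-API-Key': api_token})
    response.raise_for_status()
    return response.json()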