def get_data(accounts_list):
    start = datetime.datetime.now()
    # fan the per-account API calls out over a small worker pool,
    # closing the pool when the map completes
    with Pool(4) as pool:
        results_list = pool.map(get_data_worker, accounts_list)
    if results_list:
        policies_list = [result['policy'] for result in results_list if result]
        reports_list = [
            report
            for result in results_list if result
            for report in result['report']
        ]
        sf_client = get_snowflake_client()
        # compute the snapshot clock once so every group shares one timestamp
        snapshot_time = datetime.datetime.utcnow().isoformat()
        if reports_list:
            report_groups = groups_of(15000, reports_list)
            for group in report_groups:
                query = LOAD_REPORT_LIST_QUERY.format(
                    snapshotclock=snapshot_time,
                    format_string=", ".join(["(%s)"] * len(group)))
                sf_client.cursor().execute(query, group)
        if policies_list:
            policy_groups = groups_of(15000, policies_list)
            for group in policy_groups:
                query = LOAD_POLICY_LIST_QUERY.format(
                    snapshotclock=snapshot_time,
                    format_string=", ".join(["(%s)"] * len(group)))
                sf_client.cursor().execute(query, group)
    end = datetime.datetime.now()
    print(
        f"iam_credential_report: start: {start} end: {end} "
        f"total: {(end - start).total_seconds()}"
    )

def insert(table, values, overwrite=False, select="", columns=None, dryrun=False):
    # default to None rather than a shared mutable [] default
    columns = columns or []
    num_rows_inserted = 0
    # snowflake limits the number of rows inserted in a single statement:
    # snowflake.connector.errors.ProgrammingError: 001795 (42601):
    # SQL compilation error: error line 3 at position 158
    # maximum number of expressions in a list exceeded,
    # expected at most 16,384, got 169,667
    for group in utils.groups_of(16384, values):
        num_rows_inserted += do_insert(
            table, group, overwrite, select, columns, dryrun
        )['number of rows inserted']
    return {'number of rows inserted': num_rows_inserted}

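# `groups_of` (used throughout, here via `utils.groups_of`) is the chunking
# helper that keeps each statement under that limit. A minimal sketch of the
# assumed behavior, yielding successive lists of at most n items from any
# iterable; not necessarily the repo's actual implementation:
import itertools

def groups_of(n, iterable):
    # consume any iterable (list, generator, export stream) in chunks of <= n
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk
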
def log_alerts(ctx, alerts):
    if alerts:
        print("Recording alerts.")
        try:
            VALUES_INSERT_LIMIT = 16384
            for alert_group in groups_of(VALUES_INSERT_LIMIT, alerts):
                # filter(None, ...) drops any empty alerts in the group
                db.insert_alerts(list(filter(None, alert_group)))
        except Exception as e:
            log.error("Failed to log alerts", e)
    else:
        print("No alerts to log.")

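# The 16,384 cap here mirrors the Snowflake expression-list limit documented in
# `insert` above; chunking keeps each db.insert_alerts call under it.
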
def get_data(accounts_list):
    start = datetime.datetime.now()
    with Pool(4) as pool:
        instance_list_list = pool.map(get_data_worker, accounts_list)
    # flatten the per-account lists, skipping accounts that returned nothing
    instance_list = [x for l in instance_list_list if l for x in l]
    if instance_list:
        sf_client = get_snowflake_client()
        # compute the snapshot clock once so every group shares one timestamp
        snapshot_time = datetime.datetime.utcnow().isoformat()
        instance_groups = groups_of(15000, instance_list)
        for group in instance_groups:
            query = LOAD_INSTANCE_LIST_QUERY.format(
                snapshotclock=snapshot_time,
                format_string=", ".join(["(%s)"] * len(group)))
            sf_client.cursor().execute(query, group)
    end = datetime.datetime.now()
    print(f"start: {start} end: {end} total: {(end - start).total_seconds()}")

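# For reference, the LOAD_*_QUERY templates used by both get_data functions
# above are assumed to expand into a single multi-row VALUES insert: with three
# rows, format_string becomes "(%s), (%s), (%s)" and the rendered query looks
# roughly like the sketch below (the real templates live elsewhere in the repo):
#
#   INSERT INTO data.instance_list (snapshot_at, raw)
#   SELECT '<snapshotclock>', PARSE_JSON(column1)
#   FROM VALUES (%s), (%s), (%s)
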
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)
    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        agents = {a['uuid']: a for a in get_agent_data()}.values()
        for page in groups_of(10000, agents):
            db.insert(
                table=f'data.{table_name}',
                values=[(agent, timestamp) for agent in page],
                select=db.derive_insert_select(AGENT_LANDING_TABLE),
                columns=db.derive_insert_columns(AGENT_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable Agents')

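# The dict comprehension above de-duplicates agents by uuid (later records win):
#
#   >>> dupes = [{'uuid': 1, 'seen': 'old'}, {'uuid': 1, 'seen': 'new'}]
#   >>> list({a['uuid']: a for a in dupes}.values())
#   [{'uuid': 1, 'seen': 'new'}]
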
def ingest_vulns(table_name):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)
    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        log.info("Exporting vulnerabilities...")
        vulns = TIO.exports.vulns()
        for page in groups_of(10000, vulns):
            db.insert(
                table=f'data.{table_name}',
                values=[(vuln, timestamp) for vuln in page],
                select=db.derive_insert_select(VULN_LANDING_TABLE),
                # derive columns from the vuln landing table, matching the select
                columns=db.derive_insert_columns(VULN_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable vulnerabilities yet')

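# `db.derive_insert_select` and `db.derive_insert_columns` are assumed to build
# the SELECT expression list and column list from a landing-table definition
# (e.g. wrapping VARIANT columns in PARSE_JSON), so both Tenable ingest
# functions stay in sync with their table schemas without hand-written lists.
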
def ingest(table_name, options):
    base_name = re.sub(r'_CONNECTION$', '', table_name)
    storage_account = options['storage_account']
    sas_token = vault.decrypt_if_encrypted(options['sas_token'])
    suffix = options['suffix']
    container_name = options['container_name']
    snowflake_account = options['snowflake_account']
    sa_user = options['sa_user']
    database = options['database']

    block_blob_service = BlockBlobService(account_name=storage_account,
                                          sas_token=sas_token,
                                          endpoint_suffix=suffix)

    db.execute(f"select SYSTEM$PIPE_FORCE_RESUME('DATA.{base_name}_PIPE');")

    last_loaded = db.fetch_latest(f'data.{table_name}', 'loaded_on')
    log.info(f"Last loaded time is {last_loaded}")

    blobs = block_blob_service.list_blobs(container_name)
    new_files = [
        StagedFile(b.name, None) for b in blobs
        if (last_loaded is None or b.properties.creation_time > last_loaded)
    ]
    log.info(f"Found {len(new_files)} files to ingest")

    # Proxy object that abstracts the Snowpipe REST API
    ingest_manager = SimpleIngestManager(
        account=snowflake_account,
        host=f'{snowflake_account}.snowflakecomputing.com',
        user=sa_user,
        pipe=f'{database}.data.{base_name}_PIPE',
        private_key=load_pkb_rsa(PRIVATE_KEY, PRIVATE_KEY_PASSWORD))

    if new_files:
        for file_group in groups_of(4999, new_files):
            response = ingest_manager.ingest_files(file_group)
            log.info(response)
            yield len(file_group)

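# groups_of(4999, ...) keeps each call safely under the Snowpipe REST API's
# documented cap of 5,000 files per insertFiles request.
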
async def main(table_name):
    async with aiohttp.ClientSession() as session:
        cids = [
            c['id']
            for c in (await fetch(session, '/computers')).get('computers', [])
        ]
        log.info(f'loading {len(cids)} computer details')
        computers = await asyncio.gather(
            *[fetch_computer(session, cid, i) for i, cid in enumerate(cids)])
        log.info(f'inserting {len(computers)} computers into {table_name}')
        rows = [
            updated(c.get('computer'),
                    computer_id=cid,
                    recorded_at=c.get('recorded_at'))
            for cid, c in zip(cids, computers)
        ]
        for g in groups_of(100, rows):
            db.insert(table_name, g)
        return len(rows)

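# `updated` above is not defined in this section; a minimal sketch, assuming it
# merges keyword overrides into a copy of the (possibly None) source dict:

def updated(d, **kwargs):
    # return a new dict: contents of d (or empty if d is None) plus overrides
    result = dict(d or {})
    result.update(kwargs)
    return result
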
def ingest(table_name, options):
    table_name = f'data.{table_name}'
    now = datetime.utcnow()
    subscription_connection_name = options['subscription_connection_name']
    cloud_type = options.get('cloud_type', 'reg')
    creds = {
        'clientId': options['client_id'],
        'clientSecret': options['client_secret'],
        'tenantId': options['tenant_id'],
    }

    virtual_machines = []
    for sub in db.fetch(
            GET_SUBSCRIPTION_IDS_SQL.format(subscription_connection_name)):
        # build per-subscription options without clobbering the connector options
        sub_options = creds.copy()
        sub_options.update(API_ENDPOINTS[cloud_type])
        sub_options['subscriptionId'] = sub['SUBSCRIPTION_ID']
        vms = get_vms(sub_options)
        db.insert(
            table=AZURE_COLLECTION_METADATA,
            values=[(now, RUN_ID, sub_options['subscriptionId'], len(vms))],
            columns=[
                'SNAPSHOT_AT', 'RUN_ID', 'SUBSCRIPTION_ID', 'VM_INSTANCE_COUNT'
            ],
        )
        nics = get_nics(sub_options)
        for vm in vms:
            enrich_vm_with_nics(vm, nics)
        virtual_machines.append(vms)

    virtual_machines = [(
        now,
        elem,
        elem.get('hardware_profile'),
        elem.get('id'),
        elem.get('location'),
        elem.get('name'),
        elem.get('network_profile'),
        elem.get('os_profile'),
        elem.get('provisioning_state'),
        elem.get('storage_profile'),
        elem.get('subscription_id'),
        elem.get('tags'),
        elem.get('type'),
        elem.get('vm_id'),
    ) for elem in itertools.chain(*virtual_machines)]

    for group in groups_of(15000, virtual_machines):
        db.insert(
            table_name,
            group,
            select=(
                'column1',
                'PARSE_JSON(column2)',
                'PARSE_JSON(column3)',
                'column4',
                'column5',
                'column6',
                'PARSE_JSON(column7)',
                'PARSE_JSON(column8)',
                'column9',
                'PARSE_JSON(column10)',
                'column11',
                'PARSE_JSON(column12)',
                'column13',
                'column14',
            ),
        )
    yield len(virtual_machines)

def ingest(table_name, options):
    table_name = f'data.{table_name}'
    now = datetime.utcnow()
    client_id = options['client_id']
    secret = options['client_secret']
    tenant = options['tenant_id']
    subscription_connection_name = options['subscription_connection_name']
    creds = ServicePrincipalCredentials(client_id=client_id,
                                        secret=secret,
                                        tenant=tenant)
    subscription_table = (
        f'AZURE_SUBSCRIPTION_{subscription_connection_name}_CONNECTION')

    virtual_machines = []
    for sub in db.fetch(f"SELECT * FROM data.{subscription_table}"):
        sub_id = sub['SUBSCRIPTION_ID']
        vms = get_vms(creds, sub_id)
        db.insert(
            table=AZURE_COLLECTION_METADATA,
            values=[(now, RUN_ID, sub_id, len(vms))],
            columns=[
                'SNAPSHOT_AT', 'RUN_ID', 'SUBSCRIPTION_ID', 'VM_INSTANCE_COUNT'
            ],
        )
        nics = get_nics(creds, sub_id)
        for vm in vms:
            enrich_vm_with_nics(vm, nics)
        virtual_machines.append(vms)

    virtual_machines = [(
        now,
        elem,
        elem.get('hardware_profile'),
        elem.get('id'),
        elem.get('location'),
        elem.get('name'),
        elem.get('network_profile'),
        elem.get('os_profile'),
        elem.get('provisioning_state'),
        elem.get('storage_profile'),
        elem.get('subscription_id'),
        elem.get('tags'),
        elem.get('type'),
        elem.get('vm_id'),
    ) for elem in itertools.chain(*virtual_machines)]

    for group in groups_of(15000, virtual_machines):
        db.insert(table_name,
                  group,
                  select=(
                      'column1',
                      'PARSE_JSON(column2)',
                      'PARSE_JSON(column3)',
                      'column4',
                      'column5',
                      'column6',
                      'PARSE_JSON(column7)',
                      'PARSE_JSON(column8)',
                      'column9',
                      'PARSE_JSON(column10)',
                      'column11',
                      'PARSE_JSON(column12)',
                      'column13',
                      'column14',
                  ))
    yield len(virtual_machines)

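# In both Azure ingest functions above, db.insert's `select` argument is
# assumed to rewrite the statement as INSERT ... SELECT ... FROM VALUES so that
# JSON-ish columns are parsed server-side, roughly:
#
#   INSERT INTO data.<table>
#   SELECT column1, PARSE_JSON(column2), ..., column14
#   FROM VALUES (%s, %s, ..., %s), (%s, %s, ..., %s)
#
# (a sketch of the assumed db.insert behavior, not its actual implementation).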