def get_data(accounts_list):
    start = datetime.datetime.now()
    # collect per-account results in parallel; each worker returns a dict
    # with 'policy' and 'report' entries, or None on failure
    results_list = Pool(4).map(get_data_worker, accounts_list)
    if results_list:
        policies_list = [result['policy'] for result in results_list if result]
        reports_list = [
            report for result in results_list if result
            for report in result['report']
        ]
        sf_client = get_snowflake_client()
        snapshot_time = datetime.datetime.utcnow().isoformat()
        if reports_list:
            # batch rows so each INSERT stays under Snowflake's 16,384-expression limit
            report_groups = groups_of(15000, reports_list)
            for group in report_groups:
                query = LOAD_REPORT_LIST_QUERY.format(
                    snapshotclock=snapshot_time,
                    format_string=", ".join(["(%s)"] * len(group)))
                sf_client.cursor().execute(query, group)
        if policies_list:
            policy_groups = groups_of(15000, policies_list)
            for group in policy_groups:
                query = LOAD_POLICY_LIST_QUERY.format(
                    snapshotclock=snapshot_time,
                    format_string=", ".join(["(%s)"] * len(group)))
                sf_client.cursor().execute(query, group)
    end = datetime.datetime.now()
    print(
        f"iam_credential_report: start: {start} end: {end} total: {(end - start).total_seconds()}"
    )
Example #2
def insert(table, values, overwrite=False, select="", columns=[], dryrun=False):
    num_rows_inserted = 0
    # snowflake limits the number of rows inserted in a single statement:
    #   snowflake.connector.errors.ProgrammingError: 001795 (42601):
    #     SQL compilation error: error line 3 at position 158
    #   maximum number of expressions in a list exceeded,
    #     expected at most 16,384, got 169,667
    for group in utils.groups_of(16384, values):
        num_rows_inserted += do_insert(
            table, group, overwrite, select, columns, dryrun
        )['number of rows inserted']
    return {'number of rows inserted': num_rows_inserted}
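Every example on this page relies on a groups_of helper to split an iterable into fixed-size batches before inserting, which keeps each statement under the 16,384-expression limit described in the comment above. The helper itself is not shown here; a minimal sketch of what it is assumed to look like (name and argument order taken from the calls, implementation assumed):

import itertools

def groups_of(n, iterable):
    # Assumed implementation, not shown in the source: yield successive lists
    # of at most n items until the iterable is exhausted.
    it = iter(iterable)
    while True:
        group = list(itertools.islice(it, n))
        if not group:
            return
        yield group

Written as a generator, such a helper also works for streaming inputs (for example the Tenable vulnerability export below), since it never materializes the whole input at once.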
Example #3
def log_alerts(ctx, alerts):
    if len(alerts):
        print("Recording alerts.")
        try:
            VALUES_INSERT_LIMIT = 16384
            for alert_group in groups_of(VALUES_INSERT_LIMIT, alerts):
                db.insert_alerts(list(filter(None, alert_group)))

        except Exception as e:
            log.error("Failed to log alert", e)

    else:
        print("No alerts to log.")
Example #4
def get_data(accounts_list):
    start = datetime.datetime.now()
    # each worker returns a list of instances (or None); flatten below, skipping empty results
    instance_list_list = Pool(4).map(get_data_worker, accounts_list)
    instance_list = [x for l in instance_list_list if l for x in l]
    if instance_list:
        sf_client = get_snowflake_client()
        instance_groups = groups_of(15000, instance_list)
        for group in instance_groups:
            query = LOAD_INSTANCE_LIST_QUERY.format(
                snapshotclock=datetime.datetime.utcnow().isoformat(),
                format_string=", ".join(["(%s)"] * len(group)))
            sf_client.cursor().execute(query, group)
    end = datetime.datetime.now()
    print(f"start: {start} end: {end} total: {(end - start).total_seconds()}")
Example #5
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        agents = {a['uuid']: a for a in get_agent_data()}.values()
        for page in groups_of(10000, agents):
            db.insert(
                table=f'data.{table_name}',
                values=[(agent, timestamp) for agent in page],
                select=db.derive_insert_select(AGENT_LANDING_TABLE),
                columns=db.derive_insert_columns(AGENT_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable Agents')
Example #6
def ingest_vulns(table_name):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        log.info("Exporting vulnerabilities...")
        vulns = TIO.exports.vulns()

        for page in groups_of(10000, vulns):
            db.insert(
                table=f'data.{table_name}',
                values=[(vuln, timestamp) for vuln in page],
                select=db.derive_insert_select(VULN_LANDING_TABLE),
                columns=db.derive_insert_columns(VULN_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable vulnerabilities yet')
Example #7
def ingest(table_name, options):
    base_name = re.sub(r'_CONNECTION$', '', table_name)
    storage_account = options['storage_account']
    sas_token = vault.decrypt_if_encrypted(options['sas_token'])
    suffix = options['suffix']
    container_name = options['container_name']
    snowflake_account = options['snowflake_account']
    sa_user = options['sa_user']
    database = options['database']

    block_blob_service = BlockBlobService(account_name=storage_account,
                                          sas_token=sas_token,
                                          endpoint_suffix=suffix)

    db.execute(f"select SYSTEM$PIPE_FORCE_RESUME('DATA.{base_name}_PIPE');")

    last_loaded = db.fetch_latest(f'data.{table_name}', 'loaded_on')

    log.info(f"Last loaded time is {last_loaded}")

    blobs = block_blob_service.list_blobs(container_name)
    new_files = [
        StagedFile(b.name, None) for b in blobs
        if (last_loaded is None or b.properties.creation_time > last_loaded)
    ]

    log.info(f"Found {len(new_files)} files to ingest")

    # Proxy object that abstracts the Snowpipe REST API
    ingest_manager = SimpleIngestManager(
        account=snowflake_account,
        host=f'{snowflake_account}.snowflakecomputing.com',
        user=sa_user,
        pipe=f'{database}.data.{base_name}_PIPE',
        private_key=load_pkb_rsa(PRIVATE_KEY, PRIVATE_KEY_PASSWORD))

    if len(new_files) > 0:
        # the Snowpipe REST endpoint accepts only a limited number of files per request, so submit in batches
        for file_group in groups_of(4999, new_files):
            response = ingest_manager.ingest_files(file_group)
            log.info(response)
            yield len(file_group)
Example #8
async def main(table_name):
    async with aiohttp.ClientSession() as session:
        cids = [
            c['id']
            for c in (await fetch(session, '/computers')).get('computers', [])
        ]

        log.info(f'loading {len(cids)} computer details')
        computers = await asyncio.gather(
            *[fetch_computer(session, cid, i) for i, cid in enumerate(cids)])

        log.info(f'inserting {len(computers)} computers into {table_name}')
        rows = [
            updated(c.get('computer'),
                    computer_id=cid,
                    recorded_at=c.get('recorded_at'))
            for cid, c in zip(cids, computers)
        ]
        for g in groups_of(100, rows):
            db.insert(table_name, g)
        return len(rows)
Example #9
def ingest(table_name, options):
    table_name = f'data.{table_name}'
    now = datetime.utcnow()
    subscription_connection_name = options['subscription_connection_name']
    cloud_type = options.get('cloud_type', 'reg')
    creds = {
        'clientId': options['client_id'],
        'clientSecret': options['client_secret'],
        'tenantId': options['tenant_id'],
    }

    virtual_machines = []
    for sub in db.fetch(
            GET_SUBSCRIPTION_IDS_SQL.format(subscription_connection_name)):
        options = creds.copy()
        options.update(API_ENDPOINTS[cloud_type])
        options['subscriptionId'] = sub['SUBSCRIPTION_ID']
        vms = get_vms(options)
        db.insert(
            table=AZURE_COLLECTION_METADATA,
            values=[(now, RUN_ID, options['subscriptionId'], len(vms))],
            columns=[
                'SNAPSHOT_AT', 'RUN_ID', 'SUBSCRIPTION_ID', 'VM_INSTANCE_COUNT'
            ],
        )
        nics = get_nics(options)
        for vm in vms:
            enrich_vm_with_nics(vm, nics)
        virtual_machines.append(vms)

    virtual_machines = [(
        now,
        elem,
        elem.get('hardware_profile'),
        elem.get('id'),
        elem.get('location'),
        elem.get('name'),
        elem.get('network_profile'),
        elem.get('os_profile'),
        elem.get('provisioning_state'),
        elem.get('storage_profile'),
        elem.get('subscription_id'),
        elem.get('tags'),
        elem.get('type'),
        elem.get('vm_id'),
    ) for elem in itertools.chain(*virtual_machines)]

    for group in groups_of(15000, virtual_machines):
        db.insert(
            table_name,
            group,
            select=(
                'column1',
                'PARSE_JSON(column2)',
                'PARSE_JSON(column3)',
                'column4',
                'column5',
                'column6',
                'PARSE_JSON(column7)',
                'PARSE_JSON(column8)',
                'column9',
                'PARSE_JSON(column10)',
                'column11',
                'PARSE_JSON(column12)',
                'column13',
                'column14',
            ),
        )

    yield len(virtual_machines)
Example #10
def ingest(table_name, options):
    table_name = f'data.{table_name}'
    now = datetime.utcnow()
    client_id = options['client_id']
    secret = options['client_secret']
    tenant = options['tenant_id']
    subscription_connection_name = options['subscription_connection_name']

    creds = ServicePrincipalCredentials(client_id=client_id,
                                        secret=secret,
                                        tenant=tenant)

    subscription_table = f'AZURE_SUBSCRIPTION_{subscription_connection_name}_CONNECTION'

    virtual_machines = []
    for sub in db.fetch(f"SELECT * FROM data.{subscription_table}"):
        sub_id = sub['SUBSCRIPTION_ID']
        vms = get_vms(creds, sub_id)
        db.insert(
            table=AZURE_COLLECTION_METADATA,
            values=[(now, RUN_ID, sub_id, len(vms))],
            columns=[
                'SNAPSHOT_AT', 'RUN_ID', 'SUBSCRIPTION_ID', 'VM_INSTANCE_COUNT'
            ],
        )
        nics = get_nics(creds, sub_id)
        for vm in vms:
            enrich_vm_with_nics(vm, nics)
        virtual_machines.append(vms)

    virtual_machines = [(
        now,
        elem,
        elem.get('hardware_profile'),
        elem.get('id'),
        elem.get('location'),
        elem.get('name'),
        elem.get('network_profile'),
        elem.get('os_profile'),
        elem.get('provisioning_state'),
        elem.get('storage_profile'),
        elem.get('subscription_id'),
        elem.get('tags'),
        elem.get('type'),
        elem.get('vm_id'),
    ) for elem in itertools.chain(*virtual_machines)]

    for group in groups_of(15000, virtual_machines):
        db.insert(table_name,
                  group,
                  select=(
                      'column1',
                      'PARSE_JSON(column2)',
                      'PARSE_JSON(column3)',
                      'column4',
                      'column5',
                      'column6',
                      'PARSE_JSON(column7)',
                      'PARSE_JSON(column8)',
                      'column9',
                      'PARSE_JSON(column10)',
                      'column11',
                      'PARSE_JSON(column12)',
                      'column13',
                      'column14',
                  ))

    yield len(virtual_machines)