def ingest_elb(landing_table,
               aws_access_key=None,
               aws_secret_key=None,
               session=None,
               account=None):
    elbs = get_all_elbs(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )
    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[(
            row,
            monitor_time,
            row.get('CanonicalHostedZoneName', ''),
            row.get('CanonicalHostedZoneNameID', ''),
            row['CreatedTime'],
            row['DNSName'],
            row['LoadBalancerName'],
            row['Region']['RegionName'],
            row['Scheme'],
            row.get('VPCId', row.get('VpcId')),  # classic ELBs report 'VPCId', v2 load balancers 'VpcId'
            row.get('Account', {}).get('ACCOUNT_ID'),
        ) for row in elbs],
        select='PARSE_JSON(column1), column2, column3, column4, column5, column6, '
               'column7, column8, column9, column10, column11',
    )
    return len(elbs)
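Every example here funnels rows through a db.insert helper that is not shown. From the way it is called, it appears to build a Snowflake INSERT INTO ... SELECT ... FROM VALUES statement, with PARSE_JSON(columnN) re-parsing serialized rows into VARIANT columns. A minimal sketch of that idea, assuming a snowflake.connector connection and its pyformat parameter style (the real helper presumably does more, e.g. the columns= and overwrite= options used below):

import json

def insert(connection, table, values, select=None):
    # Build "INSERT INTO <table> SELECT <select> FROM VALUES (...), (...)".
    # Each tuple becomes one row of the inline VALUES table; dicts and lists are
    # serialized to JSON so that PARSE_JSON(columnN) in the select can turn them
    # back into VARIANT columns.
    if not values:
        return 0
    width = len(values[0])
    select = select or ', '.join(f'column{i}' for i in range(1, width + 1))
    placeholders = ', '.join(['(' + ', '.join(['%s'] * width) + ')'] * len(values))
    sql = f'INSERT INTO {table} SELECT {select} FROM VALUES {placeholders}'
    params = [
        json.dumps(v, default=str) if isinstance(v, (dict, list)) else v
        for row in values
        for v in row
    ]
    connection.cursor().execute(sql, params)
    return len(values)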
Example #2
def ingest_users(tio, table_name):
    users = tio.users.list()
    timestamp = datetime.utcnow()

    for user in users:
        user['role'] = {
            16: 'Basic',
            24: 'Scan Operator',
            32: 'Standard',
            40: 'Scan Manager',
            64: 'Administrator',
        }.get(user['permissions'], f"unknown permissions {user['permissions']}")

    db.insert(
        table=f'data.{table_name}',
        values=[(
            user.get('username'),
            user.get('role'),
            user,
            timestamp,
            user.get('uuid'),
            user.get('id'),
            user.get('user_name'),
            user.get('email'),
            user.get('type'),
            user.get('permissions'),
            user.get('last_login_attempt'),
            user.get('login_fail_count'),
            user.get('login_fail_total'),
            user.get('enabled'),
            user.get('two_factor'),
            user.get('lastlogin'),
            user.get('uuid_id'),
        ) for user in users],
        select="""
            column1, column2, PARSE_JSON(column3), column4, column5, column6,
            column7, column8, column9, column10,
            to_timestamp(column11, 3)::timestamp_ltz, column12, column13,
            column14, PARSE_JSON(column15),
            to_timestamp(column16, 3)::timestamp_ltz, column17
        """,
    )
Example #3
def ingest(table_name, options):
    current_time = datetime.datetime.utcnow()
    org_client = sts_assume_role(
        src_role_arn=options['source_role_arn'],
        dest_role_arn=options['destination_role_arn'],
        dest_external_id=options['destination_role_external_id']).client(
            'organizations')

    account_pages = org_client.get_paginator('list_accounts').paginate()
    accounts = [a for page in account_pages for a in page['Accounts']]
    db.insert(table=f'data.{table_name}',
              values=[(
                  a,
                  current_time,
                  a['Arn'],
                  a['Email'],
                  a['Id'],
                  a['JoinedMethod'],
                  a['JoinedTimestamp'],
                  a['Name'],
                  a['Status'],
              ) for a in accounts],
              select=(
                  'PARSE_JSON(column1)',
                  'column2',
                  'column3::STRING',
                  'column4::STRING',
                  'column5::NUMBER',
                  'column6::STRING',
                  'column7::TIMESTAMP_LTZ',
                  'column8::STRING',
                  'column9::STRING',
              ))
    return len(accounts)
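sts_assume_role is not shown; the source/destination/external-id arguments suggest two-hop role chaining through STS. A sketch with boto3, assuming the helper returns a boto3.Session built from the final temporary credentials (consistent with .client('organizations') being called on its result):

import boto3

def sts_assume_role(src_role_arn, dest_role_arn, dest_external_id=None):
    # First hop: assume the source role with the default credentials.
    src_creds = boto3.client('sts').assume_role(
        RoleArn=src_role_arn, RoleSessionName='ingest'
    )['Credentials']

    # Second hop: use those credentials to assume the destination role,
    # passing the external id if the trust policy requires one.
    kwargs = {'RoleArn': dest_role_arn, 'RoleSessionName': 'ingest'}
    if dest_external_id:
        kwargs['ExternalId'] = dest_external_id
    dest_creds = boto3.client(
        'sts',
        aws_access_key_id=src_creds['AccessKeyId'],
        aws_secret_access_key=src_creds['SecretAccessKey'],
        aws_session_token=src_creds['SessionToken'],
    ).assume_role(**kwargs)['Credentials']

    return boto3.Session(
        aws_access_key_id=dest_creds['AccessKeyId'],
        aws_secret_access_key=dest_creds['SecretAccessKey'],
        aws_session_token=dest_creds['SessionToken'],
    )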
Example #4
def ingest_users(url, headers, landing_table, timestamp):
    while 1:
        response = requests.get(url=url, headers=headers)
        if response.status_code != 200:
            log.error('OKTA REQUEST FAILED: ', response.text)
            return

        result = response.json()
        if result == []:
            break

        db.insert(
            landing_table,
            values=[(row, timestamp) for row in result],
            select='PARSE_JSON(column1), column2',
        )

        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

        url = ''
        links = requests.utils.parse_header_links(response.headers['Link'])
        for link in links:
            if link['rel'] == 'next':
                url = link['url']

        if len(url) == 0:
            break
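The pagination above leans on requests.utils.parse_header_links, which turns an RFC 5988 Link header into a list of dicts with url and rel keys, for example:

import requests

link_header = ('<https://example.okta.com/api/v1/users?after=100>; rel="next", '
               '<https://example.okta.com/api/v1/users>; rel="self"')
links = requests.utils.parse_header_links(link_header)
# [{'url': 'https://example.okta.com/api/v1/users?after=100', 'rel': 'next'},
#  {'url': 'https://example.okta.com/api/v1/users', 'rel': 'self'}]
next_url = next((link['url'] for link in links if link['rel'] == 'next'), '')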
def ingest_iam(landing_table,
               aws_access_key=None,
               aws_secret_key=None,
               session=None,
               account=None):
    users = get_iam_users(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )

    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[(
            row,
            monitor_time,
            row['Path'],
            row['UserName'],
            row['UserId'],
            row.get('Arn'),
            row['CreateDate'],
            row.get('PasswordLastUsed'),
            row.get('Account', {}).get('ACCOUNT_ID'),
        ) for row in users],
        select=db.derive_insert_select(LANDING_TABLES_COLUMNS['IAM']),
        columns=db.derive_insert_columns(LANDING_TABLES_COLUMNS['IAM']),
    )

    return len(users)
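db.derive_insert_select and db.derive_insert_columns are not shown. From their use, they appear to derive the column list and the positional SELECT expressions from a landing-table schema. A plausible sketch, assuming the schema is a list of (name, type) pairs and that VARIANT columns go through PARSE_JSON (the column names below are illustrative, not the real IAM schema):

# Hypothetical schema definition for the IAM landing table.
LANDING_TABLES_COLUMNS = {
    'IAM': [
        ('raw', 'VARIANT'),
        ('ingested_at', 'TIMESTAMP_LTZ'),
        ('path', 'STRING'),
        ('user_name', 'STRING'),
        ('user_id', 'STRING'),
        ('arn', 'STRING'),
        ('created_date', 'TIMESTAMP_LTZ'),
        ('password_last_used', 'TIMESTAMP_LTZ'),
        ('account_id', 'STRING'),
    ]
}

def derive_insert_columns(schema):
    # Column names, in insert order.
    return [name for name, _type in schema]

def derive_insert_select(schema):
    # Positional column references, wrapping VARIANT columns in PARSE_JSON.
    return ', '.join(
        f'PARSE_JSON(column{i})' if _type == 'VARIANT' else f'column{i}'
        for i, (_name, _type) in enumerate(schema, start=1)
    )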
Example #6
def ingest_users(url, headers, landing_table, now):
    while 1:
        response = requests.get(url=url, headers=headers)
        if response.status_code != 200:
            log.error('OKTA REQUEST FAILED: ', response.text)
            return

        result = response.json()
        if result == []:
            break

        db.insert(
            landing_table, [{'raw': row, 'event_time': now} for row in result],
        )

        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

        url = ''
        links = requests.utils.parse_header_links(response.headers['Link'])
        for link in links:
            if link['rel'] == 'next':
                url = link['url']

        if len(url) == 0:
            break
def run_baseline(name, comment):
    metadata = None
    try:
        metadata = yaml.safe_load(comment)
        assert type(metadata) is dict

        source = metadata['log source']
        required_values = metadata['required values']
        code_location = metadata['module name']
        time_filter = metadata['filter']
        time_column = metadata['history']

    except Exception as e:
        log.error(e, f"{name} has invalid metadata: >{metadata}<, skipping")
        return

    with open(f"../baseline_modules/{code_location}/{code_location}.R") as f:
        r_code = f.read()

    r_code = format_code(r_code, required_values)
    frame = query_log_source(source, time_filter, time_column)
    ro.globalenv['input_table'] = frame

    output = ro.r(r_code)
    output = output.to_dict()

    results = unpack(output)
    try:
        log.info(f"{name} generated {len(results)} rows")
        db.insert(f"{DATA_SCHEMA}.{name}", results, overwrite=True)
    except Exception as e:
        log.error("Failed to insert the results into the target table", e)
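Note that assigning frame into ro.globalenv only works if a pandas-to-R conversion is active; if query_log_source returns a pandas DataFrame, something along these lines is needed first (a sketch, assuming the classic pandas2ri converter from older rpy2 releases):

from rpy2 import robjects as ro
from rpy2.robjects import pandas2ri

pandas2ri.activate()  # register pandas <-> R data.frame conversion (older rpy2 API)
ro.globalenv['input_table'] = frame  # frame: pandas DataFrame from query_log_source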
def ingest_sg(landing_table,
              aws_access_key=None,
              aws_secret_key=None,
              session=None,
              account=None):
    groups = get_all_security_groups(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )
    monitor_time = datetime.utcnow().isoformat()
    db.insert(
        landing_table,
        values=[(
            row,
            row['Description'],
            monitor_time,
            row['GroupId'],
            row['GroupName'],
            row['OwnerId'],
            row['Region']['RegionName'],
            row.get('VpcId'),
        ) for row in groups],
        select=
        'PARSE_JSON(column1), column2, column3, column4, column5, column6, column7, column8',
    )
    return len(groups)
Example #9
def ingest_ec2(
    landing_table, aws_access_key=None, aws_secret_key=None, session=None, account=None
):
    instances = get_ec2_instances(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )

    monitor_time = datetime.utcnow().isoformat()
    db.insert(
        landing_table,
        values=[
            (
                row,
                row['InstanceId'],
                row['Architecture'],
                monitor_time,
                row['InstanceType'],
                # can be not present if a managed instance such as EMR
                row.get('KeyName', ''),
                row['LaunchTime'],
                row['Region']['RegionName'],
                row['State']['Name'],
                row.get('InstanceName', ''),
                row.get('Account', {}).get('ACCOUNT_ID'),
            )
            for row in instances
        ],
        select='PARSE_JSON(column1), column2, column3, column4, column5, column6, column7, column8, column9, column10, column11',
    )

    return len(instances)
Example #10
def run_baseline(name, comment):
    from rpy2 import robjects as ro
    metadata = None
    try:
        metadata = yaml.safe_load(comment)
        assert type(metadata) is dict

        source = metadata['log source']
        required_values = metadata['required values']
        code_location = metadata['module name']
        time_filter = metadata['filter']
        time_column = metadata['history']

    except Exception as e:
        log.error(e, f"{name} has invalid metadata: >{metadata}<, skipping")
        return

    os.mkdir(FORMATTED_CODE_DIRECTORY)
    files = os.listdir(f'../baseline_modules/{code_location}')

    shutil.copyfile("../baseline_modules/run_module.R",
                    f"{FORMATTED_CODE_DIRECTORY}/run_module.R")

    for file in files:
        print(file)
        if not file.startswith('.'):
            with open(f"../baseline_modules/{code_location}/{file}") as f:
                r_code = f.read()
            r_code = format_code(r_code, required_values)
            with open(f"{FORMATTED_CODE_DIRECTORY}/{file}", 'w+') as ff:
                ff.write(r_code)

    with open(f"{FORMATTED_CODE_DIRECTORY}/run_module.R") as fr:
        r_code = fr.read()
    frame = query_log_source(source, time_filter, time_column)
    ro.globalenv['input_table'] = frame
    ro.r(f"setwd('./{FORMATTED_CODE_DIRECTORY}')")
    output = ro.r(r_code)
    output = output.to_dict()

    results = unpack(output)

    # Get the columns of the baseline table; find the timestamp column and pop it from the list

    columns = [
        row['name'] for row in db.fetch(f'desc table {DATA_SCHEMA}.{name}')
    ]
    columns.remove('EXPORT_TIME')
    try:
        log.info(f"{name} generated {len(results)} rows")
        db.insert(f"{DATA_SCHEMA}.{name}",
                  results,
                  columns=columns,
                  overwrite=True)
    except Exception as e:
        log.error("Failed to insert the results into the target table", e)
    finally:
        shutil.rmtree(f"../{FORMATTED_CODE_DIRECTORY}")
def ami_dispatch(
    landing_table,
    aws_access_key='',
    aws_secret_key='',
    accounts=None,
    source_role_arn='',
    destination_role_name='',
    external_id='',
):
    results = 0
    if accounts:
        for account in accounts:
            id = account['ACCOUNT_ID']
            name = account['ACCOUNT_ALIAS']
            target_role = f'arn:aws:iam::{id}:role/{destination_role_name}'
            log.info(f"Using role {target_role}")
            try:
                session = sts_assume_role(source_role_arn, target_role,
                                          external_id)

                results += ingest_ami(landing_table,
                                      session=session,
                                      account=account)

                db.insert(
                    AWS_ACCOUNTS_METADATA,
                    values=[(datetime.utcnow(), RUN_ID, id, name, results)],
                    columns=[
                        'snapshot_at',
                        'run_id',
                        'account_id',
                        'account_alias',
                        'ami_count',
                    ],
                )

            except Exception as e:
                db.insert(
                    AWS_ACCOUNTS_METADATA,
                    values=[(datetime.utcnow(), RUN_ID, id, name, 0, e)],
                    columns=[
                        'snapshot_at',
                        'run_id',
                        'account_id',
                        'account_alias',
                        'ami_count',
                        'error',
                    ],
                )
                log.error(f"Unable to assume role {target_role} with error", e)
    else:
        results += ingest_ami(landing_table,
                              aws_access_key=aws_access_key,
                              aws_secret_key=aws_secret_key)

    return results
Example #12
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    service_user_creds = options['service_user_creds']
    for subject in options.get('subjects_list') or ['']:
        for event in LOGIN_EVENTS:
            items = get_logs(
                service_user_creds,
                with_subject=subject,
                event_name=event,
                start_time=db.fetch_latest(
                    landing_table,
                    where=(
                        f"delegating_subject='{subject}' AND " f"event_name='{event}'"
                    ),
                ),
            ).get('items', [])

            db.insert(
                landing_table,
                values=[
                    (
                        item['id']['time'],
                        item['etag'].strip('"'),
                        subject,
                        item.get('events', [{}])[0].get('name'),
                        {
                            p['name']: (
                                p.get('value')
                                or p.get('boolValue')
                                or p.get('multiValue')
                            )
                            for p in item.get('events', [{}])[0].get('parameters', [])
                        },
                        item['id']['customerId'],
                        item['actor'].get('email'),
                        item['actor'].get('profileId'),
                        item.get('ipAddress'),
                        item,
                    )
                    for item in items
                ],
                select=(
                    'CURRENT_TIMESTAMP()',
                    'column1',
                    'column2',
                    'column3',
                    'column4',
                    'PARSE_JSON(column5)',
                    'column6',
                    'column7',
                    'column8',
                    'column9',
                    'PARSE_JSON(column10)',
                ),
            )
            yield len(items)
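get_logs is not shown; given the service-user credentials, the optional subject to impersonate, the event name, and the start time, it most likely wraps the Admin SDK Reports API activities.list call. A rough sketch using google-auth and google-api-python-client (the function shape and scope are assumptions):

from google.oauth2 import service_account
from googleapiclient.discovery import build

def get_logs(service_user_creds, with_subject='', event_name=None, start_time=None):
    creds = service_account.Credentials.from_service_account_info(
        service_user_creds,
        scopes=['https://www.googleapis.com/auth/admin.reports.audit.readonly'],
    )
    if with_subject:
        creds = creds.with_subject(with_subject)  # domain-wide delegation

    reports = build('admin', 'reports_v1', credentials=creds)
    return reports.activities().list(
        userKey='all',
        applicationName='login',
        eventName=event_name,
        startTime=start_time,  # RFC 3339 timestamp, or None for the API's default window
    ).execute()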
Example #13
def ingest(table_name, options):
    landing_table = f'data.{table_name}'

    token = options['token']
    asset_entity_id = options['asset_entity_id']

    general_url = (
        f"https://api.assetpanda.com:443//v2/entities/{asset_entity_id}/objects"
    )
    fields_url = f"https://api.assetpanda.com:443//v2/entities/{asset_entity_id}"

    params = {"offset": 0, "limit": PAGE_SIZE}

    total_object_count = 0

    insert_time = datetime.utcnow()

    while params['offset'] <= total_object_count:

        log.debug("total_object_count: ", total_object_count)

        assets = get_data(token=token, url=general_url, params=params)

        list_object, total_object_count = get_list_objects_and_total_from_get_object(
            assets)

        dict_fields = get_data(token, fields_url, params=params)
        list_field = dict_fields["fields"]

        # Stripping down the metadata to remove unnecessary fields. We only really care about the following:
        # {"field_140": "MAC_Address", "field_135" :"IP"}
        clear_fields: dict = reduce(reduce_fields, list_field, {})

        # replace every key "field_NO" by the value of the clear_field["field_NO"]
        list_object_without_field_id = replace_device_key(
            list_object, clear_fields)

        db.insert(
            landing_table,
            values=[(entry, entry.get('id', None), insert_time)
                    for entry in list_object_without_field_id],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )

        log.info(
            f'Inserted {len(list_object_without_field_id)} rows ({landing_table}).'
        )
        yield len(list_object_without_field_id)

        # increment the offset to get new entries each iteration in the while loop
        params["offset"] += PAGE_SIZE
Example #14
def ingest_ami(
    landing_table, aws_access_key=None, aws_secret_key=None, session=None, account=None
):
    images = get_images(
        aws_access_key=aws_access_key,
        aws_secret_key=aws_secret_key,
        session=session,
        account=account,
    )

    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[
            (
                row,
                monitor_time,
                row.get('VirtualizationType'),
                row.get('Description'),
                row.get('Tags'),
                row.get('Hypervisor'),
                row.get('EnaSupport'),
                row.get('SriovNetSupport'),
                row.get('ImageId'),
                row.get('State'),
                row.get('BlockDeviceMappings'),
                row.get('Architecture'),
                row.get('ImageLocation'),
                row.get('RootDeviceType'),
                row.get('RootDeviceName'),
                row.get('OwnerId'),
                row.get('CreationDate'),
                row.get('Public'),
                row.get('ImageType'),
                row.get('Name'),
                row.get('Account', {}).get('ACCOUNT_ID'),
                row['Region']['RegionName'],
            )
            for row in images
        ],
        select=db.derive_insert_select(LANDING_TABLES_COLUMNS['AMI']),
        columns=db.derive_insert_columns(LANDING_TABLES_COLUMNS['AMI']),
    )
    return len(images)
def ingest(table_name, options):
    tenant_id = options['tenant_id']
    client_id = options['client_id']
    client_secret = options['client_secret']
    cloud_type = options.get('cloud_type', 'reg')

    subscriptions_service = get_subscription_service(
        {
            "tenantId": tenant_id,
            "clientId": client_id,
            "clientSecret": client_secret
        },
        cloud_type,
    )

    subscription_list = subscriptions_service.list()
    subscriptions = [s.as_dict() for s in subscription_list]

    db.insert(
        f'data.{table_name}',
        values=[(
            parse(subscription_list.raw.response.headers['Date']).isoformat(),
            tenant_id,
            row,
            row['id'],
            row['subscription_id'],
            row['display_name'],
            row['state'],
            row['subscription_policies'],
            row['authorization_source'],
        ) for row in subscriptions],
        select=(
            'column1',
            'column2',
            'PARSE_JSON(column3)',
            'column4',
            'column5',
            'column6',
            'column7',
            'PARSE_JSON(column8)',
            'column9',
        ),
    )

    yield len(subscriptions)
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        agents = {a['uuid']: a for a in get_agent_data()}.values()
        for page in groups_of(10000, agents):
            db.insert(
                table=f'data.{table_name}',
                values=[(agent, timestamp) for agent in page],
                select=db.derive_insert_select(AGENT_LANDING_TABLE),
                columns=db.derive_insert_columns(AGENT_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable Agents')
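groups_of(10000, agents) batches the agents so that each db.insert stays a manageable size. A minimal sketch of such a helper:

from itertools import islice

def groups_of(n, iterable):
    # Yield successive lists of at most n items from iterable.
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk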
Example #17
def ingest_agents(table_name, options):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        all_agents = sorted(get_agent_data(),
                            key=lambda a: a.get('last_connect', 0))
        unique_agents = {a['uuid']: a for a in all_agents}.values()
        rows = [{'raw': ua, 'export_at': timestamp} for ua in unique_agents]
        log.debug(f'inserting {len(unique_agents)} unique (by uuid) agents')
        db.insert(f'data.{table_name}', rows)
        return len(rows)
    else:
        log.info('Not time to import Tenable Agents')
        return 0
def ingest(table_name, options):
    tenant_id = options['tenant_id']
    client_id = options['client_id']
    client_secret = options['client_secret']

    subscriptions_service = get_client_from_json_dict(
        SubscriptionClient, {
            "tenantId": tenant_id,
            "clientId": client_id,
            "clientSecret": client_secret,
            "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
            "resourceManagerEndpointUrl": "https://management.azure.com/",
            "managementEndpointUrl": "https://management.core.windows.net/",
        }).subscriptions

    subscription_list = subscriptions_service.list()
    subscriptions = [s.as_dict() for s in subscription_list]

    db.insert(
        f'data.{table_name}',
        values=[(
            parse(subscription_list.raw.response.headers['Date']).isoformat(),
            tenant_id,
            row,
            row['id'],
            row['subscription_id'],
            row['display_name'],
            row['state'],
            row['subscription_policies'],
            row['authorization_source'],
        ) for row in subscriptions],
        select=(
            'column1',
            'column2',
            'PARSE_JSON(column3)',
            'column4',
            'column5',
            'column6',
            'column7',
            'PARSE_JSON(column8)',
            'column9',
        ))

    yield len(subscriptions)
Example #19
def ingest(table_name, options, dryrun=False):
    domainkey = options['domainkey']
    skey = options['skey']
    ikey = options['ikey']

    admin_api = duo_client.Admin(
        ikey=ikey,
        skey=skey,
        host=f'api-{domainkey}.duosecurity.com',
    )
    admins = list(admin_api.get_admins())
    db.insert(
        f'data.{table_name}',
        [{
            'raw': a
        } for a in admins],
        dryrun=dryrun,
    )
    return len(admins)
def ingest_vulns(table_name):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) as time FROM data.{table_name}'))['TIME']
    timestamp = datetime.now(timezone.utc)

    if (last_export_time is None
            or (timestamp - last_export_time).total_seconds() > 86400):
        log.info("Exporting vulnerabilities...")
        vulns = TIO.exports.vulns()

        for page in groups_of(10000, vulns):
            db.insert(
                table=f'data.{table_name}',
                values=[(vuln, timestamp) for vuln in page],
                select=db.derive_insert_select(VULN_LANDING_TABLE),
                columns=db.derive_insert_columns(VULN_LANDING_TABLE),
            )
    else:
        log.info('Not time to import Tenable vulnerabilities yet')
async def main(table_name):
    async with aiohttp.ClientSession() as session:
        cids = [
            c['id']
            for c in (await fetch(session, '/computers')).get('computers', [])
        ]

        log.info(f'loading {len(cids)} computer details')
        computers = await asyncio.gather(
            *[fetch_computer(session, cid) for cid in cids])

        log.info(f'inserting {len(computers)} computers into {table_name}')
        rows = [
            updated(c.get('computer'),
                    computer_id=cid,
                    recorded_at=c.get('recorded_at'))
            for cid, c in zip(cids, computers)
        ]
        db.insert(table_name, rows)
        return len(rows)
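updated is not defined here; it appears to merge extra keys into each computer record. A likely shape (an assumption):

def updated(d, **kwargs):
    # Return the dict with the extra keys merged in; tolerate a missing record.
    d = dict(d or {})
    d.update(kwargs)
    return d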
Example #22
def process_endpoint(endpoint):
    log.info(f"starting {endpoint}")
    json_body = {'links': {'next': {'href': endpoint}}}
    page = 1
    while json_body['links']['next'] is not None:
        log.info(f"Getting page {str(page)}")

        r = get(json_body['links']['next']['href'])
        if r.status_code != 200:
            log.error(f"Ingest request for {endpoint} failed", r.text)
            db.record_failed_ingestion(ZENGRC_TABLE, r, TIMESTAMP)
            break

        json_body = r.json()
        data = [[json.dumps(i), TIMESTAMP] for i in json_body['data']]
        try:
            db.insert(ZENGRC_TABLE, data, select='PARSE_JSON(column1), column2')
            page += 1
        except Exception as e:
            log.error(e)
Example #23
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    timestamp = datetime.utcnow()
    organization_id = options['organization_id']
    api_secret = options['api_secret']
    api_key = options['api_key']

    params: dict = {"limit": PAGE_SIZE, "page": 1}  # API starts at 1

    while 1:
        devices: dict = get_data(organization_id, api_key, api_secret, params)
        params["page"] += 1

        if len(devices) == 0:
            break

        db.insert(
            landing_table,
            values=[(
                timestamp,
                device,
                device.get('deviceId'),
                device.get('osVersionName', None),
                device.get('lastSyncStatus', None),
                device.get('type', None),
                device.get('version', None),
                device.get('lastSync', None),
                device.get('osVersion', None),
                device.get('name', None),
                device.get('status', None),
                device.get('originId', None),
                device.get('appliedBundle', None),
                device.get('hasIpBlocking', None),
            ) for device in devices],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )
        log.info(f'Inserted {len(devices)} rows.')
        yield len(devices)
def ingest_elb(aws_access_key, aws_secret_key, landing_table, regions):
    elbs = get_all_elbs(aws_access_key, aws_secret_key, regions)
    monitor_time = datetime.utcnow().isoformat()

    db.insert(
        landing_table,
        values=[(
            row,
            monitor_time,
            row['CanonicalHostedZoneName'],
            row['CanonicalHostedZoneNameID'],
            row['CreatedTime'],
            row['DNSName'],
            row['LoadBalancerName'],
            row['Region']['RegionName'],
            row['Scheme'],
            row['VPCId'])
            for row in elbs],
        select='PARSE_JSON(column1), column2, column3, column4, column5, column6, '
               'column7, column8, column9, column10'
    )
    return len(elbs)
Example #25
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    api_key = options['api_key']
    subdomain = options['subdomain']

    url = f'https://{subdomain}.okta.com/api/v1/logs'
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': f'SSWS {api_key}'
    }

    ts = db.fetch_latest(landing_table, 'event_time')
    if ts is None:
        log.error("Unable to find a timestamp of most recent Okta log, "
                  "defaulting to one hour ago")
        ts = datetime.datetime.now() - datetime.timedelta(hours=1)

    params = {'since': ts.strftime("%Y-%m-%dT%H:%M:%S.000Z")}

    while 1:
        response = requests.get(url=url, headers=headers, params=params)
        if response.status_code != 200:
            log.error('OKTA REQUEST FAILED: ', response.text)
            return

        result = response.json()
        if result == []:
            break

        db.insert(landing_table,
                  values=[(row, row['published']) for row in result],
                  select='PARSE_JSON(column1), column2')

        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

        url = response.headers['Link'].split(', ')[1].split(';')[0][1:-1]
Example #26
def ingest_vulns(table_name):
    last_export_time = next(
        db.fetch(
            f'SELECT MAX(export_at) AS time FROM data.{table_name}'))['TIME']
    now = datetime.now(timezone.utc)

    if (last_export_time is None
            or (now - last_export_time) > timedelta(days=1)):
        log.debug('TIO export vulns')

        # insert empty row...
        db.insert(f'data.{table_name}', [{'export_at': now}])

        # ...because this line takes awhile
        vulns = TIO.exports.vulns()

        rows = [{'raw': v, 'export_at': now} for v in vulns]
        db.insert(f'data.{table_name}', rows)
        return len(rows)

    else:
        log.info('Not time to import Tenable vulnerabilities yet')
        return 0
def get_data_worker(account_id, account_name):
    try:
        ec2_session = get_aws_client(account_id)
        instances = []
        try:
            ec2_regions = [
                region['RegionName'] for region in ec2_session.client(
                    'ec2').describe_regions()['Regions']
            ]
        except Exception as e:
            log.info(
                f"ec2_describe_instances account [{account_id}] {account_name} exception",
                e)
            return None
        for region in ec2_regions:
            try:
                client = ec2_session.client('ec2', region_name=region)
                paginator = client.get_paginator('describe_instances')
                page_iterator = paginator.paginate()
                region_instances = [
                    instance for page in page_iterator
                    for reservation in page['Reservations']
                    for instance in reservation['Instances']
                ]
                instances.extend(region_instances)
            except Exception as e:
                log.info(
                    f"ec2_describe_instances: account [{account_id}] {account_name} exception",
                    e)
                db.insert(AWS_ACCOUNTS_INFORMATION_TABLE,
                          values=[(datetime.utcnow(), account_id, account_name,
                                   None, e)])
                return None
        instance_list = [
            json.dumps({
                **instance, "AccountId": account_id
            }, default=str) for instance in instances
        ]
        try:
            db.insert(AWS_ACCOUNTS_INFORMATION_TABLE,
                      values=[(datetime.utcnow(), account_id, account_name,
                               len(instance_list), None)])
        except Exception:
            print('Failed to insert into AWS_ACCOUNTS_INFORMATION table.')
        print(
            f"ec2_describe_instances: account: {account_name} instances: {len(instance_list)}"
        )
        return instance_list
    except Exception as e:
        print(
            f"ec2_describe_instances: account: {account_name} exception: {e}")
        db.insert(AWS_ACCOUNTS_INFORMATION_TABLE,
                  values=[(datetime.utcnow(), account_id, account_name, None,
                           e)])
        return None
Example #28
def ingest(table_name, options):
    landing_table = f'data.{table_name}'
    timestamp = datetime.utcnow()

    client_id = options['client_id']
    client_secret = options['client_secret']

    # Call the authorization endpoint so we can make subsequent calls to the API with an auth token
    token: str = get_token_basic(client_id, client_secret)

    offset = ""
    params_get_id_devices: dict = {"limit": PAGE_SIZE, "offset": offset}

    while 1:
        dict_id_devices: dict = get_data(token, CROWDSTRIKE_DEVICES_BY_ID_URL,
                                         params_get_id_devices)
        resources: list = dict_id_devices["resources"]
        params_get_id_devices["offset"] = get_offset_from_devices_results(
            dict_id_devices)

        if len(resources) == 0:
            break

        device_details_url_and_params: str = create_url_params_get_devices(
            CROWDSTRIKE_DEVICE_DETAILS_URL, resources)

        dict_devices: dict = get_data(token, device_details_url_and_params)
        devices = dict_devices["resources"]

        db.insert(
            landing_table,
            values=[(
                timestamp,
                device,
                device.get('device_id'),
                device.get('first_seen', None),
                device.get('system_manufacturer', None),
                device.get('config_id_base', None),
                device.get('last_seen', None),
                device.get('policies', None),
                device.get('slow_changing_modified_timestamp', None),
                device.get('minor_version', None),
                device.get('system_product_name', None),
                device.get('hostname', None),
                device.get('mac_address', None),
                device.get('product_type_desc', None),
                device.get('platform_name', None),
                device.get('external_ip', None),
                device.get('agent_load_flags', None),
                device.get('group_hash', None),
                device.get('provision_status', None),
                device.get('os_version', None),
                device.get('groups', None),
                device.get('bios_version', None),
                device.get('modified_timestamp', None),
                device.get('local_ip', None),
                device.get('agent_version', None),
                device.get('major_version', None),
                device.get('meta', None),
                device.get('agent_local_time', None),
                device.get('bios_manufacturer', None),
                device.get('platform_id', None),
                device.get('device_policies', None),
                device.get('config_id_build', None),
                device.get('config_id_platform', None),
                device.get('cid', None),
                device.get('status', None),
                device.get('service_pack_minor', None),
                device.get('product_type', None),
                device.get('service_pack_major', None),
                device.get('build_number', None),
                device.get('pointer_size', None),
                device.get('site_name', None),
                device.get('machine_domain', None),
                device.get('ou', None),
            ) for device in devices],
            select=db.derive_insert_select(LANDING_TABLE_COLUMNS),
            columns=db.derive_insert_columns(LANDING_TABLE_COLUMNS),
        )
        log.info(f'Inserted {len(devices)} rows.')
        yield len(devices)
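get_token_basic is not shown. CrowdStrike's API uses an OAuth2 client-credentials flow, so the helper presumably exchanges the client id and secret for a bearer token; the name hints the credentials might instead be sent as HTTP Basic auth. A hedged sketch posting them as form data (the endpoint constant and error handling are assumptions):

import requests

CROWDSTRIKE_OAUTH_TOKEN_URL = 'https://api.crowdstrike.com/oauth2/token'  # assumed endpoint

def get_token_basic(client_id, client_secret):
    # Exchange client credentials for a short-lived bearer token.
    response = requests.post(
        CROWDSTRIKE_OAUTH_TOKEN_URL,
        data={'client_id': client_id, 'client_secret': client_secret},
    )
    response.raise_for_status()
    return response.json()['access_token']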
Example #29
def load_data(messages):
    data = [(m, m['date']) for m in messages]
    try:
        db.insert(AGARI_TABLE, data, select='PARSE_JSON(column1), column2')
    except Exception as e:
        log.error("failed to ingest data", e)
Example #30
def ingest(table_name, options):
    ingest_type = (
        'users' if table_name.endswith('_USERS_CONNECTION') else
        'groups' if table_name.endswith('_GROUPS_CONNECTION') else 'logs')

    landing_table = f'data.{table_name}'
    api_key = options['api_key']
    subdomain = options['subdomain']

    ingest_urls = {
        'users': f'https://{subdomain}.okta.com/api/v1/users',
        'deprovisioned_users':
        f'https://{subdomain}.okta.com/api/v1/users?filter=status+eq+"DEPROVISIONED"',
        'groups': f'https://{subdomain}.okta.com/api/v1/groups',
        'logs': f'https://{subdomain}.okta.com/api/v1/logs',
    }

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': f'SSWS {api_key}',
    }

    timestamp = datetime.datetime.utcnow()

    if ingest_type == 'groups':
        response = requests.get(url=ingest_urls[ingest_type], headers=headers)

        result = response.json()

        for row in result:
            try:
                row['users'] = requests.get(url=row['_links']['users']['href'],
                                            headers=headers).json()
            except TypeError:
                log.info(row)
                raise

        db.insert(
            landing_table,
            values=[(row, timestamp) for row in result],
            select='PARSE_JSON(column1), column2',
        )

        log.info(f'Inserted {len(result)} rows.')
        yield len(result)

    elif ingest_type == 'users':
        yield from ingest_users(ingest_urls['users'], headers, landing_table,
                                timestamp)
        yield from ingest_users(ingest_urls['deprovisioned_users'], headers,
                                landing_table, timestamp)

    else:
        ts = db.fetch_latest(landing_table, 'event_time')
        if ts is None:
            log.error("Unable to find a timestamp of most recent Okta log, "
                      "defaulting to one hour ago")
            ts = datetime.datetime.now() - datetime.timedelta(hours=1)

        params = {'since': ts.strftime("%Y-%m-%dT%H:%M:%S.000Z"), 'limit': 500}

        i = 0
        print(params['since'])
        url = ingest_urls[ingest_type]
        while 1:
            response = requests.get(url=url, headers=headers, params=params)
            if response.status_code != 200:
                log.error('OKTA REQUEST FAILED: ', response.text)
                return

            result = response.json()
            if result == []:
                break

            db.insert(
                landing_table,
                values=[(row, row['published']) for row in result],
                select='PARSE_JSON(column1), column2',
            )

            log.info(f'Inserted {len(result)} rows. {i}')
            i += 1
            yield len(result)

            url = ''
            links = requests.utils.parse_header_links(response.headers['Link'])
            for link in links:
                if link['rel'] == 'next':
                    url = link['url']

            if len(url) == 0:
                break