def connect(connection_name, options):
    """Set up Snowflake ingestion of Azure logs landed in an Azure blob container.

    Derives all object names from connection_name and
    options['connection_type'] ('operation', 'audit', or 'signin' — the keys
    of insert_task_sql below), then creates:

      * a stage over the container, authenticated with a SAS token;
      * a landing table using LANDING_TABLES_COLUMNS[connection_type];
      * an external table over the stage, partitioned on a timestamp
        computed from each file's name;
      * a stored procedure that refreshes the external table's metadata;
      * a task calling that procedure every 5 minutes, plus a dependent task
        that MERGEs rows from the last 2 hours of partitions into the
        landing table.

    options['cloud_type'] selects the SELECT feeding the merge: 'reg' reads
    rows directly, 'gov' additionally flattens each row's "records" array
    (the keys of select_statement_sql below).

    Returns {'newStage': ..., 'newMessage': ...} for the connection flow.
    """
    connection_type = options['connection_type']
    base_name = f"azure_log_{connection_name}_{connection_type}"
    account_name = options['account_name']
    container_name = options['container_name']
    suffix = options['suffix']  # endpoint suffix, e.g. 'core.windows.net'
    cloud_type = options['cloud_type']
    sas_token = options['sas_token']

    comment = yaml_dump(module='azure_log')

    # Stage over the Azure container holding the log files.
    db.create_stage(
        name=f'data.{base_name}_stage',
        url=f"azure://{account_name}.blob.{suffix}/{container_name}",
        cloud='azure',
        prefix='',
        credentials=sas_token,
        file_format=FILE_FORMAT,
    )
    db.execute(
        f'GRANT USAGE ON STAGE data.{base_name}_stage TO ROLE {SA_ROLE}')

    # Landing table that the ingest task merges into.
    db.create_table(
        name=f'data.{base_name}_connection',
        cols=LANDING_TABLES_COLUMNS[connection_type],
        comment=comment,
        ifnotexists=True,
    )
    db.execute(
        f'GRANT INSERT, SELECT ON data.{base_name}_connection TO ROLE {SA_ROLE}'
    )

    # External table over the stage.  Its single declared column is a
    # partition timestamp parsed out of the file name, which lets the
    # ingest query prune to recently landed files.
    external_table_columns = [(
        'timestamp_part',
        'TIMESTAMP_LTZ',
        GET_TIMESTAMP_FROM_FILENAME_SQL[connection_type],
    )]
    db.create_external_table(
        name=f'data.{base_name}_external',
        location=f'@data.{base_name}_stage',
        cols=external_table_columns,
        partition='timestamp_part',
        file_format=db.TypeOptions(type='JSON'),
    )
    db.execute(f'GRANT SELECT ON data.{base_name}_external TO ROLE {SA_ROLE}')

    # JavaScript stored procedure that refreshes the external table
    # ({{ and }} escape literal braces inside this f-string).
    stored_proc_def = f"""
var sql_command = "ALTER EXTERNAL TABLE data.{base_name}_external REFRESH";
try {{
    snowflake.execute ({{sqlText: sql_command}});
    return "Succeeded.";
}} catch (err) {{
    return "Failed: " + err;
}}
"""
    db.create_stored_procedure(
        name=f'data.{base_name}_procedure',
        args=[],
        return_type='string',
        executor='OWNER',
        definition=stored_proc_def,
    )

    # Task that runs the refresh procedure on a fixed schedule.
    refresh_task_sql = f'CALL data.{base_name}_procedure()'
    db.create_task(
        name=f'data.{base_name}_refresh_task',
        warehouse=WAREHOUSE,
        schedule='5 minutes',
        sql=refresh_task_sql,
    )

    # Source query for the merge, keyed by cloud type: 'gov' log files wrap
    # events in a "records" array that must be LATERAL FLATTENed first.
    select_statement_sql = {
        'reg': (
            f"SELECT value "
            f"FROM data.{base_name}_external "
            f"WHERE timestamp_part >= DATEADD(HOUR, -2, CURRENT_TIMESTAMP())"
        ),
        'gov': (
            f"SELECT value FROM ("
            f" SELECT value AS a "
            f" FROM data.{base_name}_external"
            f" WHERE timestamp_part >= DATEADD(HOUR, -2, CURRENT_TIMESTAMP())"
            f"), LATERAL FLATTEN (INPUT => a:records)"
        ),
    }

    # INSERT clause of the merge, keyed by connection type.  Each maps
    # fields of the JSON event (bound as VALUE) onto landing-table columns.
    # NOTE(review): in 'audit', PROPERTIES_RESULT_REASON is read from
    # VALUE:resultReason rather than VALUE:properties.resultReason; in
    # 'signin', LEVEL is read from VALUE:Level (capital L, unlike the
    # lowercase 'level' used elsewhere) and DURATION_MS has no ::NUMBER
    # cast — confirm all three against the actual log schema.
    insert_task_sql = {
        'operation': f"""
INSERT (
    RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS,
    IDENTITY, IDENTITY_AUTHORIZATION, IDENTITY_CLAIMS, LEVEL, LOCATION,
    OPERATION_NAME, PROPERTIES, PROPERTIES_ANCESTORS,
    PROPERTIES_IS_COMPLIANCE_CHECK, PROPERTIES_POLICIES,
    PROPERTIES_RESOURCE_LOCATION, RESOURCE_ID, RESULT_SIGNATURE,
    RESULT_TYPE, EVENT_TIME, LOADED_ON
) VALUES (
    VALUE,
    HASH(VALUE),
    VALUE:callerIpAddress::STRING,
    VALUE:category::STRING,
    VALUE:correlationId::STRING,
    VALUE:durationMs::NUMBER,
    VALUE:identity::VARIANT,
    VALUE:identity.authorization::VARIANT,
    VALUE:identity.claims::VARIANT,
    VALUE:level::STRING,
    VALUE:location::STRING,
    VALUE:operationName::STRING,
    VALUE:properties::VARIANT,
    VALUE:properties.ancestors::STRING,
    VALUE:properties.isComplianceCheck::STRING,
    PARSE_JSON(VALUE:properties.policies),
    VALUE:properties.resourceLocation::STRING,
    VALUE:resourceId::STRING,
    VALUE:resultSignature::STRING,
    VALUE:resultType::STRING,
    value:time::TIMESTAMP_LTZ,
    CURRENT_TIMESTAMP()
)
""",
        'audit': f"""
INSERT (
    RAW, HASH_RAW, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID, DURATION_MS,
    LEVEL, OPERATION_NAME, OPERATION_VERSION, PROPERTIES,
    PROPERTIES_ACTIVITY_DATE_TIME, PROPERTIES_ACTIVITY_DISPLAY_NAME,
    PROPERTIES_ADDITIONAL_DETAILS, PROPERTIES_CATEGORY, PROPERTIES_ID,
    PROPERTIES_INITIATED_BY, PROPERTIES_LOGGED_BY_SERVICE,
    PROPERTIES_OPERATION_TYPE, PROPERTIES_RESULT, PROPERTIES_RESULT_REASON,
    PROPERTIES_TARGET_RESOURCES, RESOURCE_ID, RESULT_SIGNATURE, TENANT_ID,
    EVENT_TIME, LOADED_ON
) VALUES (
    VALUE,
    HASH(VALUE),
    VALUE:callerIpAddress::STRING,
    VALUE:category::STRING,
    VALUE:correlationId::STRING,
    VALUE:durationMs::NUMBER,
    VALUE:level::STRING,
    VALUE:operationName::STRING,
    VALUE:operationVersion::STRING,
    VALUE:properties::VARIANT,
    VALUE:properties.activityDateTime::TIMESTAMP_LTZ,
    VALUE:properties.activityDisplayName::STRING,
    VALUE:properties.additionalDetails::VARIANT,
    VALUE:properties.category::STRING,
    VALUE:properties.id::STRING,
    VALUE:properties.initiatedBy::VARIANT,
    VALUE:properties.loggedByService::STRING,
    VALUE:properties.operationType::STRING,
    VALUE:properties.result::STRING,
    VALUE:resultReason::STRING,
    VALUE:properties.targetResources::VARIANT,
    VALUE:resourceId::STRING,
    VALUE:resultSignature::STRING,
    VALUE:tenantId::STRING,
    VALUE:time::TIMESTAMP_LTZ,
    CURRENT_TIMESTAMP()
)
""",
        'signin': f"""
INSERT (
    RAW, HASH_RAW, LEVEL, CALLER_IP_ADDRESS, CATEGORY, CORRELATION_ID,
    DURATION_MS, IDENTITY, LOCATION, OPERATION_NAME, OPERATION_VERSION,
    PROPERTIES, PROPERTIES_APP_DISPLAY_NAME, PROPERTIES_APP_ID,
    PROPERTIES_APPLIED_CONDITIONAL_ACESS_POLICIES,
    PROPERTIES_AUTHENTICATION_METHODS_USED,
    PROPERTIES_AUTHENTICATION_PROCESSING_DETAILS,
    PROPERTIES_CLIENT_APP_USED, PROPERTIES_CONDITIONAL_ACCESS_STATUS,
    PROPERTIES_CREATED_DATE_TIME, PROPERTIES_DEVICE_DETAIL, PROPERTIES_ID,
    PROPERTIES_IP_ADDRESS, PROPERTIES_IS_INTERACTIVE, PROPERTIES_LOCATION,
    PROPERTIES_MFA_DETAIL, PROPERTIES_NETWORK_LOCATION,
    PROPERTIES_PROCESSING_TIME_IN_MILLISECONDS,
    PROPERTIES_RESOURCE_DISPLAY_NAME, PROPERTIES_RESOURCE_ID,
    PROPERTIES_RISK_DETAIL, PROPERTIES_RISK_EVENT_TYPES,
    PROPERTIES_RISK_LEVEL_AGGREGATED, PROPERTIES_RISK_LEVEL_DURING_SIGNIN,
    PROPERTIES_RISK_STATE, PROPERTIES_STATUS,
    PROPERTIES_TOKEN_ISSUER_TYPE, PROPERTIES_USER_DISPLAY_NAME,
    PROPERTIES_USER_ID, PROPERTIES_USER_PRINCIPAL_NAME, RESOURCE_ID,
    RESULT_DESCRIPTION, RESULT_SIGNATURE, RESULT_TYPE, TENANT_ID,
    EVENT_TIME, LOADED_ON
) VALUES (
    VALUE,
    HASH(VALUE),
    VALUE:Level::NUMBER,
    VALUE:callerIpAddress::STRING,
    VALUE:category::STRING,
    VALUE:correlationId::STRING,
    VALUE:durationMs,
    VALUE:identity::STRING,
    VALUE:location::STRING,
    VALUE:operationName::STRING,
    VALUE:operationVersion::STRING,
    VALUE:properties::VARIANT,
    VALUE:properties.appDisplayName::STRING,
    VALUE:properties.appId::STRING,
    VALUE:properties.appliedConditionalAccessPolicies::VARIANT,
    VALUE:properties.authenticationMethodsUsed::VARIANT,
    VALUE:properties.authenticationProcessingDetails::VARIANT,
    VALUE:properties.clientAppUsed::STRING,
    VALUE:properties.conditionalAccessStatus::STRING,
    VALUE:properties.createdDateTime::TIMESTAMP_LTZ,
    VALUE:properties.deviceDetail::VARIANT,
    VALUE:properties.id::STRING,
    VALUE:properties.ipAddress::STRING,
    VALUE:properties.isInteractive::BOOLEAN,
    VALUE:properties.location::VARIANT,
    VALUE:properties.mfaDetail::VARIANT,
    VALUE:properties.networkLocationDetails::VARIANT,
    VALUE:properties.processingTimeInMilliseconds::NUMBER,
    VALUE:properties.resourceDisplayName::STRING,
    VALUE:properties.resourceId::STRING,
    VALUE:properties.riskDetail::STRING,
    VALUE:properties.riskEventTypes::VARIANT,
    VALUE:properties.riskLevelAggregated::STRING,
    VALUE:properties.riskLevelDuringSignIn::STRING,
    VALUE:properties.riskState::VARIANT,
    VALUE:properties.status::VARIANT,
    VALUE:properties.tokenIssuerType::STRING,
    VALUE:properties.userDisplayName::STRING,
    VALUE:properties.userId::STRING,
    VALUE:properties.userPrincipalName::STRING,
    VALUE:resourceId::STRING,
    VALUE:resultDescription::STRING,
    VALUE:resultSignature::STRING,
    VALUE:resultType::STRING,
    VALUE:tenantId::STRING,
    VALUE:time::TIMESTAMP_LTZ,
    CURRENT_TIMESTAMP()
)
""",
    }

    # Merge new external-table rows (matched on the raw event value) into
    # the landing table; runs after each refresh task completes.
    ingest_task_sql = f"""
MERGE INTO data.{base_name}_connection a
USING (
  {select_statement_sql[cloud_type]}
) b
ON a.raw = b.value
WHEN NOT MATCHED THEN
{insert_task_sql[connection_type]}
"""
    db.create_task(
        name=f'data.{base_name}_ingest_task',
        warehouse=WAREHOUSE,
        schedule=f'AFTER data.{base_name}_refresh_task',
        sql=ingest_task_sql,
    )

    return {
        'newStage': 'finalized',
        'newMessage': 'Created Stage, Tables, Stored Procedure, and Tasks.',
    }
# Tail of this connector's CONNECTION_OPTIONS list (opened above this chunk).
{
    'type': 'str',
    'name': 'suffix',
    'title': "Endpoint Suffix (optional)",
    'prompt': "If using Azure Storage in an independent cloud, modify the endpoint suffix below",
    'default': 'core.windows.net',
    'required': True,
},
]

# Stage file format for the Azure log files: JSON, auto-detected
# compression, BOM skipped, otherwise parsed strictly (no octal numbers,
# no duplicate-key allowance, outer arrays and null values preserved,
# UTF-8 errors not ignored).
FILE_FORMAT = db.TypeOptions(
    type='JSON',
    compression='AUTO',
    enable_octal=False,
    allow_duplicate=False,
    strip_outer_array=False,
    strip_null_values=False,
    ignore_utf8_errors=False,
    skip_byte_order_mark=True,
)

# Landing-table column definitions keyed by connection type.
# (Only the start of the 'operation' entry is visible in this chunk.)
LANDING_TABLES_COLUMNS = {
    'operation': [
        ('raw', 'VARIANT'),
        ('hash_raw', 'NUMBER'),
        ('caller_ip_address', 'VARCHAR'),
        ('category', 'VARCHAR'),
        ('correlation_id', 'VARCHAR'),
        ('duration_ms', 'NUMBER'),
        ('identity', 'VARIANT'),
        ('identity_authorization', 'VARIANT'),
# Tail of this connector's CONNECTION_OPTIONS list (opened above this chunk).
    'name': 'aws_role',
    'title': "LDAP Log Bucket Reader Role",
    'prompt': "Role to be assumed for access to LDAP Log Bucket",
    'placeholder': "arn:aws:iam::012345678987:role/my-ldap-log-reader",
},
{
    'type': 'str',
    'name': 'existing_stage',
    'title': "Snowflake Stage (alternative)",
    'prompt': "Enter to use an existing stage instead",
    'placeholder': "snowalert.data.ldap_stage",
},
]

# Stage file format for the LDAP exports: comma-delimited CSV with one
# header row skipped; fields may be double-quoted.
FILE_FORMAT = db.TypeOptions(type='CSV',
                             field_delimiter=',',
                             skip_header=1,
                             field_optionally_enclosed_by='"')

# Schema of the LDAP landing table.
# NOTE(review): 'sam' is presumably the sAMAccountName — confirm upstream.
LANDING_TABLE_COLUMNS = [
    ('group_name', 'STRING(256)'),
    ('display_name', 'STRING(256)'),
    ('sam', 'STRING(100)'),
    ('email', 'STRING(256)'),
    ('account_created', 'TIMESTAMP_LTZ'),
    ('account_last_modified', 'TIMESTAMP_LTZ'),
    ('password_last_set', 'TIMESTAMP_LTZ'),
    ('password_expires', 'TIMESTAMP_LTZ'),
]

# IAM setup instructions shown to the user after connect; the template is
# truncated at this chunk boundary.
CONNECT_RESPONSE_MESSAGE = """
STEP 1: Modify the Role "{role}" to include the following trust relationship:
# Tail of this connector's CONNECTION_OPTIONS list (opened above this chunk).
    'prompt': "The folder in S3 bucket where Flow Logs are collected",
    'default': "AWSLogs/",
    'required': True,
},
{
    'type': 'str',
    'name': 'aws_role',
    'title': "Flow Bucket Reader Role",
    'prompt': "Role to be assumed for access to Flow Logs in S3",
    'placeholder': "arn:aws:iam::012345678987:role/my-flow-log-reader-role",
    'required': True,
},
]

# VPC Flow Log files: gzipped, space-delimited CSV with one header row;
# '-' fields load as NULL.
FILE_FORMAT = db.TypeOptions(
    type='CSV', compression='GZIP', field_delimiter=' ', skip_header=1, null_if='-'
)

# Companion protocol-lookup file: plain comma-delimited CSV, one header
# row skipped, empty fields load as NULL.
# NOTE(review): presumably the IANA protocol-numbers table — confirm.
PROTOCOL_FILE_FORMAT = db.TypeOptions(
    type='CSV', compression='AUTO', skip_header=1, field_delimiter=',', null_if=''
)

# Flow-log landing table (truncated at this chunk boundary).
LANDING_TABLE_COLUMNS = [
    ('VERSION', 'NUMBER'),
    ('ACCOUNT_ID', 'STRING(50)'),
    ('INTERFACE_ID', 'STRING(100)'),
    ('SRCADDR', 'STRING(50)'),
    ('DSTADDR', 'STRING(50)'),
    ('SRCPORT', 'NUMBER'),
    ('DSTPORT', 'NUMBER'),
    ('PROTOCOL', 'NUMBER'),
def connect(connection_name, options):
    """Create the osquery_log landing table and its S3 stage, and return
    the IAM setup instructions for the user.

    If options['existing_stage'] is set, that stage is reused and the
    bucket name and prefix are recovered from the stage's URL; otherwise a
    new stage is created from options['bucket_name'], options['prefix'],
    and options['aws_role'].

    Returns {'newStage': 'created', 'newMessage': ...} where the message
    embeds the IAM role's required trust relationship and access policy.

    Raises RuntimeError if the stage URL cannot be parsed.
    """
    table_name = f'osquery_log_{connection_name}_connection'
    landing_table = f'data.{table_name}'
    prefix = ''
    bucket_name = ''

    db.create_table(
        name=landing_table,
        cols=LANDING_TABLE_COLUMNS,
        comment=yaml_dump(module='osquery_log', **options),
    )
    db.execute(f'GRANT INSERT, SELECT ON {landing_table} TO ROLE {SA_ROLE}')

    stage_name = options.get('existing_stage')
    if not stage_name:
        stage_name = f'data.osquery_log_{connection_name}_stage'
        bucket_name = options['bucket_name']
        prefix = options['prefix']
        aws_role = options['aws_role']
        db.create_stage(
            name=stage_name,
            url=f's3://{bucket_name}',
            prefix=prefix,
            cloud='aws',
            credentials=aws_role,
            file_format=db.TypeOptions(type='JSON'),
        )

    # The stage carries the external ID and IAM user Snowflake generated,
    # plus the role and URL it was created with.
    stage_props = db.fetch_props(
        f'DESC STAGE {stage_name}',
        filter=('AWS_EXTERNAL_ID', 'SNOWFLAKE_IAM_USER', 'AWS_ROLE', 'URL'),
    )

    if not bucket_name or not prefix:
        # Existing-stage case: recover bucket and prefix from the stage
        # URL, which looks like ["s3://my-bucket/some/prefix"].
        # Fix: S3 bucket names may contain digits and dots, which the old
        # pattern [a-z-]* rejected (it also accepted an empty bucket name).
        m = re.match(r'^\["s3://([a-z0-9.-]+)/(.*)"\]$', stage_props['URL'])
        if m:
            bucket_name, prefix = m.groups()
        else:
            raise RuntimeError('cannot determine bucket name or prefix')

    prefix = prefix.rstrip('/')  # the policy resources below add their own '/'

    return {
        'newStage': 'created',
        'newMessage': CONNECT_RESPONSE_MESSAGE.format(
            role=stage_props['AWS_ROLE'],
            role_trust_relationship=dumps(
                {
                    "Version": "2012-10-17",
                    "Statement": [
                        {
                            "Effect": "Allow",
                            "Principal": {
                                "AWS": stage_props['SNOWFLAKE_IAM_USER']
                            },
                            "Action": "sts:AssumeRole",
                            "Condition": {
                                "StringEquals": {
                                    "sts:ExternalId": stage_props[
                                        'AWS_EXTERNAL_ID'
                                    ]
                                }
                            },
                        }
                    ],
                },
                indent=4,
            ),
            role_policy=dumps(
                {
                    "Version": "2012-10-17",
                    "Statement": [
                        {
                            "Effect": "Allow",
                            "Action": ["s3:GetObject", "s3:GetObjectVersion"],
                            "Resource": f"arn:aws:s3:::{bucket_name}/{prefix}/*",
                        },
                        {
                            "Effect": "Allow",
                            "Action": "s3:ListBucket",
                            "Resource": f"arn:aws:s3:::{bucket_name}",
                            "Condition": {
                                "StringLike": {"s3:prefix": [f"{prefix}/*"]}
                            },
                        },
                    ],
                },
                indent=4,
            ),
        ),
    }