Esempio n. 1
0
def finalize(connection_name):
    base_name = f'AWS_CLOUDTRAIL_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    # Step two: Configure the remainder once the role is properly configured.
    cloudtrail_ingest_task = f'''
INSERT INTO {landing_table} (
  insert_time, raw, hash_raw, event_time, aws_region, event_id, event_name, event_source, event_type,
  event_version, recipient_account_id, request_id, request_parameters, response_elements, source_ip_address,
  user_agent, user_identity, user_identity_type, user_identity_principal_id, user_identity_arn,
  user_identity_accountid, user_identity_invokedby, user_identity_access_key_id, user_identity_username,
  user_identity_session_context_attributes_mfa_authenticated, user_identity_session_context_attributes_creation_date,
  user_identity_session_context_session_issuer_type, user_identity_session_context_session_issuer_principal_id,
  user_identity_session_context_session_issuer_arn, user_identity_session_context_session_issuer_account_id,
  user_identity_session_context_session_issuer_user_name, error_code, error_message, additional_event_data,
  api_version, read_only, resources, service_event_details, shared_event_id, vpc_endpoint_id
)
SELECT CURRENT_TIMESTAMP() insert_time
    , value raw
    , HASH(value) hash_raw
    --- In the rare event of an unparsable timestamp, the following COALESCE keeps the pipeline from failing.
    --- Compare event_time to TRY_TO_TIMESTAMP(raw:eventTime::STRING) to establish if the timestamp was parsed.
    , COALESCE(
        TRY_TO_TIMESTAMP(value:eventTime::STRING)::TIMESTAMP_LTZ(9),
        CURRENT_TIMESTAMP()
      ) event_time
    , value:awsRegion::STRING aws_region
    , value:eventID::STRING event_id
    , value:eventName::STRING event_name
    , value:eventSource::STRING event_source
    , value:eventType::STRING event_type
    , value:eventVersion::STRING event_version
    , value:recipientAccountId::STRING recipient_account_id
    , value:requestID::STRING request_id
    , value:requestParameters::VARIANT request_parameters
    , value:responseElements::VARIANT response_elements
    , value:sourceIPAddress::STRING source_ip_address
    , value:userAgent::STRING user_agent
    , value:userIdentity::VARIANT user_identity
    , value:userIdentity.type::STRING user_identity_type
    , value:userIdentity.principalId::STRING user_identity_principal_id
    , value:userIdentity.arn::STRING user_identity_arn
    , value:userIdentity.accountId::STRING user_identity_accountid
    , value:userIdentity.invokedBy::STRING user_identity_invokedby
    , value:userIdentity.accessKeyId::STRING user_identity_access_key_id
    , value:userIdentity.userName::STRING user_identity_username
    , value:userIdentity.sessionContext.attributes.mfaAuthenticated::STRING user_identity_session_context_attributes_mfa_authenticated
    , value:userIdentity.sessionContext.attributes.creationDate::STRING user_identity_session_context_attributes_creation_date
    , value:userIdentity.sessionContext.sessionIssuer.type::STRING user_identity_session_context_session_issuer_type
    , value:userIdentity.sessionContext.sessionIssuer.principalId::STRING user_identity_session_context_session_issuer_principal_id
    , value:userIdentity.sessionContext.sessionIssuer.arn::STRING user_identity_session_context_session_issuer_arn
    , value:userIdentity.sessionContext.sessionIssuer.accountId::STRING user_identity_session_context_session_issuer_account_id
    , value:userIdentity.sessionContext.sessionIssuer.userName::STRING user_identity_session_context_session_issuer_user_name
    , value:errorCode::STRING error_code
    , value:errorMessage::STRING error_message
    , value:additionalEventData::VARIANT additional_event_data
    , value:apiVersion::STRING api_version
    , value:readOnly::BOOLEAN read_only
    , value:resources::VARIANT resources
    , value:serviceEventDetails::STRING service_event_details
    , value:sharedEventId::STRING shared_event_id
    , value:vpcEndpointId::STRING vpc_endpoint_id
FROM data.{base_name}_STREAM, table(flatten(input => v:Records))
WHERE ARRAY_SIZE(v:Records) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_STAGING(v) FROM @data.{base_name}_STAGE/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=cloudtrail_ingest_task)

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} doesn't exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification"
            f"channel for all object create events: {sqs_arn}"
        )
    }
Esempio n. 2
0
def finalize(connection_name):
    base_name = f'AWS_CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    DATE_REGEXP = r'.+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z.*'.replace("\\", "\\\\")
    DATE_ISO8601_BACKREFERENCES = r'\1-\2-\3T\4:\5:\6Z'.replace("\\", "\\\\")

    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw, hash_raw, event_time, configuration_item_capture_time, account_id, aws_region, resource_type, arn,
  availability_zone, resource_creation_time, resource_name, resource_Id, relationships, configuration, tags
)
SELECT value raw
    , HASH(value) hash_raw
    , REGEXP_REPLACE(filename, '{DATE_REGEXP}', '{DATE_ISO8601_BACKREFERENCES}')::TIMESTAMP_LTZ event_time
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) configuration_item_capture_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING aws_region
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_stream',
        target=f'data.{base_name}_staging'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=(
                f"COPY INTO data.{base_name}_staging(v, filename) "
                f"FROM (SELECT $1, metadata$filename FROM @data.{base_name}_stage/)"
            ),
            replace=True,
            autoingest=True,
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    pipe_description = next(db.fetch(f'DESC PIPE {pipe}'), None)
    if pipe_description is None:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events:\n\n  {sqs_arn}\n\n"
            f"To backfill the landing table with existing data, please run:\n\n  ALTER PIPE {pipe} REFRESH;\n\n"
        )
    }
Esempio n. 3
0
def finalize(connection_name):
    base_name = f'CONFIG_{connection_name}_EVENTS'.upper()
    pipe = f'data.{base_name}_PIPE'
    landing_table = f'data.{base_name}_CONNECTION'

    config_ingest_task = f'''
INSERT INTO {landing_table} (
  raw, hash_raw, event_time, account_id, aws_region, resource_type, arn, availability_zone,
  resource_creation_time, resource_name, resource_Id, relationships, configuration, tags
)
SELECT value raw
    , HASH(value) hash_raw
    , value:configurationItemCaptureTime::TIMESTAMP_LTZ(9) event_time
    , value:awsAccountId::STRING account_id
    , value:awsRegion::STRING aws_region
    , value:resourceType::STRING aws_region
    , value:ARN::STRING arn
    , value:availabilityZone::STRING availability_zone
    , value:resourceCreationTime::TIMESTAMP_LTZ(9) resource_creation_time
    , value:resourceName::STRING resource_name
    , value:resourceId::STRING resource_Id
    , value:relationships::VARIANT relationships
    , value:configuration::VARIANT configuration
    , value:tags::VARIANT tags
FROM data.{base_name}_stream, LATERAL FLATTEN(input => v:configurationItems)
WHERE ARRAY_SIZE(v:configurationItems) > 0
'''

    db.create_stream(
        name=f'data.{base_name}_STREAM',
        target=f'data.{base_name}_STAGING'
    )

    # IAM change takes 5-15 seconds to take effect
    sleep(5)
    db.retry(
        lambda: db.create_pipe(
            name=pipe,
            sql=f"COPY INTO data.{base_name}_staging(v) FROM @data.{base_name}_stage/",
            replace=True,
            autoingest=True
        ),
        n=10,
        sleep_seconds_btw_retry=1
    )

    db.create_task(name=f'data.{base_name}_TASK', schedule='1 minute',
                   warehouse=WAREHOUSE, sql=config_ingest_task)

    db.execute(f"ALTER PIPE {pipe} REFRESH")

    pipe_description = list(db.fetch(f'DESC PIPE {pipe}'))
    if len(pipe_description) < 1:
        return {
            'newStage': 'error',
            'newMessage': f"{pipe} does not exist; please reach out to Snowflake Security for assistance."
        }
    else:
        sqs_arn = pipe_description[0]['notification_channel']

    return {
        'newStage': 'finalized',
        'newMessage': (
            f"Please add this SQS Queue ARN to the bucket event notification "
            f"channel for all object create events: {sqs_arn}"
        )
    }