def handler(event, context):
    """Recreate the Athena table for one account and billing period.

    Reads ``accountId``, ``year`` and ``month`` from ``event``, makes sure the
    Athena database exists, drops the table for that period and creates it
    again from the ``create_athena_table.sql`` template found under
    ``sql_path``.

    Args:
        event: dict with at least ``accountId``, ``year`` and ``month``. When
            present, ``xAccountSource`` is forced to ``False`` and ``roleArn``
            is blanked, so the operation is not executed cross-account.
        context: Lambda context object (unused).

    Returns:
        The (possibly mutated) ``event`` dict.

    Raises:
        KeyError: if ``accountId``, ``year`` or ``month`` is missing.
    """

    #get event data
    log.info("Received event {}".format(json.dumps(event)))
    accountid = event['accountId']
    year = event['year']
    month = event['month']

    #Ensure this operation is NOT executed xAcct
    if 'xAccountSource' in event: event['xAccountSource']=False
    if 'roleArn' in event: event['roleArn'] = ''

    athena = ath.AthenaQueryMgr(consts.ATHENA_BASE_OUTPUT_S3_BUCKET, accountid, year, month)

    #construct database name based on input parameters: costusage-accountid
    athena.create_database()

    #drop table for the current month - 20170601-20170701
    athena.drop_table()

    #TODO: dynamically create fields based on the latest AWS CUR manifest
    #TODO: use columnar format, for better performance
    #create new table for the current month
    prefix = consts.CUR_PROCESSOR_DEST_S3_PREFIX + accountid + "/" + utils.get_period_prefix(year, month)
    #Read the template inside a context manager so the file handle is always
    #closed, even if read() fails
    with open(sql_path+'/create_athena_table.sql', 'r') as sqlfile:
        createtablesql = sqlfile.read()
    sqlstatement = createtablesql.replace("{dbname}",athena.dbname).\
                                  replace("{tablename}",athena.tablename).\
                                  replace("{bucket}",consts.CUR_PROCESSOR_DEST_S3_BUCKET).\
                                  replace("{prefix}",prefix)
    athena.execute_query('create_table', sqlstatement)

    return event
Beispiel #2
0
 def __init__(self, athena_base_output_s3_bucket, accountid, year, month):
     """Derive the Athena output location, database name and table name.

     Args:
         athena_base_output_s3_bucket: base location under which query
             output is written.
         accountid: account id; used in the output location and in the
             database name, and stored as ``payerAccountid``.
         year: billing period year, passed to ``utils.get_period_prefix``.
         month: billing period month, passed to ``utils.get_period_prefix``.
     """
     # Query output goes under <base bucket>/<account id>/<period prefix>.
     self.athena_output_s3_location = "{base}/{account}/{period}".format(
         base=athena_base_output_s3_bucket,
         account=accountid,
         period=utils.get_period_prefix(year, month))

     # Query executions get their own folder and are encrypted with SSE_S3.
     executions_location = (
         self.athena_output_s3_location + QUERY_EXECUTIONS_FOLDER + "/")
     self.athena_result_configuration = {
         'OutputLocation': executions_location,
         'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'},
     }

     self.dbname = "costusage_" + accountid

     # Table name is the period prefix with "-" turned into "_" and "/" removed.
     table_suffix = utils.get_period_prefix(year, month)
     for old, new in (("-", "_"), ("/", "")):
         table_suffix = table_suffix.replace(old, new)
     self.tablename = "hourly_" + table_suffix

     self.payerAccountid = accountid
Beispiel #3
0
def handler(event, context):
    """Rebuild the Athena table for the account and period named in ``event``.

    Args:
        event: dict carrying ``accountId``, ``year``, ``month``,
            ``curManifest`` and ``destBucket``. If ``xAccountSource`` or
            ``roleArn`` are present they are reset to ``False`` / ``''``.
        context: Lambda context object (unused).

    Returns:
        The same ``event`` dict, after the resets described above.

    Raises:
        KeyError: if one of the required keys is missing from ``event``.
    """
    log.info("Received event {}".format(json.dumps(event)))

    # Required inputs, read in the same order the pipeline supplies them.
    account_id = event['accountId']
    report_year = event['year']
    report_month = event['month']
    manifest = event['curManifest']
    dest_bucket = event['destBucket']

    # This step must NOT run cross-account: neutralise both settings, but only
    # when the caller actually supplied them.
    for key, neutral_value in (('xAccountSource', False), ('roleArn', '')):
        if key in event:
            event[key] = neutral_value

    query_mgr = ath.AthenaQueryMgr(
        consts.ATHENA_BASE_OUTPUT_S3_BUCKET, account_id, report_year,
        report_month)

    # Make sure the database exists, then drop the table for this period so it
    # can be created again below.
    query_mgr.create_database()
    query_mgr.drop_table()

    #TODO: use columnar format, for better performance
    #TODO: move the prefix construction to a method in athena module, so it can be reused
    account_root = consts.CUR_PROCESSOR_DEST_S3_PREFIX + account_id + "/"
    table_prefix = account_root + utils.get_period_prefix(
        report_year, report_month)
    query_mgr.create_table(manifest, dest_bucket, table_prefix)

    return event
Beispiel #4
0
def handler(event, context):
    """Start the CUR-to-Athena Step Function for a newly delivered report.

    Triggered by an S3 event. Extracts the report period from the object key,
    builds the arguments for ``CostUsageProcessor``, starts an execution of
    the ``STEP_FUNCTION_PREPARE_CUR_ATHENA`` state machine and publishes an
    SNS notification that links to the execution.

    Args:
        event: S3 event notification; only the first entry of ``Records`` is
            read.
        context: Lambda context object (unused).

    Returns:
        The name of the started execution, or ``None`` when any step failed
        (the failure is printed, not re-raised).
    """

    log.info("Received event {}".format(json.dumps(event)))

    try:

        #Get relevant info from S3 event
        s3eventinfo = event['Records'][0]['s3']
        s3key = s3eventinfo['object']['key']

        #Prepare args for CostUsageProcessor
        kwargs = {}
        sourcePrefix, year, month = utils.extract_period(s3key)
        log.info("year:[{}] - month:[{}]".format(year, month))
        kwargs['startTimestamp'] = datetime.datetime.now(pytz.utc).strftime(
            consts.TIMESTAMP_FORMAT)
        kwargs['year'] = year
        kwargs['month'] = month
        kwargs['sourceBucket'] = s3eventinfo['bucket']['name']
        kwargs['sourcePrefix'] = sourcePrefix
        kwargs['destBucket'] = consts.CUR_PROCESSOR_DEST_S3_BUCKET
        kwargs['destPrefix'] = '{}placeholder/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX
        )  #placeholder is to avoid validation error when instantiating CostUsageProcessor

        curprocessor = cur.CostUsageProcessor(**kwargs)
        #The account id is only known once the processor exists, so the real
        #destination prefix is set afterwards
        curprocessor.destPrefix = '{}{}/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX, curprocessor.accountId)

        kwargs['accountId'] = curprocessor.accountId

        #Start execution
        period = utils.get_period_prefix(year, month).replace('/', '')
        #Short hash of the current time keeps execution names distinct
        execname = "{}-{}-{}".format(
            curprocessor.accountId, period,
            hashlib.md5(str(time.time()).encode("utf-8")).hexdigest()[:8])

        sfnresponse = sfnclient.start_execution(
            stateMachineArn=consts.STEP_FUNCTION_PREPARE_CUR_ATHENA,
            name=execname,
            input=json.dumps(kwargs))

        #Prepare SNS notification
        sfn_executionarn = sfnresponse['executionArn']
        sfn_executionlink = "https://console.aws.amazon.com/states/home?region={}#/executions/details/{}\n".format(
            consts.AWS_DEFAULT_REGION, sfn_executionarn)
        snsclient.publish(
            TopicArn=consts.SNS_TOPIC,
            Message=
            'New Cost and Usage report. Started execution. Click here to view status: {}'
            .format(sfn_executionlink),
            Subject=
            'New incoming Cost and Usage report - accountid:{} - period:{}'.
            format(curprocessor.accountId, period))

        log.info(
            "Started execution - executionArn: {}".format(sfn_executionarn))

        return execname

    except Exception as e:
        #Best-effort handler: report the failure and fall through (returns None).
        #str(e) is used because Python 3 exceptions have no .message attribute;
        #reading it would raise AttributeError here and hide the real error.
        traceback.print_exc()
        print("Exception message:[" + str(e) + "]")
def handler(event, context):
    """Start a Step Function execution for every account with a new CUR.

    Scans the account metadata DynamoDB table for active accounts whose
    ``lastProcessedTimestamp`` is older than now minus ``MINUTE_DELTA``. For
    each one, the last-modified time of the latest CUR manifest is compared
    with ``lastProcessedTimestamp``; when the manifest is newer, an execution
    of the ``STEP_FUNCTION_PREPARE_CUR_ATHENA`` state machine is started.
    A single SNS notification listing all started executions is published.

    Accounts that fail (manifest not found, boto client error, any other
    exception) are logged and skipped so one bad account does not stop the
    rest.

    Args:
        event: invocation payload; only logged.
        context: Lambda context object (unused).

    Returns:
        List with the names of the executions that were started.
    """

    log.info("Received event {}".format(json.dumps(event, indent=4)))

    #Get accounts that are ready for CUR - the ones with reports older than MINUTE_DELTA
    MINUTE_DELTA = 0
    lastProcessedIncludeTs = (
        datetime.datetime.now(pytz.utc) +
        datetime.timedelta(minutes=-MINUTE_DELTA)).strftime(
            consts.TIMESTAMP_FORMAT)

    log.info(
        "Looking for AwsAccountMetadata items processed before [{}] in table [{}]"
        .format(lastProcessedIncludeTs, consts.AWS_ACCOUNT_METADATA_DDB_TABLE))

    metadatatable = ddbresource.Table(consts.AWS_ACCOUNT_METADATA_DDB_TABLE)
    scanargs = {
        'Select': 'ALL_ATTRIBUTES',
        'FilterExpression': boto3.dynamodb.conditions.Attr(
            'lastProcessedTimestamp').lt(lastProcessedIncludeTs)
        & boto3.dynamodb.conditions.Attr('dataCollectionStatus').eq(
            consts.DATA_COLLECTION_STATUS_ACTIVE),
        'ReturnConsumedCapacity': 'TOTAL',
    }

    #A scan call returns one page at a time; keep following LastEvaluatedKey
    #so accounts beyond the first page are not silently skipped
    items = []
    while True:
        response = metadatatable.scan(**scanargs)
        log.info(json.dumps(response, indent=4))
        items.extend(response['Items'])
        if 'LastEvaluatedKey' not in response:
            break
        scanargs['ExclusiveStartKey'] = response['LastEvaluatedKey']

    sfn_executionlinks = []
    execnames = []
    #Get metadata for each of those accounts and prepare args for CostUsageProcessor
    for item in items:

        #Prepare args for CostUsageProcessor
        kwargs = {}
        now = datetime.datetime.now(pytz.utc)
        kwargs['startTimestamp'] = now.strftime(consts.TIMESTAMP_FORMAT)
        year = now.strftime("%Y")
        month = now.strftime("%m")
        kwargs['year'] = year
        kwargs['month'] = month
        kwargs['sourceBucket'] = item['curBucket']
        kwargs['sourcePrefix'] = "{}{}/".format(item['curPrefix'],
                                                item['curName'])
        kwargs['destBucket'] = consts.CUR_PROCESSOR_DEST_S3_BUCKET
        kwargs['destPrefix'] = '{}{}/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX, item['awsPayerAccountId'])
        kwargs['accountId'] = item['awsPayerAccountId']
        kwargs['xAccountSource'] = True
        kwargs['roleArn'] = item['roleArn']

        #See how old is the latest CUR manifest in S3 and compare it against the lastProcessedTimestamp in the AWSAccountMetadata DDB table
        #If the CUR manifest is newer, then start processing
        try:
            log.info("Starting new CUR evaluation for account [{}]".format(
                kwargs['accountId']))
            curprocessor = cur.CostUsageProcessor(**kwargs)
            cur_manifest_lastmodified_ts = curprocessor.get_aws_manifest_lastmodified_ts(
            )
        except ManifestNotFoundError as e:
            #Nothing to process for this account yet
            #Format the exception itself: works whether or not the class
            #defines a .message attribute
            log.info("ManifestNotFoundError [{}]".format(e))
            continue
            #TODO: add CW metric filter and alarm for CURs not found
        except BotoClientError as be:
            errorcode = be.response['Error']['Code']
            if errorcode == 'AccessDenied':
                errorType = 'BotoAccessDenied'
            else:
                errorType = 'BotoClientError_' + errorcode
            #Report from kwargs, not curprocessor: when the constructor itself
            #raised, curprocessor is unbound (or left over from the previous
            #account)
            log.error("{} awsPayerAccountId [{}] roleArn [{}] [{}]".format(
                errorType, kwargs['accountId'], kwargs['roleArn'], be))

            continue

        except Exception as e:
            log.error(
                "xAcctStepFunctionStarterException awsPayerAccountId [{}] roleArn [{}] [{}]"
                .format(kwargs['accountId'], kwargs['roleArn'], e))
            traceback.print_exc()
            continue

        lastProcessedTs = datetime.datetime.strptime(
            item['lastProcessedTimestamp'],
            consts.TIMESTAMP_FORMAT).replace(tzinfo=pytz.utc)
        log.info(
            "cur_manifest_lastmodified_ts:[{}] - lastProcessedTimestamp:[{}]".
            format(cur_manifest_lastmodified_ts,
                   item['lastProcessedTimestamp']))
        if cur_manifest_lastmodified_ts > lastProcessedTs:
            #Start execution
            period = utils.get_period_prefix(year, month).replace('/', '')
            #Short hash of the current time keeps execution names distinct
            execname = "{}-{}-{}".format(
                curprocessor.accountId, period,
                hashlib.md5(str(time.time()).encode("utf-8")).hexdigest()[:8])
            sfnresponse = sfnclient.start_execution(
                stateMachineArn=consts.STEP_FUNCTION_PREPARE_CUR_ATHENA,
                name=execname,
                input=json.dumps(kwargs))

            #Prepare SNS notification
            sfn_executionarn = sfnresponse['executionArn']
            #Use the configured region instead of a hard-coded us-east-1 so the
            #console link points at the region the execution runs in
            sfn_executionlink = "https://console.aws.amazon.com/states/home?region={}#/executions/details/{}\n".format(
                consts.AWS_DEFAULT_REGION, sfn_executionarn)
            sfn_executionlinks.append(sfn_executionlink)
            execnames.append(execname)

            log.info("Started execution - executionArn: {}".format(
                sfn_executionarn))

    #Only notify when at least one execution was started
    if sfn_executionlinks:
        snsclient.publish(
            TopicArn=consts.SNS_TOPIC,
            Message='New Cost and Usage report. Started execution:\n' +
            "".join(sfn_executionlinks),
            Subject='New incoming Cost and Usage report executions')

    log.info("Started executions: [{}]".format(execnames))

    return execnames
Beispiel #6
0
def main(argv):
    """Command-line entry point for the Cost and Usage Report processor.

    Parses the command line, builds the keyword arguments for
    ``CostUsageProcessor`` and runs the requested ``--action``: preparing the
    report for Athena or QuickSight (which also recreates the Athena table
    for the period), creating a manifest, or testing a role.

    Args:
        argv: accepted for the usual ``main(sys.argv[1:])`` call style; the
            options themselves are read by argparse from ``sys.argv``.

    Returns:
        None. Failures while running the action are printed, not re-raised.
        With no command-line options at all, the help text is printed and
        the process exits with status 1.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--action', help='', required=True)
    parser.add_argument('--manifest-type', help='', required=False)
    parser.add_argument('--year', help='', required=True)
    parser.add_argument('--month', help='', required=True)
    parser.add_argument('--limit', help='', required=False)
    parser.add_argument('--source-bucket', help='', required=True)
    parser.add_argument('--source-prefix', help='', required=True)
    parser.add_argument('--dest-bucket', help='', required=False)
    parser.add_argument('--dest-prefix', help='', required=False)
    parser.add_argument('--role-arn', help='', required=False)
    parser.add_argument('--xacct-source', help='', required=False)
    parser.add_argument('--xacct-dest', help='', required=False)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    kwargs = {}

    action = ''
    if args.action: action = args.action

    manifestType = ''
    if args.manifest_type: manifestType = args.manifest_type

    kwargs['limit'] = 1000
    if args.limit: kwargs['limit'] = args.limit

    #Only options that were actually supplied are forwarded to the processor
    if args.year: kwargs['year'] = args.year
    if args.month: kwargs['month'] = args.month
    if args.source_bucket: kwargs['sourceBucket'] = args.source_bucket
    if args.source_prefix: kwargs['sourcePrefix'] = args.source_prefix
    if args.dest_bucket: kwargs['destBucket'] = args.dest_bucket
    if args.dest_prefix: kwargs['destPrefix'] = args.dest_prefix
    if args.role_arn: kwargs['roleArn'] = args.role_arn
    if args.xacct_source: kwargs['xAccountSource'] = True
    if args.xacct_dest: kwargs['xAccountDest'] = True

    try:

        if action not in consts.VALID_ACTIONS:
            raise Exception("Invalid action, valid options are: {}".format(
                consts.VALID_ACTIONS))

        #For manifest creation the destination is derived from the source
        if action == consts.ACTION_CREATE_MANIFEST:
            kwargs['destBucket'] = kwargs['sourceBucket'] + '-dest'
            kwargs['destPrefix'] = kwargs['sourcePrefix'] + 'dest/'

        curprocessor = cur.CostUsageProcessor(**kwargs)

        if action in (consts.ACTION_PREPARE_ATHENA,
                      consts.ACTION_PREPARE_QUICKSIGHT):
            #Process Cost and Usage Report
            destS3keys = curprocessor.process_latest_aws_cur(action)

            #Then create Athena table for the current month
            athena = ath.AthenaQueryMgr("s3://" + curprocessor.destBucket,
                                        curprocessor.accountId,
                                        curprocessor.year, curprocessor.month)
            athena.create_database()
            athena.drop_table(
            )  #drops the table for the current month (before creating a new one)
            curS3Prefix = curprocessor.destPrefix + curprocessor.accountId + "/" + curutils.get_period_prefix(
                curprocessor.year, curprocessor.month)
            print("Creating Athena table for S3 location [s3://{}/{}]".format(
                curprocessor.destBucket, curS3Prefix))
            athena.create_table(curprocessor.curManifestJson,
                                curprocessor.destBucket, curS3Prefix)

            if action == consts.ACTION_PREPARE_QUICKSIGHT:
                curprocessor.create_manifest(consts.MANIFEST_TYPE_QUICKSIGHT,
                                             kwargs['destBucket'],
                                             kwargs['destPrefix'], destS3keys)
                curprocessor.create_manifest(consts.MANIFEST_TYPE_REDSHIFT,
                                             kwargs['destBucket'],
                                             kwargs['destPrefix'], destS3keys)

        if action == consts.ACTION_CREATE_MANIFEST:
            curprocessor.create_manifest(manifestType, kwargs['sourceBucket'],
                                         kwargs['sourcePrefix'], [])

        if action == consts.ACTION_TEST_ROLE:
            curprocessor.test_role()

    except Exception as e:
        #Top-level boundary: report the failure instead of crashing.
        #str(e) is used because Python 3 exceptions have no .message attribute;
        #reading it would raise AttributeError here and hide the real error.
        traceback.print_exc()
        print("Exception message:[" + str(e) + "]")