def handler(event, context):
    """Recreate the Athena table that holds one account's monthly cost and usage data.

    Builds an ``ath.AthenaQueryMgr`` for the account and period found in
    ``event``, creates the database, drops the table for that period and
    creates it again from the ``create_athena_table.sql`` template.

    Args:
        event: dict with the keys ``accountId``, ``year`` and ``month``. When
            present, ``xAccountSource`` is reset to ``False`` and ``roleArn``
            to ``''`` (the dict is modified in place).
        context: invocation context; not used by this function.

    Returns:
        The same ``event`` dict that was passed in.
    """
    #get event data
    log.info("Received event {}".format(json.dumps(event)))
    accountid = event['accountId']
    year = event['year']
    month = event['month']

    #Ensure this operation is NOT executed xAcct
    if 'xAccountSource' in event:
        event['xAccountSource'] = False
    if 'roleArn' in event:
        event['roleArn'] = ''

    athena = ath.AthenaQueryMgr(consts.ATHENA_BASE_OUTPUT_S3_BUCKET, accountid, year, month)
    #construct database name based on input parameters: costusage-accountid
    athena.create_database()
    #drop table for the current month - 20170601-20170701
    athena.drop_table()

    #TODO: dynamically create fields based on the latest AWS CUR manifest
    #TODO: use columnar format, for better performance
    #create new table for the current month
    prefix = consts.CUR_PROCESSOR_DEST_S3_PREFIX + accountid + "/" + utils.get_period_prefix(year, month)

    # Read the template inside a context manager so the file handle is always
    # closed, even if reading fails.
    with open(sql_path+'/create_athena_table.sql', 'r') as sqlfile:
        createtablesql = sqlfile.read()

    sqlstatement = createtablesql.replace("{dbname}", athena.dbname)
    sqlstatement = sqlstatement.replace("{tablename}", athena.tablename)
    sqlstatement = sqlstatement.replace("{bucket}", consts.CUR_PROCESSOR_DEST_S3_BUCKET)
    sqlstatement = sqlstatement.replace("{prefix}", prefix)
    athena.execute_query('create_table', sqlstatement)

    return event
def __init__(self, athena_base_output_s3_bucket, accountid, year, month):
    """Derive the output location, result configuration and object names for one account and period.

    Args:
        athena_base_output_s3_bucket: base location under which query output is placed.
        accountid: account id; used in the output location, the database name
            and stored as ``payerAccountid``.
        year: year passed to ``utils.get_period_prefix``.
        month: month passed to ``utils.get_period_prefix``.
    """
    # Query output goes under <base location>/<account id>/<period prefix>.
    output_location = "{}/{}/{}".format(
        athena_base_output_s3_bucket,
        accountid,
        utils.get_period_prefix(year, month))
    self.athena_output_s3_location = output_location

    encryption_settings = {'EncryptionOption': 'SSE_S3'}
    self.athena_result_configuration = {
        'OutputLocation': output_location + QUERY_EXECUTIONS_FOLDER + "/",
        'EncryptionConfiguration': encryption_settings,
    }

    self.dbname = "costusage_" + accountid

    # Table name suffix is the period prefix with "-" turned into "_" and "/" removed.
    period_prefix = utils.get_period_prefix(year, month)
    table_suffix = period_prefix.replace("-", "_").replace("/", "")
    self.tablename = "hourly_" + table_suffix

    self.payerAccountid = accountid
def handler(event, context):
    """Recreate the Athena table for one account and period from a CUR manifest.

    Reads ``accountId``, ``year``, ``month``, ``curManifest`` and
    ``destBucket`` from ``event``, neutralises the cross-account fields when
    they are present, then creates the database, drops the period's table and
    creates it again through ``ath.AthenaQueryMgr``.

    Args:
        event: dict carrying the keys listed above; modified in place.
        context: invocation context; not used by this function.

    Returns:
        The same ``event`` dict that was passed in.
    """
    #get event data
    log.info("Received event {}".format(json.dumps(event)))
    account = event['accountId']
    report_year = event['year']
    report_month = event['month']
    manifest = event['curManifest']
    bucket = event['destBucket']

    #Ensure this operation is NOT executed xAcct
    for field, neutral_value in (('xAccountSource', False), ('roleArn', '')):
        if field in event:
            event[field] = neutral_value

    query_mgr = ath.AthenaQueryMgr(
        consts.ATHENA_BASE_OUTPUT_S3_BUCKET, account, report_year, report_month)
    #construct database name based on input parameters: costusage-accountid
    query_mgr.create_database()
    #drop table for the current month - 20170601-20170701
    query_mgr.drop_table()

    #TODO: use columnar format, for better performance
    #create new table for the current month
    #TODO: move to a method in athena module, so it can be reused
    account_prefix = consts.CUR_PROCESSOR_DEST_S3_PREFIX + account + "/"
    s3_prefix = account_prefix + utils.get_period_prefix(report_year, report_month)
    query_mgr.create_table(manifest, bucket, s3_prefix)

    return event
def handler(event, context):
    """Start the CUR preparation state machine for a report announced by an S3 event.

    The object key of the first record in ``event`` is parsed with
    ``utils.extract_period`` to get the source prefix, year and month. A
    ``cur.CostUsageProcessor`` is built from those values to obtain the
    account id, a Step Functions execution is started with the collected
    arguments as JSON input, and an SNS message linking to the execution is
    published.

    Args:
        event: S3 notification event; only ``event['Records'][0]['s3']`` is read.
        context: invocation context; not used by this function.

    Returns:
        The name of the started execution, or ``None`` if any step inside the
        ``try`` block raised (the error is printed, not propagated).
    """
    log.info("Received event {}".format(json.dumps(event)))

    try:
        #Get relevant info from S3 event
        s3eventinfo = event['Records'][0]['s3']
        s3key = s3eventinfo['object']['key']

        #Prepare args for CostUsageProcessor
        kwargs = {}
        sourcePrefix, year, month = utils.extract_period(s3key)
        log.info("year:[{}] - month:[{}]".format(year, month))
        kwargs['startTimestamp'] = datetime.datetime.now(pytz.utc).strftime(
            consts.TIMESTAMP_FORMAT)
        kwargs['year'] = year
        kwargs['month'] = month
        kwargs['sourceBucket'] = s3eventinfo['bucket']['name']
        kwargs['sourcePrefix'] = sourcePrefix
        kwargs['destBucket'] = consts.CUR_PROCESSOR_DEST_S3_BUCKET
        #placeholder is to avoid validation error when instantiating CostUsageProcessor
        kwargs['destPrefix'] = '{}placeholder/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX)

        curprocessor = cur.CostUsageProcessor(**kwargs)
        # The real destination prefix needs the account id, which is only
        # known once the processor exists.
        curprocessor.destPrefix = '{}{}/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX, curprocessor.accountId)
        kwargs['accountId'] = curprocessor.accountId

        #Start execution
        period = utils.get_period_prefix(year, month).replace('/', '')
        # A short hash of the current time keeps execution names distinct.
        execname = "{}-{}-{}".format(
            curprocessor.accountId, period,
            hashlib.md5(str(time.time()).encode("utf-8")).hexdigest()[:8])

        sfnresponse = sfnclient.start_execution(
            stateMachineArn=consts.STEP_FUNCTION_PREPARE_CUR_ATHENA,
            name=execname,
            input=json.dumps(kwargs))

        #Prepare SNS notification
        sfn_executionarn = sfnresponse['executionArn']
        sfn_executionlink = "https://console.aws.amazon.com/states/home?region={}#/executions/details/{}\n".format(
            consts.AWS_DEFAULT_REGION, sfn_executionarn)
        snsclient.publish(
            TopicArn=consts.SNS_TOPIC,
            Message='New Cost and Usage report. Started execution. Click here to view status: {}'
            .format(sfn_executionlink),
            Subject='New incoming Cost and Usage report - accountid:{} - period:{}'.format(
                curprocessor.accountId, period))

        log.info(
            "Started execution - executionArn: {}".format(sfn_executionarn))

        return execname

    except Exception as e:
        traceback.print_exc()
        # Exceptions have no ``message`` attribute on Python 3; fall back to
        # the exception itself so this handler cannot fail while reporting.
        print("Exception message:[" + str(getattr(e, 'message', e)) + "]")
def handler(event, context):
    """Start CUR preparation executions for accounts whose CUR manifest is newer than their last run.

    Scans the account metadata table for items whose
    ``lastProcessedTimestamp`` is older than now (minus ``MINUTE_DELTA``
    minutes) and whose ``dataCollectionStatus`` is active. For each item a
    ``cur.CostUsageProcessor`` is built; when the manifest's last-modified
    timestamp is later than the item's ``lastProcessedTimestamp`` a Step
    Functions execution is started. One SNS message listing all started
    executions is published if at least one was started.

    Args:
        event: incoming event; only logged.
        context: invocation context; not used by this function.

    Returns:
        List with the names of the executions that were started.
    """
    log.info("Received event {}".format(json.dumps(event, indent=4)))

    #Get accounts that are ready for CUR - the ones with reports older than MINUTE_DELTA
    MINUTE_DELTA = 0
    lastProcessedIncludeTs = (
        datetime.datetime.now(pytz.utc) +
        datetime.timedelta(minutes=-MINUTE_DELTA)).strftime(
            consts.TIMESTAMP_FORMAT)
    log.info(
        "Looking for AwsAccountMetadata items processed before [{}] in table [{}]"
        .format(lastProcessedIncludeTs, consts.AWS_ACCOUNT_METADATA_DDB_TABLE))

    metadatatable = ddbresource.Table(consts.AWS_ACCOUNT_METADATA_DDB_TABLE)
    scanargs = {
        'Select': 'ALL_ATTRIBUTES',
        'FilterExpression': boto3.dynamodb.conditions.Attr(
            'lastProcessedTimestamp').lt(lastProcessedIncludeTs)
        & boto3.dynamodb.conditions.Attr('dataCollectionStatus').eq(
            consts.DATA_COLLECTION_STATUS_ACTIVE),
        'ReturnConsumedCapacity': 'TOTAL',
    }

    # A scan returns results one page at a time; keep following
    # LastEvaluatedKey so accounts beyond the first page are not skipped.
    items = []
    while True:
        response = metadatatable.scan(**scanargs)
        log.info(json.dumps(response, indent=4))
        items.extend(response['Items'])
        if 'LastEvaluatedKey' not in response:
            break
        scanargs['ExclusiveStartKey'] = response['LastEvaluatedKey']

    sfn_executionlinks = []
    execnames = []

    #Get metadata for each of those accounts and prepare args for CostUsageProcessor
    for item in items:
        #Prepare args for CostUsageProcessor
        kwargs = {}
        now = datetime.datetime.now(pytz.utc)
        kwargs['startTimestamp'] = now.strftime(consts.TIMESTAMP_FORMAT)
        year = now.strftime("%Y")
        month = now.strftime("%m")
        kwargs['year'] = year
        kwargs['month'] = month
        kwargs['sourceBucket'] = item['curBucket']
        kwargs['sourcePrefix'] = "{}{}/".format(item['curPrefix'], item['curName'])
        kwargs['destBucket'] = consts.CUR_PROCESSOR_DEST_S3_BUCKET
        kwargs['destPrefix'] = '{}{}/'.format(
            consts.CUR_PROCESSOR_DEST_S3_PREFIX, item['awsPayerAccountId'])
        kwargs['accountId'] = item['awsPayerAccountId']
        kwargs['xAccountSource'] = True
        kwargs['roleArn'] = item['roleArn']

        #See how old is the latest CUR manifest in S3 and compare it against the lastProcessedTimestamp in the AWSAccountMetadata DDB table
        #If the CUR manifest is newer, then start processing
        try:
            log.info("Starting new CUR evaluation for account [{}]".format(
                kwargs['accountId']))
            curprocessor = cur.CostUsageProcessor(**kwargs)
            cur_manifest_lastmodified_ts = curprocessor.get_aws_manifest_lastmodified_ts()
        except ManifestNotFoundError as e:
            # ``message`` is not guaranteed to exist on every exception
            # object; fall back to the exception itself.
            log.info("ManifestNotFoundError [{}]".format(getattr(e, 'message', e)))
            continue  #TODO: add CW metric filter and alarm for CURs not found
        except BotoClientError as be:
            if be.response['Error']['Code'] == 'AccessDenied':
                errorType = 'BotoAccessDenied'
            else:
                errorType = 'BotoClientError_' + be.response['Error']['Code']
            # Log from kwargs rather than curprocessor: if the constructor is
            # what raised, curprocessor is unbound or still refers to the
            # previous account.
            log.error("{} awsPayerAccountId [{}] roleArn [{}] [{}]".format(
                errorType, kwargs['accountId'], kwargs['roleArn'],
                getattr(be, 'message', be)))
            continue
        except Exception as e:
            log.error(
                "xAcctStepFunctionStarterException awsPayerAccountId [{}] roleArn [{}] [{}]"
                .format(kwargs['accountId'], kwargs['roleArn'], e))
            traceback.print_exc()
            continue

        lastProcessedTs = datetime.datetime.strptime(
            item['lastProcessedTimestamp'],
            consts.TIMESTAMP_FORMAT).replace(tzinfo=pytz.utc)
        log.info(
            "cur_manifest_lastmodified_ts:[{}] - lastProcessedTimestamp:[{}]".format(
                cur_manifest_lastmodified_ts, item['lastProcessedTimestamp']))

        if cur_manifest_lastmodified_ts > lastProcessedTs:
            #Start execution
            period = utils.get_period_prefix(year, month).replace('/', '')
            # A short hash of the current time keeps execution names distinct.
            execname = "{}-{}-{}".format(
                curprocessor.accountId, period,
                hashlib.md5(str(time.time()).encode("utf-8")).hexdigest()[:8])
            sfnresponse = sfnclient.start_execution(
                stateMachineArn=consts.STEP_FUNCTION_PREPARE_CUR_ATHENA,
                name=execname,
                input=json.dumps(kwargs))

            #Prepare SNS notification
            sfn_executionarn = sfnresponse['executionArn']
            # Use the configured region instead of a hard-coded one so the
            # link points at the region the execution was started in.
            sfn_executionlinks.append(
                "https://console.aws.amazon.com/states/home?region={}#/executions/details/{}\n".format(
                    consts.AWS_DEFAULT_REGION, sfn_executionarn))
            execnames.append(execname)

            log.info("Started execution - executionArn: {}".format(
                sfn_executionarn))

    if sfn_executionlinks:
        snsclient.publish(
            TopicArn=consts.SNS_TOPIC,
            Message='New Cost and Usage report. Started execution:\n' +
            "".join(sfn_executionlinks),
            Subject='New incoming Cost and Usage report executions')

    log.info("Started executions: [{}]".format(execnames))

    return execnames
def main(argv):
    """Command-line entry point: run one Cost and Usage Report action.

    Parses the command line, builds a ``cur.CostUsageProcessor`` from the
    supplied options and runs the requested action (prepare Athena, prepare
    QuickSight, create manifest or test role). Any exception raised while
    running the action is printed instead of propagated.

    Args:
        argv: not used; options are read from ``sys.argv`` by ``argparse``.

    Returns:
        None. Exits with status 1 after printing help when no arguments
        were given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--action', help='', required=True)
    parser.add_argument('--manifest-type', help='', required=False)
    parser.add_argument('--year', help='', required=True)
    parser.add_argument('--month', help='', required=True)
    parser.add_argument('--limit', help='', required=False)
    parser.add_argument('--source-bucket', help='', required=True)
    parser.add_argument('--source-prefix', help='', required=True)
    parser.add_argument('--dest-bucket', help='', required=False)
    parser.add_argument('--dest-prefix', help='', required=False)
    parser.add_argument('--role-arn', help='', required=False)
    parser.add_argument('--xacct-source', help='', required=False)
    parser.add_argument('--xacct-dest', help='', required=False)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    action = args.action or ''
    manifestType = args.manifest_type or ''

    # NOTE(review): a --limit given on the command line stays a string while
    # the default below is an int - confirm what CostUsageProcessor expects.
    kwargs = {'limit': 1000}

    # (argparse attribute, CostUsageProcessor keyword) pairs that are copied
    # over only when a non-empty value was supplied.
    passthrough = (
        ('limit', 'limit'),
        ('year', 'year'),
        ('month', 'month'),
        ('source_bucket', 'sourceBucket'),
        ('source_prefix', 'sourcePrefix'),
        ('dest_bucket', 'destBucket'),
        ('dest_prefix', 'destPrefix'),
        ('role_arn', 'roleArn'),
    )
    for attrname, key in passthrough:
        value = getattr(args, attrname)
        if value:
            kwargs[key] = value

    # Any non-empty value switches the cross-account flags on.
    if args.xacct_source:
        kwargs['xAccountSource'] = True
    if args.xacct_dest:
        kwargs['xAccountDest'] = True

    try:
        if action not in consts.VALID_ACTIONS:
            raise Exception("Invalid action, valid options are: {}".format(
                consts.VALID_ACTIONS))

        if action == consts.ACTION_CREATE_MANIFEST:
            kwargs['destBucket'] = kwargs['sourceBucket'] + '-dest'
            kwargs['destPrefix'] = kwargs['sourcePrefix'] + 'dest/'

        curprocessor = cur.CostUsageProcessor(**kwargs)

        if action in (consts.ACTION_PREPARE_ATHENA,
                      consts.ACTION_PREPARE_QUICKSIGHT):
            #Process Cost and Usage Report
            destS3keys = curprocessor.process_latest_aws_cur(action)

            #Then create Athena table for the current month
            athena = ath.AthenaQueryMgr("s3://" + curprocessor.destBucket,
                                        curprocessor.accountId,
                                        curprocessor.year, curprocessor.month)
            athena.create_database()
            #drops the table for the current month (before creating a new one)
            athena.drop_table()
            curS3Prefix = curprocessor.destPrefix + curprocessor.accountId + "/" + curutils.get_period_prefix(
                curprocessor.year, curprocessor.month)
            print("Creating Athena table for S3 location [s3://{}/{}]".format(
                curprocessor.destBucket, curS3Prefix))
            athena.create_table(curprocessor.curManifestJson,
                                curprocessor.destBucket, curS3Prefix)

        if action == consts.ACTION_PREPARE_QUICKSIGHT:
            curprocessor.create_manifest(consts.MANIFEST_TYPE_QUICKSIGHT,
                                         kwargs['destBucket'],
                                         kwargs['destPrefix'], destS3keys)
            curprocessor.create_manifest(consts.MANIFEST_TYPE_REDSHIFT,
                                         kwargs['destBucket'],
                                         kwargs['destPrefix'], destS3keys)

        if action == consts.ACTION_CREATE_MANIFEST:
            curprocessor.create_manifest(manifestType, kwargs['sourceBucket'],
                                         kwargs['sourcePrefix'], [])

        if action == consts.ACTION_TEST_ROLE:
            curprocessor.test_role()

    except Exception as e:
        traceback.print_exc()
        # Exceptions have no ``message`` attribute on Python 3; fall back to
        # the exception itself so error reporting cannot fail in turn.
        print("Exception message:[" + str(getattr(e, 'message', e)) + "]")