# Imports shared by the loader functions below (reconstructed; the original
# modules import these individually). The 'ariel' package providing utils is
# assumed from the surrounding project.
import datetime
import logging
import os
import time
from time import sleep

import boto3
import pandas as pd
import yaml
from botocore.exceptions import ClientError

from ariel import utils

LOGGER = logging.getLogger(__name__)


def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-account-names.yaml'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        retries = utils.get_config_value(config, 'ACCOUNT_NAMES', 'RETRIES', 5)
        file = utils.get_config_value(config, 'ACCOUNT_NAMES', 'FILE', '')

        account_names = {}
        if role != '':
            # Organizations should be queried, load that first
            session = utils.assume_role(boto3.Session(), role)
            org = session.client('organizations', region_name='us-east-1')
            rsp = org.list_accounts()
            while True:
                for account in rsp['Accounts']:
                    account_names[account['Id']] = account['Name']
                if 'NextToken' in rsp:
                    for i in range(retries):
                        try:
                            rsp = org.list_accounts(NextToken=rsp['NextToken'])
                            break
                        except ClientError:
                            # Re-raise once the final retry fails. The original
                            # compared i == retries, which range() never reaches,
                            # so exhausted retries looped forever on a stale page.
                            if i == retries - 1:
                                raise
                            sleep(0.5 + 0.1 * i)
                    continue
                break

        if file != '':
            # Update account names with file contents
            with utils.get_read_handle(file) as f:
                account_names.update(yaml.load(f, Loader=yaml.FullLoader))

        with open(cache_file, 'w') as outfile:
            yaml.dump(account_names, outfile, default_flow_style=False)
        return account_names

    # Cache hit: read the account names back from the cache file.
    with utils.get_read_handle(cache_file) as infile:
        account_names = yaml.load(infile, Loader=yaml.FullLoader)
    return account_names
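
# Hypothetical configuration sketch (not from the source): every loader in
# this module pulls its settings through utils.get_config_value(config,
# section, key, default). Assuming an INI-style config, the keys referenced
# by the account-names loader above could be supplied as:
#
#   [DEFAULTS]
#   CACHING = True
#   AWS_REGION = us-east-1
#
#   [ACCOUNT_NAMES]
#   RETRIES = 5
#   FILE = s3://example-bucket/account-names.yaml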
def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-unlimited-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        # 's3://bucket/prefix/' splits into ('s3:', '', 'bucket', 'prefix/')
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')
        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Download Instance and RI usage
        query = ' '.join((
            "SELECT line_item_usage_account_id AS accountid, " +
            "       product_region AS region, " +
            "       lower(product_instance) AS instancetypefamily, " +
            "       sum(line_item_usage_amount) AS unlimitedusageamount, " +
            "       sum(line_item_unblended_cost) AS unlimitedusagecost " +
            "  FROM " + database + ".cur " +
            " WHERE line_item_usage_type like '%CPUCredits:%' " +
            "   AND line_item_usage_start_date >= cast('{}' as timestamp) ".format(starttime.isoformat(' ')) +
            "   AND line_item_usage_start_date < cast('{}' as timestamp) ".format(endtime.isoformat(' ')) +
            " GROUP BY line_item_usage_account_id, product_region, lower(product_instance) " +
            " ORDER BY line_item_usage_account_id, product_region, lower(product_instance) "
        ).split())
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file)
    if len(result) == 0:
        result = pd.DataFrame(columns=[
            'accountid', 'region', 'instancetypefamily',
            'unlimitedusageamount', 'unlimitedusagecost'
        ])
    result['accountid'] = result['accountid'].map('{:012}'.format)
    result['unlimitedusageamount'] = result['unlimitedusageamount'].map(
        '{:.2f}'.format)
    result['unlimitedusagecost'] = result['unlimitedusagecost'].map(
        '${:,.2f}'.format)
    LOGGER.info("Loaded {} unlimited rows".format(len(result)))
    return result
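
# The loaders in this module rely on utils.execute_athena_query to start a
# query, block until it completes, and return the query execution id; Athena
# then leaves the result set at {staging}{query_id}.csv. A minimal sketch of
# such a helper using only the standard boto3 Athena API (hypothetical -- the
# real utils helper may differ):
def _execute_athena_query_sketch(athena, staging, query, poll_interval=2):
    # Start the query, directing results at the staging location.
    query_id = athena.start_query_execution(
        QueryString=query,
        ResultConfiguration={'OutputLocation': staging})['QueryExecutionId']
    # Poll until Athena reports a terminal state.
    while True:
        state = athena.get_query_execution(
            QueryExecutionId=query_id)['QueryExecution']['Status']['State']
        if state == 'SUCCEEDED':
            return query_id
        if state in ('FAILED', 'CANCELLED'):
            raise Exception('Athena query {} ended as {}'.format(query_id, state))
        time.sleep(poll_interval)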
def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-reserved-instances.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
        ris = pd.read_csv(cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'RESERVED_INSTANCES', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))

        # start date cannot be after 2 days ago for GetReservationUtilization
        monthend = datetime.date.today()
        monthstart = datetime.date.today() - datetime.timedelta(days=31)

        ris = []
        if role != '':
            session = utils.assume_role(boto3.Session(), role)
            ce = session.client('ce', region_name=region)
            rsp = ce.get_reservation_utilization(
                TimePeriod={"Start": str(monthstart), "End": str(monthend)},
                GroupBy=[{"Type": "DIMENSION", "Key": "SUBSCRIPTION_ID"}])
            while True:
                groups = rsp['UtilizationsByTime'][0]['Groups']
                for row in groups:
                    # Make sure to only capture active RIs
                    endDate = datetime.datetime.strptime(
                        row['Attributes']['endDateTime'],
                        "%Y-%m-%dT%H:%M:%S.000Z")
                    if endDate.date() > datetime.date.today():
                        # Normalize 'Linux/UNIX' to 'Linux' for CUR compatibility
                        operatingSystem = ('Linux'
                                           if row['Attributes']['platform'] == 'Linux/UNIX'
                                           else row['Attributes']['platform'])
                        ri = {
                            'accountid': int(row['Attributes']['accountId']),
                            'accountname': row['Attributes']['accountName'],
                            'reservationid': row['Attributes']['leaseId'],
                            'subscriptionid': row['Attributes']['subscriptionId'],
                            'startdate': row['Attributes']['startDateTime'],
                            'enddate': row['Attributes']['endDateTime'],
                            'state': row['Attributes']['subscriptionStatus'],
                            'quantity': int(row['Attributes']['numberOfInstances']),
                            'availabilityzone': row['Attributes']['availabilityZone'],
                            'region': row['Attributes']['region'],
                            'instancetype': row['Attributes']['instanceType'],
                            'paymentoption': row['Attributes']['subscriptionType'],
                            'tenancy': row['Attributes']['tenancy'],
                            'operatingsystem': operatingSystem,
                            'amortizedhours': int(row['Utilization']['PurchasedHours']),
                            'amortizedupfrontprice': float(row['Utilization']['AmortizedUpfrontFee']),
                            'amortizedrecurringfee': float(row['Utilization']['AmortizedRecurringFee']),
                            'offeringclass': row['Attributes']['offeringType'],
                            'scope': row['Attributes']['scope'],
                        }
                        ris.append(ri)
                # Cost Explorer paginates with NextPageToken and requires
                # TimePeriod on every call; the original checked 'NextToken',
                # which never appears in this response, so pagination was
                # silently skipped.
                if 'NextPageToken' in rsp:
                    rsp = ce.get_reservation_utilization(
                        TimePeriod={"Start": str(monthstart), "End": str(monthend)},
                        GroupBy=[{"Type": "DIMENSION", "Key": "SUBSCRIPTION_ID"}],
                        NextPageToken=rsp['NextPageToken'])
                    continue
                break
        ris = pd.DataFrame.from_records(ris)
        ris.to_csv(cache_file, index=False)

    LOGGER.info("Loaded {} reserved instances".format(len(ris)))
    return ris
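
# For reference, each entry in rsp['UtilizationsByTime'][0]['Groups'] above is
# shaped roughly like the following (values hypothetical):
#
#   {'Key': 'SUBSCRIPTION_ID', 'Value': '123456789',
#    'Attributes': {'accountId': '123456789012',
#                   'endDateTime': '2020-01-01T00:00:00.000Z', ...},
#    'Utilization': {'PurchasedHours': '744',
#                    'AmortizedUpfrontFee': '0', ...}}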
def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-locations.yaml'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')
        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Retrieve location to region mapping for use with ec2 pricing data
        query = ' '.join((
            "SELECT DISTINCT product_location, product_region " +
            "  FROM " + database + ".cur " +
            " WHERE line_item_usage_start_date >= cast('{}' as timestamp) ".format(starttime.isoformat(' ')) +
            "   AND line_item_usage_start_date < cast('{}' as timestamp) ".format(endtime.isoformat(' ')) +
            "   AND product_operation = 'RunInstances' "
        ).split())
        map_id = utils.execute_athena_query(athena, staging, query)
        map_result = athena.get_query_results(
            QueryExecutionId=map_id)['ResultSet']['Rows']
        locations = {}
        for i in range(1, len(map_result)):  # skip header row 0
            row = map_result[i]['Data']
            location = row[0]['VarCharValue']
            region = row[1]['VarCharValue']
            locations[location] = region
        with open(cache_file, 'w') as outfile:
            yaml.dump(locations, outfile, default_flow_style=False)
        return locations

    # Cache hit: read the mapping back from the cache file.
    with utils.get_read_handle(cache_file) as infile:
        locations = yaml.load(infile, Loader=yaml.FullLoader)
    return locations
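
# For reference, athena.get_query_results returns rows shaped like this,
# which is why the loaders index ['ResultSet']['Rows'][1]['Data'][0]
# ['VarCharValue'] and skip row 0 (the CSV header):
#
#   {'ResultSet': {'Rows': [
#       {'Data': [{'VarCharValue': 'product_location'},
#                 {'VarCharValue': 'product_region'}]},
#       {'Data': [{'VarCharValue': 'US East (N. Virginia)'},
#                 {'VarCharValue': 'us-east-1'}]},
#   ]}}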
def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-account-instance-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        table_name = utils.get_config_value(config, 'ATHENA',
                                            'CUR_TABLE_NAME', 'cur')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')
        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Download Instance and RI usage
        query = ' '.join((
            "  WITH preprocess AS ( " +
            "SELECT line_item_usage_start_date AS usagestartdate, " +
            "       line_item_usage_account_id AS usageaccountid, " +
            "       line_item_availability_zone AS availabilityzone, " +
            "       CASE WHEN line_item_usage_type LIKE '%:%' THEN SPLIT(line_item_usage_type, ':')[2] " +
            "            WHEN line_item_line_item_description LIKE '%m1.small%' THEN 'm1.small' " +
            "            WHEN line_item_line_item_description LIKE '%m1.medium%' THEN 'm1.medium' " +
            "            WHEN line_item_line_item_description LIKE '%m1.large%' THEN 'm1.large' " +
            "            WHEN line_item_line_item_description LIKE '%m1.xlarge%' THEN 'm1.xlarge' " +
            "            ELSE 'm1.error' " +
            "       END AS instancetype, " +
            "       product_tenancy AS tenancy, " +
            "       product_operating_system AS operatingsystem, " +
            "       CAST(line_item_usage_amount AS double) as usageamount, " +
            "       CASE WHEN line_item_line_item_type = 'DiscountedUsage' THEN CAST(line_item_usage_amount AS DOUBLE) ELSE 0 END as reservedamount " +
            "  FROM " + database + "." + table_name +
            " WHERE product_operation = 'RunInstances' " +
            "   AND line_item_availability_zone != '' " +
            # Filter out Wavelength Instances. They're not available for RIs.
            "   AND line_item_availability_zone NOT LIKE '%-wlz-%' " +
            "   AND product_tenancy = 'Shared' " +
            "       ) " +
            "SELECT usagestartdate, usageaccountid, availabilityzone, instancetype, tenancy, operatingsystem, " +
            "       SUM(usageamount) as instances, SUM(reservedamount) as reserved " +
            "  FROM preprocess " +
            " WHERE usagestartdate >= cast('{}' as timestamp) ".format(starttime.isoformat(' ')) +
            "   AND usagestartdate < cast('{}' as timestamp) ".format(endtime.isoformat(' ')) +
            " GROUP BY usagestartdate, usageaccountid, availabilityzone, instancetype, tenancy, operatingsystem " +
            " ORDER BY usagestartdate, usageaccountid, availabilityzone, instancetype, tenancy, operatingsystem "
        ).split())
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file, parse_dates=['usagestartdate'])
    LOGGER.info("Loaded {} instance summary rows".format(len(result)))
    return result
def load(config):
    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-unlimited-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0
    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        table_name = utils.get_config_value(config, 'ATHENA',
                                            'CUR_TABLE_NAME', 'cur')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')
        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Download Instance and RI usage
        #
        # meckstmd: 07/30/2019 - Something must have changed with the way AWS
        # is exposing CPU Credits. There is a line_item_line_item_type column
        # of Tax for each account which has CPU Credits but does not have a
        # product_region or product_instance. Because these fields are empty,
        # Ariel fails when trying to insert this report data into the
        # unlimited_usage DB table, which does not allow nulls. The
        # line_item_line_item_type column of Usage in this report has the
        # per-instance CPU credits for unlimited and does have product_region
        # and product_instance. I am guessing the Tax one was just added to
        # this report and that is what broke Ariel.
        # See https://github.com/yahoo/ariel/issues/5
        query = ' '.join((
            "SELECT line_item_usage_account_id AS accountid, " +
            "       product_region AS region, " +
            "       lower(product_instance) AS instancetypefamily, " +
            "       sum(line_item_usage_amount) AS unlimitedusageamount, " +
            "       sum(line_item_unblended_cost) AS unlimitedusagecost " +
            "  FROM " + database + "." + table_name +
            " WHERE line_item_usage_type like '%CPUCredits:%' " +
            "   AND line_item_usage_start_date >= cast('{}' as timestamp) ".format(starttime.isoformat(' ')) +
            "   AND line_item_usage_start_date < cast('{}' as timestamp) ".format(endtime.isoformat(' ')) +
            "   AND product_region <> '' AND product_instance <> '' " +
            " GROUP BY line_item_usage_account_id, product_region, lower(product_instance) " +
            " ORDER BY line_item_usage_account_id, product_region, lower(product_instance) "
        ).split())
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file)
    if len(result) == 0:
        result = pd.DataFrame(columns=[
            'accountid', 'region', 'instancetypefamily',
            'unlimitedusageamount', 'unlimitedusagecost'
        ])
    result['accountid'] = result['accountid'].map('{:012}'.format)
    result['unlimitedusageamount'] = result['unlimitedusageamount'].map(
        '{:.2f}'.format)
    result['unlimitedusagecost'] = result['unlimitedusagecost'].map(
        '${:,.2f}'.format)
    LOGGER.info("Loaded {} unlimited rows".format(len(result)))
    return result
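
# Minimal end-to-end sketch (hypothetical; module names assumed): each loader
# takes the same parsed config and returns either a dict or a pandas DataFrame
# keyed by 12-digit account ids, so a reporting step could join them along the
# lines of:
#
#   names = account_names.load(config)        # {'123456789012': 'prod', ...}
#   unlimited = unlimited_usage.load(config)  # DataFrame built above
#   unlimited['accountname'] = unlimited['accountid'].map(names)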