class OutputConfigParser:
    """
    Download the output config file from S3 and expose its sections.

    The file is parsed with the configobj library and validated against the
    spec file shipped in ``conf/output/output.spec``. Each section is returned
    as a dict-like object. The config file looks like a .ini file with
    different sections.

    Example:
        [prometheus]
        gateway = ''
        port = 0
        [s3]
        bucket = ''
    """

    def __init__(self):
        self.logger = LOGGER('__OutputConfig__').config()
        self.util = Utils()
        # The conf directory lives one level above this module's directory.
        self.heir_dir = os.path.dirname(os.path.dirname(__file__))
        self.conf_dir = os.path.join(self.heir_dir, 'conf/output')
        self.output_spec = os.path.join(self.conf_dir, 'output.spec')
        # Bucket name comes from the 's3_bucket' environment variable.
        self.costbuddy_output_bucket = os.getenv('s3_bucket')
        self.logger.info("Downloading conf file from s3")
        self.output_conf = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='conf/output.conf')

    def parse_output_config(self, config_name):
        """
        Parse the downloaded config file and return one of its sections.

        Validation problems are logged but do not abort parsing.

        Args:
            config_name : ConfigObj section name, example prometheus or s3

        Returns:
            The section as a dict-like object, or an empty dict when the
            section is not present in the config file.
        """
        config_file_spec = ConfigObj(self.output_spec,
                                     interpolation=False,
                                     list_values=False,
                                     _inspec=True)
        config_file_obj = ConfigObj(self.output_conf,
                                    configspec=config_file_spec)

        # A simple validator used to check that all members expected in the
        # conf file are present.
        validator = SimpleVal()

        # validate() returns True only when every member passes. It returns
        # False when everything fails and a dict of per-member results when
        # only some members fail, so anything other than True is a failure.
        test_pass = config_file_obj.validate(validator)
        if test_pass is not True:
            self.logger.error("Not all required output configs are passed")
            self.logger.error(
                "Check src/conf/output/output.spec for required inputs")

        if config_name in config_file_obj:
            return config_file_obj[config_name]
        return {}
class CostExplorerProjected:
    """
    Calculates AWS account and service monthly projected spend information
    using AWS CostExplorer API and pushes it to the Prometheus gateway.
    """

    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        self.cred = SessionCreds()
        self.client = self.cred.get_client(account_id, 'ce')
        self.account_id = account_id
        self.util = Utils()
        self.logger = LOGGER('__forecast__').config()

        # Date boundaries used by the Cost Explorer queries below.
        self.begin_of_month = self.util.first_day_of_month()
        self.yesterday = self.util.preceding_day()
        self.today = self.util.day_of_month()
        self.last_day = self.util.last_day_of_month(self.today)
        self.first_day_next_month = self.util.first_day_of_next_month()
        self.next_day_of_month = self.util.next_day_of_month()

        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])

    def _push_forecast(self, metric, labels, **forecast_args):
        """
        Request a cost forecast and push the total amount to Prometheus.

        The forecast window runs from ``self.next_day_of_month`` to
        ``self.first_day_next_month``. If anything in the request or the push
        raises, the error is logged and 0 is pushed instead.

        Args:
            metric : PrometheusPushMetric instance to push the value with
            labels : dict of Prometheus labels sent along with the value
            **forecast_args : extra keyword arguments for get_cost_forecast,
                for example a service Filter
        """
        try:
            response = self.client.get_cost_forecast(
                TimePeriod={
                    'Start': self.next_day_of_month.isoformat(),
                    'End': self.first_day_next_month.isoformat()
                },
                Metric='UNBLENDED_COST',
                Granularity='MONTHLY',
                # Valid range is 51 - 99. TODO: take this from user input.
                PredictionIntervalLevel=90,
                **forecast_args)
            metric.push(response['Total']['Amount'], **labels)
        except Exception as error:
            self.logger.error(error)
            # If there is an exception and the projected cost is not found,
            # send the projected spend as 0.
            metric.push(0, **labels)

    def account_monthly_projected_spend(self):
        """
        Push the forecasted account spend to the Prometheus gateway.

        Retrieves a forecast for how much Amazon Web Services predicts that
        you will spend between ``self.next_day_of_month`` and
        ``self.first_day_next_month``. On any error 0 is pushed instead.
        """
        self.logger.info("Getting aws account projected spend amount..")

        # Prometheus labels to be included in the account level metric
        labels = {'account_id': self.account_id}

        metric_account_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_account_monthly_forecast',
            'AWS account monthly projected', **labels)

        self._push_forecast(metric_account_projected_spend, labels)

    def get_active_services(self):
        """
        Get the list of actively used services.

        A service is considered active when the cost reported for it between
        ``self.yesterday`` and ``self.today`` is non-zero. Results are grouped
        by linked account and service, so the same service can be reported
        several times; each service is returned once, in first-seen order.

        Returns:
            services: list of actively used services
        """
        self.logger.info("Getting list of active services..")

        results = []
        token = None

        # Follow NextPageToken until the API stops returning one.
        while True:
            kwargs = {'NextPageToken': token} if token else {}
            data = self.client.get_cost_and_usage(
                TimePeriod={
                    'Start': self.yesterday.isoformat(),
                    'End': self.today.isoformat()
                },
                Granularity='DAILY',
                Metrics=['UnblendedCost'],
                GroupBy=[{
                    'Type': 'DIMENSION',
                    'Key': 'LINKED_ACCOUNT'
                }, {
                    'Type': 'DIMENSION',
                    'Key': 'SERVICE'
                }],
                **kwargs)
            results += data['ResultsByTime']
            token = data.get('NextPageToken')
            if not token:
                break

        services = []
        seen = set()
        for result_by_time in results:
            for group in result_by_time['Groups']:
                amount = group['Metrics']['UnblendedCost']['Amount']
                # The amount arrives as a numeric string.
                if float(amount) == 0:
                    continue
                # Keys are ordered like GroupBy: [linked account, service].
                service = group['Keys'][1]
                if service not in seen:
                    seen.add(service)
                    services.append(service)
        return services

    def service_monthly_projected_spend(self):
        """
        Push the forecasted spend of every active service to Prometheus.

        For each service returned by get_active_services, retrieves a forecast
        for how much Amazon Web Services predicts that you will spend on it
        between ``self.next_day_of_month`` and ``self.first_day_next_month``.
        On any error 0 is pushed for that service instead.
        """
        self.logger.info("Getting forecast amount by service..")

        # Prometheus labels to be included in the service metric
        labels = {'account_id': '', 'aws_service': ''}

        # Converting unicode service name to string
        active_services = [str(srv) for srv in self.get_active_services()]

        metric_service_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_service_monthly_forecast',
            'AWS Service monthly forecast', **labels)

        for service in active_services:
            service_labels = {
                'account_id': self.account_id,
                'aws_service': service
            }
            # Push metric to prometheus gateway instance
            self._push_forecast(
                metric_service_projected_spend,
                service_labels,
                Filter={
                    'Dimensions': {
                        'Key': 'SERVICE',
                        'Values': [service]
                    }
                })
class AccountBudget:
    """
    This class parses and processes the budget file, which has monthly budget
    information for each AWS account.
    """

    def __init__(self):
        self.logger = LOGGER('__AccountBudget__').config()
        self.util = Utils()

        # Prometheus labels to be included in the metric
        self.labels = {'account_id': '', 'account_name': '', 'owner': ''}

        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        # Get the current YearMonth for processing budget allocation,
        # example 201912
        self.current_month = datetime.today().strftime("%Y%m")

    def parse_budget_file(self):
        """
        Download and parse the allocated budget file and return the excel
        file class and the list of excel sheet names.

        :return: excel: Pandas excel file class, or None when the file
            cannot be read
        :return sheet_names: list of excel sheet names, empty when the file
            cannot be read
        """
        budget_file = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='input/bills.xlsx')
        self.logger.info('Allocates Budget file downloaded location %s',
                         budget_file)
        try:
            excel = pd.ExcelFile(budget_file)
            sheet_names = excel.sheet_names
        except Exception as error:
            self.logger.error("Unable to read XLXS File, error %s", error)
            return None, []

        return excel, sheet_names

    def _load_sheet(self, excel, sheet):
        """
        Load one budget sheet into a DataFrame ready for processing.

        :param excel: Pandas excel file class
        :param sheet: name of the sheet to load
        :return: DataFrame with the current month column converted to float
            and the last row removed
        """
        # All the columns in the data frame are loaded as string data type.
        # This is required because some of the AWS account numbers have
        # preceding zeros.
        sheet_df = pd.read_excel(excel, sheet_name=sheet, dtype=str)

        # Convert month field data type from string to float
        sheet_df = sheet_df.astype({int(self.current_month): float})

        # drop last row which has total
        return sheet_df.drop(sheet_df.tail(1).index)

    def get_aws_accounts(self):
        """
        Get the list of accounts from excel sheets

        :return: accounts: List of AWS Accounts
        """
        accounts = []
        excel, sheets = self.parse_budget_file()
        if not sheets:
            return accounts

        for sheet in sheets:
            try:
                sheet_df = self._load_sheet(excel, sheet)
                accounts.extend(list(sheet_df['AWS Account ID'].unique()))
            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s \n Error %s", sheet, error)
                # In case a sheet is malformed, process the others
                continue

        return accounts

    def process_budget_by_account(self, sheet_df):
        """
        Process monthly budget allocation for each account and send it to
        the prometheus gateway node

        :param sheet_df: An Excel Sheet data loaded into pandas.DataFrame
        """
        self.logger.info("Processing monthly budget by account")

        # get list of unique aws account ids
        account_ids = sheet_df['AWS Account ID'].unique()
        if len(account_ids) == 0:
            return

        # Creating OutputConfigParser downloads the conf file from S3, so the
        # endpoint is resolved once per sheet instead of once per account.
        try:
            prom_conf = OutputConfigParser().parse_output_config('prometheus')
            prom_endpoint = "%s:%s" % (prom_conf['gateway'],
                                       prom_conf['port'])
        except Exception as error:
            self.logger.error(error)
            self.logger.error('Unable to load output conf.')
            return

        month_column = int(self.current_month)

        for account_id in account_ids:
            self.logger.info("Processing Account %s", account_id)

            # Filter the rows matching the account id
            account_df = sheet_df[sheet_df['AWS Account ID'] == account_id]
            total = account_df[month_column].sum()

            # In case multiple rows are matched, use the last row to fetch
            # account name and owner info
            last_row = account_df.iloc[-1]
            account_name = last_row['Account Description']
            owner = last_row['Owner']

            # Set the labels before building the metric so that it is
            # created with this account's values, not the previous one's.
            self.labels = {
                'account_id': account_id,
                'account_name': account_name,
                'owner': owner
            }

            try:
                metric_budget = PrometheusPushMetric(
                    account_id, prom_endpoint, 'aws_account_monthly_budget',
                    'AWS monthly account budget', **self.labels)
            except Exception as error:
                self.logger.error(error)
                self.logger.error('Unable to load output conf.')
                return

            metric_budget.push(total, **self.labels)

    def process_monthly_budget(self):
        """
        Iterate over each spread sheet in the excel file and process the
        monthly budget info for each account
        """
        excel, sheets = self.parse_budget_file()
        if not sheets:
            return

        for sheet in sheets:
            try:
                sheet_df = self._load_sheet(excel, sheet)
                self.process_budget_by_account(sheet_df)
            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s \n Error %s", sheet, error)
                # In case a sheet is malformed, process the others
                continue
def lambda_handler(event, context):
    """
    Lambda function to process daily and monthly spend using AWS cost
    utilization report.

    Downloads the daily and the monthly cost and usage report files from S3,
    keeps the rows that belong to the accounts listed in the budget file and
    hands them over to the spend processors. Returns early, after logging,
    when a report file cannot be downloaded or read.

    Args:
        event : Lambda invocation event, not used
        context : Lambda runtime context, not used
    """
    logger = LOGGER('__cur_cost_usage__').config()
    budget = AccountBudget()

    # Get the list of AWS accounts from budget file
    accounts = budget.get_aws_accounts()

    util = Utils()
    util.clean_up_tmp_dir()

    # Creating OutputConfigParser downloads the conf file from S3, so one
    # instance is shared for both sections.
    output_config = OutputConfigParser()
    prom_conf = output_config.parse_output_config('prometheus')
    prom_endpoint = "%s:%s" % (prom_conf['gateway'], prom_conf['port'])

    cur_conf = output_config.parse_output_config('cur')
    s3_bucket = cur_conf['bucket']
    daily_file_pattern = cur_conf['daily_file_pattern']
    monthly_file_pattern = cur_conf['monthly_file_pattern']

    # 201912*filefomart.csv
    # TODO file pattern suffix
    current_month_year = util.get_current_month_year()
    daily_cur_file = "%s_%s.csv000" % (current_month_year,
                                       daily_file_pattern)
    monthly_cur_file = "%s_%s.csv000" % (current_month_year,
                                         monthly_file_pattern)

    # Daily cost usage report
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=daily_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file %s", error)
        return

    # TODO Column name change
    columns = [
        'usagestartdate_date', 'aws_account_number', 'environment',
        'aws_account_name', 'aws_service_code', 'operation', 'component',
        'app', 'appenv', 'user', 'bu', 'cost_total'
    ]

    try:
        # The file has no header row, so the column names are set here.
        daily_usage_df = pd.read_csv(downloaded_file, dtype=str, header=None)
        daily_usage_df.columns = columns

        # Convert cost_total column to float
        daily_usage_df = daily_usage_df.astype({'cost_total': float})
    except Exception as error:
        logger.error("Unable to read daily usage CSV File %s ", error)
        return

    # Process latest set of records: the date found on the last row of the
    # file, or the default below when the file has no rows.
    last_record_date = "1970-01-01"
    if not daily_usage_df.empty:
        last_record_date = daily_usage_df['usagestartdate_date'].iloc[-1]

    today = util.get_day_month_year()
    latest_df = daily_usage_df[
        daily_usage_df['usagestartdate_date'] == last_record_date]
    accounts_df = latest_df[latest_df['aws_account_number'].isin(accounts)]

    cur_spend = CostUsageReportSpend()
    cur_spend.account_month_to_date_spend(accounts_df, today, prom_endpoint)

    # Clean up /tmp dir before processing monthly cur file.
    util.clean_up_tmp_dir()

    # Monthly cost and usage report, seperate function
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=monthly_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file, %s", error)
        return

    # TODO Column name change
    columns = [
        'month_of_year', 'fiscal_quarter_of_year', 'as_of_date', 'bu',
        'application_name', 'aws_account_number', 'environment',
        'account_name', 'aws_service_code', 'operation', 'component',
        'user_app', 'appenv', 'user', 'finance_part', 'monthly_cost_to_date',
        'projected_month_end_cost', 'quarterly_cost_to_date',
        'projected_quarter_end_cost'
    ]

    try:
        monthly_spend_df = pd.read_csv(downloaded_file,
                                       dtype=str,
                                       header=None)
        monthly_spend_df.columns = columns

        # Convert the cost columns to float
        convert_dict = {
            'monthly_cost_to_date': float,
            'projected_month_end_cost': float,
            'quarterly_cost_to_date': float,
            'projected_quarter_end_cost': float
        }
        monthly_spend_df = monthly_spend_df.astype(convert_dict)
    except Exception as error:
        logger.exception("Unable to read CSV File, %s", error)
        return

    accounts_df = monthly_spend_df[
        monthly_spend_df['aws_account_number'].isin(accounts)]

    cur_projected = CostUsageReportProjected()

    # Process monthly/projected spend cost by account id
    process = Process(target=cur_projected.account_monthly_projected_spend,
                      args=(accounts_df, prom_endpoint))
    cur_projected.processes.append(process)

    # start all processes
    for process in cur_projected.processes:
        process.start()

    # Wait for completion and ensure all processes have finished
    for process in cur_projected.processes:
        process.join()