class OutputConfigParser:
    """
    Fetch the output config file from S3 and expose its sections.

    The constructor asks ``Utils.download_s3_file`` for ``conf/output.conf``
    from the bucket named by the ``s3_bucket`` environment variable;
    ``parse_output_config`` then validates that file against the bundled
    ``conf/output/output.spec`` with ConfigObj and returns one section.

    Config file looks like a .ini file with different sections
    Example:
        [prometheus]
            gateway = ''
            port    = 0

        [s3]
            bucket = ''
    """
    def __init__(self):
        self.logger = LOGGER('__OutputConfig__').config()
        self.util = Utils()
        # The spec file lives in <parent of this module's directory>/conf/output/output.spec
        self.heir_dir = os.path.dirname(os.path.dirname(__file__))
        self.conf_dir = os.path.join(self.heir_dir, 'conf/output')
        self.output_spec = os.path.join(self.conf_dir, 'output.spec')
        # NOTE(review): os.getenv returns None when 's3_bucket' is unset;
        # the download helper is then called with bucket=None.
        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        self.logger.info("Downloading conf file from s3")
        # Presumably the local path of the downloaded file - confirm against Utils.
        self.output_conf = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='conf/output.conf')

    def parse_output_config(self, config_name):
        """
        Validate the downloaded config against the spec and return one section.

        Validation problems are only logged; the requested section is still
        returned when it is present.

        Args:
            config_name : ConfigObj section name, example prometheus or s3

        Returns:
            The matching ConfigObj section (dict-like), or an empty dict when
            the section does not exist in the config file.

        """
        config_file_spec = ConfigObj(self.output_spec,
                                     interpolation=False,
                                     list_values=False,
                                     _inspec=True)
        config_file_obj = ConfigObj(self.output_conf,
                                    configspec=config_file_spec)

        # A simple validator used to check that all members expected in conf file are present.
        validator = SimpleVal()

        # validate() returns True when everything passed, False when everything
        # failed, and a dict of per-key results when only some members failed.
        test_pass = config_file_obj.validate(validator)

        # Anything other than True means at least one required member is
        # missing, so partial failures (dict result) must be reported too.
        if test_pass is not True:
            self.logger.error("Not all required output configs are passed")
            self.logger.error(
                "Check src/conf/output/output.spec for required inputs")

        if config_name in config_file_obj:
            return config_file_obj[config_name]

        return {}
class CostExplorerProjected:
    """
    Calculates AWS account and service monthly projected spend information using AWS CostExplorer API.

    Forecasts are requested for the window starting at ``next_day_of_month``
    and ending at ``first_day_next_month`` (both supplied by ``Utils``) and
    are pushed to the Prometheus gateway configured in the output config.
    """
    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        self.cred = SessionCreds()
        self.client = self.cred.get_client(account_id, 'ce')

        self.account_id = account_id
        self.util = Utils()
        self.logger = LOGGER('__forecast__').config()

        self.begin_of_month = self.util.first_day_of_month()
        self.yesterday = self.util.preceding_day()
        self.today = self.util.day_of_month()
        self.last_day = self.util.last_day_of_month(self.today)
        self.first_day_next_month = self.util.first_day_of_next_month()
        self.next_day_of_month = self.util.next_day_of_month()

        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])

    def _forecast_amount(self, service=None):
        """
        Request a monthly UNBLENDED_COST forecast and return its total amount.

        Args:
            service : optional service name; when given, the forecast is
                      restricted to that SERVICE dimension value

        Returns:
            response['Total']['Amount'] exactly as returned by the API
        """
        request = {
            'TimePeriod': {
                'Start': self.next_day_of_month.isoformat(),
                'End': self.first_day_next_month.isoformat()
            },
            'Metric': 'UNBLENDED_COST',
            'Granularity': 'MONTHLY',
            'PredictionIntervalLevel': 90  ## 51 - 99 Range #TODO User input
        }
        if service is not None:
            request['Filter'] = {
                'Dimensions': {
                    'Key': 'SERVICE',
                    'Values': [service]
                }
            }

        response = self.client.get_cost_forecast(**request)
        return response['Total']['Amount']

    def account_monthly_projected_spend(self):
        """
        Push the forecasted spend of the account for the rest of the current month.

        The forecast covers next_day_of_month up to first_day_next_month.
        When the forecast cannot be retrieved the error is logged and 0 is
        pushed instead.
        """

        self.logger.info("Getting aws account projected spend amount..")

        # Prometheus labels to be included in the account level metric
        labels = {'account_id': self.account_id}

        metric_account_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_account_monthly_forecast', 'AWS account monthly projected',
            **labels)

        # Only the forecast lookup is guarded: a failing push must not be
        # followed by a second push that reports a bogus 0 forecast.
        try:
            amount = self._forecast_amount()
        except Exception as error:
            self.logger.error(error)
            # If there is exception and projected cost not found for the account, send projected spend as 0
            amount = 0

        metric_account_projected_spend.push(amount, **labels)

    def get_active_services(self):
        """
        Get the list of actively used services. A service counts as active
        when its UnblendedCost between yesterday and today is non-zero.

        All result pages are fetched by following NextPageToken.

        Returns:
            services: list of actively used services, one entry per
                      (linked account, service) group with non-zero cost

        """
        self.logger.info("Getting list of active services..")

        request = {
            'TimePeriod': {
                'Start': self.yesterday.isoformat(),
                'End': self.today.isoformat()
            },
            'Granularity': 'DAILY',
            'Metrics': ['UnblendedCost'],
            'GroupBy': [{
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }, {
                'Type': 'DIMENSION',
                'Key': 'SERVICE'
            }]
        }

        results = []
        token = None
        while True:
            kwargs = dict(request)
            if token:
                kwargs['NextPageToken'] = token

            # The call must happen on every iteration, including the ones
            # that carry a page token, otherwise the loop never advances.
            data = self.client.get_cost_and_usage(**kwargs)
            results += data['ResultsByTime']

            token = data.get('NextPageToken')
            if not token:
                break

        services = []
        for result_by_time in results:
            for group in result_by_time['Groups']:
                amount = group['Metrics']['UnblendedCost']['Amount']
                # Amount is a numeric string; Keys are ordered like GroupBy,
                # so index 1 is the SERVICE value.
                if float(amount) != 0:
                    services.append(group['Keys'][1])

        return services

    def service_monthly_projected_spend(self):
        """
        Push the forecasted spend of every active service for the rest of the current month.

        The forecast covers next_day_of_month up to first_day_next_month.
        When the forecast of a service cannot be retrieved the error is
        logged and 0 is pushed for that service.
        """

        self.logger.info("Getting forecast amount by service..")

        # Prometheus labels to be included in the service metric
        metric_service_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_service_monthly_forecast', 'AWS Service monthly forecast',
            **{'account_id': '', 'aws_service': ''})

        # Converting unicode service name to string. A service can show up
        # once per linked account, so drop duplicates to avoid repeating the
        # same forecast request and push.
        active_services = list(
            dict.fromkeys(str(srv) for srv in self.get_active_services()))

        for service in active_services:
            labels = {'account_id': self.account_id, 'aws_service': service}
            try:
                amount = self._forecast_amount(service)
            except Exception as error:
                self.logger.error(error)
                # If there is exception and projected cost not found for a service, send projected spend as 0
                amount = 0

            # Push metric to prometheus gateway instance
            metric_service_projected_spend.push(amount, **labels)
# Example #3
# 0
class AccountBudget:
    """
    This class parses and processes the budget file, which has monthly budget information for each AWS account
    """
    def __init__(self):
        self.logger = LOGGER('__AccountBudget__').config()
        self.util = Utils()

        # Prometheus labels to be included in the metric
        self.labels = {'account_id': '', 'account_name': '', 'owner': ''}

        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        # Get the current YearMonth for processing budget allocation, example 201912
        self.current_month = datetime.strftime(datetime.today(), "%Y%m")

    def parse_budget_file(self):
        """
        Download and parse the allocated budget file and return the list of excel sheet names and excel file class

        :return: excel: Pandas excel file class, or None when the file cannot be read
        :return  sheet_names: list of excel sheet names, empty when the file cannot be read
        """

        budget_file = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='input/bills.xlsx')
        self.logger.info('Allocates Budget file downloaded location %s',
                         budget_file)

        try:
            excel = pd.ExcelFile(budget_file)
            sheet_names = excel.sheet_names
        except Exception as error:
            self.logger.error("Unable to read XLXS File, error %s", error)
            return None, []

        return excel, sheet_names

    def _load_sheet(self, excel, sheet):
        """
        Load one sheet of the budget workbook as a DataFrame without its trailing total row.

        :param excel: Pandas excel file class
        :param sheet: name of the sheet to load
        :return: sheet_df: sheet data with the current month column converted to float
        """
        # All the columns in the data frame loaded as string data type
        # This required because some of the AWS account number has preceding zeros
        sheet_df = pd.read_excel(excel, sheet_name=sheet, dtype=str)

        # Convert month field data type from string to float
        sheet_df = sheet_df.astype({int(self.current_month): float})

        # drop last row which has total
        sheet_df.drop(sheet_df.tail(1).index, inplace=True)
        return sheet_df

    def get_aws_accounts(self):
        """
        Get the list of accounts from excel sheets

        :return: accounts: List of AWS Accounts
        """
        accounts = []
        excel, sheets = self.parse_budget_file()

        if not sheets:
            return accounts

        for sheet in sheets:
            try:
                sheet_df = self._load_sheet(excel, sheet)
                accounts.extend(list(sheet_df['AWS Account ID'].unique()))

            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s  \n Error %s", sheet, error)
                # In case a sheet malformed, process other
                continue

        return accounts

    def process_budget_by_account(self, sheet_df):
        """
            Process monthly budget allocation for each account and send it to prometheus gateway node

            :param sheet_df: An Excel Sheet data loaded into pandas.DataFrame
        """
        self.logger.info("Processing monthly budget by account")
        # get list of unique aws account ids; rows without an account id are
        # skipped because they cannot be matched back to any row below
        account_ids = sheet_df['AWS Account ID'].dropna().unique()

        if len(account_ids) == 0:
            return

        # The output config is the same for every account, so it is loaded
        # once per sheet instead of once per account.
        try:
            prom_conf = OutputConfigParser().parse_output_config('prometheus')
            prom_endpoint = "%s:%s" % (prom_conf['gateway'],
                                       prom_conf['port'])
        except Exception as error:
            self.logger.error(error)
            self.logger.error('Unable to load output conf.')
            return

        for account_id in account_ids:
            self.logger.info("Processing Account %s", account_id)
            # Filter the row matches account_ids
            account_df = sheet_df[sheet_df['AWS Account ID'] == account_id]
            total = account_df[int(self.current_month)].sum()

            # Incase multiple row is matched, use the last row to fetch account name and owner info
            last_row = account_df.iloc[-1]

            account_name = getattr(last_row, 'Account Description')
            owner = getattr(last_row, 'Owner')

            try:
                metric_budget = PrometheusPushMetric(
                    account_id, prom_endpoint, 'aws_account_monthly_budget',
                    'AWS monthly account budget', **self.labels)
            except Exception as error:
                self.logger.error(error)
                self.logger.error('Unable to load output conf.')
                return

            self.labels = {
                'account_id': account_id,
                'account_name': account_name,
                'owner': owner
            }

            metric_budget.push(total, **self.labels)

    def process_monthly_budget(self):
        """
            Iterate over each spread sheet in the excel file and process the monthly budget info for each accounts
        """

        excel, sheets = self.parse_budget_file()

        if not sheets:
            return

        for sheet in sheets:
            try:
                self.process_budget_by_account(self._load_sheet(excel, sheet))

            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s  \n Error %s", sheet, error)
                # In case a sheet malformed, process other
                continue
# Example #4
# 0
def lambda_handler(event, context):
    """
        Lambda function to process daily and monthly spend using AWS cost utilization report.

        Steps, in order:
          1. read the AWS account ids from the budget file
          2. download the daily CUR file and push month-to-date spend for the
             rows carrying the last record date found in that file
          3. download the monthly CUR file and push the projected spend from
             a separate process

        Any download or CSV read failure is logged and ends the invocation.

        :param event: Lambda event payload (not used)
        :param context: Lambda context object (not used)
    """
    logger = LOGGER('__cur_cost_usage__').config()

    budget = AccountBudget()
    # Get the list of AWS accounts from budget file
    accounts = budget.get_aws_accounts()

    util = Utils()
    util.clean_up_tmp_dir()

    # One parser instance serves both sections, so the output config is
    # downloaded from S3 only once per invocation.
    output_config = OutputConfigParser()

    prom_conf = output_config.parse_output_config('prometheus')
    prom_endpoint = "%s:%s" % (prom_conf['gateway'], prom_conf['port'])

    cur_conf = output_config.parse_output_config('cur')
    s3_bucket = cur_conf['bucket']
    daily_file_pattern = cur_conf['daily_file_pattern']
    monthly_file_pattern = cur_conf['monthly_file_pattern']

    #201912*filefomart.csv
    #TODO file pattern suffix
    daily_cur_file = "%s_%s.csv000" % (util.get_current_month_year(),
                                       daily_file_pattern)
    monthly_cur_file = "%s_%s.csv000" % (util.get_current_month_year(),
                                         monthly_file_pattern)

    # Daily cost usage report
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=daily_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file %s", error)
        return

    # TODO Column name change
    columns = [
        'usagestartdate_date', 'aws_account_number', 'environment',
        'aws_account_name', 'aws_service_code', 'operation', 'component',
        'app', 'appenv', 'user', 'bu', 'cost_total'
    ]

    try:
        # Everything is read as string so account numbers keep their digits
        # as written; only the cost column is converted afterwards.
        daily_usage_df = pd.read_csv(downloaded_file, dtype=str, header=None)
        # set the column names
        daily_usage_df.columns = columns

        # Convert cost_total column to float
        daily_usage_df = daily_usage_df.astype({'cost_total': float})
    except Exception as error:
        logger.error("Unable to read daily usage CSV File %s ", error)
        return

    # Process latest set of records: the date of the last row in the file,
    # falling back to the epoch date when the file has no rows.
    last_dates = daily_usage_df['usagestartdate_date'].tail(1)
    last_record_date = "1970-01-01" if last_dates.empty else last_dates.iloc[0]

    today = util.get_day_month_year()

    latest_df = daily_usage_df[daily_usage_df['usagestartdate_date'] ==
                               last_record_date]
    accounts_df = latest_df[latest_df['aws_account_number'].isin(accounts)]

    cur_spend = CostUsageReportSpend()
    cur_spend.account_month_to_date_spend(accounts_df, today, prom_endpoint)

    # Clean up /tmp dir before processing monthly cur file.
    util.clean_up_tmp_dir()

    # Monthly cost and usage report, seperate function
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=monthly_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file, %s", error)
        return

    # TODO Column name change
    columns = [
        'month_of_year', 'fiscal_quarter_of_year', 'as_of_date', 'bu',
        'application_name', 'aws_account_number', 'environment',
        'account_name', 'aws_service_code', 'operation', 'component',
        'user_app', 'appenv', 'user', 'finance_part', 'monthly_cost_to_date',
        'projected_month_end_cost', 'quarterly_cost_to_date',
        'projected_quarter_end_cost'
    ]

    try:
        monthly_spend_df = pd.read_csv(downloaded_file, dtype=str, header=None)
        monthly_spend_df.columns = columns

        # Convert the cost columns to float
        convert_dict = {
            'monthly_cost_to_date': float,
            'projected_month_end_cost': float,
            'quarterly_cost_to_date': float,
            'projected_quarter_end_cost': float
        }
        monthly_spend_df = monthly_spend_df.astype(convert_dict)
    except Exception as error:
        logger.exception("Unable to read CSV File, %s", error)
        return

    accounts_df = monthly_spend_df[monthly_spend_df['aws_account_number'].isin(
        accounts)]

    cur_projected = CostUsageReportProjected()

    # Process monthly/projected spend cost by account id
    process = Process(target=cur_projected.account_monthly_projected_spend,
                      args=(accounts_df, prom_endpoint))
    cur_projected.processes.append(process)

    # start all processes
    for process in cur_projected.processes:
        process.start()

    # Wait for every process to finish before the handler returns
    for process in cur_projected.processes:
        process.join()