def __init__(self, aws_account_id, prom_gateway, metric_name, metric_desc,
             **labels):
    """
    Args:
        aws_account_id : 12 digit AWS account id without hyphen
        prom_gateway : IP or DNS name of push gateway instance
        metric_name : Name of the prometheus metric
        metric_desc : Short description about the metric
        **labels : Is a dict object with key as label name and value as label values
    """
    self.util = Utils()
    self.logger = LOGGER('__PrometheusPushMetric__').config()

    # Dedicated registry so only this metric is pushed to the gateway
    self.registry = CollectorRegistry()
    self.account_id = aws_account_id
    self.prom_gateway = prom_gateway
    self.metric_name = metric_name

    self.labels = list(labels.keys())

    # year/month/day and account_id are mandatory labels for every metric.
    # BUG FIX: append only the labels that are actually missing. The old
    # code extended the list with all three date labels whenever at least
    # one was absent, producing duplicate label names (Gauge raises
    # ValueError on duplicate labelnames).
    for mandatory in ('year', 'month', 'day', 'account_id'):
        if mandatory not in self.labels:
            self.labels.append(mandatory)

    self.metric = Gauge(metric_name,
                        metric_desc,
                        self.labels,
                        registry=self.registry)
    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        self.account_id = account_id
        self.logger = LOGGER('__CostExplorer__').config()
        self.util = Utils()
        self.cred = SessionCreds()

        self.begin_of_month = self.util.first_day_of_month()
        self.yesterday = self.util.preceding_day()

        self.today = self.util.get_day_month_year()
        self.last_day = self.util.last_day_of_month(self.today)

        self.client = self.cred.get_client(account_id, 'ce')

        self.results = []

        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])

        self.s3_conf = OutputConfigParser().parse_output_config('s3')
        self.s3_bucket = self.s3_conf['bucket']

        self.s3_upload = S3Upload(account_id, self.s3_bucket,
                                  'daily_%s' % self.today.isoformat())
# Example #3
class CostUsageReportSpend:
    """
    Computes month-to-date spend per AWS account from cost and usage report
    (CUR) data and pushes the totals to the prometheus push gateway.
    """
    def __init__(self):
        self.logger = LOGGER('__cur_cost_usage__').config()

    def account_month_to_date_spend(self, accounts_df, date, prom_endpoint):
        """
        Push one 'cur_aws_daily_usage_cost' metric per AWS account.

        Args:
            accounts_df : pandas.DataFrame of CUR rows; must contain the
                columns in required_columns plus 'cost_total'
            date : date-like object with year/month/day attributes
            prom_endpoint : host:port of the prometheus push gateway
        """
        required_columns = [
            'u_aws_account_number', 'u_environment', 'billing_account_name'
        ]

        # Prometheus label names corresponding 1:1 to required_columns
        tags = ['account_id', 'env', 'account_name']

        labels = {'year': date.year, 'month': date.month, 'day': date.day}

        account_ids = accounts_df['u_aws_account_number'].unique()

        for account_id in account_ids:
            self.logger.info("Processing Account %s", account_id)
            account_df = accounts_df[accounts_df['u_aws_account_number'] ==
                                     account_id]

            total = format(float(account_df['cost_total'].sum()), '.3f')
            # Get the last row of the pandas data frame, to get the column details
            row = account_df.iloc[-1]

            # BUG FIX: the old loop rebound `labels` to a fresh single-entry
            # dict on every pass, discarding the year/month/day labels and
            # all but the last tag. Update the existing dict instead.
            for tag, column in zip(tags, required_columns):
                labels[tag] = getattr(row, column)

            metric_cur_daily_usage = PrometheusPushMetric(
                account_id, prom_endpoint, 'cur_aws_daily_usage_cost',
                'AWS Daily Usage Cost', **labels)

            metric_cur_daily_usage.push(total, **labels)

    def service_month_to_date_spend(self):
        """Not implemented yet: per-service month-to-date spend from CUR."""
        pass
# Example #4
    def __init__(self):
        """Set up logging, helpers, metric labels and the processing month."""
        self.logger = LOGGER('__AccountBudget__').config()
        self.util = Utils()

        # Prometheus label keys attached to every budget metric
        self.labels = dict.fromkeys(('account_id', 'account_name', 'owner'), '')

        # Bucket that holds the costbuddy input/output files
        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        # Current YearMonth used to select the budget column, e.g. 201912
        self.current_month = datetime.today().strftime("%Y%m")
    def __init__(self):
        """Locate the local conf spec and pull the output conf file from S3."""
        self.logger = LOGGER('__OutputConfig__').config()
        self.util = Utils()

        # <repo root>/conf/output holds the local spec file
        base_dir = os.path.dirname(os.path.dirname(__file__))
        self.heir_dir = base_dir
        self.conf_dir = os.path.join(base_dir, 'conf/output')
        self.output_spec = os.path.join(self.conf_dir, 'output.spec')

        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        self.logger.info("Downloading conf file from s3")
        self.output_conf = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='conf/output.conf')
    def __init__(self, account_id, bucket, file_name):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
            bucket : S3 bucket name, optionally with a directory suffix,
                     e.g. 'custbuddy-output' or 'custbuddy-output/conf'
            file_name : Complete S3 key name, like conf/example.com or example.com
        """
        self.logger = LOGGER('__S3Upload__').config()
        self.s3_client = boto3.client('s3')
        self.util = Utils()
        self.account_id = account_id
        self.file_name = file_name

        # Split 'bucket[/dir...]' into the bucket name and the remaining key
        # prefix. BUG FIX: the old code indexed bucket.split('/')[1]
        # unconditionally, which raised IndexError for a plain bucket name
        # without a '/'. partition() also keeps the full remainder
        # ('a/b/c' -> dir 'b/c'), matching the original comment's intent;
        # dir is '' when no directory part is given.
        bucket_name, _, key_prefix = bucket.partition('/')
        self.bucket = bucket_name
        self.dir = key_prefix
        self.rows = []
    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        # Cost Explorer client scoped to the target account
        self.cred = SessionCreds()
        self.client = self.cred.get_client(account_id, 'ce')
        self.account_id = account_id

        self.util = Utils()
        self.logger = LOGGER('__forecast__').config()

        # Date anchors used to build the forecast time periods
        util = self.util
        self.begin_of_month = util.first_day_of_month()
        self.yesterday = util.preceding_day()
        self.today = util.day_of_month()
        self.last_day = util.last_day_of_month(self.today)
        self.first_day_next_month = util.first_day_of_next_month()
        self.next_day_of_month = util.next_day_of_month()

        # Push-gateway endpoint read from the output config on S3
        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])
class OutputConfigParser:
    """
    Downloads the output config file from S3 and parses it with configobj,
    exposing each section of the file as a dict-like object.

    The config file looks like a .ini file with different sections, e.g.:
        [prometheus]
            gateway = ''
            port    = 0

        [s3]
            bucket = ''
    """
    def __init__(self):
        self.logger = LOGGER('__OutputConfig__').config()
        self.util = Utils()
        base_dir = os.path.dirname(os.path.dirname(__file__))
        self.heir_dir = base_dir
        self.conf_dir = os.path.join(base_dir, 'conf/output')
        self.output_spec = os.path.join(self.conf_dir, 'output.spec')
        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        self.logger.info("Downloading conf file from s3")
        self.output_conf = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='conf/output.conf')

    def parse_output_config(self, config_name):
        """
        Args:
            config_name : ConfigObj section name, example prometheus or s3

        Returns:
            The requested section as a dict-like object, or {} when the
            section is absent.
        """
        spec = ConfigObj(self.output_spec,
                         interpolation=False,
                         list_values=False,
                         _inspec=True)
        conf = ConfigObj(self.output_conf, configspec=spec)

        # SimpleVal only checks that every member the spec expects exists;
        # validate() returns True when every member of every section passes.
        ok = conf.validate(SimpleVal())
        if ok is False:
            self.logger.error("Not all required output configs are passed")
            self.logger.error(
                "Check src/conf/output/output.spec for required inputs")

        if config_name not in conf:
            return {}
        return conf[config_name]
class CostExplorerProjected:
    """
    Calculates AWS account and service monthly projected spend information
    using the AWS Cost Explorer API and pushes it to the prometheus gateway.
    """
    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        self.cred = SessionCreds()
        self.client = self.cred.get_client(account_id, 'ce')

        self.account_id = account_id
        self.util = Utils()
        self.logger = LOGGER('__forecast__').config()

        # Date anchors used to build the forecast time periods
        self.begin_of_month = self.util.first_day_of_month()
        self.yesterday = self.util.preceding_day()
        self.today = self.util.day_of_month()
        self.last_day = self.util.last_day_of_month(self.today)
        self.first_day_next_month = self.util.first_day_of_next_month()
        self.next_day_of_month = self.util.next_day_of_month()

        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])

    def account_monthly_projected_spend(self):
        """
        Retrieves a forecast for how much Amazon Web Services predicts that
        you will spend over the forecast time period, i.e. from the next day
        of the month to the first day of the next month, and pushes it to
        prometheus. On any API failure a value of 0 is pushed instead.
        """
        self.logger.info("Getting aws account projected spend amount..")

        # Prometheus labels to be included in the account level metric
        labels = {'account_id': self.account_id}

        metric_account_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_account_monthly_forecast', 'AWS account monthly projected',
            **labels)
        try:
            response = self.client.get_cost_forecast(
                TimePeriod={
                    'Start': self.next_day_of_month.isoformat(),
                    'End': self.first_day_next_month.isoformat()
                },
                Metric='UNBLENDED_COST',
                Granularity='MONTHLY',
                PredictionIntervalLevel=90  ## 51 - 99 Range #TODO User input
            )

            metric_account_projected_spend.push(response['Total']['Amount'],
                                                **labels)

        except Exception as error:
            self.logger.error(error)
            # If projected cost is not found for the account, send 0
            metric_account_projected_spend.push(0, **labels)

    def get_active_services(self):
        """
        Get the list of actively used services. Whether a service is active
        is determined by a non-zero cost incurred between yesterday and today.

        Returns:
            services: list of actively used services
        """
        self.logger.info("Getting list of active services..")
        results = []
        services = []
        token = None
        while True:
            # BUG FIX: the API call used to live inside the `else` branch,
            # so once a NextPageToken was returned the loop set kwargs and
            # spun forever without issuing another request. The request now
            # runs on every iteration, passing the token when present.
            kwargs = {'NextPageToken': token} if token else {}
            data = self.client.get_cost_and_usage(
                TimePeriod={
                    'Start': self.yesterday.isoformat(),
                    'End': self.today.isoformat()
                },
                Granularity='DAILY',
                Metrics=['UnblendedCost'],
                GroupBy=[{
                    'Type': 'DIMENSION',
                    'Key': 'LINKED_ACCOUNT'
                }, {
                    'Type': 'DIMENSION',
                    'Key': 'SERVICE'
                }],
                **kwargs)

            results += data['ResultsByTime']
            token = data.get('NextPageToken')
            if not token:
                break

        for result_by_time in results:
            for group in result_by_time['Groups']:
                amount = group['Metrics']['UnblendedCost']['Amount']
                # Amount arrives as a numeric string; non-zero => active
                if ast.literal_eval(amount) != 0:
                    services.append(group['Keys'][1])

        return services

    def service_monthly_projected_spend(self):
        """
        Retrieves a forecast for how much Amazon Web Services predicts that
        you will spend for each active service over the forecast time period
        and pushes one metric per service. On any per-service API failure a
        value of 0 is pushed for that service.
        """
        self.logger.info("Getting forecast amount by service..")

        # Prometheus labels to be included in the service metric
        labels = {'account_id': '', 'aws_service': ''}
        # Converting unicode service name to string
        active_services = [str(srv) for srv in self.get_active_services()]

        metric_service_projected_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_service_monthly_forecast', 'AWS Service monthly forecast',
            **labels)

        for service in active_services:
            labels['aws_service'] = service
            labels['account_id'] = self.account_id
            try:
                response = self.client.get_cost_forecast(
                    TimePeriod={
                        'Start': self.next_day_of_month.isoformat(),
                        'End': self.first_day_next_month.isoformat()
                    },
                    Metric='UNBLENDED_COST',
                    Granularity='MONTHLY',
                    Filter={
                        'Dimensions': {
                            'Key': 'SERVICE',
                            'Values': [service]
                        }
                    },
                    PredictionIntervalLevel=90  ## 51 - 99 Range
                )
                # Push metric to prometheus gateway instance
                metric_service_projected_spend.push(
                    response['Total']['Amount'], **labels)

            except Exception as error:
                self.logger.error(error)
                # If projected cost is not found for a service, send 0
                metric_service_projected_spend.push(0, **labels)
class CostExplorerUsage:
    """
    Calculates AWS account billing information using AWS CostExplorer API
    and forwards the results to prometheus and S3.
    """
    def __init__(self, account_id):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
        """
        self.account_id = account_id
        self.logger = LOGGER('__CostExplorer__').config()
        self.util = Utils()
        self.cred = SessionCreds()

        # Date anchors for the daily / month-to-date queries
        self.begin_of_month = self.util.first_day_of_month()
        self.yesterday = self.util.preceding_day()

        self.today = self.util.get_day_month_year()
        self.last_day = self.util.last_day_of_month(self.today)

        self.client = self.cred.get_client(account_id, 'ce')

        # Accumulated Cost Explorer result pages (kept on the instance)
        self.results = []

        self.prom_conf = OutputConfigParser().parse_output_config('prometheus')
        self.prom_endpoint = "%s:%s" % (self.prom_conf['gateway'],
                                        self.prom_conf['port'])

        self.s3_conf = OutputConfigParser().parse_output_config('s3')
        self.s3_bucket = self.s3_conf['bucket']

        self.s3_upload = S3Upload(account_id, self.s3_bucket,
                                  'daily_%s' % self.today.isoformat())

    def daily_service_usage(self):
        """
        Calculates daily service cost and usage metrics for your account,
        forwards the metrics to prometheus and uploads them to S3.

        Start and end dates are required for retrieving AWS costs, with
        daily granularity:

        Start: The beginning of the time period that you want the usage and
               costs for. The start date is inclusive.
        End:   The end of the time period that you want the usage and costs
               for. The end date is exclusive.
        """
        self.logger.info("Getting daily service usage amount..")

        # Prometheus labels to be included in the service metric
        labels = {'account_id': '', 'aws_service': ''}

        metric_service_daily_usage = PrometheusPushMetric(
            self.account_id, self.prom_endpoint, 'ce_aws_service_daily_spend',
            'AWS Service Usage Cost', **labels)
        token = None
        while True:
            # BUG FIX: the API call used to live inside the `else` branch,
            # so once a NextPageToken was returned the loop set kwargs and
            # spun forever without issuing another request. The request now
            # runs on every iteration, passing the token when present.
            kwargs = {'NextPageToken': token} if token else {}
            # TODO - Linked account
            data = self.client.get_cost_and_usage(
                TimePeriod={
                    'Start': self.yesterday.isoformat(),
                    'End': self.today.isoformat()
                },
                Granularity='DAILY',
                Metrics=['UnblendedCost'],
                GroupBy=[{
                    'Type': 'DIMENSION',
                    'Key': 'LINKED_ACCOUNT'
                }, {
                    'Type': 'DIMENSION',
                    'Key': 'SERVICE'
                }],
                **kwargs)
            self.results += data['ResultsByTime']
            token = data.get('NextPageToken')
            if not token:
                break

        for result_by_time in self.results:
            for group in result_by_time['Groups']:
                amount = group['Metrics']['UnblendedCost']['Amount']
                # Amount arrives as a numeric string; skip zero-cost rows
                if ast.literal_eval(amount) != 0:
                    labels = {
                        'account_id': group['Keys'][0],
                        'aws_service': group['Keys'][1]
                    }
                    # Push daily cost usage metrics to prometheus
                    metric_service_daily_usage.push(amount, **labels)
                    # Upload cost usage data to s3
                    self.s3_upload.add_excel_row(self.today.isoformat(),
                                                 amount, labels)

    def service_month_to_date_spend(self):
        """
        Calculates month-to-date spend cost and usage metrics by service.

        Gets the spend data from the beginning of the month to the current
        date for each service and forwards the metrics to prometheus.

        Start: First date of the current month. The start date is inclusive.
        End:   Month to date/current date. The end date is exclusive.
        """
        # Prometheus labels to be included in the service metric
        labels = {'account_id': '', 'aws_service': ''}
        self.logger.info("Getting month to date spend by each service..")
        metric_service_month_to_date_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_service_monthly_spend',
            'AWS Service month to date spend amount', **labels)

        kwargs = {}
        data = self.client.get_cost_and_usage(
            TimePeriod={
                'Start': self.begin_of_month.isoformat(),
                'End': self.today.isoformat()
            },
            Granularity='MONTHLY',
            Metrics=['UnblendedCost'],
            GroupBy=[{
                'Type': 'DIMENSION',
                'Key': 'SERVICE'
            }],
            **kwargs)

        for service in data['ResultsByTime'][0]['Groups']:
            amount = service['Metrics']['UnblendedCost']['Amount']
            if ast.literal_eval(amount) != 0:
                # NOTE(review): only 'aws_service' is sent here even though
                # the metric was declared with an 'account_id' label as well;
                # presumably push() fills the missing mandatory labels —
                # confirm against PrometheusPushMetric.push.
                labels = {'aws_service': service['Keys'][0]}

                metric_service_month_to_date_spend.push(amount, **labels)

    def account_month_to_date_spend(self):
        """
        Calculates month-to-date spend cost and usage metrics per linked
        account and forwards the metrics to prometheus.

        Start: First date of the current month. The start date is inclusive.
        End:   Last day of the current month. The end date is exclusive.
        """
        # Prometheus labels to be included in the account metric
        labels = {'account_id': ''}
        self.logger.info("Getting month to date spend by account..")
        metric_account_month_to_date_spend = PrometheusPushMetric(
            self.account_id, self.prom_endpoint,
            'ce_aws_account_monthly_spend', 'AWS account monthly spend',
            **labels)
        kwargs = {}
        data = self.client.get_cost_and_usage(
            TimePeriod={
                'Start': self.begin_of_month.isoformat(),
                'End': self.last_day.isoformat()
            },
            Granularity='MONTHLY',
            Metrics=['UnblendedCost'],
            GroupBy=[{
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }],
            **kwargs)
        for account in data['ResultsByTime']:
            amount = account['Groups'][0]['Metrics']['UnblendedCost']['Amount']
            account_id = account['Groups'][0]['Keys'][0]
            labels = {'account_id': account_id}
            metric_account_month_to_date_spend.push(amount, **labels)
# Example #11
class AccountBudget:
    """
    This class parses and processes the budget file, which has monthly budget
    information for each AWS account, and pushes the allocations to the
    prometheus gateway.
    """
    def __init__(self):
        self.logger = LOGGER('__AccountBudget__').config()
        self.util = Utils()

        # Prometheus labels to be included in the metric
        self.labels = {'account_id': '', 'account_name': '', 'owner': ''}

        self.costbuddy_output_bucket = os.getenv('s3_bucket')

        # Get the current YearMonth for processing budget allocation, example 201912
        self.current_month = datetime.strftime(datetime.today(), "%Y%m")

    def parse_budget_file(self):
        """
        Download and parse the allocated budget file and return the excel
        file object plus the list of sheet names.

        :return: excel: Pandas excel file class (None on read failure)
        :return  sheet_names: list of excel sheet names ([] on read failure)
        """
        budget_file = self.util.download_s3_file(
            bucket=self.costbuddy_output_bucket, filename='input/bills.xlsx')
        self.logger.info('Allocates Budget file downloaded location %s',
                         budget_file)

        try:
            excel = pd.ExcelFile(budget_file)
            sheet_names = excel.sheet_names
        except Exception as error:
            self.logger.error("Unable to read XLXS File, error %s", error)
            return None, []

        return excel, sheet_names

    def get_aws_accounts(self):
        """
        Get the list of accounts from all excel sheets.

        :return: accounts: List of AWS Accounts
        """
        accounts = []
        excel, sheets = self.parse_budget_file()

        if len(sheets) == 0:
            return accounts

        for sheet in sheets:
            try:
                # All the columns in the data frame loaded as string data type.
                # This is required because some AWS account numbers have
                # preceding zeros.
                sheet_df = pd.read_excel(excel, sheet_name=sheet, dtype=str)

                # Convert month field data type from string to float
                convert_dict = {int(self.current_month): float}
                sheet_df = sheet_df.astype(convert_dict)

                # drop last row which has total
                sheet_df.drop(sheet_df.tail(1).index, inplace=True)
                accounts.extend(list(sheet_df['AWS Account ID'].unique()))

            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s  \n Error %s", sheet, error)
                # In case a sheet malformed, process other
                continue

        return accounts

    def process_budget_by_account(self, sheet_df):
        """
            Process monthly budget allocation for each account and send it to
            the prometheus gateway node.

            :param sheet_df: An Excel Sheet data loaded into pandas.DataFrame
        """
        self.logger.info("Processing monthly budget by account")
        account_ids = sheet_df['AWS Account ID'].unique(
        )  # get list of unique aws account ids

        # PERF FIX: resolve the gateway endpoint once. The old code built a
        # new OutputConfigParser (which re-downloads the conf from S3) for
        # every single account in the loop below.
        try:
            prom_conf = OutputConfigParser().parse_output_config('prometheus')
            prom_endpoint = "%s:%s" % (prom_conf['gateway'],
                                       prom_conf['port'])
        except Exception as error:
            self.logger.error(error)
            self.logger.error('Unable to load output conf.')
            return

        for account_id in account_ids:
            self.logger.info("Processing Account %s", account_id)
            # Filter the rows matching this account id
            account_df = sheet_df[sheet_df['AWS Account ID'] == account_id]
            total = account_df[int(self.current_month)].sum()

            # In case multiple rows matched, use the last row to fetch the
            # account name and owner info
            last_row = account_df.iloc[-1]

            account_name = getattr(last_row, 'Account Description')
            owner = getattr(last_row, 'Owner')

            try:
                # The metric must be created per account: the push job name
                # is derived from the account id passed here.
                metric_budget = PrometheusPushMetric(
                    account_id, prom_endpoint, 'aws_account_monthly_budget',
                    'AWS monthly account budget', **self.labels)
            except Exception as error:
                self.logger.error(error)
                self.logger.error('Unable to load output conf.')
                return

            self.labels = {
                'account_id': account_id,
                'account_name': account_name,
                'owner': owner
            }

            metric_budget.push(total, **self.labels)

    def process_monthly_budget(self):
        """
            Iterate over each spreadsheet in the excel file and process the
            monthly budget info for each account.
        """
        excel, sheets = self.parse_budget_file()

        if len(sheets) == 0:
            return

        for sheet in sheets:
            try:
                # All the columns in the data frame loaded as string data type.
                # This is required because some AWS account numbers have
                # preceding zeros.
                sheet_df = pd.read_excel(excel, sheet_name=sheet, dtype=str)

                # Convert month field data type from string to float
                convert_dict = {int(self.current_month): float}
                sheet_df = sheet_df.astype(convert_dict)

                # drop last row which has total
                sheet_df.drop(sheet_df.tail(1).index, inplace=True)
                self.process_budget_by_account(sheet_df)

            except Exception as error:
                self.logger.exception(
                    "Unable to read sheet name %s  \n Error %s", sheet, error)
                # In case a sheet malformed, process other
                continue
 def __init__(self):
     """Set up logging, utilities and the worker-process list."""
     self.logger = LOGGER('__cur_monthly_projected_spend').config()
     self.util = Utils()
     # Processes are appended by callers (see lambda_handler) and
     # started/joined there; presumably multiprocessing.Process
     # instances — TODO confirm against CostUsageReportProjected usage.
     self.processes = []
# Example #13
def lambda_handler(event, context):
    """
    Lambda function to process daily and monthly spend using AWS cost
    utilization report (CUR) files stored in S3.

    Args:
        event : Lambda event payload (unused here)
        context : Lambda runtime context (unused here)
    """
    logger = LOGGER('__cur_cost_usage__').config()

    budget = AccountBudget()
    # Get the list of AWS accounts from budget file
    accounts = budget.get_aws_accounts()

    util = Utils()
    util.clean_up_tmp_dir()

    # Gateway endpoint and CUR file locations come from the output config
    prom_conf = OutputConfigParser().parse_output_config('prometheus')
    prom_endpoint = "%s:%s" % (prom_conf['gateway'], prom_conf['port'])

    cur_conf = OutputConfigParser().parse_output_config('cur')
    s3_bucket = cur_conf['bucket']
    daily_file_pattern = cur_conf['daily_file_pattern']
    monthly_file_pattern = cur_conf['monthly_file_pattern']

    # Expected key shape: <YYYYMM>_<pattern>.csv000, e.g. 201912_file.csv000
    #TODO file pattern suffix
    daily_cur_file = "%s_%s.csv000" % (util.get_current_month_year(),
                                       daily_file_pattern)
    monthly_cur_file = "%s_%s.csv000" % (util.get_current_month_year(),
                                         monthly_file_pattern)

    # Daily cost usage report
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=daily_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file %s", error)
        return

    # TODO Column name change
    # Column layout of the daily CUR extract (file ships without a header)
    columns = [
        'usagestartdate_date', 'aws_account_number', 'environment',
        'aws_account_name', 'aws_service_code', 'operation', 'component',
        'app', 'appenv', 'user', 'bu', 'cost_total'
    ]

    try:
        daily_usage_df = pd.read_csv(downloaded_file, dtype=str, header=None)
        # set the column names
        daily_usage_df.columns = columns

        # Convert cost_total column to float
        convert_dict = {'cost_total': float}
        daily_usage_df = daily_usage_df.astype(convert_dict)
    except Exception as error:
        logger.error("Unable to read daily usage CSV File %s ", error)
        return

    # Process latest set of records: take the usage date found on the last
    # row of the file (presumably the file is ordered by date — TODO confirm)
    last_record_date = "1970-01-01"
    for lastrecord in getattr(daily_usage_df.tail(1), 'usagestartdate_date'):
        last_record_date = lastrecord

    today = util.get_day_month_year()

    # Keep only rows for the latest date and for known budgeted accounts
    latest_df = daily_usage_df[daily_usage_df['usagestartdate_date'] ==
                               last_record_date]
    accounts_df = latest_df[latest_df['aws_account_number'].isin(accounts)]

    cur_spend = CostUsageReportSpend()
    cur_spend.account_month_to_date_spend(accounts_df, today, prom_endpoint)

    # Clean up /tmp dir before processing monthly cur file.
    util.clean_up_tmp_dir()

    # Monthly cost and usage report, seperate function
    try:
        downloaded_file = util.download_s3_file(bucket=s3_bucket,
                                                filename=monthly_cur_file)
        logger.info("Downloaded file name %s", downloaded_file)
    except Exception as error:
        logger.exception("Unable to download file, %s", error)
        return

    # TODO Column name change
    # Column layout of the monthly CUR extract (also headerless)
    columns = [
        'month_of_year', 'fiscal_quarter_of_year', 'as_of_date', 'bu',
        'application_name', 'aws_account_number', 'environment',
        'account_name', 'aws_service_code', 'operation', 'component',
        'user_app', 'appenv', 'user', 'finance_part', 'monthly_cost_to_date',
        'projected_month_end_cost', 'quarterly_cost_to_date',
        'projected_quarter_end_cost'
    ]

    try:
        monthly_spend_df = pd.read_csv(downloaded_file, dtype=str, header=None)
        monthly_spend_df.columns = columns

        # Cost columns arrive as strings; convert to float for aggregation
        convert_dict = {
            'monthly_cost_to_date': float,
            'projected_month_end_cost': float,
            'quarterly_cost_to_date': float,
            'projected_quarter_end_cost': float
        }
        monthly_spend_df = monthly_spend_df.astype(convert_dict)
    except Exception as error:
        logger.exception("Unable to read CSV File, %s", error)
        return

    accounts_df = monthly_spend_df[monthly_spend_df['aws_account_number'].isin(
        accounts)]

    cur_projected = CostUsageReportProjected()

    # Process monthly/projected spend cost by account id
    process = Process(target=cur_projected.account_monthly_projected_spend,
                      args=(accounts_df, prom_endpoint))
    cur_projected.processes.append(process)

    # start all processes
    for process in cur_projected.processes:
        process.start()

    # Wait for thread completion and ensure all threads have finished
    for process in cur_projected.processes:
        process.join()
class PrometheusPushMetric:
    """
    Pushes a single cost metric (a Gauge) to a Prometheus push gateway.

    Each instance owns one Gauge in its own CollectorRegistry; call
    push() to set a value with labels and push it to the gateway.
    """

    # Date labels that every metric must carry in addition to any
    # caller-supplied labels.
    REQUIRED_DATE_LABELS = ('year', 'month', 'day')

    def __init__(self, aws_account_id, prom_gateway, metric_name, metric_desc,
                 **labels):
        """
        Args:
            aws_account_id : 12 digit AWS account id without hyphen
            prom_gateway : IP or DNS name of push gateway instance
            metric_name : Name of the prometheus metric
            metric_desc : Short description about the metric
            **labels : Dict with key as label name and value as label value;
                only the keys are used here, to declare the Gauge's label names.
        """
        self.util = Utils()
        self.logger = LOGGER('__PrometheusPushMetric__').config()

        # Dedicated registry so only this metric is pushed per gateway call.
        self.registry = CollectorRegistry()
        self.account_id = aws_account_id
        self.prom_gateway = prom_gateway
        self.metric_name = metric_name

        self.labels = list(labels.keys())

        # Append only the date labels that are actually missing.  The old
        # check (`if not all(...)`) extended all three whenever any one was
        # missing, which created duplicate label names and broke the Gauge
        # declaration when the caller already supplied some of them.
        for label in self.REQUIRED_DATE_LABELS:
            if label not in self.labels:
                self.labels.append(label)

        # account_id is a mandatory label required for each metric.
        if 'account_id' not in self.labels:
            self.labels.append('account_id')

        self.metric = Gauge(metric_name,
                            metric_desc,
                            self.labels,
                            registry=self.registry)

    def push(self, metric_value, **labels):
        """
        Push a metric value to the Prometheus push gateway instance.

        Args:
            metric_value : Numeric value (or numeric string) to set on the gauge
            **labels : Label name/value pairs; must cover the label names
                declared in __init__.  NOTE: year/month/day are always
                overwritten with today's date, and account_id defaults to
                the account id given at construction time.

        Returns:
            None
        """
        today = self.util.get_day_month_year()

        # job label to be attached to all pushed metrics
        job_name = "AWS_%s_%s" % (self.account_id, self.metric_name)

        # Always stamp with the push date (overrides caller-supplied values).
        labels.update({
            'year': today.year,
            'month': today.month,
            'day': today.day,
        })

        # Default the mandatory account_id label if the caller omitted it.
        if 'account_id' not in labels:
            labels['account_id'] = self.account_id

        # Validate the metric has all required params: name, documentation,
        # type, unit.
        self.metric.describe()
        self.logger.info(labels)

        # Update metric labels and value, then push to the gateway.
        self.metric.labels(**labels).set(metric_value)
        push_to_gateway(self.prom_gateway,
                        job=job_name,
                        registry=self.registry)
class CostUsageReportProjected:
    """
    Calculates AWS account and service monthly projected spend information
    using the AWS Cost and Usage report file.
    """

    def __init__(self):
        self.logger = LOGGER('__cur_monthly_projected_spend').config()
        self.util = Utils()
        # Child processes created for per-account work; the caller starts
        # and joins them.
        self.processes = []

    def account_monthly_projected_spend(self, accounts_df, prom_endpoint):
        """
        Parse AWS Cost and Usage Monthly Report and calculate how much Amazon
        Web Services predicts that you will spend for each account from the
        beginning of the month to the last day of the month.

        Also calculates monthly spend so far by each account, and pushes both
        values as Prometheus metrics.

        Args:
            accounts_df : Pandas data frame containing AWS services month to
                date spend and projected spend
            prom_endpoint : Prometheus push gateway endpoint, IP address or a
                DNS name.
        """
        # Column names fixed to match the CUR data frame this method is
        # handed ('aws_account_number' / 'account_name' / 'environment');
        # the previous 'u_'-prefixed names raised KeyError.
        account_ids = accounts_df['aws_account_number'].unique()

        for account_id in account_ids:
            self.logger.info("Processing AWS Account ID %s", account_id)
            account_df = accounts_df[accounts_df['aws_account_number'] ==
                                     account_id]

            # Process monthly spend / projected spend by aws service.
            # Started and joined immediately, so accounts are processed
            # serially in an isolated child process.
            process = Process(target=self.service_monthly_projected_spend,
                              args=(account_df, account_id, prom_endpoint))
            process.start()
            process.join()

            cost_total = account_df['monthly_cost_to_date'].sum()
            projected_total = account_df['projected_month_end_cost'].sum()
            month_todate_spend = format(float(cost_total), '.3f')
            projected_spend = format(float(projected_total), '.3f')
            last_row = account_df.iloc[-1]

            # Use the report's own as_of_date for the metric date labels.
            _, cost_year, cost_month, cost_day = self.util.get_date(
                getattr(last_row, 'as_of_date'))

            labels = {
                'account_id': account_id,
                'account_name': getattr(last_row, 'account_name'),
                'env': getattr(last_row, 'environment'),
                'year': cost_year,
                'month': cost_month,
                'day': cost_day
            }

            metric_account_month_todate_spend = PrometheusPushMetric(
                account_id, prom_endpoint, 'cur_aws_account_monthly_spend',
                'AWS account month to date spend cost', **labels)

            metric_account_month_todate_spend.push(month_todate_spend,
                                                   **labels)

            # Description fixed: this metric carries the projected
            # month-end cost, not month-to-date spend (copy/paste error).
            metric_account_projected_spend = PrometheusPushMetric(
                account_id, prom_endpoint, 'cur_aws_account_monthly_forecast',
                'AWS account projected month end cost', **labels)

            metric_account_projected_spend.push(projected_spend, **labels)

    def service_monthly_projected_spend(self, services_df, account_id,
                                        prom_endpoint):
        """
        Parse AWS Cost and Usage Monthly Report and calculate how much Amazon
        Web Services predicts that you will spend for each service from the
        beginning of the month to the last day of the month.

        Also calculates monthly spend so far by each service.

        Args:
            services_df : Pandas data frame containing AWS services month to
                date spend and projected spend
            account_id : 12 digit AWS account id without hyphen
            prom_endpoint : Prometheus push gateway endpoint, IP address or a
                DNS name.
        """
        # Unique AWS service codes present for this account.
        services = services_df['aws_service_code'].unique()

        # Declare both metrics once with the full label set; per-service
        # values are filled in below.
        labels = {
            'account_id': '',
            'account_name': '',
            'env': '',
            'aws_service': '',
            'year': '',
            'month': '',
            'day': ''
        }

        metric_service_monthly_spend = PrometheusPushMetric(
            account_id, prom_endpoint, 'cur_aws_service_monthly_spend',
            'AWS service month to date spend cost', **labels)

        # Description fixed: projected month-end cost (copy/paste error).
        metric_service_projected_spend = PrometheusPushMetric(
            account_id, prom_endpoint, 'cur_aws_service_monthly_forecast',
            'AWS service projected month end cost', **labels)

        for service in services:
            self.logger.info("Processing AWS Service %s", service)
            service_df = services_df[services_df['aws_service_code'] ==
                                     service]
            cost_total = service_df['monthly_cost_to_date'].sum()
            projected_total = service_df['projected_month_end_cost'].sum()

            month_todate_spend = format(float(cost_total), '.3f')
            projected_spend = format(float(projected_total), '.3f')
            last_row = service_df.iloc[-1]

            _, cost_year, cost_month, cost_day = self.util.get_date(
                getattr(last_row, 'as_of_date'))

            # Column names fixed to match the CUR data frame (see
            # account_monthly_projected_spend).
            labels = {
                'account_id': account_id,
                'account_name': getattr(last_row, 'account_name'),
                'env': getattr(last_row, 'environment'),
                'aws_service': service,
                'year': cost_year,
                'month': cost_month,
                'day': cost_day
            }

            metric_service_monthly_spend.push(month_todate_spend, **labels)
            metric_service_projected_spend.push(projected_spend, **labels)
# Example #16
 def __init__(self):
     self.logger = LOGGER('__cur_cost_usage__').config()
class S3Upload:
    """
    Collects Cost Explorer metric rows and uploads them to an S3 bucket as an
    Excel file; the file can then be processed by tools like AWS Athena and
    QuickSight.
    """

    def __init__(self, account_id, bucket, file_name):
        """
        Args:
            account_id : 12 digit AWS account id without hyphen
            bucket : S3 bucket name, optionally followed by a directory,
                e.g. 'custbuddy-output' or 'custbuddy-output/conf'
            file_name : S3 key name without extension ('.xlsx' is appended
                on upload)
        """
        self.logger = LOGGER('__S3Upload__').config()
        self.s3_client = boto3.client('s3')
        self.util = Utils()
        self.account_id = account_id
        self.file_name = file_name

        # Extract the bucket name; any remainder after the first '/' is the
        # target directory.  Tolerate a bare bucket name — the previous
        # unconditional split('/')[1] raised IndexError when no directory
        # was supplied, despite the docstring allowing it.
        parts = bucket.split('/')
        self.bucket = parts[0]
        self.dir = parts[1] if len(parts) > 1 else ''
        self.rows = []

    def add_excel_row(self, date, amount, labels):
        """
        Queue one row to be written to the Excel sheet.

        Args:
            date : Current date with format yyyy-mm-dd
            amount : Service or account usage cost
            labels : Dict with key as label name and value as label value;
                keys become column headers.  Mutated in place: 'amount',
                'date' and (if missing) 'account_id' are added.

        Returns:
            None
        """
        if 'account_id' not in labels:
            labels['account_id'] = self.account_id

        labels['amount'] = amount
        labels['date'] = date
        self.logger.info(labels)
        self.rows.append(labels)

    # TODO Upload function instead of destructor
    # TODO Cleanup function for clean up data

    def __del__(self):
        """
        Write the queued rows to an Excel file and upload it to S3.

        The destructor is used so that, when this class is instantiated per
        account, each object uploads its file as it is released.
        NOTE(review): __del__ is not guaranteed to run promptly (or at all
        during interpreter shutdown) — an explicit upload() method would be
        safer; see the TODOs above.
        """
        # Nothing to upload; also protects partially-constructed objects
        # (e.g. when __init__ failed before setting self.rows).
        if not getattr(self, 'rows', None):
            return

        tmp_dir = tempfile.mkdtemp()
        self.file_name = self.file_name + ".xlsx"
        xlsx_file = os.path.join(tmp_dir, self.file_name)
        self.logger.info("File location :: %s", xlsx_file)
        writer = pd.ExcelWriter(xlsx_file, engine='xlsxwriter')

        account_df = pd.DataFrame(self.rows)
        account_df.set_index("date", inplace=True)
        # fillna returns a new frame; the old code discarded the result,
        # so NaN cells leaked into the uploaded file.
        account_df = account_df.fillna(0.0)
        account_df.to_excel(writer)
        writer.save()
        self.s3_client.upload_file(xlsx_file, self.bucket,
                                   '%s/%s' % (self.dir, self.file_name))
        self.logger.info('File has been uploaded to s3')