def test_today_with_timezone_error_raised(self):
    """Verify an invalid timezone name raises DateAccessorError."""
    # A timezone pytz will never recognize.
    bogus_timezone = "Moon/Mare Tranquillitatis"
    date_accessor = DateAccessor()
    with self.assertRaises(DateAccessorError):
        date_accessor.today_with_timezone(bogus_timezone)
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = 'public'
        super().__init__(self._schema)
        base = self.get_base()
        self._table = base.classes.reporting_common_costusagereportmanifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_id):
        """Get the manifest associated with the provided provider and id."""
        return (
            self._get_db_obj_query()
            .filter_by(provider_id=provider_id)
            .filter_by(assembly_id=assembly_id)
            .first()
        )

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id."""
        return self._get_db_obj_query().filter_by(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Update the updated timestamp."""
        now = self.date_accessor.today_with_timezone('UTC')
        manifest.manifest_updated_datetime = now

    def add(self, use_savepoint=True, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            use_savepoint (bool): Whether to wrap the insert in a savepoint.
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_id,
        Returns:
            None

        """
        # Only compute the timestamp when the caller did not supply one; the
        # date accessor call is avoided otherwise.
        if 'manifest_creation_datetime' not in kwargs:
            kwargs['manifest_creation_datetime'] = \
                self.date_accessor.today_with_timezone('UTC')
        kwargs.setdefault('num_processed_files', 0)
        return super().add(use_savepoint, **kwargs)
def test_get_bill_ids_from_provider_with_start_and_end_date(self):
    """Test that bill IDs are returned for an AWS provider with both dates."""
    dates = DateAccessor()
    with ProviderDBAccessor(
            provider_uuid=self.aws_provider_uuid) as provider_accessor:
        provider = provider_accessor.get_provider()
    with AWSReportDBAccessor(schema=self.schema) as accessor:
        # Anchor on the first of the current month, then walk back a month.
        end_date = dates.today_with_timezone("utc").replace(day=1)
        start_date = end_date
        for months_back in range(2):
            start_date = start_date - relativedelta(months=months_back)
        bill_query = accessor.get_cost_entry_bills_query_by_provider(
            provider.uuid)
        with schema_context(self.schema):
            matching_bills = (
                bill_query
                .filter(billing_period_start__gte=start_date.date())
                .filter(billing_period_start__lte=end_date.date())
                .all()
            )
            expected_bill_ids = [str(bill.id) for bill in matching_bills]
    found_bills = utils.get_bills_from_provider(
        self.aws_provider_uuid,
        self.schema,
        start_date=start_date,
        end_date=end_date,
    )
    with schema_context(self.schema):
        bill_ids = [str(bill.id) for bill in found_bills]
    self.assertEqual(bill_ids, expected_bill_ids)
def test_get_bill_ids_from_provider_with_start_and_end_date(self):
    """Test that bill IDs are returned for an AWS provider with both dates.

    Creates bills for the current and previous month, then checks that
    get_bills_from_provider() returns exactly the bills whose billing
    period falls between start_date and end_date.
    """
    date_accessor = DateAccessor()
    with ProviderDBAccessor(
            provider_uuid=self.aws_test_provider_uuid) as provider_accessor:
        provider = provider_accessor.get_provider()
    with AWSReportDBAccessor(schema=self.test_schema,
                             column_map=self.column_map) as accessor:
        report_schema = accessor.report_schema
        creator = ReportObjectCreator(accessor, self.column_map,
                                      report_schema.column_types)
        end_date = date_accessor.today_with_timezone('utc').replace(day=1)
        start_date = end_date
        # Create a bill for this month and for the previous month.
        # (Removed a leftover debug print(start_date) from this loop.)
        for i in range(2):
            start_date = start_date - relativedelta(months=i)
            creator.create_cost_entry_bill(bill_date=start_date)
        bill_table_name = AWS_CUR_TABLE_MAP['bill']
        bill_obj = getattr(accessor.report_schema, bill_table_name)
        bills = accessor.get_cost_entry_bills_query_by_provider(provider.id)
        bills = bills.filter(bill_obj.billing_period_start >= start_date.date())\
            .filter(bill_obj.billing_period_start <= end_date.date()).all()
        expected_bill_ids = [str(bill.id) for bill in bills]
    bills = utils.get_bills_from_provider(self.aws_test_provider_uuid,
                                          self.test_schema,
                                          start_date=start_date,
                                          end_date=end_date)
    bill_ids = [str(bill.id) for bill in bills]
    self.assertEqual(bill_ids, expected_bill_ids)
def test_remove_files_not_in_set_from_s3_bucket(self):
    """Test remove_files_not_in_set_from_s3_bucket."""
    # With no path, the helper removes nothing.
    removed = utils.remove_files_not_in_set_from_s3_bucket(
        "request_id", None, "manifest_id")
    self.assertEqual(removed, [])

    start_date = DateAccessor().today_with_timezone("utc").replace(day=1)
    s3_csv_path = get_path_prefix("account", Provider.PROVIDER_AWS,
                                  "provider_uuid", start_date,
                                  Config.CSV_DATA_TYPE)
    expected_key = "removed_key"
    mock_object = Mock(metadata={}, key=expected_key)
    mock_summary = Mock()
    mock_summary.Object.return_value = mock_object

    # A key not belonging to the manifest's file set is reported removed.
    with patch("masu.util.aws.common.settings", ENABLE_S3_ARCHIVING=True):
        with patch("masu.util.aws.common.get_s3_resource") as mock_s3:
            mock_s3.return_value.Bucket.return_value.objects.filter.return_value = [
                mock_summary
            ]
            removed = utils.remove_files_not_in_set_from_s3_bucket(
                "request_id", s3_csv_path, "manifest_id")
            self.assertEqual(removed, [expected_key])

    # S3 client errors result in an empty removal list.
    with patch("masu.util.aws.common.settings", ENABLE_S3_ARCHIVING=True):
        with patch("masu.util.aws.common.get_s3_resource") as mock_s3:
            mock_s3.side_effect = ClientError({}, "Error")
            removed = utils.remove_files_not_in_set_from_s3_bucket(
                "request_id", s3_csv_path, "manifest_id")
            self.assertEqual(removed, [])
def test_today_with_timezone_string(self):
    """Test that a timezone string works as expected.

    Compares the accessor's result to a reference "now" with a small
    tolerance instead of exact date/hour/minute equality; the exact
    checks flake when the two timestamps straddle a minute (or day)
    boundary.
    """
    string_tz = "UTC"
    current_utc_time = datetime.utcnow()
    accessor = DateAccessor()
    result_time = accessor.today_with_timezone(string_tz)
    # result_time is tz-aware, current_utc_time is naive; strip tzinfo
    # so the subtraction is valid. Both represent UTC wall-clock time.
    delta = result_time.replace(tzinfo=None) - current_utc_time
    self.assertTrue(0 <= delta.total_seconds() < 5)
    self.assertEqual(result_time.tzinfo, pytz.UTC)
def test_today_with_timezone_object(self):
    """Test that a timezone object works as expected.

    Uses a tolerance comparison between two tz-aware datetimes rather
    than exact date/hour/minute equality, which flakes at minute and
    day boundaries.
    """
    fake_tz_name = self.fake.timezone()
    fake_tz = pytz.timezone(fake_tz_name)
    current_time = datetime.now(fake_tz)
    accessor = DateAccessor()
    result_time = accessor.today_with_timezone(fake_tz)
    # Both datetimes are tz-aware, so direct subtraction is valid.
    delta = result_time - current_time
    self.assertTrue(0 <= delta.total_seconds() < 5)
    self.assertEqual(str(result_time.tzinfo), fake_tz_name)
def test_get_bill_ids_from_provider(self):
    """Test that bill IDs are returned for an AWS provider."""
    dates = DateAccessor()
    with AWSReportDBAccessor(schema=self.test_schema,
                             column_map=self.column_map) as accessor:
        report_schema = accessor.report_schema
        creator = ReportObjectCreator(accessor, self.column_map,
                                      report_schema.column_types)
        expected_bill_ids = []
        # First of the current month, then one bill per month going back.
        end_date = dates.today_with_timezone('utc').replace(day=1)
        start_date = end_date
        for months_back in range(2):
            start_date = start_date - relativedelta(months=months_back)
            bill = creator.create_cost_entry_bill(bill_date=start_date)
            expected_bill_ids.append(str(bill.id))
    bills = utils.get_bills_from_provider(self.aws_test_provider_uuid,
                                          self.test_schema)
    bill_ids = [str(bill.id) for bill in bills]
    self.assertEqual(sorted(bill_ids), sorted(expected_bill_ids))
def test_get_bill_ids_from_provider(self):
    """Test that bill IDs are returned for an AWS provider."""
    dates = DateAccessor()
    creator = ReportObjectCreator(self.schema, self.column_map)
    expected_bill_ids = []
    # First of the current month, then one bill per month going back.
    end_date = dates.today_with_timezone("utc").replace(day=1)
    start_date = end_date
    for months_back in range(2):
        start_date = start_date - relativedelta(months=months_back)
        bill = creator.create_cost_entry_bill(
            provider_uuid=self.aws_provider_uuid, bill_date=start_date)
        with schema_context(self.schema):
            expected_bill_ids.append(str(bill.id))
    bills = utils.get_bills_from_provider(self.aws_provider_uuid, self.schema)
    with schema_context(self.schema):
        bill_ids = [str(bill.id) for bill in bills]
    self.assertEqual(sorted(bill_ids), sorted(expected_bill_ids))
class ProviderStatusAccessorTest(MasuTestCase):
    """Test Cases for the ProviderStatusAccessor object."""

    # Shared Faker instance used to generate random status fields.
    FAKE = Faker()

    def setUp(self):
        """Test set up."""
        super().setUp()
        self.date_accessor = DateAccessor()
        with ProviderDBAccessor(self.aws_provider_uuid) as provider_accessor:
            provider = provider_accessor.get_provider()
            self.provider_uuid = provider.uuid

    def _setup_random_status(self):
        """Set up a randomized status for testing.

        This is being done in a separate function instead of in setUp() to
        facilitate testing the case where there is no status in the DB.
        """
        self.test_status = {
            'provider_id': self.provider_uuid,
            'status': random.choice(list(ProviderStatusCode)),
            'last_message': self.FAKE.word(),
            'retries': random.randint(0, 10),
            'timestamp': self.date_accessor.today_with_timezone('UTC'),
        }
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            status = accessor.add(**self.test_status)
            status.save()
            # Keep the persisted timestamp for test_get_timestamp().
            self.time_stamp = status.timestamp

    def test_init(self):
        """Test __init__() when a status is in the DB."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            # Deliberately inspects private attributes to confirm that
            # construction loaded both the table and the row object.
            self.assertIsNotNone(accessor._table)
            self.assertIsNotNone(accessor._obj)

    def test_init_wo_provider(self):
        """Test __init__() when a provider is not in the DB."""
        # A random UUID should not match any provider.
        with self.assertRaises(MasuProviderError):
            ProviderStatusAccessor(str(uuid.uuid4()))

    def test_get_status(self):
        """Test get_status()."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            output = accessor.get_status()
            self.assertEqual(output, self.test_status.get('status'))

    def test_get_last_message(self):
        """Test get_last_message()."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            output = accessor.get_last_message()
            self.assertEqual(output, self.test_status.get('last_message'))

    def test_get_retries(self):
        """Test get_retries()."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            output = accessor.get_retries()
            self.assertEqual(output, self.test_status.get('retries'))

    def test_get_provider_uuid(self):
        """Test get_provider_uuid()."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            output = accessor.get_provider_uuid()
            self.assertEqual(output, self.aws_provider_uuid)

    def test_get_timestamp(self):
        """Test get_timestamp()."""
        self._setup_random_status()
        with ProviderStatusAccessor(self.aws_provider_uuid) as accessor:
            output = accessor.get_timestamp()
            self.assertEqual(output, self.time_stamp)
class AzureReportSummaryUpdater:
    """Class to update Azure report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider (Provider): The provider whose data is summarized
            manifest (CostUsageReportManifest): Manifest for this run,
                or None when summarizing outside of manifest processing

        """
        self._schema_name = schema
        self._provider = provider
        self._manifest = manifest
        with ReportingCommonDBAccessor() as reporting_common:
            self._column_map = reporting_common.column_map
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL."""
        with AzureReportDBAccessor(self._schema_name, self._column_map) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                bills = accessor.get_cost_entry_bills_query_by_provider(
                    self._provider.id
                )
                bills = bills.filter(billing_period_start=bill_date).all()
                do_month_update = False
                with schema_context(self._schema_name):
                    # NOTE(review): bills[0] raises IndexError if no bill row
                    # matches the manifest's billing period -- confirm that
                    # processing always creates the bill before summary runs.
                    do_month_update = self._determine_if_full_summary_update_needed(
                        bills[0]
                    )
                if do_month_update:
                    # Widen the range to the whole billing month.
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month
                    )[1]
                    start_date = bill_date.strftime('%Y-%m-%d')
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime('%Y-%m-%d')
                    LOG.info('Overriding start and end date to process full month.')
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        # Azure has no separate daily rollup step; only the dates are
        # (possibly) adjusted for a full-month update.
        LOG.info('update_daily_tables for: %s-%s', str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        LOG.info('update_summary_tables for: %s-%s', str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema_name,
            datetime.datetime.strptime(start_date, '%Y-%m-%d'),
            datetime.datetime.strptime(end_date, '%Y-%m-%d')
        )
        bill_ids = []
        with schema_context(self._schema_name):
            bill_ids = [str(bill.id) for bill in bills]
        with AzureReportDBAccessor(self._schema_name, self._column_map) as accessor:
            # Need these bills on the session to update dates after processing
            bills = accessor.bills_for_provider_id(self._provider.id, start_date)
            LOG.info('Updating Azure report summary tables: \n\tSchema: %s'
                     '\n\tProvider: %s \n\tDates: %s - %s',
                     self._schema_name, self._provider.uuid,
                     start_date, end_date)
            accessor.populate_line_item_daily_summary_table(start_date, end_date,
                                                            bill_ids)
            accessor.populate_tags_summary_table()
            # Stamp creation (first time only) and update datetimes on each bill.
            for bill in bills:
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = \
                        self._date_accessor.today_with_timezone('UTC')
                bill.summary_data_updated_datetime = \
                    self._date_accessor.today_with_timezone('UTC')
                bill.save()
            accessor.commit()
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, bill):
        """Decide whether to update summary tables for full billing period."""
        processed_files = self._manifest.num_processed_files
        total_files = self._manifest.num_total_files
        summary_creation = bill.summary_data_creation_datetime
        is_done_processing = processed_files == total_files
        is_new_bill = summary_creation is None
        # Do a full month update if we just finished processing a finalized
        # bill or we just finished processing a bill for the first time
        if is_done_processing and is_new_bill:
            return True
        return False
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None):
        """
        Initializer.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid (str): UUID of the provider to summarize.
            manifest_id (int): Optional id of the manifest driving this run.

        Raises:
            ReportSummaryUpdaterError: If the provider type is unsupported or
                creating the provider-specific updater fails.

        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(
                    manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()
        try:
            # _set_updater() returns None for unknown provider types, so the
            # tuple unpack below raises TypeError, which is re-raised here.
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            raise ReportSummaryUpdaterError(err)
        if not self._updater:
            raise ReportSummaryUpdaterError('Invalid provider type specified.')
        LOG.info('Starting report data summarization for provider uuid: %s.',
                 self._provider.uuid)

    def _set_updater(self):
        """
        Create the report summary updater object.

        Object is specific to the report provider.

        Args:
            None

        Returns:
            (tuple) : (provider-specific updater, OCP-on-cloud updater),
                      or None for an unrecognized provider type.

        """
        if self._provider.type in (AMAZON_WEB_SERVICES, AWS_LOCAL_SERVICE_PROVIDER):
            return (AWSReportSummaryUpdater(self._schema,
                                            self._provider,
                                            self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema,
                                                 self._provider,
                                                 self._manifest))
        if self._provider.type in (AZURE, AZURE_LOCAL_SERVICE_PROVIDER):
            return (AzureReportSummaryUpdater(self._schema,
                                              self._provider,
                                              self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema,
                                                 self._provider,
                                                 self._manifest))
        if self._provider.type in (OPENSHIFT_CONTAINER_PLATFORM, ):
            return (OCPReportSummaryUpdater(self._schema,
                                            self._provider,
                                            self._manifest),
                    OCPCloudReportSummaryUpdater(self._schema,
                                                 self._provider,
                                                 self._manifest))
        return None

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater."""
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime('%Y-%m-%d')
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime('%Y-%m-%d')
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone('UTC')
            end_date = end_date.strftime('%Y-%m-%d')
        return start_date, end_date

    def manifest_is_ready(self):
        """Check if processing should continue.

        NOTE(review): despite the name, this returns True when the manifest
        files are NOT all processed (i.e. summarization should be deferred)
        and returns None otherwise -- confirm callers treat a truthy result
        as "defer", or that the return values should be inverted.
        """
        if self._manifest and self._manifest.num_processed_files != self._manifest.num_total_files:
            # Bail if all manifest files have not been processed
            # NOTE: the adjacent string literals below concatenate without a
            # space between 'processing.' and 'Summary'.
            LOG.error(
                'Not all manifest files have completed processing.'
                'Summary deferred. Processed Files: %s, Total Files: %s',
                str(self._manifest.num_processed_files),
                str(self._manifest.num_total_files))
            return True

    def update_daily_tables(self, start_date, end_date):
        """
        Update report daily rollup tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            (str, str): The start and end date strings used in the daily SQL.

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        start_date, end_date = self._updater.update_daily_tables(
            start_date, end_date)
        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """
        Update report summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        LOG.info('Using start date: %s', start_date)
        LOG.info('Using end date: %s', end_date)
        # Provider-specific summary first, then the OCP-on-cloud rollup using
        # the (possibly adjusted) dates the provider updater returns.
        start_date, end_date = self._updater.update_summary_tables(
            start_date, end_date)
        self._ocp_cloud_updater.update_summary_tables(start_date,
                                                      end_date)

    def update_cost_summary_table(self, start_date, end_date):
        """
        Update cost summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        self._ocp_cloud_updater.update_cost_summary_table(start_date, end_date)
class AWSReportDBAccessor(SQLScriptAtomicExecutorMixin, ReportDBAccessorBase): """Class to interact with customer reporting tables.""" def __init__(self, schema): """Establish the database connection. Args: schema (str): The customer schema to associate with """ super().__init__(schema) self._datetime_format = Config.AWS_DATETIME_STR_FORMAT self.date_accessor = DateAccessor() self.jinja_sql = JinjaSql() self._table_map = AWS_CUR_TABLE_MAP @property def line_item_daily_summary_table(self): return AWSCostEntryLineItemDailySummary @property def ocpall_line_item_daily_summary_table(self): return get_model("OCPAllCostLineItemDailySummaryP") @property def ocpall_line_item_project_daily_summary_table(self): return get_model("OCPAllCostLineItemProjectDailySummaryP") @property def line_item_table(self): return AWSCostEntryLineItem @property def cost_entry_table(self): return AWSCostEntry @property def line_item_daily_table(self): return AWSCostEntryLineItemDaily def get_cost_entry_bills(self): """Get all cost entry bill objects.""" table_name = AWSCostEntryBill with schema_context(self.schema): columns = ["id", "bill_type", "payer_account_id", "billing_period_start", "provider_id"] bills = self._get_db_obj_query(table_name).values(*columns) return { (bill["bill_type"], bill["payer_account_id"], bill["billing_period_start"], bill["provider_id"]): bill[ "id" ] for bill in bills } def get_cost_entry_bills_by_date(self, start_date): """Return a cost entry bill for the specified start date.""" table_name = AWSCostEntryBill with schema_context(self.schema): return self._get_db_obj_query(table_name).filter(billing_period_start=start_date) def get_cost_entry_bills_query_by_provider(self, provider_uuid): """Return all cost entry bills for the specified provider.""" table_name = AWSCostEntryBill with schema_context(self.schema): return self._get_db_obj_query(table_name).filter(provider_id=provider_uuid) def bills_for_provider_uuid(self, provider_uuid, start_date=None): """Return all 
cost entry bills for provider_uuid on date.""" bills = self.get_cost_entry_bills_query_by_provider(provider_uuid) if start_date: if isinstance(start_date, str): start_date = parse(start_date) bill_date = start_date.replace(day=1) bills = bills.filter(billing_period_start=bill_date) return bills def get_bill_query_before_date(self, date, provider_uuid=None): """Get the cost entry bill objects with billing period before provided date.""" table_name = AWSCostEntryBill with schema_context(self.schema): base_query = self._get_db_obj_query(table_name) if provider_uuid: cost_entry_bill_query = base_query.filter(billing_period_start__lte=date, provider_id=provider_uuid) else: cost_entry_bill_query = base_query.filter(billing_period_start__lte=date) return cost_entry_bill_query def get_lineitem_query_for_billid(self, bill_id): """Get the AWS cost entry line item for a given bill query.""" table_name = AWSCostEntryLineItem with schema_context(self.schema): base_query = self._get_db_obj_query(table_name) line_item_query = base_query.filter(cost_entry_bill_id=bill_id) return line_item_query def get_daily_query_for_billid(self, bill_id): """Get the AWS cost daily item for a given bill query.""" table_name = AWSCostEntryLineItemDaily with schema_context(self.schema): base_query = self._get_db_obj_query(table_name) daily_item_query = base_query.filter(cost_entry_bill_id=bill_id) return daily_item_query def get_summary_query_for_billid(self, bill_id): """Get the AWS cost summary item for a given bill query.""" table_name = AWSCostEntryLineItemDailySummary with schema_context(self.schema): base_query = self._get_db_obj_query(table_name) summary_item_query = base_query.filter(cost_entry_bill_id=bill_id) return summary_item_query def get_ocp_aws_summary_query_for_billid(self, bill_id): """Get the OCP-on-AWS report summary item for a given bill query.""" table_name = self._table_map["ocp_on_aws_daily_summary"] base_query = self._get_db_obj_query(table_name) summary_item_query = 
base_query.filter(cost_entry_bill_id=bill_id) return summary_item_query def get_ocp_aws_project_summary_query_for_billid(self, bill_id): """Get the OCP-on-AWS report project summary item for a given bill query.""" table_name = self._table_map["ocp_on_aws_project_daily_summary"] base_query = self._get_db_obj_query(table_name) summary_item_query = base_query.filter(cost_entry_bill_id=bill_id) return summary_item_query def get_cost_entry_query_for_billid(self, bill_id): """Get the AWS cost entry data for a given bill query.""" table_name = AWSCostEntry with schema_context(self.schema): base_query = self._get_db_obj_query(table_name) line_item_query = base_query.filter(bill_id=bill_id) return line_item_query def get_cost_entries(self): """Make a mapping of cost entries by start time.""" table_name = AWSCostEntry with schema_context(self.schema): cost_entries = self._get_db_obj_query(table_name).all() return {(ce.bill_id, ce.interval_start.strftime(self._datetime_format)): ce.id for ce in cost_entries} def get_products(self): """Make a mapping of product sku to product objects.""" table_name = AWSCostEntryProduct with schema_context(self.schema): columns = ["id", "sku", "product_name", "region"] products = self._get_db_obj_query(table_name, columns=columns).all() return { (product["sku"], product["product_name"], product["region"]): product["id"] for product in products } def get_pricing(self): """Make a mapping of pricing values string to pricing objects.""" table_name = AWSCostEntryPricing with schema_context(self.schema): pricing = self._get_db_obj_query(table_name).all() return {f"{p.term}-{p.unit}": p.id for p in pricing} def get_reservations(self): """Make a mapping of reservation ARN to reservation objects.""" table_name = AWSCostEntryReservation with schema_context(self.schema): columns = ["id", "reservation_arn"] reservs = self._get_db_obj_query(table_name, columns=columns).all() return {res["reservation_arn"]: res["id"] for res in reservs} def 
populate_line_item_daily_table(self, start_date, end_date, bill_ids): """Populate the daily aggregate of line items table. Args: start_date (datetime.date) The date to start populating the table. end_date (datetime.date) The date to end on. bill_ids (list) Returns (None) """ table_name = self._table_map["line_item_daily"] daily_sql = pkgutil.get_data("masu.database", "sql/reporting_awscostentrylineitem_daily.sql") daily_sql = daily_sql.decode("utf-8") daily_sql_params = { "uuid": str(uuid.uuid4()).replace("-", "_"), "start_date": start_date, "end_date": end_date, "bill_ids": bill_ids, "schema": self.schema, } daily_sql, daily_sql_params = self.jinja_sql.prepare_query(daily_sql, daily_sql_params) self._execute_raw_sql_query(table_name, daily_sql, start_date, end_date, bind_params=list(daily_sql_params)) def populate_line_item_daily_summary_table(self, start_date, end_date, bill_ids): """Populate the daily aggregated summary of line items table. Args: start_date (datetime.date) The date to start populating the table. end_date (datetime.date) The date to end on. 
Returns (None) """ table_name = self._table_map["line_item_daily_summary"] summary_sql = pkgutil.get_data("masu.database", "sql/reporting_awscostentrylineitem_daily_summary.sql") summary_sql = summary_sql.decode("utf-8") summary_sql_params = { "uuid": str(uuid.uuid4()).replace("-", "_"), "start_date": start_date, "end_date": end_date, "bill_ids": bill_ids, "schema": self.schema, } summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params) self._execute_raw_sql_query( table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params) ) def populate_ui_summary_tables(self, start_date, end_date, source_uuid, tables=UI_SUMMARY_TABLES): """Populate our UI summary tables (formerly materialized views).""" for table_name in tables: summary_sql = pkgutil.get_data("masu.database", f"sql/aws/{table_name}.sql") summary_sql = summary_sql.decode("utf-8") summary_sql_params = { "start_date": start_date, "end_date": end_date, "schema": self.schema, "source_uuid": source_uuid, } summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params) self._execute_raw_sql_query( table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params), operation="DELETE/INSERT", ) def populate_line_item_daily_summary_table_presto(self, start_date, end_date, source_uuid, bill_id, markup_value): """Populate the daily aggregated summary of line items table. Args: start_date (datetime.date) The date to start populating the table. end_date (datetime.date) The date to end on. 
Returns (None) """ summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_awscostentrylineitem_daily_summary.sql") summary_sql = summary_sql.decode("utf-8") uuid_str = str(uuid.uuid4()).replace("-", "_") summary_sql_params = { "uuid": uuid_str, "start_date": start_date, "end_date": end_date, "schema": self.schema, "table": PRESTO_LINE_ITEM_DAILY_TABLE, "source_uuid": source_uuid, "year": start_date.strftime("%Y"), "month": start_date.strftime("%m"), "markup": markup_value if markup_value else 0, "bill_id": bill_id, } summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params) self._execute_presto_raw_sql_query( self.schema, summary_sql, log_ref="reporting_awscostentrylineitem_daily_summary.sql" ) def mark_bill_as_finalized(self, bill_id): """Mark a bill in the database as finalized.""" table_name = AWSCostEntryBill with schema_context(self.schema): bill = self._get_db_obj_query(table_name).get(id=bill_id) if bill.finalized_datetime is None: bill.finalized_datetime = self.date_accessor.today_with_timezone("UTC") bill.save() def populate_tags_summary_table(self, bill_ids, start_date, end_date): """Populate the line item aggregated totals data table.""" table_name = self._table_map["tags_summary"] agg_sql = pkgutil.get_data("masu.database", "sql/reporting_awstags_summary.sql") agg_sql = agg_sql.decode("utf-8") agg_sql_params = {"schema": self.schema, "bill_ids": bill_ids, "start_date": start_date, "end_date": end_date} agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params) self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params)) def populate_ocp_on_aws_cost_daily_summary(self, start_date, end_date, cluster_id, bill_ids, markup_value): """Populate the daily cost aggregated summary for OCP on AWS. Args: start_date (datetime.date) The date to start populating the table. end_date (datetime.date) The date to end on. 
        Returns
            (None)

        """
        table_name = self._table_map["ocp_on_aws_daily_summary"]
        # Load the packaged Jinja SQL template, render it with the params
        # below, then execute it against the customer schema.
        summary_sql = pkgutil.get_data("masu.database", "sql/reporting_ocpawscostlineitem_daily_summary.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            # Hyphens removed so the uuid is usable inside SQL identifiers.
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "cluster_id": cluster_id,
            "schema": self.schema,
            "markup": markup_value,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def populate_ocp_on_aws_ui_summary_tables(self, sql_params, tables=OCPAWS_UI_SUMMARY_TABLES):
        """Populate our UI summary tables (formerly materialized views).

        Args:
            sql_params (dict): Parameters rendered into every table's SQL template.
            tables (iterable): Table names; each maps to a template under
                sql/aws/openshift/<table_name>.sql.

        Returns:
            (None)
        """
        for table_name in tables:
            summary_sql = pkgutil.get_data("masu.database", f"sql/aws/openshift/{table_name}.sql")
            summary_sql = summary_sql.decode("utf-8")
            summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, sql_params)
            self._execute_raw_sql_query(table_name, summary_sql, bind_params=list(summary_sql_params))

    def delete_ocp_on_aws_hive_partition_by_day(self, days, aws_source, ocp_source, year, month):
        """Delete Hive partitions individually for each day in the days list.

        Args:
            days (list): Day-of-month strings identifying the partitions to drop.
            aws_source (str): AWS source (provider) uuid used in the partition key.
            ocp_source (str): OCP source (provider) uuid used in the partition key.
            year (str): Four-digit year partition value.
            month (str): Two-digit month partition value.

        Returns:
            (None)

        Each day's DELETE is retried up to HIVE_PARTITION_DELETE_RETRIES times
        on transient HIVE_METASTORE_ERROR failures.
        """
        table = "reporting_ocpawscostlineitem_project_daily_summary"
        retries = settings.HIVE_PARTITION_DELETE_RETRIES
        if self.table_exists_trino(table):
            LOG.info(
                "Deleting Hive partitions for the following: \n\tSchema: %s "
                "\n\tOCP Source: %s \n\tAWS Source: %s \n\tTable: %s \n\tYear-Month: %s-%s \n\tDays: %s",
                self.schema,
                ocp_source,
                aws_source,
                table,
                year,
                month,
                days,
            )
            for day in days:
                for i in range(retries):
                    try:
                        # NOTE(review): SQL is assembled by f-string interpolation.
                        # All values come from internal provider/date data today,
                        # but this would be injectable if any ever became
                        # user-controlled — confirm upstream sanitization.
                        # The replace/ltrim expression matches the month both
                        # with and without a leading zero.
                        sql = f"""
                            DELETE FROM hive.{self.schema}.{table}
                                WHERE aws_source = '{aws_source}'
                                AND ocp_source = '{ocp_source}'
                                AND year = '{year}'
                                AND (month = replace(ltrim(replace('{month}', '0', ' ')),' ', '0') OR month = '{month}')
                                AND day = '{day}'"""
                        self._execute_presto_raw_sql_query(
                            self.schema,
                            sql,
                            log_ref=f"delete_ocp_on_aws_hive_partition_by_day for {year}-{month}-{day}",
                            attempts_left=(retries - 1) - i,
                        )
                        break
                    except TrinoExternalError as err:
                        # Retry only transient metastore failures; anything else
                        # (or exhausted retries) is re-raised to the caller.
                        if err.error_name == "HIVE_METASTORE_ERROR" and i < (retries - 1):
                            continue
                        else:
                            raise err

    def populate_ocp_on_aws_cost_daily_summary_presto(
        self,
        start_date,
        end_date,
        openshift_provider_uuid,
        aws_provider_uuid,
        report_period_id,
        bill_id,
        markup_value,
        distribution,
    ):
        """Populate the daily cost aggregated summary for OCP on AWS via Trino/Presto.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            openshift_provider_uuid (str): OCP source uuid.
            aws_provider_uuid (str): AWS source uuid.
            report_period_id (int): OCP report period id for the summary rows.
            bill_id (int): AWS cost entry bill id for the summary rows.
            markup_value (Decimal): Markup multiplier rendered into the SQL.
            distribution (str): "memory" selects memory-based distribution
                columns; anything else falls back to cpu-based columns.

        Returns
            (None)
        """
        # Default to cpu distribution
        year = start_date.strftime("%Y")
        month = start_date.strftime("%m")
        days = DateHelper().list_days(start_date, end_date)
        # days_str is a pre-quoted fragment for an SQL IN list: d1','d2','d3
        days_str = "','".join([str(day.day) for day in days])
        days_list = [str(day.day) for day in days]
        # Drop existing partitions first so re-summarization does not duplicate rows.
        self.delete_ocp_on_aws_hive_partition_by_day(
            days_list, aws_provider_uuid, openshift_provider_uuid, year, month
        )

        pod_column = "pod_effective_usage_cpu_core_hours"
        node_column = "node_capacity_cpu_core_hours"
        if distribution == "memory":
            pod_column = "pod_effective_usage_memory_gigabyte_hours"
            node_column = "node_capacity_memory_gigabyte_hours"

        summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpawscostlineitem_daily_summary.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "schema": self.schema,
            "start_date": start_date,
            "year": year,
            "month": month,
            "days": days_str,
            "end_date": end_date,
            "aws_source_uuid": aws_provider_uuid,
            "ocp_source_uuid": openshift_provider_uuid,
            "bill_id": bill_id,
            "report_period_id": report_period_id,
            "markup": markup_value,
            "pod_column": pod_column,
            "node_column": node_column,
        }
        LOG.info("Running OCP on AWS SQL with params:")
        LOG.info(summary_sql_params)
        self._execute_presto_multipart_sql_query(self.schema, summary_sql, bind_params=summary_sql_params)

    def back_populate_ocp_on_aws_daily_summary(self, start_date, end_date, report_period_id):
        """Populate the OCP-on-AWS and OCP daily summary tables
        after the project table has been populated via trino.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            report_period_id (int): OCP report period id to back-populate.

        Returns:
            (None)
        """
        # NOTE(review): this method reads AWS_CUR_TABLE_MAP while siblings use
        # self._table_map — presumably the same mapping; confirm.
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]

        sql = pkgutil.get_data(
            "masu.database", "sql/reporting_ocpawscostentrylineitem_daily_summary_back_populate.sql"
        )
        sql = sql.decode("utf-8")
        sql_params = {
            "schema": self.schema,
            "start_date": start_date,
            "end_date": end_date,
            "report_period_id": report_period_id,
        }
        sql, sql_params = self.jinja_sql.prepare_query(sql, sql_params)
        self._execute_raw_sql_query(table_name, sql, bind_params=list(sql_params))

    def populate_ocp_on_aws_tags_summary_table(self, bill_ids, start_date, end_date):
        """Populate the OCP-on-AWS tag summary table from line item data.

        Args:
            bill_ids (list): Bill ids to aggregate tags for.
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.

        Returns:
            (None)
        """
        table_name = self._table_map["ocp_on_aws_tags_summary"]

        agg_sql = pkgutil.get_data("masu.database", "sql/reporting_ocpawstags_summary.sql")
        agg_sql = agg_sql.decode("utf-8")
        agg_sql_params = {"schema": self.schema, "bill_ids": bill_ids, "start_date": start_date, "end_date": end_date}
        agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params)
        self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params))

    def populate_markup_cost(self, provider_uuid, markup, start_date, end_date, bill_ids=None):
        """Set markup costs in the database.

        Args:
            provider_uuid (str): Source uuid used to scope perspective updates.
            markup (Decimal): Markup multiplier applied to cost columns.
            start_date (datetime.date): Start of the usage window (optional).
            end_date (datetime.date): End of the usage window (optional).
            bill_ids (list): Bill ids whose summary rows get markup applied.

        Returns:
            (None)
        """
        with schema_context(self.schema):
            # Only constrain by usage date when both bounds and bills are given.
            if bill_ids and start_date and end_date:
                date_filters = {"usage_start__gte": start_date, "usage_start__lte": end_date}
            else:
                date_filters = {}

            OCPALL_MARKUP = (OCPAllCostLineItemDailySummaryP, *OCP_ON_ALL_PERSPECTIVES)
            # NOTE(review): bill_ids=None would raise TypeError here — callers
            # presumably always pass a list; confirm before relying on the default.
            for bill_id in bill_ids:
                AWSCostEntryLineItemDailySummary.objects.filter(cost_entry_bill_id=bill_id, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup),
                    markup_cost_blended=(F("blended_cost") * markup),
                    markup_cost_savingsplan=(F("savingsplan_effective_cost") * markup),
                )

                OCPAWSCostLineItemDailySummaryP.objects.filter(cost_entry_bill_id=bill_id, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

            for ocpaws_model in OCP_ON_AWS_PERSPECTIVES:
                ocpaws_model.objects.filter(source_uuid=provider_uuid, **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

            OCPAllCostLineItemProjectDailySummaryP.objects.filter(
                source_uuid=provider_uuid, source_type="AWS", **date_filters
            ).update(project_markup_cost=(F("pod_cost") * markup))

            for markup_model in OCPALL_MARKUP:
                markup_model.objects.filter(source_uuid=provider_uuid, source_type="AWS", **date_filters).update(
                    markup_cost=(F("unblended_cost") * markup)
                )

    def populate_enabled_tag_keys(self, start_date, end_date, bill_ids):
        """Populate the enabled tag key table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        table_name = self._table_map["enabled_tag_keys"]
        summary_sql = pkgutil.get_data("masu.database", "sql/reporting_awsenabledtagkeys.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, bill_ids):
        """Update daily summary rows to include only enabled tag keys.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)
        """
        table_name = self._table_map["line_item_daily_summary"]
        summary_sql = pkgutil.get_data(
            "masu.database", "sql/reporting_awscostentryline_item_daily_summary_update_enabled_tags.sql"
        )
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def get_openshift_on_cloud_matched_tags(self, aws_bill_id, ocp_report_period_id):
        """Return a list of matched tags (Postgres path).

        Args:
            aws_bill_id (int): AWS cost entry bill id.
            ocp_report_period_id (int): OCP report period id.

        Returns:
            (list) Matched tag dicts decoded from the JSON column.
        """
        sql = pkgutil.get_data("masu.database", "sql/reporting_ocpaws_matched_tags.sql")
        sql = sql.decode("utf-8")
        sql_params = {"bill_id": aws_bill_id, "report_period_id": ocp_report_period_id, "schema": self.schema}
        sql, bind_params = self.jinja_sql.prepare_query(sql, sql_params)
        with connection.cursor() as cursor:
            # Point the cursor at the tenant schema before executing.
            cursor.db.set_schema(self.schema)
            cursor.execute(sql, params=bind_params)
            results = cursor.fetchall()

        # Each row's first column is a JSON-encoded tag mapping.
        return [json.loads(result[0]) for result in results]

    def get_openshift_on_cloud_matched_tags_trino(self, aws_source_uuid, ocp_source_uuid, start_date, end_date):
        """Return a list of matched tags (Trino/Presto path).

        Args:
            aws_source_uuid (str): AWS source uuid.
            ocp_source_uuid (str): OCP source uuid.
            start_date (datetime.date) The date to start matching.
            end_date (datetime.date) The date to end on.

        Returns:
            (list) Matched tag dicts decoded from the JSON column.
        """
        sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpaws_matched_tags.sql")
        sql = sql.decode("utf-8")
        days = DateHelper().list_days(start_date, end_date)
        # Pre-quoted fragment for an SQL IN list, as in the summary method above.
        days_str = "','".join([str(day.day) for day in days])

        sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "schema": self.schema,
            "aws_source_uuid": aws_source_uuid,
            "ocp_source_uuid": ocp_source_uuid,
            "year": start_date.strftime("%Y"),
            "month": start_date.strftime("%m"),
            "days": days_str,
        }
        sql, sql_params = self.jinja_sql.prepare_query(sql, sql_params)
        results = self._execute_presto_raw_sql_query(
            self.schema, sql, bind_params=sql_params, log_ref="reporting_ocpaws_matched_tags.sql"
        )

        return [json.loads(result[0]) for result in results]
class OCPReportSummaryUpdater(PartitionHandlerMixin):
    """Class to update OCP report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider (Provider): The provider whose data is summarized.
            manifest (CostUsageReportManifest): Manifest driving this update,
                or None when no manifest applies.
        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._date_accessor = DateAccessor()

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        # Process the range in chunks so one failure does not lose all progress.
        for start, end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating OpenShift report daily tables for \n\tSchema: %s "
                "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                self._cluster_id,
                start,
                end,
            )
            with OCPReportDBAccessor(self._schema) as accessor:
                accessor.populate_node_label_line_item_daily_table(start, end, self._cluster_id)
                accessor.populate_line_item_daily_table(start, end, self._cluster_id)
                accessor.populate_storage_line_item_daily_table(start, end, self._cluster_id)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with schema_context(self._schema):
            # Ensure partitioned UI tables have partitions covering the range.
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

        report_period = None
        with OCPReportDBAccessor(self._schema) as accessor:
            # NOTE(review): report_period may be None if no period exists for
            # start_date; report_period.id below would then raise — confirm
            # callers guarantee an existing period.
            report_period = accessor.report_periods_for_provider_uuid(self._provider.uuid, start_date)
            with schema_context(self._schema):
                report_period_ids = [report_period.id]
                for start, end in date_range_pair(start_date, end_date):
                    LOG.info(
                        "Updating OpenShift report summary tables for \n\tSchema: %s "
                        "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
                        self._schema,
                        self._provider.uuid,
                        self._cluster_id,
                        start,
                        end,
                    )
                    accessor.populate_line_item_daily_summary_table(start, end, self._cluster_id, self._provider.uuid)
                    accessor.populate_storage_line_item_daily_summary_table(
                        start, end, self._cluster_id, self._provider.uuid
                    )
                    accessor.populate_ui_summary_tables(start, end, self._provider.uuid)
                # Label summaries and tag-key updates cover the full range at once.
                accessor.populate_pod_label_summary_table(report_period_ids, start_date, end_date)
                accessor.populate_volume_label_summary_table(report_period_ids, start_date, end_date)
                accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids)

                # Record when summary data was first created and last updated.
                if report_period.summary_data_creation_datetime is None:
                    report_period.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
                report_period.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
                report_period.save()

        self.check_cluster_infrastructure(start_date, end_date)

        return start_date, end_date

    def check_cluster_infrastructure(self, start_date, end_date):
        """Determine whether this OCP cluster runs on tracked cloud infrastructure.

        Args:
            start_date (str) The start of the range to inspect.
            end_date   (str) The end of the range to inspect.

        Returns:
            (None) — results are only logged.
        """
        LOG.info("Checking if OpenShift cluster %s is running on cloud infrastructure.", self._provider.uuid)
        updater_base = OCPCloudUpdaterBase(self._schema, self._provider, self._manifest)
        infra_map = updater_base.get_infra_map_from_providers()
        if not infra_map:
            # Check the cluster to see if it is running on cloud infrastructure
            infra_map = updater_base._generate_ocp_infra_map_from_sql(start_date, end_date)
        if infra_map:
            for ocp_source, infra_tuple in infra_map.items():
                LOG.info(
                    "OpenShift cluster %s is running on %s source %s.", ocp_source, infra_tuple[1], infra_tuple[0]
                )

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month refresh is warranted, the
        returned range is widened to the manifest's whole billing month.

        Args:
            start_date (str) Proposed start date.
            end_date   (str) Proposed end date.

        Returns:
            (str, str) Possibly-overridden start and end dates.
        """
        # Default to this month's bill
        with OCPReportDBAccessor(self._schema) as accessor:
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(self._provider.uuid)
                report_periods = report_periods.filter(report_period_start=bill_date).all()
                do_month_update = True
                with schema_context(self._schema):
                    if report_periods is not None and len(report_periods) > 0:
                        do_month_update = self._determine_if_full_summary_update_needed(report_periods[0])
                if do_month_update:
                    last_day_of_month = calendar.monthrange(bill_date.year, bill_date.month)[1]
                    start_date = bill_date.strftime("%Y-%m-%d")
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime("%Y-%m-%d")
                    LOG.info("Overriding start and end date to process full month.")

        LOG.info("Returning start: %s, end: %s", str(start_date), str(end_date))
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, report_period):
        """Decide whether to update summary tables for full billing period.

        Args:
            report_period: The report period row being summarized.

        Returns:
            (bool) True when processing is complete and this period has never
            been summarized before.
        """
        summary_creation = report_period.summary_data_creation_datetime

        is_done_processing = False
        with ReportManifestDBAccessor() as manifest_accesor:
            is_done_processing = manifest_accesor.manifest_ready_for_summary(self._manifest.id)
        is_new_period = summary_creation is None

        # Run the full month if this is the first time we've seen this report
        # period
        if is_done_processing and is_new_period:
            return True

        return False
class AzureReportSummaryUpdater(PartitionHandlerMixin):
    """Class to update Azure report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider (Provider): The provider whose data is summarized.
            manifest (CostUsageReportManifest): Manifest driving this update,
                or None when no manifest applies.
        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month refresh is warranted, the
        returned range is widened to the manifest's whole billing month.

        Args:
            start_date (str) Proposed start date.
            end_date   (str) Proposed end date.

        Returns:
            (str, str) Possibly-overridden start and end dates.
        """
        with AzureReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                bills = accessor.get_cost_entry_bills_query_by_provider(self._provider.uuid)
                bills = bills.filter(billing_period_start=bill_date).all()
                # NOTE(review): bills is already filtered to bill_date, so the
                # second filter below is redundant (harmless, same result).
                first_bill = bills.filter(billing_period_start=bill_date).first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_bill:
                        do_month_update = self._determine_if_full_summary_update_needed(first_bill)
                if do_month_update:
                    last_day_of_month = calendar.monthrange(bill_date.year, bill_date.month)[1]
                    start_date = bill_date.strftime("%Y-%m-%d")
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime("%Y-%m-%d")
                    LOG.info("Overriding start and end date to process full month.")

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Azure has no separate daily table stage; this only normalizes the
        date range.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        LOG.info("update_daily_tables for: %s-%s", str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        LOG.info("update_summary_tables for: %s-%s", str(start_date), str(end_date))
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with schema_context(self._schema):
            # Ensure partitioned UI tables have partitions covering the range.
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

        # Dates are strings by this point (see _get_sql_inputs), so parse them.
        bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema,
            datetime.datetime.strptime(start_date, "%Y-%m-%d"),
            datetime.datetime.strptime(end_date, "%Y-%m-%d"),
        )
        bill_ids = []
        with schema_context(self._schema):
            bill_ids = [str(bill.id) for bill in bills]

        with AzureReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            bills = accessor.bills_for_provider_uuid(self._provider.uuid, start_date)
            for start, end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating Azure report summary tables: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                )
                accessor.populate_line_item_daily_summary_table(start, end, bill_ids)
                accessor.populate_ui_summary_tables(start, end, self._provider.uuid)
            # Tag summary covers the full range in a single pass.
            accessor.populate_tags_summary_table(bill_ids, start_date, end_date)
            # Record when summary data was first created and last updated.
            for bill in bills:
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
                bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
                bill.save()

        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, bill):
        """Decide whether to update summary tables for full billing period.

        Args:
            bill: The cost entry bill row being summarized.

        Returns:
            (bool) True when processing is complete and this bill has never
            been summarized before.
        """
        summary_creation = bill.summary_data_creation_datetime

        is_done_processing = False
        with ReportManifestDBAccessor() as manifest_accesor:
            is_done_processing = manifest_accesor.manifest_ready_for_summary(self._manifest.id)
        is_new_bill = summary_creation is None

        # Do a full month update if we just finished processing a finalized
        # bill or we just finished processing a bill for the first time
        if is_done_processing and is_new_bill:
            return True

        return False
class AWSReportDBAccessor(ReportDBAccessorBase):
    """Class to interact with customer reporting tables."""

    def __init__(self, schema):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
        """
        super().__init__(schema)
        self._datetime_format = Config.AWS_DATETIME_STR_FORMAT
        self.date_accessor = DateAccessor()
        self.jinja_sql = JinjaSql()

    def get_cost_entry_bills(self):
        """Get all cost entry bill objects.

        Returns:
            (dict) Mapping of (bill_type, payer_account_id,
            billing_period_start, provider_id) -> bill id.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            columns = ["id", "bill_type", "payer_account_id", "billing_period_start", "provider_id"]
            bills = self._get_db_obj_query(table_name).values(*columns)
            return {
                (bill["bill_type"], bill["payer_account_id"], bill["billing_period_start"], bill["provider_id"]): bill["id"]
                for bill in bills
            }

    def get_cost_entry_bills_by_date(self, start_date):
        """Return a cost entry bill for the specified start date."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name).filter(billing_period_start=start_date)

    def get_cost_entry_bills_query_by_provider(self, provider_uuid):
        """Return all cost entry bills for the specified provider."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name).filter(provider_id=provider_uuid)

    def bills_for_provider_uuid(self, provider_uuid, start_date=None):
        """Return all cost entry bills for provider_uuid on date.

        Args:
            provider_uuid (str): Provider to look up bills for.
            start_date (str|datetime): Optional date; normalized to the first
                of its month to match billing_period_start.

        Returns:
            (QuerySet) Matching AWSCostEntryBill rows.
        """
        bills = self.get_cost_entry_bills_query_by_provider(provider_uuid)
        if start_date:
            if isinstance(start_date, str):
                start_date = parse(start_date)
            # Billing periods always start on the first of the month.
            bill_date = start_date.replace(day=1)
            bills = bills.filter(billing_period_start=bill_date)
        return bills

    def get_bill_query_before_date(self, date, provider_uuid=None):
        """Get the cost entry bill objects with billing period before provided date.

        Args:
            date (datetime.date): Upper bound (inclusive) on billing_period_start.
            provider_uuid (str): Optional provider filter.

        Returns:
            (QuerySet) Matching AWSCostEntryBill rows.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            if provider_uuid:
                cost_entry_bill_query = base_query.filter(billing_period_start__lte=date, provider_id=provider_uuid)
            else:
                cost_entry_bill_query = base_query.filter(billing_period_start__lte=date)
            return cost_entry_bill_query

    def get_lineitem_query_for_billid(self, bill_id):
        """Get the AWS cost entry line item for a given bill query."""
        table_name = AWSCostEntryLineItem
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return line_item_query

    def get_daily_query_for_billid(self, bill_id):
        """Get the AWS cost daily item for a given bill query."""
        table_name = AWSCostEntryLineItemDaily
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            daily_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return daily_item_query

    def get_summary_query_for_billid(self, bill_id):
        """Get the AWS cost summary item for a given bill query."""
        table_name = AWSCostEntryLineItemDailySummary
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return summary_item_query

    def get_ocp_aws_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_ocp_aws_project_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report project summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_project_daily_summary"]
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_cost_entry_query_for_billid(self, bill_id):
        """Get the AWS cost entry data for a given bill query."""
        table_name = AWSCostEntry
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(bill_id=bill_id)
            return line_item_query

    def get_cost_entries(self):
        """Make a mapping of cost entries by start time.

        Returns:
            (dict) Mapping of (bill_id, formatted interval_start) -> entry id.
        """
        table_name = AWSCostEntry
        with schema_context(self.schema):
            cost_entries = self._get_db_obj_query(table_name).all()
            return {(ce.bill_id, ce.interval_start.strftime(self._datetime_format)): ce.id for ce in cost_entries}

    def get_products(self):
        """Make a mapping of product sku to product objects.

        Returns:
            (dict) Mapping of (sku, product_name, region) -> product id.
        """
        table_name = AWSCostEntryProduct
        with schema_context(self.schema):
            columns = ["id", "sku", "product_name", "region"]
            products = self._get_db_obj_query(table_name, columns=columns).all()
            return {
                (product["sku"], product["product_name"], product["region"]): product["id"] for product in products
            }

    def get_pricing(self):
        """Make a mapping of pricing values string to pricing objects.

        Returns:
            (dict) Mapping of "term-unit" -> pricing id.
        """
        table_name = AWSCostEntryPricing
        with schema_context(self.schema):
            pricing = self._get_db_obj_query(table_name).all()
            return {f"{p.term}-{p.unit}": p.id for p in pricing}

    def get_reservations(self):
        """Make a mapping of reservation ARN to reservation objects.

        Returns:
            (dict) Mapping of reservation_arn -> reservation id.
        """
        table_name = AWSCostEntryReservation
        with schema_context(self.schema):
            columns = ["id", "reservation_arn"]
            reservs = self._get_db_obj_query(table_name, columns=columns).all()
            return {res["reservation_arn"]: res["id"] for res in reservs}

    def populate_line_item_daily_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregate of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list)

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily"]

        daily_sql = pkgutil.get_data("masu.database", "sql/reporting_awscostentrylineitem_daily.sql")
        daily_sql = daily_sql.decode("utf-8")
        daily_sql_params = {
            # Hyphens removed so the uuid is usable inside SQL identifiers.
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        daily_sql, daily_sql_params = self.jinja_sql.prepare_query(daily_sql, daily_sql_params)
        self._execute_raw_sql_query(table_name, daily_sql, start_date, end_date, bind_params=list(daily_sql_params))

    def populate_line_item_daily_summary_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregated summary of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list)

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily_summary"]

        summary_sql = pkgutil.get_data("masu.database", "sql/reporting_awscostentrylineitem_daily_summary.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def populate_line_item_daily_summary_table_presto(self, start_date, end_date, source_uuid, bill_id, markup_value):
        """Populate the daily aggregated summary of line items table via Presto.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            source_uuid (str): AWS source uuid.
            bill_id (int): Cost entry bill id for the summary rows.
            markup_value (Decimal): Markup multiplier; 0 when falsy.

        Returns
            (None)

        """
        summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_awscostentrylineitem_daily_summary.sql")
        summary_sql = summary_sql.decode("utf-8")
        uuid_str = str(uuid.uuid4()).replace("-", "_")
        summary_sql_params = {
            "uuid": uuid_str,
            "start_date": start_date,
            "end_date": end_date,
            "schema": self.schema,
            "table": PRESTO_LINE_ITEM_TABLE,
            "source_uuid": source_uuid,
            "year": start_date.strftime("%Y"),
            "month": start_date.strftime("%m"),
            "markup": markup_value if markup_value else 0,
            "bill_id": bill_id,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        LOG.info(f"Summary SQL: {str(summary_sql)}")
        self._execute_presto_raw_sql_query(self.schema, summary_sql)

    def mark_bill_as_finalized(self, bill_id):
        """Mark a bill in the database as finalized.

        Sets finalized_datetime to now (UTC) only if not already set, so the
        original finalization time is preserved on repeat calls.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            bill = self._get_db_obj_query(table_name).get(id=bill_id)

            if bill.finalized_datetime is None:
                bill.finalized_datetime = self.date_accessor.today_with_timezone("UTC")
                bill.save()

    def populate_tags_summary_table(self, bill_ids):
        """Populate the line item aggregated totals data table.

        Args:
            bill_ids (list): Bill ids to aggregate tags for.

        Returns:
            (None)
        """
        table_name = AWS_CUR_TABLE_MAP["tags_summary"]

        agg_sql = pkgutil.get_data("masu.database", "sql/reporting_awstags_summary.sql")
        agg_sql = agg_sql.decode("utf-8")
        agg_sql_params = {"schema": self.schema, "bill_ids": bill_ids}
        agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params)
        self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params))

    def populate_ocp_on_aws_cost_daily_summary(self, start_date, end_date, cluster_id, bill_ids, markup_value):
        """Populate the daily cost aggregated summary for OCP on AWS.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            cluster_id (str): OCP cluster identifier.
            bill_ids (list): Bill ids to include.
            markup_value (Decimal): Markup multiplier rendered into the SQL.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_daily_summary"]

        summary_sql = pkgutil.get_data("masu.database", "sql/reporting_ocpawscostlineitem_daily_summary.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "uuid": str(uuid.uuid4()).replace("-", "_"),
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "cluster_id": cluster_id,
            "schema": self.schema,
            "markup": markup_value,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def populate_ocp_on_aws_tags_summary_table(self):
        """Populate the line item aggregated totals data table.

        Returns:
            (None)
        """
        table_name = AWS_CUR_TABLE_MAP["ocp_on_aws_tags_summary"]

        agg_sql = pkgutil.get_data("masu.database", "sql/reporting_ocpawstags_summary.sql")
        agg_sql = agg_sql.decode("utf-8")
        agg_sql_params = {"schema": self.schema}
        agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params)
        self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params))

    def populate_markup_cost(self, markup, start_date, end_date, bill_ids=None):
        """Set markup costs in the database.

        Args:
            markup (Decimal): Markup multiplier applied to unblended_cost.
            start_date (datetime.date): Optional usage window start.
            end_date (datetime.date): Optional usage window end.
            bill_ids (list): Bill ids to update; no-op when falsy.

        Returns:
            (None)
        """
        with schema_context(self.schema):
            if bill_ids and start_date and end_date:
                # Constrain the update to the given usage window.
                for bill_id in bill_ids:
                    AWSCostEntryLineItemDailySummary.objects.filter(
                        cost_entry_bill_id=bill_id, usage_start__gte=start_date, usage_start__lte=end_date
                    ).update(markup_cost=(F("unblended_cost") * markup))
            elif bill_ids:
                # No window supplied: update every row on each bill.
                for bill_id in bill_ids:
                    AWSCostEntryLineItemDailySummary.objects.filter(cost_entry_bill_id=bill_id).update(
                        markup_cost=(F("unblended_cost") * markup)
                    )

    def populate_enabled_tag_keys(self, start_date, end_date, bill_ids):
        """Populate the enabled tag key table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["enabled_tag_keys"]
        summary_sql = pkgutil.get_data("masu.database", "sql/reporting_awsenabledtagkeys.sql")
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )

    def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, bill_ids):
        """Update daily summary rows to include only enabled tag keys.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date (datetime.date) The date to end on.
            bill_ids (list) A list of bill IDs.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP["line_item_daily_summary"]
        summary_sql = pkgutil.get_data(
            "masu.database", "sql/reporting_awscostentryline_item_daily_summary_update_enabled_tags.sql"
        )
        summary_sql = summary_sql.decode("utf-8")
        summary_sql_params = {
            "start_date": start_date,
            "end_date": end_date,
            "bill_ids": bill_ids,
            "schema": self.schema,
        }
        summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params)
        self._execute_raw_sql_query(
            table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params)
        )
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = 'public'
        super().__init__(self._schema)
        self._table = CostUsageReportManifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_uuid):
        """Get the manifest associated with the provided provider and assembly id.

        Args:
            assembly_id (str): Unique assembly identifier of the manifest.
            provider_uuid (str): Provider the manifest belongs to.

        Returns:
            (CostUsageReportManifest) The matching manifest, or None.
        """
        query = self._get_db_obj_query()
        return query.filter(provider_id=provider_uuid)\
            .filter(assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id.

        Returns:
            (CostUsageReportManifest) The matching manifest, or None.
        """
        with schema_context(self._schema):
            query = self._get_db_obj_query()
            return query.filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Update the updated timestamp on the given manifest.

        Args:
            manifest (CostUsageReportManifest): Manifest to stamp; a None
                manifest is ignored (consistent with the other accessors).
        """
        if manifest:
            manifest.manifest_updated_datetime = \
                self.date_accessor.today_with_timezone('UTC')
            manifest.save()

    # pylint: disable=arguments-differ
    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_uuid,
        Returns:
            None

        """
        # Default the creation time and processed-file count when omitted.
        if 'manifest_creation_datetime' not in kwargs:
            kwargs['manifest_creation_datetime'] = \
                self.date_accessor.today_with_timezone('UTC')

        if 'num_processed_files' not in kwargs:
            kwargs['num_processed_files'] = 0

        # The Django model insists on calling this field provider_id
        if 'provider_uuid' in kwargs:
            uuid = kwargs.pop('provider_uuid')
            kwargs['provider_id'] = uuid

        return super().add(**kwargs)

    # pylint: disable=no-self-use
    def get_last_report_completed_datetime(self, manifest_id):
        """Get the most recent report processing completion time for a manifest.

        Returns:
            (datetime) Latest last_completed_datetime, or None when the
            manifest has no report status rows yet.
        """
        result = CostUsageReportStatus.objects.\
            filter(manifest_id=manifest_id).order_by('-last_completed_datetime').first()
        # Guard against a manifest with no status rows (result is None).
        if result:
            return result.last_completed_datetime
        return None

    def reset_manifest(self, manifest_id):
        """Return the manifest to a state as if it had not been processed.

        This sets the number of processed files to zero and
        nullifies the started and completed times on the reports.
        """
        manifest = self.get_manifest_by_id(manifest_id)
        manifest.num_processed_files = 0
        manifest.save()

        # Bug fix: select the status rows that BELONG to this manifest via the
        # manifest_id foreign key. The previous filter(id=manifest_id) matched
        # the status row whose own primary key equaled the manifest id, so the
        # wrong (or no) reports were reset.
        files = CostUsageReportStatus.objects.filter(manifest_id=manifest_id).all()
        for file in files:
            file.last_completed_datetime = None
            file.last_started_datetime = None
            file.save()

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid, bill_date):
        """Return all manifests for a provider and bill date."""
        filters = {
            'provider_id': provider_uuid,
            'billing_period_start_datetime__date': bill_date
        }
        return CostUsageReportManifest.objects.\
            filter(**filters).all()
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = "public"
        super().__init__(self._schema)
        self._table = CostUsageReportManifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_uuid):
        """Get the manifest associated with the provided provider and id."""
        query = self._get_db_obj_query()
        return query.filter(provider_id=provider_uuid).filter(
            assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id."""
        with schema_context(self._schema):
            query = self._get_db_obj_query()
            return query.filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Stamp the manifest's updated timestamp and persist it."""
        if manifest:
            manifest.manifest_updated_datetime = self.date_accessor.today_with_timezone(
                "UTC")
            manifest.save()

    def mark_manifest_as_completed(self, manifest):
        """Stamp the manifest's completed timestamp and persist it."""
        if manifest:
            manifest.manifest_completed_datetime = self.date_accessor.today_with_timezone(
                "UTC")
            manifest.save()

    # pylint: disable=arguments-differ
    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_uuid,
        Returns:
            None

        """
        if "manifest_creation_datetime" not in kwargs:
            kwargs["manifest_creation_datetime"] = \
                self.date_accessor.today_with_timezone("UTC")

        if "num_processed_files" not in kwargs:
            kwargs["num_processed_files"] = 0

        # The Django model insists on calling this field provider_id
        if "provider_uuid" in kwargs:
            kwargs["provider_id"] = kwargs.pop("provider_uuid")

        return super().add(**kwargs)

    # pylint: disable=no-self-use
    def get_last_report_completed_datetime(self, manifest_id):
        """Get the most recent report processing completion time for a manifest.

        Returns None when the manifest has no report status rows yet.
        """
        result = (CostUsageReportStatus.objects.filter(
            manifest_id=manifest_id).order_by(
                "-last_completed_datetime").first())
        if result:
            return result.last_completed_datetime
        return None

    def reset_manifest(self, manifest_id):
        """Return the manifest to a state as if it had not been processed.

        This sets the number of processed files to zero and
        nullifies the started and completed times on the reports.
        """
        manifest = self.get_manifest_by_id(manifest_id)
        manifest.num_processed_files = 0
        manifest.save()

        # Select the status rows belonging to this manifest via their
        # manifest_id foreign key, not their own primary key.
        files = CostUsageReportStatus.objects.filter(
            manifest_id=manifest_id).all()
        for file in files:
            file.last_completed_datetime = None
            file.last_started_datetime = None
            file.save()

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid,
                                                     bill_date):
        """Return all manifests for a provider and bill date."""
        filters = {
            "provider_id": provider_uuid,
            "billing_period_start_datetime__date": bill_date
        }
        return CostUsageReportManifest.objects.filter(**filters).all()

    def get_last_seen_manifest_ids(self, bill_date):
        """Return a tuple containing the assembly_id of the last seen manifest and a boolean

        The boolean will state whether or not that manifest has been processed.
        """
        assembly_ids = []
        # The following query uses a window function to rank the manifests for all the providers,
        # and then just pulls out the top ranked (most recent) manifests
        manifests = (CostUsageReportManifest.objects.filter(
            billing_period_start_datetime=bill_date).annotate(
                row_number=Window(
                    expression=RowNumber(),
                    partition_by=F("provider_id"),
                    order_by=F("manifest_creation_datetime").desc(),
                )).order_by("row_number"))
        for manifest in (m for m in manifests if m.row_number == 1):
            # loop through the manifests and decide if they have finished processing
            processed = manifest.num_total_files == manifest.num_processed_files
            # if all of the files for the manifest have been processed we don't want to add it
            # to assembly_ids because it is safe to delete
            if not processed:
                assembly_ids.append(manifest.assembly_id)
        return assembly_ids

    def purge_expired_report_manifest(self, provider_type, expired_date):
        """
        Deletes Cost usage Report Manifests older than expired_date.

        Args:
            provider_type   (String) the provider type to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.

        """
        # delete() returns (total_count, per-model dict); log just the count.
        delete_count = CostUsageReportManifest.objects.filter(
            provider__type=provider_type,
            billing_period_start_datetime__lt=expired_date).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider type %s that had a billing period start date before %s",
            delete_count,
            provider_type,
            expired_date,
        )

    def purge_expired_report_manifest_provider_uuid(self, provider_uuid,
                                                    expired_date):
        """
        Delete cost usage reports older than expired_date and provider_uuid.

        Args:
            provider_uuid (uuid) The provider uuid to use to delete associated manifests
            expired_date (datetime.datetime) delete all manifests older than this date, exclusive.

        """
        # Take element [0] of the delete() tuple so the log message reports
        # the row count, matching purge_expired_report_manifest above.
        delete_count = CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid,
            billing_period_start_datetime__lt=expired_date).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider_uuid %s that had a billing period start date before %s",
            delete_count,
            provider_uuid,
            expired_date,
        )
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics.

    This variant uses SQLAlchemy automapped classes (``get_base().classes``)
    and an explicit session rather than the Django ORM.
    """

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = 'public'
        super().__init__(self._schema)
        # Automapped SQLAlchemy class for the manifest table.
        self._table = \
            self.get_base().classes.reporting_common_costusagereportmanifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_id):
        """Get the manifest associated with the provided provider and id.

        Args:
            assembly_id (str): The provider-side assembly identifier.
            provider_id: The provider identifier.

        Returns:
            The first matching manifest row, or None.
        """
        query = self._get_db_obj_query()
        return query.filter_by(provider_id=provider_id)\
            .filter_by(assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id, or None when no row matches."""
        query = self._get_db_obj_query()
        return query.filter_by(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Update the updated timestamp.

        NOTE(review): this only mutates the in-memory row; persistence
        presumably happens on a later session commit by the caller — verify.
        """
        manifest.manifest_updated_datetime = \
            self.date_accessor.today_with_timezone('UTC')

    def add(self, use_savepoint=True, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            use_savepoint (bool): Whether the insert runs inside a savepoint.
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_processed_files (optional),
                                num_total_files,
                                provider_id,
        Returns:
            None

        """
        # Default the creation timestamp and processed-file count so callers
        # only need to supply the manifest-specific fields.
        if 'manifest_creation_datetime' not in kwargs:
            kwargs['manifest_creation_datetime'] = \
                self.date_accessor.today_with_timezone('UTC')

        if 'num_processed_files' not in kwargs:
            kwargs['num_processed_files'] = 0

        return super().add(use_savepoint, **kwargs)

    def get_last_report_completed_datetime(self, manifest_id):
        """Get the most recent report processing completion time for a manifest.

        Returns the MAX(last_completed_datetime) over the manifest's report
        status rows; this is None when no row has completed.
        """
        table = self.get_base().classes.reporting_common_costusagereportstatus
        result = self._session.query(func.max(table.last_completed_datetime))\
            .filter(table.manifest_id == manifest_id)\
            .first()
        # query(...).first() on an aggregate yields a one-element tuple.
        return result[0]

    def reset_manifest(self, manifest_id):
        """Return the manifest to a state as if it had not been processed.

        This sets the number of processed files to zero and
        nullifies the started and completed times on the reports.
        """
        manifest = self.get_manifest_by_id(manifest_id)
        manifest.num_processed_files = 0

        table = self.get_base().classes.reporting_common_costusagereportstatus
        files = self._session.query(table).filter(table.manifest_id == manifest_id)\
            .all()
        for file in files:
            file.last_completed_datetime = None
            file.last_started_datetime = None
        # A single commit persists both the manifest and every status row,
        # since they are all tracked by the same session.
        self.commit()
class OCPReportParquetSummaryUpdater:
    """Class to update OCP report summary data from Presto/Parquet data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: The OCP provider object being summarized.
            manifest: The manifest driving this summarization run (may be None).
        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._cluster_alias = get_cluster_alias_from_cluster_id(
            self._cluster_id)
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month resummarization is
        needed, the date range is widened to the manifest's whole bill month.
        String dates are normalized to ``datetime.date``.
        """
        with OCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(
                    self._provider.uuid)
                report_periods = report_periods.filter(
                    report_period_start=bill_date).all()
                first_period = report_periods.first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_period:
                        do_month_update = determine_if_full_summary_update_needed(
                            first_period)
                if do_month_update:
                    # Expand to the first..last day of the bill month.
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date
                    end_date = bill_date.replace(day=last_day_of_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        if isinstance(start_date, str):
            start_date = ciso8601.parse_datetime(start_date).date()
        if isinstance(end_date, str):
            end_date = ciso8601.parse_datetime(end_date).date()

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        # Daily tables are not used on the parquet path; this is a no-op.
        LOG.info("NO-OP update_daily_tables for: %s-%s", str(start_date),
                 str(end_date))

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        report_periods = None
        with OCPReportDBAccessor(self._schema) as accessor:
            with schema_context(self._schema):
                report_periods = accessor.report_periods_for_provider_uuid(
                    self._provider.uuid, start_date)
                report_period_ids = [
                    report_period.id for report_period in report_periods
                ]
            for report_period in report_periods:
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    self._cluster_id,
                    report_period.id,
                    start_date,
                    end_date,
                )
                # This will process POD and STORAGE together
                accessor.populate_line_item_daily_summary_table_presto(
                    start_date, end_date, report_period.id, self._cluster_id,
                    self._cluster_alias, self._provider.uuid)

            # This will process POD and STORAGE together
            LOG.info(
                "Updating OpenShift label summary tables for \n\tSchema: %s "
                "\n\tReport Period IDs: %s",
                self._schema,
                report_period_ids,
            )
            accessor.populate_pod_label_summary_table(report_period_ids)
            accessor.populate_volume_label_summary_table(report_period_ids)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, report_period_ids)

            LOG.info("Updating OpenShift report periods")
            # Stamp each period so later runs can tell when it was summarized.
            for period in report_periods:
                if period.summary_data_creation_datetime is None:
                    period.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                        "UTC")
                period.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
                period.save()

        return start_date, end_date
class AWSReportDBAccessor(ReportDBAccessorBase):
    """Class to interact with customer reporting tables."""

    def __init__(self, schema, column_map):
        """Establish the database connection.

        Args:
            schema     (str): The customer schema to associate with
            column_map (dict): A mapping of report columns to database columns

        """
        super().__init__(schema, column_map)
        self._datetime_format = Config.AWS_DATETIME_STR_FORMAT
        self.column_map = column_map
        self._schema_name = schema
        self.date_accessor = DateAccessor()

    def get_cost_entry_bills(self):
        """Get all cost entry bill objects.

        Returns a dict keyed on (bill_type, payer_account_id,
        billing_period_start, provider_id) mapping to the bill's id.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            columns = [
                'id', 'bill_type', 'payer_account_id', 'billing_period_start',
                'provider_id'
            ]
            bills = self._get_db_obj_query(table_name).values(*columns)
            return {(bill['bill_type'], bill['payer_account_id'],
                     bill['billing_period_start'], bill['provider_id']):
                    bill['id']
                    for bill in bills}

    def get_cost_entry_bills_by_date(self, start_date):
        """Return a cost entry bill for the specified start date."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name)\
                .filter(billing_period_start=start_date)

    # pylint: disable=invalid-name
    def get_cost_entry_bills_query_by_provider(self, provider_id):
        """Return all cost entry bills for the specified provider."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            return self._get_db_obj_query(table_name)\
                .filter(provider_id=provider_id)

    def bills_for_provider_id(self, provider_id, start_date=None):
        """Return all cost entry bills for provider_id on date.

        When start_date is given it is snapped to the first of its month,
        since bills are keyed by billing-period start.
        """
        bills = self.get_cost_entry_bills_query_by_provider(provider_id)
        if start_date:
            bill_date = parse(start_date).replace(day=1)
            bills = bills.filter(billing_period_start=bill_date)
        return bills

    def get_bill_query_before_date(self, date):
        """Get the cost entry bill objects with billing period before provided date."""
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            cost_entry_bill_query = base_query.filter(
                billing_period_start__lte=date)
            return cost_entry_bill_query

    def get_lineitem_query_for_billid(self, bill_id):
        """Get the AWS cost entry line item for a given bill query."""
        table_name = AWSCostEntryLineItem
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return line_item_query

    def get_daily_query_for_billid(self, bill_id):
        """Get the AWS cost daily item for a given bill query."""
        table_name = AWSCostEntryLineItemDaily
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            daily_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return daily_item_query

    def get_summary_query_for_billid(self, bill_id):
        """Get the AWS cost summary item for a given bill query."""
        table_name = AWSCostEntryLineItemDailySummary
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
            return summary_item_query

    def get_ocp_aws_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_daily_summary']
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_ocp_aws_project_summary_query_for_billid(self, bill_id):
        """Get the OCP-on-AWS report project summary item for a given bill query."""
        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_project_daily_summary']
        base_query = self._get_db_obj_query(table_name)
        summary_item_query = base_query.filter(cost_entry_bill_id=bill_id)
        return summary_item_query

    def get_cost_entry_query_for_billid(self, bill_id):
        """Get the AWS cost entry data for a given bill query."""
        table_name = AWSCostEntry
        with schema_context(self.schema):
            base_query = self._get_db_obj_query(table_name)
            line_item_query = base_query.filter(bill_id=bill_id)
            return line_item_query

    def get_cost_entries(self):
        """Make a mapping of cost entries by start time.

        Keys are (bill_id, interval_start formatted with the AWS datetime
        format); values are the cost entry ids.
        """
        table_name = AWSCostEntry
        with schema_context(self.schema):
            cost_entries = self._get_db_obj_query(table_name).all()
            return {(ce.bill_id,
                     ce.interval_start.strftime(self._datetime_format)): ce.id
                    for ce in cost_entries}

    def get_products(self):
        """Make a mapping of product sku to product objects."""
        table_name = AWSCostEntryProduct
        with schema_context(self.schema):
            columns = ['id', 'sku', 'product_name', 'region']
            products = self._get_db_obj_query(table_name,
                                              columns=columns).all()
            return {(product['sku'], product['product_name'],
                     product['region']): product['id']
                    for product in products}

    def get_pricing(self):
        """Make a mapping of pricing values string to pricing objects.

        Keys look like "<term>-<unit>".
        """
        table_name = AWSCostEntryPricing
        with schema_context(self.schema):
            pricing = self._get_db_obj_query(table_name).all()
            return {
                '{term}-{unit}'.format(term=p.term, unit=p.unit): p.id
                for p in pricing
            }

    def get_reservations(self):
        """Make a mapping of reservation ARN to reservation objects."""
        table_name = AWSCostEntryReservation
        with schema_context(self.schema):
            columns = ['id', 'reservation_arn']
            reservs = self._get_db_obj_query(table_name,
                                             columns=columns).all()
            return {res['reservation_arn']: res['id'] for res in reservs}

    def populate_line_item_daily_table(self, start_date, end_date, bill_ids):
        """Populate the daily aggregate of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date   (datetime.date) The date to end on.
            bill_ids   (list) Cost entry bill ids to include.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP['line_item_daily']

        # Load the SQL template shipped with the package and fill in the
        # run-specific parameters; a random uuid keeps temp names unique.
        daily_sql = pkgutil.get_data(
            'masu.database', 'sql/reporting_awscostentrylineitem_daily.sql')
        daily_sql = daily_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            cost_entry_bill_ids=','.join(bill_ids),
            schema=self.schema)
        self._commit_and_vacuum(table_name, daily_sql, start_date, end_date)

    # pylint: disable=invalid-name
    def populate_line_item_daily_summary_table(self, start_date, end_date,
                                               bill_ids):
        """Populate the daily aggregated summary of line items table.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date   (datetime.date) The date to end on.
            bill_ids   (list) Cost entry bill ids to include.

        Returns
            (None)

        """
        table_name = AWS_CUR_TABLE_MAP['line_item_daily_summary']

        summary_sql = pkgutil.get_data(
            'masu.database',
            'sql/reporting_awscostentrylineitem_daily_summary.sql')
        summary_sql = summary_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            cost_entry_bill_ids=','.join(bill_ids),
            schema=self.schema)
        self._commit_and_vacuum(table_name, summary_sql, start_date, end_date)

    def mark_bill_as_finalized(self, bill_id):
        """Mark a bill in the database as finalized.

        Only stamps the finalized time once; reruns leave the original
        timestamp intact.
        """
        table_name = AWSCostEntryBill
        with schema_context(self.schema):
            bill = self._get_db_obj_query(table_name)\
                .get(id=bill_id)

            if bill.finalized_datetime is None:
                bill.finalized_datetime = self.date_accessor.today_with_timezone(
                    'UTC')
                bill.save()

    # pylint: disable=invalid-name
    def populate_tags_summary_table(self):
        """Populate the line item aggregated totals data table."""
        table_name = AWS_CUR_TABLE_MAP['tags_summary']

        agg_sql = pkgutil.get_data('masu.database',
                                   f'sql/reporting_awstags_summary.sql')
        agg_sql = agg_sql.decode('utf-8').format(schema=self.schema)
        self._commit_and_vacuum(table_name, agg_sql)

    def populate_ocp_on_aws_cost_daily_summary(self, start_date, end_date,
                                               cluster_id, bill_ids):
        """Populate the daily cost aggregated summary for OCP on AWS.

        Args:
            start_date (datetime.date) The date to start populating the table.
            end_date   (datetime.date) The date to end on.
            cluster_id (str) Cluster to restrict the OCP side of the join to.
            bill_ids   (list) Cost entry bill ids for the AWS side.

        Returns
            (None)

        """
        # NOTE(review): bill_ids and cluster_id are interpolated directly into
        # the SQL text below. These values come from the database rather than
        # end users, but parameterized SQL would be safer — confirm upstream
        # sanitization before exposing this to untrusted input.
        aws_where_clause = ''
        ocp_where_clause = ''
        if bill_ids:
            ids = ','.join(bill_ids)
            aws_where_clause = f'AND cost_entry_bill_id IN ({ids})'
        if cluster_id:
            ocp_where_clause = f"AND cluster_id = '{cluster_id}'"
        table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_daily_summary']

        summary_sql = pkgutil.get_data(
            'masu.database',
            'sql/reporting_ocpawscostlineitem_daily_summary.sql')
        summary_sql = summary_sql.decode('utf-8').format(
            uuid=str(uuid.uuid4()).replace('-', '_'),
            start_date=start_date,
            end_date=end_date,
            aws_where_clause=aws_where_clause,
            ocp_where_clause=ocp_where_clause,
            schema=self.schema)
        self._commit_and_vacuum(table_name, summary_sql, start_date, end_date)
class OCPReportParquetSummaryUpdater(PartitionHandlerMixin):
    """Class to update OCP report summary data from Presto/Parquet data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: The OCP provider object being summarized.
            manifest: The manifest driving this summarization run (may be None).
        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        self._cluster_alias = get_cluster_alias_from_cluster_id(
            self._cluster_id)
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month resummarization is
        needed, the date range is widened to the manifest's whole bill month.
        String dates are normalized to ``datetime.date``.
        """
        with OCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(
                    self._provider.uuid)
                report_periods = report_periods.filter(
                    report_period_start=bill_date).all()
                first_period = report_periods.first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_period:
                        do_month_update = determine_if_full_summary_update_needed(
                            first_period)
                if do_month_update:
                    # Expand to the first..last day of the bill month.
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date
                    end_date = bill_date.replace(day=last_day_of_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        if isinstance(start_date, str):
            start_date = ciso8601.parse_datetime(start_date).date()
        if isinstance(end_date, str):
            end_date = ciso8601.parse_datetime(end_date).date()

        return start_date, end_date

    def _check_parquet_date_range(self, start_date, end_date):
        """Make sure we don't summarize for a date range we don't have data for.

        Clamps start_date forward to the earliest timestamp present in the
        parquet data when that is later than the requested start.
        NOTE(review): assumes the accessor returns a non-None minimum
        timestamp; verify behavior when no parquet data exists.
        """
        start_datetime = datetime(start_date.year, start_date.month,
                                  start_date.day)
        with OCPReportDBAccessor(self._schema) as accessor:
            min_timestamp, __ = accessor.get_max_min_timestamp_from_parquet(
                self._provider.uuid, start_date, end_date)
            if min_timestamp > start_datetime:
                start_date = min_timestamp.date()
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        # Daily tables are not used on the parquet path; this is a no-op.
        LOG.info("NO-OP update_daily_tables for: %s-%s", str(start_date),
                 str(end_date))

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        start_date, end_date = self._check_parquet_date_range(
            start_date, end_date)

        with schema_context(self._schema):
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES,
                                    start_date, end_date)

        with OCPReportDBAccessor(self._schema) as accessor:
            with schema_context(self._schema):
                report_period = accessor.report_periods_for_provider_uuid(
                    self._provider.uuid, start_date)
                report_period_id = report_period.id

            # Summarize in TRINO_DATE_STEP-sized windows to bound query size.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    self._cluster_id,
                    report_period_id,
                    start,
                    end,
                )
                # This will process POD and STORAGE together
                filters = {
                    "report_period_id": report_period_id
                }  # Use report_period_id to leverage DB index on DELETE
                accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                    self._provider.uuid, start, end, filters)
                accessor.populate_line_item_daily_summary_table_presto(
                    start, end, report_period_id, self._cluster_id,
                    self._cluster_alias, self._provider.uuid)
                accessor.populate_ui_summary_tables(start, end,
                                                    self._provider.uuid)

            # This will process POD and STORAGE together
            LOG.info(
                "Updating OpenShift label summary tables for \n\tSchema: %s "
                "\n\tReport Period IDs: %s",
                self._schema,
                [report_period_id],
            )
            accessor.populate_pod_label_summary_table([report_period_id],
                                                      start_date, end_date)
            accessor.populate_volume_label_summary_table([report_period_id],
                                                         start_date, end_date)
            accessor.populate_openshift_cluster_information_tables(
                self._provider, self._cluster_id, self._cluster_alias,
                start_date, end_date)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, [report_period_id])

            LOG.info("Updating OpenShift report periods")
            # Stamp the period so later runs can tell when it was summarized.
            if report_period.summary_data_creation_datetime is None:
                report_period.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
            report_period.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                "UTC")
            report_period.save()

        self.check_cluster_infrastructure(start_date, end_date)

        return start_date, end_date

    def check_cluster_infrastructure(self, start_date, end_date):
        """Determine whether this cluster runs on known cloud infrastructure.

        Consults the existing provider infra map and, when empty, derives one
        from Trino SQL for the given date range; results are only logged here.
        """
        LOG.info(
            "Checking if OpenShift cluster %s is running on cloud infrastructure.",
            self._provider.uuid)
        updater_base = OCPCloudUpdaterBase(self._schema, self._provider,
                                           self._manifest)
        infra_map = updater_base.get_infra_map_from_providers()
        if not infra_map:
            # Check the cluster to see if it is running on cloud infrastructure
            infra_map = updater_base._generate_ocp_infra_map_from_sql_trino(
                start_date, end_date)
        if infra_map:
            for ocp_source, infra_tuple in infra_map.items():
                LOG.info("OpenShift cluster %s is running on %s source %s.",
                         ocp_source, infra_tuple[1], infra_tuple[0])
class GCPReportParquetSummaryUpdater:
    """Class to update GCP report parquet summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish parquet summary processor.

        Args:
            schema (str): The customer schema to associate with.
            provider: The GCP provider object being summarized.
            manifest: The manifest driving this summarization run (may be None).
        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL.

        When a manifest is present and a full-month resummarization is
        needed, the date range is widened to the manifest's whole bill month.
        String dates are normalized to ``datetime.date``.
        """
        with GCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                bills = accessor.get_cost_entry_bills_query_by_provider(
                    self._provider.uuid)
                bills = bills.filter(billing_period_start=bill_date).all()
                # NOTE(review): the bill_date filter is applied twice here;
                # the second filter is redundant but harmless.
                first_bill = bills.filter(
                    billing_period_start=bill_date).first()
                do_month_update = False
                with schema_context(self._schema):
                    if first_bill:
                        do_month_update = determine_if_full_summary_update_needed(
                            first_bill)
                if do_month_update:
                    # Expand to the first..last day of the bill month.
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date
                    end_date = bill_date.replace(day=last_day_of_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        if isinstance(start_date, str):
            start_date = ciso8601.parse_datetime(start_date).date()
        if isinstance(end_date, str):
            end_date = ciso8601.parse_datetime(end_date).date()

        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info("update_daily_tables for: %s-%s", str(start_date),
                 str(end_date))

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        # Cost-model markup is applied as a fraction during summarization.
        with CostModelDBAccessor(self._schema,
                                 self._provider.uuid) as cost_model_accessor:
            markup = cost_model_accessor.markup
            markup_value = float(markup.get("value", 0)) / 100

        with GCPReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            with schema_context(self._schema):
                bills = accessor.bills_for_provider_uuid(
                    self._provider.uuid, start_date)
                bill_ids = [str(bill.id) for bill in bills]
                current_bill_id = bills.first().id if bills else None

            if current_bill_id is None:
                msg = f"No bill was found for {start_date}. Skipping summarization"
                LOG.info(msg)
                return start_date, end_date

            # Summarize in TRINO_DATE_STEP-sized windows to bound query size.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating GCP report summary tables from parquet: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                )
                accessor.delete_line_item_daily_summary_entries_for_date_range(
                    self._provider.uuid, start, end)
                accessor.populate_line_item_daily_summary_table_presto(
                    start, end, self._provider.uuid, current_bill_id,
                    markup_value)
                accessor.populate_enabled_tag_keys(start, end, bill_ids)
            accessor.populate_tags_summary_table(bill_ids)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, bill_ids)
            # Stamp each bill so later runs can tell when it was summarized.
            for bill in bills:
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                        "UTC")
                bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
                bill.save()

        return start_date, end_date
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None):
        """
        Initializer.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid (str): The provider uuid to summarize.
            manifest_id (int): Optional manifest to drive the run.

        Raises:
            ReportSummaryUpdaterError: When the provider cannot be found,
                the updater cannot be constructed, or the provider type is
                unsupported.
        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(
                    manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()

        if not self._provider:
            raise ReportSummaryUpdaterError("Provider not found.")

        try:
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            # Chain the original exception so the root cause and traceback
            # are preserved for debugging.
            raise ReportSummaryUpdaterError(err) from err

        if not self._updater:
            raise ReportSummaryUpdaterError("Invalid provider type specified.")
        LOG.info("Starting report data summarization for provider uuid: %s.",
                 self._provider.uuid)

    def _set_updater(self):
        """
        Create the report summary updater object.

        Object is specific to the report provider.

        Args:
            None

        Returns:
            (Object) : Provider-specific report summary updater

        """
        if self._provider.type in (Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL):
            return (
                AWSReportSummaryUpdater(self._schema, self._provider,
                                        self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )
        if self._provider.type in (Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL):
            return (
                AzureReportSummaryUpdater(self._schema, self._provider,
                                          self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )
        if self._provider.type in (Provider.PROVIDER_OCP, ):
            return (
                OCPReportSummaryUpdater(self._schema, self._provider,
                                        self._manifest),
                OCPCloudReportSummaryUpdater(self._schema, self._provider,
                                             self._manifest),
            )

        return (None, None)

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater.

        A None end_date runs the summary up to today's (UTC) date.
        """
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime("%Y-%m-%d")
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime("%Y-%m-%d")
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone("UTC")
            end_date = end_date.strftime("%Y-%m-%d")
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """
        Update report daily rollup tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            manifest_id (str): The particular manifest to use.

        Returns:
            (str, str): The start and end date strings used in the daily SQL.

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        start_date, end_date = self._updater.update_daily_tables(
            start_date, end_date)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """
        Update report summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            manifest_id (str): The particular manifest to use.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)
        LOG.info("Using start date: %s", start_date)
        LOG.info("Using end date: %s", end_date)

        start_date, end_date = self._updater.update_summary_tables(
            start_date, end_date)

        self._ocp_cloud_updater.update_summary_tables(start_date, end_date)

    def update_cost_summary_table(self, start_date, end_date):
        """
        Update cost summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            None

        """
        start_date, end_date = self._format_dates(start_date, end_date)

        self._ocp_cloud_updater.update_cost_summary_table(start_date, end_date)
class ReportProcessorBase:
    """
    Download cost reports from a provider.

    Base object class for downloading cost reports from a cloud provider.
    """

    def __init__(self, schema_name, report_path, compression, provider_uuid,
                 manifest_id, processed_report):
        """Initialize the report processor base class.

        Args:
            schema_name (str): The name of the customer schema to process into
            report_path (str): Where the report file lives in the file system
            compression (CONST): How the report file is compressed.
                Accepted values: UNCOMPRESSED, GZIP_COMPRESSED
            provider_uuid (str): The provider this report belongs to
            manifest_id (int): The manifest being processed (None for a new
                billing period)
            processed_report: Accumulator object holding processed line items

        Raises:
            MasuProcessingError: If ``compression`` is not a supported format.

        """
        if compression.upper() not in ALLOWED_COMPRESSIONS:
            err_msg = f"Compression {compression} is not supported."
            raise MasuProcessingError(err_msg)

        self._schema = schema_name
        self._report_path = report_path
        self._compression = compression.upper()
        self._provider_uuid = provider_uuid
        self._manifest_id = manifest_id
        self.processed_report = processed_report
        self.date_accessor = DateAccessor()

    @property
    def data_cutoff_date(self):
        """Determine the date we should use to process and delete data.

        Two days before today (UTC), clamped to the first of the current
        month so a mid-month update never reaches into the previous bill.
        """
        today = self.date_accessor.today_with_timezone("UTC").date()
        data_cutoff_date = today - relativedelta(days=2)
        if today.month != data_cutoff_date.month:
            data_cutoff_date = today.replace(day=1)
        return data_cutoff_date

    def _get_data_for_table(self, row, table_name):
        """Extract the data from a row for a specific table.

        Args:
            row (dict): A dictionary representation of a CSV file row
            table_name (str): The DB table fields are required for

        Returns:
            (dict): The data from the row keyed on the DB table's column names

        """
        column_map = REPORT_COLUMN_MAP[table_name]
        # Report headers vary in case, so match columns case-insensitively.
        lower_case_column_map = {
            key.lower(): value for key, value in column_map.items()
        }
        return {
            lower_case_column_map[key.lower()]: value
            for key, value in row.items()
            if key.lower() in lower_case_column_map
        }

    @staticmethod
    def _get_file_opener(compression):
        """Get the file opener for the file's compression.

        Args:
            compression (str): The compression format for the file.

        Returns:
            (file opener, str): The proper file stream handler for the
                compression and the read mode for the file

        """
        if compression == GZIP_COMPRESSED:
            return gzip.open, "rt"
        return open, "r"  # assume uncompressed by default

    def _write_processed_rows_to_csv(self):
        """Output CSV content to file stream object.

        Returns:
            (io.StringIO): An in-memory CSV of the processed line items,
                positioned at the start of the stream.

        """
        values = [
            tuple(item.values()) for item in self.processed_report.line_items
        ]

        file_obj = io.StringIO()
        writer = csv.writer(file_obj,
                            delimiter=",",
                            quoting=csv.QUOTE_MINIMAL,
                            quotechar='"')
        writer.writerows(values)
        file_obj.seek(0)
        return file_obj

    def _save_to_db(self, temp_table, report_db_accessor):
        """Save current batch of records to the database."""
        columns = tuple(self.processed_report.line_items[0].keys())
        csv_file = self._write_processed_rows_to_csv()
        report_db_accessor.bulk_insert_rows(csv_file, temp_table, columns)

    def _should_process_row(self, row, date_column, is_full_month,
                            is_finalized=None):
        """Determine if we want to process this row.

        Args:
            row (dict): The line item entry from the AWS report file
            date_column (str): The name of date column to check
            is_full_month (boolean): If this is the first time we've
                processed this bill

        Kwargs:
            is_finalized (boolean): If this is a finalized bill

        Returns:
            (bool): Whether this row should be processed

        """
        # Finalized bills and first-time processing take every row.
        if is_finalized or is_full_month:
            return True
        row_date = ciso8601.parse_datetime(row[date_column]).date()
        if row_date < self.data_cutoff_date:
            return False
        return True

    def _should_process_full_month(self):
        """Determine if we should process the full month of data."""
        if not self._manifest_id:
            log_statement = (
                f"No manifest provided, processing as a new billing period.\n"
                f" Processing entire month.\n"
                f" schema_name: {self._schema},\n"
                f" provider_uuid: {self._provider_uuid},\n"
                f" manifest_id: {self._manifest_id}")
            LOG.info(log_statement)
            return True

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self._manifest_id)
            bill_date = manifest.billing_period_start_datetime.date()
            provider_uuid = manifest.provider_id
            log_statement = (f"Processing bill starting on {bill_date}.\n"
                             f" Processing entire month.\n"
                             f" schema_name: {self._schema},\n"
                             f" provider_uuid: {self._provider_uuid},\n"
                             f" manifest_id: {self._manifest_id}")
            # A bill from a different month (or same month of a different
            # year) than the cutoff is always processed in full.
            if (bill_date.month != self.data_cutoff_date.month) or (
                    bill_date.year != self.data_cutoff_date.year
                    and bill_date.month == self.data_cutoff_date.month):
                LOG.info(log_statement)
                return True

            manifest_list = manifest_accessor.get_manifest_list_for_provider_and_bill_date(
                provider_uuid, bill_date)
            if len(manifest_list) == 1:
                # This is the first manifest for this bill and we are currently
                # processing it
                LOG.info(log_statement)
                return True

        for manifest in manifest_list:
            with ReportManifestDBAccessor() as manifest_accessor:
                if manifest_accessor.manifest_ready_for_summary(manifest.id):
                    log_statement = (
                        f"Processing bill starting on {bill_date}.\n"
                        f" Processing data on or after {self.data_cutoff_date}.\n"
                        f" schema_name: {self._schema},\n"
                        f" provider_uuid: {self._provider_uuid},\n"
                        f" manifest_id: {self._manifest_id}")
                    LOG.info(log_statement)
                    # We have fully processed a manifest for this provider
                    return False

        return True

    def _delete_line_items(self, db_accessor, is_finalized=None):
        """Delete stale data for the report being processed, if necessary.

        Returns:
            (bool): True if a delete pass ran, False if it was skipped.

        """
        if not self._manifest_id:
            return False
        if is_finalized is None:
            is_finalized = False

        is_full_month = self._should_process_full_month()

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self._manifest_id)
            num_processed_files = manifest_accessor.number_of_files_processed(
                self._manifest_id)
            if num_processed_files != 0:
                # Another file of this manifest already deleted the stale rows.
                return False
            # Override the bill date to correspond with the manifest
            bill_date = manifest.billing_period_start_datetime.date()
            provider_uuid = manifest.provider_id

        date_filter = self.get_date_column_filter()

        with db_accessor(self._schema) as accessor:
            bills = accessor.get_cost_entry_bills_query_by_provider(
                provider_uuid)
            bills = bills.filter(billing_period_start=bill_date).all()
            with schema_context(self._schema):
                for bill in bills:
                    line_item_query = accessor.get_lineitem_query_for_billid(
                        bill.id)
                    delete_date = bill_date
                    if not is_finalized and not is_full_month:
                        delete_date = self.data_cutoff_date
                        # This means we are processing a mid-month update
                        # and only need to delete a small window of data
                        line_item_query = line_item_query.filter(**date_filter)
                    log_statement = (f"Deleting data for:\n"
                                     f" schema_name: {self._schema}\n"
                                     f" provider_uuid: {provider_uuid}\n"
                                     f" bill date: {str(bill_date)}\n"
                                     f" bill ID: {bill.id}\n"
                                     f" on or after {delete_date}.")
                    LOG.info(log_statement)
                    line_item_query.delete()

        return True

    def get_date_column_filter(self):
        """Return a filter using the provider-appropriate column."""
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            # Renamed from `type` to avoid shadowing the builtin.
            provider_type = provider_accessor.get_type()
            if provider_type in (Provider.PROVIDER_AZURE,
                                 Provider.PROVIDER_AZURE_LOCAL):
                return {"usage_date__gte": self.data_cutoff_date}
            return {"usage_start__gte": self.data_cutoff_date}

    @staticmethod
    def remove_temp_cur_files(report_path):
        """Remove temporary report files.

        Currently a placeholder: nothing is removed and an empty list is
        always returned.
        """
        # Remove any old files that have failed processing.
        removed_files = []
        return removed_files
class GCPReportSummaryUpdater:
    """Class to update GCP report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: The GCP provider being summarized
            manifest: Manifest scoping the update (may be None)

        """
        self._schema = schema
        self._provider = provider
        self._manifest = manifest
        self._date_accessor = DateAccessor()

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL."""
        with GCPReportDBAccessor(self._schema) as accessor:
            # This is the normal processing route
            if self._manifest:
                # Prefer the scan range recorded for this manifest's report.
                report_range = accessor.get_gcp_scan_range_from_report_name(
                    manifest_id=self._manifest.id)
                start_date = report_range.get("start", start_date)
                end_date = report_range.get("end", end_date)

        return start_date, end_date

    def _get_bill_ids(self, start_date, end_date):
        """Return stringified bill IDs for the provider in the date range.

        Shared by the daily and summary table updates, which previously
        duplicated this lookup.
        """
        bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema,
            datetime.datetime.strptime(start_date, "%Y-%m-%d"),
            datetime.datetime.strptime(end_date, "%Y-%m-%d"),
        )
        with schema_context(self._schema):
            return [str(bill.id) for bill in bills]

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bill_ids = self._get_bill_ids(start_date, end_date)

        with GCPReportDBAccessor(self._schema) as accessor:
            for start, end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating GCP report daily tables for \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                    str(bill_ids),
                )
                accessor.populate_line_item_daily_table(start, end, bill_ids)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        bill_ids = self._get_bill_ids(start_date, end_date)

        with GCPReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            bills = accessor.bills_for_provider_uuid(self._provider.uuid,
                                                     start_date)
            for start, end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating GCP report summary tables: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                    str(bill_ids),
                )
                accessor.populate_line_item_daily_summary_table(
                    start, end, bill_ids)
            accessor.populate_tags_summary_table(bill_ids, start_date, end_date)
            # Stamp summary-run timestamps on the bills we just summarized.
            for bill in bills:
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = \
                        self._date_accessor.today_with_timezone("UTC")
                bill.summary_data_updated_datetime = \
                    self._date_accessor.today_with_timezone("UTC")
                bill.save()

        return start_date, end_date
class OCPReportSummaryUpdater:
    """Class to update OCP report summary data."""

    def __init__(self, schema, provider, manifest):
        """Establish the database connection.

        Args:
            schema (str): The customer schema to associate with
            provider: The OCP provider being summarized
            manifest: Manifest scoping the update (may be None)

        """
        self._schema_name = schema
        self._provider = provider
        self._manifest = manifest
        self._cluster_id = get_cluster_id_from_provider(self._provider.uuid)
        with ReportingCommonDBAccessor() as reporting_common:
            self._column_map = reporting_common.column_map
        self._date_accessor = DateAccessor()

    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info(
            'Updating OpenShift report daily tables for \n\tSchema: %s '
            '\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s',
            self._schema_name, self._provider.uuid, self._cluster_id,
            start_date, end_date)
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as accessor:
            accessor.populate_line_item_daily_table(start_date, end_date,
                                                    self._cluster_id)
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as accessor:
            accessor.populate_storage_line_item_daily_table(
                start_date, end_date, self._cluster_id)

        return start_date, end_date

    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)
        LOG.info(
            'Updating OpenShift report summary tables for \n\tSchema: %s '
            '\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s',
            self._schema_name, self._provider.uuid, self._cluster_id,
            start_date, end_date)
        report_periods = None
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as accessor:
            report_periods = accessor.report_periods_for_provider_id(
                self._provider.id, start_date)
            accessor.populate_line_item_daily_summary_table(
                start_date, end_date, self._cluster_id)
            accessor.populate_pod_label_summary_table()
            accessor.populate_storage_line_item_daily_summary_table(
                start_date, end_date, self._cluster_id)
            accessor.populate_volume_claim_label_summary_table()
            accessor.populate_volume_label_summary_table()
            # Stamp summary-run timestamps on the affected report periods.
            for period in report_periods:
                if period.summary_data_creation_datetime is None:
                    period.summary_data_creation_datetime = \
                        self._date_accessor.today_with_timezone('UTC')
                period.summary_data_updated_datetime = \
                    self._date_accessor.today_with_timezone('UTC')
                period.save()

        return start_date, end_date

    def _get_sql_inputs(self, start_date, end_date):
        """Get the required inputs for running summary SQL."""
        # Default to this month's bill
        with OCPReportDBAccessor(self._schema_name,
                                 self._column_map) as accessor:
            if self._manifest:
                # Override the bill date to correspond with the manifest
                bill_date = self._manifest.billing_period_start_datetime.date()
                report_periods = accessor.get_usage_period_query_by_provider(
                    self._provider.id)
                report_periods = report_periods.filter(
                    report_period_start=bill_date).all()
                do_month_update = True
                with schema_context(self._schema_name):
                    if report_periods is not None and len(report_periods) > 0:
                        do_month_update = \
                            self._determine_if_full_summary_update_needed(
                                report_periods[0])
                if do_month_update:
                    # Widen the range to the whole billing month.
                    last_day_of_month = calendar.monthrange(
                        bill_date.year, bill_date.month)[1]
                    start_date = bill_date.strftime('%Y-%m-%d')
                    end_date = bill_date.replace(day=last_day_of_month)
                    end_date = end_date.strftime('%Y-%m-%d')
                    LOG.info(
                        'Overriding start and end date to process full month.')

        LOG.info('Returning start: %s, end: %s', str(start_date),
                 str(end_date))
        return start_date, end_date

    def _determine_if_full_summary_update_needed(self, report_period):
        """Decide whether to update summary tables for full billing period.

        Run the full month only the first time a fully-processed manifest
        is seen for this report period.
        """
        is_done_processing = (self._manifest.num_processed_files ==
                              self._manifest.num_total_files)
        is_new_period = report_period.summary_data_creation_datetime is None
        return is_done_processing and is_new_period
class ProviderDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for Provider Data."""

    def __init__(self, provider_uuid=None, auth_id=None):
        """
        Establish Provider database connection.

        Args:
            provider_uuid (String) the uuid of the provider
            auth_id (String) provider authentication database id

        """
        super().__init__("public")
        self._uuid = provider_uuid
        self._auth_id = auth_id
        self._table = Provider
        self._provider = None
        self.date_accessor = DateAccessor()

    @property
    def provider(self):
        """Return the provider this accessor is instantiated for.

        The lookup is cached: the query is only built and executed on the
        first access instead of being rebuilt on every property read.
        """
        if self._provider is None:
            query = self._get_db_obj_query()
            if query:
                self._provider = query.first()
        return self._provider

    @property
    def infrastructure(self):
        """Return the infrastructure object for the provider."""
        if self.provider:
            return self.provider.infrastructure
        return None

    def _get_db_obj_query(self):
        """
        Return the query for the provider object.

        Filters by auth id and/or uuid when set; returns an empty queryset
        when neither is provided.

        Returns:
            (QuerySet): Providers matching the accessor's identifiers.

        """
        if not self._auth_id and not self._uuid:
            return self._table.objects.none()
        query = self._table.objects.all()
        if self._auth_id:
            query = query.filter(authentication_id=self._auth_id)
        if self._uuid:
            query = query.filter(uuid=self._uuid)
        return query

    def get_provider(self):
        """Return the provider."""
        return self.provider

    def get_uuid(self):
        """
        Return the provider uuid.

        Returns:
            (String): "UUID v4",
                    example: "edf94475-235e-4b64-ba18-0b81f2de9c9e"

        """
        return str(self.provider.uuid) if self.provider else None

    def get_provider_name(self):
        """
        Return the provider name.

        Returns:
            (String): "Provider Name assigned by the customer",
                    example: "Test Provider"

        """
        return self.provider.name if self.provider else None

    def get_type(self):
        """
        Return the provider type.

        Returns:
            (String): "Provider type.  Cloud backend name",
                    example: "AWS"

        """
        return self.provider.type if self.provider else None

    def get_additional_context(self):
        """
        Return additional context information.

        Returns:
            (dict): e.g. {'crawl_hierarchy': True}

        """
        return self.provider.additional_context if self.provider else {}

    def get_credentials(self):
        """
        Return the credential information.

        Returns:
            (dict): {"credentials": "Provider Resource Name.  i.e. AWS: RoleARN"},
                    example: {"role_arn": "arn:aws:iam::111111111111:role/CostManagement"}

        """
        credentials = None
        if self.provider and self.provider.authentication:
            credentials = self.provider.authentication.credentials
        return credentials

    def get_data_source(self):
        """
        Return the data_source information.

        Returns:
            (dict): "Identifier for cost usage report.  i.e. AWS: S3 Bucket",
                    example: {"bucket": "my-s3-cur-bucket"}

        """
        data_source = None
        if self.provider and self.provider.billing_source:
            data_source = self.provider.billing_source.data_source
        return data_source

    def get_setup_complete(self):
        """
        Return whether or not a report has been processed.

        Returns:
            (Boolean): "True if a report has been processed for the provider.",

        """
        return self.provider.setup_complete if self.provider else None

    def setup_complete(self):
        """
        Set setup_complete to True.

        Returns:
            None

        """
        self.provider.setup_complete = True
        self.provider.save()
        invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def get_customer_uuid(self):
        """
        Return the provider's customer uuid.

        NOTE: assumes a provider exists; raises AttributeError otherwise.

        Returns:
            (String): "UUID v4",
                    example: "edf94475-235e-4b64-ba18-0b81f2de9c9e"

        """
        return str(self.provider.customer.uuid)

    def get_customer_name(self):
        """
        Return the provider's customer name.

        Returns:
            (String): "Name of the customer",
                    example: "Customer 1 Inc."

        """
        return self.get_schema()

    def get_schema(self):
        """
        Return the schema for the customer.

        Returns:
            (String): "Name of the database schema",

        """
        return self.provider.customer.schema_name

    def get_infrastructure_type(self):
        """Return the infrastructure type for an OpenShift provider."""
        if self.infrastructure:
            return self.infrastructure.infrastructure_type
        return None

    def get_infrastructure_provider_uuid(self):
        """Return the UUID of the infrastructure provider an OpenShift cluster is installed on."""
        if self.infrastructure:
            infra_uuid = self.infrastructure.infrastructure_provider.uuid
            return str(infra_uuid) if infra_uuid else None
        return None

    @transaction.atomic()
    def set_infrastructure(self, infrastructure_provider_uuid,
                           infrastructure_type):
        """Create an infrastructure mapping for an OpenShift provider.

        Args:
            infrastructure_type (str): The provider type this cluster is
                installed on.  Ex. AWS, AZURE, GCP
            infrastructure_provider_uuid (str): The UUID of the provider this
                cluster is installed on.

        Returns:
            None

        """
        mapping, _ = ProviderInfrastructureMap.objects.get_or_create(
            infrastructure_provider_id=infrastructure_provider_uuid,
            infrastructure_type=infrastructure_type
        )
        self.provider.infrastructure = mapping
        self.provider.save()
        invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def get_associated_openshift_providers(self):
        """Return a list of OpenShift clusters associated with the cloud provider."""
        associated_openshift_providers = []

        mapping = ProviderInfrastructureMap.objects.filter(
            infrastructure_provider_id=self.provider.uuid).first()

        if mapping:
            associated_openshift_providers = Provider.objects.filter(
                infrastructure=mapping).all()

        return associated_openshift_providers

    def set_data_updated_timestamp(self):
        """Set the data updated timestamp to the current time."""
        if self.provider:
            self.provider.data_updated_timestamp = \
                self.date_accessor.today_with_timezone("UTC")
            self.provider.save()
            invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)

    def set_additional_context(self, new_value):
        """Set the additional context value."""
        if self.provider:
            self.provider.additional_context = new_value
            self.provider.save()
            invalidate_view_cache_for_tenant_and_cache_key(SOURCES_CACHE_PREFIX)
class ReportManifestDBAccessor(KokuDBAccess):
    """Class to interact with the koku database for CUR processing statistics."""

    def __init__(self):
        """Access the AWS report manifest database table."""
        self._schema = "public"
        super().__init__(self._schema)
        self._table = CostUsageReportManifest
        self.date_accessor = DateAccessor()

    def get_manifest(self, assembly_id, provider_uuid):
        """Get the manifest associated with the provided provider and id."""
        query = self._get_db_obj_query()
        return query.filter(provider_id=provider_uuid).filter(
            assembly_id=assembly_id).first()

    def get_manifest_by_id(self, manifest_id):
        """Get the manifest by id."""
        with schema_context(self._schema):
            query = self._get_db_obj_query()
            return query.filter(id=manifest_id).first()

    def mark_manifest_as_updated(self, manifest):
        """Set the manifest's updated timestamp to now (UTC)."""
        if manifest:
            manifest.manifest_updated_datetime = \
                self.date_accessor.today_with_timezone("UTC")
            manifest.save()

    def mark_manifest_as_completed(self, manifest):
        """Set the manifest's completed timestamp to now (UTC)."""
        if manifest:
            manifest.manifest_completed_datetime = \
                self.date_accessor.today_with_timezone("UTC")
            manifest.save()

    def update_number_of_files_for_manifest(self, manifest):
        """Update the number of files for manifest."""
        # Guard first: the previous code dereferenced manifest.id before
        # checking for None, raising AttributeError instead of no-op'ing.
        if manifest:
            manifest.num_total_files = CostUsageReportStatus.objects.filter(
                manifest_id=manifest.id).count()
            manifest.save()

    def add(self, **kwargs):
        """
        Add a new row to the CUR stats database.

        Args:
            kwargs (dict): Fields containing CUR Manifest attributes.
                Valid keys are: assembly_id,
                                billing_period_start_datetime,
                                num_total_files,
                                provider_uuid,
        Returns:
            None

        """
        if "manifest_creation_datetime" not in kwargs:
            kwargs["manifest_creation_datetime"] = \
                self.date_accessor.today_with_timezone("UTC")

        # The Django model insists on calling this field provider_id
        if "provider_uuid" in kwargs:
            kwargs["provider_id"] = kwargs.pop("provider_uuid")

        return super().add(**kwargs)

    def manifest_ready_for_summary(self, manifest_id):
        """Determine if the manifest is ready to summarize."""
        return not self.is_last_completed_datetime_null(manifest_id)

    def number_of_files_processed(self, manifest_id):
        """Return the number of files processed in a manifest."""
        return CostUsageReportStatus.objects.filter(
            manifest_id=manifest_id,
            last_completed_datetime__isnull=False).count()

    def is_last_completed_datetime_null(self, manifest_id):
        """Determine if nulls exist in last_completed_datetime for manifest_id.

        If the record does not exist, that is equivalent to a null completed
        datetime.  Return True if the record either doesn't exist or has a
        null `last_completed_datetime`.  Return False otherwise.
        """
        record = CostUsageReportStatus.objects.filter(manifest_id=manifest_id)
        if record:
            return record.filter(
                last_completed_datetime__isnull=True).exists()
        return True

    def get_manifest_list_for_provider_and_bill_date(self, provider_uuid,
                                                     bill_date):
        """Return all manifests for a provider and bill date."""
        filters = {
            "provider_id": provider_uuid,
            "billing_period_start_datetime__date": bill_date
        }
        return CostUsageReportManifest.objects.filter(**filters).all()

    def get_last_seen_manifest_ids(self, bill_date):
        """Return the assembly_ids of the most recent unprocessed manifests.

        For each provider, the most recently created manifest for
        ``bill_date`` is inspected; its assembly_id is included only when
        that manifest has not finished processing (and is therefore not
        safe to delete).
        """
        assembly_ids = []
        # The following query uses a window function to rank the manifests
        # for all the providers, and then just pulls out the top ranked
        # (most recent) manifests
        manifests = (CostUsageReportManifest.objects.filter(
            billing_period_start_datetime=bill_date).annotate(
                row_number=Window(
                    expression=RowNumber(),
                    partition_by=F("provider_id"),
                    order_by=F("manifest_creation_datetime").desc(),
                )).order_by("row_number"))
        for manifest in [
                manifest for manifest in manifests if manifest.row_number == 1
        ]:
            # loop through the manifests and decide if they have finished
            # processing
            processed = self.manifest_ready_for_summary(manifest.id)
            # if all of the files for the manifest have been processed we
            # don't want to add it to assembly_ids because it is safe to
            # delete
            if not processed:
                assembly_ids.append(manifest.assembly_id)
        return assembly_ids

    def purge_expired_report_manifest(self, provider_type, expired_date):
        """
        Delete Cost Usage Report Manifests older than expired_date.

        Args:
            provider_type   (String) the provider type to delete
                            associated manifests
            expired_date (datetime.datetime) delete all manifests older than
                         this date, exclusive.

        """
        delete_count = CostUsageReportManifest.objects.filter(
            provider__type=provider_type,
            billing_period_start_datetime__lt=expired_date).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider type %s "
            "that had a billing period start date before %s",
            delete_count,
            provider_type,
            expired_date,
        )

    def purge_expired_report_manifest_provider_uuid(self, provider_uuid,
                                                    expired_date):
        """
        Delete cost usage reports older than expired_date and provider_uuid.

        Args:
            provider_uuid (uuid) The provider uuid to use to delete
                          associated manifests
            expired_date (datetime.datetime) delete all manifests older than
                         this date, exclusive.

        """
        # .delete() returns (total_count, per-model dict); log only the
        # count, matching purge_expired_report_manifest above.
        delete_count = CostUsageReportManifest.objects.filter(
            provider_id=provider_uuid,
            billing_period_start_datetime__lt=expired_date).delete()[0]
        LOG.info(
            "Removed %s CostUsageReportManifest(s) for provider_uuid %s "
            "that had a billing period start date before %s",
            delete_count,
            provider_uuid,
            expired_date,
        )

    def get_s3_csv_cleared(self, manifest):
        """Return whether we have cleared CSV files from S3 for this manifest."""
        s3_csv_cleared = False
        if manifest:
            s3_csv_cleared = manifest.s3_csv_cleared
        return s3_csv_cleared

    def mark_s3_csv_cleared(self, manifest):
        """Record that CSV files have been cleared from S3 for this manifest."""
        if manifest:
            manifest.s3_csv_cleared = True
            manifest.save()

    def get_s3_parquet_cleared(self, manifest):
        """Return whether we have cleared parquet files from S3 for this manifest."""
        s3_parquet_cleared = False
        if manifest:
            s3_parquet_cleared = manifest.s3_parquet_cleared
        return s3_parquet_cleared

    def mark_s3_parquet_cleared(self, manifest):
        """Record that parquet files have been cleared from S3 for this manifest."""
        if manifest:
            manifest.s3_parquet_cleared = True
            manifest.save()

    def get_max_export_time_for_manifests(self, provider_uuid, bill_date):
        """Return the max export time for manifests given provider and bill date."""
        filters = {
            "provider_id": provider_uuid,
            "billing_period_start_datetime__date": bill_date
        }
        manifests = CostUsageReportManifest.objects.filter(**filters).all()
        max_export = manifests.aggregate(Max("export_time"))
        return max_export.get("export_time__max")
class ReportSummaryUpdater:
    """Update reporting summary tables."""

    def __init__(self, customer_schema, provider_uuid, manifest_id=None, tracing_id=None):
        """
        Initializer.

        Args:
            customer_schema (str): Schema name for given customer.
            provider_uuid (str): The provider uuid.
            manifest_id (str): The manifest id to load, if any.
            tracing_id (str): Identifier used to correlate log messages.

        Raises:
            ReportSummaryUpdaterProviderNotFoundError: if no provider exists for provider_uuid.
            ReportSummaryUpdaterError: if the updater could not be created or
                the provider type is invalid.
        """
        self._schema = customer_schema
        self._provider_uuid = provider_uuid
        self._manifest = None
        self._tracing_id = tracing_id
        if manifest_id is not None:
            with ReportManifestDBAccessor() as manifest_accessor:
                self._manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        self._date_accessor = DateAccessor()
        with ProviderDBAccessor(self._provider_uuid) as provider_accessor:
            self._provider = provider_accessor.get_provider()

        if not self._provider:
            raise ReportSummaryUpdaterProviderNotFoundError(
                f"Provider data for uuid '{self._provider_uuid}' not found."
            )

        try:
            self._updater, self._ocp_cloud_updater = self._set_updater()
        except Exception as err:
            raise ReportSummaryUpdaterError(err)

        if not self._updater:
            raise ReportSummaryUpdaterError("Invalid provider type specified.")
        msg = f"Starting report data summarization for provider uuid: {self._provider.uuid}."
        LOG.info(log_json(self._tracing_id, msg))

    @cached_property
    def trino_enabled(self):
        """Return whether the source is enabled for Trino processing."""
        return enable_trino_processing(self._provider_uuid, self._provider.type, self._schema)

    def _set_updater(self):
        """
        Create the report summary updater object.

        Object is specific to the report provider.

        Args:
            None

        Returns:
            (Object) : Provider-specific report summary updater

        """
        # Each provider type has a Trino/parquet updater and a PostgreSQL updater;
        # trino_enabled selects which one is instantiated.
        if self._provider.type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
            report_summary_updater = AWSReportParquetSummaryUpdater if self.trino_enabled else AWSReportSummaryUpdater
        elif self._provider.type in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
            report_summary_updater = (
                AzureReportParquetSummaryUpdater if self.trino_enabled else AzureReportSummaryUpdater
            )
        elif self._provider.type in (Provider.PROVIDER_OCP,):
            report_summary_updater = OCPReportParquetSummaryUpdater if self.trino_enabled else OCPReportSummaryUpdater
        elif self._provider.type in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
            report_summary_updater = GCPReportParquetSummaryUpdater if self.trino_enabled else GCPReportSummaryUpdater
        else:
            return (None, None)

        ocp_cloud_updater = OCPCloudParquetReportSummaryUpdater if self.trino_enabled else OCPCloudReportSummaryUpdater

        LOG.info(f"Set report_summary_updater = {report_summary_updater.__name__}")

        return (
            report_summary_updater(self._schema, self._provider, self._manifest),
            ocp_cloud_updater(self._schema, self._provider, self._manifest),
        )

    def _format_dates(self, start_date, end_date):
        """Convert dates to strings for use in the updater.

        A None end_date is replaced with today's date (UTC).
        """
        if isinstance(start_date, datetime.date):
            start_date = start_date.strftime("%Y-%m-%d")
        if isinstance(end_date, datetime.date):
            end_date = end_date.strftime("%Y-%m-%d")
        elif end_date is None:
            # Run up to the current date
            end_date = self._date_accessor.today_with_timezone("UTC")
            end_date = end_date.strftime("%Y-%m-%d")
        return start_date, end_date

    def update_daily_tables(self, start_date, end_date):
        """
        Update report daily rollup tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.

        Returns:
            (str, str): The start and end date strings used in the daily SQL.

        """
        msg = f"Daily summary starting for source {self._provider_uuid}"
        LOG.info(log_json(self._tracing_id, msg))
        start_date, end_date = self._format_dates(start_date, end_date)

        start_date, end_date = self._updater.update_daily_tables(start_date, end_date)

        invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)

        msg = f"Daily summary completed for source {self._provider_uuid}"
        LOG.info(log_json(self._tracing_id, msg))
        return start_date, end_date

    def update_summary_tables(self, start_date, end_date, tracing_id):
        """
        Update report summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            tracing_id (str): The tracing_id.

        Returns:
            None

        """
        msg = f"Summary processing starting for source {self._provider_uuid}"
        LOG.info(log_json(self._tracing_id, msg))
        start_date, end_date = self._format_dates(start_date, end_date)
        LOG.info(log_json(tracing_id, f"Using start date: {start_date}"))
        LOG.info(log_json(tracing_id, f"Using end date: {end_date}"))

        start_date, end_date = self._updater.update_summary_tables(start_date, end_date)
        msg = f"Summary processing completed for source {self._provider_uuid}"
        LOG.info(log_json(self._tracing_id, msg))

        invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)
        return start_date, end_date

    def get_openshift_on_cloud_infra_map(self, start_date, end_date, tracing_id):
        """Get cloud infrastructure source and OpenShift source mapping.

        Returns an empty dict for provider types that are not in
        Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST.

        Raises:
            ReportSummaryUpdaterCloudError: if building the map fails.
        """
        infra_map = {}
        try:
            if self._provider.type in Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST:
                msg = f"Getting OpenShift on Cloud infrastructure map for {self._provider_uuid}"
                LOG.info(log_json(self._tracing_id, msg))
                start_date, end_date = self._format_dates(start_date, end_date)
                LOG.info(log_json(tracing_id, f"Using start date: {start_date}"))
                LOG.info(log_json(tracing_id, f"Using end date: {end_date}"))
                infra_map = self._ocp_cloud_updater.get_infra_map(start_date, end_date)
        except Exception as ex:
            raise ReportSummaryUpdaterCloudError(str(ex))
        return infra_map

    def update_openshift_on_cloud_summary_tables(
        self, start_date, end_date, ocp_provider_uuid, infra_provider_uuid, infra_provider_type, tracing_id
    ):
        """
        Update OpenShift on Cloud report summary tables.

        Args:
            start_date (str, datetime): When to start.
            end_date (str, datetime): When to end.
            ocp_provider_uuid (str): The OpenShift source uuid.
            infra_provider_uuid (str): The infrastructure (cloud) source uuid.
            infra_provider_type (str): The infrastructure provider type.
            tracing_id (str): The tracing_id.

        Returns:
            None

        Raises:
            ReportSummaryUpdaterCloudError: if summary processing fails.
        """
        if self._provider.type in Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST:
            msg = f"OpenShift on {infra_provider_type} summary processing starting for source {self._provider_uuid}"
            LOG.info(log_json(self._tracing_id, msg))
            start_date, end_date = self._format_dates(start_date, end_date)
            LOG.info(log_json(tracing_id, f"Using start date: {start_date}"))
            LOG.info(log_json(tracing_id, f"Using end date: {end_date}"))
            try:
                self._ocp_cloud_updater.update_summary_tables(
                    start_date, end_date, ocp_provider_uuid, infra_provider_uuid, infra_provider_type
                )
                # Build a single string; the original code passed a tuple of
                # f-strings here, which made log_json log a tuple repr.
                msg = (
                    f"OpenShift on {infra_provider_type} summary processing completed"
                    f" for source {self._provider_uuid}"
                )
                LOG.info(log_json(self._tracing_id, msg))
                invalidate_view_cache_for_tenant_and_source_type(self._schema, self._provider.type)
            except Exception as ex:
                raise ReportSummaryUpdaterCloudError(str(ex))
        else:
            # Single string (was a tuple of two strings, logged as a tuple repr
            # and missing the separating space between the sentences).
            msg = (
                f"{infra_provider_type} is not in {Provider.OPENSHIFT_ON_CLOUD_PROVIDER_LIST}. "
                "Not running OpenShift on Cloud summary."
            )
            LOG.info(log_json(self._tracing_id, msg))