def update_daily_tables(self, start_date, end_date):
    """Populate the daily tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    # Walk the requested window in chunks and populate each daily table per chunk.
    for chunk_start, chunk_end in date_range_pair(start_date, end_date):
        LOG.info(
            "Updating OpenShift report daily tables for \n\tSchema: %s "
            "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
            self._schema,
            self._provider.uuid,
            self._cluster_id,
            chunk_start,
            chunk_end,
        )
        with OCPReportDBAccessor(self._schema) as accessor:
            accessor.populate_node_label_line_item_daily_table(chunk_start, chunk_end, self._cluster_id)
            accessor.populate_line_item_daily_table(chunk_start, chunk_end, self._cluster_id)
            accessor.populate_storage_line_item_daily_table(chunk_start, chunk_end, self._cluster_id)

    return start_date, end_date
def test_date_range_pair(self):
    """Test that start and end dates are returned by this generator."""
    start_date = "2020-01-01"
    end_date = "2020-02-29"
    step = 3
    date_generator = common_utils.date_range_pair(start_date, end_date, step=step)
    start_date = parser.parse(start_date)
    end_date = parser.parse(end_date)
    self.assertIsInstance(date_generator, types.GeneratorType)
    first_start, first_end = next(date_generator)
    self.assertEqual(first_start, start_date.date())
    self.assertEqual(first_end, start_date.date() + timedelta(days=step))
    for start, end in date_generator:
        self.assertIsInstance(start, date)
        self.assertIsInstance(end, date)
        self.assertGreater(start, start_date.date())
        self.assertLessEqual(end, end_date.date())
    # Only the final yielded pair terminates exactly on end_date; intermediate
    # chunks end step days after their start. The equality check therefore
    # belongs after the loop, not on every iteration.
    self.assertEqual(end, end_date.date())
def update_daily_tables(self, start_date, end_date):
    """Populate the daily tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str): A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    bills = get_bills_from_provider(
        self._provider.uuid,
        self._schema,
        datetime.datetime.strptime(start_date, "%Y-%m-%d"),
        datetime.datetime.strptime(end_date, "%Y-%m-%d"),
    )
    # Bill ids must be resolved inside the tenant schema.
    with schema_context(self._schema):
        bill_ids = [str(bill.id) for bill in bills]

    with AWSReportDBAccessor(self._schema) as accessor:
        for chunk_start, chunk_end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating AWS report daily tables for \n\tSchema: %s"
                "\n\tProvider: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
            )
            accessor.populate_line_item_daily_table(chunk_start, chunk_end, bill_ids)

    return start_date, end_date
def test_update_daily_summary_tables(self, mock_presto, mock_tag_update, mock_delete):
    """Test that we run Presto summary."""
    start_str = self.dh.this_month_start.isoformat()
    end_str = self.dh.this_month_end.isoformat()
    start, end = self.updater._get_sql_inputs(start_str, end_str)

    # Capture the final (start, end) chunk the generator yields; the mocks only
    # retain their last call, so that is the pair we assert against below.
    for chunk_start, chunk_end in date_range_pair(start, end, step=settings.TRINO_DATE_STEP):
        expected_start, expected_end = chunk_start, chunk_end

    with AWSReportDBAccessor(self.schema) as accessor:
        with schema_context(self.schema):
            bills = accessor.bills_for_provider_uuid(self.aws_provider.uuid, start)
            bill_ids = [str(bill.id) for bill in bills]
            current_bill_id = bills.first().id if bills else None

    with CostModelDBAccessor(self.schema, self.aws_provider.uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = float(markup.get("value", 0)) / 100

    start_return, end_return = self.updater.update_summary_tables(start, end)

    mock_delete.assert_called_with(self.aws_provider.uuid, expected_start, expected_end)
    mock_presto.assert_called_with(expected_start, expected_end, self.aws_provider.uuid, current_bill_id, markup_value)
    mock_tag_update.assert_called_with(bill_ids, start, end)
    self.assertEqual(start_return, start)
    self.assertEqual(end_return, end)
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    with CostModelDBAccessor(self._schema, self._provider.uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = float(markup.get("value", 0)) / 100

    with GCPReportDBAccessor(self._schema) as accessor:
        # Need these bills on the session to update dates after processing
        with schema_context(self._schema):
            bills = accessor.bills_for_provider_uuid(self._provider.uuid, start_date)
            bill_ids = [str(bill.id) for bill in bills]
            current_bill_id = bills.first().id if bills else None

        # Without a bill there is nothing to summarize for this period.
        if current_bill_id is None:
            msg = f"No bill was found for {start_date}. Skipping summarization"
            LOG.info(msg)
            return start_date, end_date

        for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating GCP report summary tables from parquet: \n\tSchema: %s"
                "\n\tProvider: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
            )
            accessor.delete_line_item_daily_summary_entries_for_date_range(
                self._provider.uuid, chunk_start, chunk_end
            )
            accessor.populate_line_item_daily_summary_table_presto(
                chunk_start, chunk_end, self._provider.uuid, current_bill_id, markup_value
            )
            accessor.populate_enabled_tag_keys(chunk_start, chunk_end, bill_ids)
        # Tag summarization operates over the whole range, after all chunks land.
        accessor.populate_tags_summary_table(bill_ids)
        accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, bill_ids)

        # Stamp the bills so downstream knows when summary data was produced.
        for bill in bills:
            if bill.summary_data_creation_datetime is None:
                bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.save()

    return start_date, end_date
def test_date_range_pair_date_args(self):
    """Test that start and end dates are returned by this generator with date args passed instead of str."""
    start_date = date(2020, 1, 1)
    end_date = date(2020, 2, 29)
    step = 3
    date_generator = common_utils.date_range_pair(start_date, end_date, step=step)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    self.assertIsInstance(date_generator, types.GeneratorType)
    first_start, first_end = next(date_generator)
    self.assertEqual(first_start, start_date.date())
    self.assertEqual(first_end, start_date.date() + timedelta(days=step))
    for start, end in date_generator:
        self.assertIsInstance(start, date)
        self.assertIsInstance(end, date)
        self.assertGreater(start, start_date.date())
        self.assertLessEqual(end, end_date.date())
    # Only the final yielded pair ends exactly on end_date (intermediate chunks
    # end step days after their start), so this check runs once after the loop.
    self.assertEqual(end, end_date.date())
def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, start_date, end_date):
    """Update operations specifically for OpenShift on AWS."""
    # Accept either str or date inputs; normalize to date.
    if isinstance(start_date, str):
        start_date = parser.parse(start_date).date()
    if isinstance(end_date, str):
        end_date = parser.parse(end_date).date()

    cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)

    with OCPReportDBAccessor(self._schema) as accessor:
        report_period = accessor.report_periods_for_provider_uuid(openshift_provider_uuid, start_date)
        # Clear previously attributed raw infrastructure cost before re-summarizing.
        accessor.delete_infrastructure_raw_cost_from_daily_summary(
            openshift_provider_uuid, report_period.id, start_date, end_date
        )

    aws_bills = aws_get_bills_from_provider(aws_provider_uuid, self._schema, start_date, end_date)
    with schema_context(self._schema):
        aws_bill_ids = [str(bill.id) for bill in aws_bills]
        current_aws_bill_id = aws_bills.first().id if aws_bills else None
        current_ocp_report_period_id = report_period.id

    with CostModelDBAccessor(self._schema, aws_provider_uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = Decimal(markup.get("value", 0)) / 100

    # OpenShift on AWS
    with AWSReportDBAccessor(self._schema) as accessor:
        for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating OpenShift on AWS summary table for "
                "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                "\n\tCluster ID: %s, AWS Bill ID: %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
                cluster_id,
                current_aws_bill_id,
            )
            accessor.populate_ocp_on_aws_cost_daily_summary_presto(
                chunk_start,
                chunk_end,
                openshift_provider_uuid,
                aws_provider_uuid,
                current_ocp_report_period_id,
                current_aws_bill_id,
                markup_value,
            )
        # Full-range backfill and tag summarization run once after all chunks.
        accessor.back_populate_ocp_on_aws_daily_summary(start_date, end_date, current_ocp_report_period_id)
        accessor.populate_ocp_on_aws_tags_summary_table(aws_bill_ids, start_date, end_date)
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    with schema_context(self._schema):
        self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

    bills = get_bills_from_provider(
        self._provider.uuid,
        self._schema,
        datetime.datetime.strptime(start_date, "%Y-%m-%d"),
        datetime.datetime.strptime(end_date, "%Y-%m-%d"),
    )
    with schema_context(self._schema):
        bill_ids = [str(bill.id) for bill in bills]

    with GCPReportDBAccessor(self._schema) as accessor:
        # Need these bills on the session to update dates after processing
        bills = accessor.bills_for_provider_uuid(self._provider.uuid, start_date)
        for chunk_start, chunk_end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating GCP report summary tables: \n\tSchema: %s"
                "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
                str(bill_ids),
            )
            accessor.populate_line_item_daily_summary_table(chunk_start, chunk_end, bill_ids)
            accessor.populate_ui_summary_tables(chunk_start, chunk_end, self._provider.uuid)
        # Tag summarization covers the whole range once all chunks are in.
        accessor.populate_tags_summary_table(bill_ids, start_date, end_date)

        # Stamp the bills so downstream knows when summary data was produced.
        for bill in bills:
            if bill.summary_data_creation_datetime is None:
                bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.save()

    return start_date, end_date
def create_expected_csv_files(start_date, end_date, invoice_month, etag, keys=False):
    """Create the list of expected csv."""
    files = []
    for range_start, range_end in date_range_pair(start_date, end_date):
        # Zero-length ranges produce no file.
        if range_start == range_end:
            continue
        range_end = range_end + relativedelta(days=1)
        files.append(f"{invoice_month}_{etag}_{range_start}:{range_end}.csv")

    if keys:
        # Mirror the manifest entry shape: one dict per expected file.
        return [{"key": f"{f}", "local_file": f"{f}"} for f in files]
    return files
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    report_periods = None
    with OCPReportDBAccessor(self._schema) as accessor:
        report_periods = accessor.report_periods_for_provider_uuid(self._provider.uuid, start_date)
        with schema_context(self._schema):
            report_period_ids = [report_period.id for report_period in report_periods]

        for chunk_start, chunk_end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating OpenShift report summary tables for \n\tSchema: %s "
                "\n\tProvider: %s \n\tCluster: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                self._cluster_id,
                chunk_start,
                chunk_end,
            )
            accessor.populate_line_item_daily_summary_table(chunk_start, chunk_end, self._cluster_id)
            accessor.populate_storage_line_item_daily_summary_table(chunk_start, chunk_end, self._cluster_id)

        # Label/tag summaries span the full range, so they run once after the loop.
        accessor.populate_pod_label_summary_table(report_period_ids, start_date, end_date)
        accessor.populate_volume_label_summary_table(report_period_ids, start_date, end_date)
        accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids)

        # Stamp processed report periods with creation/update timestamps.
        for period in report_periods:
            if period.summary_data_creation_datetime is None:
                period.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            period.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            period.save()

    return start_date, end_date
def update_azure_summary_tables(self, openshift_provider_uuid, azure_provider_uuid, start_date, end_date):
    """Update operations specifically for OpenShift on Azure."""
    # Accept either str or date inputs; normalize to date.
    if isinstance(start_date, str):
        start_date = parser.parse(start_date).date()
    if isinstance(end_date, str):
        end_date = parser.parse(end_date).date()

    cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)
    azure_bills = azure_get_bills_from_provider(azure_provider_uuid, self._schema, start_date, end_date)
    with schema_context(self._schema):
        azure_bill_ids = [str(bill.id) for bill in azure_bills]
        current_azure_bill_id = azure_bills.first().id if azure_bills else None

    with CostModelDBAccessor(self._schema, azure_provider_uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = Decimal(markup.get("value", 0)) / 100

    # OpenShift on Azure
    with AzureReportDBAccessor(self._schema) as accessor:
        for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating OpenShift on Azure summary table for "
                "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                "\n\tCluster ID: %s, Azure Bill ID: %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
                cluster_id,
                current_azure_bill_id,
            )
            accessor.populate_ocp_on_azure_cost_daily_summary_presto(
                chunk_start,
                chunk_end,
                openshift_provider_uuid,
                azure_provider_uuid,
                cluster_id,
                current_azure_bill_id,
                markup_value,
            )
        # Tag summarization covers the whole range once all chunks are in.
        accessor.populate_ocp_on_azure_tags_summary_table(azure_bill_ids, start_date, end_date)
def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, start_date, end_date):
    """Update operations specifically for OpenShift on AWS."""
    # Accept either str or datetime inputs; normalize via dateutil.
    if isinstance(start_date, str):
        start_date = parser.parse(start_date)
    if isinstance(end_date, str):
        end_date = parser.parse(end_date)

    cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)
    aws_bills = aws_get_bills_from_provider(aws_provider_uuid, self._schema, start_date, end_date)
    with schema_context(self._schema):
        aws_bill_ids = [str(bill.id) for bill in aws_bills]

    with CostModelDBAccessor(self._schema, aws_provider_uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = Decimal(markup.get("value", 0)) / 100

    # OpenShift on AWS
    with AWSReportDBAccessor(self._schema) as accessor:
        for chunk_start, chunk_end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating OpenShift on AWS summary table for "
                "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                "\n\tCluster ID: %s, AWS Bill IDs: %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
                cluster_id,
                str(aws_bill_ids),
            )
            accessor.populate_ocp_on_aws_cost_daily_summary(
                chunk_start, chunk_end, cluster_id, aws_bill_ids, markup_value
            )
        accessor.populate_ocp_on_aws_tags_summary_table()

    self.refresh_openshift_on_infrastructure_views(OCP_ON_AWS_MATERIALIZED_VIEWS)

    with OCPReportDBAccessor(self._schema) as accessor:
        # This call just sends the infrastructure cost to the
        # OCP usage daily summary table
        accessor.update_summary_infrastructure_cost(cluster_id, start_date, end_date)
def test_date_range_pair_one_day(self):
    """Test that generator works for a single day."""
    start_date = "2020-01-01"
    end_date = start_date
    step = 3
    date_generator = common_utils.date_range_pair(start_date, end_date, step=step)
    start_date = parser.parse(start_date)
    end_date = parser.parse(end_date)

    self.assertIsInstance(date_generator, types.GeneratorType)

    # A one-day window yields exactly one pair where start == end.
    first_start, first_end = next(date_generator)
    self.assertEqual(first_start, start_date.date())
    self.assertEqual(first_end, end_date.date())
    with self.assertRaises(StopIteration):
        next(date_generator)
def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, start_date, end_date):
    """Update operations specifically for OpenShift on AWS."""
    cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)
    aws_bills = aws_get_bills_from_provider(
        aws_provider_uuid,
        self._schema,
        datetime.datetime.strptime(start_date, "%Y-%m-%d"),
        datetime.datetime.strptime(end_date, "%Y-%m-%d"),
    )
    with schema_context(self._schema):
        aws_bill_ids = [str(bill.id) for bill in aws_bills]

    with CostModelDBAccessor(self._schema, aws_provider_uuid, self._column_map) as cost_model_accessor:
        markup = cost_model_accessor.get_markup()
        markup_value = Decimal(markup.get("value", 0)) / 100

    # OpenShift on AWS
    with AWSReportDBAccessor(self._schema, self._column_map) as accessor:
        for chunk_start, chunk_end in date_range_pair(start_date, end_date):
            LOG.info(
                "Updating OpenShift on AWS summary table for "
                "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                "\n\tCluster ID: %s, AWS Bill IDs: %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
                cluster_id,
                str(aws_bill_ids),
            )
            accessor.populate_ocp_on_aws_cost_daily_summary(chunk_start, chunk_end, cluster_id, aws_bill_ids)
        # Markup is applied over the full bill set once daily summaries exist.
        accessor.populate_ocp_on_aws_markup_cost(markup_value, aws_bill_ids)

    with OCPReportDBAccessor(self._schema, self._column_map) as accessor:
        # This call just sends the infrastructure cost to the
        # OCP usage daily summary table
        accessor.update_summary_infrastructure_cost(cluster_id, start_date, end_date)
def _get_relevant_file_names(self, invoice_month):
    """
    Generate a list of relevant file names for the manifest's dates.

    GCP reports are simply named "YYYY-MM-DD.csv" with an optional prefix.
    So, we have to iterate through all files and use rudimentary name
    pattern-matching to find files relevant to this date range.

    Args:
        invoice_month (datetime.datetime): invoice month in "%Y%m" format

    Returns:
        list of relevant file (blob) names found in the GCP storage bucket.

    """
    names = []
    for range_start, range_end in date_range_pair(self.scan_start, self.scan_end):
        # File names use an exclusive upper bound, hence the extra day.
        range_end = range_end + relativedelta(days=1)
        names.append(f"{invoice_month}_{self.etag}_{range_start}:{range_end}.csv")
    return names
def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, start_date, end_date):
    """Update operations specifically for OpenShift on AWS."""
    # Accept either str or date inputs; normalize to date.
    if isinstance(start_date, str):
        start_date = parser.parse(start_date).date()
    if isinstance(end_date, str):
        end_date = parser.parse(end_date).date()

    cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)
    cluster_alias = get_cluster_alias_from_cluster_id(cluster_id)

    with OCPReportDBAccessor(self._schema) as accessor:
        # Without cluster information there is nothing to correlate; bail out.
        if not accessor.get_cluster_for_provider(openshift_provider_uuid):
            LOG.info(
                f"No cluster information available for OCP Provider: {openshift_provider_uuid}, "
                f"skipping OCP on Cloud summary table update for AWS source: {aws_provider_uuid}."
            )
            return
        report_period = accessor.report_periods_for_provider_uuid(openshift_provider_uuid, start_date)
        if not report_period:
            LOG.info(f"No report period for AWS provider {openshift_provider_uuid} with start date {start_date}")
            return
        # Clear previously attributed raw infrastructure cost before re-summarizing.
        accessor.delete_infrastructure_raw_cost_from_daily_summary(
            openshift_provider_uuid, report_period.id, start_date, end_date
        )

    aws_bills = aws_get_bills_from_provider(aws_provider_uuid, self._schema, start_date, end_date)
    with schema_context(self._schema):
        # Make sure all partitioned tables the summaries write to have partitions.
        self._handle_partitions(
            self._schema,
            (
                "reporting_ocpawscostlineitem_daily_summary_p",
                "reporting_ocpawscostlineitem_project_daily_summary_p",
                "reporting_ocpaws_compute_summary_p",
                "reporting_ocpaws_cost_summary_p",
                "reporting_ocpaws_cost_summary_by_account_p",
                "reporting_ocpaws_cost_summary_by_region_p",
                "reporting_ocpaws_cost_summary_by_service_p",
                "reporting_ocpaws_storage_summary_p",
                "reporting_ocpaws_database_summary_p",
                "reporting_ocpaws_network_summary_p",
                "reporting_ocpallcostlineitem_daily_summary_p",
                "reporting_ocpallcostlineitem_project_daily_summary_p",
                "reporting_ocpall_compute_summary_pt",
                "reporting_ocpall_cost_summary_pt",
            ),
            start_date,
            end_date,
        )
        aws_bill_ids = [str(bill.id) for bill in aws_bills]
        current_aws_bill_id = aws_bills.first().id if aws_bills else None
        current_ocp_report_period_id = report_period.id

    with CostModelDBAccessor(self._schema, aws_provider_uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = Decimal(markup.get("value", 0)) / 100

    with CostModelDBAccessor(self._schema, openshift_provider_uuid) as cost_model_accessor:
        distribution = cost_model_accessor.distribution

    # OpenShift on AWS
    sql_params = {
        "schema_name": self._schema,
        "start_date": start_date,
        "end_date": end_date,
        "source_uuid": aws_provider_uuid,
        "cluster_id": cluster_id,
        "cluster_alias": cluster_alias,
    }
    with AWSReportDBAccessor(self._schema) as accessor:
        for start, end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating OpenShift on AWS summary table for "
                "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                "\n\tCluster ID: %s, AWS Bill ID: %s",
                self._schema,
                self._provider.uuid,
                start,
                end,
                cluster_id,
                current_aws_bill_id,
            )
            filters = {"report_period_id": current_ocp_report_period_id}  # Use report_period_id to leverage DB index on DELETE
            accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                self._provider.uuid, start, end, filters, table=OCPAWSCostLineItemProjectDailySummaryP
            )
            accessor.populate_ocp_on_aws_cost_daily_summary_presto(
                start,
                end,
                openshift_provider_uuid,
                aws_provider_uuid,
                current_ocp_report_period_id,
                current_aws_bill_id,
                markup_value,
                distribution,
            )
        accessor.back_populate_ocp_on_aws_daily_summary(start_date, end_date, current_ocp_report_period_id)
        accessor.populate_ocp_on_aws_tags_summary_table(aws_bill_ids, start_date, end_date)
        accessor.populate_ocp_on_aws_ui_summary_tables(sql_params)

        with OCPReportDBAccessor(self._schema) as ocp_accessor:
            sql_params["source_type"] = "AWS"
            LOG.info(f"Processing OCP-ALL for AWS (T) (s={start_date} e={end_date})")
            ocp_accessor.populate_ocp_on_all_project_daily_summary("aws", sql_params)
            ocp_accessor.populate_ocp_on_all_daily_summary("aws", sql_params)
            ocp_accessor.populate_ocp_on_all_ui_summary_tables(sql_params)
            # NOTE(review): start/end here are the loop variables left over from
            # the final chunk above — confirm this is the intended range.
            ocp_accessor.populate_ui_summary_tables(
                start, end, openshift_provider_uuid, UI_SUMMARY_TABLES_MARKUP_SUBSET
            )

    LOG.info("Updating ocp_on_cloud_updated_datetime OpenShift report periods")
    with schema_context(self._schema):
        report_period.ocp_on_cloud_updated_datetime = self._date_accessor.today_with_timezone("UTC")
        report_period.save()
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    report_periods = None
    with OCPReportDBAccessor(self._schema) as accessor:
        with schema_context(self._schema):
            report_periods = accessor.report_periods_for_provider_uuid(self._provider.uuid, start_date)
            report_period_ids = [report_period.id for report_period in report_periods]

        for report_period in report_periods:
            for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating OpenShift report summary tables for \n\tSchema: %s "
                    "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    self._cluster_id,
                    report_period.id,
                    chunk_start,
                    chunk_end,
                )
                # This will process POD and STORAGE together
                accessor.delete_line_item_daily_summary_entries_for_date_range(
                    self._provider.uuid, chunk_start, chunk_end
                )
                accessor.populate_line_item_daily_summary_table_presto(
                    chunk_start,
                    chunk_end,
                    report_period.id,
                    self._cluster_id,
                    self._cluster_alias,
                    self._provider.uuid,
                )

        # This will process POD and STORAGE together
        LOG.info(
            "Updating OpenShift label summary tables for \n\tSchema: %s "
            "\n\tReport Period IDs: %s",
            self._schema,
            report_period_ids,
        )
        accessor.populate_pod_label_summary_table(report_period_ids, start_date, end_date)
        accessor.populate_volume_label_summary_table(report_period_ids, start_date, end_date)
        accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids)

        LOG.info("Updating OpenShift report periods")
        for period in report_periods:
            if period.summary_data_creation_datetime is None:
                period.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            period.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            period.save()

    return start_date, end_date
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)

    with schema_context(self._schema):
        self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

    with CostModelDBAccessor(self._schema, self._provider.uuid) as cost_model_accessor:
        markup = cost_model_accessor.markup
        markup_value = float(markup.get("value", 0)) / 100

    with AWSReportDBAccessor(self._schema) as accessor:
        # Need these bills on the session to update dates after processing
        with schema_context(self._schema):
            bills = accessor.bills_for_provider_uuid(self._provider.uuid, start_date)
            bill_ids = [str(bill.id) for bill in bills]
            current_bill_id = bills.first().id if bills else None

        for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating AWS report summary tables from parquet: \n\tSchema: %s"
                "\n\tProvider: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                chunk_start,
                chunk_end,
            )
            filters = {"cost_entry_bill_id": current_bill_id}  # Use cost_entry_bill_id to leverage DB index on DELETE
            accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                self._provider.uuid, chunk_start, chunk_end, filters
            )
            accessor.populate_line_item_daily_summary_table_presto(
                chunk_start, chunk_end, self._provider.uuid, current_bill_id, markup_value
            )
            accessor.populate_ui_summary_tables(chunk_start, chunk_end, self._provider.uuid)
            # accessor.populate_enabled_tag_keys(start, end, bill_ids)
        accessor.populate_tags_summary_table(bill_ids, start_date, end_date)
        # accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, bill_ids)

        # Stamp the bills so downstream knows when summary data was produced.
        for bill in bills:
            if bill.summary_data_creation_datetime is None:
                bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
            bill.save()

    return start_date, end_date
def update_summary_tables(self, start_date, end_date):
    """Populate the summary tables for reporting.

    Args:
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.

    Returns
        (str, str) A start date and end date.

    """
    start_date, end_date = self._get_sql_inputs(start_date, end_date)
    # Clamp the window to the date range actually present in parquet.
    start_date, end_date = self._check_parquet_date_range(start_date, end_date)

    with schema_context(self._schema):
        self._handle_partitions(self._schema, UI_SUMMARY_TABLES, start_date, end_date)

    with OCPReportDBAccessor(self._schema) as accessor:
        with schema_context(self._schema):
            report_period = accessor.report_periods_for_provider_uuid(self._provider.uuid, start_date)
            report_period_id = report_period.id

        for chunk_start, chunk_end in date_range_pair(start_date, end_date, step=settings.TRINO_DATE_STEP):
            LOG.info(
                "Updating OpenShift report summary tables for \n\tSchema: %s "
                "\n\tProvider: %s \n\tCluster: %s \n\tReport Period ID: %s \n\tDates: %s - %s",
                self._schema,
                self._provider.uuid,
                self._cluster_id,
                report_period_id,
                chunk_start,
                chunk_end,
            )
            # This will process POD and STORAGE together
            filters = {"report_period_id": report_period_id}  # Use report_period_id to leverage DB index on DELETE
            accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                self._provider.uuid, chunk_start, chunk_end, filters
            )
            accessor.populate_line_item_daily_summary_table_presto(
                chunk_start, chunk_end, report_period_id, self._cluster_id, self._cluster_alias, self._provider.uuid
            )
            accessor.populate_ui_summary_tables(chunk_start, chunk_end, self._provider.uuid)

        # This will process POD and STORAGE together
        LOG.info(
            "Updating OpenShift label summary tables for \n\tSchema: %s "
            "\n\tReport Period IDs: %s",
            self._schema,
            [report_period_id],
        )
        accessor.populate_pod_label_summary_table([report_period_id], start_date, end_date)
        accessor.populate_volume_label_summary_table([report_period_id], start_date, end_date)
        accessor.populate_openshift_cluster_information_tables(
            self._provider, self._cluster_id, self._cluster_alias, start_date, end_date
        )
        accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, [report_period_id])

        LOG.info("Updating OpenShift report periods")
        if report_period.summary_data_creation_datetime is None:
            report_period.summary_data_creation_datetime = self._date_accessor.today_with_timezone("UTC")
        report_period.summary_data_updated_datetime = self._date_accessor.today_with_timezone("UTC")
        report_period.save()

    self.check_cluster_infrastructure(start_date, end_date)

    return start_date, end_date