def test_manifest_is_ready_is_not_ready(self):
    """Test that False is returned when a manifest is not ready to process.

    The manifest is stored with ``num_processed_files`` (1) lower than
    ``num_total_files`` (2) and the updater is expected to report it as
    not ready.
    """
    billing_start = DateAccessor().today_with_timezone('UTC').replace(day=1)
    manifest_dict = {
        'assembly_id': '1234',
        'billing_period_start_datetime': billing_start,
        'num_total_files': 2,
        'num_processed_files': 1,
        'provider_id': 2,
    }
    with ReportManifestDBAccessor() as accessor:
        manifest = accessor.add(**manifest_dict)
        accessor.commit()
        manifest_id = manifest.id

    try:
        updater = ReportSummaryUpdater(self.schema,
                                       self.ocp_test_provider_uuid,
                                       manifest_id)
        self.assertFalse(updater.manifest_is_ready())
    finally:
        # Remove every manifest row even when the assertion above fails, so
        # a failing run does not leave rows behind for later tests.
        with ReportManifestDBAccessor() as accessor:
            for stale_manifest in accessor._get_db_obj_query().all():
                accessor.delete(stale_manifest)
            accessor.commit()
def update_cost_summary_table(schema_name, provider_uuid, manifest_id=None,
                              start_date=None, end_date=None):
    """Refresh the derived cost summary table for one provider.

    Args:
        schema_name (str): The DB schema name.
        provider_uuid (str): The provider uuid.
        manifest_id (str): The manifest id.
        start_date (str, Optional): Start date of range to update derived cost.
        end_date (str, Optional): End date of range to update derived cost.

    Returns:
        None

    """
    worker_stats.COST_SUMMARY_ATTEMPTS_COUNTER.inc()

    # Same four-line message as before, assembled from its individual lines.
    log_lines = (
        'update_cost_summary_table called with args:',
        f' schema_name: {schema_name},',
        f' provider_uuid: {provider_uuid}',
        f' manifest_id: {manifest_id}',
    )
    LOG.info('\n'.join(log_lines))

    summary_updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    summary_updater.update_cost_summary_table(start_date, end_date)
def test_update_summary_tables_finalized_bill_not_done_proc(
        self, mock_daily, mock_summary):
    """Test that summary tables are run for a full month.

    The current month's bill is marked finalized, the daily and summary
    updates are invoked when the manifest reports ready, and the bill is
    then expected to carry summary creation/update timestamps.

    Args:
        mock_daily: Mock asserted to have been called after the daily update.
        mock_summary: Mock asserted to have been called after the summary update.
    """
    report_updater_base = ReportSummaryUpdater('acct10001',
                                               self.aws_provider_uuid,
                                               self.manifest.id)
    start_date = self.date_accessor.today_with_timezone('UTC')
    end_date = start_date + datetime.timedelta(days=1)
    # Bills are looked up by the first day of the month.
    bill_date = start_date.replace(day=1).date()
    with schema_context(self.schema):
        bill = self.accessor.get_cost_entry_bills_by_date(bill_date)[0]
        # Mark the bill as finalized before summarizing.
        bill.finalized_datetime = start_date
        self.accessor.commit()
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')
    # Precondition: the bill has never been summarized.
    self.assertIsNone(bill.summary_data_creation_datetime)
    self.assertIsNone(bill.summary_data_updated_datetime)
    if report_updater_base.manifest_is_ready():
        self.updater.update_daily_tables(start_date_str, end_date_str)
    mock_daily.assert_called()
    if report_updater_base.manifest_is_ready():
        self.updater.update_summary_tables(start_date_str, end_date_str)
    mock_summary.assert_called()
    # Re-read the bill through a fresh accessor to check the timestamps.
    with AWSReportDBAccessor('acct10001', self.column_map) as accessor:
        bill = accessor.get_cost_entry_bills_by_date(bill_date)[0]
        self.assertIsNotNone(bill.summary_data_creation_datetime)
        self.assertIsNotNone(bill.summary_data_updated_datetime)
def test_update_summary_tables_new_bill_not_done_processing(
        self, mock_daily, mock_summary):
    """Test summary behaviour for a new bill that is not done processing.

    Despite the test name, the assertions expect the daily and summary
    mocks to be called and the bill's summary timestamps to be set; the
    inline comment below attributes this to manifest_is_ready() always
    returning True.

    Args:
        mock_daily: Mock asserted to have been called after the daily update.
        mock_summary: Mock asserted to have been called after the summary update.
    """
    report_updater_base = ReportSummaryUpdater("acct10001",
                                               self.aws_provider_uuid,
                                               self.manifest.id)
    start_date = self.date_accessor.today_with_timezone("UTC")
    end_date = start_date + datetime.timedelta(days=1)
    # Bills are looked up by the first day of the month.
    bill_date = start_date.replace(day=1).date()
    with schema_context(self.schema):
        bill = self.accessor.get_cost_entry_bills_by_date(bill_date)[0]
    start_date_str = start_date.strftime("%Y-%m-%d")
    end_date_str = end_date.strftime("%Y-%m-%d")
    # Precondition: the bill has never been summarized.
    self.assertIsNone(bill.summary_data_creation_datetime)
    self.assertIsNone(bill.summary_data_updated_datetime)
    # manifest_is_ready is now unconditionally returning True, so summary is expected.
    if report_updater_base.manifest_is_ready():
        self.updater.update_daily_tables(start_date_str, end_date_str)
    mock_daily.assert_called()
    if report_updater_base.manifest_is_ready():
        self.updater.update_summary_tables(start_date_str, end_date_str)
    mock_summary.assert_called()
    # Re-read the bill through a fresh accessor to check the timestamps.
    with AWSReportDBAccessor("acct10001", self.column_map) as accessor:
        bill = accessor.get_cost_entry_bills_by_date(bill_date)[0]
        self.assertIsNotNone(bill.summary_data_creation_datetime)
        self.assertIsNotNone(bill.summary_data_updated_datetime)
def test_update_summary_tables_new_period_not_done_processing(
        self, mock_daily, mock_sum, mock_storage_daily, mock_storage_summary):
    """Test that summary tables are not run since num_processed_files != num_total_files.

    The fixture manifest is replaced by one for the previous month with 1 of
    2 files processed. None of the mocks may be called and the usage period
    must keep empty summary timestamps.

    Args:
        mock_daily: Mock asserted not to have been called.
        mock_sum: Mock asserted not to have been called.
        mock_storage_daily: Mock asserted not to have been called.
        mock_storage_summary: Mock asserted not to have been called.
    """
    # First day of the previous month.
    billing_start = self.date_accessor.today_with_timezone('UTC').replace(
        day=1) + relativedelta(months=-1)
    manifest_dict = {
        'assembly_id': '1234',
        'billing_period_start_datetime': billing_start,
        'num_total_files': 2,
        'num_processed_files': 1,
        'provider_id': 2
    }
    # Swap the fixture manifest for the partially processed one.
    self.manifest_accessor.delete(self.manifest)
    self.manifest_accessor.commit()
    self.manifest = self.manifest_accessor.add(**manifest_dict)
    self.manifest_accessor.commit()
    report_updater_base = ReportSummaryUpdater('acct10001',
                                               self.ocp_test_provider_uuid,
                                               self.manifest.id)
    start_date = self.date_accessor.today_with_timezone('UTC')
    end_date = start_date + datetime.timedelta(days=1)
    bill_date = start_date.replace(day=1).date()
    period = self.accessor.get_usage_periods_by_date(bill_date)[0]
    # Precondition: the usage period has never been summarized.
    self.assertIsNone(period.summary_data_creation_datetime)
    self.assertIsNone(period.summary_data_updated_datetime)
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')
    if report_updater_base.manifest_is_ready():
        self.updater.update_daily_tables(start_date_str, end_date_str)
    mock_daily.assert_not_called()
    mock_storage_daily.assert_not_called()
    mock_sum.assert_not_called()
    mock_storage_summary.assert_not_called()
    if report_updater_base.manifest_is_ready():
        self.updater.update_summary_tables(start_date_str, end_date_str)
    mock_sum.assert_not_called()
    mock_storage_summary.assert_not_called()
    # Re-read the period through a fresh accessor; timestamps must still be unset.
    with OCPReportDBAccessor('acct10001', self.column_map) as accessor:
        period = accessor.get_usage_periods_by_date(bill_date)[0]
        self.assertIsNone(period.summary_data_creation_datetime)
        self.assertIsNone(period.summary_data_updated_datetime)
def test_manifest_is_ready_is_ready(self):
    """Verify manifest_is_ready() is truthy when every file has been processed."""
    first_of_month = DateAccessor().today_with_timezone('UTC').replace(day=1)
    # Processed count equals the total count (2 of 2).
    manifest_fields = dict(
        assembly_id='1234',
        billing_period_start_datetime=first_of_month,
        num_total_files=2,
        num_processed_files=2,
        provider_uuid=self.ocp_provider_uuid,
    )
    with ReportManifestDBAccessor() as manifest_accessor:
        manifest_id = manifest_accessor.add(**manifest_fields).id

    summary_updater = ReportSummaryUpdater(self.schema,
                                           self.ocp_test_provider_uuid,
                                           manifest_id)
    self.assertTrue(summary_updater.manifest_is_ready())
def test_bad_provider(self):
    """Verify that a provider of an unimplemented type is rejected.

    A provider with type "FOO" is created, and constructing a
    ReportSummaryUpdater for it must raise ReportSummaryUpdaterError.
    """
    self.unknown_auth = ProviderAuthentication.objects.create(
        credentials={"credentials": {"role_arn": "unknown"}})
    self.unknown_auth.save()

    self.unknown_billing_source = ProviderBillingSource.objects.create(
        data_source={"data_source": {"bucket": "unknown"}})
    self.unknown_billing_source.save()

    provider_fields = dict(
        uuid=self.unkown_test_provider_uuid,
        name="Test Provider",
        type="FOO",
        authentication=self.unknown_auth,
        billing_source=self.unknown_billing_source,
        customer=self.customer,
        setup_complete=False,
        active=True,
    )
    # The report-update check task is patched out while the provider is saved.
    with patch("masu.celery.tasks.check_report_updates"):
        self.unknown_provider = Provider.objects.create(**provider_fields)
        self.unknown_provider.save()

    with self.assertRaises(ReportSummaryUpdaterError):
        ReportSummaryUpdater(self.schema, self.unkown_test_provider_uuid)
def test_manifest_is_ready_is_not_ready(self):
    """Check manifest_is_ready() for a manifest with unprocessed files.

    The manifest records 1 of 2 files processed; the assertion nevertheless
    expects a truthy result (see the inline comment).
    """
    first_of_month = DateAccessor().today_with_timezone("UTC").replace(day=1)
    manifest_fields = dict(
        assembly_id="1234",
        billing_period_start_datetime=first_of_month,
        num_total_files=2,
        num_processed_files=1,
        provider_uuid=self.ocp_provider_uuid,
    )
    with ReportManifestDBAccessor() as manifest_accessor:
        manifest_id = manifest_accessor.add(**manifest_fields).id

    summary_updater = ReportSummaryUpdater(self.schema,
                                           self.ocp_test_provider_uuid,
                                           manifest_id)
    # manifest_is_ready is now unconditionally returning True, so summary is expected.
    self.assertTrue(summary_updater.manifest_is_ready())
def test_no_provider_on_create(self):
    """Verify the updater cannot be built for a provider uuid that does not exist."""
    first_of_month = DateAccessor().today_with_timezone("UTC").replace(day=1)
    # Freshly generated uuid that is not passed to any provider creation here.
    missing_provider_uuid = uuid4()
    manifest_fields = dict(
        assembly_id="1234",
        billing_period_start_datetime=first_of_month,
        num_total_files=2,
        provider_uuid=self.ocp_provider_uuid,
    )
    with ReportManifestDBAccessor() as manifest_accessor:
        manifest_id = manifest_accessor.add(**manifest_fields).id

    with self.assertRaises(ReportSummaryUpdaterError):
        ReportSummaryUpdater(self.schema, missing_provider_uuid, manifest_id)
def update_summary_tables(schema_name, provider, provider_uuid, start_date,
                          end_date=None, manifest_id=None):
    """Populate the summary tables for reporting and queue follow-up tasks.

    Args:
        schema_name (str): The DB schema name.
        provider (str): The provider type.
        provider_uuid (str): The provider uuid.
        start_date (str): The date to start populating the table.
        end_date (str): The date to end on.
        manifest_id: The manifest id; defaults to None.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    LOG.info("\n".join((
        "update_summary_tables called with args:",
        f" schema_name: {schema_name},",
        f" provider: {provider},",
        f" start_date: {start_date},",
        f" end_date: {end_date},",
        f" manifest_id: {manifest_id}",
    )))

    summary_updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if summary_updater.manifest_is_ready():
        start_date, end_date = summary_updater.update_daily_tables(start_date, end_date)
        summary_updater.update_summary_tables(start_date, end_date)

    # Without a provider uuid only the materialized views are refreshed.
    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return

    last_month_end = DateHelper(utc=True).last_month_end
    summary_start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    # Naive midnight on the first day of the month that last_month_end falls in.
    cutoff = last_month_end.replace(
        tzinfo=None, microsecond=0, second=0, minute=0, hour=0, day=1)

    pipeline = [
        update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
        refresh_materialized_views.si(schema_name, provider, manifest_id),
    ]
    # Line items are only purged when a manifest id is present, so the purge
    # always follows a summarize run, and only when the range starts on or
    # before the cutoff.
    if manifest_id and summary_start <= cutoff:
        simulate, line_items_only = False, True
        pipeline.append(
            remove_expired_data.si(schema_name, provider, simulate,
                                   provider_uuid, line_items_only))
    chain(*pipeline).apply_async()
def test_azure_local_route(self, mock_daily, mock_update, mock_cloud,
                           mock_cloud_cost):
    """Verify the Azure updater is selected and each update step is routed to it."""
    date_range = (1, 2)
    mock_daily.return_value = date_range
    mock_update.return_value = date_range

    updater = ReportSummaryUpdater(self.schema, self.azure_test_provider_uuid)
    self.assertIsInstance(updater._updater, AzureReportSummaryUpdater)

    # Daily step: only the daily mock may have been touched.
    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    for untouched in (mock_update, mock_cloud):
        untouched.assert_not_called()

    # Summary step: the cloud mock receives the range returned by mock_update.
    updater.update_summary_tables(self.today, self.tomorrow)
    mock_update.assert_called_with(self.today, self.tomorrow)
    mock_cloud.assert_called_with(*date_range)

    updater.update_cost_summary_table(self.today, self.tomorrow)
    mock_cloud_cost.assert_called_with(self.today, self.tomorrow)
def test_aws_local_route(self, mock_daily, mock_update):
    """Verify the AWS updater is selected and the daily/summary steps are routed to it."""
    date_range = (1, 2)
    mock_daily.return_value = date_range
    mock_update.return_value = date_range

    updater = ReportSummaryUpdater(self.schema,
                                   self.aws_provider_uuid,
                                   tracing_id=self.tracing_id)
    self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

    # Daily step must not trigger the summary mock.
    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    mock_update.assert_not_called()

    updater.update_summary_tables(self.today, self.tomorrow, self.tracing_id)
    mock_update.assert_called_with(self.today, self.tomorrow)
def test_aws_ocp_exception_route(self, mock_daily, mock_update, mock_cloud):
    """Verify a failure in the cloud step surfaces as ReportSummaryUpdaterCloudError."""
    date_range = (1, 2)
    mock_daily.return_value = date_range
    mock_update.return_value = date_range
    # Make the cloud mock raise when it is invoked.
    mock_cloud.side_effect = Exception("test")

    updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
    self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    for untouched in (mock_update, mock_cloud):
        untouched.assert_not_called()

    with self.assertRaises(ReportSummaryUpdaterCloudError):
        updater.update_summary_tables(self.today, self.tomorrow)
def update_summary_tables(schema_name, provider, provider_uuid, start_date,
                          end_date=None, manifest_id=None):
    """Populate the summary tables for reporting and queue follow-up tasks.

    Args:
        schema_name (str): The DB schema name.
        provider (str): The provider type.
        provider_uuid (str): The provider uuid.
        start_date (str): The date to start populating the table.
        end_date (str): The date to end on.
        manifest_id: The manifest id; defaults to None.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    LOG.info("\n".join((
        "update_summary_tables called with args:",
        f" schema_name: {schema_name},",
        f" provider: {provider},",
        f" start_date: {start_date},",
        f" end_date: {end_date},",
        f" manifest_id: {manifest_id}",
    )))

    summary_updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if summary_updater.manifest_is_ready():
        start_date, end_date = summary_updater.update_daily_tables(start_date, end_date)
        summary_updater.update_summary_tables(start_date, end_date)

    # Without a provider uuid only the materialized views are refreshed.
    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return

    charge_task = update_charge_info.s(schema_name, provider_uuid, start_date, end_date)
    refresh_task = refresh_materialized_views.si(schema_name, provider, manifest_id)
    chain(charge_task, refresh_task).apply_async()
def update_summary_tables(schema_name, provider, provider_uuid, start_date,
                          end_date=None, manifest_id=None):
    """Populate the summary tables for reporting, then queue the charge update.

    Args:
        schema_name (str): The DB schema name.
        provider (str): The provider type.
        provider_uuid (str): The provider uuid.
        start_date (str): The date to start populating the table.
        end_date (str): The date to end on.
        manifest_id: The manifest id; defaults to None.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    LOG.info('\n'.join((
        'update_summary_tables called with args:',
        f' schema_name: {schema_name},',
        f' provider: {provider},',
        f' start_date: {start_date},',
        f' end_date: {end_date},',
        f' manifest_id: {manifest_id}',
    )))

    summary_updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if summary_updater.manifest_is_ready():
        start_date, end_date = summary_updater.update_daily_tables(start_date, end_date)
        summary_updater.update_summary_tables(start_date, end_date)

    # Nothing further is queued when no provider uuid is given.
    if not provider_uuid:
        return

    charge_args = (schema_name, provider_uuid, start_date, end_date)
    # The cost summary signature is attached as the link of the charge task.
    cost_summary_link = update_cost_summary_table.si(
        schema_name, provider_uuid, manifest_id, start_date, end_date)
    update_charge_info.apply_async(args=charge_args, link=cost_summary_link)
def test_bad_provider(self):
    """Verify that a provider of an unimplemented type is rejected.

    A provider with type "FOO" is created, and constructing a
    ReportSummaryUpdater for it must raise ReportSummaryUpdaterError.
    """
    self.unknown_auth = ProviderAuthentication.objects.create(
        provider_resource_name="unknown")
    self.unknown_auth.save()

    self.unknown_billing_source = ProviderBillingSource.objects.create(
        bucket="unknown")
    self.unknown_billing_source.save()

    provider_fields = dict(
        uuid=self.unkown_test_provider_uuid,
        name="Test Provider",
        type="FOO",
        authentication=self.unknown_auth,
        billing_source=self.unknown_billing_source,
        customer=self.customer,
        setup_complete=False,
        active=True,
    )
    self.unknown_provider = Provider.objects.create(**provider_fields)
    self.unknown_provider.save()

    with self.assertRaises(ReportSummaryUpdaterError):
        ReportSummaryUpdater(self.schema, self.unkown_test_provider_uuid)
def update_summary_tables(schema_name, provider, provider_uuid, start_date,
                          end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    After the daily and summary tables are updated, follow-up tasks are
    queued: a materialized view refresh, preceded by a cost model update
    when the provider has a cost model, and followed by a line-item purge
    when a manifest id is given and the range starts on or before the start
    of the previous month.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id The manifest id; defaults to None.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    # The daily update returns the date range that the summary step then uses.
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    # Without a provider uuid only the materialized views are refreshed.
    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return
    with CostModelDBAccessor(schema_name,
                             provider_uuid) as cost_model_accessor:
        cost_model = cost_model_accessor.cost_model
    if cost_model is not None:
        # Cost model update first, then the view refresh (immutable signature).
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date,
            end_date) | refresh_materialized_views.si(
                schema_name, provider, manifest_id)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped. No cost model available for \n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(schema_name, provider,
                                                    manifest_id)
    dh = DateHelper(utc=True)
    # Drop tzinfo so the value compares against the naive strptime() result.
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None)
    start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    if manifest_id and (start_date_obj <= prev_month_start_day):
        # Require a manifest id so that line items are only deleted after
        # the summarize_reports task above has run.
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(schema_name, provider,
                                               simulate, provider_uuid,
                                               line_items_only)
    chain(linked_tasks).apply_async()
def test_azure_parquet_summary_updater(self):
    """Verify an Azure provider is given an AzureReportParquetSummaryUpdater."""
    selected_updater = ReportSummaryUpdater(self.schema, self.azure_provider_uuid)._updater
    self.assertIsInstance(selected_updater, AzureReportParquetSummaryUpdater)
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Unless ``synchronous`` is set, the run is guarded by a worker-cache lock
    keyed on the schema name: if the task is already running it is requeued,
    otherwise the lock is taken and released on every exit path below.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id The manifest id; defaults to None.
        queue_name Optional queue used for every task queued here instead
            of each task's default queue constant.
        synchronous (bool) When True the worker-cache lock is skipped.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name]

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise

    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        # Release the lock on this early exit too; otherwise it stays held
        # until its timeout and later runs for the schema keep requeuing.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return

    if enable_trino_processing(provider_uuid) and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        # No cost model lookup for these providers on the trino path.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        # Cost model update first, then the view refresh (immutable signature).
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # Require a manifest id so that line items are only deleted after
        # the summarize_reports task above has run.
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only,
            queue_name).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)

    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
def update_summary_tables(schema_name, provider, provider_uuid, start_date,
                          end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    After the daily and summary tables are updated, follow-up tasks are
    queued: a materialized view refresh, preceded by a cost model update
    when a cost model was found, and followed by a line-item purge when a
    manifest id is given and the range starts on or before the start of the
    previous month.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id The manifest id; defaults to None.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    # The daily update returns the date range that the summary step then uses.
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    # Without a provider uuid only the materialized views are refreshed.
    if not provider_uuid:
        refresh_materialized_views.delay(schema_name,
                                         provider,
                                         manifest_id=manifest_id)
        return
    if settings.ENABLE_PARQUET_PROCESSING and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        # No cost model lookup for these providers on the parquet path.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model
    if cost_model is not None:
        # Cost model update first, then the view refresh (immutable signature).
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date,
            end_date) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id)
    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    # A string start date is parsed to a date before the comparison below.
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # Require a manifest id so that line items are only deleted after
        # the summarize_reports task above has run.
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(schema_name, provider,
                                               simulate, provider_uuid,
                                               line_items_only)
    chain(linked_tasks).apply_async()
def test_bad_provider(self):
    """Verify that a freshly generated provider uuid is reported as not found."""
    unknown_provider_uuid = uuid4()
    with self.assertRaises(ReportSummaryUpdaterProviderNotFoundError):
        ReportSummaryUpdater(self.schema, unknown_provider_uuid)
def update_openshift_on_cloud(
    self,
    schema_name,
    openshift_provider_uuid,
    infrastructure_provider_uuid,
    infrastructure_provider_type,
    start_date,
    end_date,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Update OpenShift on Cloud for a specific OpenShift and cloud source.

    Unless ``synchronous`` is set, the run is guarded by a worker-cache lock
    keyed on the schema name and infrastructure provider uuid: if the task
    is already running it is requeued, otherwise the lock is taken and
    released in the ``finally`` clause.

    Args:
        self: The task instance; ``self.request.retries`` is read for the
            failure log message.
        schema_name: The DB schema name.
        openshift_provider_uuid: UUID of the OpenShift provider.
        infrastructure_provider_uuid: UUID of the cloud provider; also used
            to construct the ReportSummaryUpdater.
        infrastructure_provider_type: Provider type of the cloud provider.
        start_date: Start of the range to summarize.
        end_date: End of the range to summarize.
        manifest_id: The manifest id; defaults to None.
        queue_name: Optional queue used when requeuing instead of
            UPDATE_SUMMARY_TABLES_QUEUE.
        synchronous (bool): When True the worker-cache lock is skipped.
        tracing_id: Identifier passed to log_json and to the updater.

    Raises:
        ReportSummaryUpdaterCloudError: When the summary update raises it;
            the failure is logged and a new instance is raised from it.
    """
    task_name = "masu.processor.tasks.update_openshift_on_cloud"
    cache_args = [schema_name, infrastructure_provider_uuid]

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infrastructure_provider_uuid,
                infrastructure_provider_type,
                start_date,
                end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (f"update_openshift_on_cloud called with args: "
            f" schema_name: {schema_name}, "
            f" openshift_provider_uuid: {openshift_provider_uuid}, "
            f" infrastructure_provider_uuid: {infrastructure_provider_uuid}, "
            f" infrastructure_provider_type: {infrastructure_provider_type}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" queue_name: {queue_name}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name,
                                       infrastructure_provider_uuid,
                                       manifest_id, tracing_id)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            openshift_provider_uuid,
            infrastructure_provider_uuid,
            infrastructure_provider_type,
            tracing_id,
        )
    except ReportSummaryUpdaterCloudError as ex:
        # Adjacent f-strings are concatenated into ONE message; separating
        # them with commas would hand log_json a tuple instead of a string.
        failure_msg = (
            f"update_openshift_on_cloud failed for: {infrastructure_provider_type} "
            f"provider: {infrastructure_provider_uuid}, "
            f"OpenShift provider {openshift_provider_uuid}. \nError: {ex}\n"
            f"Retry {self.request.retries} of {settings.MAX_UPDATE_RETRIES}"
        )
        LOG.info(log_json(tracing_id, failure_msg))
        raise ReportSummaryUpdaterCloudError from ex
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Unless ``synchronous`` is set, the run is guarded by a worker-cache lock
    keyed on schema name, provider type and provider uuid: if the task is
    already running it is requeued, otherwise the lock is taken and released
    on every exit path below.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id The manifest id; defaults to None.
        queue_name Optional queue used for every task queued here instead
            of each task's default queue constant.
        synchronous (bool) When True the worker-cache lock is skipped and
            the OpenShift-on-Cloud group is applied inline.
        tracing_id Identifier passed to log_json and to downstream tasks.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (f"update_summary_tables called with args: "
            f" schema_name: {schema_name}, "
            f" provider: {provider}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid,
                                       manifest_id, tracing_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(
            start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        # A cloud correlation failure is logged and processing continues.
        LOG.info(
            log_json(
                tracing_id,
                f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}"
            ))
    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        LOG.warning(
            log_json(
                tracing_id,
                (f"{pnf_ex} Possible source/provider delete during processing. "
                 + "Processing for this provier will halt."),
            ))
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise

    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name,
            tracing_id=tracing_id).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        # Release the lock on this early exit too; otherwise it stays held
        # until its timeout and later runs with the same key keep requeuing.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return

    if enable_trino_processing(provider_uuid, provider,
                               schema_name) and provider in (
                                   Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL,
                                   Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL,
                               ):
        # No cost model lookup for these providers on the trino path.
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}")
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items():
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE))

    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()

    if cost_model is not None:
        # Cost model update first, then the view refresh (immutable signature).
        linked_tasks = update_cost_model_costs.s(
            schema_name,
            provider_uuid,
            start_date,
            end_date,
            tracing_id=tracing_id).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id,
                tracing_id=tracing_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
            tracing_id=tracing_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
def test_update_openshift_on_cloud_summary_tables(self, mock_update):
    """Verify when the OCP on Cloud summary runs, is skipped, and fails.

    Three scenarios are exercised with identical arguments: an updater built
    for the Azure provider (mock called), one built for the OpenShift
    provider (mock not called), and an Azure updater whose mock raises
    (ReportSummaryUpdaterCloudError expected).
    """
    start_date = DateHelper().this_month_start.date()
    end_date = DateHelper().today.date()

    def run_summary(summary_updater):
        """Invoke the OCP on Cloud summary with the shared arguments."""
        summary_updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            self.ocp_on_azure_ocp_provider.uuid,
            self.azure_provider_uuid,
            Provider.PROVIDER_AZURE,
            tracing_id=1,
        )

    run_summary(ReportSummaryUpdater(self.schema, self.azure_provider_uuid))
    mock_update.assert_called()
    mock_update.reset_mock()

    # Only run for cloud sources that support OCP on Cloud
    run_summary(ReportSummaryUpdater(self.schema, self.ocp_on_azure_ocp_provider.uuid))
    mock_update.assert_not_called()
    mock_update.reset_mock()

    # The updater is built before the mock is armed to raise.
    failing_updater = ReportSummaryUpdater(self.schema, self.azure_provider_uuid)
    mock_update.side_effect = Exception
    with self.assertRaises(ReportSummaryUpdaterCloudError):
        run_summary(failing_updater)
def test_bad_provider(self):
    """Verify that a random provider uuid makes the updater raise an error."""
    unknown_provider_uuid = str(uuid.uuid4())
    with self.assertRaises(ReportSummaryUpdaterError):
        ReportSummaryUpdater(self.schema, unknown_provider_uuid)