def test_aws_local_route(self, mock_daily, mock_update, mock_cloud):
    """Verify the AWS-local provider routes to the AWS summary updater."""
    expected_range = (1, 2)
    mock_daily.return_value = expected_range
    mock_update.return_value = expected_range

    updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
    self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

    # Updating the daily tables must not touch the summary or cloud updaters.
    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    mock_update.assert_not_called()
    mock_cloud.assert_not_called()

    # Updating summary tables runs the summary step, then the OCP-on-cloud
    # step with the date range the summary step returned.
    updater.update_summary_tables(self.today, self.tomorrow)
    mock_update.assert_called_with(self.today, self.tomorrow)
    mock_cloud.assert_called_with(*expected_range)
def test_azure_local_route(self, mock_daily, mock_update):
    """Verify the Azure-local provider routes to the Azure summary updater."""
    date_range = (1, 2)
    mock_daily.return_value = date_range
    mock_update.return_value = date_range

    updater = ReportSummaryUpdater(self.schema, self.azure_test_provider_uuid, tracing_id=self.tracing_id)
    self.assertIsInstance(updater._updater, AzureReportSummaryUpdater)

    # Daily-table update alone must not trigger the summary updater.
    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    mock_update.assert_not_called()

    # Summary update forwards the dates (tracing id is consumed internally).
    updater.update_summary_tables(self.today, self.tomorrow, self.tracing_id)
    mock_update.assert_called_with(self.today, self.tomorrow)
def test_aws_ocp_exception_route(self, mock_daily, mock_update, mock_cloud):
    """Verify a cloud-updater failure surfaces as ReportSummaryUpdaterCloudError."""
    date_range = (1, 2)
    mock_daily.return_value = date_range
    mock_update.return_value = date_range
    # Force the OCP-on-cloud step to blow up.
    mock_cloud.side_effect = Exception("test")

    updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
    self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

    # Daily-table update must not reach the summary or cloud steps.
    updater.update_daily_tables(self.today, self.tomorrow)
    mock_daily.assert_called_with(self.today, self.tomorrow)
    mock_update.assert_not_called()
    mock_cloud.assert_not_called()

    # The raw exception from the cloud step is re-raised as the domain error.
    with self.assertRaises(ReportSummaryUpdaterCloudError):
        updater.update_summary_tables(self.today, self.tomorrow)
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    After the daily and summary tables are populated, queue follow-up
    Celery tasks: cost-model cost updates, a materialized-view refresh,
    and — when this run reaches back before the current month — removal
    of expired line items.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
        if provider_uuid:
            dh = DateHelper(utc=True)
            # First instant of the previous month (naive) — the cutoff for
            # deciding whether this run covered old-enough data to purge.
            prev_month_start = dh.last_month_end.replace(tzinfo=None)
            prev_month_start = prev_month_start.replace(microsecond=0, second=0, minute=0, hour=0, day=1)
            # update_daily_tables may hand back date/datetime objects rather
            # than the "%Y-%m-%d" string this task was invoked with, so only
            # parse when we actually have a string (later revisions of this
            # task guard the same way).
            if isinstance(start_date, str):
                start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
            else:
                start_date_obj = start_date
            if manifest_id and (start_date_obj <= prev_month_start):
                # We want make sure that the manifest_id is not none, because
                # we only want to call the delete line items after the
                # summarize_reports task above.
                simulate = False
                line_items_only = True
                chain(
                    update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                    refresh_materialized_views.si(schema_name, provider, manifest_id),
                    remove_expired_data.si(schema_name, provider, simulate, provider_uuid, line_items_only),
                ).apply_async()
            else:
                chain(
                    update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                    refresh_materialized_views.si(schema_name, provider, manifest_id),
                ).apply_async()
        else:
            # No provider uuid — just refresh the materialized views.
            refresh_materialized_views.delay(schema_name, provider, manifest_id)
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the daily and summary tables, then queue follow-up tasks.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    LOG.info(
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if not updater.manifest_is_ready():
        return
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    if not provider_uuid:
        # Nothing to charge; just refresh the materialized views.
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return
    # Charge-info update first, then the view refresh that depends on it.
    follow_up = chain(
        update_charge_info.s(schema_name, provider_uuid, start_date, end_date),
        refresh_materialized_views.si(schema_name, provider, manifest_id),
    )
    follow_up.apply_async()
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the daily and summary tables, then queue the charge update.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    LOG.info(
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if not updater.manifest_is_ready():
        return
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    if not provider_uuid:
        return
    # Kick off the charge calculation, linking the cost-summary-table task
    # so it runs only after the charge task succeeds.
    charge_args = (schema_name, provider_uuid, start_date, end_date)
    cost_summary_link = update_cost_summary_table.si(schema_name, provider_uuid, manifest_id, start_date, end_date)
    update_charge_info.apply_async(args=charge_args, link=cost_summary_link)
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Runs the daily/summary updaters for the provider, fans out one
    update_openshift_on_cloud task per correlated OpenShift cluster, and
    chains cost-model costs with a materialized-view refresh.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.
        queue_name (str) Optional queue override for downstream tasks.
        synchronous (bool) Run celery sub-tasks inline and skip task locking.
        tracing_id (str) Correlation id threaded through log_json output.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}
    if not synchronous:
        # Single-flight guard: if another worker already holds the lock for
        # this (schema, provider, uuid), requeue ourselves and bail out.
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=settings.WORKER_CACHE_TIMEOUT)
    stmt = (
        f"update_summary_tables called with args: "
        f" schema_name: {schema_name}, "
        f" provider: {provider}, "
        f" start_date: {start_date}, "
        f" end_date: {end_date}, "
        f" manifest_id: {manifest_id}, "
        f" tracing_id: {tracing_id}"
    )
    LOG.info(log_json(tracing_id, stmt))
    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id, tracing_id)
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        # OCP-on-cloud correlation failure is non-fatal: log and continue
        # with an empty infra map so provider summarization still completes.
        LOG.info(
            log_json(tracing_id, f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}")
        )
    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        # The source/provider vanished mid-run (e.g. deleted): release the
        # lock and stop quietly rather than raising.
        LOG.warning(
            log_json(
                tracing_id,
                (f"{pnf_ex} Possible source/provider delete during processing. " + "Processing for this provier will halt."),
            )
        )
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception as ex:
        # Any other failure: release the lock before re-raising so a retry
        # isn't blocked by a stale single-task lock.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex
    if not provider_uuid:
        # No provider: only the materialized views need refreshing.
        refresh_materialized_views.s(
            schema_name, provider, manifest_id=manifest_id, queue_name=queue_name, tracing_id=tracing_id
        ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return
    if enable_trino_processing(provider_uuid, provider, schema_name) and provider in (
        Provider.PROVIDER_AWS,
        Provider.PROVIDER_AWS_LOCAL,
        Provider.PROVIDER_AZURE,
        Provider.PROVIDER_AZURE_LOCAL,
    ):
        # Trino-processed AWS/Azure providers apply markup during
        # summarization, so the separate cost-model task is skipped.
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model
    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items():
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
        )
    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()
    if cost_model is not None:
        # Cost-model costs first, then the view refresh that depends on them.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date, tracing_id=tracing_id
        ).set(queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE) | refresh_materialized_views.si(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id, tracing_id=tracing_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id, tracing_id=tracing_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    After the daily and summary tables are populated, build a chain of
    follow-up Celery tasks: cost-model cost updates (when a cost model
    exists), a materialized-view refresh, and — when this run reaches back
    before the current month — removal of expired line items.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return
    with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
        cost_model = cost_model_accessor.cost_model
    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date
        ) | refresh_materialized_views.si(schema_name, provider, manifest_id)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped. No cost model available for \n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(schema_name, provider, manifest_id)
    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None)
    # update_daily_tables may hand back date/datetime objects rather than the
    # "%Y-%m-%d" string this task was invoked with, so only parse when we
    # actually have a string (later revisions of this task guard the same way).
    if isinstance(start_date, str):
        start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    else:
        start_date_obj = start_date
    if manifest_id and (start_date_obj <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the
        # summarize_reports task above.
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate, provider_uuid, line_items_only)
    chain(linked_tasks).apply_async()
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    After the daily and summary tables are populated, build a chain of
    follow-up Celery tasks: cost-model cost updates (skipped for
    parquet-processed AWS/Azure providers), a materialized-view refresh,
    and — when this run reaches back before the current month — removal of
    expired line items.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)
    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)
    if not provider_uuid:
        # No provider: only the materialized views need refreshing.
        refresh_materialized_views.delay(schema_name, provider, manifest_id=manifest_id)
        return
    if settings.ENABLE_PARQUET_PROCESSING and provider in (
        Provider.PROVIDER_AWS,
        Provider.PROVIDER_AWS_LOCAL,
        Provider.PROVIDER_AZURE,
        Provider.PROVIDER_AZURE_LOCAL,
    ):
        # Parquet-processed AWS/Azure providers apply markup during
        # summarization, so the separate cost-model task is skipped.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model
    if cost_model is not None:
        # Cost-model costs first, then the view refresh that depends on them.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date
        ) | refresh_materialized_views.si(schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped.\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id
        )
    dh = DateHelper(utc=True)
    # First day of the previous month as a naive date, for comparison below.
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    # start_date may have been returned as a date object by
    # update_daily_tables; normalize strings to a date before comparing.
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the summarize_reports
        # task above
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate, provider_uuid, line_items_only)
    chain(linked_tasks).apply_async()
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Holds a per-schema single-task lock while summarizing, then chains
    cost-model cost updates, a materialized-view refresh, and (for runs
    reaching back before the current month) expired line-item removal,
    routing each task to its configured queue.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int) The manifest id for this report run, if any.
        queue_name (str) Optional queue override for downstream tasks.
        synchronous (bool) Skip task locking when running inline.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    # NOTE: the lock key is per-schema only (no provider), so only one
    # summary task runs per customer schema at a time.
    cache_args = [schema_name]
    if not synchronous:
        # Single-flight guard: if another worker already holds the lock for
        # this schema, requeue ourselves and bail out.
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)
    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)
    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception as ex:
        # Release the lock before re-raising so a retry isn't blocked by a
        # stale single-task lock.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex
    if not provider_uuid:
        # No provider: only the materialized views need refreshing.
        refresh_materialized_views.s(schema_name, provider, manifest_id=manifest_id, queue_name=queue_name).apply_async(
            queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE
        )
        return
    if enable_trino_processing(provider_uuid) and provider in (
        Provider.PROVIDER_AWS,
        Provider.PROVIDER_AWS_LOCAL,
        Provider.PROVIDER_AZURE,
        Provider.PROVIDER_AZURE_LOCAL,
    ):
        # Trino-processed AWS/Azure providers apply markup during
        # summarization, so the separate cost-model task is skipped.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model
    if cost_model is not None:
        # Cost-model costs first, then the view refresh that depends on them,
        # each routed to its own queue.
        linked_tasks = update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date).set(
            queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
        ) | refresh_materialized_views.si(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped.\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    dh = DateHelper(utc=True)
    # First day of the previous month as a naive date, for comparison below.
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    # start_date may have been returned as a date object by
    # update_daily_tables; normalize strings to a date before comparing.
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the summarize_reports
        # task above
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only, queue_name
        ).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)
    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)