def refresh_materialized_views(  # noqa: C901
    schema_name,
    provider_type,
    manifest_id=None,
    provider_uuid="",
    synchronous=False,
    queue_name=None,
    tracing_id=None,
):
    """Refresh the database's materialized views for reporting.

    Args:
        schema_name (str): The DB schema name.
        provider_type (str): The provider type.
        manifest_id (int, Optional): Manifest marked completed after the refresh.
        provider_uuid (str, Optional): Provider whose data-updated timestamp is set.
        synchronous (bool): When False, use WorkerCache to prevent concurrent runs.
        queue_name (str, Optional): Celery queue override for requeued work.
        tracing_id (str, Optional): Correlation id passed to log_json.

    Returns:
        None
    """
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name, provider_type, provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock: requeue this task instead of blocking.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            refresh_materialized_views.s(
                schema_name,
                provider_type,
                manifest_id=manifest_id,
                provider_uuid=provider_uuid,
                synchronous=synchronous,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=settings.WORKER_CACHE_TIMEOUT)

    # NOTE(review): this tuple is empty, so the refresh loop below is currently a
    # no-op — presumably the per-provider view lists were removed; confirm intent.
    materialized_views = ()
    try:
        with schema_context(schema_name):
            for view in materialized_views:
                table_name = view._meta.db_table
                with connection.cursor() as cursor:
                    cursor.execute(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                    LOG.info(log_json(tracing_id, f"Refreshed {table_name}."))

        invalidate_view_cache_for_tenant_and_source_type(schema_name, provider_type)

        if provider_uuid:
            ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
        if manifest_id:
            # Processing for this manifest should be complete after this step
            with ReportManifestDBAccessor() as manifest_accessor:
                manifest = manifest_accessor.get_manifest_by_id(manifest_id)
                manifest_accessor.mark_manifest_as_completed(manifest)
    finally:
        # try/finally replaces the duplicated release-on-error/release-on-success
        # blocks; letting the exception propagate naturally also preserves the
        # original traceback (the old `raise ex` reset it).
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def update_cost_model_costs(schema_name, provider_uuid, start_date=None, end_date=None, queue_name=None, synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.
        queue_name (str, Optional) - Celery queue override for requeued work.
        synchronous (bool) - When False, use WorkerCache to prevent concurrent runs.

    Returns
        None
    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker is already updating these costs; requeue instead of blocking.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_cost_model_costs.s(
                schema_name,
                provider_uuid,
                start_date=start_date,
                end_date=end_date,
                queue_name=queue_name,
                synchronous=synchronous,
            ).apply_async(queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=600)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (
        f"update_cost_model_costs called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider_uuid: {provider_uuid}"
    )
    LOG.info(stmt)

    try:
        updater = CostModelCostUpdater(schema_name, provider_uuid)
        if updater:
            updater.update_cost_model_costs(start_date, end_date)
    finally:
        # try/finally replaces the duplicated release blocks; exceptions still
        # propagate, and natural propagation keeps the original traceback
        # (the old `raise ex` reset it).
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def refresh_materialized_views(schema_name, provider_type, manifest_id=None, provider_uuid=None, synchronous=False):
    """Refresh the database's materialized views for reporting.

    Args:
        schema_name (str): The DB schema name.
        provider_type (str): The provider type; selects which views to refresh.
        manifest_id (int, Optional): Manifest marked completed after the refresh.
        provider_uuid (str, Optional): Provider whose data-updated timestamp is set.
        synchronous (bool): When False, use WorkerCache to serialize per-schema runs.

    Returns:
        None
    """
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        # Busy-wait until no other worker is refreshing views for this schema.
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)
        worker_cache.lock_single_task(task_name, cache_args)

    materialized_views = ()
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        materialized_views = (
            AWS_MATERIALIZED_VIEWS + OCP_ON_AWS_MATERIALIZED_VIEWS + OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS
        )
    elif provider_type in (Provider.PROVIDER_OCP,):
        # BUGFIX: `(Provider.PROVIDER_OCP)` was not a tuple, so `in` performed a
        # substring match against the "OCP" string (e.g. "OC" also matched);
        # the trailing comma makes it a real one-element tuple.
        materialized_views = (
            OCP_MATERIALIZED_VIEWS
            + OCP_ON_AWS_MATERIALIZED_VIEWS
            + OCP_ON_AZURE_MATERIALIZED_VIEWS
            + OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS
        )
    elif provider_type in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
        materialized_views = (
            AZURE_MATERIALIZED_VIEWS + OCP_ON_AZURE_MATERIALIZED_VIEWS + OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS
        )

    try:
        with schema_context(schema_name):
            for view in materialized_views:
                table_name = view._meta.db_table
                with connection.cursor() as cursor:
                    cursor.execute(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                    LOG.info(f"Refreshed {table_name}.")

        invalidate_view_cache_for_tenant_and_source_type(schema_name, provider_type)

        if provider_uuid:
            ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
        if manifest_id:
            # Processing for this manifest should be complete after this step
            with ReportManifestDBAccessor() as manifest_accessor:
                manifest = manifest_accessor.get_manifest_by_id(manifest_id)
                manifest_accessor.mark_manifest_as_completed(manifest)
    finally:
        # Release the lock even on failure; previously an exception left the
        # single-task lock held, stalling every later refresh for this schema.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def update_cost_model_costs(schema_name, provider_uuid, start_date=None, end_date=None, provider_type=None, synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.
        provider_type (str, Optional) - Unused here; kept for caller compatibility.
        synchronous (bool) - When False, use WorkerCache to serialize runs.

    Returns
        None
    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        # Busy-wait until no other worker is updating costs for these args.
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)
        worker_cache.lock_single_task(task_name, cache_args, timeout=300)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (
        f"update_cost_model_costs called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider_uuid: {provider_uuid}"
    )
    LOG.info(stmt)

    try:
        updater = CostModelCostUpdater(schema_name, provider_uuid)
        if updater:
            updater.update_cost_model_costs(start_date, end_date)
    finally:
        # Release the lock even on failure; previously an exception left the
        # single-task lock held until its timeout, stalling later updates.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def update_openshift_on_cloud(
    self,
    schema_name,
    openshift_provider_uuid,
    infrastructure_provider_uuid,
    infrastructure_provider_type,
    start_date,
    end_date,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Update OpenShift on Cloud for a specific OpenShift and cloud source.

    Args:
        schema_name (str): The DB schema name.
        openshift_provider_uuid (str): The OpenShift provider uuid.
        infrastructure_provider_uuid (str): The backing cloud provider uuid.
        infrastructure_provider_type (str): The backing cloud provider type.
        start_date (str): Start of the date range to summarize.
        end_date (str): End of the date range to summarize.
        manifest_id (int, Optional): Manifest associated with this run.
        queue_name (str, Optional): Celery queue override for requeued work.
        synchronous (bool): When False, use WorkerCache to prevent concurrent runs.
        tracing_id (str, Optional): Correlation id passed to log_json.

    Raises:
        ReportSummaryUpdaterCloudError: re-raised (chained) when summarization fails.
    """
    task_name = "masu.processor.tasks.update_openshift_on_cloud"
    cache_args = [schema_name, infrastructure_provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock: requeue this task instead of blocking.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infrastructure_provider_uuid,
                infrastructure_provider_type,
                start_date,
                end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (
        f"update_openshift_on_cloud called with args: "
        f" schema_name: {schema_name}, "
        f" openshift_provider_uuid: {openshift_provider_uuid}, "
        f" infrastructure_provider_uuid: {infrastructure_provider_uuid}, "
        f" infrastructure_provider_type: {infrastructure_provider_type}, "
        f" start_date: {start_date}, "
        f" end_date: {end_date}, "
        f" manifest_id: {manifest_id}, "
        f" queue_name: {queue_name}, "
        f" tracing_id: {tracing_id}"
    )
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, infrastructure_provider_uuid, manifest_id, tracing_id)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            openshift_provider_uuid,
            infrastructure_provider_uuid,
            infrastructure_provider_type,
            tracing_id,
        )
    except ReportSummaryUpdaterCloudError as ex:
        # BUGFIX: the message below was a tuple of f-strings (trailing commas),
        # so log_json received a tuple instead of one string; removing the
        # commas lets implicit concatenation build a single message.
        LOG.info(
            log_json(
                tracing_id,
                (
                    f"update_openshift_on_cloud failed for: {infrastructure_provider_type} "
                    f"provider: {infrastructure_provider_uuid}, "
                    f"OpenShift provider {openshift_provider_uuid}. \nError: {ex}\n"
                    f"Retry {self.request.retries} of {settings.MAX_UPDATE_RETRIES}"
                ),
            )
        )
        # Chain the original error so its traceback is not lost; callers still
        # catch the same exception type.
        raise ReportSummaryUpdaterCloudError from ex
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int, Optional) Manifest forwarded to downstream tasks.
        queue_name (str, Optional) Celery queue override.
        synchronous (bool) Run OCP-on-cloud subtasks inline when True.
        tracing_id (str, Optional) Correlation id passed to log_json.

    Returns:
        None
    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock: requeue this task instead of blocking.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (
        f"update_summary_tables called with args: "
        f" schema_name: {schema_name}, "
        f" provider: {provider}, "
        f" start_date: {start_date}, "
        f" end_date: {end_date}, "
        f" manifest_id: {manifest_id}, "
        f" tracing_id: {tracing_id}"
    )
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id, tracing_id)
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        # OCP-on-cloud correlation failure is non-fatal; log and continue.
        LOG.info(
            log_json(tracing_id, f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}")
        )
    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        # Source was likely deleted mid-processing; release the lock and stop.
        # ("provier" typo in the halt message fixed.)
        LOG.warning(
            log_json(
                tracing_id,
                (
                    f"{pnf_ex} Possible source/provider delete during processing. "
                    + "Processing for this provider will halt."
                ),
            )
        )
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        # Bare raise keeps the original traceback (the old `raise ex` reset it).
        raise

    if not provider_uuid:
        # No specific provider: just refresh the views and stop.
        refresh_materialized_views.s(
            schema_name, provider, manifest_id=manifest_id, queue_name=queue_name, tracing_id=tracing_id
        ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid, provider, schema_name) and provider in (
        Provider.PROVIDER_AWS,
        Provider.PROVIDER_AWS_LOCAL,
        Provider.PROVIDER_AZURE,
        Provider.PROVIDER_AZURE_LOCAL,
    ):
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items():
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
        )

    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()

    if cost_model is not None:
        # Chain cost-model update into the materialized-view refresh.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date, tracing_id=tracing_id
        ).set(queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE) | refresh_materialized_views.si(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id, tracing_id=tracing_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id, tracing_id=tracing_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    chain(linked_tasks).apply_async()

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date (str) The date to start populating the table.
        end_date (str) The date to end on.
        manifest_id (int, Optional) Manifest forwarded to downstream tasks.
        queue_name (str, Optional) Celery queue override.
        synchronous (bool) When False, use WorkerCache to prevent concurrent runs.

    Returns:
        None
    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock: requeue this task instead of blocking.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)

    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        # Bare raise keeps the original traceback (the old `raise ex` reset it).
        raise

    if not provider_uuid:
        # No specific provider: just refresh the views and stop.
        refresh_materialized_views.s(
            schema_name, provider, manifest_id=manifest_id, queue_name=queue_name
        ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid) and provider in (
        Provider.PROVIDER_AWS,
        Provider.PROVIDER_AWS_LOCAL,
        Provider.PROVIDER_AZURE,
        Provider.PROVIDER_AZURE_LOCAL,
    ):
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name, provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        # Chain cost-model update into the materialized-view refresh.
        linked_tasks = update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date).set(
            queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
        ) | refresh_materialized_views.si(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped.\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}"
        )
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name, provider, provider_uuid=provider_uuid, manifest_id=manifest_id
        ).set(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # Ensure manifest_id is not None: the line-item delete must only run
        # after the summarization chained above has completed.
        simulate = False
        line_items_only = True
        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only, queue_name
        ).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)

    chain(linked_tasks).apply_async()

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)