Example #1
def refresh_materialized_views(  # noqa: C901
    schema_name,
    provider_type,
    manifest_id=None,
    provider_uuid="",
    synchronous=False,
    queue_name=None,
    tracing_id=None,
):
    """Refresh the database's materialized views for reporting."""
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name, provider_type, provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            refresh_materialized_views.s(
                schema_name,
                provider_type,
                manifest_id=manifest_id,
                provider_uuid=provider_uuid,
                synchronous=synchronous,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)
    materialized_views = ()
    try:
        with schema_context(schema_name):
            for view in materialized_views:
                table_name = view._meta.db_table
                with connection.cursor() as cursor:
                    cursor.execute(
                        f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                    LOG.info(log_json(tracing_id, f"Refreshed {table_name}."))

        invalidate_view_cache_for_tenant_and_source_type(
            schema_name, provider_type)

        if provider_uuid:
            ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
        if manifest_id:
            # Processing for this manifest should be complete after this step
            with ReportManifestDBAccessor() as manifest_accessor:
                manifest = manifest_accessor.get_manifest_by_id(manifest_id)
                manifest_accessor.mark_manifest_as_completed(manifest)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
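The flow above is the single-task deduplication pattern shared by all of these tasks: check whether a duplicate is already running, requeue if so, otherwise take a short-lived lock and release it on every exit path. A minimal sketch of that flow, using a plain in-memory dict as a stand-in for the Redis-backed WorkerCache (the function names mirror the calls above, but this is an illustration, not the masu API):

import time

_locks = {}  # in-memory stand-in for the Redis-backed WorkerCache


def single_task_is_running(task_name, cache_args):
    expires_at = _locks.get((task_name, tuple(cache_args)))
    return expires_at is not None and expires_at > time.time()


def lock_single_task(task_name, cache_args, timeout=3600):
    _locks[(task_name, tuple(cache_args))] = time.time() + timeout


def release_single_task(task_name, cache_args):
    _locks.pop((task_name, tuple(cache_args)), None)


def run_deduplicated(task_name, cache_args, work, requeue):
    """Run work() unless a duplicate holds the lock; requeue instead."""
    if single_task_is_running(task_name, cache_args):
        requeue()  # hand the task back to the broker, as .s(...).apply_async(...) does above
        return
    lock_single_task(task_name, cache_args)
    try:
        work()
    finally:
        release_single_task(task_name, cache_args)  # release on success and on failure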
Example #2
def update_cost_model_costs(schema_name,
                            provider_uuid,
                            start_date=None,
                            end_date=None,
                            queue_name=None,
                            synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.

    Returns:
        None

    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_cost_model_costs.s(
                schema_name,
                provider_uuid,
                start_date=start_date,
                end_date=end_date,
                queue_name=queue_name,
                synchronous=synchronous,
            ).apply_async(queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=600)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (f"update_cost_model_costs called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
    LOG.info(stmt)

    try:
        updater = CostModelCostUpdater(schema_name, provider_uuid)
        if updater:
            updater.update_cost_model_costs(start_date, end_date)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
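Queuing this task from a caller looks just like the requeue branch above. A hypothetical invocation, with placeholder schema and provider values:

update_cost_model_costs.s(
    "acct10001",  # placeholder schema name
    "6e212746-484a-40cd-bba0-09a19d132d64",  # placeholder provider UUID
    start_date="2021-05-01",
    end_date="2021-05-31",
).apply_async(queue=UPDATE_COST_MODEL_COSTS_QUEUE)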
Example #3
def refresh_materialized_views(schema_name,
                               provider_type,
                               manifest_id=None,
                               provider_uuid=None,
                               synchronous=False):
    """Refresh the database's materialized views for reporting."""
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)

        worker_cache.lock_single_task(task_name, cache_args)
    materialized_views = ()
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        materialized_views = (AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_OCP,):
        materialized_views = (OCP_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_AZURE,
                           Provider.PROVIDER_AZURE_LOCAL):
        materialized_views = (AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)

    with schema_context(schema_name):
        for view in materialized_views:
            table_name = view._meta.db_table
            with connection.cursor() as cursor:
                cursor.execute(
                    f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                LOG.info(f"Refreshed {table_name}.")

    invalidate_view_cache_for_tenant_and_source_type(schema_name,
                                                     provider_type)

    if provider_uuid:
        ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
    if manifest_id:
        # Processing for this manifest should be complete after this step
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            manifest_accessor.mark_manifest_as_completed(manifest)

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
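Unlike Example #1, which requeues when it finds a duplicate, this earlier variant parks the worker and polls every five seconds until the lock frees. The wait loop in isolation (worker_cache is assumed to be the same WorkerCache instance used above):

import time


def wait_for_lock(worker_cache, task_name, cache_args, poll_seconds=5):
    # Busy-wait until no duplicate task holds the lock, then take it.
    # This ties up a worker slot for the whole wait, which is why the
    # later variant in Example #1 requeues the task instead of sleeping.
    while worker_cache.single_task_is_running(task_name, cache_args):
        time.sleep(poll_seconds)
    worker_cache.lock_single_task(task_name, cache_args)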
Example #4
def update_cost_model_costs(schema_name,
                            provider_uuid,
                            start_date=None,
                            end_date=None,
                            provider_type=None,
                            synchronous=False):
    """Update usage charge information.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.

    Returns:
        None

    """
    task_name = "masu.processor.tasks.update_cost_model_costs"
    cache_args = [schema_name, provider_uuid, start_date, end_date]
    if not synchronous:
        worker_cache = WorkerCache()
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)
        worker_cache.lock_single_task(task_name, cache_args, timeout=300)

    worker_stats.COST_MODEL_COST_UPDATE_ATTEMPTS_COUNTER.inc()

    stmt = (f"update_cost_model_costs called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
    LOG.info(stmt)

    updater = CostModelCostUpdater(schema_name, provider_uuid)
    if updater:
        updater.update_cost_model_costs(start_date, end_date)

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
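With synchronous=True the lock bookkeeping is skipped entirely, so a direct call needs nothing beyond the identifiers. A hypothetical invocation with placeholder values:

update_cost_model_costs(
    "acct10001",  # placeholder schema name
    "6e212746-484a-40cd-bba0-09a19d132d64",  # placeholder provider UUID
    start_date="2021-05-01",
    end_date="2021-05-31",
    synchronous=True,
)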
Example #5
def update_openshift_on_cloud(
    self,
    schema_name,
    openshift_provider_uuid,
    infrastructure_provider_uuid,
    infrastructure_provider_type,
    start_date,
    end_date,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Update OpenShift on Cloud for a specific OpenShift and cloud source."""
    task_name = "masu.processor.tasks.update_openshift_on_cloud"
    cache_args = [schema_name, infrastructure_provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infrastructure_provider_uuid,
                infrastructure_provider_type,
                start_date,
                end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)
    stmt = (f"update_openshift_on_cloud called with args: "
            f" schema_name: {schema_name}, "
            f" openshift_provider_uuid: {openshift_provider_uuid}, "
            f" infrastructure_provider_uuid: {infrastructure_provider_uuid}, "
            f" infrastructure_provider_type: {infrastructure_provider_type}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" queue_name: {queue_name}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name,
                                       infrastructure_provider_uuid,
                                       manifest_id, tracing_id)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            openshift_provider_uuid,
            infrastructure_provider_uuid,
            infrastructure_provider_type,
            tracing_id,
        )
    except ReportSummaryUpdaterCloudError as ex:
        LOG.info(
            log_json(
                tracing_id,
                (
                    f"update_openshift_on_cloud failed for: {infrastructure_provider_type} "
                    f"provider: {infrastructure_provider_uuid}, "
                    f"OpenShift provider {openshift_provider_uuid}. \nError: {ex}\n"
                    f"Retry {self.request.retries} of {settings.MAX_UPDATE_RETRIES}"
                ),
            ))
        raise ReportSummaryUpdaterCloudError from ex
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
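Note that this task releases the lock in a finally block, while Examples #1 and #2 duplicate the release call on the exception and success paths. The finally form cannot leak the lock on an unexpected exit path. The same shape in miniature:

def run_with_lock_released(work, release):
    # finally runs on success, on raise, and on early return, so the
    # release cannot be skipped the way a duplicated release-then-raise
    # pattern can be if a new exit path is added later.
    try:
        return work()
    finally:
        release()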
Example #6
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (f"update_summary_tables called with args: "
            f" schema_name: {schema_name}, "
            f" provider: {provider}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id,
                                       tracing_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(
            start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        LOG.info(
            log_json(
                tracing_id,
                f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}"
            ))

    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        LOG.warning(
            log_json(
                tracing_id,
                (f"{pnf_ex} Possible source/provider delete during processing. "
                 + "Processing for this provier will halt."),
            ))
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex
    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name,
            tracing_id=tracing_id).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid, provider,
                               schema_name) and provider in (
                                   Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL,
                                   Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL,
                               ):
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}")
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items():
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE))

    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name,
            provider_uuid,
            start_date,
            end_date,
            tracing_id=tracing_id).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id,
                tracing_id=tracing_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
            tracing_id=tracing_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    chain(linked_tasks).apply_async()

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
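The composition above leans on three Celery primitives: group for the parallel OpenShift-on-cloud fan-out, the | operator to chain the cost-model update into the view refresh, and .si() to make the refresh signature immutable so it ignores the previous task's return value. A self-contained sketch of those primitives (the app, broker, and step task are stand-ins, not masu's):

from celery import Celery, chain, group

app = Celery("sketch", broker="memory://")  # stand-in broker for illustration


@app.task
def step(name):
    return name


# Fan out independent signatures, one per OpenShift-on-cloud cluster.
fan_out = group(step.s(f"cluster-{i}") for i in range(3))

# .si() builds an immutable signature: refresh will NOT receive the
# return value of update_costs as its first argument.
update_costs = step.s("update_cost_model_costs")
refresh = step.si("refresh_materialized_views")
linked_tasks = update_costs | refresh

fan_out.apply()                    # synchronous, like group(...).apply()
chain(linked_tasks).apply_async()  # queued, like chain(linked_tasks).apply_async()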
Example #7
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns:
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name]

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid) and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # We want to make sure that the manifest_id is not None, because
        # we only want to delete line items after the summarize_reports
        # task above has completed.
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only,
            queue_name).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)

    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
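The remove_expired_data link only fires when the summarized range reaches back into the previous month. A standalone version of that guard, with plain datetime arithmetic standing in for masu's DateHelper:

import datetime

import ciso8601


def reaches_previous_month(start_date, today=None):
    # Mirrors the guard above: normalize a string start_date, then
    # compare against the first day of the previous month.
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    today = today or datetime.date.today()
    first_of_month = today.replace(day=1)
    prev_month_start = (first_of_month - datetime.timedelta(days=1)).replace(day=1)
    return start_date <= prev_month_start


# e.g. during June, any start date on or before May 1 triggers the cleanup task
assert reaches_previous_month("2021-05-01", today=datetime.date(2021, 6, 15))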