Ejemplo n.º 1
0
    def test_aws_local_route(self, mock_daily, mock_update, mock_cloud):
        """Test that AWS Local report updating works as expected."""
        expected_start, expected_end = 1, 2
        mock_daily.return_value = (expected_start, expected_end)
        mock_update.return_value = (expected_start, expected_end)

        updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
        self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

        # A daily-table refresh alone must not touch the summary or cloud mocks.
        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()
        mock_cloud.assert_not_called()

        # The summary step runs the summary updater and forwards the
        # (start, end) window it returned to the cloud updater.
        updater.update_summary_tables(self.today, self.tomorrow)
        mock_update.assert_called_with(self.today, self.tomorrow)
        mock_cloud.assert_called_with(expected_start, expected_end)
Ejemplo n.º 2
0
    def test_azure_local_route(self, mock_daily, mock_update):
        """Test that AZURE Local report updating works as expected."""
        expected_start, expected_end = 1, 2
        mock_daily.return_value = (expected_start, expected_end)
        mock_update.return_value = (expected_start, expected_end)

        updater = ReportSummaryUpdater(self.schema,
                                       self.azure_test_provider_uuid,
                                       tracing_id=self.tracing_id)
        self.assertIsInstance(updater._updater, AzureReportSummaryUpdater)

        # A daily-table refresh alone must not touch the summary mock.
        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()

        # The summary step forwards the same date window.
        updater.update_summary_tables(self.today, self.tomorrow,
                                      self.tracing_id)
        mock_update.assert_called_with(self.today, self.tomorrow)
Ejemplo n.º 3
0
    def test_aws_ocp_exception_route(self, mock_daily, mock_update, mock_cloud):
        """Test that AWS report updating works as expected."""
        expected_start, expected_end = 1, 2
        mock_daily.return_value = (expected_start, expected_end)
        mock_update.return_value = (expected_start, expected_end)
        # Make the cloud updater blow up so we can check error wrapping.
        mock_cloud.side_effect = Exception("test")

        updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
        self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

        # The daily step succeeds and never reaches the failing cloud mock.
        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()
        mock_cloud.assert_not_called()

        # A failure in the cloud step surfaces as ReportSummaryUpdaterCloudError.
        with self.assertRaises(ReportSummaryUpdaterCloudError):
            updater.update_summary_tables(self.today, self.tomorrow)
Ejemplo n.º 4
0
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()

    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    if provider_uuid:
        dh = DateHelper(utc=True)
        # Midnight on the first day of the previous month (naive); data at or
        # before this cutoff is old enough for line-item cleanup.
        prev_month_cutoff = dh.last_month_end.replace(tzinfo=None)
        prev_month_cutoff = prev_month_cutoff.replace(microsecond=0, second=0, minute=0, hour=0, day=1)
        # update_daily_tables may hand back a datetime instead of the
        # "%Y-%m-%d" string this task was called with; only parse strings.
        if isinstance(start_date, str):
            start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        else:
            start_date_obj = start_date
        if manifest_id and (start_date_obj <= prev_month_cutoff):
            # We want make sure that the manifest_id is not none, because
            # we only want to call the delete line items after the summarize_reports
            # task above
            simulate = False
            line_items_only = True
            chain(
                update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                refresh_materialized_views.si(schema_name, provider, manifest_id),
                remove_expired_data.si(schema_name, provider, simulate, provider_uuid, line_items_only),
            ).apply_async()
        else:
            chain(
                update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                refresh_materialized_views.si(schema_name, provider, manifest_id),
            ).apply_async()
    else:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
Ejemplo n.º 5
0
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the reporting summary tables.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    LOG.info(f"update_summary_tables called with args:\n"
             f" schema_name: {schema_name},\n"
             f" provider: {provider},\n"
             f" start_date: {start_date},\n"
             f" end_date: {end_date},\n"
             f" manifest_id: {manifest_id}")

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date,
                                                           end_date)
        updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        # Without a specific provider we only refresh the materialized views.
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return

    # Charge calculation first, then a view refresh once it completes.
    chain(
        update_charge_info.s(schema_name, provider_uuid, start_date, end_date),
        refresh_materialized_views.si(schema_name, provider, manifest_id),
    ).apply_async()
Ejemplo n.º 6
0
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None,
                          manifest_id=None):
    """Populate the reporting summary tables.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()

    LOG.info('update_summary_tables called with args:\n'
             f' schema_name: {schema_name},\n'
             f' provider: {provider},\n'
             f' start_date: {start_date},\n'
             f' end_date: {end_date},\n'
             f' manifest_id: {manifest_id}')

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        return

    # Kick off the charge calculation and, once it succeeds, update the
    # cost summary table for the same window.
    cost_summary_signature = update_cost_summary_table.si(
        schema_name, provider_uuid, manifest_id, start_date, end_date)
    update_charge_info.apply_async(
        args=(schema_name, provider_uuid, start_date, end_date),
        link=cost_summary_signature)
Ejemplo n.º 7
0
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.
        queue_name  (str) Optional Celery queue override for follow-up tasks.
        synchronous (bool) When True, skip worker-cache locking and run the
            OCP-on-cloud task group inline instead of asynchronously.
        tracing_id  (str) Correlation id threaded through log messages.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    # Lock key includes the provider uuid, so different providers in the
    # same schema may summarize concurrently.
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker already summarizes these args; requeue and exit.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (f"update_summary_tables called with args: "
            f" schema_name: {schema_name}, "
            f" provider: {provider}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id,
                                       tracing_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(
            start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        # OCP correlation failure is non-fatal: continue with an empty infra
        # map so the provider's own summary work still completes below.
        LOG.info(
            log_json(
                tracing_id,
                f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}"
            ))

    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        # Provider disappeared mid-run; release the lock and stop cleanly.
        LOG.warning(
            log_json(
                tracing_id,
                (f"{pnf_ex} Possible source/provider delete during processing. "
                 + "Processing for this provier will halt."),
            ))
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception as ex:
        # Always release the lock before propagating unexpected failures.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex
    if not provider_uuid:
        # No specific provider: just refresh the views and finish.
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name,
            tracing_id=tracing_id).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid, provider,
                               schema_name) and provider in (
                                   Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL,
                                   Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL,
                               ):
        # Trino-processed AWS/Azure providers get markup during
        # summarization, so the cost model step is skipped.
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}")
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items(
    ):
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE))

    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()

    if cost_model is not None:
        # Cost model present: compute costs first, then refresh the views.
        linked_tasks = update_cost_model_costs.s(
            schema_name,
            provider_uuid,
            start_date,
            end_date,
            tracing_id=tracing_id).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id,
                tracing_id=tracing_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
            tracing_id=tracing_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    chain(linked_tasks).apply_async()

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
Ejemplo n.º 8
0
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)

    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return

    with CostModelDBAccessor(schema_name,
                             provider_uuid) as cost_model_accessor:
        cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        # Compute cost model costs first, then refresh the views.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid,
            start_date, end_date) | refresh_materialized_views.si(
                schema_name, provider, manifest_id)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped. No cost model available for \n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(schema_name, provider,
                                                    manifest_id)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None)
    # update_daily_tables may hand back a datetime instead of the
    # "%Y-%m-%d" string this task was called with; only parse strings.
    if isinstance(start_date, str):
        start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    else:
        start_date_obj = start_date
    if manifest_id and (start_date_obj <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the summarize_reports
        # task above
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate,
                                               provider_uuid, line_items_only)

    chain(linked_tasks).apply_async()
Ejemplo n.º 9
0
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)

    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        # No specific provider: refresh the views and stop.
        refresh_materialized_views.delay(schema_name,
                                         provider,
                                         manifest_id=manifest_id)
        return

    if settings.ENABLE_PARQUET_PROCESSING and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        # Parquet-processed AWS/Azure providers get markup during
        # summarization, so skip the cost model lookup entirely.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        # Compute cost model costs first, then refresh the views.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date,
            end_date) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id)

    dh = DateHelper(utc=True)
    # Normalize both sides to naive dates before comparing.
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the summarize_reports
        # task above
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate,
                                               provider_uuid, line_items_only)

    chain(linked_tasks).apply_async()
Ejemplo n.º 10
0
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.
        manifest_id (int) The manifest being processed, if any.
        queue_name  (str) Optional Celery queue override for follow-up tasks.
        synchronous (bool) When True, skip worker-cache locking.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    # Lock key is the schema only: one summary task per schema at a time.
    cache_args = [schema_name]

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock; requeue this task and exit.
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        # One-hour lock timeout guards against workers that die mid-task.
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception as ex:
        # Release the lock before propagating any failure.
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not provider_uuid:
        # No specific provider: refresh the views and stop.
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid) and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        # Trino-processed AWS/Azure providers get markup during
        # summarization, so skip the cost model lookup entirely.
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        # Compute cost model costs first, then refresh the views.
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    dh = DateHelper(utc=True)
    # Normalize both sides to naive dates before comparing.
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # We want make sure that the manifest_id is not none, because
        # we only want to call the delete line items after the summarize_reports
        # task above
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only,
            queue_name).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)

    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)