Example #1
    def test_manifest_is_ready_is_not_ready(self):
        """Test that False is returned when a manifest is not ready to process."""
        billing_start = DateAccessor().today_with_timezone('UTC').replace(
            day=1)
        manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'num_processed_files': 1,
            'provider_id': 2
        }
        with ReportManifestDBAccessor() as accessor:
            manifest = accessor.add(**manifest_dict)
            accessor.commit()
            manifest_id = manifest.id
        updater = ReportSummaryUpdater(self.schema,
                                       self.ocp_test_provider_uuid,
                                       manifest_id)
        self.assertFalse(updater.manifest_is_ready())

        with ReportManifestDBAccessor() as accessor:
            manifests = accessor._get_db_obj_query().all()
            for manifest in manifests:
                accessor.delete(manifest)
            accessor.commit()
Example #2
def update_cost_summary_table(schema_name, provider_uuid, manifest_id=None,
                              start_date=None, end_date=None):
    """Update derived costs summary table.

    Args:
        schema_name (str) The DB schema name.
        provider_uuid (str) The provider uuid.
        manifest_id (str) The manifest id.
        start_date (str, Optional) - Start date of range to update derived cost.
        end_date (str, Optional) - End date of range to update derived cost.

    Returns:
        None

    """
    worker_stats.COST_SUMMARY_ATTEMPTS_COUNTER.inc()

    stmt = (f'update_cost_summary_table called with args:\n'
            f' schema_name: {schema_name},\n'
            f' provider_uuid: {provider_uuid}\n'
            f' manifest_id: {manifest_id}')
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    updater.update_cost_summary_table(start_date, end_date)
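
A brief usage note: update_cost_summary_table is wired up as a Celery task elsewhere in the project (Example #15 links it into a workflow with .si()), but it can also be called directly. A minimal sketch with placeholder argument values, assuming the function above is importable:

# Placeholder values for illustration only; they are not taken from the source.
update_cost_summary_table(
    schema_name="acct10001",
    provider_uuid="00000000-0000-0000-0000-000000000000",
    manifest_id=1,
    start_date="2019-01-01",
    end_date="2019-01-31",
)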
Example #3
    def test_update_summary_tables_finalized_bill_not_done_proc(
            self, mock_daily, mock_summary):
        """Test that summary tables are run for a full month."""
        report_updater_base = ReportSummaryUpdater('acct10001',
                                                   self.aws_provider_uuid,
                                                   self.manifest.id)

        start_date = self.date_accessor.today_with_timezone('UTC')
        end_date = start_date + datetime.timedelta(days=1)
        bill_date = start_date.replace(day=1).date()
        with schema_context(self.schema):
            bill = self.accessor.get_cost_entry_bills_by_date(bill_date)[0]
            bill.finalized_datetime = start_date
            self.accessor.commit()

        start_date_str = start_date.strftime('%Y-%m-%d')
        end_date_str = end_date.strftime('%Y-%m-%d')

        self.assertIsNone(bill.summary_data_creation_datetime)
        self.assertIsNone(bill.summary_data_updated_datetime)

        if report_updater_base.manifest_is_ready():
            self.updater.update_daily_tables(start_date_str, end_date_str)
        mock_daily.assert_called()

        if report_updater_base.manifest_is_ready():
            self.updater.update_summary_tables(start_date_str, end_date_str)
        mock_summary.assert_called()

        with AWSReportDBAccessor('acct10001', self.column_map) as accessor:
            bill = accessor.get_cost_entry_bills_by_date(bill_date)[0]
            self.assertIsNotNone(bill.summary_data_creation_datetime)
            self.assertIsNotNone(bill.summary_data_updated_datetime)
Example #4
    def test_update_summary_tables_new_bill_not_done_processing(
            self, mock_daily, mock_summary):
        """Test that summary tables are not run for a full month."""
        report_updater_base = ReportSummaryUpdater("acct10001",
                                                   self.aws_provider_uuid,
                                                   self.manifest.id)

        start_date = self.date_accessor.today_with_timezone("UTC")
        end_date = start_date + datetime.timedelta(days=1)
        bill_date = start_date.replace(day=1).date()

        with schema_context(self.schema):
            bill = self.accessor.get_cost_entry_bills_by_date(bill_date)[0]

        start_date_str = start_date.strftime("%Y-%m-%d")
        end_date_str = end_date.strftime("%Y-%m-%d")

        self.assertIsNone(bill.summary_data_creation_datetime)
        self.assertIsNone(bill.summary_data_updated_datetime)

        # manifest_is_ready is now unconditionally returning True, so summary is expected.
        if report_updater_base.manifest_is_ready():
            self.updater.update_daily_tables(start_date_str, end_date_str)
        mock_daily.assert_called()

        if report_updater_base.manifest_is_ready():
            self.updater.update_summary_tables(start_date_str, end_date_str)
        mock_summary.assert_called()

        with AWSReportDBAccessor("acct10001", self.column_map) as accessor:
            bill = accessor.get_cost_entry_bills_by_date(bill_date)[0]
            self.assertIsNotNone(bill.summary_data_creation_datetime)
            self.assertIsNotNone(bill.summary_data_updated_datetime)
Example #5
    def test_update_summary_tables_new_period_not_done_processing(
            self, mock_daily, mock_sum, mock_storage_daily,
            mock_storage_summary):
        """Test that summary tables are not run since num_processed_files != num_total_files."""
        billing_start = self.date_accessor.today_with_timezone('UTC').replace(
            day=1) + relativedelta(months=-1)
        manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'num_processed_files': 1,
            'provider_id': 2
        }
        self.manifest_accessor.delete(self.manifest)
        self.manifest_accessor.commit()
        self.manifest = self.manifest_accessor.add(**manifest_dict)
        self.manifest_accessor.commit()

        report_updater_base = ReportSummaryUpdater('acct10001',
                                                   self.ocp_test_provider_uuid,
                                                   self.manifest.id)

        start_date = self.date_accessor.today_with_timezone('UTC')
        end_date = start_date + datetime.timedelta(days=1)
        bill_date = start_date.replace(day=1).date()

        period = self.accessor.get_usage_periods_by_date(bill_date)[0]

        self.assertIsNone(period.summary_data_creation_datetime)
        self.assertIsNone(period.summary_data_updated_datetime)

        start_date_str = start_date.strftime('%Y-%m-%d')
        end_date_str = end_date.strftime('%Y-%m-%d')
        if report_updater_base.manifest_is_ready():
            self.updater.update_daily_tables(start_date_str, end_date_str)
        mock_daily.assert_not_called()
        mock_storage_daily.assert_not_called()
        mock_sum.assert_not_called()
        mock_storage_summary.assert_not_called()

        if report_updater_base.manifest_is_ready():
            self.updater.update_summary_tables(start_date_str, end_date_str)
        mock_sum.assert_not_called()
        mock_storage_summary.assert_not_called()

        with OCPReportDBAccessor('acct10001', self.column_map) as accessor:
            period = accessor.get_usage_periods_by_date(bill_date)[0]
            self.assertIsNone(period.summary_data_creation_datetime)
            self.assertIsNone(period.summary_data_updated_datetime)
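
The test methods in these examples receive mock_daily, mock_summary, and similar arguments from @patch decorators that the snippets omit. The self-contained sketch below uses a stand-in Updater class (not the project's real classes) to show how stacked unittest.mock.patch decorators map onto those parameters: decorators apply bottom-up, so the lowest one supplies the first mock argument after self.

from unittest import TestCase
from unittest.mock import patch


class Updater:
    """Stand-in for the project's summary updater, for illustration only."""

    def update_daily_tables(self, start, end):
        raise NotImplementedError

    def update_summary_tables(self, start, end):
        raise NotImplementedError


class ExampleTest(TestCase):
    # The lowest @patch supplies mock_daily; the one above it supplies mock_summary.
    @patch.object(Updater, "update_summary_tables")
    @patch.object(Updater, "update_daily_tables")
    def test_example(self, mock_daily, mock_summary):
        mock_daily.return_value = ("2019-01-01", "2019-01-31")
        Updater().update_daily_tables("2019-01-01", "2019-01-31")
        mock_daily.assert_called_with("2019-01-01", "2019-01-31")
        mock_summary.assert_not_called()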
Example #6
    def test_manifest_is_ready_is_ready(self):
        """Test that True is returned when a manifest is ready to process."""
        billing_start = DateAccessor().today_with_timezone('UTC').replace(day=1)
        manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'num_processed_files': 2,
            'provider_uuid': self.ocp_provider_uuid,
        }
        with ReportManifestDBAccessor() as accessor:
            manifest = accessor.add(**manifest_dict)
        manifest_id = manifest.id
        updater = ReportSummaryUpdater(self.schema, self.ocp_test_provider_uuid, manifest_id)
        self.assertTrue(updater.manifest_is_ready())
Example #7
    def test_bad_provider(self):
        """Test that an unimplemented provider throws an error."""
        credentials = {"credentials": {"role_arn": "unknown"}}
        self.unknown_auth = ProviderAuthentication.objects.create(
            credentials=credentials)
        self.unknown_auth.save()
        data_source = {"data_source": {"bucket": "unknown"}}
        self.unknown_billing_source = ProviderBillingSource.objects.create(
            data_source=data_source)
        self.unknown_billing_source.save()

        with patch("masu.celery.tasks.check_report_updates"):
            self.unknown_provider = Provider.objects.create(
                uuid=self.unkown_test_provider_uuid,
                name="Test Provider",
                type="FOO",
                authentication=self.unknown_auth,
                billing_source=self.unknown_billing_source,
                customer=self.customer,
                setup_complete=False,
                active=True,
            )
        self.unknown_provider.save()

        with self.assertRaises(ReportSummaryUpdaterError):
            _ = ReportSummaryUpdater(self.schema,
                                     self.unkown_test_provider_uuid)
Example #8
    def test_manifest_is_ready_is_not_ready(self):
        """Test that False is returned when a manifest is not ready to process."""
        billing_start = DateAccessor().today_with_timezone("UTC").replace(
            day=1)
        manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": billing_start,
            "num_total_files": 2,
            "num_processed_files": 1,
            "provider_uuid": self.ocp_provider_uuid,
        }
        with ReportManifestDBAccessor() as accessor:
            manifest = accessor.add(**manifest_dict)
        manifest_id = manifest.id
        updater = ReportSummaryUpdater(self.schema,
                                       self.ocp_test_provider_uuid,
                                       manifest_id)

        # manifest_is_ready is now unconditionally returning True, so summary is expected.
        self.assertTrue(updater.manifest_is_ready())
Example #9
    def test_no_provider_on_create(self):
        """Test that an error is raised when no provider exists."""
        billing_start = DateAccessor().today_with_timezone("UTC").replace(day=1)
        no_provider_uuid = uuid4()
        manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": billing_start,
            "num_total_files": 2,
            "provider_uuid": self.ocp_provider_uuid,
        }
        with ReportManifestDBAccessor() as accessor:
            manifest = accessor.add(**manifest_dict)
        manifest_id = manifest.id
        with self.assertRaises(ReportSummaryUpdaterError):
            ReportSummaryUpdater(self.schema, no_provider_uuid, manifest_id)
Example #10
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None, manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()

    stmt = (
        f"update_summary_tables called with args:\n"
        f" schema_name: {schema_name},\n"
        f" provider: {provider},\n"
        f" start_date: {start_date},\n"
        f" end_date: {end_date},\n"
        f" manifest_id: {manifest_id}"
    )
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    if provider_uuid:
        dh = DateHelper(utc=True)
        prev_month_last_day = dh.last_month_end
        start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        prev_month_last_day = prev_month_last_day.replace(tzinfo=None)
        prev_month_last_day = prev_month_last_day.replace(microsecond=0, second=0, minute=0, hour=0, day=1)
        if manifest_id and (start_date_obj <= prev_month_last_day):
            # Make sure that manifest_id is not None, because we only want to
            # remove expired line items after the summarize_reports task above
            # has run.
            simulate = False
            line_items_only = True
            chain(
                update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                refresh_materialized_views.si(schema_name, provider, manifest_id),
                remove_expired_data.si(schema_name, provider, simulate, provider_uuid, line_items_only),
            ).apply_async()
        else:
            chain(
                update_cost_model_costs.s(schema_name, provider_uuid, start_date, end_date),
                refresh_materialized_views.si(schema_name, provider, manifest_id),
            ).apply_async()
    else:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
Example #11
    def test_azure_local_route(self, mock_daily, mock_update, mock_cloud, mock_cloud_cost):
        """Test that AZURE Local report updating works as expected."""
        mock_start = 1
        mock_end = 2
        mock_daily.return_value = (mock_start, mock_end)
        mock_update.return_value = (mock_start, mock_end)
        updater = ReportSummaryUpdater(self.schema, self.azure_test_provider_uuid)
        self.assertIsInstance(updater._updater, AzureReportSummaryUpdater)

        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()
        mock_cloud.assert_not_called()

        updater.update_summary_tables(self.today, self.tomorrow)
        mock_update.assert_called_with(self.today, self.tomorrow)
        mock_cloud.assert_called_with(mock_start, mock_end)

        updater.update_cost_summary_table(self.today, self.tomorrow)
        mock_cloud_cost.assert_called_with(self.today, self.tomorrow)
Example #12
    def test_aws_local_route(self, mock_daily, mock_update):
        """Test that AWS Local report updating works as expected."""
        mock_start = 1
        mock_end = 2
        mock_daily.return_value = (mock_start, mock_end)
        mock_update.return_value = (mock_start, mock_end)
        updater = ReportSummaryUpdater(self.schema,
                                       self.aws_provider_uuid,
                                       tracing_id=self.tracing_id)
        self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()

        updater.update_summary_tables(self.today, self.tomorrow,
                                      self.tracing_id)
        mock_update.assert_called_with(self.today, self.tomorrow)
Example #13
    def test_aws_ocp_exception_route(self, mock_daily, mock_update, mock_cloud):
        """Test that AWS report updating works as expected."""
        mock_start = 1
        mock_end = 2
        mock_daily.return_value = (mock_start, mock_end)
        mock_update.return_value = (mock_start, mock_end)
        mock_cloud.side_effect = Exception("test")

        updater = ReportSummaryUpdater(self.schema, self.aws_provider_uuid)
        self.assertIsInstance(updater._updater, AWSReportSummaryUpdater)

        updater.update_daily_tables(self.today, self.tomorrow)
        mock_daily.assert_called_with(self.today, self.tomorrow)
        mock_update.assert_not_called()
        mock_cloud.assert_not_called()

        with self.assertRaises(ReportSummaryUpdaterCloudError):
            updater.update_summary_tables(self.today, self.tomorrow)
Example #14
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    if provider_uuid:
        chain(
            update_charge_info.s(schema_name, provider_uuid, start_date,
                                 end_date),
            refresh_materialized_views.si(schema_name, provider, manifest_id),
        ).apply_async()
    else:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
Example #15
def update_summary_tables(schema_name, provider, provider_uuid, start_date, end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(provider_type=provider).inc()

    stmt = (f'update_summary_tables called with args:\n'
            f' schema_name: {schema_name},\n'
            f' provider: {provider},\n'
            f' start_date: {start_date},\n'
            f' end_date: {end_date},\n'
            f' manifest_id: {manifest_id}')
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
    if updater.manifest_is_ready():
        start_date, end_date = updater.update_daily_tables(start_date, end_date)
        updater.update_summary_tables(start_date, end_date)

    if provider_uuid:
        update_charge_info.apply_async(
            args=(
                schema_name,
                provider_uuid,
                start_date,
                end_date),
            link=update_cost_summary_table.si(
                schema_name,
                provider_uuid,
                manifest_id,
                start_date,
                end_date))
Example #16
    def test_bad_provider(self):
        """Test that an unimplemented provider throws an error."""
        self.unknown_auth = ProviderAuthentication.objects.create(
            provider_resource_name="unknown")
        self.unknown_auth.save()
        self.unknown_billing_source = ProviderBillingSource.objects.create(
            bucket="unknown")
        self.unknown_billing_source.save()

        self.unknown_provider = Provider.objects.create(
            uuid=self.unkown_test_provider_uuid,
            name="Test Provider",
            type="FOO",
            authentication=self.unknown_auth,
            billing_source=self.unknown_billing_source,
            customer=self.customer,
            setup_complete=False,
            active=True,
        )
        self.unknown_provider.save()

        with self.assertRaises(ReportSummaryUpdaterError):
            _ = ReportSummaryUpdater(self.schema,
                                     self.unkown_test_provider_uuid)
Example #17
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)

    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        refresh_materialized_views.delay(schema_name, provider, manifest_id)
        return

    with CostModelDBAccessor(schema_name,
                             provider_uuid) as cost_model_accessor:
        cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid,
            start_date, end_date) | refresh_materialized_views.si(
                schema_name, provider, manifest_id)
    else:
        stmt = (
            f"\n update_cost_model_costs skipped. No cost model available for \n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(schema_name, provider,
                                                    manifest_id)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None)
    start_date_obj = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    if manifest_id and (start_date_obj <= prev_month_start_day):
        # Make sure that manifest_id is not None, because we only want to
        # remove expired line items after the summarize_reports task above
        # has run.
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate,
                                               provider_uuid, line_items_only)

    chain(linked_tasks).apply_async()
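
A brief aside on the Celery canvas calls used above: .s() builds a signature that receives the previous task's return value as its first argument, .si() builds an immutable signature that ignores it, | composes signatures into a chain, and chain(...).apply_async() dispatches the pipeline. A minimal, self-contained sketch; the broker URL and task bodies are placeholders, not part of the source:

from celery import Celery, chain

app = Celery("sketch", broker="redis://localhost:6379/0")  # placeholder broker


@app.task
def summarize(schema_name):
    return schema_name


@app.task
def refresh(schema_name):
    # Linked with .si() below, so it never sees summarize()'s return value.
    return f"refreshed {schema_name}"


# summarize.s(...) would forward its result to a .s() successor;
# refresh.si(...) is immutable and runs with exactly the args given here.
pipeline = summarize.s("acct10001") | refresh.si("acct10001")
chain(pipeline).apply_async()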
Example #18
    def test_azure_parquet_summary_updater(self):
        """Test that the AWSReportParquetSummaryUpdater is returned."""
        updater = ReportSummaryUpdater(self.schema, self.azure_provider_uuid)

        self.assertIsInstance(updater._updater,
                              AzureReportParquetSummaryUpdater)
Example #19
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name]

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date)
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex

    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid) and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date, end_date).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # Make sure that manifest_id is not None, because we only want to
        # remove expired line items after the summarize_reports task above
        # has run.
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(
            schema_name, provider, simulate, provider_uuid, line_items_only,
            queue_name).set(queue=queue_name or REMOVE_EXPIRED_DATA_QUEUE)

    chain(linked_tasks).apply_async()
    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
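
The WorkerCache calls in this example implement a single-runner guard: check whether the task is already running, requeue and exit if so, otherwise take the lock and release it on every exit path. Stripped of the summarization details, the pattern reads roughly as below; this sketch reuses only the WorkerCache methods shown above, do_work and the requeue step are placeholders, and it expresses with try/finally what the original releases branch by branch:

def guarded_update(schema_name, synchronous=False):
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            # Another worker holds the lock: requeue (placeholder) and bail out.
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=3600)
    try:
        do_work(schema_name)  # placeholder for the summarization steps above
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)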
Example #20
def update_summary_tables(schema_name,
                          provider,
                          provider_uuid,
                          start_date,
                          end_date=None,
                          manifest_id=None):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()

    stmt = (f"update_summary_tables called with args:\n"
            f" schema_name: {schema_name},\n"
            f" provider: {provider},\n"
            f" start_date: {start_date},\n"
            f" end_date: {end_date},\n"
            f" manifest_id: {manifest_id}")
    LOG.info(stmt)

    updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id)

    start_date, end_date = updater.update_daily_tables(start_date, end_date)
    updater.update_summary_tables(start_date, end_date)

    if not provider_uuid:
        refresh_materialized_views.delay(schema_name,
                                         provider,
                                         manifest_id=manifest_id)
        return

    if settings.ENABLE_PARQUET_PROCESSING and provider in (
            Provider.PROVIDER_AWS,
            Provider.PROVIDER_AWS_LOCAL,
            Provider.PROVIDER_AZURE,
            Provider.PROVIDER_AZURE_LOCAL,
    ):
        cost_model = None
        stmt = (
            f"\n Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs\n"
            f" schema_name: {schema_name},\n"
            f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name, provider_uuid, start_date,
            end_date) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id)
    else:
        stmt = (f"\n update_cost_model_costs skipped.\n"
                f" schema_name: {schema_name},\n"
                f" provider_uuid: {provider_uuid}")
        LOG.info(stmt)
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id)

    dh = DateHelper(utc=True)
    prev_month_start_day = dh.last_month_start.replace(tzinfo=None).date()
    if isinstance(start_date, str):
        start_date = ciso8601.parse_datetime(start_date).date()
    if manifest_id and (start_date <= prev_month_start_day):
        # Make sure that manifest_id is not None, because we only want to
        # remove expired line items after the summarize_reports task above
        # has run.
        simulate = False
        line_items_only = True

        linked_tasks |= remove_expired_data.si(schema_name, provider, simulate,
                                               provider_uuid, line_items_only)

    chain(linked_tasks).apply_async()
Example #21
    def test_bad_provider(self):
        """Test that an unknown provider uuid throws an error."""
        with self.assertRaises(ReportSummaryUpdaterProviderNotFoundError):
            _ = ReportSummaryUpdater(self.schema, uuid4())
Example #22
def update_openshift_on_cloud(
    self,
    schema_name,
    openshift_provider_uuid,
    infrastructure_provider_uuid,
    infrastructure_provider_type,
    start_date,
    end_date,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Update OpenShift on Cloud for a specific OpenShift and cloud source."""
    task_name = "masu.processor.tasks.update_openshift_on_cloud"
    cache_args = [schema_name, infrastructure_provider_uuid]
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infrastructure_provider_uuid,
                infrastructure_provider_type,
                start_date,
                end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)
    stmt = (f"update_openshift_on_cloud called with args: "
            f" schema_name: {schema_name}, "
            f" openshift_provider_uuid: {openshift_provider_uuid}, "
            f" infrastructure_provider_uuid: {infrastructure_provider_uuid}, "
            f" infrastructure_provider_type: {infrastructure_provider_type}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" queue_name: {queue_name}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name,
                                       infrastructure_provider_uuid,
                                       manifest_id, tracing_id)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            openshift_provider_uuid,
            infrastructure_provider_uuid,
            infrastructure_provider_type,
            tracing_id,
        )
    except ReportSummaryUpdaterCloudError as ex:
        LOG.info(
            log_json(
                tracing_id,
                (
                    f"update_openshift_on_cloud failed for: {infrastructure_provider_type} ",
                    f"provider: {infrastructure_provider_uuid}, ",
                    f"OpenShift provider {openshift_provider_uuid}. \nError: {ex}\n",
                    f"Retry {self.request.retries} of {settings.MAX_UPDATE_RETRIES}",
                ),
            ))
        raise ReportSummaryUpdaterCloudError
    finally:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
Example #23
def update_summary_tables(  # noqa: C901
    schema_name,
    provider,
    provider_uuid,
    start_date,
    end_date=None,
    manifest_id=None,
    queue_name=None,
    synchronous=False,
    tracing_id=None,
):
    """Populate the summary tables for reporting.

    Args:
        schema_name (str) The DB schema name.
        provider    (str) The provider type.
        provider_uuid (str) The provider uuid.
        report_dict (dict) The report data dict from previous task.
        start_date  (str) The date to start populating the table.
        end_date    (str) The date to end on.

    Returns
        None

    """
    worker_stats.REPORT_SUMMARY_ATTEMPTS_COUNTER.labels(
        provider_type=provider).inc()
    task_name = "masu.processor.tasks.update_summary_tables"
    cache_args = [schema_name, provider, provider_uuid]
    ocp_on_cloud_infra_map = {}

    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(log_json(tracing_id, msg))
            update_summary_tables.s(
                schema_name,
                provider,
                provider_uuid,
                start_date,
                end_date=end_date,
                manifest_id=manifest_id,
                queue_name=queue_name,
                tracing_id=tracing_id,
            ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
            return
        worker_cache.lock_single_task(task_name,
                                      cache_args,
                                      timeout=settings.WORKER_CACHE_TIMEOUT)

    stmt = (f"update_summary_tables called with args: "
            f" schema_name: {schema_name}, "
            f" provider: {provider}, "
            f" start_date: {start_date}, "
            f" end_date: {end_date}, "
            f" manifest_id: {manifest_id}, "
            f" tracing_id: {tracing_id}")
    LOG.info(log_json(tracing_id, stmt))

    try:
        updater = ReportSummaryUpdater(schema_name, provider_uuid, manifest_id,
                                       tracing_id)
        start_date, end_date = updater.update_daily_tables(
            start_date, end_date)
        updater.update_summary_tables(start_date, end_date, tracing_id)
        ocp_on_cloud_infra_map = updater.get_openshift_on_cloud_infra_map(
            start_date, end_date, tracing_id)
    except ReportSummaryUpdaterCloudError as ex:
        LOG.info(
            log_json(
                tracing_id,
                f"Failed to correlate OpenShift metrics for provider: {provider_uuid}. Error: {ex}"
            ))

    except ReportSummaryUpdaterProviderNotFoundError as pnf_ex:
        LOG.warning(
            log_json(
                tracing_id,
                (f"{pnf_ex} Possible source/provider delete during processing. "
                 + "Processing for this provier will halt."),
            ))
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        return
    except Exception as ex:
        if not synchronous:
            worker_cache.release_single_task(task_name, cache_args)
        raise ex
    if not provider_uuid:
        refresh_materialized_views.s(
            schema_name,
            provider,
            manifest_id=manifest_id,
            queue_name=queue_name,
            tracing_id=tracing_id).apply_async(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
        return

    if enable_trino_processing(provider_uuid, provider,
                               schema_name) and provider in (
                                   Provider.PROVIDER_AWS,
                                   Provider.PROVIDER_AWS_LOCAL,
                                   Provider.PROVIDER_AZURE,
                                   Provider.PROVIDER_AZURE_LOCAL,
                               ):
        cost_model = None
        stmt = (
            f"Markup for {provider} is calculated during summarization. No need to run update_cost_model_costs"
            f" schema_name: {schema_name}, "
            f" provider_uuid: {provider_uuid}")
        LOG.info(log_json(tracing_id, stmt))
    else:
        with CostModelDBAccessor(schema_name,
                                 provider_uuid) as cost_model_accessor:
            cost_model = cost_model_accessor.cost_model

    # Create queued tasks for each OpenShift on Cloud cluster
    signature_list = []
    for openshift_provider_uuid, infrastructure_tuple in ocp_on_cloud_infra_map.items(
    ):
        infra_provider_uuid = infrastructure_tuple[0]
        infra_provider_type = infrastructure_tuple[1]
        signature_list.append(
            update_openshift_on_cloud.s(
                schema_name,
                openshift_provider_uuid,
                infra_provider_uuid,
                infra_provider_type,
                str(start_date),
                str(end_date),
                manifest_id=manifest_id,
                queue_name=queue_name,
                synchronous=synchronous,
                tracing_id=tracing_id,
            ).set(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE))

    # Apply OCP on Cloud tasks
    if signature_list:
        if synchronous:
            group(signature_list).apply()
        else:
            group(signature_list).apply_async()

    if cost_model is not None:
        linked_tasks = update_cost_model_costs.s(
            schema_name,
            provider_uuid,
            start_date,
            end_date,
            tracing_id=tracing_id).set(
                queue=queue_name or UPDATE_COST_MODEL_COSTS_QUEUE
            ) | refresh_materialized_views.si(
                schema_name,
                provider,
                provider_uuid=provider_uuid,
                manifest_id=manifest_id,
                tracing_id=tracing_id).set(
                    queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
    else:
        stmt = f"update_cost_model_costs skipped. schema_name: {schema_name}, provider_uuid: {provider_uuid}"
        LOG.info(log_json(tracing_id, stmt))
        linked_tasks = refresh_materialized_views.s(
            schema_name,
            provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
            tracing_id=tracing_id).set(
                queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)

    chain(linked_tasks).apply_async()

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
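
For completeness, a sketch of how this task is typically dispatched asynchronously, mirroring the requeue call inside the task body; the argument values are placeholders:

update_summary_tables.s(
    "acct10001",                                 # schema_name (placeholder)
    Provider.PROVIDER_AWS,                       # provider type
    "00000000-0000-0000-0000-000000000000",      # provider_uuid (placeholder)
    "2019-01-01",                                # start_date
    end_date="2019-01-31",
    manifest_id=1,
    tracing_id="abc123",
).apply_async(queue=UPDATE_SUMMARY_TABLES_QUEUE)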
Example #24
    def test_update_openshift_on_cloud_summary_tables(self, mock_update):
        """Test that we run OCP on Cloud summary."""
        start_date = DateHelper().this_month_start.date()
        end_date = DateHelper().today.date()

        updater = ReportSummaryUpdater(self.schema, self.azure_provider_uuid)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            self.ocp_on_azure_ocp_provider.uuid,
            self.azure_provider_uuid,
            Provider.PROVIDER_AZURE,
            tracing_id=1,
        )
        mock_update.assert_called()

        mock_update.reset_mock()

        # Only run for cloud sources that support OCP on Cloud
        updater = ReportSummaryUpdater(self.schema,
                                       self.ocp_on_azure_ocp_provider.uuid)
        updater.update_openshift_on_cloud_summary_tables(
            start_date,
            end_date,
            self.ocp_on_azure_ocp_provider.uuid,
            self.azure_provider_uuid,
            Provider.PROVIDER_AZURE,
            tracing_id=1,
        )
        mock_update.assert_not_called()

        mock_update.reset_mock()

        updater = ReportSummaryUpdater(self.schema, self.azure_provider_uuid)
        mock_update.side_effect = Exception
        with self.assertRaises(ReportSummaryUpdaterCloudError):
            updater.update_openshift_on_cloud_summary_tables(
                start_date,
                end_date,
                self.ocp_on_azure_ocp_provider.uuid,
                self.azure_provider_uuid,
                Provider.PROVIDER_AZURE,
                tracing_id=1,
            )
Example #25
    def test_bad_provider(self):
        """Test that an unimplemented provider throws an error."""
        with self.assertRaises(ReportSummaryUpdaterError):
            random_uuid = str(uuid.uuid4())
            _ = ReportSummaryUpdater(self.schema, random_uuid)