Beispiel #1
0
    def check_if_manifest_should_be_downloaded(self, assembly_id):
        """Decide whether the manifest for assembly_id needs downloading.

        A manifest without a database record has never been handled, so
        it is downloaded. A recorded manifest is downloaded again only
        when fewer files were processed than it contains and the most
        recent report completion time is more than one hour old; the
        manifest is reset before returning in that case.

        Args:
            assembly_id: Identifier used, together with this object's
                provider id, to look up the manifest record.

        Returns:
            True if the manifest should be downloaded and processed,
            False otherwise.
        """
        stale_before = (DateAccessor().today_with_timezone('UTC')
                        - datetime.timedelta(hours=1))
        with ReportManifestDBAccessor() as accessor:
            record = accessor.get_manifest(assembly_id, self._provider_id)
            if not record:
                # No record yet: this is the first download of the manifest.
                return True

            record_id = record.id
            files_done = record.num_processed_files
            files_expected = record.num_total_files
            if not (files_done < files_expected):
                # Nothing left to process, so do not download again.
                return False

            last_completed = accessor.get_last_report_completed_datetime(
                record_id)
            if not last_completed or not (last_completed < stale_before):
                # Either no completion time is recorded or a file finished
                # within the last hour; leave the manifest alone.
                return False

            # Processing stopped over an hour ago without finishing:
            # start over.
            accessor.reset_manifest(record_id)
            return True
Beispiel #2
0
def summarize_reports(reports_to_summarize):
    """
    Summarize reports returned from line summary task.

    Duplicate report dictionaries are collapsed (regardless of key order)
    and the remaining reports are handled in first-seen order. For each
    report whose manifest is ready for summary, an ``update_summary_tables``
    task is queued for the window from two days ago through today.

    Args:
        reports_to_summarize (list) list of report dicts to process. Each
            dict is read for the keys schema_name, provider_type,
            provider_uuid and manifest_id; its values must be hashable.

    Returns:
        None

    """
    # Deduplicate on the *set* of items so two dicts with the same content
    # but a different key order count as one report, and keep first-seen
    # order so tasks are queued deterministically.
    seen = set()
    reports_deduplicated = []
    for candidate in reports_to_summarize:
        fingerprint = frozenset(candidate.items())
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        reports_deduplicated.append(dict(candidate))

    for report in reports_deduplicated:
        # For day-to-day summarization we choose a small window to
        # cover new data from a window of days.
        # This saves us from re-summarizing unchanged data and cuts down
        # on processing time. There are override mechanisms in the
        # Updater classes for when full-month summarization is
        # required.
        manifest_id = report.get("manifest_id")
        with ReportManifestDBAccessor() as manifest_accessor:
            if not manifest_accessor.manifest_ready_for_summary(manifest_id):
                continue
            # Read the clock once so both ends of the window are derived
            # from the same day.
            today = DateAccessor().today()
            start_date = (today - datetime.timedelta(days=2)).strftime("%Y-%m-%d")
            end_date = today.strftime("%Y-%m-%d")
            LOG.info("report to summarize: %s", str(report))
            update_summary_tables.delay(
                report.get("schema_name"),
                report.get("provider_type"),
                report.get("provider_uuid"),
                start_date=start_date,
                end_date=end_date,
                manifest_id=manifest_id,
            )
    def setUpClass(cls):
        """Create the Azure fixtures shared by every test in the class."""
        super().setUpClass()
        cls.test_report_path = './koku/masu/test/data/azure/costreport_a243c6f2-199f-4074-9a2c-40e671cf1584.csv'
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()

        with ReportingCommonDBAccessor() as common_db:
            cls.column_map = common_db.column_map

        # Work on a copy so the module-level table map is left untouched,
        # then drop the tables named below before listing the rest.
        table_map = copy.deepcopy(AZURE_REPORT_TABLE_MAP)
        for excluded_key in (
            'line_item_daily_summary',
            'tags_summary',
            'ocp_on_azure_daily_summary',
            'ocp_on_azure_project_daily_summary',
        ):
            table_map.pop(excluded_key, None)
        cls.report_tables = list(table_map.values())

        # Keep the first data row of the sample report for the tests.
        with open(cls.test_report_path, 'r', encoding='utf-8-sig') as report_file:
            cls.row = next(csv.DictReader(report_file))
    def _determine_if_full_summary_update_needed(self, bill):
        """Report whether the whole billing period should be re-summarized.

        A full update is requested when the manifest is ready for summary
        and either the bill was finalized today (UTC) or the bill has no
        summary data creation time yet.

        Args:
            bill: Object exposing ``summary_data_creation_datetime`` and
                ``finalized_datetime``.

        Returns:
            bool: True when a full billing-period update is needed.
        """
        now_utc = self._date_accessor.today_with_timezone("UTC")
        created_on = bill.summary_data_creation_datetime
        finalized_on = bill.finalized_datetime

        with ReportManifestDBAccessor() as accessor:
            processing_done = accessor.manifest_ready_for_summary(self._manifest.id)

        finalized_today = (
            finalized_on is not None and finalized_on.date() == now_utc.date()
        )
        never_summarized = created_on is None

        # Full month update only right after processing finished, and then
        # only for a bill finalized today or one summarized for the first time.
        return bool(processing_done and (finalized_today or never_summarized))
Beispiel #5
0
 def setUpClass(cls):
     """Build the accessors, table lists and manifest data shared by the tests."""
     common_accessor = ReportingCommonDBAccessor()
     cls.common_accessor = common_accessor
     cls.column_map = common_accessor.column_map

     aws_accessor = AWSReportDBAccessor(schema='acct10001',
                                        column_map=cls.column_map)
     cls.accessor = aws_accessor
     cls.report_schema = aws_accessor.report_schema
     cls.creator = ReportObjectCreator(
         aws_accessor,
         cls.column_map,
         cls.report_schema.column_types,
     )

     cls.all_tables = list(AWS_CUR_TABLE_MAP.values())
     cls.foreign_key_tables = [
         AWS_CUR_TABLE_MAP[table_key]
         for table_key in ('bill', 'product', 'pricing', 'reservation')
     ]

     # The manifest's billing period starts on the first day of this month.
     first_of_month = datetime.datetime.utcnow().replace(day=1)
     cls.manifest_dict = dict(
         assembly_id='1234',
         billing_period_start_datetime=first_of_month,
         num_total_files=2,
         provider_id=1,
     )
     cls.manifest_accessor = ReportManifestDBAccessor()
Beispiel #6
0
    def setUpClass(cls):
        """Create the OCP report paths, accessors and sample rows for the tests."""
        super().setUpClass()

        def first_row(csv_path):
            # Return the first data row of the CSV file at csv_path.
            with open(csv_path, "r") as csv_file:
                return next(csv.DictReader(csv_file))

        # These test reports should be replaced with OCP reports once processor is implemented.
        cls.test_report_path = "./koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv"
        cls.storage_report_path = "./koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv"
        cls.node_report_path = "./koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_node_labels.csv"
        cls.unknown_report = "./koku/masu/test/data/test_cur.csv"
        cls.test_report_gzip_path = "./koku/masu/test/data/test_cur.csv.gz"

        cls.date_accessor = DateAccessor()
        utc_today = cls.date_accessor.today_with_timezone("UTC")
        cls.billing_start = utc_today.replace(
            year=2018, month=6, day=1, hour=0, minute=0, second=0)
        cls.assembly_id = "1234"

        cls.manifest_accessor = ReportManifestDBAccessor()

        cls.accessor = OCPReportDBAccessor(cls.schema)
        cls.report_schema = cls.accessor.report_schema

        cls.report_tables = list(copy.deepcopy(OCP_REPORT_TABLE_MAP).values())

        # Grab one row from each sample report to work with.
        cls.row = first_row(cls.test_report_path)
        cls.storage_row = first_row(cls.storage_report_path)
    def test_get_report_context_for_date_should_download(self, mock_session, mock_manifest, mock_delete, mock_check):
        """Verify the report context returned when the manifest must be downloaded."""
        month_start = DateAccessor().today().replace(day=1, second=1, microsecond=1)
        credential = fake_arn(service="iam", generate_account_id=True)
        downloader = AWSReportDownloader(
            self.mock_task,
            self.fake_customer_name,
            credential,
            self.fake_bucket_name,
            provider_uuid=self.aws_provider_uuid,
        )

        assembly_id = "1234"
        report_keys = ["file1", "file2"]
        period_start = month_start.strftime(downloader.manifest_date_format)
        compression = downloader.report.get("Compression")

        # The mocked manifest call yields a (str, manifest dict) pair.
        manifest_payload = {
            "assemblyId": assembly_id,
            "Compression": compression,
            "reportKeys": report_keys,
            "billingPeriod": {"start": period_start},
        }
        mock_manifest.return_value = ("", manifest_payload)
        mock_check.return_value = True

        result = downloader.get_report_context_for_date(month_start)

        # The manifest id is only known once the downloader has stored it.
        with ReportManifestDBAccessor() as accessor:
            stored_manifest = accessor.get_manifest(assembly_id, self.aws_provider_uuid)
            stored_manifest_id = stored_manifest.id

        expected = {
            "manifest_id": stored_manifest_id,
            "assembly_id": assembly_id,
            "compression": compression,
            "files": report_keys,
        }

        self.assertIsInstance(result, dict)
        for key in result:
            self.assertIn(key, expected)
            self.assertEqual(result[key], expected.get(key))
Beispiel #8
0
    def _process_manifest_db_record(self, assembly_id, billing_start, num_of_files):
        """Create the manifest record if it is missing, then mark it updated.

        Args:
            assembly_id: Identifier used with this object's provider uuid
                to look up the manifest.
            billing_start: Stored as the billing period start of a newly
                added manifest.
            num_of_files: Stored as the total file count of a newly added
                manifest.

        Returns:
            The id of the existing or newly added manifest record.
        """
        LOG.info("Inserting/updating manifest in database for assembly_id: %s", assembly_id)

        with ReportManifestDBAccessor() as accessor:
            entry = accessor.get_manifest(assembly_id, self._provider_uuid)

            if not entry:
                msg = f"No manifest entry found in database. Adding for bill period start: {billing_start}"
                LOG.info(log_json(self.request_id, msg, self.context))
                entry = accessor.add(
                    assembly_id=assembly_id,
                    billing_period_start_datetime=billing_start,
                    num_total_files=num_of_files,
                    provider_uuid=self._provider_uuid,
                    task=self._task.request.id,
                )

            # Existing and freshly created records are both marked updated.
            accessor.mark_manifest_as_updated(entry)
            return entry.id
Beispiel #9
0
    def download_file(self, key, stored_etag=None, manifest_id=None, start_date=None):
        """
        Download a file from Azure bucket.

        The export is saved under this downloader's exports data directory
        and then copied to the S3 bucket. If the manifest's S3 CSV files
        have not been cleared yet, files not in the current set are removed
        from S3 and the manifest is marked as cleared.

        Args:
            key (str): The object key identified.
            stored_etag: Accepted for interface compatibility; not read by
                this method.
            manifest_id: Id of the manifest the file belongs to.
            start_date: Passed to the S3 path prefix and the S3 copy.

        Returns:
            (tuple): ``(full_file_path, etag, file_creation_date, [])`` -
                the local path of the saved file, the blob's etag, the
                blob's last-modified value, and an empty list.

        Raises:
            AzureReportDownloaderError: when no cost export exists for key.

        """
        local_filename = utils.get_local_file_name(key)
        full_file_path = f"{self._get_exports_data_directory()}/{local_filename}"

        try:
            blob = self._azure_client.get_cost_export_for_key(key, self.container_name)
        except AzureCostReportNotFound as ex:
            msg = f"Error when downloading Azure report for key: {key}. Error {ex}"
            LOG.error(log_json(self.request_id, msg, self.context))
            # Chain explicitly so the original lookup failure is kept as the cause.
            raise AzureReportDownloaderError(msg) from ex
        etag = blob.etag
        file_creation_date = blob.last_modified

        msg = f"Downloading {key} to {full_file_path}"
        LOG.info(log_json(self.request_id, msg, self.context))
        # The call's return value is not needed; the file is written to
        # ``destination``.
        self._azure_client.download_cost_export(key, self.container_name, destination=full_file_path)

        # Push to S3
        s3_csv_path = get_path_prefix(
            self.account, Provider.PROVIDER_AZURE, self._provider_uuid, start_date, Config.CSV_DATA_TYPE
        )
        copy_local_report_file_to_s3_bucket(
            self.request_id, s3_csv_path, full_file_path, local_filename, manifest_id, start_date, self.context
        )

        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)

        # Clear stale S3 files only once per manifest.
        if not manifest_accessor.get_s3_csv_cleared(manifest):
            remove_files_not_in_set_from_s3_bucket(self.request_id, s3_csv_path, manifest_id)
            manifest_accessor.mark_s3_csv_cleared(manifest)

        msg = f"Returning full_file_path: {full_file_path}, etag: {etag}"
        LOG.info(log_json(self.request_id, msg, self.context))
        return full_file_path, etag, file_creation_date, []
Beispiel #10
0
    def setUpClass(cls):
        """Create the Azure report paths, accessors and sample row for the tests."""
        super().setUpClass()
        cls.test_report_path = "./koku/masu/test/data/azure/costreport_a243c6f2-199f-4074-9a2c-40e671cf1584.csv"
        cls.test_v2_report_path = "./koku/masu/test/data/azure/azure_version_2.csv"
        cls.camelCase_report_path = "./koku/masu/test/data/azure/azure_camelCased.csv"

        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()

        # Work on a copy so the module-level table map is left untouched,
        # then drop the tables named below before listing the rest.
        table_map = copy.deepcopy(AZURE_REPORT_TABLE_MAP)
        for excluded_key in (
            "line_item_daily_summary",
            "tags_summary",
            "enabled_tag_keys",
            "ocp_on_azure_daily_summary",
            "ocp_on_azure_project_daily_summary",
            "ocp_on_azure_tags_summary",
        ):
            table_map.pop(excluded_key, None)
        cls.report_tables = list(table_map.values())

        # Keep the first data row of the sample report, with its column
        # names lower-cased.
        with open(cls.test_report_path, "r", encoding="utf-8-sig") as report_file:
            first_row = next(csv.DictReader(report_file))
            cls.row = {column.lower(): cell for column, cell in first_row.items()}
Beispiel #11
0
def refresh_materialized_views(schema_name, provider_type, manifest_id=None):
    """Refresh the database's materialized views for reporting.

    The set of views is chosen from the provider type; an unrecognized
    provider type refreshes nothing. When a manifest id is given, that
    manifest is marked as completed afterwards.

    Args:
        schema_name: Schema in which the views are refreshed.
        provider_type: Provider type compared against the ``Provider``
            constants to select the views.
        manifest_id: Optional id of the manifest to mark as completed.

    Returns:
        None
    """
    materialized_views = ()
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        materialized_views = AWS_MATERIALIZED_VIEWS
    elif provider_type == Provider.PROVIDER_OCP:
        # Plain equality on purpose: ``in (Provider.PROVIDER_OCP)`` is not a
        # tuple, so it performed a substring test against the constant and
        # raised TypeError for a non-string provider type such as None.
        materialized_views = OCP_MATERIALIZED_VIEWS
    elif provider_type in (Provider.PROVIDER_AZURE,
                           Provider.PROVIDER_AZURE_LOCAL):
        materialized_views = AZURE_MATERIALIZED_VIEWS

    with schema_context(schema_name):
        for view in materialized_views:
            table_name = view._meta.db_table
            with connection.cursor() as cursor:
                # table_name comes from model metadata, not from user input.
                cursor.execute(
                    f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                LOG.info("Refreshed %s.", table_name)

    if manifest_id:
        # Processing for this manifest should be complete after this step
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            manifest_accessor.mark_manifest_as_completed(manifest)
Beispiel #12
0
    def test_check_if_manifest_should_be_downloaded_error_processing_manifest(
            self):
        """Verify a half-processed manifest whose last file finished an hour ago is downloaded again."""
        # Leave the manifest with one of its two files processed.
        with ReportManifestDBAccessor() as accessor:
            record = accessor.get_manifest_by_id(self.manifest_id)
            record.num_processed_files, record.num_total_files = 1, 2
            accessor.commit()

        # Record a start and a completion, then move the completion time
        # back by one hour.
        with ReportStatsDBAccessor(self.report_name,
                                   self.manifest_id) as stats:
            stats.log_last_started_datetime()
            stats.commit()
            stats.log_last_completed_datetime()
            stats.commit()
            utc_now = self.date_accessor.today_with_timezone('UTC')
            an_hour_ago = utc_now - datetime.timedelta(hours=1)
            stats.update(last_completed_datetime=an_hour_ago)
            stats.commit()

        should_download = self.downloader.check_if_manifest_should_be_downloaded(
            self.assembly_id)
        self.assertTrue(should_download)
Beispiel #13
0
def summarize_manifest(report_meta):
    """
    Queue manifest summarization once the manifest is ready for summary.

    Args:
        report_meta (Dict) - keys read from it:
                        schema_name,
                        manifest_id,
                        provider_uuid,
                        provider_type,
                        start (optional),
                        end (optional)

    Returns:
        The value returned by ``summarize_reports.delay`` when the manifest
        is ready for summary, otherwise None.

    """
    schema_name = report_meta.get("schema_name")
    manifest_id = report_meta.get("manifest_id")
    provider_uuid = report_meta.get("provider_uuid")
    provider_type = report_meta.get("provider_type")
    start_date = report_meta.get("start")
    end_date = report_meta.get("end")

    with ReportManifestDBAccessor() as accessor:
        if not accessor.manifest_ready_for_summary(manifest_id):
            return None

        summary_request = {
            "schema_name": schema_name,
            "provider_type": provider_type,
            "provider_uuid": provider_uuid,
            "manifest_id": manifest_id,
        }
        # The date range is forwarded only when both ends were supplied.
        if start_date and end_date:
            summary_request.update(start=start_date, end=end_date)
        return summarize_reports.delay([summary_request])
Beispiel #14
0
 def setUp(self):
     """Create a downloader, a two-file manifest and its report status rows."""
     super().setUp()
     self.cache_key = self.fake.word()
     self.downloader = ReportDownloaderBase(
         provider_uuid=self.aws_provider_uuid,
         cache_key=self.cache_key,
     )
     utc_today = self.date_accessor.today_with_timezone("UTC")
     self.billing_start = utc_today.replace(day=1)
     self.manifest_dict = dict(
         assembly_id=self.assembly_id,
         billing_period_start_datetime=self.billing_start,
         num_total_files=2,
         provider_uuid=self.aws_provider_uuid,
     )

     with ReportManifestDBAccessor() as accessor:
         self.manifest = accessor.add(**self.manifest_dict)
         self.manifest.save()
         self.manifest_id = self.manifest.id

     # One status row per manifest file, neither started nor completed.
     for file_number in (1, 2):
         baker.make(
             CostUsageReportStatus,
             report_name=f"{self.assembly_id}_file_{file_number}.csv.gz",
             last_completed_datetime=None,
             last_started_datetime=None,
             manifest_id=self.manifest_id,
         )
Beispiel #15
0
    def _delete_line_items_in_range(self, bill_id, scan_start):
        """Delete stale line items of a bill on or after a scan start.

        Nothing is deleted unless this object has a manifest id and the
        manifest accessor reports zero processed files for it.

        Args:
            bill_id: Bill identifier passed to
                ``get_lineitem_query_for_billid``.
            scan_start: Inclusive lower bound applied to ``usage_start``.

        Returns:
            bool: True when the delete query was issued, False when the
            deletion was skipped.
        """
        if not self._manifest_id:
            return False
        with ReportManifestDBAccessor() as manifest_accessor:
            num_processed_files = manifest_accessor.number_of_files_processed(
                self._manifest_id)
            if num_processed_files != 0:
                return False

        gcp_date_filter = {"usage_start__gte": scan_start}
        with GCPReportDBAccessor(self._schema) as accessor:
            line_item_query = accessor.get_lineitem_query_for_billid(bill_id)
            line_item_query = line_item_query.filter(**gcp_date_filter)
            deleted = line_item_query.delete()
            # NOTE(review): presumably this is a Django QuerySet, whose
            # delete() returns ``(total_rows, per_model_counts)``. That tuple
            # is always truthy, so the row total is unpacked before testing
            # it; a plain count is still accepted as-is.
            delete_count = deleted[0] if isinstance(deleted, tuple) else deleted
            if delete_count:
                log_statement = (f"Deleting data for:\n"
                                 f" schema_name: {self._schema}\n"
                                 f" provider_uuid: {self._provider_uuid}\n"
                                 f" bill ID: {bill_id}\n"
                                 f" on or after {scan_start}")
                LOG.info(log_statement)
        return True
    def setUpClass(cls):
        """Create the OCP report paths, accessors and sample row for the tests."""
        super().setUpClass()
        # These test reports should be replaced with OCP reports once processor is implemented.
        cls.test_report = './koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
        cls.storage_report = './koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
        cls.unknown_report = './koku/masu/test/data/test_cur.csv'
        cls.test_report_gzip = './koku/masu/test/data/test_cur.csv.gz'

        cls.date_accessor = DateAccessor()
        utc_today = cls.date_accessor.today_with_timezone('UTC')
        cls.billing_start = utc_today.replace(
            year=2018, month=6, day=1, hour=0, minute=0, second=0)
        cls.assembly_id = '1234'

        cls.manifest_accessor = ReportManifestDBAccessor()

        with ReportingCommonDBAccessor() as common_db:
            cls.column_map = common_db.column_map

        cls.accessor = OCPReportDBAccessor(cls.schema, cls.column_map)
        cls.report_schema = cls.accessor.report_schema

        cls.report_tables = list(copy.deepcopy(OCP_REPORT_TABLE_MAP).values())

        # Keep the first data row of the sample report for the tests.
        with open(cls.test_report, 'r') as report_file:
            cls.row = next(csv.DictReader(report_file))
    def test_get_manifest_context_new_info(self, mock_manifest, mock_delete):
        """Verify the manifest is read and its operator details are stored."""
        current_month = DateAccessor().today().replace(
            day=1, second=1, microsecond=1)

        assembly_id = "1234"
        compression = "PLAIN"
        report_keys = ["file1", "file2"]
        version = "5678"
        cluster_id = "4e009161-4f40-42c8-877c-3e59f6baea3d"
        cluster_channel = "stable-4.6"

        # Status section of the mocked manifest.
        cr_status = {
            "clusterID": cluster_id,
            "clusterVersion": cluster_channel,
            "api_url": "https://cloud.redhat.com",
            "authentication": {"type": "token"},
            "packaging": {"max_reports_to_store": 30, "max_size_MB": 100},
            "upload": {"ingress_path": "/api/ingress/v1/upload", "upload": False},
            "operator_commit": "a09a5b21e55ce4a07fe31aa560650b538ec6de7c",
            "prometheus": {"error": "fake error"},
            "reports": {
                "report_month": "07",
                "last_hour_queried": "2021-07-28 11:00:00 - 2021-07-28 11:59:59",
            },
            "source": {
                "sources_path": "/api/sources/v1.0/",
                "name": "INSERT-SOURCE-NAME",
            },
        }
        mock_manifest.return_value = {
            "uuid": assembly_id,
            "Compression": compression,
            "reportKeys": report_keys,
            "date": current_month,
            "files": report_keys,
            "version": version,
            "certified": False,
            "cluster_id": cluster_id,
            "cr_status": cr_status,
        }

        downloader = self.ocp_report_downloader
        # The version is absent from the context until the manifest is read.
        self.assertIsNone(downloader.context.get("version"))

        result = downloader.get_manifest_context_for_date(current_month)
        self.assertEqual(result.get("assembly_id"), assembly_id)
        self.assertEqual(result.get("compression"), compression)
        self.assertIsNotNone(result.get("files"))

        stored = ReportManifestDBAccessor().get_manifest_by_id(
            result.get("manifest_id"))
        self.assertEqual(stored.operator_version, version)
        self.assertEqual(stored.operator_certified, False)
        self.assertEqual(stored.operator_airgapped, True)
        self.assertEqual(stored.cluster_channel, cluster_channel)
        self.assertEqual(stored.cluster_id, cluster_id)
        self.assertEqual(stored.operator_errors,
                         {"prometheus_error": "fake error"})
        self.assertEqual(downloader.context.get("version"), version)
class ReportManifestDBAccessorTest(IamTestCase):
    """Exercise the ReportManifestDBAccessor against the test database."""

    def setUp(self):
        """Prepare the manifest data and the accessor used by each test."""
        super().setUp()
        self.schema = self.schema_name
        utc_today = DateAccessor().today_with_timezone("UTC")
        self.billing_start = utc_today.replace(day=1)
        self.manifest_dict = dict(
            assembly_id="1234",
            billing_period_start_datetime=self.billing_start,
            num_total_files=2,
            provider_uuid=self.provider_uuid,
        )
        self.manifest_accessor = ReportManifestDBAccessor()

    def tearDown(self):
        """Delete every manifest returned by the accessor's query."""
        super().tearDown()
        with schema_context(self.schema):
            for leftover in self.manifest_accessor._get_db_obj_query().all():
                self.manifest_accessor.delete(leftover)

    def test_initializer(self):
        """A new accessor has its table set."""
        self.assertIsNotNone(ReportManifestDBAccessor()._table)

    def test_get_manifest(self):
        """The manifest added for an assembly id and provider is returned."""
        assembly_id = self.manifest_dict.get("assembly_id")
        provider_uuid = self.manifest_dict.get("provider_uuid")
        with schema_context(self.schema):
            added = self.manifest_accessor.add(**self.manifest_dict)
            found = self.manifest_accessor.get_manifest(
                assembly_id, provider_uuid)

        self.assertIsNotNone(found)
        self.assertEqual(added, found)
        self.assertEqual(found.assembly_id, assembly_id)
        self.assertEqual(found.provider_id, provider_uuid)
        self.assertEqual(found.num_total_files,
                         self.manifest_dict.get("num_total_files"))

    def test_get_manifest_by_id(self):
        """The manifest added is returned when looked up by its id."""
        with schema_context(self.schema):
            added = self.manifest_accessor.add(**self.manifest_dict)
            found = self.manifest_accessor.get_manifest_by_id(added.id)
        self.assertIsNotNone(found)
        self.assertEqual(added, found)

    def test_mark_manifest_as_updated(self):
        """Marking a manifest updated moves its updated time past 'now'."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            before_update = DateAccessor().today_with_timezone("UTC")
            self.manifest_accessor.mark_manifest_as_updated(manifest)
            self.assertGreater(manifest.manifest_updated_datetime,
                               before_update)

    def test_mark_manifest_as_updated_none_manifest(self):
        """Marking None as updated must not raise."""
        try:
            self.manifest_accessor.mark_manifest_as_updated(None)
        except Exception as err:
            self.fail(f"Test failed with error: {err}")

    def test_mark_manifest_as_completed_none_manifest(self):
        """Marking None as completed must not raise."""
        try:
            self.manifest_accessor.mark_manifest_as_completed(None)
        except Exception as err:
            self.fail(f"Test failed with error: {err}")

    def test_get_manifest_list_for_provider_and_bill_date(self):
        """Each added manifest shows up in the provider/bill-date listing."""
        bill_date = self.manifest_dict["billing_period_start_datetime"].date()
        manifest_dict = copy.deepcopy(self.manifest_dict)

        # Add three manifests one at a time; the listing grows by one each time.
        for expected_total, assembly_id in enumerate(("1234", "2345", "3456"),
                                                     start=1):
            manifest_dict["assembly_id"] = assembly_id
            self.manifest_accessor.add(**manifest_dict)
            listing = self.manifest_accessor.get_manifest_list_for_provider_and_bill_date(
                self.provider_uuid, bill_date)
            self.assertEqual(len(listing), expected_total)

    def test_get_last_seen_manifest_ids(self):
        """get_last_seen_manifest_ids returns the appropriate assembly_ids."""
        accessor = self.manifest_accessor

        def last_seen():
            # Query the assembly ids for this test's billing period start.
            return accessor.get_last_seen_manifest_ids(self.billing_start)

        # the most recently seen manifests that haven't been processed are returned
        other_provider_dict = {
            "assembly_id": "5678",
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 1,
            "provider_uuid": "00000000-0000-0000-0000-000000000002",
        }
        first = accessor.add(**self.manifest_dict)
        second = accessor.add(**other_provider_dict)
        self.assertEqual(last_seen(),
                         [first.assembly_id, second.assembly_id])

        # once a manifest's files have been processed it is no longer returned
        helper = ManifestCreationHelper(
            second.id,
            other_provider_dict.get("num_total_files"),
            other_provider_dict.get("assembly_id"),
        )
        helper.generate_test_report_files()
        helper.process_all_files()
        self.assertEqual(last_seen(), [first.assembly_id])

        # of two manifests with the same provider only the most recently
        # seen is returned
        same_provider_dict = {
            "assembly_id": "91011",
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 1,
            "provider_uuid": self.provider_uuid,
        }
        third = accessor.add(**same_provider_dict)
        self.assertEqual(last_seen(), [third.assembly_id])

        # manifests for a different billing month are not returned
        two_months_back = self.billing_start + relativedelta(months=-2)
        third.billing_period_start_datetime = two_months_back
        third.save()
        self.assertEqual(last_seen(), [first.assembly_id])

    def test_is_last_completed_datetime_null(self):
        """is_last_completed_datetime_null is true until a completion time is set."""
        manifest_id = 123456789

        def completed_is_null():
            # A fresh accessor is used for every check.
            return ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id)

        # No manifest at all.
        self.assertTrue(completed_is_null())

        # Manifest with a status row that has no completion time.
        baker.make(CostUsageReportManifest, id=manifest_id)
        baker.make(CostUsageReportStatus,
                   manifest_id=manifest_id,
                   last_completed_datetime=None)
        self.assertTrue(completed_is_null())

        # Status row now carries a completion time.
        CostUsageReportStatus.objects.filter(manifest_id=manifest_id).update(
            last_completed_datetime=FAKE.date())
        self.assertFalse(completed_is_null())
Beispiel #19
0
def _process_report_file(schema_name, provider, report_dict):
    """
    Process one report file and record its progress.

    The start time is logged before processing begins. If the processor
    raises, the start time is cleared and the error is re-raised. On
    success the completion time is logged, the manifest (when found) is
    marked as updated, and the provider is marked as set up. Processed
    files are removed only when the provider had already completed setup
    before this call.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        report_dict   (dict) The report data dict from previous task

    Returns:
        (Boolean) True once the report has been processed

    Raises:
        ReportProcessorError, ReportProcessorDBError: re-raised from processing

    """
    provider_uuid = report_dict.get("provider_uuid")
    manifest_id = report_dict.get("manifest_id")
    compression = report_dict.get("compression")
    report_path = report_dict.get("file")
    start_date = report_dict.get("start_date")

    LOG.info(
        f"Processing Report:\n"
        f" schema_name: {schema_name}\n"
        f" provider: {provider}\n"
        f" provider_uuid: {provider_uuid}\n"
        f" file: {report_path}\n"
        f" compression: {compression}\n"
        f" start_date: {start_date}"
    )
    memory = psutil.virtual_memory()
    LOG.info(f"Avaiable memory: {memory.free} bytes ({memory.percent}%)")

    # Only the final path component identifies the report in the stats table.
    file_name = report_path.rsplit("/", 1)[-1]
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()

    try:
        processor = ReportProcessor(
            schema_name=schema_name,
            report_path=report_path,
            compression=compression,
            provider=provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
        )
        processor.process()
    except (ReportProcessorError, ReportProcessorDBError) as processing_error:
        # Undo the "started" marker so the file is not seen as in progress.
        with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
            stats_recorder.clear_last_started_datetime()
        raise processing_error

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_db:
        manifest = manifest_db.get_manifest_by_id(manifest_id)
        if not manifest:
            LOG.error("Unable to find manifest for ID: %s, file %s",
                      manifest_id, file_name)
        else:
            manifest_db.mark_manifest_as_updated(manifest)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        already_set_up = provider_accessor.get_setup_complete()
        if already_set_up:
            report_dir = path.dirname(report_path)
            files = processor.remove_processed_files(report_dir)
            LOG.info("Temporary files removed: %s", str(files))
        provider_accessor.setup_complete()

    return True
class ReportStatsDBAccessorTest(MasuTestCase):
    """Test Cases for the ReportStatsDBAccessor object."""

    def setUp(self):
        """Create the manifest that the report status rows in these tests reference."""
        super().setUp()
        billing_start = datetime.utcnow().replace(day=1)
        manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": billing_start,
            "num_total_files": 2,
            "provider_uuid": self.aws_provider_uuid,
        }
        self.manifest_accessor = ReportManifestDBAccessor()

        manifest = self.manifest_accessor.add(**manifest_dict)
        self.manifest_id = manifest.id

    def _assert_datetime_fields(self, actual, expected_fields):
        """Assert the year, month, day, hour, minute and second of ``actual``.

        Args:
            actual: datetime-like value read back from the accessor
            expected_fields (tuple): (year, month, day, hour, minute, second)
        """
        actual_fields = (
            actual.year,
            actual.month,
            actual.day,
            actual.hour,
            actual.minute,
            actual.second,
        )
        self.assertEqual(actual_fields, expected_fields)

    def test_initializer(self):
        """Test Initializer."""
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)
        self.assertIsNotNone(saver._obj)

    def test_initializer_preexisting_report(self):
        """Test getting a new accessor stats on a preexisting report."""
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)
        saver.update(
            cursor_position=33,
            last_completed_datetime="2011-1-1 11:11:11",
            last_started_datetime="2022-2-2 22:22:22",
            etag="myetag",
        )

        self.assertIsNotNone(saver._obj)

        # Get another accessor for the same report and verify we get back the right information.
        saver2 = ReportStatsDBAccessor("myreport", self.manifest_id)
        self._assert_datetime_fields(saver2.get_last_completed_datetime(), (2011, 1, 1, 11, 11, 11))

        # The etag is read through the second accessor as well; checking it on
        # the first accessor would not exercise the preexisting-report path.
        self.assertEqual(saver2.get_etag(), "myetag")

    def test_add_remove(self):
        """Test basic add/remove logic."""
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)

        self.assertTrue(saver.does_db_entry_exist())
        returned_obj = saver._get_db_obj_query()
        self.assertEqual(returned_obj.first().report_name, "myreport")

        saver.delete()
        returned_obj = saver._get_db_obj_query()
        self.assertIsNone(returned_obj.first())

    def test_update(self):
        """Test updating an existing row."""
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)

        returned_obj = saver._get_db_obj_query()
        self.assertEqual(returned_obj.first().report_name, "myreport")

        saver.update(
            cursor_position=33,
            last_completed_datetime=parser.parse("2011-1-1 11:11:11"),
            last_started_datetime=parser.parse("2022-2-2 22:22:22"),
            etag="myetag",
        )

        self._assert_datetime_fields(saver.get_last_completed_datetime(), (2011, 1, 1, 11, 11, 11))
        self._assert_datetime_fields(saver.get_last_started_datetime(), (2022, 2, 2, 22, 22, 22))

        self.assertEqual(saver.get_etag(), "myetag")

        saver.delete()
        returned_obj = saver._get_db_obj_query()
        self.assertIsNone(returned_obj.first())

    def test_log_last_started_datetime(self):
        """Test convenience function for last started processing time."""
        initial_count = CostUsageReportStatus.objects.count()
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)
        saver.log_last_started_datetime()
        self.assertIsNotNone(saver.get_last_started_datetime())
        saver.delete()
        # Deleting the row must bring the table back to its starting size.
        self.assertEqual(CostUsageReportStatus.objects.count(), initial_count)

    def test_log_last_completed_datetime(self):
        """Test convenience function for last completed processing time."""
        initial_count = CostUsageReportStatus.objects.count()
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)
        saver.log_last_completed_datetime()
        self.assertIsNotNone(saver.get_last_completed_datetime())
        saver.delete()
        # Deleting the row must bring the table back to its starting size.
        self.assertEqual(CostUsageReportStatus.objects.count(), initial_count)

    def test_clear_last_started_date(self):
        """Test convenience function for clear last started date."""
        saver = ReportStatsDBAccessor("myreport", self.manifest_id)
        saver.log_last_started_datetime()
        self.assertIsNotNone(saver.get_last_started_datetime())
        saver.clear_last_started_datetime()
        self.assertIsNone(saver.get_last_started_datetime())
    def convert_to_parquet(self):  # noqa: C901
        """
        Convert every CSV file in ``self.file_list`` to Parquet.

        For provider types other than OCP and GCP, previous reports are first
        removed from the S3 parquet paths so data isn't duplicated. This is
        skipped when the manifest is already flagged as cleared, and the
        manifest is flagged afterwards.

        Each CSV file is converted with ``convert_csv_to_parquet``; daily
        parquet files are also created for every provider type except Azure.
        Files that fail to convert are collected and logged as a warning
        rather than raised.

        Returns:
            (str, list) The parquet base filename from the last conversion and
            the collected daily data frames. If the CSV, Parquet or local path
            is missing, ``("", pd.DataFrame())`` is returned and nothing is
            converted.

        """
        parquet_base_filename = ""

        if self.csv_path_s3 is None or self.parquet_path_s3 is None or self.local_path is None:
            msg = (
                f"Invalid paths provided to convert_csv_to_parquet."
                f"CSV path={self.csv_path_s3}, Parquet path={self.parquet_path_s3}, and local_path={self.local_path}."
            )
            LOG.error(log_json(self.tracing_id, msg, self.error_context))
            return "", pd.DataFrame()

        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.get_manifest_by_id(self.manifest_id)

        # OCP data is daily chunked report files.
        # AWS and Azure are monthly reports. Previous reports should be removed so data isn't duplicated
        if not manifest_accessor.get_s3_parquet_cleared(manifest) and self.provider_type not in (
            Provider.PROVIDER_OCP,
            Provider.PROVIDER_GCP,
            Provider.PROVIDER_GCP_LOCAL,
        ):
            remove_files_not_in_set_from_s3_bucket(
                self.tracing_id, self.parquet_path_s3, self.manifest_id, self.error_context
            )
            remove_files_not_in_set_from_s3_bucket(
                self.tracing_id, self.parquet_daily_path_s3, self.manifest_id, self.error_context
            )
            remove_files_not_in_set_from_s3_bucket(
                self.tracing_id, self.parquet_ocp_on_cloud_path_s3, self.manifest_id, self.error_context
            )
            manifest_accessor.mark_s3_parquet_cleared(manifest)

        failed_conversion = []
        daily_data_frames = []
        for csv_filename in self.file_list:
            if self.provider_type == Provider.PROVIDER_OCP and self.report_type is None:
                msg = f"Could not establish report type for {csv_filename}."
                LOG.warning(log_json(self.tracing_id, msg, self.error_context))
                failed_conversion.append(csv_filename)
                continue
            parquet_base_filename, daily_frame, success = self.convert_csv_to_parquet(csv_filename)
            daily_data_frames.extend(daily_frame)
            # Compare by equality: ``(X)`` without a trailing comma is just X,
            # not a one-element tuple, so ``not in (X)`` is not tuple membership.
            if self.provider_type != Provider.PROVIDER_AZURE:
                self.create_daily_parquet(parquet_base_filename, daily_frame)
            if not success:
                failed_conversion.append(csv_filename)

        if failed_conversion:
            msg = f"Failed to convert the following files to parquet:{','.join(failed_conversion)}."
            LOG.warning(log_json(self.tracing_id, msg, self.error_context))
        return parquet_base_filename, daily_data_frames
Beispiel #22
0
def summarize_manifest(report_meta, manifest_uuid):
    """
    Kick off manifest summary when all report files have completed line item processing.

    Nothing is queued when the manifest is not ready for summary, or when
    either the start or end date is the placeholder "0001-01-01" timestamp.
    In the latter case the CR status is logged and the function returns early.

    Args:
        report_meta (Dict) - keys: value
                        schema_name: String,
                        manifest_id: Integer,
                        provider_uuid: String,
                        provider_type: String,
                        start / end: optional date range,
                        cr_status: optional Dict, read only for invalid dates
        manifest_uuid (string) - The id associated with the payload manifest

    Returns:
        Celery Async UUID, or None when no summary task was queued.

    """
    schema_name = report_meta.get("schema_name")
    manifest_id = report_meta.get("manifest_id")
    provider_uuid = report_meta.get("provider_uuid")
    provider_type = report_meta.get("provider_type")
    start_date = report_meta.get("start")
    end_date = report_meta.get("end")

    context = {"account": schema_name, "provider_uuid": str(provider_uuid)}

    with ReportManifestDBAccessor() as manifest_db:
        if not manifest_db.manifest_ready_for_summary(manifest_id):
            return None

        summary_meta = {
            "schema_name": schema_name,
            "provider_type": provider_type,
            "provider_uuid": provider_uuid,
            "manifest_id": manifest_id,
        }

        if start_date and end_date:
            date_strings = [str(start_date), str(end_date)]
            if "0001-01-01 00:00:00+00:00" in date_strings:
                cr_status = report_meta.get("cr_status", {})
                context["cluster_id"] = cr_status.get("clusterID", "no-cluster-id")
                collection_error = cr_status.get("reports", {}).get("data_collection_message", "")
                if collection_error:
                    # remove potentially sensitive info from the error message
                    scrubbed = re.sub("{[^}]+}", "{***}", collection_error)
                    msg = f"data collection error [operator]: {scrubbed}"
                    cr_status["reports"]["data_collection_message"] = msg
                    # The full CR status is logged below, but the alert is limited to just the query.
                    # The full manifest can be checked to get the full error.
                    LOG.error(msg)
                    LOG.info(log_json(manifest_uuid, msg, context))
                status_line = f"CR Status for invalid manifest: {json.dumps(cr_status)}"
                LOG.info(log_json(manifest_uuid, status_line, context))
                # an invalid payload will fail to summarize, so return before we try
                return None

            range_line = (
                f"Summarizing OCP reports from {str(start_date)}-{str(end_date)} for provider: {provider_uuid}"
            )
            LOG.info(log_json(manifest_uuid, range_line, context))
            summary_meta["start"] = start_date
            summary_meta["end"] = end_date
            summary_meta["manifest_uuid"] = manifest_uuid

        return summarize_reports.s([summary_meta], OCP_QUEUE).apply_async(queue=OCP_QUEUE)
Beispiel #23
0
class OCPCloudReportSummaryUpdaterTest(MasuTestCase):
    """Test cases for the OCPCloudReportSummaryUpdater class."""

    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        cls.date_accessor = DateAccessor()

    def setUp(self):
        """Create the AWS report accessor and manifest accessor used by the tests."""
        super().setUp()
        self.column_map = ReportingCommonDBAccessor().column_map
        self.accessor = AWSReportDBAccessor(schema='acct10001',
                                            column_map=self.column_map)
        self.manifest_accessor = ReportManifestDBAccessor()

    def tearDown(self):
        """Return the database to a pre-test state."""
        self.accessor._session.rollback()

        aws_tables = list(AWS_CUR_TABLE_MAP.values())
        with AWSReportDBAccessor(self.test_schema,
                                 self.column_map) as aws_accessor:
            aws_accessor._session.rollback()
            for table_name in aws_tables:
                tables = aws_accessor._get_db_obj_query(table_name).all()
                for table in tables:
                    aws_accessor._session.delete(table)
                aws_accessor.commit()

        ocp_tables = list(OCP_REPORT_TABLE_MAP.values())
        with OCPReportDBAccessor(self.test_schema,
                                 self.column_map) as ocp_accessor:
            for table_name in ocp_tables:
                tables = ocp_accessor._get_db_obj_query(table_name).all()
                for table in tables:
                    ocp_accessor._session.delete(table)
                ocp_accessor.commit()

        manifests = self.manifest_accessor._get_db_obj_query().all()
        for manifest in manifests:
            self.manifest_accessor.delete(manifest)
        self.manifest_accessor.commit()

    def _summary_date_strings(self):
        """Return today's and tomorrow's UTC dates as ``YYYY-MM-DD`` strings."""
        start_date = self.date_accessor.today_with_timezone('UTC')
        end_date = start_date + datetime.timedelta(days=1)
        return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')

    def _make_updater(self, provider_uuid):
        """Build an updater for the given provider UUID with no manifest."""
        with ProviderDBAccessor(provider_uuid) as provider_accessor:
            provider = provider_accessor.get_provider()
        return OCPCloudReportSummaryUpdater(schema='acct10001',
                                            provider=provider,
                                            manifest=None)

    def _generate_ocp_on_aws_data(self):
        """Create AWS and OCP line items that share one resource id.

        Data is created for today and one month ago, and the daily line item
        tables are populated for both AWS and OCP over that range.
        """
        creator = ReportObjectCreator(self.accessor, self.column_map,
                                      self.accessor.report_schema.column_types)

        bill_ids = []

        today = DateAccessor().today_with_timezone('UTC')
        last_month = today - relativedelta.relativedelta(months=1)
        resource_id = 'i-12345'
        for cost_entry_date in (today, last_month):
            bill = creator.create_cost_entry_bill(cost_entry_date)
            bill_ids.append(str(bill.id))
            cost_entry = creator.create_cost_entry(bill, cost_entry_date)
            product = creator.create_cost_entry_product('Compute Instance')
            pricing = creator.create_cost_entry_pricing()
            reservation = creator.create_cost_entry_reservation()
            creator.create_cost_entry_line_item(bill,
                                                cost_entry,
                                                product,
                                                pricing,
                                                reservation,
                                                resource_id=resource_id)

        self.accessor.populate_line_item_daily_table(last_month, today,
                                                     bill_ids)

        with OCPReportDBAccessor(self.test_schema,
                                 self.column_map) as ocp_accessor:
            cluster_id = self.ocp_provider_resource_name
            with ProviderDBAccessor(provider_uuid=self.ocp_test_provider_uuid
                                    ) as provider_access:
                provider_id = provider_access.get_provider().id

            for cost_entry_date in (today, last_month):
                period = creator.create_ocp_report_period(
                    cost_entry_date,
                    provider_id=provider_id,
                    cluster_id=cluster_id)
                report = creator.create_ocp_report(period, cost_entry_date)
                creator.create_ocp_usage_line_item(period,
                                                   report,
                                                   resource_id=resource_id)
            cluster_id = get_cluster_id_from_provider(
                self.ocp_test_provider_uuid)
            ocp_accessor.populate_line_item_daily_table(
                last_month, today, cluster_id)

    def test_get_infra_db_key_for_provider_type(self):
        """Test db_key private method for OCP-on-AWS infrastructure map."""
        updater = self._make_updater(self.ocp_test_provider_uuid)
        self.assertEqual(updater._get_infra_db_key_for_provider_type('AWS'),
                         'aws_uuid')
        self.assertEqual(
            updater._get_infra_db_key_for_provider_type('AWS-local'),
            'aws_uuid')
        self.assertEqual(updater._get_infra_db_key_for_provider_type('OCP'),
                         'ocp_uuid')
        self.assertIsNone(
            updater._get_infra_db_key_for_provider_type('WRONG'))

    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.AWSReportDBAccessor.populate_ocp_on_aws_cost_daily_summary'
    )
    @patch(
        'masu.database.ocp_report_db_accessor.OCPReportDBAccessor.populate_cost_summary_table'
    )
    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.OCPCloudReportSummaryUpdater._get_ocp_cluster_id_for_provider'
    )
    def test_update_summary_tables_with_ocp_provider(self, mock_utility,
                                                     mock_ocp,
                                                     mock_ocp_on_aws):
        """Test that summary tables are properly run for an OCP provider."""
        fake_cluster = 'my-ocp-cluster'
        mock_utility.return_value = fake_cluster
        start_date_str, end_date_str = self._summary_date_strings()
        updater = self._make_updater(self.ocp_test_provider_uuid)
        updater.update_summary_tables(start_date_str, end_date_str)

        mock_ocp_on_aws.assert_called_with(start_date_str, end_date_str,
                                           fake_cluster, [])
        mock_ocp.assert_called_with(fake_cluster, start_date_str, end_date_str)

    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.AWSReportDBAccessor.populate_ocp_on_aws_cost_daily_summary'
    )
    @patch(
        'masu.database.ocp_report_db_accessor.OCPReportDBAccessor.populate_cost_summary_table'
    )
    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.get_bills_from_provider')
    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.OCPCloudReportSummaryUpdater._get_ocp_cluster_id_for_provider'
    )
    def test_update_summary_tables_with_aws_provider(self,
                                                     mock_cluster_id_utility,
                                                     mock_utility, mock_ocp,
                                                     mock_ocp_on_aws):
        """Test that summary tables are properly run for an AWS provider."""
        fake_cluster_id = 'my-ocp-cluster'
        mock_cluster_id_utility.return_value = fake_cluster_id

        fake_bills = [Mock(), Mock()]
        fake_bills[0].id = 1
        fake_bills[1].id = 2
        bill_ids = [str(bill.id) for bill in fake_bills]
        mock_utility.return_value = fake_bills
        start_date_str, end_date_str = self._summary_date_strings()
        updater = self._make_updater(self.aws_test_provider_uuid)
        updater.update_summary_tables(start_date_str, end_date_str)
        mock_ocp_on_aws.assert_called_with(start_date_str, end_date_str,
                                           fake_cluster_id, bill_ids)
        mock_ocp.assert_called_with(fake_cluster_id, start_date_str,
                                    end_date_str)

    @patch(
        'masu.processor.ocp.ocp_cloud_summary_updater.AWSReportDBAccessor.populate_ocp_on_aws_cost_daily_summary'
    )
    @patch(
        'masu.database.ocp_report_db_accessor.OCPReportDBAccessor.populate_cost_summary_table'
    )
    def test_update_summary_tables_no_ocp_on_aws(self, mock_ocp,
                                                 mock_ocp_on_aws):
        """Test that summary tables do not run when OCP-on-AWS does not exist."""
        test_provider_list = [
            self.aws_test_provider_uuid, self.ocp_test_provider_uuid
        ]

        for provider_uuid in test_provider_list:
            # Dates are recomputed per provider, inside the loop.
            start_date_str, end_date_str = self._summary_date_strings()
            updater = self._make_updater(provider_uuid)

            updater.update_summary_tables(start_date_str, end_date_str)
            mock_ocp.assert_called()
            mock_ocp_on_aws.assert_not_called()

    def test_update_summary_tables(self):
        """Test that summary tables are updated correctly."""
        self._generate_ocp_on_aws_data()

        start_date_str, end_date_str = self._summary_date_strings()
        updater = self._make_updater(self.ocp_test_provider_uuid)

        with AWSReportDBAccessor(self.test_schema,
                                 self.column_map) as aws_accessor:
            summary_table_name = AWS_CUR_TABLE_MAP['ocp_on_aws_daily_summary']
            query = aws_accessor._get_db_obj_query(summary_table_name)
            initial_count = query.count()
            updater.update_summary_tables(start_date_str, end_date_str)
            # The update is expected to change the summary table's row count.
            self.assertNotEqual(query.count(), initial_count)
class ReportManifestDBAccessorTest(IamTestCase):
    """Test cases for the ReportManifestDBAccessor."""

    def setUp(self):
        """Prepare the manifest keyword arguments and the accessor under test."""
        super().setUp()
        self.schema = self.schema_name
        first_of_month = DateAccessor().today_with_timezone('UTC').replace(
            day=1)
        self.manifest_accessor = ReportManifestDBAccessor()
        self.manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': first_of_month,
            'num_total_files': 2,
            'provider_uuid': self.provider_uuid,
        }

    def tearDown(self):
        """Delete every manifest left behind by a test."""
        super().tearDown()
        with schema_context(self.schema):
            for leftover in self.manifest_accessor._get_db_obj_query().all():
                self.manifest_accessor.delete(leftover)

    def test_initializer(self):
        """Test the initializer."""
        self.assertIsNotNone(ReportManifestDBAccessor()._table)

    def test_get_manifest(self):
        """Test that the right manifest is returned."""
        assembly_id = self.manifest_dict.get('assembly_id')
        provider_uuid = self.manifest_dict.get('provider_uuid')
        expected_total = self.manifest_dict.get('num_total_files')

        with schema_context(self.schema):
            added_manifest = self.manifest_accessor.add(**self.manifest_dict)
            fetched = self.manifest_accessor.get_manifest(
                assembly_id, provider_uuid)

        self.assertIsNotNone(fetched)
        self.assertEqual(added_manifest, fetched)
        self.assertEqual(fetched.assembly_id, assembly_id)
        self.assertEqual(fetched.provider_id, provider_uuid)
        self.assertEqual(fetched.num_total_files, expected_total)

    def test_get_manifest_by_id(self):
        """Test that the right manifest is returned by id."""
        with schema_context(self.schema):
            added_manifest = self.manifest_accessor.add(**self.manifest_dict)
            fetched = self.manifest_accessor.get_manifest_by_id(
                added_manifest.id)
        self.assertIsNotNone(fetched)
        self.assertEqual(added_manifest, fetched)

    def test_mark_manifest_as_updated(self):
        """Test that the manifest is marked updated."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            before_mark = DateAccessor().today_with_timezone('UTC')
            self.manifest_accessor.mark_manifest_as_updated(manifest)
            self.assertGreater(manifest.manifest_updated_datetime,
                               before_mark)

    def test_mark_manifest_as_completed(self):
        """Test that the manifest is marked completed."""
        manifest = self.manifest_accessor.add(**self.manifest_dict)
        before_mark = DateAccessor().today_with_timezone('UTC')
        self.manifest_accessor.mark_manifest_as_completed(manifest)
        self.assertGreater(manifest.manifest_completed_datetime, before_mark)

    def test_get_manifest_list_for_provider_and_bill_date(self):
        """Test that all manifests are returned for a provider and bill."""
        bill_date = self.manifest_dict['billing_period_start_datetime'].date()
        manifest_kwargs = copy.deepcopy(self.manifest_dict)

        # Add one manifest per assembly id; the list should grow by one each time.
        assembly_ids = (manifest_kwargs['assembly_id'], '2345', '3456')
        for expected_count, assembly_id in enumerate(assembly_ids, start=1):
            manifest_kwargs['assembly_id'] = assembly_id
            self.manifest_accessor.add(**manifest_kwargs)
            result = self.manifest_accessor.get_manifest_list_for_provider_and_bill_date(
                self.provider_uuid, bill_date)
            self.assertEqual(len(result), expected_count)

    def test_get_last_report_completed_datetime(self):
        """Test that the last completed report datetime is returned."""
        manifest = self.manifest_accessor.add(**self.manifest_dict)
        earlier_time = DateAccessor().today_with_timezone('UTC')
        later_time = earlier_time + datetime.timedelta(hours=1)

        completions = (
            ('earlier_report', earlier_time),
            ('later_report', later_time),
        )
        for report_name, completed_at in completions:
            ReportStatsDBAccessor(report_name, manifest.id).update(
                last_completed_datetime=completed_at)

        result = self.manifest_accessor.get_last_report_completed_datetime(
            manifest.id)

        # The later of the two completion times is the one expected back.
        self.assertEqual(result, later_time)
 def test_initializer(self):
     """Check that a new accessor exposes a table."""
     table = ReportManifestDBAccessor()._table
     self.assertIsNotNone(table)
Beispiel #26
0
    def test_clean_volume(self, mock_date, mock_config):
        """Test that clean_volume removes only the files it is expected to remove.

        A file older than the retention window is expected to survive while
        it matches a manifest that is still being processed, and to be removed
        once that manifest is fully processed. Old files matching no manifest
        are removed, and new files are kept.
        """
        mock_date.return_value = ["2020-02-01"]
        manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": "2020-02-01",
            "num_total_files": 2,
            "provider_uuid": self.aws_provider_uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.add(**manifest_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            mock_config.PVC_DIR = tmpdirname
            mock_config.VOLUME_FILE_RETENTION = 60 * 60 * 24

            # Three files: one named after the manifest's assembly_id, two unrelated.
            old_matching_file = os.path.join(tmpdirname,
                                             "%s.csv" % manifest.assembly_id)
            new_no_match_file = os.path.join(tmpdirname, "newfile.csv")
            old_no_match_file = os.path.join(tmpdirname, "oldfile.csv")
            for file_path in (old_matching_file, new_no_match_file, old_no_match_file):
                with open(file_path, "a"):
                    pass
                self.assertTrue(os.path.exists(file_path))

            # Age the two "old" files to twice the retention window.
            stale_moment = DateHelper().now - timedelta(
                seconds=mock_config.VOLUME_FILE_RETENTION * 2)
            stale_stamp = stale_moment.timestamp()
            for file_path in (old_matching_file, old_no_match_file):
                os.utime(file_path, (stale_stamp, stale_stamp))

            # First pass: the manifest is still unprocessed.
            tasks.clean_volume()
            self.assertTrue(os.path.exists(old_matching_file))
            self.assertTrue(os.path.exists(new_no_match_file))
            self.assertFalse(os.path.exists(old_no_match_file))

            # Mark every file of the manifest as processed, then clean again.
            total_files = manifest_dict.get("num_total_files")
            manifest.num_processed_files = total_files
            manifest_helper = ManifestCreationHelper(
                manifest.id, total_files, manifest_dict.get("assembly_id"))
            manifest_helper.generate_test_report_files()
            manifest_helper.process_all_files()

            manifest.save()
            tasks.clean_volume()
            # The matching file is no longer protected by the manifest.
            self.assertFalse(os.path.exists(old_matching_file))
            self.assertTrue(os.path.exists(new_no_match_file))

        # assert the tempdir is cleaned up
        self.assertFalse(os.path.exists(tmpdirname))
        # test no files found for codecov
        tasks.clean_volume()
Beispiel #27
0
class ReportManifestDBAccessorTest(IamTestCase):
    """Exercise ReportManifestDBAccessor against the test schema."""

    def setUp(self):
        """Prepare the manifest fixture values and the accessor under test."""
        super().setUp()
        self.schema = self.schema_name

        # Bill period starts on the first day of the current UTC month.
        first_of_month = DateAccessor().today_with_timezone('UTC').replace(
            day=1)
        self.manifest_dict = dict(
            assembly_id='1234',
            billing_period_start_datetime=first_of_month,
            num_total_files=2,
            provider_id=1,
        )
        self.manifest_accessor = ReportManifestDBAccessor()

    def tearDown(self):
        """Delete every manifest row returned by the accessor's base query."""
        super().tearDown()
        accessor = self.manifest_accessor
        with schema_context(self.schema):
            for leftover in accessor._get_db_obj_query().all():
                accessor.delete(leftover)

    def test_initializer(self):
        """A freshly constructed accessor exposes a table."""
        self.assertIsNotNone(ReportManifestDBAccessor()._table)

    def test_get_manifest(self):
        """Lookup by assembly id and provider id returns the added manifest."""
        fixture = self.manifest_dict
        with schema_context(self.schema):
            created = self.manifest_accessor.add(**fixture)

            assembly_id = fixture.get('assembly_id')
            provider_id = fixture.get('provider_id')
            fetched = self.manifest_accessor.get_manifest(
                assembly_id, provider_id)

        self.assertIsNotNone(fetched)
        self.assertEqual(created, fetched)
        self.assertEqual(fetched.assembly_id, assembly_id)
        self.assertEqual(fetched.provider_id, provider_id)
        self.assertEqual(fetched.num_total_files,
                         fixture.get('num_total_files'))

    def test_get_manifest_by_id(self):
        """Lookup by primary key returns the added manifest."""
        with schema_context(self.schema):
            created = self.manifest_accessor.add(**self.manifest_dict)
            fetched = self.manifest_accessor.get_manifest_by_id(created.id)
        self.assertIsNotNone(fetched)
        self.assertEqual(created, fetched)

    def test_mark_manifest_as_updated(self):
        """Marking a manifest as updated moves its updated timestamp forward."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            # Captured before the call so the new timestamp must be later.
            before_update = DateAccessor().today_with_timezone('UTC')
            self.manifest_accessor.mark_manifest_as_updated(manifest)
            self.assertGreater(manifest.manifest_updated_datetime,
                               before_update)
Beispiel #28
0
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Process one report file and update the bookkeeping records around it.

    The steps run in order: log what is about to be processed, record
    start/completion times for the file, run the report processor, bump
    the manifest's processed-file counter, mark the provider as set up,
    and finally ask the processor to remove processed files from the
    report's directory.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) The report data dict from previous task; the
                      keys read here are 'start_date', 'file',
                      'compression', 'manifest_id' and 'provider_id'

    Returns:
        None

    """
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    start_date = report_dict.get('start_date')
    manifest_id = report_dict.get('manifest_id')
    provider_id = report_dict.get('provider_id')

    LOG.info(('Processing Report:'
              ' schema_name: {},'
              ' report_path: {},'
              ' compression: {},'
              ' provider: {},'
              ' start_date: {}').format(schema_name, report_path,
                                        compression, provider, start_date))
    memory = psutil.virtual_memory()
    LOG.info('Avaiable memory: {} bytes ({}%)'.format(memory.free,
                                                      memory.percent))

    # Only the final path component identifies the file in the stats table.
    file_name = report_path.rsplit('/', 1)[-1]

    with ReportStatsDBAccessor(file_name, manifest_id) as recorder:
        # Commit the start time before processing begins.
        recorder.log_last_started_datetime()
        recorder.commit()

        processor = ReportProcessor(schema_name=schema_name,
                                    report_path=report_path,
                                    compression=compression,
                                    provider=provider,
                                    provider_id=provider_id,
                                    manifest_id=manifest_id)
        processor.process()

        recorder.log_last_completed_datetime()
        recorder.commit()

    with ReportManifestDBAccessor() as manifest_db:
        manifest = manifest_db.get_manifest_by_id(manifest_id)
        if not manifest:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)
        else:
            manifest.num_processed_files += 1
            manifest_db.mark_manifest_as_updated(manifest)
            manifest_db.commit()

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_db:
        provider_db.setup_complete()
        provider_db.commit()

    report_dir = path.dirname(report_path)
    removed = processor.remove_processed_files(report_dir)
    LOG.info('Temporary files removed: %s', str(removed))
Beispiel #29
0
def refresh_materialized_views(  # noqa: C901
        schema_name,
        provider_type,
        manifest_id=None,
        provider_uuid=None,
        synchronous=False,
        queue_name=None):
    """Refresh the database's materialized views for reporting.

    Unless ``synchronous`` is set, the run is guarded by a single-task lock
    keyed on the schema name: if the lock is already held the task requeues
    itself with the same arguments and returns without doing any work.

    Args:
        schema_name: schema whose views are refreshed; also the lock key.
        provider_type: selects which group of materialized views to refresh.
            A type matching none of the known groups refreshes no views.
        manifest_id: when truthy, the manifest with this id is marked as
            completed after the refresh.
        provider_uuid: when truthy, the provider's data-updated timestamp
            is set after the refresh.
        synchronous: when true, skip the lock/requeue handling entirely.
        queue_name: queue used when requeuing; falls back to
            REFRESH_MATERIALIZED_VIEWS_QUEUE.

    Returns:
        None
    """
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name]
    worker_cache = None
    if not synchronous:
        worker_cache = WorkerCache()
        if worker_cache.single_task_is_running(task_name, cache_args):
            msg = f"Task {task_name} already running for {cache_args}. Requeuing."
            LOG.info(msg)
            refresh_materialized_views.s(
                schema_name,
                provider_type,
                manifest_id=manifest_id,
                provider_uuid=provider_uuid,
                synchronous=synchronous,
                queue_name=queue_name,
            ).apply_async(queue=queue_name or REFRESH_MATERIALIZED_VIEWS_QUEUE)
            # The lock belongs to the other run; do not release it here.
            return
        worker_cache.lock_single_task(task_name, cache_args, timeout=600)

    try:
        materialized_views = ()
        if provider_type in (Provider.PROVIDER_AWS,
                             Provider.PROVIDER_AWS_LOCAL):
            materialized_views = (AWS_MATERIALIZED_VIEWS +
                                  OCP_ON_AWS_MATERIALIZED_VIEWS +
                                  OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
        # Compare by equality: `x in (Provider.PROVIDER_OCP)` has no trailing
        # comma, so it is not a tuple membership test. Against a string it
        # becomes a substring check, which raises TypeError for None and
        # accepts partial or empty strings.
        elif provider_type == Provider.PROVIDER_OCP:
            materialized_views = (OCP_MATERIALIZED_VIEWS +
                                  OCP_ON_AWS_MATERIALIZED_VIEWS +
                                  OCP_ON_AZURE_MATERIALIZED_VIEWS +
                                  OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
        elif provider_type in (Provider.PROVIDER_AZURE,
                               Provider.PROVIDER_AZURE_LOCAL):
            materialized_views = (AZURE_MATERIALIZED_VIEWS +
                                  OCP_ON_AZURE_MATERIALIZED_VIEWS +
                                  OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
        elif provider_type in (Provider.PROVIDER_GCP,
                               Provider.PROVIDER_GCP_LOCAL):
            materialized_views = GCP_MATERIALIZED_VIEWS

        with schema_context(schema_name):
            for view in materialized_views:
                # The table name comes from the model metadata, not from
                # caller-supplied input.
                table_name = view._meta.db_table
                with connection.cursor() as cursor:
                    cursor.execute(
                        f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                    LOG.info(f"Refreshed {table_name}.")

        invalidate_view_cache_for_tenant_and_source_type(schema_name,
                                                         provider_type)

        if provider_uuid:
            ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
        if manifest_id:
            # Processing for this manifest should be complete after this step
            with ReportManifestDBAccessor() as manifest_accessor:
                # NOTE(review): the lookup result is passed on unchecked;
                # confirm mark_manifest_as_completed tolerates a missing
                # manifest.
                manifest = manifest_accessor.get_manifest_by_id(manifest_id)
                manifest_accessor.mark_manifest_as_completed(manifest)
    finally:
        # Release the lock even when the refresh fails, so queued runs are
        # not left requeuing until the lock's timeout expires.
        if worker_cache is not None:
            worker_cache.release_single_task(task_name, cache_args)
Beispiel #30
0
    def remove(self,
               simulate=False,
               provider_uuid=None,
               line_items_only=False):
        """
        Remove expired data based on the retention policy.

        When the purge is not limited to line items, expired
        CostUsageReportManifests are purged as well (that manifest purge is
        skipped when simulating, although the log line is still written).

        Args:
            simulate (bool): forwarded to the cleaner's purge calls; when
                True the manifest purge is not executed.
            provider_uuid: when not None, the purge is scoped to this
                provider uuid; otherwise it is scoped by ``self._provider``.
            line_items_only (bool): purge only line item data. Nothing is
                purged for Azure, Azure-local and OCP providers in this mode.

        Returns:
            The value returned by the cleaner's purge call, or None when no
            purge call was made. Per the previous documentation this is
            ([{}]) a list of dictionaries containing 'account_payer_id' and
            'billing_period_start' -- the shape is set by the cleaner, which
            is not visible here.

        """
        skip_line_item_purge = (Provider.PROVIDER_AZURE,
                                Provider.PROVIDER_AZURE_LOCAL,
                                Provider.PROVIDER_OCP)
        skip_msg = "%s has no line item data to be be removed."

        if provider_uuid is None:
            # Provider-type scope: the cutoff is computed up front and
            # shared by whichever purge runs.
            cutoff = self._calculate_expiration_date(
                line_items_only=line_items_only)
            if line_items_only:
                if self._provider in skip_line_item_purge:
                    LOG.info(skip_msg % self._provider)
                    return None
                return self._cleaner.purge_expired_line_item(
                    expired_date=cutoff, simulate=simulate)

            purged = self._cleaner.purge_expired_report_data(
                expired_date=cutoff, simulate=simulate)
            with ReportManifestDBAccessor() as manifest_accessor:
                # Remove expired CostUsageReportManifests
                if not simulate:
                    manifest_accessor.purge_expired_report_manifest(
                        self._provider, cutoff)
                LOG.info(
                    """Removed CostUsageReportManifest for
                        provider type: %s before billing period: %s""",
                    self._provider,
                    cutoff,
                )
            return purged

        # Provider-uuid scope.
        if line_items_only:
            if self._provider in skip_line_item_purge:
                LOG.info(skip_msg % self._provider)
                return None
            cutoff = self._calculate_expiration_date(
                line_items_only=line_items_only)
            return self._cleaner.purge_expired_line_item(
                expired_date=cutoff,
                simulate=simulate,
                provider_uuid=provider_uuid)

        # The report-data purge is called without an explicit cutoff here;
        # the cutoff below only drives the manifest purge.
        purged = self._cleaner.purge_expired_report_data(
            simulate=simulate, provider_uuid=provider_uuid)
        with ReportManifestDBAccessor() as manifest_accessor:
            # Remove expired CostUsageReportManifests
            cutoff = self._calculate_expiration_date()
            if not simulate:
                manifest_accessor.purge_expired_report_manifest_provider_uuid(
                    provider_uuid, cutoff)
            LOG.info(
                """Removed CostUsageReportManifest for
                        provider uuid: %s before billing period: %s""",
                provider_uuid,
                cutoff,
            )
        return purged