Example #1
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        cls.test_report_path = './koku/masu/test/data/gcp/evidence-2019-06-03.csv'

        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()

        with ReportingCommonDBAccessor() as report_common_db:
            cls.column_map = report_common_db.column_map
Example #2
    def setUp(self):
        """Set up shared variables."""
        super().setUp()
        self.dh = DateHelper()
        manifest_id = 1
        with ReportManifestDBAccessor() as manifest_accessor:
            self.manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        self.updater = AWSReportParquetSummaryUpdater(self.schema_name,
                                                      self.aws_provider,
                                                      self.manifest)
    @classmethod
    def setUpClass(cls):
        """Set up class variables."""
        super().setUpClass()
        cls.fake_customer_name = CUSTOMER_NAME
        cls.fake_report_name = "koku-local"

        cls.fake_bucket_prefix = PREFIX
        cls.selected_region = REGION

        cls.manifest_accessor = ReportManifestDBAccessor()
Example #4
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()

        cls.accessor = AzureReportDBAccessor("acct10001")
        cls.report_schema = cls.accessor.report_schema
        cls.all_tables = list(AZURE_REPORT_TABLE_MAP.values())
        cls.creator = ReportObjectCreator(cls.schema)
        cls.date_accessor = DateHelper()
        cls.manifest_accessor = ReportManifestDBAccessor()
Example #5
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.fake_customer_name = CUSTOMER_NAME
        cls.fake_report_name = REPORT
        cls.fake_bucket_prefix = PREFIX
        cls.fake_bucket_name = BUCKET
        cls.selected_region = REGION
        cls.auth_credential = fake_arn(service='iam', generate_account_id=True)

        cls.manifest_accessor = ReportManifestDBAccessor()
Example #6
    def _process_manifest_db_record(self, assembly_id, billing_start,
                                    num_of_files):
        """Insert or update the manifest DB record."""
        LOG.info("Inserting/updating manifest in database for assembly_id: %s",
                 assembly_id)

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest_entry = manifest_accessor.get_manifest(
                assembly_id, self._provider_uuid)

            if not manifest_entry:
                msg = f"No manifest entry found in database. Adding for bill period start: {billing_start}"
                LOG.info(log_json(self.request_id, msg, self.context))
                manifest_dict = {
                    "assembly_id": assembly_id,
                    "billing_period_start_datetime": billing_start,
                    "num_total_files": num_of_files,
                    "provider_uuid": self._provider_uuid,
                }
                try:
                    manifest_entry = manifest_accessor.add(**manifest_dict)
                except IntegrityError as error:
                    msg = (
                        f"Manifest entry uniqueness collision: Error {error}. "
                        "Manifest already added, getting manifest_entry_id.")
                    LOG.warning(log_json(self.request_id, msg, self.context))
                    with ReportManifestDBAccessor() as manifest_accessor:
                        manifest_entry = manifest_accessor.get_manifest(
                            assembly_id, self._provider_uuid)
            if not manifest_entry:
                msg = f"Manifest entry not found for given manifest {manifest_dict}."
                with ProviderDBAccessor(
                        self._provider_uuid) as provider_accessor:
                    provider = provider_accessor.get_provider()
                    if not provider:
                        msg = f"Provider entry not found for {self._provider_uuid}."
                LOG.warning(log_json(self.request_id, msg, self.context))
                raise IntegrityError(msg)
            else:
                manifest_accessor.mark_manifest_as_updated(manifest_entry)
                manifest_id = manifest_entry.id

        return manifest_id
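
The IntegrityError branch above exists because two workers can race to insert the same manifest. A condensed, generic sketch of that get/add/fall-back-to-get pattern (hypothetical helper, assuming Django's IntegrityError and the accessor methods used in the example):

from django.db import IntegrityError


def get_or_add_manifest(accessor, assembly_id, provider_uuid, manifest_dict):
    """Return the manifest row, tolerating a concurrent insert by another worker."""
    entry = accessor.get_manifest(assembly_id, provider_uuid)
    if entry:
        return entry
    try:
        return accessor.add(**manifest_dict)
    except IntegrityError:
        # Another worker won the race; fetch the row it just inserted.
        return accessor.get_manifest(assembly_id, provider_uuid)
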
Example #7
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()

        cls.accessor = AWSReportDBAccessor(cls.schema)
        cls.report_schema = cls.accessor.report_schema
        cls.all_tables = list(AWS_CUR_TABLE_MAP.values())
        cls.creator = ReportObjectCreator(cls.schema)
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()
    def test_check_if_manifest_should_be_downloaded_done_processing_manifest(self, _):
        """Test that a manifest that has finished processing is not reprocessed."""
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self.manifest_id)
            manifest.num_processed_files = 2
            manifest.num_total_files = 2
            manifest.save()

        result = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
        self.assertFalse(result)
Example #9
def summarize_reports(reports_to_summarize, queue_name=None):
    """
    Summarize reports returned from line summary task.

    Args:
        reports_to_summarize (list): List of reports to process.

    Returns:
        None

    """
    reports_to_summarize = [
        report for report in reports_to_summarize if report
    ]
    reports_deduplicated = [
        dict(t) for t in {tuple(d.items())
                          for d in reports_to_summarize}
    ]

    for report in reports_deduplicated:
        # For day-to-day summarization we choose a small window of days
        # to cover newly arrived data. This saves us from re-summarizing
        # unchanged data and cuts down on processing time. There are
        # override mechanisms in the Updater classes for when full-month
        # summarization is required.
        with ReportManifestDBAccessor() as manifest_accessor:
            if manifest_accessor.manifest_ready_for_summary(
                    report.get("manifest_id")):
                if report.get("start") and report.get("end"):
                    LOG.info("using start and end dates from the manifest")
                    start_date = parser.parse(
                        report.get("start")).strftime("%Y-%m-%d")
                    end_date = parser.parse(
                        report.get("end")).strftime("%Y-%m-%d")
                else:
                    LOG.info("generating start and end dates for manifest")
                    start_date = DateAccessor().today() - datetime.timedelta(
                        days=2)
                    start_date = start_date.strftime("%Y-%m-%d")
                    end_date = DateAccessor().today().strftime("%Y-%m-%d")
                msg = f"report to summarize: {str(report)}"
                tracing_id = report.get(
                    "tracing_id", report.get("manifest_uuid", "no-tracing-id"))
                LOG.info(log_json(tracing_id, msg))
                update_summary_tables.s(
                    report.get("schema_name"),
                    report.get("provider_type"),
                    report.get("provider_uuid"),
                    start_date=start_date,
                    end_date=end_date,
                    manifest_id=report.get("manifest_id"),
                    queue_name=queue_name,
                    tracing_id=tracing_id,
                ).apply_async(queue=queue_name or UPDATE_SUMMARY_TABLES_QUEUE)
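
When a report carries no explicit start and end dates, the task above falls back to a two-day window ending today. A self-contained sketch of that window calculation (hypothetical helper, not part of the source):

import datetime


def default_summary_window(today=None):
    """Return (start_date, end_date) strings covering the last two days."""
    today = today or datetime.date.today()
    start_date = (today - datetime.timedelta(days=2)).strftime("%Y-%m-%d")
    end_date = today.strftime("%Y-%m-%d")
    return start_date, end_date


# default_summary_window(datetime.date(2020, 11, 10)) -> ("2020-11-08", "2020-11-10")
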
Example #10
    @classmethod
    def setUpClass(cls):
        """Set up the test class."""
        billing_start = DateAccessor().today_with_timezone('UTC').replace(
            day=1)
        cls.manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'provider_id': 1
        }
        cls.manifest_accessor = ReportManifestDBAccessor()
Example #11
    def _should_process_full_month(self):
        """Determine if we should process the full month of data."""
        if not self._manifest_id:
            log_statement = (
                f"No manifest provided, processing as a new billing period.\n"
                f" Processing entire month.\n"
                f" schema_name: {self._schema},\n"
                f" provider_uuid: {self._provider_uuid},\n"
                f" manifest_id: {self._manifest_id}"
            )
            LOG.info(log_statement)
            return True

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self._manifest_id)
            bill_date = manifest.billing_period_start_datetime.date()
            provider_uuid = manifest.provider_id

        log_statement = (
            f"Processing bill starting on {bill_date}.\n"
            f" Processing entire month.\n"
            f" schema_name: {self._schema},\n"
            f" provider_uuid: {self._provider_uuid},\n"
            f" manifest_id: {self._manifest_id}"
        )

        if (bill_date.month != self.data_cutoff_date.month) or (
            bill_date.year != self.data_cutoff_date.year and bill_date.month == self.data_cutoff_date.month
        ):
            LOG.info(log_statement)
            return True

        manifest_list = manifest_accessor.get_manifest_list_for_provider_and_bill_date(provider_uuid, bill_date)

        if len(manifest_list) == 1:
            # This is the first manifest for this bill and we are currently
            # processing it
            LOG.info(log_statement)
            return True

        for manifest in manifest_list:
            if manifest.num_processed_files >= manifest.num_total_files:
                log_statement = (
                    f"Processing bill starting on {bill_date}.\n"
                    f" Processing data on or after {self.data_cutoff_date}.\n"
                    f" schema_name: {self._schema},\n"
                    f" provider_uuid: {self._provider_uuid},\n"
                    f" manifest_id: {self._manifest_id}"
                )
                LOG.info(log_statement)
                # We have fully processed a manifest for this provider
                return False

        return True
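
The branching above can be hard to follow. Condensed into a hypothetical pure function (not the koku method, and omitting the no-manifest case that always returns True), the rule is: process the full month unless the bill belongs to the current data-cutoff month and some manifest for that bill has already been fully processed.

def should_process_full_month(bill_date, cutoff_date, manifest_list):
    """Condensed restatement of the decision in _should_process_full_month."""
    if (bill_date.year, bill_date.month) != (cutoff_date.year, cutoff_date.month):
        # The bill is not for the current cutoff month: process the whole month.
        return True
    if len(manifest_list) == 1:
        # Only the manifest currently being processed exists for this bill.
        return True
    # Skip full-month processing only if some manifest was already fully processed.
    return not any(m.num_processed_files >= m.num_total_files for m in manifest_list)
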
Example #12
    def _get_existing_manifest_db_id(self, assembly_id):
        """Return the manifest DB id if the manifest exists."""
        manifest_id = None
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(
                assembly_id,
                self._provider_id
            )
            if manifest:
                manifest_id = manifest.id
        return manifest_id
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download an S3 object to file.

        Args:
            key (str): The S3 object key identifier.

        Returns:
            (String): The path and file name of the saved file

        """
        local_s3_filename = utils.get_local_file_name(key)

        directory_path = f"{DATA_DIR}/{self.customer_name}/aws-local/{self.bucket}"
        full_file_path = f"{directory_path}/{local_s3_filename}"

        if not os.path.isfile(key):
            log_msg = f"Unable to locate {key} in {self.bucket_path}"
            raise AWSReportDownloaderNoFileError(log_msg)

        # Make sure the data directory exists
        os.makedirs(directory_path, exist_ok=True)
        s3_etag_hasher = hashlib.new("ripemd160")
        s3_etag_hasher.update(bytes(local_s3_filename, "utf-8"))
        s3_etag = s3_etag_hasher.hexdigest()

        file_creation_date = None
        if s3_etag != stored_etag or not os.path.isfile(full_file_path):
            msg = f"Downloading {key} to {full_file_path}"
            LOG.info(log_json(self.tracing_id, msg, self.context))
            shutil.copy2(key, full_file_path)
            file_creation_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(full_file_path))
            # Push to S3

            s3_csv_path = get_path_prefix(self.account, Provider.PROVIDER_AWS,
                                          self._provider_uuid, start_date,
                                          Config.CSV_DATA_TYPE)
            utils.copy_local_report_file_to_s3_bucket(
                self.tracing_id, s3_csv_path, full_file_path,
                local_s3_filename, manifest_id, start_date, self.context)

            manifest_accessor = ReportManifestDBAccessor()
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)

            if not manifest_accessor.get_s3_csv_cleared(manifest):
                utils.remove_files_not_in_set_from_s3_bucket(
                    self.tracing_id, s3_csv_path, manifest_id)
                manifest_accessor.mark_s3_csv_cleared(manifest)
        return full_file_path, s3_etag, file_creation_date, []
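
The local downloader above derives a deterministic pseudo-etag by hashing the local file name, so an unchanged file is not copied or pushed again. A minimal sketch of that derivation (assuming the "ripemd160" algorithm is available in your hashlib/OpenSSL build):

import hashlib


def fake_etag(local_filename):
    """Derive a stable pseudo-etag from the local file name."""
    hasher = hashlib.new("ripemd160")  # availability depends on the OpenSSL build
    hasher.update(local_filename.encode("utf-8"))
    return hasher.hexdigest()
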
    def test_is_last_completed_datetime_null(self):
        """Test is last completed datetime is null."""
        manifest_id = 123456789
        self.assertTrue(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))
        baker.make(CostUsageReportManifest, id=manifest_id)
        baker.make(CostUsageReportStatus,
                   manifest_id=manifest_id,
                   last_completed_datetime=None)
        self.assertTrue(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))

        CostUsageReportStatus.objects.filter(manifest_id=manifest_id).update(
            last_completed_datetime=FAKE.date())

        self.assertFalse(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))
Example #15
    @classmethod
    def setUpClass(cls):
        """Set up class variables."""
        super().setUpClass()
        cls.fake_customer_name = CUSTOMER_NAME
        cls.fake_report_name = "koku-local"

        cls.fake_bucket_prefix = PREFIX
        cls.selected_region = REGION
        cls.fake_auth_credential = fake_arn(service="iam", generate_account_id=True)

        cls.manifest_accessor = ReportManifestDBAccessor()
Example #16
    def test_clean_volume(self, mock_date, mock_config):
        """Test that the clean volume function is cleaning the appropriate files"""
        # create a manifest
        mock_date.return_value = ["2020-02-01"]
        manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": "2020-02-01",
            "num_total_files": 2,
            "provider_uuid": self.aws_provider_uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.add(**manifest_dict)
        # create three files on the temporary volume, one with a prefix
        # matching the assembly_id in the manifest above
        with tempfile.TemporaryDirectory() as tmpdirname:
            mock_config.PVC_DIR = tmpdirname
            mock_config.VOLUME_FILE_RETENTION = 60 * 60 * 24
            old_matching_file = os.path.join(tmpdirname, "%s.csv" % manifest.assembly_id)
            new_no_match_file = os.path.join(tmpdirname, "newfile.csv")
            old_no_match_file = os.path.join(tmpdirname, "oldfile.csv")
            filepaths = [old_matching_file, new_no_match_file, old_no_match_file]
            for path in filepaths:
                open(path, "a").close()
                self.assertEqual(os.path.exists(path), True)

            # Backdate the timestamps on the two "old" files
            datehelper = DateHelper()
            now = datehelper.now
            old_datetime = now - timedelta(seconds=mock_config.VOLUME_FILE_RETENTION * 2)
            oldtime = old_datetime.timestamp()
            os.utime(old_matching_file, (oldtime, oldtime))
            os.utime(old_no_match_file, (oldtime, oldtime))

            # now run the clean volume task
            tasks.clean_volume()
            # make sure that the file with the matching id still exists and that
            # the file with the other id is gone
            self.assertEqual(os.path.exists(old_matching_file), True)
            self.assertEqual(os.path.exists(new_no_match_file), True)
            self.assertEqual(os.path.exists(old_no_match_file), False)
            # now edit the manifest to say that all the files have been processed
            # and rerun the clean_volume task
            manifest.num_processed_files = manifest_dict.get("num_total_files")
            manifest.save()
            tasks.clean_volume()
            # ensure that the original file is deleted from the volume
            self.assertEqual(os.path.exists(old_matching_file), False)
            self.assertEqual(os.path.exists(new_no_match_file), True)

        # assert the tempdir is cleaned up
        self.assertEqual(os.path.exists(tmpdirname), False)
        # test no files found for codecov
        tasks.clean_volume()
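
The test above backdates files with os.utime so they fall outside VOLUME_FILE_RETENTION. A small sketch of the age check being exercised (a hypothetical mirror of the retention comparison, not the clean_volume implementation itself):

import os
import time


def is_past_retention(path, retention_seconds):
    """Return True if the file's mtime is older than the retention window."""
    return (time.time() - os.path.getmtime(path)) > retention_seconds
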
    def setUp(self):
        """Set up the test class."""
        super().setUp()
        self.schema = self.schema_name
        billing_start = DateAccessor().today_with_timezone('UTC').replace(day=1)
        self.manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'provider_uuid': self.provider_uuid,
        }
        self.manifest_accessor = ReportManifestDBAccessor()
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download a file from an Azure bucket.

        Args:
            key (str): The object key identifier.

        Returns:
            (String): The path and file name of the saved file

        """
        local_filename = utils.get_local_file_name(key)
        full_file_path = f"{self._get_exports_data_directory()}/{local_filename}"

        file_creation_date = None
        try:
            blob = self._azure_client.get_cost_export_for_key(
                key, self.container_name)
            etag = blob.etag
            file_creation_date = blob.last_modified
        except AzureCostReportNotFound as ex:
            msg = f"Error when downloading Azure report for key: {key}. Error {ex}"
            LOG.error(log_json(self.tracing_id, msg, self.context))
            raise AzureReportDownloaderError(msg)

        msg = f"Downloading {key} to {full_file_path}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        blob = self._azure_client.download_cost_export(
            key, self.container_name, destination=full_file_path)
        # Push to S3
        s3_csv_path = get_path_prefix(self.account, Provider.PROVIDER_AZURE,
                                      self._provider_uuid, start_date,
                                      Config.CSV_DATA_TYPE)
        copy_local_report_file_to_s3_bucket(self.tracing_id, s3_csv_path,
                                            full_file_path, local_filename,
                                            manifest_id, start_date,
                                            self.context)

        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)

        if not manifest_accessor.get_s3_csv_cleared(manifest):
            remove_files_not_in_set_from_s3_bucket(self.tracing_id,
                                                   s3_csv_path, manifest_id)
            manifest_accessor.mark_s3_csv_cleared(manifest)

        msg = f"Returning full_file_path: {full_file_path}, etag: {etag}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        return full_file_path, etag, file_creation_date, []
    def test_check_if_manifest_should_be_downloaded_error_no_complete_date(self, _):
        """Test that a manifest that did not succeessfully process should be reprocessed."""
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(self.manifest_id)
            manifest.num_processed_files = 1
            manifest.num_total_files = 2
            manifest.save()

        with ReportStatsDBAccessor(self.report_name, self.manifest_id) as file_accessor:
            file_accessor.log_last_started_datetime()
        result = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
        self.assertTrue(result)
Example #20
    def tearDown(self):
        """Tear down each test case."""
        super().tearDown()
        with ReportStatsDBAccessor(self.report_name, self.manifest_id) as file_accessor:
            files = file_accessor._get_db_obj_query().all()
            for file in files:
                file_accessor.delete(file)

        with ReportManifestDBAccessor() as manifest_accessor:
            manifests = manifest_accessor._get_db_obj_query().all()
            for manifest in manifests:
                manifest_accessor.delete(manifest)
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        with ReportingCommonDBAccessor() as report_common_db:
            cls.column_map = report_common_db.column_map

        cls.accessor = AzureReportDBAccessor("acct10001", cls.column_map)
        cls.report_schema = cls.accessor.report_schema
        cls.all_tables = list(AZURE_REPORT_TABLE_MAP.values())
        cls.creator = ReportObjectCreator(cls.schema, cls.column_map)
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()
Example #22
    def setUp(self):
        """Set up GCP tests."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"

        shutil.copy2(self.test_report_path, self.test_report)

        gcp_auth = ProviderAuthentication.objects.create(
            credentials={"project-id": fake.word()})
        gcp_billing_source = ProviderBillingSource.objects.create(
            data_source={"bucket": fake.word()})
        with patch("masu.celery.tasks.check_report_updates"):
            self.gcp_provider = Provider.objects.create(
                uuid=uuid.uuid4(),
                name="Test Provider",
                type=Provider.PROVIDER_GCP,
                authentication=gcp_auth,
                billing_source=gcp_billing_source,
                customer=self.customer,
                setup_complete=True,
            )

        start_time = "2020-11-08 23:00:00+00:00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")

        self.start_date_utc = parser.parse(start_date).replace(hour=0,
                                                               minute=0,
                                                               tzinfo=pytz.UTC)
        self.end_date_utc = parser.parse(end_date).replace(hour=0,
                                                           minute=0,
                                                           tzinfo=pytz.UTC)

        self.assembly_id = "1234"
        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.start_date_utc,
            "num_total_files": 1,
            "provider_uuid": self.gcp_provider.uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        self.manifest = manifest_accessor.add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema)
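
The setUp above splits the value of utils.month_date_range on "-", which implies the range is a single-dash string of two compact date stamps (e.g. "YYYYMMDD-YYYYMMDD"). A hypothetical stand-in (not the koku utility) that produces a string in that shape:

import calendar
import datetime


def month_date_range(for_datetime):
    """Hypothetical stand-in: return 'YYYYMMDD-YYYYMMDD' covering for_datetime's month."""
    start = for_datetime.replace(day=1)
    last_day = calendar.monthrange(for_datetime.year, for_datetime.month)[1]
    end = for_datetime.replace(day=last_day)
    return f"{start.strftime('%Y%m%d')}-{end.strftime('%Y%m%d')}"


# month_date_range(datetime.datetime(2020, 11, 8)) -> "20201101-20201130"
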
    def setUp(self):
        """Set up the test class."""
        super().setUp()
        self.schema = self.schema_name
        billing_start = DateAccessor().today_with_timezone("UTC").replace(
            day=1)
        self.manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": billing_start,
            "num_total_files": 2,
            "provider_uuid": self.provider_uuid,
        }
        self.manifest_accessor = ReportManifestDBAccessor()
Example #24
    @classmethod
    def setUpClass(cls):
        """Set up the test class."""
        billing_start = datetime.utcnow().replace(day=1)
        manifest_dict = {
            'assembly_id': '1234',
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'provider_id': 1
        }
        cls.manifest_accessor = ReportManifestDBAccessor()
        manifest = cls.manifest_accessor.add(**manifest_dict)
        cls.manifest_accessor.commit()
        cls.manifest_id = manifest.id
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download a file from an Azure bucket.

        Args:
            key (str): The object key identifier.

        Returns:
            (String): The path and file name of the saved file

        """
        local_filename = utils.get_local_file_name(key)
        full_file_path = f"{self._get_exports_data_directory()}/{local_filename}"

        etag_hasher = hashlib.new("ripemd160")
        etag_hasher.update(bytes(local_filename, "utf-8"))
        etag = etag_hasher.hexdigest()

        file_creation_date = None
        if etag != stored_etag:
            msg = f"Downloading {key} to {full_file_path}"
            LOG.info(log_json(self.request_id, msg, self.context))
            shutil.copy2(key, full_file_path)
            file_creation_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(full_file_path))
            # Push to S3
            s3_csv_path = get_path_prefix(self.account,
                                          Provider.PROVIDER_AZURE,
                                          self._provider_uuid, start_date,
                                          Config.CSV_DATA_TYPE)
            copy_local_report_file_to_s3_bucket(self.request_id, s3_csv_path,
                                                full_file_path, local_filename,
                                                manifest_id, start_date,
                                                self.context)

            manifest_accessor = ReportManifestDBAccessor()
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)

            if not manifest_accessor.get_s3_csv_cleared(manifest):
                remove_files_not_in_set_from_s3_bucket(self.request_id,
                                                       s3_csv_path,
                                                       manifest_id)
                manifest_accessor.mark_s3_csv_cleared(manifest)

        msg = f"Returning full_file_path: {full_file_path}, etag: {etag}"
        LOG.info(log_json(self.request_id, msg, self.context))
        return full_file_path, etag, file_creation_date, []
Example #26
def refresh_materialized_views(schema_name,
                               provider_type,
                               manifest_id=None,
                               provider_uuid=None,
                               synchronous=False):
    """Refresh the database's materialized views for reporting."""
    task_name = "masu.processor.tasks.refresh_materialized_views"
    cache_args = [schema_name]
    if not synchronous:
        worker_cache = WorkerCache()
        while worker_cache.single_task_is_running(task_name, cache_args):
            time.sleep(5)

        worker_cache.lock_single_task(task_name, cache_args)
    materialized_views = ()
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        materialized_views = (AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_OCP,):
        materialized_views = (OCP_MATERIALIZED_VIEWS +
                              OCP_ON_AWS_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)
    elif provider_type in (Provider.PROVIDER_AZURE,
                           Provider.PROVIDER_AZURE_LOCAL):
        materialized_views = (AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_AZURE_MATERIALIZED_VIEWS +
                              OCP_ON_INFRASTRUCTURE_MATERIALIZED_VIEWS)

    with schema_context(schema_name):
        for view in materialized_views:
            table_name = view._meta.db_table
            with connection.cursor() as cursor:
                cursor.execute(
                    f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
                LOG.info(f"Refreshed {table_name}.")

    invalidate_view_cache_for_tenant_and_source_type(schema_name,
                                                     provider_type)

    if provider_uuid:
        ProviderDBAccessor(provider_uuid).set_data_updated_timestamp()
    if manifest_id:
        # Processing for this manifest should be complete after this step
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            manifest_accessor.mark_manifest_as_completed(manifest)

    if not synchronous:
        worker_cache.release_single_task(task_name, cache_args)
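
Each model in the view tuples above maps to a database object via _meta.db_table. A minimal sketch of refreshing one such view (assuming Django models backed by PostgreSQL materialized views); note that REFRESH MATERIALIZED VIEW CONCURRENTLY requires a unique index on the view:

from django.db import connection


def refresh_one_view(view_model):
    """Refresh a single model-backed materialized view without blocking readers."""
    table_name = view_model._meta.db_table
    with connection.cursor() as cursor:
        # CONCURRENTLY avoids an exclusive lock but requires a unique index.
        cursor.execute(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {table_name}")
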
Example #27
    def setUp(self):
        """Set up GCP tests."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f'{self.temp_dir}/evidence-2019-06-03.csv'

        shutil.copy2(self.test_report_path, self.test_report)

        gcp_auth = ProviderAuthentication.objects.create(
            credentials={'project-id': fake.word()})
        gcp_billing_source = ProviderBillingSource.objects.create(
            data_source={'bucket': fake.word()})
        self.gcp_provider = Provider.objects.create(
            uuid=uuid.uuid4(),
            name='Test Provider',
            type=Provider.PROVIDER_GCP,
            authentication=gcp_auth,
            billing_source=gcp_billing_source,
            customer=self.customer,
            setup_complete=True,
        )

        start_time = '2019-09-17T00:00:00-07:00'
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split('-')

        self.start_date_utc = parser.parse(start_date).replace(hour=0,
                                                               minute=0,
                                                               tzinfo=pytz.UTC)
        self.end_date_utc = parser.parse(end_date).replace(hour=0,
                                                           minute=0,
                                                           tzinfo=pytz.UTC)

        self.assembly_id = '1234'
        self.manifest_dict = {
            'assembly_id': self.assembly_id,
            'billing_period_start_datetime': self.start_date_utc,
            'num_total_files': 1,
            'provider_uuid': self.gcp_provider.uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        self.manifest = manifest_accessor.add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema, self.column_map)
Example #28
    def test_refresh_materialized_views(self):
        """Test that materialized views are refreshed."""
        manifest_dict = {
            'assembly_id': '12345',
            'billing_period_start_datetime': DateAccessor().today_with_timezone('UTC'),
            'num_total_files': 2,
            'provider_uuid': self.aws_provider_uuid,
            'task': '170653c0-3e66-4b7e-a764-336496d7ca5a',
        }
        fake_aws = FakeAWSCostData(self.aws_provider)
        generator = AWSReportDataGenerator(self.tenant)
        generator.add_data_to_tenant(fake_aws)

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.add(**manifest_dict)
            manifest.save()

        refresh_materialized_views(self.schema,
                                   Provider.PROVIDER_AWS,
                                   manifest_id=manifest.id)

        views_to_check = [
            view for view in AWS_MATERIALIZED_VIEWS
            if 'Cost' in view._meta.db_table
        ]

        with schema_context(self.schema):
            for view in views_to_check:
                self.assertNotEqual(view.objects.count(), 0)

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest.id)
            self.assertIsNotNone(manifest.manifest_completed_datetime)
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()

        cls.accessor = GCPReportDBAccessor(schema=cls.schema)
        cls.report_schema = cls.accessor.report_schema

        cls.all_tables = list(GCP_REPORT_TABLE_MAP.values())
        cls.foreign_key_tables = [
            GCP_REPORT_TABLE_MAP["bill"],
            GCP_REPORT_TABLE_MAP["product"],
            GCP_REPORT_TABLE_MAP["project"],
        ]
        cls.manifest_accessor = ReportManifestDBAccessor()
Example #30
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download an OCP usage file.

        Args:
            key (str): The OCP file name.

        Returns:
            (String): The path and file name of the saved file

        """
        if not self.manifest:
            self.manifest = ReportManifestDBAccessor().get_manifest_by_id(
                manifest_id)
        self.context["version"] = self.manifest.operator_version
        local_filename = utils.get_local_file_name(key)

        directory_path = f"{DATA_DIR}/{self.customer_name}/ocp/{self.cluster_id}"
        full_file_path = f"{directory_path}/{local_filename}"

        # Make sure the data directory exists
        os.makedirs(directory_path, exist_ok=True)
        etag_hasher = hashlib.new("ripemd160")
        etag_hasher.update(bytes(local_filename, "utf-8"))
        ocp_etag = etag_hasher.hexdigest()

        file_creation_date = None
        if ocp_etag != stored_etag or not os.path.isfile(full_file_path):
            msg = f"Downloading {key} to {full_file_path}"
            LOG.info(log_json(self.request_id, msg, self.context))
            shutil.move(key, full_file_path)
            file_creation_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(full_file_path))

        file_names = create_daily_archives(
            self.request_id,
            self.account,
            self._provider_uuid,
            local_filename,
            full_file_path,
            manifest_id,
            start_date,
            self.context,
        )

        return full_file_path, ocp_etag, file_creation_date, file_names