def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download an S3 object to file.

        Args:
            key (str): The S3 object key identified.

        Returns:
            (String): The path and file name of the saved file

        """
        local_s3_filename = utils.get_local_file_name(key)

        directory_path = f"{DATA_DIR}/{self.customer_name}/aws-local/{self.bucket}"
        full_file_path = f"{directory_path}/{local_s3_filename}"

        if not os.path.isfile(key):
            log_msg = f"Unable to locate {key} in {self.bucket_path}"
            raise AWSReportDownloaderNoFileError(log_msg)

        # Make sure the data directory exists
        os.makedirs(directory_path, exist_ok=True)
        s3_etag_hasher = hashlib.new("ripemd160")
        s3_etag_hasher.update(bytes(local_s3_filename, "utf-8"))
        s3_etag = s3_etag_hasher.hexdigest()

        file_creation_date = None
        if s3_etag != stored_etag or not os.path.isfile(full_file_path):
            msg = f"Downloading {key} to {full_file_path}"
            LOG.info(log_json(self.tracing_id, msg, self.context))
            shutil.copy2(key, full_file_path)
            file_creation_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(full_file_path))
            # Push to S3

            s3_csv_path = get_path_prefix(self.account, Provider.PROVIDER_AWS,
                                          self._provider_uuid, start_date,
                                          Config.CSV_DATA_TYPE)
            utils.copy_local_report_file_to_s3_bucket(
                self.tracing_id, s3_csv_path, full_file_path,
                local_s3_filename, manifest_id, start_date, self.context)

            manifest_accessor = ReportManifestDBAccessor()
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)

            if not manifest_accessor.get_s3_csv_cleared(manifest):
                utils.remove_files_not_in_set_from_s3_bucket(
                    self.tracing_id, s3_csv_path, manifest_id)
                manifest_accessor.mark_s3_csv_cleared(manifest)
        return full_file_path, s3_etag, file_creation_date, []
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download a file from Azure bucket.

        Args:
            key (str): The object key identified.

        Returns:
            (String): The path and file name of the saved file

        """
        local_filename = utils.get_local_file_name(key)
        full_file_path = f"{self._get_exports_data_directory()}/{local_filename}"

        file_creation_date = None
        try:
            blob = self._azure_client.get_cost_export_for_key(
                key, self.container_name)
            etag = blob.etag
            file_creation_date = blob.last_modified
        except AzureCostReportNotFound as ex:
            msg = f"Error when downloading Azure report for key: {key}. Error {ex}"
            LOG.error(log_json(self.tracing_id, msg, self.context))
            raise AzureReportDownloaderError(msg)

        msg = f"Downloading {key} to {full_file_path}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        blob = self._azure_client.download_cost_export(
            key, self.container_name, destination=full_file_path)
        # Push to S3
        s3_csv_path = get_path_prefix(self.account, Provider.PROVIDER_AZURE,
                                      self._provider_uuid, start_date,
                                      Config.CSV_DATA_TYPE)
        copy_local_report_file_to_s3_bucket(self.tracing_id, s3_csv_path,
                                            full_file_path, local_filename,
                                            manifest_id, start_date,
                                            self.context)

        manifest_accessor = ReportManifestDBAccessor()
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)

        if not manifest_accessor.get_s3_csv_cleared(manifest):
            remove_files_not_in_set_from_s3_bucket(self.tracing_id,
                                                   s3_csv_path, manifest_id)
            manifest_accessor.mark_s3_csv_cleared(manifest)

        msg = f"Returning full_file_path: {full_file_path}, etag: {etag}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        return full_file_path, etag, file_creation_date, []
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download a file from Azure bucket.

        Args:
            key (str): The object key identified.

        Returns:
            (String): The path and file name of the saved file

        """
        local_filename = utils.get_local_file_name(key)
        full_file_path = f"{self._get_exports_data_directory()}/{local_filename}"

        etag_hasher = hashlib.new("ripemd160")
        etag_hasher.update(bytes(local_filename, "utf-8"))
        etag = etag_hasher.hexdigest()

        file_creation_date = None
        if etag != stored_etag:
            msg = f"Downloading {key} to {full_file_path}"
            LOG.info(log_json(self.request_id, msg, self.context))
            shutil.copy2(key, full_file_path)
            file_creation_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(full_file_path))
            # Push to S3
            s3_csv_path = get_path_prefix(self.account,
                                          Provider.PROVIDER_AZURE,
                                          self._provider_uuid, start_date,
                                          Config.CSV_DATA_TYPE)
            copy_local_report_file_to_s3_bucket(self.request_id, s3_csv_path,
                                                full_file_path, local_filename,
                                                manifest_id, start_date,
                                                self.context)

            manifest_accessor = ReportManifestDBAccessor()
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)

            if not manifest_accessor.get_s3_csv_cleared(manifest):
                remove_files_not_in_set_from_s3_bucket(self.request_id,
                                                       s3_csv_path,
                                                       manifest_id)
                manifest_accessor.mark_s3_csv_cleared(manifest)

        msg = f"Returning full_file_path: {full_file_path}, etag: {etag}"
        LOG.info(log_json(self.request_id, msg, self.context))
        return full_file_path, etag, file_creation_date, []
Beispiel #4
0
class ReportManifestDBAccessorTest(IamTestCase):
    """Test cases for the ReportManifestDBAccessor."""
    def setUp(self):
        """Set up the test class."""
        super().setUp()
        self.schema = self.schema_name
        self.billing_start = DateAccessor().today_with_timezone("UTC").replace(
            day=1)
        self.manifest_dict = {
            "assembly_id": "1234",
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 2,
            "provider_uuid": self.provider_uuid,
        }
        self.manifest_accessor = ReportManifestDBAccessor()

    def tearDown(self):
        """Tear down the test class."""
        super().tearDown()
        with schema_context(self.schema):
            manifests = self.manifest_accessor._get_db_obj_query().all()
            for manifest in manifests:
                self.manifest_accessor.delete(manifest)

    def test_initializer(self):
        """Test the initializer."""
        accessor = ReportManifestDBAccessor()
        self.assertIsNotNone(accessor._table)

    def test_get_manifest(self):
        """Test that the right manifest is returned."""
        with schema_context(self.schema):
            added_manifest = self.manifest_accessor.add(**self.manifest_dict)

            assembly_id = self.manifest_dict.get("assembly_id")
            provider_uuid = self.manifest_dict.get("provider_uuid")
            manifest = self.manifest_accessor.get_manifest(
                assembly_id, provider_uuid)

        self.assertIsNotNone(manifest)
        self.assertEqual(added_manifest, manifest)
        self.assertEqual(manifest.assembly_id, assembly_id)
        self.assertEqual(manifest.provider_id, provider_uuid)
        self.assertEqual(manifest.num_total_files,
                         self.manifest_dict.get("num_total_files"))

    def test_get_manifest_by_id(self):
        """Test that the right manifest is returned by id."""
        with schema_context(self.schema):
            added_manifest = self.manifest_accessor.add(**self.manifest_dict)
            manifest = self.manifest_accessor.get_manifest_by_id(
                added_manifest.id)
        self.assertIsNotNone(manifest)
        self.assertEqual(added_manifest, manifest)

    def test_mark_manifest_as_updated(self):
        """Test that the manifest is marked updated."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            now = DateAccessor().today_with_timezone("UTC")
            self.manifest_accessor.mark_manifest_as_updated(manifest)
            self.assertGreater(manifest.manifest_updated_datetime, now)

    def test_mark_manifest_as_updated_none_manifest(self):
        """Test that a none manifest doesn't update failure."""
        try:
            self.manifest_accessor.mark_manifest_as_updated(None)
        except Exception as err:
            self.fail(f"Test failed with error: {err}")

    def test_mark_manifest_as_completed_none_manifest(self):
        """Test that a none manifest doesn't complete failure."""
        try:
            self.manifest_accessor.mark_manifest_as_completed(None)
        except Exception as err:
            self.fail(f"Test failed with error: {err}")

    def test_get_manifest_list_for_provider_and_bill_date(self):
        """Test that all manifests are returned for a provider and bill."""
        bill_date = self.manifest_dict["billing_period_start_datetime"].date()
        manifest_dict = copy.deepcopy(self.manifest_dict)
        self.manifest_accessor.add(**manifest_dict)
        result = self.manifest_accessor.get_manifest_list_for_provider_and_bill_date(
            self.provider_uuid, bill_date)
        self.assertEqual(len(result), 1)

        manifest_dict["assembly_id"] = "2345"
        self.manifest_accessor.add(**manifest_dict)
        result = self.manifest_accessor.get_manifest_list_for_provider_and_bill_date(
            self.provider_uuid, bill_date)
        self.assertEqual(len(result), 2)

        manifest_dict["assembly_id"] = "3456"
        self.manifest_accessor.add(**manifest_dict)
        result = self.manifest_accessor.get_manifest_list_for_provider_and_bill_date(
            self.provider_uuid, bill_date)
        self.assertEqual(len(result), 3)

    def test_get_last_seen_manifest_ids(self):
        """Test that get_last_seen_manifest_ids returns the appropriate assembly_ids."""
        # test that the most recently seen manifests that haven't been processed are returned
        manifest_dict2 = {
            "assembly_id": "5678",
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 1,
            "provider_uuid": "00000000-0000-0000-0000-000000000002",
        }
        manifest = self.manifest_accessor.add(**self.manifest_dict)
        manifest2 = self.manifest_accessor.add(**manifest_dict2)
        assembly_ids = self.manifest_accessor.get_last_seen_manifest_ids(
            self.billing_start)
        self.assertEqual(assembly_ids,
                         [manifest.assembly_id, manifest2.assembly_id])

        # test that when the manifest's files have been processed - it is no longer returned
        manifest2_helper = ManifestCreationHelper(
            manifest2.id, manifest_dict2.get("num_total_files"),
            manifest_dict2.get("assembly_id"))

        manifest2_helper.generate_test_report_files()
        manifest2_helper.process_all_files()

        assembly_ids = self.manifest_accessor.get_last_seen_manifest_ids(
            self.billing_start)
        self.assertEqual(assembly_ids, [manifest.assembly_id])

        # test that of two manifests with the same provider_ids - that only the most recently
        # seen is returned
        manifest_dict3 = {
            "assembly_id": "91011",
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 1,
            "provider_uuid": self.provider_uuid,
        }
        manifest3 = self.manifest_accessor.add(**manifest_dict3)
        assembly_ids = self.manifest_accessor.get_last_seen_manifest_ids(
            self.billing_start)
        self.assertEqual(assembly_ids, [manifest3.assembly_id])

        # test that manifests for a different billing month are not returned
        current_month = self.billing_start
        calculated_month = current_month + relativedelta(months=-2)
        manifest3.billing_period_start_datetime = calculated_month
        manifest3.save()
        assembly_ids = self.manifest_accessor.get_last_seen_manifest_ids(
            self.billing_start)
        self.assertEqual(assembly_ids, [manifest.assembly_id])

    def test_is_last_completed_datetime_null(self):
        """Test is last completed datetime is null."""
        manifest_id = 123456789
        self.assertTrue(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))
        baker.make(CostUsageReportManifest, id=manifest_id)
        baker.make(CostUsageReportStatus,
                   manifest_id=manifest_id,
                   last_completed_datetime=None)
        self.assertTrue(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))

        CostUsageReportStatus.objects.filter(manifest_id=manifest_id).update(
            last_completed_datetime=FAKE.date())

        self.assertFalse(
            ReportManifestDBAccessor().is_last_completed_datetime_null(
                manifest_id))

    def test_get_s3_csv_cleared(self):
        """Test that s3 CSV clear status is reported."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            status = self.manifest_accessor.get_s3_csv_cleared(manifest)
            self.assertFalse(status)

            self.manifest_accessor.mark_s3_csv_cleared(manifest)

            status = self.manifest_accessor.get_s3_csv_cleared(manifest)
            self.assertTrue(status)

    def test_get_s3_parquet_cleared(self):
        """Test that s3 CSV clear status is reported."""
        with schema_context(self.schema):
            manifest = self.manifest_accessor.add(**self.manifest_dict)
            status = self.manifest_accessor.get_s3_parquet_cleared(manifest)
            self.assertFalse(status)

            self.manifest_accessor.mark_s3_parquet_cleared(manifest)

            status = self.manifest_accessor.get_s3_parquet_cleared(manifest)
            self.assertTrue(status)
    def download_file(self,
                      key,
                      stored_etag=None,
                      manifest_id=None,
                      start_date=None):
        """
        Download an S3 object to file.

        Args:
            key (str): The S3 object key identified.

        Returns:
            (String): The path and file name of the saved file

        """
        s3_filename = key.split("/")[-1]
        directory_path = f"{DATA_DIR}/{self.customer_name}/aws/{self.bucket}"

        local_s3_filename = utils.get_local_file_name(key)
        msg = f"Local S3 filename: {local_s3_filename}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        full_file_path = f"{directory_path}/{local_s3_filename}"

        # Make sure the data directory exists
        os.makedirs(directory_path, exist_ok=True)
        s3_etag = None
        file_creation_date = None
        try:
            s3_file = self.s3_client.get_object(
                Bucket=self.report.get("S3Bucket"), Key=key)
            s3_etag = s3_file.get("ETag")
            file_creation_date = s3_file.get("LastModified")
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                msg = "Unable to find {} in S3 Bucket: {}".format(
                    s3_filename, self.report.get("S3Bucket"))
                LOG.info(log_json(self.tracing_id, msg, self.context))
                raise AWSReportDownloaderNoFileError(msg)
            if ex.response["Error"]["Code"] == "AccessDenied":
                msg = "Unable to access S3 Bucket {}: (AccessDenied)".format(
                    self.report.get("S3Bucket"))
                LOG.info(log_json(self.tracing_id, msg, self.context))
                raise AWSReportDownloaderNoFileError(msg)
            msg = f"Error downloading file: Error: {str(ex)}"
            LOG.error(log_json(self.tracing_id, msg, self.context))
            raise AWSReportDownloaderError(str(ex))

        if not self._check_size(key, check_inflate=True):
            msg = f"Insufficient disk space to download file: {s3_file}"
            LOG.error(log_json(self.tracing_id, msg, self.context))
            raise AWSReportDownloaderError(msg)

        if s3_etag != stored_etag or not os.path.isfile(full_file_path):
            msg = f"Downloading key: {key} to file path: {full_file_path}"
            LOG.info(log_json(self.tracing_id, msg, self.context))
            self.s3_client.download_file(self.report.get("S3Bucket"), key,
                                         full_file_path)
            # Push to S3
            s3_csv_path = get_path_prefix(self.account, Provider.PROVIDER_AWS,
                                          self._provider_uuid, start_date,
                                          Config.CSV_DATA_TYPE)
            utils.copy_local_report_file_to_s3_bucket(
                self.tracing_id, s3_csv_path, full_file_path,
                local_s3_filename, manifest_id, start_date, self.context)

            manifest_accessor = ReportManifestDBAccessor()
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)

            if not manifest_accessor.get_s3_csv_cleared(manifest):
                utils.remove_files_not_in_set_from_s3_bucket(
                    self.tracing_id, s3_csv_path, manifest_id)
                manifest_accessor.mark_s3_csv_cleared(manifest)
        msg = f"Download complete for {key}"
        LOG.info(log_json(self.tracing_id, msg, self.context))
        return full_file_path, s3_etag, file_creation_date, []