예제 #1
0
    def test_azure_date_converter(self):
        """Check that azure_date_converter parses both Azure date layouts.

        Today's date is rendered as ``YYYY-MM-DD`` (old layout) and as
        ``MM/DD/YYYY`` (new layout); each rendering must convert back to a
        value whose ``.date()`` equals today's date.
        """
        now = DateHelper().today

        for date_layout in ("%Y-%m-%d", "%m/%d/%Y"):
            rendered = now.strftime(date_layout)
            self.assertEqual(azure_date_converter(rendered).date(), now.date())
class GCPReportDownloaderTest(MasuTestCase):
    """Test cases for the GCPReportDownloader object.

    Most tests build the downloader through
    ``create_gcp_downloader_with_mocked_values`` so that construction runs with
    ``GCPProvider`` and ``GCPReportDownloader._generate_etag`` patched out.
    """

    def setUp(self):
        """Set up the etag and date values shared by the tests."""
        super().setUp()
        self.etag = "1234"
        self.today = DateHelper().today

    def tearDown(self):
        """Remove files and directories created during the test run."""
        super().tearDown()
        shutil.rmtree(DATA_DIR, ignore_errors=True)

    @staticmethod
    def _remove_if_present(path):
        """Delete ``path``; a file that is already gone is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Cleanup is best-effort: the file may never have been created.
            pass

    def create_gcp_downloader_with_mocked_values(
        self, customer_name=None, dataset=None, provider_uuid=None, project_id=None, table_id=None
    ):
        """
        Create a GCPReportDownloader with the provider check and etag generation patched.

        ``GCPProvider`` and ``GCPReportDownloader._generate_etag`` are patched only
        while the downloader is being constructed; ``_generate_etag`` returns
        ``self.etag`` during that time.

        The defaults are ``None`` rather than ``FAKE.name()`` / ``uuid4()`` calls
        because default expressions are evaluated once, when the method is
        defined, which would make every call share the same "random" values.

        Args:
            customer_name (str): optional customer name; randomly generated if None
            dataset (str): optional dataset name; randomly generated if None
            provider_uuid (uuid): optional provider UUID; randomly generated if None
            project_id (str): optional project id; randomly generated if None
            table_id (str): optional table id; randomly generated if None

        Returns:
            GCPReportDownloader instance constructed from the (possibly faked)
            argument data.

        """
        if customer_name is None:
            customer_name = FAKE.name()
        if dataset is None:
            dataset = FAKE.slug()
        if provider_uuid is None:
            provider_uuid = uuid4()
        if project_id is None:
            project_id = FAKE.slug()
        if table_id is None:
            table_id = FAKE.slug()

        billing_source = {"table_id": table_id, "dataset": dataset}
        credentials = {"project_id": project_id}
        with patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider"), patch(
            "masu.external.downloader.gcp.gcp_report_downloader.GCPReportDownloader._generate_etag",
            return_value=self.etag,
        ):
            downloader = GCPReportDownloader(
                customer_name=customer_name,
                data_source=billing_source,
                provider_uuid=provider_uuid,
                credentials=credentials,
            )
        return downloader

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_generate_etag_big_query_client_error(self, gcp_provider):
        """Test that a BigQuery client error during construction raises ReportDownloaderWarning."""
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        err_msg = "GCP Error"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.side_effect = GoogleCloudError(err_msg)
            with self.assertRaisesRegex(ReportDownloaderWarning, err_msg):
                GCPReportDownloader(
                    customer_name=FAKE.name(),
                    data_source=billing_source,
                    provider_uuid=uuid4(),
                    credentials=credentials,
                )

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_generate_etag(self, gcp_provider):
        """Test that a downloader built against a mocked BigQuery module has an etag."""
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.return_value.get_table.return_value.modified.return_value = self.today
            downloader = GCPReportDownloader(
                customer_name=FAKE.name(),
                data_source=billing_source,
                provider_uuid=uuid4(),
                credentials=credentials,
            )
            self.assertIsNotNone(downloader.etag)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.os.makedirs")
    @patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery")
    def test_download_file_failure_on_file_open(self, mock_bigquery, mock_makedirs):
        """Assert download_file raises GCPReportDownloaderError when open() fails."""
        # NOTE(review): other tests in this class configure ``bigquery.Client``
        # (capital C); this sets the lowercase ``client`` attribute -- confirm
        # which one download_file actually reads.
        mock_bigquery.client.return_value.query.return_value = ["This", "test"]
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        downloader = self.create_gcp_downloader_with_mocked_values()
        err_msg = "bad open"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.open") as mock_open:
            mock_open.side_effect = IOError(err_msg)
            with self.assertRaisesRegex(GCPReportDownloaderError, err_msg):
                downloader.download_file(key)

    def test_generate_monthly_pseudo_manifest(self):
        """Assert _generate_monthly_pseudo_manifest returns a manifest-like dict."""
        provider_uuid = uuid4()
        dh = DateHelper()
        start_date = dh.this_month_start
        invoice_month = start_date.strftime("%Y%m")
        expected_assembly_id = ":".join([str(provider_uuid), self.etag, invoice_month])
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=provider_uuid)
        downloader.scan_end = dh.this_month_end.date()
        # The expected end date is whichever comes first: today or scan_end.
        expected_end_date = min(self.today.date(), downloader.scan_end)
        result_manifest = downloader._generate_monthly_pseudo_manifest(start_date.date())
        expected_files = create_expected_csv_files(
            dh.this_month_start.date(), downloader.scan_end, invoice_month, self.etag
        )
        expected_manifest_data = {
            "assembly_id": expected_assembly_id,
            "compression": UNCOMPRESSED,
            "start_date": start_date.date(),
            "end_date": expected_end_date,  # inclusive end date
            "file_names": expected_files,
        }
        self.assertEqual(result_manifest, expected_manifest_data)

    def test_generate_assembly_id(self):
        """Assert the assembly ID is ``<provider_uuid>:<etag>:<suffix>``."""
        provider_uuid = uuid4()
        expected_assembly_id = ":".join([str(provider_uuid), self.etag, "1"])
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=provider_uuid)
        assembly_id = downloader._generate_assembly_id("1")
        self.assertEqual(assembly_id, expected_assembly_id)

    def test_relevant_file_names(self):
        """Assert relevant file name is generated correctly."""
        downloader = self.create_gcp_downloader_with_mocked_values()
        mock_invoice_month = self.today.strftime("%Y%m")
        # The expected file name ends one day after the downloader's scan_end.
        end_date = downloader.scan_end + relativedelta(days=1)
        expected_file_name = [f"{mock_invoice_month}_{self.etag}_{downloader.scan_start}:{end_date}.csv"]
        result_file_names = downloader._get_relevant_file_names(mock_invoice_month)
        self.assertEqual(expected_file_name, result_file_names)

    def test_get_local_file_for_report(self):
        """Assert that get_local_file_for_report is a simple pass-through."""
        downloader = self.create_gcp_downloader_with_mocked_values()
        report_name = FAKE.file_path()
        local_name = downloader.get_local_file_for_report(report_name)
        self.assertEqual(local_name, report_name)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.os.makedirs")
    @patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery")
    def test_download_file_success(self, mock_bigquery, mock_makedirs):
        """Assert download_file returns the expected path, etag and date."""
        # NOTE(review): lowercase ``client`` here vs ``bigquery.Client`` in other
        # tests of this class -- confirm which attribute download_file reads.
        mock_bigquery.client.return_value.query.return_value = ["This", "test"]
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        mock_name = "mock-test-customer-success"
        expected_full_path = f"{DATA_DIR}/{mock_name}/gcp/{key}"
        downloader = self.create_gcp_downloader_with_mocked_values(customer_name=mock_name)
        with patch("masu.external.downloader.gcp.gcp_report_downloader.open"), patch(
            "masu.external.downloader.gcp.gcp_report_downloader.create_daily_archives"
        ):
            full_path, etag, date, _ = downloader.download_file(key)
            mock_makedirs.assert_called()
            self.assertEqual(etag, self.etag)
            self.assertEqual(date, self.today)
            self.assertEqual(full_path, expected_full_path)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.os.makedirs")
    @patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery")
    def test_download_file_success_end_date_today(self, mock_bigquery, mock_makedirs):
        """Assert download_file succeeds when the key's end date is today."""
        # NOTE(review): lowercase ``client`` here vs ``bigquery.Client`` in other
        # tests of this class -- confirm which attribute download_file reads.
        mock_bigquery.client.return_value.query.return_value = ["This", "test"]
        end_date = DateAccessor().today().date()
        key = f"202011_1234_2020-12-05:{end_date}.csv"
        mock_name = "mock-test-customer-end-date"
        expected_full_path = f"{DATA_DIR}/{mock_name}/gcp/{key}"
        downloader = self.create_gcp_downloader_with_mocked_values(customer_name=mock_name)
        with patch("masu.external.downloader.gcp.gcp_report_downloader.open"), patch(
            "masu.external.downloader.gcp.gcp_report_downloader.create_daily_archives"
        ):
            full_path, etag, date, _ = downloader.download_file(key)
            mock_makedirs.assert_called()
            self.assertEqual(etag, self.etag)
            self.assertEqual(date, self.today)
            self.assertEqual(full_path, expected_full_path)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.open")
    def test_download_file_query_client_error(self, mock_open):
        """Test that a BigQuery client error in download_file raises GCPReportDownloaderError."""
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        downloader = self.create_gcp_downloader_with_mocked_values()
        err_msg = "GCP Error"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.side_effect = GoogleCloudError(err_msg)
            with self.assertRaisesRegex(GCPReportDownloaderError, err_msg):
                downloader.download_file(key)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_download_with_unreachable_source(self, gcp_provider):
        """Assert errors correctly when source is unreachable."""
        gcp_provider.return_value.cost_usage_source_is_reachable.side_effect = ValidationError
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        with self.assertRaises(GCPReportDownloaderError):
            GCPReportDownloader(FAKE.name(), billing_source, credentials=credentials)

    def test_get_manifest_context_for_date(self):
        """Test successful return of get manifest context for date."""
        self.maxDiff = None
        dh = DateHelper()
        start_date = dh.this_month_start
        invoice_month = start_date.strftime("%Y%m")
        p_uuid = uuid4()
        expected_assembly_id = f"{p_uuid}:{self.etag}:{invoice_month}"
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=p_uuid)
        # The expected end date is whichever comes first: today or scan_end.
        expected_end = min(self.today.date(), downloader.scan_end)
        expected_files = create_expected_csv_files(
            dh.this_month_start.date(), expected_end, invoice_month, self.etag, True
        )
        with patch(
            "masu.external.downloader.gcp.gcp_report_downloader.GCPReportDownloader._process_manifest_db_record",
            return_value=2,
        ):
            report_dict = downloader.get_manifest_context_for_date(start_date.date())
        self.assertEqual(report_dict.get("manifest_id"), 2)
        self.assertEqual(report_dict.get("files"), expected_files)
        self.assertEqual(report_dict.get("compression"), UNCOMPRESSED)
        self.assertEqual(report_dict.get("assembly_id"), expected_assembly_id)

    def test_generate_monthly_pseudo_no_manifest(self):
        """Assert a pseudo manifest is still returned for last month's start date."""
        dh = DateHelper()
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=uuid4())
        start_date = dh.last_month_start
        manifest_dict = downloader._generate_monthly_pseudo_manifest(start_date)
        self.assertIsNotNone(manifest_dict)

    @override_settings(ENABLE_PARQUET_PROCESSING=True)
    @patch("masu.external.downloader.gcp.gcp_report_downloader.copy_local_report_file_to_s3_bucket")
    def test_create_daily_archives(self, mock_s3):
        """Test that we load daily files to S3."""
        # Use the processor example for data. The source path is relative, so it
        # is resolved against the working directory the tests are run from.
        file_name = "202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
        file_path = f"./koku/masu/test/data/gcp/{file_name}"
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, file_name)
        shutil.copy2(file_path, temp_path)

        expected_daily_files = [
            f"{temp_dir}/202011_{day}.csv" for day in ("2020-11-08", "2020-11-09", "2020-11-10", "2020-11-11")
        ]

        # Register cleanup up front so the temp files are removed even when an
        # assertion below fails.
        self.addCleanup(self._remove_if_present, temp_path)
        for daily_file in expected_daily_files:
            self.addCleanup(self._remove_if_present, daily_file)

        start_date = DateHelper().this_month_start
        daily_file_names = create_daily_archives(
            "request_id", "account", self.gcp_provider_uuid, file_name, temp_path, None, start_date, None
        )

        mock_s3.assert_called()
        self.assertEqual(sorted(daily_file_names), sorted(expected_daily_files))

        for daily_file in expected_daily_files:
            self.assertTrue(os.path.exists(daily_file))

    def test_get_dataset_name(self):
        """Test that _get_dataset_name returns the bare name with or without a project prefix."""
        project_id = FAKE.slug()
        dataset_name = FAKE.slug()

        for dataset in (f"{project_id}:{dataset_name}", dataset_name):
            # subTest reports each dataset form separately instead of stopping
            # at the first failure.
            with self.subTest(dataset=dataset):
                downloader = self.create_gcp_downloader_with_mocked_values(dataset=dataset, project_id=project_id)
                self.assertEqual(downloader._get_dataset_name(), dataset_name)
예제 #3
0
class GCPReportDownloaderTest(MasuTestCase):
    """Test cases for the GCPReportDownloader object.

    Most tests build the downloader through
    ``create_gcp_downloader_with_mocked_values`` so that construction runs with
    ``GCPProvider`` and ``GCPReportDownloader._generate_etag`` patched out.
    """

    def setUp(self):
        """Set up the etag and date values shared by the tests."""
        super().setUp()
        self.etag = "1234"
        self.today = DateHelper().today

    def tearDown(self):
        """Remove files and directories created during the test run."""
        super().tearDown()
        shutil.rmtree(DATA_DIR, ignore_errors=True)

    def create_gcp_downloader_with_mocked_values(
        self, customer_name=None, dataset=None, provider_uuid=None, project_id=None, table_id=None
    ):
        """
        Create a GCPReportDownloader with the provider check and etag generation patched.

        ``GCPProvider`` and ``GCPReportDownloader._generate_etag`` are patched only
        while the downloader is being constructed; ``_generate_etag`` returns
        ``self.etag`` during that time.

        The defaults are ``None`` rather than ``FAKE.name()`` / ``uuid4()`` calls
        because default expressions are evaluated once, when the method is
        defined, which would make every call share the same "random" values.

        Args:
            customer_name (str): optional customer name; randomly generated if None
            dataset (str): optional dataset name; randomly generated if None
            provider_uuid (uuid): optional provider UUID; randomly generated if None
            project_id (str): optional project id; randomly generated if None
            table_id (str): optional table id; randomly generated if None

        Returns:
            GCPReportDownloader instance constructed from the (possibly faked)
            argument data.

        """
        if customer_name is None:
            customer_name = FAKE.name()
        if dataset is None:
            dataset = FAKE.slug()
        if provider_uuid is None:
            provider_uuid = uuid4()
        if project_id is None:
            project_id = FAKE.slug()
        if table_id is None:
            table_id = FAKE.slug()

        billing_source = {"table_id": table_id, "dataset": dataset}
        credentials = {"project_id": project_id}
        with patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider"), patch(
            "masu.external.downloader.gcp.gcp_report_downloader.GCPReportDownloader._generate_etag",
            return_value=self.etag,
        ):
            downloader = GCPReportDownloader(
                customer_name=customer_name,
                data_source=billing_source,
                provider_uuid=provider_uuid,
                credentials=credentials,
            )
        return downloader

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_generate_etag_big_query_client_error(self, gcp_provider):
        """Test that a BigQuery client error during construction raises GCPReportDownloaderError."""
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        err_msg = "GCP Error"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.side_effect = GoogleCloudError(err_msg)
            # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
            with self.assertRaisesRegex(GCPReportDownloaderError, err_msg):
                GCPReportDownloader(
                    customer_name=FAKE.name(),
                    data_source=billing_source,
                    provider_uuid=uuid4(),
                    credentials=credentials,
                )

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_generate_etag(self, gcp_provider):
        """Test that a downloader built against a mocked BigQuery module has an etag."""
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.return_value.get_table.return_value.modified.return_value = self.today
            downloader = GCPReportDownloader(
                customer_name=FAKE.name(), data_source=billing_source, provider_uuid=uuid4(), credentials=credentials
            )
            self.assertIsNotNone(downloader.etag)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.os.makedirs")
    @patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery")
    def test_download_file_failure_on_file_open(self, mock_bigquery, mock_makedirs):
        """Assert download_file raises GCPReportDownloaderError when open() fails."""
        # NOTE(review): other tests in this class configure ``bigquery.Client``
        # (capital C); this sets the lowercase ``client`` attribute -- confirm
        # which one download_file actually reads.
        mock_bigquery.client.return_value.query.return_value = ["This", "test"]
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        downloader = self.create_gcp_downloader_with_mocked_values()
        err_msg = "bad open"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.open") as mock_open:
            mock_open.side_effect = IOError(err_msg)
            with self.assertRaisesRegex(GCPReportDownloaderError, err_msg):
                downloader.download_file(key)

    def test_generate_monthly_pseudo_manifest(self):
        """Assert _generate_monthly_pseudo_manifest returns a manifest-like dict."""
        provider_uuid = uuid4()
        dh = DateHelper()
        start_date = dh.this_month_start
        invoice_month = start_date.strftime("%Y%m")
        expected_end_date = dh.this_month_end.date()
        expected_assembly_id = ":".join([str(provider_uuid), self.etag, invoice_month])
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=provider_uuid)
        downloader.scan_end = dh.this_month_end.date()
        result_manifest = downloader._generate_monthly_pseudo_manifest(start_date.date())
        expected_manifest_data = {
            "assembly_id": expected_assembly_id,
            "compression": UNCOMPRESSED,
            "start_date": start_date.date(),
            "end_date": expected_end_date,  # inclusive end date
            "file_names": [f"{invoice_month}_{self.etag}_{downloader.scan_start}:{downloader.scan_end}.csv"],
        }
        self.assertEqual(result_manifest, expected_manifest_data)

    def test_generate_assembly_id(self):
        """Assert the assembly ID is ``<provider_uuid>:<etag>:<suffix>``."""
        provider_uuid = uuid4()
        expected_assembly_id = ":".join([str(provider_uuid), self.etag, "1"])
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=provider_uuid)
        assembly_id = downloader._generate_assembly_id("1")
        self.assertEqual(assembly_id, expected_assembly_id)

    def test_relevant_file_names(self):
        """Assert relevant file name is generated correctly."""
        downloader = self.create_gcp_downloader_with_mocked_values()
        mock_invoice_month = self.today.strftime("%Y%m")
        expected_file_name = [f"{mock_invoice_month}_{self.etag}_{downloader.scan_start}:{downloader.scan_end}.csv"]
        result_file_names = downloader._get_relevant_file_names(mock_invoice_month)
        self.assertEqual(expected_file_name, result_file_names)

    def test_get_local_file_for_report(self):
        """Assert that get_local_file_for_report is a simple pass-through."""
        downloader = self.create_gcp_downloader_with_mocked_values()
        report_name = FAKE.file_path()
        local_name = downloader.get_local_file_for_report(report_name)
        self.assertEqual(local_name, report_name)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.os.makedirs")
    @patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery")
    def test_download_file_success(self, mock_bigquery, mock_makedirs):
        """Assert download_file returns the expected path, etag and date."""
        # NOTE(review): lowercase ``client`` here vs ``bigquery.Client`` in other
        # tests of this class -- confirm which attribute download_file reads.
        mock_bigquery.client.return_value.query.return_value = ["This", "test"]
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        mock_name = "Cody"
        expected_full_path = f"{DATA_DIR}/{mock_name}/gcp/{key}"
        downloader = self.create_gcp_downloader_with_mocked_values(customer_name=mock_name)
        with patch("masu.external.downloader.gcp.gcp_report_downloader.open"):
            full_path, etag, date = downloader.download_file(key)
            mock_makedirs.assert_called()
            self.assertEqual(etag, self.etag)
            self.assertEqual(date, self.today)
            self.assertEqual(full_path, expected_full_path)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.open")
    def test_download_file_query_client_error(self, mock_open):
        """Test that a BigQuery client error in download_file raises GCPReportDownloaderError."""
        key = "202011_1234_2020-12-05:2020-12-08.csv"
        downloader = self.create_gcp_downloader_with_mocked_values()
        err_msg = "GCP Error"
        with patch("masu.external.downloader.gcp.gcp_report_downloader.bigquery") as bigquery:
            bigquery.Client.side_effect = GoogleCloudError(err_msg)
            with self.assertRaisesRegex(GCPReportDownloaderError, err_msg):
                downloader.download_file(key)

    @patch("masu.external.downloader.gcp.gcp_report_downloader.GCPProvider")
    def test_download_with_unreachable_source(self, gcp_provider):
        """Assert errors correctly when source is unreachable."""
        gcp_provider.return_value.cost_usage_source_is_reachable.side_effect = ValidationError
        billing_source = {"table_id": FAKE.slug(), "dataset": FAKE.slug()}
        credentials = {"project_id": FAKE.slug()}
        with self.assertRaises(GCPReportDownloaderError):
            GCPReportDownloader(FAKE.name(), billing_source, credentials=credentials)

    def test_get_manifest_context_for_date(self):
        """Test successful return of get manifest context for date."""
        dh = DateHelper()
        start_date = dh.this_month_start
        invoice_month = start_date.strftime("%Y%m")
        p_uuid = uuid4()
        expected_assembly_id = f"{p_uuid}:{self.etag}:{invoice_month}"
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=p_uuid)
        csv_file = f"{invoice_month}_{self.etag}_{dh.this_month_start.date()}:{downloader.scan_end}.csv"
        expected_files = [{"key": csv_file, "local_file": csv_file}]
        with patch(
            "masu.external.downloader.gcp.gcp_report_downloader.GCPReportDownloader._process_manifest_db_record",
            return_value=2,
        ):
            report_dict = downloader.get_manifest_context_for_date(start_date.date())
        self.assertEqual(report_dict.get("manifest_id"), 2)
        self.assertEqual(report_dict.get("files"), expected_files)
        self.assertEqual(report_dict.get("compression"), UNCOMPRESSED)
        self.assertEqual(report_dict.get("assembly_id"), expected_assembly_id)

    def test_generate_monthly_pseudo_no_manifest(self):
        """Assert a pseudo manifest is still returned for last month's start date."""
        dh = DateHelper()
        downloader = self.create_gcp_downloader_with_mocked_values(provider_uuid=uuid4())
        start_date = dh.last_month_start
        manifest_dict = downloader._generate_monthly_pseudo_manifest(start_date)
        self.assertIsNotNone(manifest_dict)