def test_gcp_process_twice(self):
    """Check that re-processing the same GCP report leaves the object counts unchanged."""
    tracked_models = (GCPCostEntryLineItem, GCPProject, GCPCostEntryBill)

    self.processor.process()
    with schema_context(self.schema):
        counts_after_first_run = [len(model.objects.all()) for model in tracked_models]

    # A fresh processor instance is used for the second run because calling
    # process() twice on the same instance fails with
    # django.db.utils.InternalError: no such savepoint.
    shutil.copy2(self.test_report_path, self.test_report)
    second_processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
        manifest_id=self.manifest.id,
    )
    second_processor.process()

    with schema_context(self.schema):
        for model, expected_count in zip(tracked_models, counts_after_first_run):
            self.assertEqual(expected_count, len(model.objects.all()))
def test_no_report_path(self):
    """Check that process() returns a falsy result when the report path does not exist."""
    missing_report = "/path/does/not/exist/202011_123_2020-11-08:2020-11-11.csv"
    processor_for_missing_file = GCPReportProcessor(
        schema_name=self.schema,
        report_path=missing_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
        manifest_id=self.manifest.id,
    )
    self.assertFalse(processor_for_missing_file.process())
def setUp(self):
    """Create the report copy, provider, manifest, processor and accessor used by the GCP tests."""
    super().setUp()

    # Each test works on its own copy of the fixture report in a temp directory.
    report_name = "202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
    self.temp_dir = tempfile.mkdtemp()
    self.test_report = f"{self.temp_dir}/{report_name}"
    shutil.copy2(self.test_report_path, self.test_report)

    authentication = ProviderAuthentication.objects.create(credentials={"project-id": fake.word()})
    billing_source = ProviderBillingSource.objects.create(data_source={"bucket": fake.word()})
    # check_report_updates is patched out while the provider row is created.
    with patch("masu.celery.tasks.check_report_updates"):
        self.gcp_provider = Provider.objects.create(
            uuid=uuid.uuid4(),
            name="Test Provider",
            type=Provider.PROVIDER_GCP,
            authentication=authentication,
            billing_source=billing_source,
            customer=self.customer,
            setup_complete=True,
        )

    # Billing period boundaries for the month containing the report start time.
    month_range = utils.month_date_range(parser.parse("2020-11-08 23:00:00+00:00"))
    range_start, range_end = month_range.split("-")
    utc_fields = {"hour": 0, "minute": 0, "tzinfo": pytz.UTC}
    self.start_date_utc = parser.parse(range_start).replace(**utc_fields)
    self.end_date_utc = parser.parse(range_end).replace(**utc_fields)

    self.assembly_id = "1234"
    self.manifest_dict = {
        "assembly_id": self.assembly_id,
        "billing_period_start_datetime": self.start_date_utc,
        "num_total_files": 1,
        "provider_uuid": self.gcp_provider.uuid,
    }
    self.manifest = ReportManifestDBAccessor().add(**self.manifest_dict)

    self.processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
        manifest_id=self.manifest.id,
    )
    self.accessor = GCPReportDBAccessor(self.schema)
def setUp(self):
    """Create the report copy, provider, manifest, processor and accessor used by the GCP tests."""
    super().setUp()

    # Each test works on its own copy of the fixture report in a temp directory.
    report_name = 'evidence-2019-06-03.csv'
    self.temp_dir = tempfile.mkdtemp()
    self.test_report = f'{self.temp_dir}/{report_name}'
    shutil.copy2(self.test_report_path, self.test_report)

    authentication = ProviderAuthentication.objects.create(credentials={'project-id': fake.word()})
    billing_source = ProviderBillingSource.objects.create(data_source={'bucket': fake.word()})
    self.gcp_provider = Provider.objects.create(
        uuid=uuid.uuid4(),
        name='Test Provider',
        type=Provider.PROVIDER_GCP,
        authentication=authentication,
        billing_source=billing_source,
        customer=self.customer,
        setup_complete=True,
    )

    # Billing period boundaries for the month containing the report start time.
    month_range = utils.month_date_range(parser.parse('2019-09-17T00:00:00-07:00'))
    range_start, range_end = month_range.split('-')
    utc_fields = {'hour': 0, 'minute': 0, 'tzinfo': pytz.UTC}
    self.start_date_utc = parser.parse(range_start).replace(**utc_fields)
    self.end_date_utc = parser.parse(range_end).replace(**utc_fields)

    self.assembly_id = '1234'
    self.manifest_dict = {
        'assembly_id': self.assembly_id,
        'billing_period_start_datetime': self.start_date_utc,
        'num_total_files': 1,
        'provider_uuid': self.gcp_provider.uuid,
    }
    self.manifest = ReportManifestDBAccessor().add(**self.manifest_dict)

    self.processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
        manifest_id=self.manifest.id,
    )
    self.accessor = GCPReportDBAccessor(self.schema, self.column_map)
def test_no_manifest_process(self):
    """Test that a report can be processed successfully without a manifest.

    The processor is built without a ``manifest_id``. After processing, line
    items and projects must exist, exactly one bill must be present, and the
    report file must no longer exist on disk.
    """
    processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
    )
    processor.process()

    with schema_context(self.schema):
        self.assertGreater(len(GCPCostEntryLineItem.objects.all()), 0)
        self.assertGreater(len(GCPProject.objects.all()), 0)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(1, len(GCPCostEntryBill.objects.all()))
    self.assertFalse(os.path.exists(self.test_report))
def _set_processor(self):
    """
    Build the report processor matching this provider.

    When Trino processing is enabled for the provider a ParquetReportProcessor
    is returned regardless of provider type; otherwise the processor class is
    chosen from ``self.provider_type``.

    Args:
        None

    Returns:
        (Object) : Provider-specific report processor, or None when the
                   provider type is not recognized.

    """
    # Arguments every processor constructor receives.
    shared_kwargs = {
        "schema_name": self.schema_name,
        "report_path": self.report_path,
        "compression": self.compression,
        "provider_uuid": self.provider_uuid,
    }

    if enable_trino_processing(self.provider_uuid):
        return ParquetReportProcessor(
            provider_type=self.provider_type,
            manifest_id=self.manifest_id,
            context=self.context,
            **shared_kwargs,
        )

    provider_type = self.provider_type
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        return AWSReportProcessor(manifest_id=self.manifest_id, **shared_kwargs)
    if provider_type in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
        return AzureReportProcessor(manifest_id=self.manifest_id, **shared_kwargs)
    if provider_type in (Provider.PROVIDER_OCP,):
        # The OCP processor is constructed without a manifest id.
        return OCPReportProcessor(**shared_kwargs)
    if provider_type in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
        return GCPReportProcessor(manifest_id=self.manifest_id, **shared_kwargs)
    return None
def test_get_line_item_type(self):
    """Check that _get_line_item_type maps each service description to the expected type."""
    processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
    )
    # Expected line item type -> service descriptions that should resolve to it.
    # Mixed-case entries exercise differently-cased spellings of the same service.
    descriptions_by_type = {
        "usage": ["Compute Engine", "COMPUTE engine", "Kubernetes Engine"],
        "storage": ["Filestore", "Storage", "Data Transfer", "DATA TRanSFer"],
        "network": ["VPC network", "Network services"],
        "database": ["Bigtable", "Spanner"],
        "other": ["unknown"],
    }
    cases = (
        (description, item_type)
        for item_type, descriptions in descriptions_by_type.items()
        for description in descriptions
    )
    for description, item_type in cases:
        actual_type = processor._get_line_item_type({"service.description": description})
        self.assertEqual(actual_type, item_type)
def _set_processor(self):
    """
    Build the report processor matching this provider's type.

    Args:
        None

    Returns:
        (Object) : Provider-specific report processor, or None when the
                   provider type is not recognized.

    """
    # Arguments every processor constructor receives.
    shared_kwargs = {
        "schema_name": self.schema_name,
        "report_path": self.report_path,
        "compression": self.compression,
        "provider_uuid": self.provider_uuid,
    }
    provider_type = self.provider_type

    if provider_type in (AMAZON_WEB_SERVICES, AWS_LOCAL_SERVICE_PROVIDER):
        return AWSReportProcessor(manifest_id=self.manifest_id, **shared_kwargs)
    if provider_type in (AZURE, AZURE_LOCAL_SERVICE_PROVIDER):
        return AzureReportProcessor(manifest_id=self.manifest_id, **shared_kwargs)
    # NOTE(review): unlike the AWS and Azure branches, the OCP and GCP branches
    # below do not forward manifest_id -- confirm this is intended.
    if provider_type in (OPENSHIFT_CONTAINER_PLATFORM,):
        return OCPReportProcessor(**shared_kwargs)
    if provider_type in (GCP, GCP_LOCAL):
        return GCPReportProcessor(**shared_kwargs)
    return None
def test_create_cost_entry_line_item_bad_time(self):
    """Check that an unparsable or missing usage_start_time requests no partitions."""
    processor = GCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.gcp_provider.uuid,
    )
    with open(self.test_report) as report_file:
        first_row = next(csv.DictReader(report_file))

    # The same placeholder is passed for each of the three id arguments.
    placeholder_id = 2

    # Case 1: the start time is present but cannot be parsed.
    first_row["usage_start_time"] = "bad time value"
    processor._create_cost_entry_line_item(
        first_row, placeholder_id, placeholder_id, self.accessor, placeholder_id
    )
    self.assertFalse(processor.processed_report.requested_partitions)

    # Case 2: the start time column is missing from the row.
    del first_row["usage_start_time"]
    processor._create_cost_entry_line_item(
        first_row, placeholder_id, placeholder_id, self.accessor, placeholder_id
    )
    self.assertFalse(processor.processed_report.requested_partitions)
class GCPReportProcessorTest(MasuTestCase):
    """Test Cases for the GCPReportProcessor object."""

    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        cls.test_report_path = (
            "./koku/masu/test/data/gcp/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
        )
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()

    def setUp(self):
        """Set up GCP tests: report copy, provider, manifest, processor and accessor."""
        super().setUp()
        # Each test works on its own copy of the fixture report.
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
        shutil.copy2(self.test_report_path, self.test_report)

        gcp_auth = ProviderAuthentication.objects.create(credentials={"project-id": fake.word()})
        gcp_billing_source = ProviderBillingSource.objects.create(data_source={"bucket": fake.word()})
        # check_report_updates is patched out while the provider row is created.
        with patch("masu.celery.tasks.check_report_updates"):
            self.gcp_provider = Provider.objects.create(
                uuid=uuid.uuid4(),
                name="Test Provider",
                type=Provider.PROVIDER_GCP,
                authentication=gcp_auth,
                billing_source=gcp_billing_source,
                customer=self.customer,
                setup_complete=True,
            )

        start_time = "2020-11-08 23:00:00+00:00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        self.start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        self.end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        self.assembly_id = "1234"
        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.start_date_utc,
            "num_total_files": 1,
            "provider_uuid": self.gcp_provider.uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        self.manifest = manifest_accessor.add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema)

    def tearDown(self):
        """Tear down test case."""
        super().tearDown()
        shutil.rmtree(self.temp_dir)

    def test_gcp_process(self):
        """Test the processing of an GCP file writes objects to the database."""
        with schema_context(self.schema):
            # Bills may already exist, so compare against the starting count.
            expected_bill_count = len(GCPCostEntryBill.objects.all()) + 1
        self.processor.process()
        with schema_context(self.schema):
            self.assertTrue(len(GCPCostEntryLineItem.objects.all()) > 0)
            self.assertTrue(len(GCPProject.objects.all()) > 0)
            self.assertEqual(expected_bill_count, len(GCPCostEntryBill.objects.all()))
        self.assertFalse(os.path.exists(self.test_report))

    def test_create_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that doesn't exist creates it."""
        bill_row = {"invoice.month": "202011"}
        entry_bill_id = self.processor._get_or_create_cost_entry_bill(bill_row, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPCostEntryBill.objects.filter(id=entry_bill_id).exists())

    def test_get_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that exists fetches its id."""
        start_time = "2020-11-01 00:00:00+00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        with schema_context(self.schema):
            entry_bill = GCPCostEntryBill.objects.create(
                provider=self.gcp_provider,
                billing_period_start=start_date_utc,
                billing_period_end=end_date_utc,
            )
        entry_bill_id = self.processor._get_or_create_cost_entry_bill({"invoice.month": "202011"}, self.accessor)
        self.assertEqual(entry_bill.id, entry_bill_id)

    def test_create_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that doesn't exist creates it."""
        project_data = {
            "project.id": fake.word(),
            "billing_account_id": fake.word(),
            "project.name": fake.word(),
        }
        project_id = self.processor._get_or_create_gcp_project(project_data, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPProject.objects.filter(id=project_id).exists())

    def test_create_gcp_project_name_change(self):
        """Test changing the project name will update in the table."""
        project_id = fake.word()
        project_info = {
            "project.id": project_id,
            "billing_account_id": fake.word(),
            "project.name": fake.word(),
        }
        expected_name = "BiggoStormsMixTape"
        pt_id_1 = self.processor._get_or_create_gcp_project(project_info, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPProject.objects.filter(id=pt_id_1).exists())

        project_info["project.name"] = expected_name
        pt_id_2 = self.processor._get_or_create_gcp_project(project_info, self.accessor)
        # Check that both calls return the same project table id
        self.assertEqual(pt_id_1, pt_id_2)
        with schema_context(self.schema):
            p = GCPProject.objects.filter(project_id=project_id).first()
            self.assertEqual(p.project_name, expected_name)

    def test_get_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that exists gets it."""
        project_id = fake.word()
        account_id = fake.word()
        project_name = fake.word()
        with schema_context(self.schema):
            project = GCPProject.objects.create(
                project_id=project_id, account_id=account_id, project_name=project_name
            )
        fetched_project_id = self.processor._get_or_create_gcp_project(
            {"project.id": project_id, "billing_account_id": account_id, "project.name": project_name},
            self.accessor,
        )
        self.assertEqual(fetched_project_id, project.id)
        with schema_context(self.schema):
            gcp_project = GCPProject.objects.get(id=project.id)
            self.assertEqual(gcp_project.account_id, account_id)
            self.assertEqual(gcp_project.project_name, project_name)

    def test_gcp_process_can_run_twice(self):
        """Test that process() can be called twice on the same processor without an InternalError."""
        self.processor.process()
        # Restore the report copy before the second run.
        shutil.copy2(self.test_report_path, self.test_report)
        try:
            self.processor.process()
        except InternalError:
            self.fail("failed to call process twice.")

    def test_gcp_process_twice(self):
        """Test the processing of an GCP file again, results in the same amount of objects."""
        self.processor.process()
        with schema_context(self.schema):
            num_line_items = len(GCPCostEntryLineItem.objects.all())
            num_projects = len(GCPProject.objects.all())
            num_bills = len(GCPCostEntryBill.objects.all())

        # Why another processor instance? because calling process() with the same processor instance fails
        # django.db.utils.InternalError: no such savepoint.
        shutil.copy2(self.test_report_path, self.test_report)
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        processor.process()
        with schema_context(self.schema):
            self.assertEqual(num_line_items, len(GCPCostEntryLineItem.objects.all()))
            self.assertEqual(num_projects, len(GCPProject.objects.all()))
            self.assertEqual(num_bills, len(GCPCostEntryBill.objects.all()))

    def test_no_report_path(self):
        """Test error caught when report path doesn't exist."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path="/path/does/not/exist/202011_123_2020-11-08:2020-11-11.csv",
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        result = processor.process()
        self.assertFalse(result)

    def test_no_manifest_process(self):
        """Test that we can successfully process reports without a manifest."""
        with schema_context(self.schema):
            expected_bill_count = len(GCPCostEntryBill.objects.all()) + 1
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
        )
        processor.process()
        with schema_context(self.schema):
            self.assertTrue(len(GCPCostEntryLineItem.objects.all()) > 0)
            self.assertTrue(len(GCPProject.objects.all()) > 0)
            self.assertEqual(expected_bill_count, len(GCPCostEntryBill.objects.all()))
        self.assertFalse(os.path.exists(self.test_report))

    def test_create_cost_entry_line_item_bad_time(self):
        """Test time parse errors are caught correctly."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
        )
        with open(self.test_report) as csvfile:
            reader = csv.DictReader(csvfile)
            row_one = next(reader)
        fake_id = 2
        # Unparsable start time: no partition should be requested.
        row_one["usage_start_time"] = "bad time value"
        processor._create_cost_entry_line_item(row_one, fake_id, fake_id, self.accessor, fake_id)
        self.assertFalse(processor.processed_report.requested_partitions)
        # Missing start time column: no partition should be requested either.
        del row_one["usage_start_time"]
        processor._create_cost_entry_line_item(row_one, fake_id, fake_id, self.accessor, fake_id)
        self.assertFalse(processor.processed_report.requested_partitions)

    def test_gcp_process_empty_file(self):
        """Test that processing a GCP file holding only a header line succeeds and adds no objects."""
        with schema_context(self.schema):
            num_line_items = len(GCPCostEntryLineItem.objects.all())
            num_projects = len(GCPProject.objects.all())
            num_bills = len(GCPCostEntryBill.objects.all())

        # Mode "w" already truncates the file; the context manager guarantees
        # the handle is closed even if the write fails.
        with open(self.test_report, "w") as report_file:
            report_file.write("invoice.month,project.id")

        result = self.processor.process()
        self.assertTrue(result)
        with schema_context(self.schema):
            self.assertEqual(num_line_items, len(GCPCostEntryLineItem.objects.all()))
            self.assertEqual(num_projects, len(GCPProject.objects.all()))
            self.assertEqual(num_bills, len(GCPCostEntryBill.objects.all()))
class GCPReportProcessorTest(MasuTestCase):
    """Test Cases for the GCPReportProcessor object."""

    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        cls.test_report_path = "./koku/masu/test/data/gcp/evidence-2019-06-03.csv"
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()
        with ReportingCommonDBAccessor() as report_common_db:
            cls.column_map = report_common_db.column_map

    def setUp(self):
        """Set up GCP tests: report copy, provider, manifest, processor and accessor."""
        super().setUp()
        # Each test works on its own copy of the fixture report.
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/evidence-2019-06-03.csv"
        shutil.copy2(self.test_report_path, self.test_report)

        gcp_auth = ProviderAuthentication.objects.create(credentials={"project-id": fake.word()})
        gcp_billing_source = ProviderBillingSource.objects.create(data_source={"bucket": fake.word()})
        self.gcp_provider = Provider.objects.create(
            uuid=uuid.uuid4(),
            name="Test Provider",
            type=Provider.PROVIDER_GCP,
            authentication=gcp_auth,
            billing_source=gcp_billing_source,
            customer=self.customer,
            setup_complete=True,
        )

        start_time = "2019-09-17T00:00:00-07:00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        self.start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        self.end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        self.assembly_id = "1234"
        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.start_date_utc,
            "num_total_files": 1,
            "provider_uuid": self.gcp_provider.uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        self.manifest = manifest_accessor.add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema, self.column_map)

    def tearDown(self):
        """Tear down test case."""
        super().tearDown()
        shutil.rmtree(self.temp_dir)

    def test_gcp_process(self):
        """Test the processing of an GCP file writes objects to the database."""
        self.processor.process()
        with schema_context(self.schema):
            self.assertTrue(len(GCPCostEntryLineItemDaily.objects.all()) > 0)
            self.assertTrue(len(GCPProject.objects.all()) > 0)
            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(1, len(GCPCostEntryBill.objects.all()))
        self.assertFalse(os.path.exists(self.test_report))

    def test_create_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that doesn't exist creates it."""
        bill_row = {"Start Time": "2019-09-17T00:00:00-07:00"}
        entry_bill_id = self.processor._get_or_create_cost_entry_bill(bill_row, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPCostEntryBill.objects.filter(id=entry_bill_id).exists())

    def test_get_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that exists fetches its id."""
        start_time = "2019-09-17T00:00:00-07:00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        with schema_context(self.schema):
            entry_bill = GCPCostEntryBill.objects.create(
                provider=self.gcp_provider, billing_period_start=start_date_utc, billing_period_end=end_date_utc
            )
        entry_bill_id = self.processor._get_or_create_cost_entry_bill(
            {"Start Time": datetime.strftime(start_date_utc, "%Y-%m-%d %H:%M%z")}, self.accessor
        )
        self.assertEqual(entry_bill.id, entry_bill_id)

    def test_create_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that doesn't exist creates it."""
        project_data = {
            "Project ID": fake.word(),
            "Account ID": fake.word(),
            "Project Number": fake.pyint(),
            "Project Name": fake.word(),
        }
        project_id = self.processor._get_or_create_gcp_project(project_data, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPProject.objects.filter(id=project_id).exists())

    def test_get_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that exists gets it."""
        project_id = fake.word()
        account_id = fake.word()
        with schema_context(self.schema):
            project = GCPProject.objects.create(
                project_id=project_id, account_id=account_id, project_number=fake.pyint(), project_name=fake.word()
            )
        fetched_project_id = self.processor._get_or_create_gcp_project(
            {
                "Project ID": project_id,
                "Account ID": fake.word(),
                "Project Number": fake.pyint(),
                "Project Name": fake.word(),
            },
            self.accessor,
        )
        self.assertEqual(fetched_project_id, project.id)
        with schema_context(self.schema):
            # Even if _get_or_create_gcp_project is called with a different
            # account_id, but the same project_id, we expect account_id to remain the same
            gcp_project = GCPProject.objects.get(id=project.id)
            self.assertEqual(gcp_project.account_id, account_id)

    def test_gcp_process_twice(self):
        """Test the processing of an GCP file again, results in the same amount of objects."""
        self.processor.process()
        with schema_context(self.schema):
            num_line_items = len(GCPCostEntryLineItemDaily.objects.all())
            num_projects = len(GCPProject.objects.all())
            num_bills = len(GCPCostEntryBill.objects.all())

        # Why another processor instance? because calling process() with the same processor instance fails
        # django.db.utils.InternalError: no such savepoint.
        shutil.copy2(self.test_report_path, self.test_report)
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        processor.process()
        with schema_context(self.schema):
            self.assertEqual(num_line_items, len(GCPCostEntryLineItemDaily.objects.all()))
            self.assertEqual(num_projects, len(GCPProject.objects.all()))
            self.assertEqual(num_bills, len(GCPCostEntryBill.objects.all()))

    def test_consolidate_line_items(self):
        """Test that logic for consolidating lines work.

        The assertions expect the "int", "float" and "npint" values to be
        summed, while "date", "cost_entry_bill_id" and "project_id" keep the
        value from the first line.
        """
        line1 = {
            "int": fake.pyint(),
            "float": fake.pyfloat(),
            "date": datetime.now(),
            "npint": np.int64(fake.pyint()),
            "cost_entry_bill_id": fake.pyint(),
            "project_id": fake.pyint(),
        }
        line2 = {
            "int": fake.pyint(),
            "float": fake.pyfloat(),
            "date": datetime.now(),
            "npint": np.int64(fake.pyint()),
            "cost_entry_bill_id": fake.pyint(),
            "project_id": fake.pyint(),
        }
        consolidated_line = self.processor._consolidate_line_items(line1, line2)

        self.assertEqual(consolidated_line["int"], line1["int"] + line2["int"])
        self.assertEqual(consolidated_line["float"], line1["float"] + line2["float"])
        self.assertEqual(consolidated_line["npint"], line1["npint"] + line2["npint"])
        self.assertEqual(consolidated_line["date"], line1["date"])
        self.assertEqual(consolidated_line["cost_entry_bill_id"], line1["cost_entry_bill_id"])
        self.assertEqual(consolidated_line["project_id"], line1["project_id"])
class GCPReportProcessorTest(MasuTestCase):
    """Test Cases for the GCPReportProcessor object."""

    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects."""
        super().setUpClass()
        cls.test_report_path = (
            "./koku/masu/test/data/gcp/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
        )
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()

    def setUp(self):
        """Set up GCP tests: report copy, provider, manifest, processor and accessor."""
        super().setUp()
        # Each test works on its own copy of the fixture report.
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"
        shutil.copy2(self.test_report_path, self.test_report)

        gcp_auth = ProviderAuthentication.objects.create(credentials={"project-id": fake.word()})
        gcp_billing_source = ProviderBillingSource.objects.create(data_source={"bucket": fake.word()})
        # check_report_updates is patched out while the provider row is created.
        with patch("masu.celery.tasks.check_report_updates"):
            self.gcp_provider = Provider.objects.create(
                uuid=uuid.uuid4(),
                name="Test Provider",
                type=Provider.PROVIDER_GCP,
                authentication=gcp_auth,
                billing_source=gcp_billing_source,
                customer=self.customer,
                setup_complete=True,
            )

        start_time = "2019-09-17T00:00:00-07:00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        self.start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        self.end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        self.assembly_id = "1234"
        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.start_date_utc,
            "num_total_files": 1,
            "provider_uuid": self.gcp_provider.uuid,
        }
        manifest_accessor = ReportManifestDBAccessor()
        self.manifest = manifest_accessor.add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema)

    def tearDown(self):
        """Tear down test case."""
        super().tearDown()
        shutil.rmtree(self.temp_dir)

    def test_gcp_process(self):
        """Test the processing of an GCP file writes objects to the database."""
        self.processor.process()
        with schema_context(self.schema):
            self.assertTrue(len(GCPCostEntryLineItem.objects.all()) > 0)
            self.assertTrue(len(GCPProject.objects.all()) > 0)
            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(1, len(GCPCostEntryBill.objects.all()))
        self.assertFalse(os.path.exists(self.test_report))

    def test_create_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that doesn't exist creates it."""
        bill_row = {"invoice.month": "202011"}
        entry_bill_id = self.processor._get_or_create_cost_entry_bill(bill_row, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPCostEntryBill.objects.filter(id=entry_bill_id).exists())

    def test_get_gcp_cost_entry_bill(self):
        """Test calling _get_or_create_cost_entry_bill on an entry bill that exists fetches its id."""
        start_time = "2020-11-01 00:00:00+00"
        report_date_range = utils.month_date_range(parser.parse(start_time))
        start_date, end_date = report_date_range.split("-")
        start_date_utc = parser.parse(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)
        end_date_utc = parser.parse(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC)

        with schema_context(self.schema):
            entry_bill = GCPCostEntryBill.objects.create(
                provider=self.gcp_provider,
                billing_period_start=start_date_utc,
                billing_period_end=end_date_utc,
            )
        entry_bill_id = self.processor._get_or_create_cost_entry_bill({"invoice.month": "202011"}, self.accessor)
        self.assertEqual(entry_bill.id, entry_bill_id)

    def test_create_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that doesn't exist creates it."""
        project_data = {
            "project.id": fake.word(),
            "billing_account_id": fake.word(),
            "project.name": fake.word(),
        }
        project_id = self.processor._get_or_create_gcp_project(project_data, self.accessor)
        with schema_context(self.schema):
            self.assertTrue(GCPProject.objects.filter(id=project_id).exists())

    def test_get_gcp_project(self):
        """Test calling _get_or_create_gcp_project on a project id that exists gets it."""
        project_id = fake.word()
        account_id = fake.word()
        with schema_context(self.schema):
            project = GCPProject.objects.create(
                project_id=project_id, account_id=account_id, project_name=fake.word()
            )
        fetched_project_id = self.processor._get_or_create_gcp_project(
            {"project.id": project_id, "billing_account_id": fake.word(), "project.name": fake.word()},
            self.accessor,
        )
        self.assertEqual(fetched_project_id, project.id)
        with schema_context(self.schema):
            # Even if _get_or_create_gcp_project is called with a different
            # account_id, but the same project_id, we expect account_id to remain the same
            gcp_project = GCPProject.objects.get(id=project.id)
            self.assertEqual(gcp_project.account_id, account_id)

    def test_gcp_process_can_run_twice(self):
        """Test that process() can be called twice on the same processor without an InternalError."""
        self.processor.process()
        # Restore the report copy before the second run.
        shutil.copy2(self.test_report_path, self.test_report)
        try:
            self.processor.process()
        except InternalError:
            self.fail("failed to call process twice.")

    def test_gcp_process_twice(self):
        """Test the processing of an GCP file again, results in the same amount of objects."""
        self.processor.process()
        with schema_context(self.schema):
            num_line_items = len(GCPCostEntryLineItem.objects.all())
            num_projects = len(GCPProject.objects.all())
            num_bills = len(GCPCostEntryBill.objects.all())

        # Why another processor instance? because calling process() with the same processor instance fails
        # django.db.utils.InternalError: no such savepoint.
        shutil.copy2(self.test_report_path, self.test_report)
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        processor.process()
        with schema_context(self.schema):
            self.assertEqual(num_line_items, len(GCPCostEntryLineItem.objects.all()))
            self.assertEqual(num_projects, len(GCPProject.objects.all()))
            self.assertEqual(num_bills, len(GCPCostEntryBill.objects.all()))

    def test_no_report_path(self):
        """Test error caught when report path doesn't exist."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path="/path/does/not/exist.csv",
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        result = processor.process()
        self.assertFalse(result)

    def test_no_manifest_process(self):
        """Test that we can successfully process reports without a manifest."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
        )
        processor.process()
        with schema_context(self.schema):
            self.assertTrue(len(GCPCostEntryLineItem.objects.all()) > 0)
            self.assertTrue(len(GCPProject.objects.all()) > 0)
            self.assertEqual(1, len(GCPCostEntryBill.objects.all()))
        self.assertFalse(os.path.exists(self.test_report))

    def test_get_line_item_type(self):
        """Test that the get line item type returns correct type."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
        )
        # Expected type -> service descriptions that should resolve to it.
        expected_mapping = {
            "usage": ["Compute Engine", "COMPUTE engine", "Kubernetes Engine"],
            "storage": ["Filestore", "Storage", "Data Transfer", "DATA TRanSFer"],
            "network": ["VPC network", "Network services"],
            "database": ["Bigtable", "Spanner"],
            "other": ["unknown"],
        }
        for expected_item_type, alias_list in expected_mapping.items():
            for alias in alias_list:
                row = {"service.description": alias}
                result_type = processor._get_line_item_type(row)
                self.assertEqual(result_type, expected_item_type)

    def test_create_cost_entry_line_item_bad_time(self):
        """Test time parse errors are caught correctly."""
        processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
        )
        with open(self.test_report) as csvfile:
            reader = csv.DictReader(csvfile)
            row_one = next(reader)
        fake_id = 2
        # Unparsable start time: no partition should be requested.
        row_one["usage_start_time"] = "bad time value"
        processor._create_cost_entry_line_item(row_one, fake_id, fake_id, self.accessor, fake_id)
        self.assertFalse(processor.processed_report.requested_partitions)
        # Missing start time column: no partition should be requested either.
        del row_one["usage_start_time"]
        processor._create_cost_entry_line_item(row_one, fake_id, fake_id, self.accessor, fake_id)
        self.assertFalse(processor.processed_report.requested_partitions)