def test_process_usage_and_storage_default(self):
    """Test the processing of an uncompressed storage and usage files."""

    def _process_and_assert_rows_added(report_path, table_key):
        # Shared helper: the original body repeated this block verbatim for
        # the storage report and the usage report.
        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=report_path,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        report_db = self.accessor
        table = getattr(report_db.report_schema, OCP_REPORT_TABLE_MAP[table_key])
        before_count = report_db._session.query(table).count()
        processor.process()
        after_count = report_db._session.query(table).count()
        # Processing should insert at least one new line item.
        self.assertGreater(after_count, before_count)

    _process_and_assert_rows_added(self.storage_report, 'storage_line_item')
    _process_and_assert_rows_added(self.test_report, 'line_item')
def test_process_node_label_duplicates(self):
    """Test that row duplicate node label rows are not inserted into the DB."""
    counts = {}
    processor = OCPReportProcessor(
        schema_name="acct10001",
        report_path=self.node_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.ocp_provider_uuid,
    )
    # Process for the first time
    processor.process()
    report_db = self.accessor
    report_schema = report_db.report_schema
    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        with schema_context(self.schema):
            counts[table_name] = table.objects.count()

    # Re-stage the report file before the second run (presumably the file
    # is consumed/removed during processing — confirm against processor).
    shutil.copy2(self.node_report_path, self.node_report)

    processor = OCPReportProcessor(
        schema_name="acct10001",
        report_path=self.node_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.ocp_provider_uuid,
    )
    # Process for the second time
    processor.process()
    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        with schema_context(self.schema):
            count = table.objects.count()
        # assertEqual reports both values on failure, unlike the original
        # assertTrue(count == counts[table_name]) which only printed False.
        self.assertEqual(count, counts[table_name])
def _set_processor(self):
    """
    Create the report processor object.

    Processor is specific to the provider's cloud service.

    Args:
        None

    Returns:
        (Object) : Provider-specific report processor

    """
    # Keyword arguments common to every processor type.
    common_kwargs = {
        'schema_name': self.schema_name,
        'report_path': self.report_path,
        'compression': self.compression,
        'provider_uuid': self.provider_uuid,
    }
    provider = self.provider_type
    if provider in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        return AWSReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
    if provider in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
        return AzureReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
    if provider == Provider.PROVIDER_OCP:
        # OCP processing does not track a manifest id.
        return OCPReportProcessor(**common_kwargs)
    if provider in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
        return GCPReportProcessor(**common_kwargs)
    # Unknown provider type: no processor available.
    return None
def setUp(self):
    """Stage a committed manifest and an OCP processor for each test."""
    super().setUp()
    manifest_info = {
        'assembly_id': self.assembly_id,
        'billing_period_start_datetime': self.billing_start,
        'num_total_files': 2,
        'provider_id': self.ocp_provider.id,
    }
    self.manifest_dict = manifest_info
    self.manifest = self.manifest_accessor.add(**manifest_info)
    self.manifest_accessor.commit()
    self.ocp_processor = OCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_id=self.ocp_provider.id,
    )
def test_process_storage_default(self):
    """Test the processing of an uncompressed storagefile."""
    table = getattr(
        self.accessor.report_schema, OCP_REPORT_TABLE_MAP["storage_line_item"]
    )

    def _row_count():
        # Row counts must be read inside the tenant schema context.
        with schema_context(self.schema):
            return table.objects.count()

    before_count = _row_count()
    OCPReportProcessor(
        schema_name="acct10001",
        report_path=self.storage_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.ocp_provider_uuid,
    ).process()
    # Processing the storage report should add storage line items.
    self.assertGreater(_row_count(), before_count)
def test_process_default(self):
    """Test the processing of an uncompressed file."""
    counts = {}
    processor = OCPReportProcessor(
        schema_name='acct10001',
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_id=1,
    )
    report_db = self.accessor
    report_schema = report_db.report_schema
    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        counts[table_name] = report_db._session.query(table).count()

    processor.process()

    for table_name in self.report_tables:
        # The daily/summary tables are excluded from the check here — the
        # original test made the same exclusion (presumably they are filled
        # by a separate summarization step; confirm against the pipeline).
        if table_name in ('reporting_ocpusagelineitem_daily',
                          'reporting_ocpusagelineitem_daily_summary'):
            continue
        table = getattr(report_schema, table_name)
        count = report_db._session.query(table).count()
        # assertGreaterEqual reports both operands on failure, unlike the
        # original assertTrue(count >= counts[table_name]).
        self.assertGreaterEqual(count, counts[table_name])
def test_process_duplicate_rows_same_file(self):
    """Test that row duplicates are not inserted into the DB."""
    report_db = self.accessor
    table = getattr(report_db.report_schema, OCP_REPORT_TABLE_MAP["line_item"])
    with schema_context(self.schema):
        initial_count = table.objects.count()

    # Read the fixture rows, then double them so the report contains an
    # exact duplicate of every row.
    with open(self.test_report) as f:
        rows = list(csv.DictReader(f))
    expected_new_count = len(rows)
    rows.extend(rows)

    tmp_file = "/tmp/test_process_duplicate_rows_same_file.csv"
    with open(tmp_file, "w") as f:
        writer = csv.DictWriter(f, fieldnames=rows[0].keys())
        writer.writeheader()
        writer.writerows(rows)

    processor = OCPReportProcessor(
        schema_name="acct10001",
        report_path=tmp_file,
        compression=UNCOMPRESSED,
        provider_uuid=self.ocp_provider_uuid,
    )
    # Process for the first time
    processor.process()

    with schema_context(self.schema):
        # Only one copy of each duplicated row should have been inserted.
        self.assertEqual(table.objects.count(), initial_count + expected_new_count)
def setUpClass(cls): """Set up the test class with required objects.""" # These test reports should be replaced with OCP reports once processor is impelmented. cls.test_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv' cls.storage_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv' cls.unknown_report = './tests/data/test_cur.csv' cls.test_report_gzip = './tests/data/test_cur.csv.gz' cls.provider_id = 1 cls.ocp_processor = OCPReportProcessor(schema_name='acct10001', report_path=cls.test_report, compression=UNCOMPRESSED, provider_id=cls.provider_id) cls.date_accessor = DateAccessor() billing_start = cls.date_accessor.today_with_timezone('UTC').replace( year=2018, month=6, day=1, hour=0, minute=0, second=0) cls.assembly_id = '1234' cls.manifest_dict = { 'assembly_id': cls.assembly_id, 'billing_period_start_datetime': billing_start, 'num_total_files': 2, 'provider_id': 1 } cls.manifest_accessor = ReportManifestDBAccessor() with ReportingCommonDBAccessor() as report_common_db: cls.column_map = report_common_db.column_map cls.accessor = OCPReportDBAccessor('acct10001', cls.column_map) cls.report_schema = cls.accessor.report_schema cls.session = cls.accessor._session _report_tables = copy.deepcopy(OCP_REPORT_TABLE_MAP) cls.report_tables = list(_report_tables.values()) # Grab a single row of test data to work with with open(cls.test_report, 'r') as f: reader = csv.DictReader(f) cls.row = next(reader)
def _set_processor(self):
    """
    Create the report processor object.

    Processor is specific to the provider's cloud service.

    Args:
        None

    Returns:
        (Object) : Provider-specific report processor

    """
    # Arguments shared by every processor constructor.
    shared = {
        'schema_name': self.schema_name,
        'report_path': self.report_path,
        'compression': self.compression,
        'provider_uuid': self.provider_uuid,
    }
    provider = self.provider_type
    if provider in (AMAZON_WEB_SERVICES, AWS_LOCAL_SERVICE_PROVIDER):
        return AWSReportProcessor(manifest_id=self.manifest_id, **shared)
    if provider in (AZURE, AZURE_LOCAL_SERVICE_PROVIDER):
        return AzureReportProcessor(manifest_id=self.manifest_id, **shared)
    if provider == OPENSHIFT_CONTAINER_PLATFORM:
        # OCP processing does not take a manifest id.
        return OCPReportProcessor(**shared)
    if provider in (GCP, GCP_LOCAL):
        return GCPReportProcessor(**shared)
    # Unrecognized provider type.
    return None
def test_process_default_small_batches(self):
    """Test the processing of an uncompressed file in small batches."""
    # Force a tiny batch size so multiple commit batches are exercised.
    with patch.object(Config, 'REPORT_PROCESSING_BATCH_SIZE', 5):
        counts = {}
        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        report_db = self.accessor
        report_schema = report_db.report_schema
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            counts[table_name] = report_db._session.query(table).count()

        processor.process()

        for table_name in self.report_tables:
            # The daily/summary tables are excluded, matching the original
            # test (presumably populated by a separate summarization step).
            if table_name in ('reporting_ocpusagelineitem_daily',
                              'reporting_ocpusagelineitem_daily_summary'):
                continue
            table = getattr(report_schema, table_name)
            count = report_db._session.query(table).count()
            # assertGreaterEqual shows both values on failure, unlike the
            # original assertTrue(count >= counts[table_name]).
            self.assertGreaterEqual(count, counts[table_name])
def setUp(self):
    """Copy fixture reports into a scratch directory and stage fixtures."""
    super().setUp()
    self.temp_dir = tempfile.mkdtemp()
    self.test_report = f'{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
    self.storage_report = f'{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
    self.test_report_gzip = f'{self.temp_dir}/test_cur.csv.gz'
    # Work on per-test copies so processing can consume/modify the files.
    for source, destination in (
        (self.test_report_path, self.test_report),
        (self.storage_report_path, self.storage_report),
        (self.test_report_gzip_path, self.test_report_gzip),
    ):
        shutil.copy2(source, destination)
    self.manifest_dict = {
        'assembly_id': self.assembly_id,
        'billing_period_start_datetime': self.billing_start,
        'num_total_files': 2,
        'provider_uuid': self.ocp_provider_uuid,
    }
    self.manifest = self.manifest_accessor.add(**self.manifest_dict)
    self.ocp_processor = OCPReportProcessor(
        schema_name=self.schema,
        report_path=self.test_report,
        compression=UNCOMPRESSED,
        provider_uuid=self.ocp_provider_uuid,
    )
def test_process_usage_and_storage_with_invalid_data(self):
    """Test that processing succeeds when rows are missing data."""

    def _write_report_with_nulled_column(source_path, dest_path, column):
        # Shared helper: the original body repeated this copy-and-null
        # logic for the pod report ('node') and storage report
        # ('persistentvolume').
        with open(source_path) as f:
            rows = [dict(row, **{column: None}) for row in csv.DictReader(f)]
        with open(dest_path, "w") as f:
            writer = csv.DictWriter(f, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)

    def _count(table):
        # Row counts must be read inside the tenant schema context.
        with schema_context(self.schema):
            return table.objects.count()

    pod_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1-invalid.csv"
    storage_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_storage-invalid.csv"
    _write_report_with_nulled_column(self.test_report_path, pod_report, "node")
    _write_report_with_nulled_column(
        self.storage_report_path, storage_report, "persistentvolume"
    )

    report_schema = self.accessor.report_schema
    for report_path, table_key in (
        (storage_report, "storage_line_item"),
        (pod_report, "line_item"),
    ):
        table = getattr(report_schema, OCP_REPORT_TABLE_MAP[table_key])
        before_count = _count(table)
        OCPReportProcessor(
            schema_name="acct10001",
            report_path=report_path,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        ).process()
        # Rows missing required data should be skipped, so no new rows
        # are inserted.
        self.assertEqual(_count(table), before_count)