def test_process_duplicates(self):
    """Test that row duplicates are not inserted into the DB."""
    counts = {}
    processor = ReportProcessor(schema_name='testcustomer',
                                report_path=self.test_report,
                                compression=UNCOMPRESSED)

    # Process for the first time
    processor.process()
    report_db = processor.report_db
    report_schema = report_db.report_schema

    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        count = report_db._session.query(table).count()
        counts[table_name] = count

    processor = ReportProcessor(schema_name='testcustomer',
                                report_path=self.test_report,
                                compression=UNCOMPRESSED)
    # Process for the second time
    processor.process()

    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        count = report_db._session.query(table).count()
        self.assertEqual(count, counts[table_name])
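# The per-table count-gathering loop above recurs in several of these tests.
# A hypothetical helper that snapshots row counts per table could factor it
# out; this is a sketch built only from the calls shown above, not the
# project's actual API.
def table_counts(report_db, report_schema, table_names):
    """Return a dict mapping each table name to its current row count."""
    return {
        name: report_db._session.query(getattr(report_schema, name)).count()
        for name in table_names
    }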
def test_process_gzip(self):
    """Test the processing of a gzip-compressed file."""
    counts = {}
    processor = ReportProcessor(schema_name='testcustomer',
                                report_path=self.test_report_gzip,
                                compression=GZIP_COMPRESSED)
    report_db = processor.report_db
    report_schema = report_db.report_schema

    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        count = report_db._session.query(table).count()
        counts[table_name] = count

    processor.process()

    for table_name in self.report_tables:
        table = getattr(report_schema, table_name)
        count = report_db._session.query(table).count()
        if table_name == 'reporting_awscostentryreservation':
            self.assertGreaterEqual(count, counts[table_name])
        else:
            self.assertGreater(count, counts[table_name])

    self.assertTrue(processor.report_db._conn.closed)
    self.assertTrue(processor.report_db._pg2_conn.closed)
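# A minimal sketch of how the gzip fixture consumed above (self.test_report_gzip)
# could be produced from a plain CSV report using only the standard library.
# The paths are hypothetical; the real test setup may build its fixture
# differently.
import gzip
import shutil


def make_gzip_report(src_csv='/tmp/test_report.csv', dst='/tmp/test_report.csv.gz'):
    """Compress a CSV report fixture with gzip and return the new path."""
    with open(src_csv, 'rb') as f_in, gzip.open(dst, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    return dst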
def test_aws_process(self, mock_process, mock_ocp_cloud_process, mock_parquet_process, mock_trino_enabled):
    """Test to process for AWS."""
    mock_trino_enabled.return_value = True
    processor = ReportProcessor(
        schema_name=self.schema,
        report_path="/my/report/file",
        compression="GZIP",
        provider=Provider.PROVIDER_AWS,
        provider_uuid=self.aws_provider_uuid,
        manifest_id=None,
    )
    processor.process()
    mock_process.assert_not_called()
    mock_parquet_process.assert_called()
    mock_ocp_cloud_process.assert_called()

    mock_trino_enabled.reset_mock()
    mock_parquet_process.reset_mock()
    mock_ocp_cloud_process.reset_mock()

    mock_trino_enabled.return_value = False
    processor = ReportProcessor(
        schema_name=self.schema,
        report_path="/my/report/file",
        compression="GZIP",
        provider=Provider.PROVIDER_AWS,
        provider_uuid=self.aws_provider_uuid,
        manifest_id=None,
    )
    processor.process()
    mock_process.assert_called()
    mock_parquet_process.assert_not_called()
    mock_ocp_cloud_process.assert_not_called()
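# The four mock arguments above imply a stack of @patch decorators that is not
# shown here. A hedged sketch of what that stack might look like follows; the
# patch target strings are assumptions, not the project's actual module paths.
# Decorators apply bottom-up, so the bottom-most @patch maps to the first mock
# argument (mock_process) and the top-most to the last (mock_trino_enabled).
from unittest.mock import patch


@patch('masu.processor.report_processor.enable_trino_processing')                 # -> mock_trino_enabled
@patch('masu.processor.report_processor.ParquetReportProcessor.process')          # -> mock_parquet_process
@patch('masu.processor.report_processor.OCPCloudParquetReportProcessor.process')  # -> mock_ocp_cloud_process
@patch('masu.processor.report_processor.ReportProcessorBase.process')             # -> mock_process
def test_aws_process(self, mock_process, mock_ocp_cloud_process, mock_parquet_process, mock_trino_enabled):
    ...  # body as above; this sketch only illustrates decorator/argument order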
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Task to process a Report.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) The report data dict from previous task

    Returns:
        None

    """
    start_date = report_dict.get('start_date')
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    manifest_id = report_dict.get('manifest_id')
    provider_id = report_dict.get('provider_id')
    stmt = ('Processing Report:'
            ' schema_name: {},'
            ' report_path: {},'
            ' compression: {},'
            ' provider: {},'
            ' start_date: {}')
    log_statement = stmt.format(schema_name,
                                report_path,
                                compression,
                                provider,
                                start_date)
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = 'Available memory: {} bytes ({}%)'.format(mem.free, mem.percent)
    LOG.info(mem_msg)

    file_name = report_path.split('/')[-1]
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()

    processor = ReportProcessor(schema_name=schema_name,
                                report_path=report_path,
                                compression=compression,
                                provider=provider,
                                provider_id=provider_id,
                                manifest_id=manifest_id)
    processor.process()

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        if manifest:
            manifest.num_processed_files += 1
            manifest.save()
            manifest_accessor.mark_manifest_as_updated(manifest)
        else:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        provider_accessor.setup_complete()

    files = processor.remove_processed_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
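# Hedged example of invoking the task above; every value is a placeholder.
# The report_dict keys mirror exactly what the function reads, and the
# AMAZON_WEB_SERVICES constant and 'acct10001' schema are the ones used by
# the nearby tests.
sample_report = {
    'start_date': '2018-06-01',
    'file': '/tmp/masu/cur-report.csv',
    'compression': 'GZIP',
    'manifest_id': 1,
    'provider_id': 1,
}
_process_report_file('acct10001', AMAZON_WEB_SERVICES,
                     'provider-uuid-placeholder', sample_report)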
def test_aws_process_error(self, fake_process):
    """Test to process for AWS with a processing error."""
    processor = ReportProcessor(schema_name='acct10001',
                                report_path='/my/report/file',
                                compression='GZIP',
                                provider=AMAZON_WEB_SERVICES,
                                provider_id=1,
                                manifest_id=None)
    with self.assertRaises(ReportProcessorError):
        processor.process()
def test_aws_process_error(self, fake_process):
    """Test to process for AWS with a processing error."""
    processor = ReportProcessor(
        schema_name=self.schema,
        report_path="/my/report/file",
        compression="GZIP",
        provider=Provider.PROVIDER_AWS,
        provider_uuid=self.aws_provider_uuid,
        manifest_id=None,
    )
    with self.assertRaises(ReportProcessorError):
        processor.process()
def test_aws_process(self, fake_process):
    """Test to process for AWS."""
    processor = ReportProcessor(schema_name='acct10001',
                                report_path='/my/report/file',
                                compression='GZIP',
                                provider=AMAZON_WEB_SERVICES,
                                provider_id=1,
                                manifest_id=None)
    try:
        processor.process()
    except Exception:
        self.fail('unexpected error')
def test_aws_process(self, fake_process):
    """Test to process for AWS."""
    processor = ReportProcessor(
        schema_name=self.schema,
        report_path="/my/report/file",
        compression="GZIP",
        provider=Provider.PROVIDER_AWS,
        provider_uuid=self.aws_provider_uuid,
        manifest_id=None,
    )
    try:
        processor.process()
    except Exception:
        self.fail("unexpected error")
def _process_report_file(schema_name, report_path, compression):
    """
    Task to process a Report.

    Args:
        schema_name (String) db schema name
        report_path (String) path to downloaded reports
        compression (String) 'PLAIN' or 'GZIP'

    Returns:
        None

    """
    stmt = ('Processing Report:'
            ' schema_name: {},'
            ' report_path: {},'
            ' compression: {}')
    log_statement = stmt.format(schema_name, report_path, compression)
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = 'Available memory: {} bytes ({}%)'.format(mem.free, mem.percent)
    LOG.info(mem_msg)

    file_name = report_path.split('/')[-1]
    stats_recorder = ReportStatsDBAccessor(file_name)
    stats_recorder.log_last_started_datetime()
    stats_recorder.commit()

    processor = ReportProcessor(schema_name=schema_name,
                                report_path=report_path,
                                compression=compression)
    processor.process()

    stats_recorder.log_last_completed_datetime()
    stats_recorder.commit()
    stats_recorder.close_session()

    files = remove_files.remove_temp_cur_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
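# Hedged example call for this earlier, three-argument variant of the task.
# The schema and path are placeholders; per the docstring, compression is
# either 'PLAIN' or 'GZIP'.
_process_report_file(schema_name='acct10001',
                     report_path='/tmp/masu/cur-report.csv',
                     compression='PLAIN')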
def _process_report_file(schema_name, provider, report_dict):
    """
    Task to process a Report.

    Args:
        schema_name (String) db schema name
        provider    (String) provider type
        report_dict (dict) The report data dict from previous task

    Returns:
        True when the report file was processed.

    """
    start_date = report_dict.get("start_date")
    report_path = report_dict.get("file")
    compression = report_dict.get("compression")
    manifest_id = report_dict.get("manifest_id")
    provider_uuid = report_dict.get("provider_uuid")
    log_statement = (
        f"Processing Report:\n"
        f" schema_name: {schema_name}\n"
        f" provider: {provider}\n"
        f" provider_uuid: {provider_uuid}\n"
        f" file: {report_path}\n"
        f" compression: {compression}\n"
        f" start_date: {start_date}"
    )
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = f"Available memory: {mem.free} bytes ({mem.percent}%)"
    LOG.info(mem_msg)

    file_name = report_path.split("/")[-1]
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()
    try:
        processor = ReportProcessor(
            schema_name=schema_name,
            report_path=report_path,
            compression=compression,
            provider=provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
        )
        processor.process()
    except (ReportProcessorError, ReportProcessorDBError) as processing_error:
        with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
            stats_recorder.clear_last_started_datetime()
        raise processing_error

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        if manifest:
            manifest_accessor.mark_manifest_as_updated(manifest)
        else:
            LOG.error("Unable to find manifest for ID: %s, file %s", manifest_id, file_name)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        if provider_accessor.get_setup_complete():
            files = processor.remove_processed_files(path.dirname(report_path))
            LOG.info("Temporary files removed: %s", str(files))
        provider_accessor.setup_complete()

    return True
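# Hedged example of driving the latest variant above; all values are
# placeholders, and Provider.PROVIDER_AWS is the constant used by the tests
# earlier in this file. The report_dict keys match exactly what the function
# reads.
report = {
    "start_date": "2020-01-01",
    "file": "/tmp/masu/cur-report.csv.gz",
    "compression": "GZIP",
    "manifest_id": 1,
    "provider_uuid": "00000000-0000-0000-0000-000000000001",
}
processed = _process_report_file("acct10001", Provider.PROVIDER_AWS, report)
assert processed is True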