Example #1
0
    def test_process_duplicates(self):
        """Test that row duplicates are not inserted into the DB.

        The same report file is processed twice. After the second pass every
        table listed in ``self.report_tables`` must contain exactly as many
        rows as it did after the first pass.
        """
        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report,
                                    compression=UNCOMPRESSED)

        # Process for the first time
        processor.process()
        report_db = processor.report_db
        report_schema = report_db.report_schema

        # Row counts per table after the first pass.
        counts = {}
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            counts[table_name] = report_db._session.query(table).count()

        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report,
                                    compression=UNCOMPRESSED)
        # Process for the second time
        processor.process()

        # Counts are read back through the first processor's session/schema.
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            count = report_db._session.query(table).count()
            # assertEqual reports both values (and the table) on failure,
            # unlike assertTrue(a == b).
            self.assertEqual(count, counts[table_name], table_name)
Example #2
0
    def test_process_gzip(self):
        """Test the processing of a gzip compressed file.

        Row counts for every table in ``self.report_tables`` are captured
        before processing and compared with the counts afterwards. Both
        database connections held by the processor's ``report_db`` must be
        closed once processing has finished.
        """
        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report_gzip,
                                    compression=GZIP_COMPRESSED)
        report_db = processor.report_db
        report_schema = report_db.report_schema

        # Baseline row counts before the file is processed.
        counts = {}
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            counts[table_name] = report_db._session.query(table).count()

        processor.process()

        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            count = report_db._session.query(table).count()

            # The reservation table is only required not to shrink; every
            # other table must gain rows.
            # NOTE(review): presumably the test file may add no reservation
            # rows - confirm against the fixture.
            if table_name == 'reporting_awscostentryreservation':
                self.assertGreaterEqual(count, counts[table_name], table_name)
            else:
                self.assertGreater(count, counts[table_name], table_name)

        self.assertTrue(processor.report_db._conn.closed)
        self.assertTrue(processor.report_db._pg2_conn.closed)
    def test_aws_process(self, mock_process, mock_ocp_cloud_process,
                         mock_parquet_process, mock_trino_enabled):
        """Check which AWS processing mocks are called with Trino on and off.

        With ``mock_trino_enabled`` returning True, ``mock_process`` must stay
        untouched while the parquet and OCP-cloud mocks are called. With it
        returning False, the expectations are reversed.
        """
        # Phase 1: trino-enabled mock reports True.
        mock_trino_enabled.return_value = True
        processor_kwargs = {
            "schema_name": self.schema,
            "report_path": "/my/report/file",
            "compression": "GZIP",
            "provider": Provider.PROVIDER_AWS,
            "provider_uuid": self.aws_provider_uuid,
            "manifest_id": None,
        }
        trino_processor = ReportProcessor(**processor_kwargs)
        trino_processor.process()
        mock_process.assert_not_called()
        mock_parquet_process.assert_called()
        mock_ocp_cloud_process.assert_called()

        # Phase 2: clear recorded calls, then have the trino-enabled mock
        # report False.
        for used_mock in (mock_trino_enabled, mock_parquet_process,
                          mock_ocp_cloud_process):
            used_mock.reset_mock()
        mock_trino_enabled.return_value = False
        plain_processor = ReportProcessor(**processor_kwargs)
        plain_processor.process()
        mock_process.assert_called()
        mock_parquet_process.assert_not_called()
        mock_ocp_cloud_process.assert_not_called()
Example #4
0
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Process one report file and record the outcome.

    Logs the request and current memory figures, stamps the start and
    completion times for the file, runs the ReportProcessor, bumps the
    manifest's processed-file counter, marks provider setup as complete and
    finally removes processed files from the report's directory.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) The report data dict from previous task; the
                      keys 'start_date', 'file', 'compression',
                      'manifest_id' and 'provider_id' are read from it

    Returns:
        None

    """
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    start_date = report_dict.get('start_date')
    manifest_id = report_dict.get('manifest_id')
    provider_id = report_dict.get('provider_id')

    template = ('Processing Report:'
                ' schema_name: {},'
                ' report_path: {},'
                ' compression: {},'
                ' provider: {},'
                ' start_date: {}')
    LOG.info(template.format(schema_name, report_path, compression,
                             provider, start_date))

    memory = psutil.virtual_memory()
    LOG.info('Avaiable memory: {} bytes ({}%)'.format(memory.free,
                                                      memory.percent))

    # Last path component is used as the key for the stats and log records.
    file_name = report_path.rsplit('/', 1)[-1]

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_db:
        stats_db.log_last_started_datetime()

    processor = ReportProcessor(
        schema_name=schema_name,
        report_path=report_path,
        compression=compression,
        provider=provider,
        provider_id=provider_id,
        manifest_id=manifest_id,
    )
    processor.process()

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_db:
        stats_db.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_db:
        manifest = manifest_db.get_manifest_by_id(manifest_id)
        if not manifest:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)
        else:
            manifest.num_processed_files += 1
            manifest.save()
            manifest_db.mark_manifest_as_updated(manifest)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_db:
        provider_db.setup_complete()

    report_dir = path.dirname(report_path)
    removed = processor.remove_processed_files(report_dir)
    LOG.info('Temporary files removed: %s', str(removed))
Example #5
0
    def test_aws_process_error(self, fake_process):
        """Expect process() to raise ReportProcessorError for an AWS report."""
        # NOTE(review): fake_process is injected from outside this view
        # (presumably a patch that makes processing fail) - confirm.
        processor_kwargs = {
            'schema_name': 'acct10001',
            'report_path': '/my/report/file',
            'compression': 'GZIP',
            'provider': AMAZON_WEB_SERVICES,
            'provider_id': 1,
            'manifest_id': None,
        }
        processor = ReportProcessor(**processor_kwargs)
        self.assertRaises(ReportProcessorError, processor.process)
Example #6
0
 def test_aws_process_error(self, fake_process):
     """Expect process() to raise ReportProcessorError for an AWS report."""
     # NOTE(review): fake_process is injected from outside this view
     # (presumably a patch that makes processing fail) - confirm.
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     processor = ReportProcessor(**processor_kwargs)
     self.assertRaises(ReportProcessorError, processor.process)
Example #7
0
 def test_aws_process(self, fake_process):
     """Verify that process() completes without raising for an AWS report."""
     # NOTE(review): fake_process is injected from outside this view
     # (presumably a patch standing in for the real processing) - confirm.
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_id': 1,
         'manifest_id': None,
     }
     processor = ReportProcessor(**processor_kwargs)
     try:
         processor.process()
     except Exception:
         # Any exception from process() is reported as a test failure.
         self.fail('unexpected error')
Example #8
0
 def test_aws_process(self, fake_process):
     """Verify that process() completes without raising for an AWS report."""
     # NOTE(review): fake_process is injected from outside this view
     # (presumably a patch standing in for the real processing) - confirm.
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     processor = ReportProcessor(**processor_kwargs)
     try:
         processor.process()
     except Exception:
         # Any exception from process() is reported as a test failure.
         self.fail("unexpected error")
Example #9
0
def _process_report_file(schema_name, report_path, compression):
    """
    Task to process a Report.

    Records the start time for the file, runs the ReportProcessor, records
    the completion time and then removes temporary files from the report's
    directory. The stats accessor session is closed even when processing
    fails; in that case the completion time is not recorded and the
    exception propagates to the caller.

    Args:
        schema_name (String) db schema name
        report_path (String) path to downloaded reports
        compression (String) 'PLAIN' or 'GZIP'

    Returns:
        None

    """
    stmt = ('Processing Report:'
            ' schema_name: {},'
            ' report_path: {},'
            ' compression: {}')
    log_statement = stmt.format(schema_name, report_path, compression)
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = 'Avaiable memory: {} bytes ({}%)'.format(mem.free, mem.percent)
    LOG.info(mem_msg)

    # Last path component identifies the report in the stats table.
    file_name = report_path.split('/')[-1]

    stats_recorder = ReportStatsDBAccessor(file_name)
    try:
        stats_recorder.log_last_started_datetime()
        stats_recorder.commit()

        processor = ReportProcessor(schema_name=schema_name,
                                    report_path=report_path,
                                    compression=compression)

        processor.process()
        stats_recorder.log_last_completed_datetime()
        stats_recorder.commit()
    finally:
        # Always release the session; previously it leaked whenever
        # processing or a commit raised.
        stats_recorder.close_session()

    files = remove_files.remove_temp_cur_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
Example #10
0
def _process_report_file(schema_name, provider, report_dict):
    """
    Task to process a Report.

    Records the start time for the file, runs the ReportProcessor, records
    the completion time, marks the manifest as updated and flags the
    provider's setup as complete. Processed files are removed only when the
    provider's setup was already complete before this call.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        report_dict   (dict) The report data dict from previous task; the
                      keys "start_date", "file", "compression",
                      "manifest_id" and "provider_uuid" are read from it

    Returns:
        True once the report has been processed and bookkeeping is done.

    Raises:
        ReportProcessorError, ReportProcessorDBError: re-raised from the
            processor after the recorded start time has been cleared.

    """
    start_date = report_dict.get("start_date")
    report_path = report_dict.get("file")
    compression = report_dict.get("compression")
    manifest_id = report_dict.get("manifest_id")
    provider_uuid = report_dict.get("provider_uuid")
    log_statement = (f"Processing Report:\n"
                     f" schema_name: {schema_name}\n"
                     f" provider: {provider}\n"
                     f" provider_uuid: {provider_uuid}\n"
                     f" file: {report_path}\n"
                     f" compression: {compression}\n"
                     f" start_date: {start_date}")
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    mem_msg = f"Avaiable memory: {mem.free} bytes ({mem.percent}%)"
    LOG.info(mem_msg)

    # Last path component identifies the report in the stats table.
    file_name = report_path.split("/")[-1]
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()

    try:
        processor = ReportProcessor(
            schema_name=schema_name,
            report_path=report_path,
            compression=compression,
            provider=provider,
            provider_uuid=provider_uuid,
            manifest_id=manifest_id,
        )

        processor.process()
    except (ReportProcessorError, ReportProcessorDBError):
        # Undo the start stamp before propagating the failure.
        with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
            stats_recorder.clear_last_started_datetime()
        # Bare raise re-raises the active exception with its traceback intact.
        raise

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accesor:
        manifest = manifest_accesor.get_manifest_by_id(manifest_id)
        if manifest:
            manifest_accesor.mark_manifest_as_updated(manifest)
        else:
            LOG.error("Unable to find manifest for ID: %s, file %s",
                      manifest_id, file_name)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        # Files are removed only if setup was already complete before this
        # run; the flag is set afterwards either way.
        if provider_accessor.get_setup_complete():
            files = processor.remove_processed_files(path.dirname(report_path))
            LOG.info("Temporary files removed: %s", str(files))
        provider_accessor.setup_complete()

    return True