예제 #1
0
    def test_ocp_on_cloud_processor(self, mock_trino_enabled):
        """Check ocp_on_cloud_processor for both values of the mocked flag.

        When the mock returns True the attribute must be an
        OCPCloudParquetReportProcessor; when it returns False it must be None.
        """

        def build_processor():
            # A fresh context dict is built for every processor instance.
            return ReportProcessor(
                schema_name=self.schema,
                report_path="/my/report/file",
                compression="GZIP",
                provider=Provider.PROVIDER_AWS,
                provider_uuid=self.aws_provider_uuid,
                manifest_id=None,
                context={"request_id": 1},
            )

        # Flag on: a parquet-based OCP-on-cloud processor is expected.
        mock_trino_enabled.return_value = True
        report_processor = build_processor()
        self.assertIsInstance(
            report_processor.ocp_on_cloud_processor,
            OCPCloudParquetReportProcessor,
        )

        # Flag off: no OCP-on-cloud processor is expected.
        mock_trino_enabled.reset_mock()
        mock_trino_enabled.return_value = False
        report_processor = build_processor()
        self.assertIsNone(report_processor.ocp_on_cloud_processor)
예제 #2
0
파일: process.py 프로젝트: m7salam/koku
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Task to process a Report.

    Logs the report parameters and current memory figures, records the
    start/completion timestamps for the report file, runs the report
    processor, bumps the processed-file counter on the manifest (when the
    manifest exists), marks provider setup as complete and finally removes
    the processed files from the report's directory.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) The report data dict from previous task; the keys
                      read here are 'start_date', 'file', 'compression',
                      'manifest_id' and 'provider_id'.

    Returns:
        None

    """
    start_date = report_dict.get('start_date')
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    manifest_id = report_dict.get('manifest_id')
    provider_id = report_dict.get('provider_id')

    # Lazy %-style arguments let the logging framework do the formatting.
    LOG.info('Processing Report:'
             ' schema_name: %s,'
             ' report_path: %s,'
             ' compression: %s,'
             ' provider: %s,'
             ' start_date: %s',
             schema_name, report_path, compression, provider, start_date)

    mem = psutil.virtual_memory()
    # NOTE(review): the message says "Available" but reports ``mem.free`` and
    # ``mem.percent``; confirm against the psutil docs that these are the
    # intended figures.
    LOG.info('Available memory: %s bytes (%s%%)', mem.free, mem.percent)

    # Only the final path component identifies the report in the stats table.
    file_name = report_path.split('/')[-1]
    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_started_datetime()

    processor = ReportProcessor(schema_name=schema_name,
                                report_path=report_path,
                                compression=compression,
                                provider=provider,
                                provider_id=provider_id,
                                manifest_id=manifest_id)
    processor.process()

    with ReportStatsDBAccessor(file_name, manifest_id) as stats_recorder:
        stats_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        if manifest:
            manifest.num_processed_files += 1
            manifest.save()
            manifest_accessor.mark_manifest_as_updated(manifest)
        else:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        provider_accessor.setup_complete()

    files = processor.remove_processed_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
예제 #3
0
    def test_process_gzip(self):
        """Test the processing of a gzip compressed file.

        Row counts of every table in ``self.report_tables`` are captured
        before processing and compared with the counts afterwards; finally
        both database connections of the processor must be closed.
        """
        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report_gzip,
                                    compression=GZIP_COMPRESSED)
        report_db = processor.report_db
        report_schema = report_db.report_schema

        def row_count(table_name):
            """Return the current number of rows in the named report table."""
            table = getattr(report_schema, table_name)
            return report_db._session.query(table).count()

        counts = {
            table_name: row_count(table_name)
            for table_name in self.report_tables
        }

        processor.process()

        for table_name in self.report_tables:
            count = row_count(table_name)

            # Dedicated comparison assertions report both values on failure,
            # unlike assertTrue on a pre-evaluated boolean.
            if table_name == 'reporting_awscostentryreservation':
                # The reservation table is allowed to stay the same size
                # (presumably the fixture may add no reservation rows).
                self.assertGreaterEqual(count, counts[table_name])
            else:
                self.assertGreater(count, counts[table_name])

        self.assertTrue(processor.report_db._conn.closed)
        self.assertTrue(processor.report_db._pg2_conn.closed)
예제 #4
0
    def test_aws_remove_processed_files_error(self, fake_process):
        """Expect ReportProcessorError from remove_processed_files for AWS.

        NOTE(review): ``fake_process`` is presumably a patched mock set up by
        a decorator outside this view -- confirm.
        """
        report_file = '/my/report/file'
        processor_kwargs = {
            'schema_name': 'acct10001',
            'report_path': report_file,
            'compression': 'GZIP',
            'provider': AMAZON_WEB_SERVICES,
            'provider_id': 1,
            'manifest_id': None,
        }
        aws_processor = ReportProcessor(**processor_kwargs)

        with self.assertRaises(ReportProcessorError):
            aws_processor.remove_processed_files(report_file)
예제 #5
0
 def test_aws_process_error(self, fake_process):
     """Expect ReportProcessorError when process() is called for AWS.

     NOTE(review): ``fake_process`` is presumably a patched mock set up by
     a decorator outside this view -- confirm.
     """
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     aws_processor = ReportProcessor(**processor_kwargs)

     with self.assertRaises(ReportProcessorError):
         aws_processor.process()
예제 #6
0
 def test_aws_remove_processed_files_error(self, fake_process):
     """Expect ReportProcessorError from remove_processed_files for AWS.

     NOTE(review): ``fake_process`` is presumably a patched mock set up by
     a decorator outside this view -- confirm.
     """
     report_file = "/my/report/file"
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": report_file,
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     aws_processor = ReportProcessor(**processor_kwargs)

     with self.assertRaises(ReportProcessorError):
         aws_processor.remove_processed_files(report_file)
예제 #7
0
 def test_aws_remove_processed_files(self, fake_process):
     """Check that remove_processed_files for AWS raises nothing."""
     report_file = '/my/report/file'
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': report_file,
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_id': 1,
         'manifest_id': None,
     }
     aws_processor = ReportProcessor(**processor_kwargs)

     # Any exception is turned into an explicit test failure.
     try:
         aws_processor.remove_processed_files(report_file)
     except Exception:
         self.fail('unexpected error')
예제 #8
0
 def test_aws_process(self, fake_process):
     """Check that process() for AWS raises nothing."""
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     aws_processor = ReportProcessor(**processor_kwargs)

     # Any exception is turned into an explicit test failure.
     try:
         aws_processor.process()
     except Exception:
         self.fail('unexpected error')
예제 #9
0
 def test_aws_remove_processed_files(self, fake_process):
     """Check that remove_processed_files for AWS raises nothing."""
     report_file = "/my/report/file"
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": report_file,
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     aws_processor = ReportProcessor(**processor_kwargs)

     # Any exception is turned into an explicit test failure.
     try:
         aws_processor.remove_processed_files(report_file)
     except Exception:
         self.fail("unexpected error")
예제 #10
0
    def test_process_duplicates(self):
        """Test that row duplicates are not inserted into the DB.

        The same report is processed twice with two separate processors; the
        row count of every table in ``self.report_tables`` must be unchanged
        after the second run.
        """
        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report,
                                    compression=UNCOMPRESSED)

        # Process for the first time
        processor.process()
        # Counts before and after the second run are read through the first
        # processor's report_db.
        report_db = processor.report_db
        report_schema = report_db.report_schema

        def row_count(table_name):
            """Return the current number of rows in the named report table."""
            table = getattr(report_schema, table_name)
            return report_db._session.query(table).count()

        counts = {
            table_name: row_count(table_name)
            for table_name in self.report_tables
        }

        processor = ReportProcessor(schema_name='testcustomer',
                                    report_path=self.test_report,
                                    compression=UNCOMPRESSED)
        # Process for the second time
        processor.process()

        for table_name in self.report_tables:
            # assertEqual reports both values when the counts diverge.
            self.assertEqual(row_count(table_name), counts[table_name])
예제 #11
0
    def test_aws_process(self, mock_process, mock_ocp_cloud_process,
                         mock_parquet_process, mock_trino_enabled):
        """Check which mocked process calls happen for AWS per flag value.

        With the flag mock returning True only the parquet and OCP-on-cloud
        mocks must be called; with False only ``mock_process`` must be called.
        """

        def build_and_process():
            # Build a new processor and immediately run it.
            ReportProcessor(
                schema_name=self.schema,
                report_path="/my/report/file",
                compression="GZIP",
                provider=Provider.PROVIDER_AWS,
                provider_uuid=self.aws_provider_uuid,
                manifest_id=None,
            ).process()

        mock_trino_enabled.return_value = True
        build_and_process()
        mock_process.assert_not_called()
        mock_parquet_process.assert_called()
        mock_ocp_cloud_process.assert_called()

        # Clear recorded calls before the second scenario; mock_process is
        # deliberately left alone since it has not been called so far.
        for used_mock in (mock_trino_enabled, mock_parquet_process,
                          mock_ocp_cloud_process):
            used_mock.reset_mock()

        mock_trino_enabled.return_value = False
        build_and_process()
        mock_process.assert_called()
        mock_parquet_process.assert_not_called()
        mock_ocp_cloud_process.assert_not_called()
예제 #12
0
파일: utils.py 프로젝트: tohjustin/koku
 def process_report(self, report, compression, provider_type, provider, manifest):
     """Process a report and store start/completion times on a status record.

     A "CostUsageReportStatus" object is created via ``baker.make`` for the
     given manifest and report, stamped with ``self.dh.now`` before and after
     the processor runs, and then saved.
     """
     report_status = baker.make("CostUsageReportStatus", manifest=manifest, report_name=report)
     report_status.last_started_datetime = self.dh.now

     report_processor = ReportProcessor(
         self.schema, report, compression, provider_type, provider.uuid, manifest.id
     )
     report_processor.process()

     report_status.last_completed_datetime = self.dh.now
     report_status.save()
예제 #13
0
 def test_initializer_aws_local(self):
     """Check that the AWS-local provider yields a non-None _processor."""
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AWS_LOCAL_SERVICE_PROVIDER,
         'provider_id': 1,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #14
0
 def test_initializer_aws(self):
     """Check that the AWS provider yields a non-None _processor."""
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_id': 1,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #15
0
 def test_initializer_invalid_provider(self):
     """Expect ReportProcessorError when the provider is 'unknown'."""
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': 'unknown',
         'provider_id': 1,
         'manifest_id': None,
     }
     # Only the constructor call itself is expected to raise.
     with self.assertRaises(ReportProcessorError):
         ReportProcessor(**processor_kwargs)
예제 #16
0
 def test_initializer_error(self, fake_processor):
     """Expect ReportProcessorError from the constructor for AWS.

     NOTE(review): ``fake_processor`` is presumably a patched mock set up by
     a decorator outside this view -- confirm.
     """
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AMAZON_WEB_SERVICES,
         'provider_id': 1,
         'manifest_id': None,
     }
     # Only the constructor call itself is expected to raise.
     with self.assertRaises(ReportProcessorError):
         ReportProcessor(**processor_kwargs)
예제 #17
0
 def test_initializer_invalid_provider(self):
     """Expect ReportProcessorError when the provider is "unknown"."""
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": "unknown",
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     # Only the constructor call itself is expected to raise.
     with self.assertRaises(ReportProcessorError):
         ReportProcessor(**processor_kwargs)
예제 #18
0
 def test_initializer_not_implemented_error(self, fake_processor):
     """Expect NotImplementedError from the constructor for AWS.

     NOTE(review): ``fake_processor`` is presumably a patched mock set up by
     a decorator outside this view -- confirm.
     """
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     # Only the constructor call itself is expected to raise.
     with self.assertRaises(NotImplementedError):
         ReportProcessor(**processor_kwargs)
예제 #19
0
 def test_initializer_error(self, fake_processor):
     """Expect ReportProcessorError from the constructor for AWS.

     NOTE(review): ``fake_processor`` is presumably a patched mock set up by
     a decorator outside this view -- confirm.
     """
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': Provider.PROVIDER_AWS,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     # Only the constructor call itself is expected to raise.
     with self.assertRaises(ReportProcessorError):
         ReportProcessor(**processor_kwargs)
예제 #20
0
 def test_initializer_ocp_local(self):
     """Check that the OCP-local provider yields a non-None _processor."""
     ocp_report_path = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': ocp_report_path,
         'compression': 'PLAIN',
         'provider': OCP_LOCAL_SERVICE_PROVIDER,
         'provider_id': 1,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #21
0
 def test_initializer_ocp(self):
     """Check that the OCP provider yields a non-None _processor."""
     ocp_report_path = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
     processor_kwargs = {
         'schema_name': 'acct10001',
         'report_path': ocp_report_path,
         'compression': 'PLAIN',
         'provider': OPENSHIFT_CONTAINER_PLATFORM,
         'provider_id': 1,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #22
0
 def test_initializer_aws(self):
     """Check that the AWS provider yields a non-None _processor."""
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': Provider.PROVIDER_AWS,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #23
0
 def test_initializer_aws_local(self):
     """Check that the AWS-local provider yields a non-None _processor."""
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS_LOCAL,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #24
0
 def test_initializer_azure(self):
     """Check that the Azure provider yields a non-None _processor."""
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': '/my/report/file',
         'compression': 'GZIP',
         'provider': AZURE,
         'provider_uuid': self.azure_provider_uuid,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #25
0
 def test_initializer_azure_local(self):
     """Check that the Azure-local provider yields a non-None _processor.

     NOTE(review): the report path points at OCP test data and the provider
     uuid is ``self.aws_provider_uuid`` -- confirm this is intentional.
     """
     local_report_path = './koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': local_report_path,
         'compression': 'PLAIN',
         'provider': AZURE_LOCAL_SERVICE_PROVIDER,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #26
0
 def test_set_processor_parquet(self):
     """Check that _processor is a ParquetReportProcessor for AWS."""
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": "/my/report/file",
         "compression": "GZIP",
         "provider": Provider.PROVIDER_AWS,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
         "context": {"request_id": 1},
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsInstance(report_processor._processor, ParquetReportProcessor)
예제 #27
0
파일: process.py 프로젝트: LaVLaS/masu
def _process_report_file(schema_name, report_path, compression):
    """
    Task to process a Report.

    Logs the report parameters and current memory figures, records the
    start/completion timestamps for the report file around the processor
    run, and finally removes temporary files from the report's directory.

    Args:
        schema_name (String) db schema name
        report_path (String) path to downloaded reports
        compression (String) 'PLAIN' or 'GZIP'

    Returns:
        None

    """
    # Lazy %-style arguments let the logging framework do the formatting.
    LOG.info('Processing Report:'
             ' schema_name: %s,'
             ' report_path: %s,'
             ' compression: %s',
             schema_name, report_path, compression)

    mem = psutil.virtual_memory()
    # NOTE(review): the message says "Available" but reports ``mem.free`` and
    # ``mem.percent``; confirm against the psutil docs that these are the
    # intended figures.
    LOG.info('Available memory: %s bytes (%s%%)', mem.free, mem.percent)

    # Only the final path component identifies the report in the stats table.
    file_name = report_path.split('/')[-1]

    stats_recorder = ReportStatsDBAccessor(file_name)
    try:
        stats_recorder.log_last_started_datetime()
        stats_recorder.commit()

        processor = ReportProcessor(schema_name=schema_name,
                                    report_path=report_path,
                                    compression=compression)
        processor.process()

        stats_recorder.log_last_completed_datetime()
        stats_recorder.commit()
    finally:
        # Always close the accessor's session, even when processing raises;
        # the exception itself still propagates to the caller.
        stats_recorder.close_session()

    files = remove_files.remove_temp_cur_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
예제 #28
0
 def test_initializer_ocp(self):
     """Check that the OCP provider yields a non-None _processor."""
     ocp_report_path = (
         './koku/masu/test/data/ocp/'
         'e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
     )
     processor_kwargs = {
         'schema_name': self.schema,
         'report_path': ocp_report_path,
         'compression': 'PLAIN',
         'provider': OPENSHIFT_CONTAINER_PLATFORM,
         'provider_uuid': self.aws_provider_uuid,
         'manifest_id': None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #29
0
 def test_initializer_ocp(self):
     """Check that the OCP provider yields a non-None _processor."""
     ocp_report_path = (
         "./koku/masu/test/data/ocp/"
         "e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv"
     )
     processor_kwargs = {
         "schema_name": self.schema,
         "report_path": ocp_report_path,
         "compression": "PLAIN",
         "provider": Provider.PROVIDER_OCP,
         "provider_uuid": self.aws_provider_uuid,
         "manifest_id": None,
     }
     report_processor = ReportProcessor(**processor_kwargs)
     self.assertIsNotNone(report_processor._processor)
예제 #30
0
 def process_report(self,
                    report,
                    compression,
                    provider_type,
                    provider,
                    manifest,
                    bill_date=None):
     """Process a report and store start/completion times on a status record.

     A "CostUsageReportStatus" object is created via ``baker.make`` for the
     given manifest and report, stamped with ``self.dh.now`` before and after
     the processor runs, and then saved. The processor receives a context
     holding ``bill_date`` as "start_date" and a fresh ``uuid4()`` as
     "tracing_id".
     """
     report_status = baker.make("CostUsageReportStatus",
                                manifest=manifest,
                                report_name=report)
     report_status.last_started_datetime = self.dh.now

     processing_context = {"start_date": bill_date, "tracing_id": uuid4()}
     report_processor = ReportProcessor(
         self.schema,
         report,
         compression,
         provider_type,
         provider.uuid,
         manifest.id,
         context=processing_context,
     )
     report_processor.process()

     report_status.last_completed_datetime = self.dh.now
     report_status.save()