Beispiel #1
0
    def test_process_duplicate_rows_same_file(self):
        """Verify duplicated rows within one file are stored only once."""
        with open(self.test_report, "r") as csv_in:
            rows = list(csv.DictReader(csv_in))
        expected_count = len(rows)

        # Double every row and dump the result to a scratch file.
        doubled = rows + rows
        scratch_path = "/tmp/test_process_duplicate_rows_same_file.csv"
        with open(scratch_path, "w") as csv_out:
            writer = csv.DictWriter(csv_out, fieldnames=doubled[0].keys())
            writer.writeheader()
            writer.writerows(doubled)

        processor = OCPReportProcessor(
            schema_name="acct10001",
            report_path=scratch_path,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
        # Single processing pass over the duplicated file.
        processor.process()

        line_item_table = getattr(
            self.accessor.report_schema, OCP_REPORT_TABLE_MAP["line_item"]
        )
        with schema_context(self.schema):
            stored_count = line_item_table.objects.count()
        self.assertEqual(stored_count, expected_count)
    def test_process_duplicate_rows_same_file(self):
        """Verify duplicated rows within one file are stored only once."""
        with open(self.test_report, 'r') as csv_in:
            rows = list(csv.DictReader(csv_in))
        expected_count = len(rows)

        # Double every row and dump the result to a scratch file.
        doubled = rows + rows
        scratch_path = '/tmp/test_process_duplicate_rows_same_file.csv'
        with open(scratch_path, 'w') as csv_out:
            writer = csv.DictWriter(csv_out, fieldnames=doubled[0].keys())
            writer.writeheader()
            writer.writerows(doubled)

        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=scratch_path,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        # Single processing pass over the duplicated file.
        processor.process()

        report_db = self.accessor
        line_item_table = getattr(
            report_db.report_schema, OCP_REPORT_TABLE_MAP['line_item']
        )
        stored_count = report_db._session.query(line_item_table).count()
        self.assertEqual(stored_count, expected_count)
    def test_process_default_small_batches(self):
        """Process an uncompressed file while batching only five rows at a time."""
        with patch.object(Config, "REPORT_PROCESSING_BATCH_SIZE", 5):
            processor = OCPReportProcessor(
                schema_name="acct10001",
                report_path=self.test_report,
                compression=UNCOMPRESSED,
                provider_uuid=self.ocp_provider_uuid,
            )
            report_schema = self.accessor.report_schema

            def row_count(name):
                # Row count for the named table inside the tenant schema.
                with schema_context(self.schema):
                    return getattr(report_schema, name).objects.count()

            before = {name: row_count(name) for name in self.report_tables}
            processor.process()

            # Daily/summary tables are filled by later summarization steps.
            skipped = (
                "reporting_ocpusagelineitem_daily",
                "reporting_ocpusagelineitem_daily_summary")
            for name in self.report_tables:
                if name not in skipped:
                    self.assertTrue(row_count(name) >= before[name])
    def test_process_default(self):
        """Process an uncompressed usage file and verify rows were written."""
        processor = OCPReportProcessor(
            schema_name="acct10001",
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
        report_schema = self.accessor.report_schema

        def row_count(name):
            # Row count for the named table inside the tenant schema.
            with schema_context(self.schema):
                return getattr(report_schema, name).objects.count()

        before = {name: row_count(name) for name in self.report_tables}
        processor.process()

        # Daily/summary tables are filled by later summarization steps.
        skipped = ("reporting_ocpusagelineitem_daily",
                   "reporting_ocpusagelineitem_daily_summary")
        for name in self.report_tables:
            if name not in skipped:
                self.assertTrue(row_count(name) >= before[name])
        # The processor removes the report file once it has been consumed.
        self.assertFalse(os.path.exists(self.test_report))
    def test_process_default_small_batches(self):
        """Process an uncompressed file while batching only five rows at a time."""
        with patch.object(Config, 'REPORT_PROCESSING_BATCH_SIZE', 5):
            processor = OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.test_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )
            report_db = self.accessor
            schema = report_db.report_schema

            def row_count(name):
                # Current row count for the named report table.
                return report_db._session.query(getattr(schema, name)).count()

            before = {name: row_count(name) for name in self.report_tables}
            processor.process()

            # Daily/summary tables are filled by later summarization steps.
            skipped = (
                'reporting_ocpusagelineitem_daily',
                'reporting_ocpusagelineitem_daily_summary',
            )
            for name in self.report_tables:
                if name not in skipped:
                    self.assertTrue(row_count(name) >= before[name])
    def test_process_default(self):
        """Process an uncompressed usage file and verify rows were written."""
        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        report_db = self.accessor
        schema = report_db.report_schema

        def row_count(name):
            # Current row count for the named report table.
            return report_db._session.query(getattr(schema, name)).count()

        before = {name: row_count(name) for name in self.report_tables}
        processor.process()

        # Daily/summary tables are filled by later summarization steps.
        skipped = (
            'reporting_ocpusagelineitem_daily',
            'reporting_ocpusagelineitem_daily_summary',
        )
        for name in self.report_tables:
            if name not in skipped:
                self.assertTrue(row_count(name) >= before[name])
Beispiel #7
0
    def test_detect_report_type(self):
        """Verify report type detection for usage, storage, and unknown files."""
        cases = (
            (self.test_report, OCPReportTypes.CPU_MEM_USAGE),
            (self.storage_report, OCPReportTypes.STORAGE),
        )
        for path, expected_type in cases:
            processor = OCPReportProcessor(
                schema_name="acct10001",
                report_path=path,
                compression=UNCOMPRESSED,
                provider_uuid=self.ocp_provider_uuid,
            )
            self.assertEqual(processor.report_type, expected_type)

        # An unrecognized report file is rejected at construction time.
        with self.assertRaises(OCPReportProcessorError):
            OCPReportProcessor(
                schema_name="acct10001",
                report_path=self.unknown_report,
                compression=UNCOMPRESSED,
                provider_uuid=self.ocp_provider_uuid,
            )
    def test_process_storage_duplicates(self):
        """Reprocessing an identical storage file must not add duplicate rows."""
        def make_processor():
            # Fresh processor aimed at the same storage report each time.
            return OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.storage_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )

        # First pass populates the tables.
        make_processor().process()

        report_db = self.accessor
        schema = report_db.report_schema

        def row_count(name):
            return report_db._session.query(getattr(schema, name)).count()

        baseline = {name: row_count(name) for name in self.report_tables}

        # Second pass over the identical file must be a no-op.
        make_processor().process()
        for name in self.report_tables:
            self.assertTrue(row_count(name) == baseline[name])
    def test_process_duplicates(self):
        """Reprocessing an identical usage file must not add duplicate rows."""
        def make_processor():
            # Fresh processor aimed at the same usage report each time.
            return OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.test_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )

        # First pass populates the tables.
        make_processor().process()

        report_schema = self.accessor.report_schema

        def row_count(name):
            with schema_context(self.schema):
                return getattr(report_schema, name).objects.count()

        baseline = {name: row_count(name) for name in self.report_tables}

        # Second pass over the identical file must be a no-op.
        make_processor().process()
        for name in self.report_tables:
            self.assertTrue(row_count(name) == baseline[name])
    def test_process_usage_and_storage_default(self):
        """Process uncompressed storage and usage files; both must add rows."""
        report_db = self.accessor
        schema = report_db.report_schema

        def row_count(map_key):
            # Row count of the table mapped from the given OCP table key.
            table = getattr(schema, OCP_REPORT_TABLE_MAP[map_key])
            return report_db._session.query(table).count()

        # Storage first, then usage — same order as independent processors.
        for map_key, path in (('storage_line_item', self.storage_report),
                              ('line_item', self.test_report)):
            before = row_count(map_key)
            OCPReportProcessor(schema_name='acct10001',
                               report_path=path,
                               compression=UNCOMPRESSED,
                               provider_id=1).process()
            self.assertGreater(row_count(map_key), before)
    def setUp(self):
        """Copy fixture reports into a scratch dir and build the shared processor."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = (
            f'{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
        )
        self.storage_report = f'{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
        self.test_report_gzip = f'{self.temp_dir}/test_cur.csv.gz'
        # Copy pristine fixtures so each test can mutate/consume them freely.
        fixture_pairs = (
            (self.test_report_path, self.test_report),
            (self.storage_report_path, self.storage_report),
            (self.test_report_gzip_path, self.test_report_gzip),
        )
        for source, destination in fixture_pairs:
            shutil.copy2(source, destination)

        self.manifest_dict = {
            'assembly_id': self.assembly_id,
            'billing_period_start_datetime': self.billing_start,
            'num_total_files': 2,
            'provider_uuid': self.ocp_provider_uuid,
        }
        self.manifest = self.manifest_accessor.add(**self.manifest_dict)

        self.ocp_processor = OCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
    def test_create_usage_report_line_item_storage_missing_labels(self):
        """A row without pod labels should default both volume label fields to '{}'."""
        cluster_id = '12345'
        storage_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        inner = storage_processor._processor
        report_period_id = inner._create_report_period(
            self.row, cluster_id, self.accessor)
        report_id = inner._create_report(
            self.row, report_period_id, self.accessor)

        # Drop the label column entirely to exercise the missing-labels path.
        row = copy.deepcopy(self.row)
        del row['pod_labels']
        inner._create_usage_report_line_item(
            row, report_period_id, report_id, self.accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get('report_period_id'), report_period_id)
        self.assertEqual(line_item.get('report_id'), report_id)
        self.assertEqual(line_item.get('persistentvolume_labels'), '{}')
        self.assertEqual(line_item.get('persistentvolumeclaim_labels'), '{}')

        self.assertIsNotNone(inner.line_item_columns)
 def test_initializer_unsupported_compression(self):
     """An unsupported compression type must raise MasuProcessingError."""
     bad_kwargs = {
         'schema_name': 'acct10001',
         'report_path': self.test_report,
         'compression': 'unsupported',
         'provider_id': 1,
     }
     with self.assertRaises(MasuProcessingError):
         OCPReportProcessor(**bad_kwargs)
Beispiel #14
0
    def setUpClass(cls):
        """Set up shared fixtures: report paths, DB accessor, and a sample row."""
        # NOTE: these fixtures should be replaced with real OCP reports once
        # the processor is implemented.
        cls.test_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
        cls.storage_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
        cls.unknown_report = './tests/data/test_cur.csv'
        cls.test_report_gzip = './tests/data/test_cur.csv.gz'

        cls.ocp_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=cls.test_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )

        with ReportingCommonDBAccessor() as common_db:
            cls.column_map = common_db.column_map

        cls.accessor = OCPReportDBAccessor('acct10001', cls.column_map)
        cls.report_schema = cls.accessor.report_schema
        cls.session = cls.accessor._session

        cls.report_tables = list(copy.deepcopy(OCP_REPORT_TABLE_MAP).values())

        # Keep one CSV row handy as representative line-item input.
        with open(cls.test_report, 'r') as report_file:
            cls.row = next(csv.DictReader(report_file))
    def setUp(self):
        """Copy fixture reports into a scratch dir and build the shared processor."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv"
        self.storage_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv"
        self.node_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_node_labels.csv"
        self.namespace_report = f"{self.temp_dir}/434eda91-885b-40b2-8733-7a21fad62b56_namespace_labels.csv"
        self.test_report_gzip = f"{self.temp_dir}/test_cur.csv.gz"
        self.cluster_alias = "My OCP cluster"
        # Copy pristine fixtures so each test can mutate/consume them freely.
        fixture_pairs = (
            (self.test_report_path, self.test_report),
            (self.storage_report_path, self.storage_report),
            (self.node_report_path, self.node_report),
            (self.namespace_report_path, self.namespace_report),
            (self.test_report_gzip_path, self.test_report_gzip),
        )
        for source, destination in fixture_pairs:
            shutil.copy2(source, destination)

        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.billing_start,
            "num_total_files": 2,
            "provider_uuid": self.ocp_provider_uuid,
        }
        self.manifest = self.manifest_accessor.add(**self.manifest_dict)

        self.ocp_processor = OCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
Beispiel #16
0
    def _set_processor(self):
        """
        Create the report processor object.

        Processor is specific to the provider's cloud service.

        Args:
            None

        Returns:
            (Object) : Provider-specific report processor, or None when the
            provider type is unrecognized.

        """
        common_kwargs = {
            "schema_name": self.schema_name,
            "report_path": self.report_path,
            "compression": self.compression,
            "provider_uuid": self.provider_uuid,
        }
        # Trino/Parquet processing takes precedence over per-provider processors.
        if enable_trino_processing(self.provider_uuid):
            return ParquetReportProcessor(
                provider_type=self.provider_type,
                manifest_id=self.manifest_id,
                context=self.context,
                **common_kwargs,
            )

        provider = self.provider_type
        if provider in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
            return AWSReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
        if provider in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
            return AzureReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
        if provider == Provider.PROVIDER_OCP:
            # The OCP processor does not consume the manifest id.
            return OCPReportProcessor(**common_kwargs)
        if provider in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
            return GCPReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
        return None
    def test_create_usage_report_line_item_storage_missing_labels(self):
        """Missing volume label columns should default to '{}' on the line item."""
        cluster_id = "12345"
        storage_processor = OCPReportProcessor(
            schema_name="acct10001",
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
        inner = storage_processor._processor
        with OCPReportDBAccessor(self.schema) as accessor:
            report_period_id = inner._create_report_period(
                self.storage_row, cluster_id, accessor, self.cluster_alias)
            report_id = inner._create_report(
                self.storage_row, report_period_id, accessor)

            # Remove both label columns to exercise the missing-labels path.
            row = copy.deepcopy(self.storage_row)
            del row["persistentvolume_labels"]
            del row["persistentvolumeclaim_labels"]
            inner._create_usage_report_line_item(
                row, report_period_id, report_id, accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get("report_period_id"), report_period_id)
        self.assertEqual(line_item.get("report_id"), report_id)
        self.assertEqual(line_item.get("persistentvolume_labels"), "{}")
        self.assertEqual(line_item.get("persistentvolumeclaim_labels"), "{}")

        self.assertIsNotNone(inner.line_item_columns)
    def test_create_usage_report_line_item_namespace(self):
        """Namespace labels on a row should be carried onto the line item."""
        cluster_id = "12345"
        namespace_processor = OCPReportProcessor(
            schema_name=self.schema,
            report_path=self.namespace_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
        inner = namespace_processor._processor
        report_period_id = inner._create_report_period(
            self.row, cluster_id, self.accessor, self.cluster_alias)
        report_id = inner._create_report(
            self.row, report_period_id, self.accessor)

        # Attach namespace labels before building the line item.
        row = copy.deepcopy(self.row)
        row["namespace_labels"] = "label_one:mic_check|label_two:one_two"
        inner._create_usage_report_line_item(
            row, report_period_id, report_id, self.accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get("report_period_id"), report_period_id)
        self.assertEqual(line_item.get("report_id"), report_id)
        self.assertIsNotNone(line_item.get("namespace_labels"))
Beispiel #19
0
    def _set_processor(self):
        """
        Create the report processor object.

        Processor is specific to the provider's cloud service.

        Args:
            None

        Returns:
            (Object) : Provider-specific report processor, or None when the
            provider type is unrecognized.

        """
        common_kwargs = {
            'schema_name': self.schema_name,
            'report_path': self.report_path,
            'compression': self.compression,
            'provider_id': self.provider_id,
        }
        provider = self.provider_type
        if provider in (AMAZON_WEB_SERVICES, AWS_LOCAL_SERVICE_PROVIDER):
            return AWSReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
        if provider in (AZURE, AZURE_LOCAL_SERVICE_PROVIDER):
            return AzureReportProcessor(manifest_id=self.manifest_id, **common_kwargs)
        if provider == OPENSHIFT_CONTAINER_PLATFORM:
            # The OCP processor does not consume the manifest id.
            return OCPReportProcessor(**common_kwargs)
        return None
    def setUp(self):
        """Create a manifest and a shared OCP processor for each test."""
        super().setUp()
        self.manifest_dict = {
            'assembly_id': self.assembly_id,
            'billing_period_start_datetime': self.billing_start,
            'num_total_files': 2,
            'provider_id': self.ocp_provider.id,
        }
        self.manifest = self.manifest_accessor.add(**self.manifest_dict)
        self.manifest_accessor.commit()

        self.ocp_processor = OCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=self.ocp_provider.id,
        )
    def test_process_storage_default(self):
        """Process an uncompressed storage file and verify rows were added."""
        processor = OCPReportProcessor(
            schema_name="acct10001",
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.ocp_provider_uuid,
        )
        storage_table = getattr(
            self.accessor.report_schema,
            OCP_REPORT_TABLE_MAP["storage_line_item"],
        )

        def row_count():
            # Storage line-item count within the tenant schema.
            with schema_context(self.schema):
                return storage_table.objects.count()

        before = row_count()
        processor.process()
        self.assertGreater(row_count(), before)
    def test_process_node_label_duplicates(self):
        """Reprocessing an identical node-label file must not add duplicates."""
        def make_processor():
            # Fresh processor aimed at the node-label report each time.
            return OCPReportProcessor(
                schema_name="acct10001",
                report_path=self.node_report,
                compression=UNCOMPRESSED,
                provider_uuid=self.ocp_provider_uuid,
            )

        # First pass populates the tables.
        make_processor().process()

        report_schema = self.accessor.report_schema

        def row_count(name):
            with schema_context(self.schema):
                return getattr(report_schema, name).objects.count()

        baseline = {name: row_count(name) for name in self.report_tables}

        # Processing consumes the file, so restore it before the second pass.
        shutil.copy2(self.node_report_path, self.node_report)

        # Second pass over the identical file must be a no-op.
        make_processor().process()
        for name in self.report_tables:
            self.assertTrue(row_count(name) == baseline[name])
    def setUpClass(cls):
        """Set up shared fixtures: report paths, accessors, and a sample row."""
        # NOTE: these fixtures should be replaced with real OCP reports once
        # the processor is implemented.
        cls.test_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
        cls.storage_report = './tests/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
        cls.unknown_report = './tests/data/test_cur.csv'
        cls.test_report_gzip = './tests/data/test_cur.csv.gz'
        cls.provider_id = 1
        cls.ocp_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=cls.test_report,
            compression=UNCOMPRESSED,
            provider_id=cls.provider_id,
        )

        cls.date_accessor = DateAccessor()
        billing_start = cls.date_accessor.today_with_timezone('UTC').replace(
            year=2018, month=6, day=1, hour=0, minute=0, second=0)
        cls.assembly_id = '1234'
        cls.manifest_dict = {
            'assembly_id': cls.assembly_id,
            'billing_period_start_datetime': billing_start,
            'num_total_files': 2,
            'provider_id': 1,
        }
        cls.manifest_accessor = ReportManifestDBAccessor()

        with ReportingCommonDBAccessor() as common_db:
            cls.column_map = common_db.column_map

        cls.accessor = OCPReportDBAccessor('acct10001', cls.column_map)
        cls.report_schema = cls.accessor.report_schema
        cls.session = cls.accessor._session

        cls.report_tables = list(copy.deepcopy(OCP_REPORT_TABLE_MAP).values())

        # Keep one CSV row handy as representative line-item input.
        with open(cls.test_report, 'r') as report_file:
            cls.row = next(csv.DictReader(report_file))
    def test_process_usage_and_storage_with_invalid_data(self):
        """Processing must tolerate rows whose key fields are missing."""
        def write_invalidated(src_path, dst_path, column):
            # Copy the source CSV to dst_path, blanking one column in every row.
            with open(src_path) as src:
                rows = []
                for row in csv.DictReader(src):
                    row[column] = None
                    rows.append(row)
            with open(dst_path, "w") as dst:
                writer = csv.DictWriter(dst, fieldnames=rows[0].keys())
                writer.writeheader()
                writer.writerows(rows)

        pod_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1-invalid.csv"
        storage_report = f"{self.temp_dir}/e6b3701e-1e91-433b-b238-a31e49937558_storage-invalid.csv"
        write_invalidated(self.test_report_path, pod_report, "node")
        write_invalidated(self.storage_report_path, storage_report, "persistentvolume")

        report_schema = self.accessor.report_schema

        def row_count(map_key):
            table = getattr(report_schema, OCP_REPORT_TABLE_MAP[map_key])
            with schema_context(self.schema):
                return table.objects.count()

        # Storage first, then usage — both files contain only invalid rows,
        # so processing must complete without inserting anything.
        checks = (
            (storage_report, "storage_line_item"),
            (pod_report, "line_item"),
        )
        for path, map_key in checks:
            before = row_count(map_key)
            OCPReportProcessor(
                schema_name="acct10001",
                report_path=path,
                compression=UNCOMPRESSED,
                provider_uuid=self.ocp_provider_uuid,
            ).process()
            self.assertEqual(row_count(map_key), before)
class OCPReportProcessorTest(MasuTestCase):
    """Test Cases for the OCPReportProcessor object."""
    @classmethod
    def setUpClass(cls):
        """Set up the test class with required objects.

        Builds shared, read-only fixtures once for the whole class:
        paths to the sample CSV reports, a fixed billing-period start
        date, the manifest/report DB accessors, the report table map,
        and a single sample CSV row used by the line-item tests.
        """
        super().setUpClass()
        # These test reports should be replaced with OCP reports once processor is implemented.
        cls.test_report = './koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_February-2019-my-ocp-cluster-1.csv'
        cls.storage_report = (
            './koku/masu/test/data/ocp/e6b3701e-1e91-433b-b238-a31e49937558_storage.csv'
        )
        # A non-OCP report used to exercise the "unknown report type" path.
        cls.unknown_report = './koku/masu/test/data/test_cur.csv'
        cls.test_report_gzip = './koku/masu/test/data/test_cur.csv.gz'

        cls.date_accessor = DateAccessor()
        # Fixed, timezone-aware billing period start (2018-06-01 00:00 UTC).
        cls.billing_start = cls.date_accessor.today_with_timezone(
            'UTC').replace(year=2018,
                           month=6,
                           day=1,
                           hour=0,
                           minute=0,
                           second=0)
        cls.assembly_id = '1234'

        cls.manifest_accessor = ReportManifestDBAccessor()

        with ReportingCommonDBAccessor() as report_common_db:
            cls.column_map = report_common_db.column_map

        cls.accessor = OCPReportDBAccessor(cls.schema, cls.column_map)
        cls.report_schema = cls.accessor.report_schema

        # Deep-copy so mutations of the table map cannot leak between tests.
        _report_tables = copy.deepcopy(OCP_REPORT_TABLE_MAP)
        cls.report_tables = list(_report_tables.values())

        # Grab a single row of test data to work with
        with open(cls.test_report, 'r') as f:
            reader = csv.DictReader(f)
            cls.row = next(reader)

    @classmethod
    def tearDownClass(cls):
        """Close the DB accessor connections opened in setUpClass."""
        super().tearDownClass()
        cls.accessor.close_connections()

    def setUp(self):
        """Create a per-test manifest entry and a ready-to-use processor."""
        super().setUp()
        self.manifest_dict = dict(
            assembly_id=self.assembly_id,
            billing_period_start_datetime=self.billing_start,
            num_total_files=2,
            provider_id=self.ocp_provider.id,
        )
        self.manifest = self.manifest_accessor.add(**self.manifest_dict)
        self.manifest_accessor.commit()

        self.ocp_processor = OCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=self.ocp_provider.id,
        )

    def tearDown(self):
        """Return the database to a pre-test state."""
        super().tearDown()
        # Drop any staged line items and reset the cached column list so the
        # next test's processor starts from a clean slate.
        self.ocp_processor._processor.processed_report.remove_processed_rows()
        self.ocp_processor._processor.line_item_columns = None

    def test_initializer(self):
        """Test initializer."""
        inner = self.ocp_processor._processor
        # Every core attribute must be populated by the constructor.
        for attr in ('_schema_name', '_report_path', '_compression'):
            self.assertIsNotNone(getattr(inner, attr))

    def test_initializer_unsupported_compression(self):
        """Assert that an error is raised for an invalid compression."""
        kwargs = dict(
            schema_name='acct10001',
            report_path=self.test_report,
            compression='unsupported',
            provider_id=1,
        )
        with self.assertRaises(MasuProcessingError):
            OCPReportProcessor(**kwargs)

    def test_detect_report_type(self):
        """Test that the report type is detected from the report file."""
        def build(report_path):
            # Helper: construct a processor over the given report file.
            return OCPReportProcessor(
                schema_name='acct10001',
                report_path=report_path,
                compression=UNCOMPRESSED,
                provider_id=1,
            )

        self.assertEqual(build(self.test_report).report_type,
                         OCPReportTypes.CPU_MEM_USAGE)
        self.assertEqual(build(self.storage_report).report_type,
                         OCPReportTypes.STORAGE)
        # A file that matches no known OCP report layout must be rejected.
        with self.assertRaises(OCPReportProcessorError):
            build(self.unknown_report)

    def test_process_default(self):
        """Test the processing of an uncompressed file.

        Snapshots per-table row counts, processes the usage report, and
        verifies the raw report tables did not shrink.
        """
        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        report_schema = self.accessor.report_schema

        def row_counts():
            # Snapshot the current row count of every report table.
            counts = {}
            for table_name in self.report_tables:
                table = getattr(report_schema, table_name)
                with schema_context(self.schema):
                    counts[table_name] = table.objects.count()
            return counts

        before = row_counts()
        processor.process()
        after = row_counts()

        for table_name in self.report_tables:
            # The daily/summary tables are filled by a separate summary step,
            # so only the raw tables are expected to hold their counts here.
            if table_name not in (
                    'reporting_ocpusagelineitem_daily',
                    'reporting_ocpusagelineitem_daily_summary',
            ):
                # assertGreaterEqual reports both operands on failure,
                # unlike the opaque assertTrue(a >= b).
                self.assertGreaterEqual(after[table_name], before[table_name])

    def test_process_default_small_batches(self):
        """Test the processing of an uncompressed file in small batches.

        Same as test_process_default but with REPORT_PROCESSING_BATCH_SIZE
        patched down to 5 to force multiple insert batches.
        """
        with patch.object(Config, 'REPORT_PROCESSING_BATCH_SIZE', 5):
            processor = OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.test_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )
            report_schema = self.accessor.report_schema

            def row_counts():
                # Snapshot the current row count of every report table.
                counts = {}
                for table_name in self.report_tables:
                    table = getattr(report_schema, table_name)
                    with schema_context(self.schema):
                        counts[table_name] = table.objects.count()
                return counts

            before = row_counts()
            processor.process()
            after = row_counts()

            for table_name in self.report_tables:
                # Daily/summary tables are populated elsewhere; skip them.
                if table_name not in (
                        'reporting_ocpusagelineitem_daily',
                        'reporting_ocpusagelineitem_daily_summary',
                ):
                    # assertGreaterEqual gives a useful failure message,
                    # unlike assertTrue(a >= b).
                    self.assertGreaterEqual(after[table_name],
                                            before[table_name])

    def test_process_duplicates(self):
        """Test that row duplicates are not inserted into the DB.

        Processes the same usage report twice and verifies no table grew
        on the second pass.
        """
        def build_processor():
            # process() consumes the processor, so build a fresh one per pass.
            return OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.test_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )

        # Process for the first time.
        build_processor().process()

        report_schema = self.accessor.report_schema
        counts = {}
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            with schema_context(self.schema):
                counts[table_name] = table.objects.count()

        # Process the identical file a second time; counts must not change.
        build_processor().process()
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            with schema_context(self.schema):
                count = table.objects.count()
            # assertEqual reports both values on failure, unlike the
            # opaque assertTrue(a == b).
            self.assertEqual(count, counts[table_name])

    def test_process_duplicate_rows_same_file(self):
        """Test that row duplicates are not inserted into the DB.

        Builds a report file in which every row appears twice and verifies
        only the unique rows end up in the line-item table.
        """
        with open(self.test_report, 'r') as f:
            data = list(csv.DictReader(f))

        expected_count = len(data)
        # Duplicate every row within the single report file.
        data.extend(data)

        # Use a unique temp file rather than a fixed /tmp path so parallel
        # test runs cannot collide, and clean it up when the test finishes.
        fd, tmp_file = tempfile.mkstemp(suffix='.csv')
        os.close(fd)
        self.addCleanup(os.remove, tmp_file)

        with open(tmp_file, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=data[0].keys())
            writer.writeheader()
            writer.writerows(data)

        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=tmp_file,
            compression=UNCOMPRESSED,
            provider_id=1,
        )

        # Process for the first time
        processor.process()

        report_schema = self.accessor.report_schema
        table = getattr(report_schema, OCP_REPORT_TABLE_MAP['line_item'])
        with schema_context(self.schema):
            count = table.objects.count()
        # Only the unique rows should have been inserted.
        self.assertEqual(count, expected_count)

    def test_get_file_opener_default(self):
        """Test that the default file opener is returned."""
        opener, mode = self.ocp_processor._processor._get_file_opener(
            UNCOMPRESSED)
        # Plain files use the builtin open() in text-read mode.
        self.assertEqual((opener, mode), (open, 'r'))

    def test_get_file_opener_gzip(self):
        """Test that the gzip file opener is returned."""
        opener, mode = self.ocp_processor._processor._get_file_opener(
            GZIP_COMPRESSED)
        # Gzipped files use gzip.open() in text-read mode.
        self.assertEqual((opener, mode), (gzip.open, 'rt'))

    def test_update_mappings(self):
        """Test that mappings are updated."""
        test_entry = {'key': 'value'}
        inner = self.ocp_processor._processor
        ce_maps = {
            'report_periods': inner.existing_report_periods_map,
            'reports': inner.existing_report_map,
        }

        # Record each map's size, then seed it with a sentinel entry.
        counts = {name: len(ce_map.values()) for name, ce_map in ce_maps.items()}
        for ce_map in ce_maps.values():
            ce_map.update(test_entry)

        inner._update_mappings()

        for name, ce_map in ce_maps.items():
            # The refresh must have grown the map and kept the sentinel.
            self.assertTrue(len(ce_map.values()) > counts[name])
            for key in test_entry:
                self.assertIn(key, ce_map)

    def test_write_processed_rows_to_csv(self):
        """Test that the CSV bulk upload file contains proper data.

        Stages one line item, renders the bulk-upload file, and checks
        that the rendered row round-trips back to the staged values.
        """
        cluster_id = '12345'
        report_period_id = self.ocp_processor._processor._create_report_period(
            self.row, cluster_id, self.accessor)
        report_id = self.ocp_processor._processor._create_report(
            self.row, report_period_id, self.accessor)
        self.ocp_processor._processor._create_usage_report_line_item(
            self.row, report_period_id, report_id, self.accessor)

        file_obj = self.ocp_processor._processor._write_processed_rows_to_csv()
        line_item_data = self.ocp_processor._processor.processed_report.line_items.pop(
        )
        # Convert data to CSV format
        # NOTE(review): falsy values (0, '') map to None here, mirroring the
        # new_row[i] check below — presumably they serialize to empty CSV
        # fields; confirm against _write_processed_rows_to_csv.
        expected_values = [
            str(value) if value else None for value in line_item_data.values()
        ]

        # The bulk-upload file is tab-delimited.
        reader = csv.reader(file_obj, delimiter='\t')
        new_row = next(reader)

        # Rebuild a dict from the rendered row, normalizing empties to None.
        actual = {}
        for i, key in enumerate(line_item_data.keys()):
            actual[key] = new_row[i] if new_row[i] else None

        self.assertEqual(actual.keys(), line_item_data.keys())
        self.assertEqual(list(actual.values()), expected_values)

    def test_create_report_period(self):
        """Test that a report period id is returned."""
        cluster_id = '12345'
        table_name = OCP_REPORT_TABLE_MAP['report_period']
        with OCPReportDBAccessor(self.schema, self.column_map) as accessor:
            report_period_id = self.ocp_processor._processor._create_report_period(
                self.row, cluster_id, accessor)

        self.assertIsNotNone(report_period_id)

        # The returned id must match the most recently inserted row.
        with schema_context(self.schema):
            latest = self.accessor._get_db_obj_query(table_name).order_by(
                '-id').first()
            id_in_db = latest.id

        self.assertEqual(report_period_id, id_in_db)

    def test_create_report(self):
        """Test that a report id is returned.

        Creates a report period and a report, then verifies the returned
        id matches the newest row in the report table.
        """
        # Unused locals (table, id_column) removed; only table_name is needed.
        table_name = OCP_REPORT_TABLE_MAP['report']
        cluster_id = '12345'
        with OCPReportDBAccessor(self.schema, self.column_map) as accessor:
            report_period_id = self.ocp_processor._processor._create_report_period(
                self.row, cluster_id, accessor)

            report_id = self.ocp_processor._processor._create_report(
                self.row, report_period_id, accessor)

            self.assertIsNotNone(report_id)

            # The returned id must be the most recently inserted row's id.
            query = accessor._get_db_obj_query(table_name)
            id_in_db = query.order_by('-id').first().id

        self.assertEqual(report_id, id_in_db)

    def test_create_usage_report_line_item(self):
        """Test that line item data is returned properly."""
        inner = self.ocp_processor._processor
        cluster_id = '12345'
        report_period_id = inner._create_report_period(
            self.row, cluster_id, self.accessor)
        report_id = inner._create_report(
            self.row, report_period_id, self.accessor)

        row = copy.deepcopy(self.row)
        row['pod_labels'] = 'label_one:mic_check|label_two:one_two'
        inner._create_usage_report_line_item(
            row, report_period_id, report_id, self.accessor)

        # The staged line item is the last one appended.
        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get('report_period_id'), report_period_id)
        self.assertEqual(line_item.get('report_id'), report_id)
        self.assertIsNotNone(line_item.get('pod_labels'))

        self.assertIsNotNone(inner.line_item_columns)

    def test_create_usage_report_line_item_storage_no_labels(self):
        """Test that line item data is returned properly."""
        cluster_id = '12345'
        report_period_id = self.ocp_processor._processor._create_report_period(
            self.row, cluster_id, self.accessor)
        report_id = self.ocp_processor._processor._create_report(
            self.row, report_period_id, self.accessor)

        # Empty label strings should be normalized to empty JSON objects.
        row = copy.deepcopy(self.row)
        row['persistentvolume_labels'] = ''
        row['persistentvolumeclaim_labels'] = ''

        storage_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        inner = storage_processor._processor
        inner._create_usage_report_line_item(
            row, report_period_id, report_id, self.accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get('report_period_id'), report_period_id)
        self.assertEqual(line_item.get('report_id'), report_id)
        self.assertEqual(line_item.get('persistentvolume_labels'), '{}')
        self.assertEqual(line_item.get('persistentvolumeclaim_labels'), '{}')

        self.assertIsNotNone(inner.line_item_columns)

    def test_create_usage_report_line_item_storage_missing_labels(self):
        """Test that line item data is returned properly."""
        cluster_id = '12345'
        storage_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        inner = storage_processor._processor
        with OCPReportDBAccessor(self.schema, self.column_map) as accessor:
            report_period_id = inner._create_report_period(
                self.row, cluster_id, accessor)
            report_id = inner._create_report(
                self.row, report_period_id, accessor)

            # Drop the labels column; label fields should default to '{}'.
            row = copy.deepcopy(self.row)
            del row['pod_labels']
            inner._create_usage_report_line_item(
                row, report_period_id, report_id, accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get('report_period_id'), report_period_id)
        self.assertEqual(line_item.get('report_id'), report_id)
        self.assertEqual(line_item.get('persistentvolume_labels'), '{}')
        self.assertEqual(line_item.get('persistentvolumeclaim_labels'), '{}')

        self.assertIsNotNone(inner.line_item_columns)

    def test_create_usage_report_line_item_missing_labels(self):
        """Test that line item data with missing pod_labels is returned properly."""
        inner = self.ocp_processor._processor
        cluster_id = '12345'
        report_period_id = inner._create_report_period(
            self.row, cluster_id, self.accessor)
        report_id = inner._create_report(
            self.row, report_period_id, self.accessor)

        # Remove the labels column; the processor should default to '{}'.
        row = copy.deepcopy(self.row)
        del row['pod_labels']
        inner._create_usage_report_line_item(
            row, report_period_id, report_id, self.accessor)

        line_items = inner.processed_report.line_items
        line_item = line_items[-1] if line_items else None

        self.assertIsNotNone(line_item)
        self.assertEqual(line_item.get('report_period_id'), report_period_id)
        self.assertEqual(line_item.get('report_id'), report_id)
        self.assertEqual(line_item.get('pod_labels'), '{}')

        self.assertIsNotNone(inner.line_item_columns)

    def test_remove_ocp_temp_cur_files(self):
        """Test to remove temporary usage report files.

        Creates report files for several assembly UUIDs plus a manifest,
        then verifies only files NOT belonging to the manifest's UUID are
        removed.
        """
        insights_local_dir = tempfile.mkdtemp()
        cluster_id = 'my-ocp-cluster'
        manifest_date = "2018-05-01"
        manifest_data = {
            "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
            "cluster_id": cluster_id,
            "date": manifest_date
        }
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=2),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=1),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
                'processed_date': None,
            },
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(insights_local_dir, item['file'])
            # Create the empty report file with a context manager so the
            # handle is closed even if the stats bookkeeping below raises
            # (the original used a bare open()/close() pair).
            with open(path, 'w'):
                pass
            obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                      self.ocp_provider.id)
            stats = ReportStatsDBAccessor(item['file'], obj.id)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            # Files that do not belong to the manifest's UUID are stale
            # and should be deleted.
            if not item['file'].startswith(manifest_data.get('uuid')):
                expected_delete_list.append(path)
        fake_dir = tempfile.mkdtemp()
        with patch.object(Config, 'INSIGHTS_LOCAL_REPORT_DIR', fake_dir):
            destination_dir = '{}/{}/{}'.format(
                fake_dir, cluster_id,
                month_date_range(parser.parse(manifest_date)))
            os.makedirs(destination_dir, exist_ok=True)
            removed_files = self.ocp_processor.remove_temp_cur_files(
                insights_local_dir)
            self.assertEqual(sorted(removed_files),
                             sorted(expected_delete_list))
            shutil.rmtree(insights_local_dir)
            shutil.rmtree(fake_dir)

    def test_remove_temp_cur_files_missing_manifest(self):
        """Test to remove temporary usage report files with missing uuid in manifest."""
        insights_local_dir = tempfile.mkdtemp()
        cluster_id = 'my-ocp-cluster'
        manifest_data = {
            "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
            "cluster_id": cluster_id
        }
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        # With no date in the manifest, nothing should be removed.
        removed_files = self.ocp_processor.remove_temp_cur_files(
            insights_local_dir)
        self.assertEqual(sorted(removed_files), [])
        shutil.rmtree(insights_local_dir)

    def test_process_pod_labels(self):
        """Test that our report label string format is parsed."""
        test_label_str = 'label_one:first|label_two:next|label_three:final'

        # The processor strips the 'label_' prefix and serializes to JSON.
        expected = json.dumps(
            {'one': 'first', 'two': 'next', 'three': 'final'})

        result = self.ocp_processor._processor._process_pod_labels(
            test_label_str)

        self.assertEqual(result, expected)

    def test_process_pod_labels_bad_label_str(self):
        """Test that a bad string is handled."""
        test_label_str = 'label_onefirst|label_twonext|label_threefinal'

        # Malformed segments (no ':' separator) are discarded entirely.
        result = self.ocp_processor._processor._process_pod_labels(
            test_label_str)

        self.assertEqual(result, json.dumps({}))

    def test_process_storage_default(self):
        """Test the processing of an uncompressed storagefile."""
        processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )

        table = getattr(self.accessor.report_schema,
                        OCP_REPORT_TABLE_MAP['storage_line_item'])

        with schema_context(self.schema):
            before_count = table.objects.count()

        processor.process()

        with schema_context(self.schema):
            after_count = table.objects.count()
        # Processing the storage report must add storage line items.
        self.assertGreater(after_count, before_count)

    def test_process_storage_duplicates(self):
        """Test that row duplicate storage rows are not inserted into the DB.

        Processes the same storage report twice and verifies no table grew
        on the second pass.
        """
        def build_processor():
            # process() consumes the processor, so build a fresh one per pass.
            return OCPReportProcessor(
                schema_name='acct10001',
                report_path=self.storage_report,
                compression=UNCOMPRESSED,
                provider_id=1,
            )

        # Process for the first time.
        build_processor().process()

        report_schema = self.accessor.report_schema
        counts = {}
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            with schema_context(self.schema):
                counts[table_name] = table.objects.count()

        # Process the identical file a second time; counts must not change.
        build_processor().process()
        for table_name in self.report_tables:
            table = getattr(report_schema, table_name)
            with schema_context(self.schema):
                count = table.objects.count()
            # assertEqual reports both values on failure, unlike the
            # opaque assertTrue(a == b).
            self.assertEqual(count, counts[table_name])

    def test_process_usage_and_storage_default(self):
        """Test the processing of an uncompressed storage and usage files."""
        report_schema = self.accessor.report_schema

        def count_rows(table_key):
            # Current row count of the mapped report table.
            table = getattr(report_schema, OCP_REPORT_TABLE_MAP[table_key])
            with schema_context(self.schema):
                return table.objects.count()

        storage_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.storage_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        storage_before_count = count_rows('storage_line_item')
        storage_processor.process()
        self.assertGreater(count_rows('storage_line_item'),
                           storage_before_count)

        usage_processor = OCPReportProcessor(
            schema_name='acct10001',
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_id=1,
        )
        before_count = count_rows('line_item')
        usage_processor.process()
        self.assertGreater(count_rows('line_item'), before_count)