Ejemplo n.º 1
0
def remove_temp_cur_files(path):
    """
    Remove temporary cost usage report files.

    A file is removed when it has a recorded completion date and the UUID
    found in its name differs from the ``assemblyId`` of the manifest file
    found in the same directory.

    Args:
        path (String): directory holding the manifest and the report files

    Returns:
        ([String]) paths of the files that were removed.

    """
    current_assembly_id = None
    victim_list = []
    for file_name in os.listdir(path):
        file_path = '{}/{}'.format(path, file_name)
        if file_name.endswith('Manifest.json'):
            with open(file_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('assemblyId')
            continue

        stats = ReportStatsDBAccessor(file_name)
        try:
            completed_date = stats.get_last_completed_datetime()
        finally:
            # Release the DB session for every file, not only on success.
            stats.close_session()
        if not completed_date:
            continue

        # A processed file without a UUID in its name cannot be matched to
        # an assembly; leave it alone instead of failing on an empty pop().
        uuids = utils.extract_uuids_from_string(file_name)
        if not uuids:
            continue

        victim_list.append({'file': file_path,
                            'completed_date': completed_date,
                            'assemblyId': uuids.pop()})

    removed_files = []
    for victim in victim_list:
        if victim['assemblyId'] == current_assembly_id:
            # Files of the current assembly are kept.
            continue
        LOG.info('Removing %s, completed processing on date %s',
                 victim['file'], victim['completed_date'])
        os.remove(victim['file'])
        removed_files.append(victim['file'])
    return removed_files
Ejemplo n.º 2
0
def _process_report_file(schema_name, provider, provider_uuid, report_dict):
    """
    Process one report file and record its progress.

    The start and completion times are logged through the report stats
    accessor, the manifest's processed-file counter is incremented, the
    provider is marked as set up, and processed files are removed from the
    report's directory.

    Args:
        schema_name   (String) db schema name
        provider      (String) provider type
        provider_uuid (String) provider uuid
        report_dict   (dict) The report data dict from previous task

    Returns:
        None

    """
    report_path = report_dict.get('file')
    compression = report_dict.get('compression')
    start_date = report_dict.get('start_date')
    manifest_id = report_dict.get('manifest_id')
    provider_id = report_dict.get('provider_id')

    template = ('Processing Report:'
                ' schema_name: {},'
                ' report_path: {},'
                ' compression: {},'
                ' provider: {},'
                ' start_date: {}')
    LOG.info(template.format(schema_name, report_path, compression,
                             provider, start_date))

    memory = psutil.virtual_memory()
    LOG.info('Avaiable memory: {} bytes ({}%)'.format(memory.free, memory.percent))

    # Stats are keyed by the bare file name, not the full path.
    file_name = report_path.rsplit('/', 1)[-1]

    with ReportStatsDBAccessor(file_name, manifest_id) as started_recorder:
        started_recorder.log_last_started_datetime()

    processor = ReportProcessor(
        schema_name=schema_name,
        report_path=report_path,
        compression=compression,
        provider=provider,
        provider_id=provider_id,
        manifest_id=manifest_id,
    )
    processor.process()

    with ReportStatsDBAccessor(file_name, manifest_id) as completed_recorder:
        completed_recorder.log_last_completed_datetime()

    with ReportManifestDBAccessor() as manifest_accessor:
        manifest = manifest_accessor.get_manifest_by_id(manifest_id)
        if not manifest:
            LOG.error('Unable to find manifest for ID: %s, file %s',
                      manifest_id, file_name)
        else:
            manifest.num_processed_files += 1
            manifest.save()
            manifest_accessor.mark_manifest_as_updated(manifest)

    with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
        provider_accessor.setup_complete()

    removed = processor.remove_processed_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(removed))
    def test_initializer_preexisting_report(self):
        """Test that a new accessor for a preexisting report reads back the stored stats."""
        saver = ReportStatsDBAccessor('myreport', self.manifest_id)
        saver.update(
            cursor_position=33,
            last_completed_datetime='2011-1-1 11:11:11',
            last_started_datetime='2022-2-2 22:22:22',
            etag='myetag',
        )
        saver.commit()

        self.assertIsNotNone(saver._obj)

        # Get another accessor for the same report and verify we get back the right information.
        saver2 = ReportStatsDBAccessor('myreport', self.manifest_id)
        last_completed = saver2.get_last_completed_datetime()

        self.assertEqual(last_completed.year, 2011)
        self.assertEqual(last_completed.month, 1)
        self.assertEqual(last_completed.day, 1)
        self.assertEqual(last_completed.hour, 11)
        self.assertEqual(last_completed.minute, 11)
        self.assertEqual(last_completed.second, 11)

        self.assertEqual(saver.get_etag(), 'myetag')
        # Also check the etag through the second accessor: asserting only on
        # the writer does not show the value is visible to a fresh accessor.
        self.assertEqual(saver2.get_etag(), 'myetag')
Ejemplo n.º 4
0
    def test_process_report_files_with_transaction_atomic_error(
            self, mock_files, mock_processor):
        """Test than an exception rolls back the atomic transaction.

        The mocked processor raises, and the test then checks that no
        completion time, processed-file count, manifest update time or
        provider setup flag was recorded.
        """
        path = "{}/{}".format("test", "file1.csv")
        mock_files.return_value = [{"file": path, "compression": "GZIP"}]
        schema_name = self.schema
        provider = Provider.PROVIDER_AWS
        provider_uuid = self.aws_provider_uuid
        report_month = DateHelper().today
        manifest_dict = {
            "assembly_id": "12345",
            "billing_period_start_datetime": report_month,
            "num_total_files": 1,
            "provider_uuid": self.aws_provider_uuid,
            "task": "170653c0-3e66-4b7e-a764-336496d7ca5a",
        }
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.add(**manifest_dict)
            manifest.save()
            manifest_id = manifest.id
            initial_update_time = manifest.manifest_updated_datetime

        with ReportStatsDBAccessor("file1.csv", manifest_id) as stats_accessor:
            # Actually call the getter; the bare attribute reference did nothing.
            stats_accessor.get_last_completed_datetime()

        with ReportStatsDBAccessor(path, manifest_id) as report_file_accessor:
            report_file_accessor.get_last_started_datetime()

        mock_processor.side_effect = Exception

        with self.assertRaises(Exception):
            customer_name = "Fake Customer"
            authentication = "auth"
            billing_source = "bill"
            provider_type = provider
            get_report_files(
                customer_name=customer_name,
                authentication=authentication,
                billing_source=billing_source,
                provider_type=provider_type,
                schema_name=schema_name,
                provider_uuid=provider_uuid,
                report_month=report_month,
            )

        with ReportStatsDBAccessor(path, manifest_id) as report_file_accessor:
            self.assertIsNone(
                report_file_accessor.get_last_completed_datetime())

        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            self.assertEqual(manifest.num_processed_files, 0)
            self.assertEqual(manifest.manifest_updated_datetime,
                             initial_update_time)

        with ProviderDBAccessor(
                provider_uuid=provider_uuid) as provider_accessor:
            self.assertFalse(provider_accessor.get_setup_complete())
Ejemplo n.º 5
0
    def download_report(self, date_time):
        """
        Download CUR for a given date.

        Every report key listed in the manifest is downloaded, and the etag
        returned by the download is stored through the report stats accessor.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            ([{}]) List of dictionaries containing file path and compression.

        """
        LOG.info('Current date is %s.  Attempting to get manifest...',
                 str(date_time))
        manifest = self._get_manifest(date_time)
        reports = manifest.get('reportKeys')

        cur_reports = []
        for report in reports:
            local_s3_filename = utils.get_local_file_name(report)
            stats_recorder = ReportStatsDBAccessor(local_s3_filename)
            try:
                stored_etag = stats_recorder.get_etag()
                file_name, etag = self.download_file(report, stored_etag)
                stats_recorder.update(etag=etag)
                stats_recorder.commit()
            finally:
                # Close the session even when the download fails, so a
                # failed download does not leak it.
                stats_recorder.close_session()

            cur_reports.append({
                'file': file_name,
                'compression': self.report.get('Compression'),
            })
        return cur_reports
Ejemplo n.º 6
0
    def test_process_report_files_with_transaction_atomic_error(
            self, mock_processor, mock_setup_complete):
        """Check that a failure while processing leaves no recorded progress."""
        schema_name = self.schema
        provider = Provider.PROVIDER_AWS
        provider_uuid = self.aws_provider_uuid
        path = '{}/{}'.format('test', 'file1.csv')

        manifest_dict = {
            'assembly_id': '12345',
            'billing_period_start_datetime': DateAccessor().today_with_timezone('UTC'),
            'num_total_files': 2,
            'provider_uuid': self.aws_provider_uuid,
            'task': '170653c0-3e66-4b7e-a764-336496d7ca5a',
        }
        with ReportManifestDBAccessor() as manifest_accessor:
            new_manifest = manifest_accessor.add(**manifest_dict)
            new_manifest.save()
            manifest_id = new_manifest.id
            initial_update_time = new_manifest.manifest_updated_datetime

        with ReportStatsDBAccessor(path, manifest_id) as stats_accessor:
            stats_accessor.get_last_started_datetime()

        report_dict = {
            'file': path,
            'compression': 'gzip',
            'start_date': str(DateAccessor().today()),
            'manifest_id': manifest_id,
        }

        # Make the mocked setup-complete step raise.
        mock_setup_complete.side_effect = Exception

        with self.assertRaises(Exception):
            _process_report_file(
                schema_name, provider, provider_uuid, report_dict)

        # Nothing may have been recorded after the failure.
        with ReportStatsDBAccessor(path, manifest_id) as stats_accessor:
            completed = stats_accessor.get_last_completed_datetime()
            self.assertIsNone(completed)

        with ReportManifestDBAccessor() as manifest_accessor:
            stored_manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            self.assertEqual(stored_manifest.num_processed_files, 0)
            self.assertEqual(
                stored_manifest.manifest_updated_datetime, initial_update_time)

        with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
            self.assertFalse(provider_accessor.get_setup_complete())
    def test_get_last_report_completed_datetime(self):
        """Check that the latest completion time among a manifest's reports is returned."""
        manifest = self.manifest_accessor.add(**self.manifest_dict)
        earlier_time = DateAccessor().today_with_timezone('UTC')
        later_time = earlier_time + datetime.timedelta(hours=1)

        # Two reports under the same manifest, completed one hour apart.
        completion_times = (
            ('earlier_report', earlier_time),
            ('later_report', later_time),
        )
        for report_name, completed_at in completion_times:
            accessor = ReportStatsDBAccessor(report_name, manifest.id)
            accessor.update(last_completed_datetime=completed_at)

        latest = self.manifest_accessor.get_last_report_completed_datetime(manifest.id)

        self.assertEqual(latest, later_time)
Ejemplo n.º 8
0
    def test_remove_temp_cur_files(self):
        """Test to remove temporary cost usage files.

        Only files with a processed date that do not start with the
        manifest's assembly ID are expected to be removed.
        """
        cur_dir = tempfile.mkdtemp()
        # Registered up front so the directory is removed even if an
        # assertion below fails.
        self.addCleanup(shutil.rmtree, cur_dir)

        manifest_data = {"assemblyId": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
        manifest = '{}/{}'.format(cur_dir, 'koku-Manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [{'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                     {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-2.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                     {'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=2)},
                     {'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-koku-1.csv.gz',
                      'processed_date': datetime.datetime(year=2018, month=5, day=1)},
                     {'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-koku-1.csv.gz',
                      'processed_date': None}]

        # The manifest lookup does not depend on the file, so do it once.
        obj = self.manifest_accessor.get_manifest(self.assembly_id, self.provider_id)

        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(cur_dir, item['file'])
            # Create an empty report file on disk.
            with open(path, 'w'):
                pass
            stats = ReportStatsDBAccessor(item['file'], obj.id)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            if not item['file'].startswith(manifest_data.get('assemblyId')) and item['processed_date']:
                expected_delete_list.append(path)

        removed_files = self.processor.remove_temp_cur_files(cur_dir)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
Ejemplo n.º 9
0
 def test_clear_last_started_date(self):
     """Check that clearing the last started datetime resets it to None."""
     accessor = ReportStatsDBAccessor("myreport", self.manifest_id)

     # A logged start time must be readable back.
     accessor.log_last_started_datetime()
     started = accessor.get_last_started_datetime()
     self.assertIsNotNone(started)

     # After clearing, the getter must return None.
     accessor.clear_last_started_datetime()
     started = accessor.get_last_started_datetime()
     self.assertIsNone(started)
Ejemplo n.º 10
0
    def test_initializer(self):
        """Check that a new accessor holds a session, then remove its entry."""
        accessor = ReportStatsDBAccessor('myreport')
        session = accessor._session
        self.assertIsNotNone(session)

        # Remove the entry, commit the removal and close the session.
        accessor.remove()
        accessor.commit()
        accessor.close_session()
Ejemplo n.º 11
0
def record_report_status(manifest_id, file_name):
    """
    Look up the completion time recorded for a report file.

    A report stats accessor is opened for the file and manifest, and its
    last completed datetime is read. One of two messages is logged depending
    on whether a completion time exists. The caller uses the returned value
    to decide whether processing should continue.

    Args:
        manifest_id (Integer): Manifest Identifier.
        file_name (String): Report file name

    Returns:
        DateTime - Last completed date time for a given report file.

    """
    last_completed = False
    with ReportStatsDBAccessor(file_name, manifest_id) as db_accessor:
        last_completed = db_accessor.get_last_completed_datetime()
        message = (
            f"Report {file_name} has already been processed."
            if last_completed
            else f"Recording stats entry for {file_name}"
        )
        LOG.info(message)
    return last_completed
Ejemplo n.º 12
0
def record_report_status(manifest_id, file_name, request_id, context=None):
    """
    Creates initial report status database entry for new report files.

    If a report has already been downloaded from the ingress service
    there is a chance that processing has already been complete.  The
    function returns the last completed date time to determine if the
    report processing should continue in extract_payload.

    Args:
        manifest_id (Integer): Manifest Identifier.
        file_name (String): Report file name
        request_id (String): Identifier associated with the payload
        context (Dict): Context for logging (account, etc). Defaults to a
            fresh empty dict on every call.

    Returns:
        DateTime - Last completed date time for a given report file.

    """
    # A literal ``{}`` default is created once and shared by every call;
    # build a new dict per call instead.
    if context is None:
        context = {}

    already_processed = False
    with ReportStatsDBAccessor(file_name, manifest_id) as db_accessor:
        already_processed = db_accessor.get_last_completed_datetime()
        if already_processed:
            msg = f"Report {file_name} has already been processed."
        else:
            msg = f"Recording stats entry for {file_name}"
        LOG.info(log_json(request_id, msg, context))
    return already_processed
Ejemplo n.º 13
0
    def download_report(self, date_time):
        """
        Download the report files listed for a given date.

        The report context for the date supplies the manifest ID and the
        list of files. Each file is downloaded with its stored etag, the new
        etag is saved, and a dictionary describing the file is collected.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            ([{}]) List of dictionaries containing file path and compression.

        """
        LOG.info('Attempting to get %s manifest for %s...', self.provider_type, str(date_time))
        report_context = self._downloader.get_report_context_for_date(date_time)
        manifest_id = report_context.get('manifest_id')

        cur_reports = []
        for report in report_context.get('files', []):
            local_file_name = self._downloader.get_local_file_for_report(report)
            with ReportStatsDBAccessor(local_file_name, manifest_id) as stats_recorder:
                previous_etag = stats_recorder.get_etag()
                file_name, etag = self._downloader.download_file(report, previous_etag)
                stats_recorder.update(etag=etag)

            cur_reports.append({
                'file': file_name,
                'compression': report_context.get('compression'),
                'start_date': date_time,
                'assembly_id': report_context.get('assembly_id'),
                'manifest_id': manifest_id,
                'provider_uuid': self.provider_uuid,
            })
        return cur_reports
 def test_check_if_manifest_should_be_downloaded_error_processing_manifest(self):
     """Check that a manifest with an unfinished report is downloaded again."""
     reports = CostUsageReportStatus.objects.filter(manifest_id=self.manifest_id)

     # First report: started and completed.
     with ReportStatsDBAccessor(reports[0].report_name, reports[0].manifest_id) as finished:
         finished.log_last_started_datetime()
         finished.log_last_completed_datetime()

     # Second report: started, with the completion time explicitly cleared.
     with ReportStatsDBAccessor(reports[1].report_name, reports[1].manifest_id) as unfinished:
         unfinished.log_last_started_datetime()
         unfinished.update(last_completed_datetime=None)

     should_download = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
     self.assertTrue(should_download)
Ejemplo n.º 15
0
    def download_report(self, date_time):
        """
        Download CUR for a given date.

        Every report key listed in the manifest is downloaded from the
        bucket path, and the etag returned by the download is stored through
        the report stats accessor.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            ([{}]) List of dictionaries containing file path and compression.

        """
        manifest = self._get_manifest(date_time)
        reports = manifest.get('reportKeys')

        cur_reports = []
        for report in reports:
            local_s3_filename = utils.get_local_file_name(report)
            stats_recorder = ReportStatsDBAccessor(local_s3_filename)
            try:
                stored_etag = stats_recorder.get_etag()
                report_path = self.bucket_path + '/' + report
                # NOTE(review): this writes self.credential to the log;
                # confirm it never holds a secret value.
                LOG.info('Downloading %s with credential %s', report_path,
                         self.credential)
                file_name, etag = self.download_file(report_path, stored_etag)
                stats_recorder.update(etag=etag)
                stats_recorder.commit()
            finally:
                # The session was never closed before; close it on success
                # and on failure alike.
                stats_recorder.close_session()

            cur_reports.append({'file': file_name, 'compression': 'GZIP'})
        return cur_reports
Ejemplo n.º 16
0
 def test_log_last_completed_datetime(self):
     """Check that logging a completion time stores it, then delete the entry."""
     accessor = ReportStatsDBAccessor('myreport', self.manifest_id)
     accessor.log_last_completed_datetime()
     completed = accessor.get_last_completed_datetime()
     self.assertIsNotNone(completed)

     # After deleting, no status rows may remain.
     accessor.delete()
     remaining = CostUsageReportStatus.objects.count()
     self.assertEqual(remaining, 0)
Ejemplo n.º 17
0
 def generate_test_report_files(self):
     """
     Generate a file entry for every file of the manifest.

     One ``file_<n>`` name is produced per index in
     ``range(self._num_total_files)``. For each name a report stats accessor
     is opened for the manifest and the name is appended to
     ``self._report_files``.

     Returns:
         (String) name of the first file generated, or None when
         ``_num_total_files`` is zero. The first name is returned to stay
         compatible with the previous return value.

     """
     first_file_name = None
     for file_cnt in range(self._num_total_files):
         file_name = f"file_{file_cnt}"
         with ReportStatsDBAccessor(file_name, self._manifest_id):
             print(
                 f"Generating file entry ({file_name}) for manifest {self._manifest_id}"
             )
             self._report_files.append(file_name)
         # Do not return here: returning inside the loop stopped the
         # generation after the first file.
         if first_file_name is None:
             first_file_name = file_name
     return first_file_name
Ejemplo n.º 18
0
    def test_process_report_files_with_transaction_atomic_error(self, mock_processor, mock_setup_complete):
        """Check that a failure while processing leaves no recorded progress."""
        path = "/".join(("test", "file1.csv"))
        schema_name = self.schema
        provider = Provider.PROVIDER_AWS
        provider_uuid = self.aws_provider_uuid

        with ReportManifestDBAccessor() as manifest_accessor:
            new_manifest = manifest_accessor.add(
                assembly_id="12345",
                billing_period_start_datetime=DateAccessor().today_with_timezone("UTC"),
                num_total_files=2,
                provider_uuid=self.aws_provider_uuid,
                task="170653c0-3e66-4b7e-a764-336496d7ca5a",
            )
            new_manifest.save()
            manifest_id = new_manifest.id
            initial_update_time = new_manifest.manifest_updated_datetime

        with ReportStatsDBAccessor(path, manifest_id) as stats_accessor:
            stats_accessor.get_last_started_datetime()

        report_dict = {
            "file": path,
            "compression": "gzip",
            "start_date": str(DateAccessor().today()),
            "manifest_id": manifest_id,
        }

        # Make the mocked setup-complete step raise.
        mock_setup_complete.side_effect = Exception

        with self.assertRaises(Exception):
            _process_report_file(schema_name, provider, provider_uuid, report_dict)

        # Nothing may have been recorded after the failure.
        with ReportStatsDBAccessor(path, manifest_id) as stats_accessor:
            completed = stats_accessor.get_last_completed_datetime()
            self.assertIsNone(completed)

        with ReportManifestDBAccessor() as manifest_accessor:
            stored_manifest = manifest_accessor.get_manifest_by_id(manifest_id)
            self.assertEqual(stored_manifest.num_processed_files, 0)
            self.assertEqual(stored_manifest.manifest_updated_datetime, initial_update_time)

        with ProviderDBAccessor(provider_uuid=provider_uuid) as provider_accessor:
            setup_done = provider_accessor.get_setup_complete()
            self.assertFalse(setup_done)
Ejemplo n.º 19
0
    def test_record_report_status(self):
        """Check that an accessor for a recorded report carries the same manifest and name."""
        manifest_id = 1
        file_name = "testreportfile.csv"
        msg_handler.record_report_status(manifest_id, file_name)

        with ReportStatsDBAccessor(file_name, manifest_id) as accessor:
            expected_and_actual = (
                (accessor._manifest_id, manifest_id),
                (accessor._report_name, file_name),
            )
            for actual, expected in expected_and_actual:
                self.assertEqual(actual, expected)
Ejemplo n.º 20
0
 def generate_one_test_file(self):
     """
     Generate one more file entry for the manifest.

     The new name is ``file_<n>``, where ``n`` is the number of names
     already held in ``self._report_files``.

     Returns:
         (String) the generated file name.

     """
     file_name = f"file_{len(self._report_files)}"
     with ReportStatsDBAccessor(file_name, self._manifest_id):
         print(f"Generating file entry ({file_name}) for manifest {self._manifest_id}")
         self._report_files.append(file_name)
         return file_name
     # Only reached if the accessor's context manager suppresses an exception.
     return None
Ejemplo n.º 21
0
    def download_report(self, report_context):
        """
        Download the current report file described by a report context.

        Args:
            report_context (dict): read for the keys "date", "manifest_id",
                "current_file", "compression", "assembly_id" and
                "create_table".

        Returns:
            (dict) Description of the downloaded file, or an empty dict when
            the file was already processed or could not be downloaded.

        """
        date_time = report_context.get("date")
        msg = f"Attempting to get {self.provider_type} manifest for {str(date_time)}."
        LOG.info(log_json(self.tracing_id, msg, self.context))

        manifest_id = report_context.get("manifest_id")
        report = report_context.get("current_file")
        local_file_name = self._downloader.get_local_file_for_report(report)

        already_done = self.is_report_processed(local_file_name, manifest_id)
        if already_done:
            LOG.info(f"File has already been processed: {local_file_name}. Skipping...")
            return {}

        with ReportStatsDBAccessor(local_file_name, manifest_id) as stats_recorder:
            previous_etag = stats_recorder.get_etag()
            try:
                file_name, etag, _, split_files = self._downloader.download_file(
                    report, previous_etag, manifest_id=manifest_id, start_date=date_time
                )
                stats_recorder.update(etag=etag)
            except (AWSReportDownloaderNoFileError, AzureReportDownloaderError) as error:
                LOG.warning(f"Unable to download report file: {report}. Reason: {str(error)}")
                return {}

        # The create_table flag is used by the ParquetReportProcessor
        # to create a Hive/Trino table.
        downloaded = {
            "file": file_name,
            "split_files": split_files,
            "compression": report_context.get("compression"),
            "start_date": date_time,
            "assembly_id": report_context.get("assembly_id"),
            "manifest_id": manifest_id,
            "provider_uuid": self.provider_uuid,
            "create_table": report_context.get("create_table", False),
        }
        return downloaded
Ejemplo n.º 22
0
    def tearDown(self):
        """Delete every report status row and manifest returned by the accessor queries."""
        super().tearDown()

        with ReportStatsDBAccessor(self.report_name, self.manifest_id) as file_accessor:
            for status_row in file_accessor._get_db_obj_query().all():
                file_accessor.delete(status_row)

        with ReportManifestDBAccessor() as manifest_accessor:
            for manifest_row in manifest_accessor._get_db_obj_query().all():
                manifest_accessor.delete(manifest_row)
Ejemplo n.º 23
0
    def test_check_if_manifest_should_be_downloaded_error_no_complete_date(self, _):
        """Check that a manifest whose report started but never completed is downloaded again."""
        with ReportManifestDBAccessor() as manifest_accessor:
            stored_manifest = manifest_accessor.get_manifest_by_id(self.manifest_id)
            # One of two files processed.
            stored_manifest.num_processed_files = 1
            stored_manifest.num_total_files = 2
            stored_manifest.save()

        # Only a start time is logged for the report.
        with ReportStatsDBAccessor(self.report_name, self.manifest_id) as stats_accessor:
            stats_accessor.log_last_started_datetime()

        should_download = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
        self.assertTrue(should_download)
Ejemplo n.º 24
0
    def test_add_remove(self):
        """Check that an entry exists after construction and is gone after delete."""
        accessor = ReportStatsDBAccessor("myreport", self.manifest_id)

        self.assertTrue(accessor.does_db_entry_exist())
        stored = accessor._get_db_obj_query().first()
        self.assertEqual(stored.report_name, "myreport")

        accessor.delete()
        stored = accessor._get_db_obj_query().first()
        self.assertIsNone(stored)
Ejemplo n.º 25
0
def record_all_manifest_files(manifest_id, report_files):
    """
    Store all report file names for manifest ID.

    Args:
        manifest_id: manifest the report files belong to
        report_files: iterable of report file names

    Returns:
        None

    """
    for report_name in report_files:
        try:
            with ReportStatsDBAccessor(report_name, manifest_id):
                LOG.debug(f"Logging {report_name} for manifest ID: {manifest_id}")
        except IntegrityError:
            # OCP records the entire file list for a new manifest when the
            # listener receives a payload. With multiple listeners it is
            # possible for two of them to receive a report file for the same
            # manifest at roughly the same time. In that case the report
            # file may already exist and an IntegrityError is thrown.
            LOG.debug(f"Report {report_name} has already been recorded.")
Ejemplo n.º 26
0
    def test_remove_temp_cur_files(self):
        """Test to remove temporary usage report files.

        Only files with a processed date that do not start with the
        manifest's uuid are expected to be removed.
        """
        insights_local_dir = tempfile.mkdtemp()
        # Registered up front so the directory is removed even if an
        # assertion below fails.
        self.addCleanup(shutil.rmtree, insights_local_dir)

        manifest_data = {"uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=2),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
                'processed_date': datetime.datetime(year=2018, month=5, day=1),
            },
            {
                'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
                'processed_date': None,
            },
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(insights_local_dir, item['file'])
            # Create an empty report file on disk.
            with open(path, 'w'):
                pass
            stats = ReportStatsDBAccessor(item['file'], None)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            if (not item['file'].startswith(manifest_data.get('uuid'))
                    and item['processed_date']):
                expected_delete_list.append(path)

        removed_files = self.ocp_processor.remove_temp_cur_files(
            insights_local_dir, manifest_id=None)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
Ejemplo n.º 27
0
    def test_check_if_manifest_should_be_downloaded_currently_processing_manifest(self, _):
        """Check that a manifest that is still being processed is not downloaded again."""
        with ReportManifestDBAccessor() as manifest_accessor:
            stored_manifest = manifest_accessor.get_manifest_by_id(self.manifest_id)
            # One of two files processed.
            stored_manifest.num_processed_files = 1
            stored_manifest.num_total_files = 2
            stored_manifest.save()

        # The report has both a start and a completion time.
        with ReportStatsDBAccessor(self.report_name, self.manifest_id) as stats_accessor:
            stats_accessor.log_last_started_datetime()
            stats_accessor.log_last_completed_datetime()

        should_download = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
        self.assertFalse(should_download)
    def test_check_if_manifest_should_be_downloaded_done_processing_manifest(self):
        """Check that a manifest whose reports all completed is not downloaded again."""
        status_rows = CostUsageReportStatus.objects.filter(manifest_id=self.manifest_id)

        # Log a start and a completion time for every report of the manifest.
        for status_row in status_rows:
            with ReportStatsDBAccessor(status_row.report_name, status_row.manifest_id) as stats_accessor:
                stats_accessor.log_last_started_datetime()
                stats_accessor.log_last_completed_datetime()

        should_download = self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id)
        self.assertFalse(should_download)
Ejemplo n.º 29
0
    def download_report(self, date_time):
        """
        Download CUR for a given date.

        The report context for the date supplies the manifest ID and the
        list of files. Files already processed are skipped; every other file
        is downloaded with its stored etag and the new etag is saved.

        Args:
            date_time (DateTime): The starting datetime object

        Returns:
            ([{}]) List of dictionaries containing file path and compression.

        """
        # No trailing comma inside the braces: "{self.provider_type,}"
        # formatted a one-element tuple, e.g. "('AWS',)".
        msg = f"Attempting to get {self.provider_type} manifest for {str(date_time)}..."
        LOG.info(log_json(self.request_id, msg, self.context))
        report_context = self._downloader.get_report_context_for_date(
            date_time)
        manifest_id = report_context.get("manifest_id")
        reports = report_context.get("files", [])
        cur_reports = []
        for report in reports:
            local_file_name = self._downloader.get_local_file_for_report(
                report)

            if self.is_report_processed(local_file_name, manifest_id):
                msg = f"File has already been processed: {local_file_name}. Skipping..."
                LOG.info(log_json(self.request_id, msg, self.context))
                continue

            with ReportStatsDBAccessor(local_file_name,
                                       manifest_id) as stats_recorder:
                stored_etag = stats_recorder.get_etag()
                file_name, etag = self._downloader.download_file(
                    report,
                    stored_etag,
                    manifest_id=manifest_id,
                    start_date=date_time)
                stats_recorder.update(etag=etag)

            cur_reports.append({
                "file": file_name,
                "compression": report_context.get("compression"),
                "start_date": date_time,
                "assembly_id": report_context.get("assembly_id"),
                "manifest_id": manifest_id,
                "provider_uuid": self.provider_uuid,
            })
        return cur_reports
Ejemplo n.º 30
0
    def test_azure_remove_temp_cur_files(self):
        """Test to remove temporary cost usage files.

        Only files with a processed date whose name does not contain the
        manifest's assembly ID are expected to be removed.
        """
        cur_dir = tempfile.mkdtemp()
        # Registered up front so the directory is removed even if an
        # assertion below fails.
        self.addCleanup(shutil.rmtree, cur_dir)

        manifest_data = {"assemblyId": "31727a10-f4b4-43a2-80e5-bef1aaeabfc1"}
        manifest = '{}/{}'.format(cur_dir, 'Manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {
                'file': 'costreport_31727a10-f4b4-43a2-80e5-bef1aaeabfc1.csv',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': 'costreport_31727a10-f4b4-43a2-80e5-bef1aaeabfc1.csv',
                'processed_date': datetime.datetime(year=2018, month=5, day=3),
            },
            {
                'file': 'costreport_2aeb9169-2526-441c-9eca-d7ed015d52bd.csv',
                'processed_date': datetime.datetime(year=2018, month=5, day=2),
            },
            {
                'file': 'costreport_6c8487e8-c590-4e6a-b2c2-91a2375c0bad.csv',
                'processed_date': datetime.datetime(year=2018, month=5, day=1),
            },
            {
                'file': 'costreport_6c8487e8-c590-4e6a-b2c2-91a2375d0bed.csv',
                'processed_date': None,
            },
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(cur_dir, item['file'])
            # Create an empty report file on disk.
            with open(path, 'w'):
                pass
            obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                      self.azure_provider.id)
            with ReportStatsDBAccessor(item['file'], obj.id) as stats:
                stats.update(last_completed_datetime=item['processed_date'])
            if (manifest_data.get('assemblyId') not in item['file']
                    and item['processed_date']):
                expected_delete_list.append(path)
        removed_files = self.processor.remove_temp_cur_files(cur_dir)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))