def test_get_all_running_tasks(self, mock_inspect):
    """Verify task lists from every online worker host are merged."""
    second_host = "koku-worker-2-sdfsdff"
    tasks_on_first = [1, 2, 3]
    tasks_on_second = [4, 5, 6]
    combined = tasks_on_first + tasks_on_second
    # Celery's inspect API reports both workers as online.
    mock_inspect.reserved.return_value = {
        "celery@kokuworker": "",
        f"celery@{second_host}": "",
    }
    first_cache = WorkerCache()
    for entry in tasks_on_first:
        first_cache.add_task_to_cache(entry)
    with override_settings(HOSTNAME=second_host):
        second_cache = WorkerCache()
        for entry in tasks_on_second:
            second_cache.add_task_to_cache(entry)
    self.assertEqual(sorted(second_cache.get_all_running_tasks()), sorted(combined))
def test_remove_offline_worker_keys(self, mock_inspect):
    """Verify remove_offline_worker_keys drops tasks of hosts that went away."""
    second_host = "kokuworker2"
    tasks_on_first = [1, 2, 3]
    tasks_on_second = [4, 5, 6]
    everything = tasks_on_first + tasks_on_second
    # Start with both workers visible to celery inspect.
    mock_inspect.reserved.return_value = {
        "celery@kokuworker": "",
        f"celery@{second_host}": "",
    }
    first_cache = WorkerCache()
    for entry in tasks_on_first:
        first_cache.add_task_to_cache(entry)
    with override_settings(HOSTNAME=second_host):
        second_cache = WorkerCache()
        for entry in tasks_on_second:
            second_cache.add_task_to_cache(entry)
    self.assertEqual(sorted(second_cache.get_all_running_tasks()), sorted(everything))

    # kokuworker2 goes offline
    mock_inspect.reset()
    mock_inspect.reserved.return_value = {"celery@kokuworker": ""}
    second_cache.remove_offline_worker_keys()
    self.assertEqual(sorted(second_cache.get_all_running_tasks()), sorted(tasks_on_first))
def test_task_is_running_false(self):
    """A task id that was never cached is reported as not running."""
    cache = WorkerCache()
    for entry in (1, 2, 3):
        cache.add_task_to_cache(entry)
    self.assertFalse(cache.task_is_running(4))
def test_check_if_manifest_should_be_downloaded_task_currently_running(self):
    """A manifest whose task is already in the worker cache is not reprocessed."""
    cache = WorkerCache()
    cache.add_task_to_cache(self.cache_key)
    self.assertFalse(self.downloader.check_if_manifest_should_be_downloaded(self.assembly_id))
def test_remove_task_from_cache(self):
    """Adding then removing a key leaves the cache empty."""
    key = "task_key"
    cache = WorkerCache()
    cache.add_task_to_cache(key)
    self.assertEqual(cache.worker_cache, [key])
    cache.remove_task_from_cache(key)
    self.assertEqual(cache.worker_cache, [])
def test_add_task_to_cache(self):
    """A single key added to an empty cache becomes its only entry."""
    key = "task_key"
    cache = WorkerCache()
    self.assertEqual(cache.worker_cache, [])
    cache.add_task_to_cache(key)
    self.assertEqual(cache.worker_cache, [key])
def test_remove_task_from_cache_value_not_in_cache(self):
    """Removing a key that is not cached leaves the cache unchanged."""
    seeded = [1, 2, 3, 4]
    cache = WorkerCache()
    for entry in seeded:
        cache.add_task_to_cache(entry)
    self.assertEqual(cache.worker_cache, seeded)
    cache.remove_task_from_cache(5)
    self.assertEqual(cache.worker_cache, seeded)
def test_add_task_to_cache(self):
    """Appending to a pre-seeded host-specific task list keeps prior entries."""
    # NOTE(review): name collides with the other test_add_task_to_cache in this
    # file — presumably these chunks come from different revisions; confirm.
    seeded = [1, 2, 3]
    after_add = [1, 2, 3, 4]
    cache = WorkerCache()
    cache.set_host_specific_task_list(seeded)
    self.assertEqual(cache.host_specific_worker_cache, seeded)
    cache.add_task_to_cache(4)
    self.assertEqual(cache.host_specific_worker_cache, after_add)
def test_invalidate_host(self):
    """invalidate_host wipes all cached tasks for the current host."""
    seeded = [1, 2, 3]
    cache = WorkerCache()
    for entry in seeded:
        cache.add_task_to_cache(entry)
    self.assertEqual(cache.worker_cache, seeded)
    cache.invalidate_host()
    self.assertEqual(cache.worker_cache, [])
def test_task_is_running_true(self, mock_inspect):
    """A cached task id is reported as running."""
    # One worker online according to celery inspect.
    mock_inspect.reserved.return_value = {"celery@kokuworker": ""}
    cache = WorkerCache()
    for entry in (1, 2, 3):
        cache.add_task_to_cache(entry)
    self.assertTrue(cache.task_is_running(1))
def test_get_all_running_tasks(self):
    """Verify task lists cached under different hostnames are combined."""
    second_host = "test"
    tasks_on_first = [1, 2, 3]
    tasks_on_second = [4, 5, 6]
    combined = tasks_on_first + tasks_on_second
    first_cache = WorkerCache()
    for entry in tasks_on_first:
        first_cache.add_task_to_cache(entry)
    # Re-instantiate under a different hostname so entries land in a second bucket.
    with patch.object(settings, "HOSTNAME", second_host):
        second_cache = WorkerCache()
        for entry in tasks_on_second:
            second_cache.add_task_to_cache(entry)
    self.assertEqual(sorted(second_cache.get_all_running_tasks()), sorted(combined))
class ReportDownloaderBase:
    """
    Download cost reports from a provider.

    Base object class for downloading cost reports from a cloud provider.
    """

    def __init__(self, task, download_path=None, **kwargs):
        """
        Create a downloader.

        Args:
            task          (Object) bound celery object
            download_path (String) filesystem path to store downloaded files

        Kwargs:
            customer_name     (String) customer name
            access_credential (Dict) provider access credentials
            report_source     (String) cost report source
            provider_type     (String) cloud provider type
            provider_uuid     (String) cloud provider uuid
            report_name       (String) cost report name

        """
        self._task = task
        if download_path:
            self.download_path = download_path
        else:
            # No path supplied: stage downloads in a fresh temp directory.
            self.download_path = mkdtemp(prefix="masu")
        self.worker_cache = WorkerCache()
        self._cache_key = kwargs.get("cache_key")
        self._provider_uuid = kwargs.get("provider_uuid")
        self.request_id = kwargs.get("request_id")
        self.account = kwargs.get("account")
        # Context dict passed to log_json so log lines carry request metadata.
        self.context = {"request_id": self.request_id, "provider_uuid": self._provider_uuid, "account": self.account}

    def _get_existing_manifest_db_id(self, assembly_id):
        """Return the manifest DB id for this assembly/provider, or None if absent."""
        manifest_id = None
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if manifest:
                manifest_id = manifest.id
        return manifest_id

    def check_if_manifest_should_be_downloaded(self, assembly_id):
        """Check if we should download this manifest.

        We first check if we have a database record of this manifest.
        That would indicate that we have already downloaded and at least
        begun processing. We then check the last completed time for
        a file in this manifest. This second check is to cover the case
        when we did not complete processing and need to re-download and
        process the manifest.

        Returns True if the manifest should be downloaded and processed.
        """
        # Another worker is already handling this manifest — skip it.
        if self._cache_key and self.worker_cache.task_is_running(self._cache_key):
            msg = f"{self._cache_key} is currently running."
            LOG.info(log_json(self.request_id, msg, self.context))
            return False
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if manifest:
                manifest_id = manifest.id
                # check if `last_completed_datetime` is null for any report in the manifest.
                # if nulls exist, report processing is not complete and reports should be downloaded.
                need_to_download = manifest_accessor.is_last_completed_datetime_null(manifest_id)
                if need_to_download:
                    # Claim the task before returning so other workers skip it.
                    self.worker_cache.add_task_to_cache(self._cache_key)
                return need_to_download
        # The manifest does not exist, this is the first time we are
        # downloading and processing it.
        self.worker_cache.add_task_to_cache(self._cache_key)
        return True

    def _process_manifest_db_record(self, assembly_id, billing_start, num_of_files):
        """Insert or update the manifest DB record; return its id."""
        LOG.info("Inserting/updating manifest in database for assembly_id: %s", assembly_id)
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest_entry = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if not manifest_entry:
                msg = f"No manifest entry found in database. Adding for bill period start: {billing_start}"
                LOG.info(log_json(self.request_id, msg, self.context))
                manifest_dict = {
                    "assembly_id": assembly_id,
                    "billing_period_start_datetime": billing_start,
                    "num_total_files": num_of_files,
                    "provider_uuid": self._provider_uuid,
                    "task": self._task.request.id,
                }
                manifest_entry = manifest_accessor.add(**manifest_dict)
            # Touch the manifest even when it already existed.
            manifest_accessor.mark_manifest_as_updated(manifest_entry)
            manifest_id = manifest_entry.id
        return manifest_id
class ReportDownloaderBase:
    """
    Download cost reports from a provider.

    Base object class for downloading cost reports from a cloud provider.
    """

    # pylint: disable=unused-argument
    def __init__(self, task, download_path=None, **kwargs):
        """
        Create a downloader.

        Args:
            task          (Object) bound celery object
            download_path (String) filesystem path to store downloaded files

        Kwargs:
            customer_name     (String) customer name
            access_credential (Dict) provider access credentials
            report_source     (String) cost report source
            provider_type     (String) cloud provider type
            provider_uuid     (String) cloud provider uuid
            report_name       (String) cost report name

        """
        self._task = task
        if download_path:
            self.download_path = download_path
        else:
            # No path supplied: stage downloads in a fresh temp directory.
            self.download_path = mkdtemp(prefix="masu")
        self.worker_cache = WorkerCache()
        self._cache_key = kwargs.get("cache_key")
        # Fix: dropped the redundant `self._provider_uuid = None` that was
        # immediately overwritten by the assignment below.
        self._provider_uuid = kwargs.get("provider_uuid")

    def _get_existing_manifest_db_id(self, assembly_id):
        """Return the manifest DB id for this assembly/provider, or None if absent."""
        manifest_id = None
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if manifest:
                manifest_id = manifest.id
        return manifest_id

    def check_if_manifest_should_be_downloaded(self, assembly_id):
        """Check if we should download this manifest.

        We first check if we have a database record of this manifest.
        That would indicate that we have already downloaded and at least
        begun processing. We then check the last completed time for
        a file in this manifest. This second check is to cover the case
        when we did not complete processing and need to re-download and
        process the manifest.

        Returns True if the manifest should be downloaded and processed.
        """
        # Another worker is already handling this manifest — skip it.
        if self._cache_key and self.worker_cache.task_is_running(self._cache_key):
            msg = f"{self._cache_key} is currently running."
            LOG.info(msg)
            return False
        today = DateAccessor().today_with_timezone("UTC")
        last_completed_cutoff = today - datetime.timedelta(hours=1)
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if manifest:
                manifest_id = manifest.id
                num_processed_files = manifest.num_processed_files
                num_total_files = manifest.num_total_files
                if num_processed_files < num_total_files:
                    completed_datetime = manifest_accessor.get_last_report_completed_datetime(manifest_id)
                    # Simplified from `(completed and completed < cutoff) or not completed`:
                    # download again when processing never completed, or when the last
                    # completion is more than an hour old (stalled run).
                    if not completed_datetime or completed_datetime < last_completed_cutoff:
                        manifest_accessor.reset_manifest(manifest_id)
                        # Claim the task before returning so other workers skip it.
                        self.worker_cache.add_task_to_cache(self._cache_key)
                        return True
                # The manifest exists and we have processed all the files.
                # We should not redownload.
                return False
        # The manifest does not exist, this is the first time we are
        # downloading and processing it.
        self.worker_cache.add_task_to_cache(self._cache_key)
        return True

    def _process_manifest_db_record(self, assembly_id, billing_start, num_of_files):
        """Insert or update the manifest DB record; return its id."""
        LOG.info("Inserting manifest database record for assembly_id: %s", assembly_id)
        with ReportManifestDBAccessor() as manifest_accessor:
            manifest_entry = manifest_accessor.get_manifest(assembly_id, self._provider_uuid)
            if not manifest_entry:
                # Fix: rejoined the log format string that was split mid-literal
                # by extraction into one valid single-line literal.
                LOG.info("No manifest entry found. Adding for bill period start: %s", billing_start)
                manifest_dict = {
                    "assembly_id": assembly_id,
                    "billing_period_start_datetime": billing_start,
                    "num_total_files": num_of_files,
                    "provider_uuid": self._provider_uuid,
                    "task": self._task.request.id,
                }
                manifest_entry = manifest_accessor.add(**manifest_dict)
            # Touch the manifest even when it already existed.
            manifest_accessor.mark_manifest_as_updated(manifest_entry)
            manifest_id = manifest_entry.id
        return manifest_id