def __call__(self): """ Invoke the callable object. All work is performed in the repository working directory and cleaned up after the call. :return: The final synchronization report. :rtype: SyncProgressReport """ self.canceled = False self.report = SyncProgressReport(self.conduit) self.tmp_dir = mkdtemp(dir=self.repo.working_dir) try: manifest = self._fetch_manifest() if manifest is not None: module_paths = self._fetch_modules(manifest) self._import_modules(module_paths) finally: # Update the progress report one last time self.report.update_progress() shutil.rmtree(self.tmp_dir) self.tmp_dir = None return self.report
def __init__(self, repo, sync_conduit, config, is_cancelled_call): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.is_cancelled_call = is_cancelled_call self.progress_report = SyncProgressReport(sync_conduit)
def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot # rely on telling the current downloader to cancel. Therefore, we need another state # tracker to check in the download units loop. self._canceled = False
def __call__(self, repository): """ Invoke the callable object. All work is performed in the repository working directory and cleaned up after the call. :param repository: A Pulp repository object. :type repository: pulp.server.plugins.model.Repository :return: The final synchronization report. :rtype: SyncProgressReport """ self.canceled = False self.report = SyncProgressReport(self.conduit) self.tmp_dir = mkdtemp(dir=repository.working_dir) try: inventory = Inventory(self.conduit) self._run(inventory) finally: # Update the progress report one last time self.report.update_progress() shutil.rmtree(self.tmp_dir) self.tmp_dir = None return self.report
def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None
def setUp(self): super(PuppetStatusRendererTests, self).setUp() self.renderer = PuppetStatusRenderer(self.context) self.config['logging'] = {'filename' : 'test-extension-status.log'} self.sync_report = SyncProgressReport.from_progress_dict(IMPORTER_REPORT) self.publish_report = PublishProgressReport.from_progress_dict(DISTRIBUTOR_REPORT)
def setUp(self): super(PuppetStatusRendererTests, self).setUp() self.renderer = PuppetStatusRenderer(self.context) self.sync_report = SyncProgressReport.from_progress_dict( IMPORTER_REPORT) self.publish_report = PublishProgressReport.from_progress_dict( DISTRIBUTOR_REPORT)
def test_directory_synchronization(self, forge_call, mock_call): conduit = Mock() repository = Mock() config = {constants.CONFIG_FEED: 'http://host/tmp/%s' % constants.MANIFEST_FILENAME} progress_report = SyncProgressReport(conduit) progress_report.metadata_state = constants.STATE_SUCCESS progress_report.modules_state = constants.STATE_SUCCESS mock_call.return_value = progress_report # test plugin = PuppetModuleImporter() report = plugin.sync_repo(repository, conduit, config) # validation mock_call.assert_called_with(repository) self.assertEquals(report, conduit.build_success_report.return_value) self.assertFalse(forge_call.called)
def test_directory_synchronization(self, forge_call, mock_call): conduit = Mock() repository = Mock() config = {constants.CONFIG_FEED: 'http://host/tmp/%s' % constants.MANIFEST_FILENAME} progress_report = SyncProgressReport(conduit) progress_report.metadata_state = constants.STATE_SUCCESS progress_report.modules_state = constants.STATE_SUCCESS mock_call.return_value = progress_report # test plugin = PuppetModuleImporter() report = plugin.sync_repo(repository, conduit, config) # validation mock_call.assert_called_with() self.assertEquals(report, conduit.build_success_report.return_value) self.assertFalse(forge_call.called)
def setUp(self): super(PuppetStatusRendererTests, self).setUp() self.renderer = PuppetStatusRenderer(self.context) self.config['logging'] = {'filename': 'test-extension-status.log'} self.sync_report = SyncProgressReport.from_progress_dict( IMPORTER_REPORT) self.publish_report = PublishProgressReport.from_progress_dict( DISTRIBUTOR_REPORT)
def display_report(self, progress_report): # Sync Steps if constants.IMPORTER_ID in progress_report: sync_report = SyncProgressReport.from_progress_dict(progress_report[constants.IMPORTER_ID]) self._display_sync_metadata_step(sync_report) self._display_sync_modules_step(sync_report) # Publish Steps if constants.DISTRIBUTOR_ID in progress_report: publish_report = PublishProgressReport.from_progress_dict(progress_report[constants.DISTRIBUTOR_ID]) self._display_publish_modules_step(publish_report) self._display_publish_metadata_step(publish_report) self._display_publish_http_https_step(publish_report)
def test_forge_synchronization(self, failed_call, mock_call): conduit = Mock() repository = Mock() config = {constants.CONFIG_FEED: 'http://host/tmp/forge'} # directory synchronization failure needed so the importer # will retry using the forge synchronization. failed_report = SyncProgressReport(conduit) failed_report.metadata_state = constants.STATE_FAILED failed_call.return_value = failed_report progress_report = SyncProgressReport(conduit) progress_report.metadata_state = constants.STATE_FAILED mock_call.return_value = progress_report # test plugin = PuppetModuleImporter() report = plugin.sync_repo(repository, conduit, config) # validation mock_call.assert_called_with() self.assertEquals(report, conduit.build_failure_report.return_value)
def display_report(self, progress_report): # Sync Steps if constants.IMPORTER_ID in progress_report: sync_report = SyncProgressReport.from_progress_dict( progress_report[constants.IMPORTER_ID]) self._display_sync_metadata_step(sync_report) self._display_sync_modules_step(sync_report) # Publish Steps if constants.DISTRIBUTOR_ID in progress_report: publish_report = PublishProgressReport.from_progress_dict( progress_report[constants.DISTRIBUTOR_ID]) self._display_publish_modules_step(publish_report) self._display_publish_metadata_step(publish_report) self._display_publish_http_https_step(publish_report)
class SynchronizeWithPuppetForge(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot # rely on telling the current downloader to cancel. Therefore, we need another state tracker # to check in the download units loop. self._canceled = False def __call__(self): """ Performs the sync operation according to the configured state of the instance. The report to be sent back to Pulp is returned from this call. This call will make calls into the conduit's progress update as appropriate. This call executes serially. No threads are created by this call. It will not return until either a step fails or the entire sync is completed. :return: the report object to return to Pulp from the sync call :rtype: SyncProgressReport """ _logger.info('Beginning sync for repository <%s>' % self.repo.id) # quit now if there is no feed URL defined if not self.config.get(constants.CONFIG_FEED): self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Cannot perform repository sync on a repository with no feed') self.progress_report.update_progress() return self.progress_report.build_final_report() try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() return self.progress_report def cancel(self): """ Cancel an in-progress sync, if there is one. """ self._canceled = True if self.downloader is None: return self.downloader.cancel() def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync.
:return: object representation of the metadata :rtype: RepositoryMetadata """ _logger.info('Beginning metadata retrieval for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() self.downloader = downloader metadata_json_docs = downloader.retrieve_metadata(self.progress_report) except Exception as e: if self._canceled: _logger.warn('Exception occurred on canceled metadata download: %s' % e) self.progress_report.metadata_state = STATE_CANCELED return None _logger.exception('Exception while retrieving metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _('Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None finally:
class SynchronizeWithPuppetForge(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot # rely on telling the current downloader to cancel. Therefore, we need another state # tracker to check in the download units loop. self._canceled = False def __call__(self): """ Sync according to the configured state of the instance and return a report. This function will update progress as appropriate. This function executes serially, and does not create any threads. It will not return until either a step fails or the entire sync is complete. :return: the report object to return to Pulp from the sync call :rtype: SyncProgressReport """ msg = _('Beginning sync for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) # quit now if there is no feed URL defined if not self.config.get(constants.CONFIG_FEED): self.progress_report.metadata_state = STATE_FAILED msg = _('Cannot perform repository sync on a repository with no feed') self.progress_report.metadata_error_message = msg self.progress_report.update_progress() return self.progress_report.build_final_report() try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() return self.progress_report def cancel(self): """ Cancel an in-progress sync, if there is one. """ self._canceled = True if self.downloader is None: return self.downloader.cancel() def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync.
:return: object representation of the metadata :rtype: RepositoryMetadata """ msg = _('Beginning metadata retrieval for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() self.downloader = downloader metadata_json_docs = downloader.retrieve_metadata(self.progress_report) except Exception as e: if self._canceled: msg = _('Exception occurred on canceled metadata download: %(exc)s') msg_dict = {'exc': e} _logger.warn(msg, msg_dict) self.progress_report.metadata_state = STATE_CANCELED return None msg = _('Exception while retrieving metadata for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _('Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None finally: self.downloader = None # Parse the retrieved metadata documents try: metadata = RepositoryMetadata() for doc in metadata_json_docs: metadata.update_from_json(doc) except Exception as e: msg = _('Exception parsing metadata for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.metadata_state = STATE_FAILED msg = _("Error parsing repository modules metadata document") self.progress_report.metadata_error_message = msg self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None # Last update to the progress report before returning self.progress_report.metadata_state = STATE_SUCCESS end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return metadata def _import_modules(self, metadata): """ Imports each module in the repository into Pulp. This method is mostly just a wrapper on top of the actual logic of performing an import to set the stage for the progress report and more importantly catch any rogue exceptions that crop up. :param metadata: object representation of the repository metadata containing the modules to import :type metadata: RepositoryMetadata """ msg = _('Retrieving modules for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) self.progress_report.modules_state = STATE_RUNNING # Do not send the update about the state yet. The counts need to be # set later once we know how many are new, so to prevent a situation # where the report reflects running but does not have counts, wait # until they are populated before sending the update to Pulp. 
start_time = datetime.now() try: self._do_import_modules(metadata) except Exception as e: msg = _('Exception importing modules for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.modules_state = STATE_FAILED self.progress_report.modules_error_message = _('Error retrieving modules') self.progress_report.modules_exception = e self.progress_report.modules_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.modules_execution_time = duration.seconds self.progress_report.update_progress() return # Last update to the progress report before returning self.progress_report.modules_state = STATE_SUCCESS end_time = datetime.now() duration = end_time - start_time self.progress_report.modules_execution_time = duration.seconds self.progress_report.update_progress() def _do_import_modules(self, metadata): """ Actual logic of the import. This method will do a best effort per module; if an individual module fails it will be recorded and the import will continue. This method will only raise an exception in an extreme case where it cannot react and continue. """ downloader = self._create_downloader() self.downloader = downloader # Ease module lookup metadata_modules_by_key = dict([(m.unit_key_as_named_tuple, m) for m in metadata.modules]) # Collect information about the repository's modules before changing it existing_module_ids_by_key = {} modules = repo_controller.find_repo_content_units( self.repo.repo_obj, unit_fields=Module.unit_key_fields, yield_content_unit=True) for module in modules: existing_module_ids_by_key[module.unit_key_as_named_tuple] = module.id new_unit_keys = self._resolve_new_units(existing_module_ids_by_key.keys(), metadata_modules_by_key.keys()) # Once we know how many things need to be processed, we can update the progress report self.progress_report.modules_total_count = len(new_unit_keys) self.progress_report.modules_finished_count = 0 self.progress_report.modules_error_count = 0 self.progress_report.update_progress() # Add new units for key in new_unit_keys: if self._canceled: break module = metadata_modules_by_key[key] try: self._add_new_module(downloader, module) self.progress_report.modules_finished_count += 1 except Exception as e: self.progress_report.add_failed_module(module, e, sys.exc_info()[2]) self.progress_report.update_progress() # Remove missing units if the configuration indicates to do so if self._should_remove_missing(): remove_unit_keys = self._resolve_remove_units(existing_module_ids_by_key.keys(), metadata_modules_by_key.keys()) doomed_ids = [existing_module_ids_by_key[key] for key in remove_unit_keys] doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues() repo_controller.disassociate_units(self.repo.repo_obj, doomed_module_iterator) self.downloader = None def _add_new_module(self, downloader, module): """ Performs the tasks for downloading and saving a new unit in Pulp. This method entirely skips modules that are already in the repository. 
:param downloader: downloader instance to use for retrieving the unit :type downloader: child of pulp_puppet.plugins.importers.downloaders.base.BaseDownloader :param module: module to download and add :type module: pulp_puppet.plugins.db.models.Module """ try: # Download the bits downloaded_filename = downloader.retrieve_module(self.progress_report, module) # Extract the extra metadata into the module metadata = metadata_module.extract_metadata(downloaded_filename, self.repo.working_dir) # Overwrite the author and name metadata.update(Module.split_filename(metadata['name'])) # Create and save the Module module = Module.from_metadata(metadata) module.set_storage_path(os.path.basename(downloaded_filename)) try: module.save_and_import_content(downloaded_filename) except NotUniqueError: module = module.__class__.objects.get(**module.unit_key) # Associate the module with the repo repo_controller.associate_single_unit(self.repo.repo_obj, module) finally: downloader.cleanup_module(module) def _resolve_new_units(self, existing, wanted): """ Decide which units need to be downloaded. Filter out units which are already in the repository, and associate units which are already downloaded. :param existing: units which are already in a repository :type existing: list of unit keys as namedtuples :param wanted: units which should be imported into a repository :type wanted: list of unit keys as namedtuples :return: list of unit keys to download; empty list if all units are already downloaded :rtype: list of unit keys as namedtuples """ model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE) unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in wanted) still_wanted = set(wanted) for unit in units_controller.find_units(unit_generator): file_exists = unit._storage_path is not None and os.path.isfile(unit._storage_path) if file_exists: if unit.unit_key_as_named_tuple not in existing: repo_controller.associate_single_unit(self.repo.repo_obj, unit) still_wanted.discard(unit.unit_key_as_named_tuple) return list(still_wanted) def _resolve_remove_units(self, existing_unit_keys, metadata_unit_keys): """ Returns a list of unit keys that are in the repository but not in the current repository metadata. :return: list of unit keys; empty list if none have been removed :rtype: list """ return list(set(existing_unit_keys) - set(metadata_unit_keys)) def _create_downloader(self): """ Uses the configuration to determine which downloader style to use for this run. :return: one of the *Downloader classes in the downloaders module """ feed = self.config.get(constants.CONFIG_FEED) return downloader_factory.get_downloader(feed, self.repo, self.sync_conduit, self.config) def _should_remove_missing(self): """ Returns whether or not missing units should be removed. :return: true if missing units should be removed; false otherwise :rtype: bool """ if constants.CONFIG_REMOVE_MISSING not in self.config.keys(): return constants.DEFAULT_REMOVE_MISSING else: return self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
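A minimal usage sketch for the SynchronizeWithPuppetForge class above, assuming the importer framework has already supplied a repository object, a RepoSyncConduit, and a PluginCallConfiguration; the variable names below are illustrative only, not part of the plugin API: sync = SynchronizeWithPuppetForge(repo, sync_conduit, config) # The instance is callable; invoking it runs the whole sync serially and returns the # SyncProgressReport that was updated along the way. progress_report = sync() # cancel() may be called from another thread; it flips the internal flag and forwards # the cancel to the current Nectar downloader, if one exists. # sync.cancel() final_report = progress_report.build_final_report()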
class PuppetModuleSyncRun(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config, is_cancelled_call): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.is_cancelled_call = is_cancelled_call self.progress_report = SyncProgressReport(sync_conduit) def perform_sync(self): """ Performs the sync operation according to the configured state of the instance. The report to be sent back to Pulp is returned from this call. This call will make calls into the conduit's progress update as appropriate. This call executes serially. No threads are created by this call. It will not return until either a step fails or the entire sync is completed. :return: the report object to return to Pulp from the sync call :rtype: pulp.plugins.model.SyncReport """ _LOG.info('Beginning sync for repository <%s>' % self.repo.id) try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() report = self.progress_report.build_final_report() return report def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync. :return: object representation of the metadata :rtype: RepositoryMetadata """ _LOG.info('Beginning metadata retrieval for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() metadata_json_docs = downloader.retrieve_metadata(self.progress_report) except Exception as e: _LOG.exception('Exception while retrieving metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _('Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None # Parse the retrieved metadata documents try: metadata = RepositoryMetadata() for doc in metadata_json_docs: metadata.update_from_json(doc) except Exception as e: _LOG.exception('Exception parsing metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _('Error parsing repository modules metadata document') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None
class SynchronizeWithPuppetForge(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot # rely on telling the current downloader to cancel. Therefore, we need another state tracker # to check in the download units loop. self._canceled = False def __call__(self): """ Performs the sync operation according to the configured state of the instance. The report to be sent back to Pulp is returned from this call. This call will make calls into the conduit's progress update as appropriate. This call executes serially. No threads are created by this call. It will not return until either a step fails or the entire sync is completed. :return: the report object to return to Pulp from the sync call :rtype: SyncProgressReport """ _logger.info('Beginning sync for repository <%s>' % self.repo.id) # quit now if there is no feed URL defined if not self.config.get(constants.CONFIG_FEED): self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Cannot perform repository sync on a repository with no feed') self.progress_report.update_progress() return self.progress_report.build_final_report() try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() return self.progress_report def cancel(self): """ Cancel an in-progress sync, if there is one. """ self._canceled = True if self.downloader is None: return self.downloader.cancel() def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync.
:return: object representation of the metadata :rtype: RepositoryMetadata """ _logger.info('Beginning metadata retrieval for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() self.downloader = downloader metadata_json_docs = downloader.retrieve_metadata( self.progress_report) except Exception as e: if self._canceled: _logger.warn( 'Exception occurred on canceled metadata download: %s' % e) self.progress_report.metadata_state = STATE_CANCELED return None _logger.exception( 'Exception while retrieving metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None finally:
def setUp(self): super(PuppetStatusRendererTests, self).setUp() self.renderer = PuppetStatusRenderer(self.context) self.sync_report = SyncProgressReport.from_progress_dict(IMPORTER_REPORT) self.publish_report = PublishProgressReport.from_progress_dict(DISTRIBUTOR_REPORT)
class SynchronizeWithPuppetForge(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.progress_report = SyncProgressReport(sync_conduit) self.downloader = None # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot # rely on telling the current downloader to cancel. Therefore, we need another state # tracker to check in the download units loop. self._canceled = False def __call__(self): """ Sync according to the configured state of the instance and return a report. This function will update progress as appropriate. This function executes serially, and does not create any threads. It will not return until either a step fails or the entire sync is complete. :return: the report object to return to Pulp from the sync call :rtype: SyncProgressReport """ msg = _('Beginning sync for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) # quit now if there is no feed URL defined if not self.config.get(constants.CONFIG_FEED): self.progress_report.metadata_state = STATE_FAILED msg = _( 'Cannot perform repository sync on a repository with no feed') self.progress_report.metadata_error_message = msg self.progress_report.update_progress() return self.progress_report.build_final_report() try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() return self.progress_report def cancel(self): """ Cancel an in-progress sync, if there is one. """ self._canceled = True if self.downloader is None: return self.downloader.cancel() def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync.
:return: object representation of the metadata :rtype: RepositoryMetadata """ msg = _('Beginning metadata retrieval for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() self.downloader = downloader metadata_json_docs = downloader.retrieve_metadata( self.progress_report) except Exception as e: if self._canceled: msg = _( 'Exception occurred on canceled metadata download: %(exc)s' ) msg_dict = {'exc': e} _logger.warn(msg, msg_dict) self.progress_report.metadata_state = STATE_CANCELED return None msg = _( 'Exception while retrieving metadata for repository <%(repo_id)s>' ) msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None finally: self.downloader = None # Parse the retrieved metadata documents try: metadata = RepositoryMetadata() for doc in metadata_json_docs: metadata.update_from_json(doc) except Exception as e: msg = _('Exception parsing metadata for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Error parsing repository modules metadata document') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None # Last update to the progress report before returning self.progress_report.metadata_state = STATE_SUCCESS end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return metadata def _import_modules(self, metadata): """ Imports each module in the repository into Pulp. This method is mostly just a wrapper on top of the actual logic of performing an import to set the stage for the progress report and more importantly catch any rogue exceptions that crop up. :param metadata: object representation of the repository metadata containing the modules to import :type metadata: RepositoryMetadata """ msg = _('Retrieving modules for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.info(msg, msg_dict) self.progress_report.modules_state = STATE_RUNNING # Do not send the update about the state yet. The counts need to be # set later once we know how many are new, so to prevent a situation # where the report reflects running but does not have counts, wait # until they are populated before sending the update to Pulp. 
start_time = datetime.now() try: self._do_import_modules(metadata) except Exception as e: msg = _('Exception importing modules for repository <%(repo_id)s>') msg_dict = {'repo_id': self.repo.id} _logger.exception(msg, msg_dict) self.progress_report.modules_state = STATE_FAILED self.progress_report.modules_error_message = _( 'Error retrieving modules') self.progress_report.modules_exception = e self.progress_report.modules_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.modules_execution_time = duration.seconds self.progress_report.update_progress() return # Last update to the progress report before returning self.progress_report.modules_state = STATE_SUCCESS end_time = datetime.now() duration = end_time - start_time self.progress_report.modules_execution_time = duration.seconds self.progress_report.update_progress() def _do_import_modules(self, metadata): """ Actual logic of the import. This method will do a best effort per module; if an individual module fails it will be recorded and the import will continue. This method will only raise an exception in an extreme case where it cannot react and continue. """ downloader = self._create_downloader() self.downloader = downloader # Ease module lookup metadata_modules_by_key = dict([(m.unit_key_str, m) for m in metadata.modules]) # Collect information about the repository's modules before changing it existing_module_ids_by_key = {} modules = repo_controller.find_repo_content_units( self.repo.repo_obj, unit_fields=Module.unit_key_fields, yield_content_unit=True) for module in modules: existing_module_ids_by_key[module.unit_key_str] = module.id new_unit_keys = self._resolve_new_units( existing_module_ids_by_key.keys(), metadata_modules_by_key.keys()) # Once we know how many things need to be processed, we can update the progress report self.progress_report.modules_total_count = len(new_unit_keys) self.progress_report.modules_finished_count = 0 self.progress_report.modules_error_count = 0 self.progress_report.update_progress() # Add new units for key in new_unit_keys: if self._canceled: break module = metadata_modules_by_key[key] try: self._add_new_module(downloader, module) self.progress_report.modules_finished_count += 1 except Exception as e: self.progress_report.add_failed_module(module, e, sys.exc_info()[2]) self.progress_report.update_progress() # Remove missing units if the configuration indicates to do so if self._should_remove_missing(): remove_unit_keys = self._resolve_remove_units( existing_module_ids_by_key.keys(), metadata_modules_by_key.keys()) doomed_ids = [ existing_module_ids_by_key[key] for key in remove_unit_keys ] doomed_module_iterator = Module.objects.in_bulk( doomed_ids).itervalues() repo_controller.disassociate_units(self.repo, doomed_module_iterator) self.downloader = None def _add_new_module(self, downloader, module): """ Performs the tasks for downloading and saving a new unit in Pulp. This method entirely skips modules that are already in the repository. 
:param downloader: downloader instance to use for retrieving the unit :type downloader: child of pulp_puppet.plugins.importers.downloaders.base.BaseDownloader :param module: module to download and add :type module: pulp_puppet.plugins.db.models.Module """ try: # Download the bits downloaded_filename = downloader.retrieve_module( self.progress_report, module) # Extract the extra metadata into the module metadata = metadata_module.extract_metadata( downloaded_filename, self.repo.working_dir) # Overwrite the author and name metadata.update(Module.split_filename(metadata['name'])) # Create and save the Module module = Module.from_metadata(metadata) module.set_storage_path(os.path.basename(downloaded_filename)) try: module.save_and_import_content(downloaded_filename) except NotUniqueError: module = module.__class__.objects.get(**module.unit_key) # Associate the module with the repo repo_controller.associate_single_unit(self.repo.repo_obj, module) finally: downloader.cleanup_module(module) def _resolve_new_units(self, existing_unit_keys, metadata_unit_keys): """ Returns a list of metadata keys that are new to the repository. :return: list of unit keys; empty list if none are new :rtype: list """ return list(set(metadata_unit_keys) - set(existing_unit_keys)) def _resolve_remove_units(self, existing_unit_keys, metadata_unit_keys): """ Returns a list of unit keys that are in the repository but not in the current repository metadata. :return: list of unit keys; empty list if none have been removed :rtype: list """ return list(set(existing_unit_keys) - set(metadata_unit_keys)) def _create_downloader(self): """ Uses the configuration to determine which downloader style to use for this run. :return: one of the *Downloader classes in the downloaders module """ feed = self.config.get(constants.CONFIG_FEED) return downloader_factory.get_downloader(feed, self.repo, self.sync_conduit, self.config) def _should_remove_missing(self): """ Returns whether or not missing units should be removed. :return: true if missing units should be removed; false otherwise :rtype: bool """ if constants.CONFIG_REMOVE_MISSING not in self.config.keys(): return constants.DEFAULT_REMOVE_MISSING else: return self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
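The _resolve_new_units and _resolve_remove_units helpers above are plain set arithmetic on unit keys. A self-contained illustration, with hypothetical unit-key tuples invented for the example: existing_unit_keys = [('apache', '1.0.0', 'puppetlabs'), ('stdlib', '4.1.0', 'puppetlabs')] metadata_unit_keys = [('stdlib', '4.1.0', 'puppetlabs'), ('mysql', '3.0.0', 'puppetlabs')] # New units: present in the remote metadata but not yet in the repository. new_keys = list(set(metadata_unit_keys) - set(existing_unit_keys)) # -> [('mysql', '3.0.0', 'puppetlabs')] # Removed units: present in the repository but no longer in the remote metadata. removed_keys = list(set(existing_unit_keys) - set(metadata_unit_keys)) # -> [('apache', '1.0.0', 'puppetlabs')]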
class SynchronizeWithDirectory(object): """ A callable object used to synchronize with a directory of packaged puppet modules. The source of the import is a directory containing a PULP_MANIFEST and multiple puppet built puppet modules. :ivar conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :ivar config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration :ivar report: An import report. :type report: SyncProgressReport :ivar canceled: The operation canceled flag. :type canceled: bool :ivar tmp_dir: The path to the temporary directory used to download files. :type tmp_dir: str """ @staticmethod def _extract_metadata(module_path): """ Extract the puppet module metadata from the tarball at the specified path. Search the tarball content for a file named: */metadata.json and extract it into temporary directory. Then read the file and return the json decoded content. :param module_path: The fully qualified path to the module. :type module_path: str :return: The puppet module metadata. :rtype: dict """ tmp_dir = mkdtemp(dir=os.path.dirname(module_path)) try: with closing(tarfile.open(module_path)) as tarball: for member in tarball.getmembers(): path = member.name.split('/') if path[-1] == constants.MODULE_METADATA_FILENAME: tarball.extract(member, tmp_dir) with open(os.path.join(tmp_dir, member.name)) as fp: return json.load(fp) finally: shutil.rmtree(tmp_dir) def __init__(self, conduit, config): """ :param conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration """ self.conduit = conduit self.config = config self.report = None self.canceled = False self.tmp_dir = None def feed_url(self): """ Get the feed URL from the configuration and ensure it has a trailing '/' so urljoin will work correctly. :return: The feed URL. :rtype: str """ url = self.config.get(constants.CONFIG_FEED) if not url.endswith('/'): url += '/' return url def cancel(self): """ Cancel puppet module import. """ self.canceled = True def _download(self, urls): """ Download files by URL. Encapsulates nectar details and provides a simplified method of downloading files. :param urls: A list of tuples: (url, destination). The *url* and *destination* are both strings. The *destination* is the fully qualified path to where the file is to be downloaded. :type urls: list :return: The nectar reports. Tuple of: (succeeded_reports, failed_reports) :rtype: tuple """ feed_url = self.feed_url() nectar_config = importer_config_to_nectar_config(self.config.flatten()) nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme] downloader = nectar_class(nectar_config) listener = DownloadListener(self, downloader) request_list = [] for url, destination in urls: request_list.append(DownloadRequest(url, destination)) downloader.download(request_list) nectar_config.finalize() for report in listener.succeeded_reports: _logger.info(FETCH_SUCCEEDED % dict(url=report.url, dst=report.destination)) for report in listener.failed_reports: _logger.error(FETCH_FAILED % dict(url=report.url, msg=report.error_msg)) return listener.succeeded_reports, listener.failed_reports def _fetch_manifest(self): """ Fetch the PULP_MANIFEST. After the manifest is fetched, the file is parsed into a list of tuples. :return: The manifest content. List of: (name,checksum,size). 
:rtype: list """ started = time() # report progress: started self.report.metadata_state = constants.STATE_RUNNING self.report.metadata_query_total_count = 1 self.report.metadata_query_finished_count = 0 self.report.update_progress() # download manifest destination = StringIO() feed_url = self.feed_url() url = urljoin(feed_url, constants.MANIFEST_FILENAME) succeeded_reports, failed_reports = self._download([(url, destination) ]) # report download failed if failed_reports: report = failed_reports[0] self.report.metadata_state = constants.STATE_FAILED self.report.metadata_error_message = report.error_msg self.report.metadata_execution_time = time() - started return None # report download succeeded self.report.metadata_state = constants.STATE_SUCCESS self.report.metadata_query_finished_count = 1 self.report.metadata_current_query = None self.report.metadata_execution_time = time() - started self.report.update_progress() # return parsed manifest entries = destination.getvalue().split('\n') manifest = [tuple(e.split(',')) for e in entries if e] return manifest def _fetch_modules(self, manifest): """ Fetch all of the modules referenced in the manifest. :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size). :type manifest: list :return: A list of paths to the fetched module files. :rtype: list """ self.started_fetch_modules = time() # report progress: started self.report.modules_state = constants.STATE_RUNNING self.report.modules_total_count = len(manifest) self.report.modules_finished_count = 0 self.report.modules_error_count = 0 self.report.update_progress() # download modules urls = [] feed_url = self.feed_url() for path, checksum, size in manifest: url = urljoin(feed_url, path) destination = os.path.join(self.tmp_dir, os.path.basename(path)) urls.append((url, destination)) succeeded_reports, failed_reports = self._download(urls) # report failed downloads if failed_reports: self.report.modules_state = constants.STATE_FAILED self.report.modules_error_count = len(failed_reports) self.report.modules_individual_errors = [] for report in failed_reports: self.report.modules_individual_errors.append(report.error_msg) self.report.update_progress() return [r.destination for r in succeeded_reports] def _import_modules(self, module_paths): """ Import the puppet modules (tarballs) at the specified paths. This will also handle removing any modules in the local repository if they are no longer present on remote repository and the 'remove_missing' config value is True. :param module_paths: A list of paths to puppet module files. :type module_paths: list """ criteria = UnitAssociationCriteria( type_ids=[constants.TYPE_PUPPET_MODULE], unit_fields=Module.UNIT_KEY_NAMES) local_units = self.conduit.get_units(criteria=criteria) local_unit_keys = [unit.unit_key for unit in local_units] remote_unit_keys = [] for module_path in module_paths: if self.canceled: return puppet_manifest = self._extract_metadata(module_path) module = Module.from_json(puppet_manifest) remote_unit_keys.append(module.unit_key()) # Even though we've already basically processed this unit, not doing this makes the # progress reporting confusing because it shows Pulp always importing all the modules. 
if module.unit_key() in local_unit_keys: self.report.modules_total_count -= 1 continue _logger.debug(IMPORT_MODULE % dict(mod=module_path)) self._add_module(module_path, module) self.report.modules_finished_count += 1 self.report.update_progress() # Write the report, making sure we don't overwrite a failure in _fetch_modules if self.report.modules_state not in constants.COMPLETE_STATES: self.report.modules_state = constants.STATE_SUCCESS self.report.modules_execution_time = time( ) - self.started_fetch_modules self.report.update_progress() remove_missing = self.config.get_boolean( constants.CONFIG_REMOVE_MISSING) if remove_missing is None: remove_missing = constants.DEFAULT_REMOVE_MISSING if remove_missing: self._remove_missing(local_units, remote_unit_keys) def _remove_missing(self, local_units, remote_unit_keys): """ Removes units from the local repository if they are missing from the remote repository. :param local_units: A list of units associated with the current repository :type local_units: list of AssociatedUnit :param remote_unit_keys: a list of all the unit keys in the remote repository :type remote_unit_keys: list of dict """ for missing in [ unit for unit in local_units if unit.unit_key not in remote_unit_keys ]: if self.canceled: return self.conduit.remove_unit(missing) def _add_module(self, path, module): """ Add the specified module to Pulp using the conduit. This will both create the module and associate it to a repository. The module tarball is copied to the *storage path* only if it does not already exist at the *storage path*. :param path: The path to the downloaded module tarball. :type path: str :param module: A puppet module model object. :type module: Module """ type_id = constants.TYPE_PUPPET_MODULE unit_key = module.unit_key() unit_metadata = module.unit_metadata() relative_path = constants.STORAGE_MODULE_RELATIVE_PATH % module.filename( ) unit = self.conduit.init_unit(type_id, unit_key, unit_metadata, relative_path) if not os.path.exists(unit.storage_path): shutil.copy(path, unit.storage_path) self.conduit.save_unit(unit) def __call__(self, repository): """ Invoke the callable object. All work is performed in the repository working directory and cleaned up after the call. :param repository: A Pulp repository object. :type repository: pulp.server.plugins.model.Repository :return: The final synchronization report. :rtype: SyncProgressReport """ self.canceled = False self.report = SyncProgressReport(self.conduit) self.tmp_dir = mkdtemp(dir=repository.working_dir) try: manifest = self._fetch_manifest() if manifest is not None: module_paths = self._fetch_modules(manifest) self._import_modules(module_paths) finally: # Update the progress report one last time self.report.update_progress() shutil.rmtree(self.tmp_dir) self.tmp_dir = None return self.report
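For reference, the PULP_MANIFEST handling in _fetch_manifest above reduces to splitting lines and commas. A standalone sketch of that parsing, using invented manifest content: manifest_text = 'puppetlabs-stdlib-4.1.0.tar.gz,1234abcd,71958\npuppetlabs-apache-1.0.0.tar.gz,5678ef01,123456\n' # Same parsing as _fetch_manifest: skip blank lines, split each entry on commas into a (name, checksum, size) tuple. entries = manifest_text.split('\n') manifest = [tuple(e.split(',')) for e in entries if e] # -> [('puppetlabs-stdlib-4.1.0.tar.gz', '1234abcd', '71958'), ('puppetlabs-apache-1.0.0.tar.gz', '5678ef01', '123456')]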
class SynchronizeWithDirectory(object): """ A callable object used to synchronize with a directory of packaged puppet modules. The source of the import is a directory containing a PULP_MANIFEST and multiple puppet built puppet modules. :ivar conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit :ivar config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration :ivar report: An import report. :type report: SyncProgressReport :ivar canceled: The operation canceled flag. :type canceled: bool :ivar tmp_dir: The path to the temporary directory used to download files. :type tmp_dir: str """ @staticmethod def _extract_metadata(module_path): """ Extract the puppet module metadata from the tarball at the specified path. Search the tarball content for a file named: */metadata.json and extract it into temporary directory. Then read the file and return the json decoded content. :param module_path: The fully qualified path to the module. :type module_path: str :return: The puppet module metadata. :rtype: dict """ tmp_dir = mkdtemp(dir=os.path.dirname(module_path)) try: with closing(tarfile.open(module_path)) as tarball: for member in tarball.getmembers(): path = member.name.split('/') if path[-1] == constants.MODULE_METADATA_FILENAME: tarball.extract(member, tmp_dir) with open(os.path.join(tmp_dir, member.name)) as fp: return json.load(fp) finally: shutil.rmtree(tmp_dir) def __init__(self, conduit, config): """ :param conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit :param config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration """ self.conduit = conduit self.config = config self.report = None self.canceled = False self.tmp_dir = None def feed_url(self): """ Get the feed URL from the configuration and ensure it has a trailing '/' so urljoin will work correctly. :return: The feed URL. :rtype: str """ url = self.config.get(constants.CONFIG_FEED) if not url.endswith('/'): url += '/' return url def cancel(self): """ Cancel puppet module import. """ self.canceled = True def _run(self, inventory): """ Perform the synchronization using the supplied inventory. :param inventory: An inventory object. :type inventory: Inventory """ manifest = self._fetch_manifest() if manifest is None: # fetch manifest failed return module_paths = self._fetch_modules(manifest) imported_modules = self._import_modules(inventory, module_paths) self._purge_unwanted_modules(inventory, imported_modules) def _download(self, urls): """ Download files by URL. Encapsulates nectar details and provides a simplified method of downloading files. :param urls: A list of tuples: (url, destination). The *url* and *destination* are both strings. The *destination* is the fully qualified path to where the file is to be downloaded. :type urls: list :return: The nectar reports. 
Tuple of: (succeeded_reports, failed_reports) :rtype: tuple """ feed_url = self.feed_url() nectar_config = importer_config_to_nectar_config(self.config.flatten()) nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme] downloader = nectar_class(nectar_config) listener = DownloadListener(self, downloader) request_list = [] for url, destination in urls: request_list.append(DownloadRequest(url, destination)) downloader.download(request_list) nectar_config.finalize() for report in listener.succeeded_reports: _LOG.info(FETCH_SUCCEEDED % dict(url=report.url, dst=report.destination)) for report in listener.failed_reports: _LOG.error(FETCH_FAILED % dict(url=report.url, msg=report.error_msg)) return listener.succeeded_reports, listener.failed_reports def _fetch_manifest(self): """ Fetch the PULP_MANIFEST. After the manifest is fetched, the file is parsed into a list of tuples. :return: The manifest content. List of: (name,checksum,size). :rtype: list """ started = time() # report progress: started self.report.metadata_state = constants.STATE_RUNNING self.report.metadata_query_total_count = 1 self.report.metadata_query_finished_count = 0 self.report.update_progress() # download manifest destination = StringIO() feed_url = self.feed_url() url = urljoin(feed_url, constants.MANIFEST_FILENAME) succeeded_reports, failed_reports = self._download([(url, destination)]) # report download failed if failed_reports: report = failed_reports[0] self.report.metadata_state = constants.STATE_FAILED self.report.metadata_error_message = report.error_msg self.report.metadata_execution_time = time() - started return None # report download succeeded self.report.metadata_state = constants.STATE_SUCCESS self.report.metadata_query_finished_count = 1 self.report.metadata_current_query = None self.report.metadata_execution_time = time() - started self.report.update_progress() # return parsed manifest entries = destination.getvalue().split('\n') manifest = [tuple(e.split(',')) for e in entries if e] return manifest def _fetch_modules(self, manifest): """ Fetch all of the modules referenced in the manifest. :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size). :type manifest: list :return: A list of paths to the fetched module files. :rtype: list """ self.started_fetch_modules = time() # report progress: started self.report.modules_state = constants.STATE_RUNNING self.report.modules_total_count = len(manifest) self.report.modules_finished_count = 0 self.report.modules_error_count = 0 self.report.update_progress() # download modules urls = [] feed_url = self.feed_url() for path, checksum, size in manifest: url = urljoin(feed_url, path) destination = os.path.join(self.tmp_dir, os.path.basename(path)) urls.append((url, destination)) succeeded_reports, failed_reports = self._download(urls) # report failed downloads if failed_reports: self.report.modules_state = constants.STATE_FAILED self.report.modules_error_count = len(failed_reports) self.report.modules_individual_errors = [] for report in failed_reports: self.report.modules_individual_errors.append(report.error_msg) self.report.update_progress() return [r.destination for r in succeeded_reports] def _import_modules(self, inventory, module_paths): """ Import the puppet modules (tarballs) at the specified paths. :param inventory: A module inventory object. :type inventory: Inventory :param module_paths: A list of paths to puppet module files. :type module_paths: list :return: A list of the imported module unit keys. 
:rtype: list """ imported_modules = [] for module_path in module_paths: if self.canceled: return [] puppet_manifest = self._extract_metadata(module_path) module = Module.from_json(puppet_manifest) if inventory.already_associated(module): # Decrement the total number of modules we're importing self.report.modules_total_count -= 1 continue _LOG.info(IMPORT_MODULE % dict(mod=module_path)) imported_modules.append(module.unit_key()) self._add_module(module_path, module) self.report.modules_finished_count += 1 self.report.update_progress() # Write the report, making sure we don't overwrite a failure in _fetch_modules if self.report.modules_state != constants.STATE_FAILED: self.report.modules_state = constants.STATE_SUCCESS self.report.modules_execution_time = time() - self.started_fetch_modules self.report.update_progress() return imported_modules def _add_module(self, path, module): """ Add the specified module to Pulp using the conduit. This will both create the module and associate it to a repository. The module tarball is copied to the *storage path* only if it does not already exist at the *storage path*. :param path: The path to the downloaded module tarball. :type path: str :param module: A puppet module model object. :type module: Module """ type_id = constants.TYPE_PUPPET_MODULE unit_key = module.unit_key() unit_metadata = module.unit_metadata() relative_path = constants.STORAGE_MODULE_RELATIVE_PATH % module.filename() unit = self.conduit.init_unit(type_id, unit_key, unit_metadata, relative_path) if not os.path.exists(unit.storage_path): shutil.copy(path, unit.storage_path) self.conduit.save_unit(unit) def _purge_unwanted_modules(self, inventory, imported_modules): """ Purge unwanted puppet modules. Unwanted modules are those modules associated with the repository but not imported during this operation. Skipped when the configuration does not specify to perform it. The inventory is used to determine which modules should be removed. :param inventory: A module inventory object. :type inventory: Inventory :param imported_modules: List of imported modules. List of: Module. :type imported_modules: list """ purge_option = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING) if purge_option is None: purge_option = constants.DEFAULT_REMOVE_MISSING if not purge_option: # no purge requested return for unit_key in inventory.unwanted_modules(imported_modules): if self.canceled: return self.conduit.remove_unit(unit_key) def __call__(self, repository): """ Invoke the callable object. All work is performed in the repository working directory and cleaned up after the call. :param repository: A Pulp repository object. :type repository: pulp.server.plugins.model.Repository :return: The final synchronization report. :rtype: SyncProgressReport """ self.canceled = False self.report = SyncProgressReport(self.conduit) self.tmp_dir = mkdtemp(dir=repository.working_dir) try: inventory = Inventory(self.conduit) self._run(inventory) finally: # Update the progress report one last time self.report.update_progress() shutil.rmtree(self.tmp_dir) self.tmp_dir = None return self.report
class SynchronizeWithDirectory(object): """ A callable object used to synchronize with a directory of packaged puppet modules. The source of the import is a directory containing a PULP_MANIFEST and multiple built puppet modules. :ivar repo: A Pulp repository object :type repo: pulp.plugins.model.Repository :ivar conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :ivar config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration :ivar report: An import report. :type report: SyncProgressReport :ivar canceled: The operation canceled flag. :type canceled: bool :ivar tmp_dir: The path to the temporary directory used to download files. :type tmp_dir: str """ @staticmethod def _extract_metadata(module_path): """ Extract the puppet module metadata from the tarball at the specified path. Search the tarball content for a file named: */metadata.json and extract it into a temporary directory. Then read the file and return the JSON-decoded content. :param module_path: The fully qualified path to the module. :type module_path: str :return: The puppet module metadata. :rtype: dict """ tmp_dir = mkdtemp(dir=os.path.dirname(module_path)) try: with closing(tarfile.open(module_path)) as tarball: for member in tarball.getmembers(): path = member.name.split('/') if path[-1] == constants.MODULE_METADATA_FILENAME: tarball.extract(member, tmp_dir) with open(os.path.join(tmp_dir, member.name)) as fp: return json.load(fp) finally: shutil.rmtree(tmp_dir) def __init__(self, repo, conduit, config): """ :param repo: A Pulp repository object :type repo: pulp.plugins.model.Repository :param conduit: Provides access to relevant Pulp functionality. :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: Plugin configuration. :type config: pulp.plugins.config.PluginCallConfiguration """ self.repo = repo self.conduit = conduit self.config = config self.report = None self.canceled = False self.tmp_dir = None def feed_url(self): """ Get the feed URL from the configuration and ensure it has a trailing '/' so urljoin will work correctly. :return: The feed URL. :rtype: str """ url = self.config.get(constants.CONFIG_FEED) if not url.endswith('/'): url += '/' return url def cancel(self): """ Cancel puppet module import. """ self.canceled = True def _download(self, urls): """ Download files by URL. Encapsulates nectar details and provides a simplified method of downloading files. :param urls: A list of tuples: (url, destination). The *url* and *destination* are both strings. The *destination* is the fully qualified path to where the file is to be downloaded. :type urls: list :return: The nectar reports. Tuple of: (succeeded_reports, failed_reports) :rtype: tuple """ feed_url = self.feed_url() nectar_config = importer_config_to_nectar_config(self.config.flatten()) nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme] downloader = nectar_class(nectar_config) listener = DownloadListener(self, downloader) request_list = [] for url, destination in urls: request_list.append(DownloadRequest(url, destination)) downloader.download(request_list) nectar_config.finalize() for report in listener.succeeded_reports: _logger.info(FETCH_SUCCEEDED, dict(url=report.url, dst=report.destination)) for report in listener.failed_reports: _logger.error(FETCH_FAILED, dict(url=report.url, msg=report.error_msg)) return listener.succeeded_reports, listener.failed_reports def _fetch_manifest(self): """ Fetch the PULP_MANIFEST.
After the manifest is fetched, the file is parsed into a list of tuples. :return: The manifest content. List of: (name,checksum,size). :rtype: list """ started = time() # report progress: started self.report.metadata_state = constants.STATE_RUNNING self.report.metadata_query_total_count = 1 self.report.metadata_query_finished_count = 0 self.report.update_progress() # download manifest destination = StringIO() feed_url = self.feed_url() url = urljoin(feed_url, constants.MANIFEST_FILENAME) succeeded_reports, failed_reports = self._download([(url, destination)]) # report download failed if failed_reports: report = failed_reports[0] self.report.metadata_state = constants.STATE_FAILED self.report.metadata_error_message = report.error_msg self.report.metadata_execution_time = time() - started return None # report download succeeded self.report.metadata_state = constants.STATE_SUCCESS self.report.metadata_query_finished_count = 1 self.report.metadata_current_query = None self.report.metadata_execution_time = time() - started self.report.update_progress() # return parsed manifest entries = destination.getvalue().split('\n') manifest = [tuple(e.split(',')) for e in entries if e] return manifest def _fetch_modules(self, manifest): """ Fetch all of the modules referenced in the manifest. :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size). :type manifest: list :return: A list of paths to the fetched module files. :rtype: list """ self.started_fetch_modules = time() # report progress: started self.report.modules_state = constants.STATE_RUNNING self.report.modules_total_count = len(manifest) self.report.modules_finished_count = 0 self.report.modules_error_count = 0 self.report.update_progress() # download modules urls = [] feed_url = self.feed_url() for path, checksum, size in manifest: url = urljoin(feed_url, path) destination = os.path.join(self.tmp_dir, os.path.basename(path)) urls.append((url, destination)) succeeded_reports, failed_reports = self._download(urls) # report failed downloads if failed_reports: self.report.modules_state = constants.STATE_FAILED self.report.modules_error_count = len(failed_reports) self.report.modules_individual_errors = [] for report in failed_reports: self.report.modules_individual_errors.append(report.error_msg) self.report.update_progress() return [r.destination for r in succeeded_reports] def _import_modules(self, module_paths): """ Import the puppet modules (tarballs) at the specified paths. This will also handle removing any modules in the local repository if they are no longer present on remote repository and the 'remove_missing' config value is True. :param module_paths: A list of paths to puppet module files. 
:type module_paths: list """ existing_module_ids_by_key = {} for module in Module.objects.only(*Module.unit_key_fields).all(): existing_module_ids_by_key[module.unit_key_str] = module.id remote_paths = {} list_of_modules = [] for module_path in module_paths: puppet_manifest = self._extract_metadata(module_path) module = Module.from_metadata(puppet_manifest) remote_paths[module.unit_key_str] = module_path list_of_modules.append(module) pub_step = publish_step.GetLocalUnitsStep(constants.IMPORTER_TYPE_ID, available_units=list_of_modules, repo=self.repo) pub_step.process_main() self.report.modules_total_count = len(pub_step.units_to_download) for module in pub_step.units_to_download: remote_path = remote_paths[module.unit_key_str] if self.canceled: return _logger.debug(IMPORT_MODULE, dict(mod=remote_path)) module.set_storage_path(os.path.basename(remote_path)) try: module.save_and_import_content(remote_path) except NotUniqueError: module = module.__class__.objects.get(**module.unit_key) repo_controller.associate_single_unit(self.repo.repo_obj, module) self.report.modules_finished_count += 1 self.report.update_progress() # Write the report, making sure we don't overwrite a failure in _fetch_modules if self.report.modules_state not in constants.COMPLETE_STATES: self.report.modules_state = constants.STATE_SUCCESS self.report.modules_execution_time = time() - self.started_fetch_modules self.report.update_progress() remove_missing = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING) if remove_missing is None: remove_missing = constants.DEFAULT_REMOVE_MISSING if remove_missing: self._remove_missing(existing_module_ids_by_key, remote_paths.keys()) def _remove_missing(self, existing_module_ids_by_key, remote_unit_keys): """ Removes units from the local repository if they are missing from the remote repository. :param existing_module_ids_by_key: A dict keyed on Module unit key associated with the current repository. The values are the mongoengine id of the corresponding Module. :type existing_module_ids_by_key: dict of Module.id values keyed on unit_key_str :param remote_unit_keys: A list of all the Module keys in the remote repository :type remote_unit_keys: list of strings """ keys_to_remove = list(set(existing_module_ids_by_key.keys()) - set(remote_unit_keys)) doomed_ids = [existing_module_ids_by_key[key] for key in keys_to_remove] doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues() repo_controller.disassociate_units(self.repo, doomed_module_iterator) def __call__(self): """ Invoke the callable object. All work is performed in the repository working directory and cleaned up after the call. :return: The final synchronization report. :rtype: SyncProgressReport """ self.canceled = False self.report = SyncProgressReport(self.conduit) self.tmp_dir = mkdtemp(dir=self.repo.working_dir) try: manifest = self._fetch_manifest() if manifest is not None: module_paths = self._fetch_modules(manifest) self._import_modules(module_paths) finally: # Update the progress report one last time self.report.update_progress() shutil.rmtree(self.tmp_dir) self.tmp_dir = None return self.report
class PuppetModuleSyncRun(object): """ Used to perform a single sync of a puppet repository. This class will maintain state relevant to the run and should not be reused across runs. """ def __init__(self, repo, sync_conduit, config, is_cancelled_call): self.repo = repo self.sync_conduit = sync_conduit self.config = config self.is_cancelled_call = is_cancelled_call self.progress_report = SyncProgressReport(sync_conduit) def perform_sync(self): """ Performs the sync operation according to the configured state of the instance. The report to be sent back to Pulp is returned from this call. This call will make calls into the conduit's progress update as appropriate. This call executes serially. No threads are created by this call. It will not return until either a step fails or the entire sync is completed. :return: the report object to return to Pulp from the sync call :rtype: pulp.plugins.model.SyncReport """ _LOG.info('Beginning sync for repository <%s>' % self.repo.id) # quit now if there is no feed URL defined if not self.config.get(constants.CONFIG_FEED): self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _('Cannot perform repository sync on a repository with no feed') self.progress_report.update_progress() return self.progress_report.build_final_report() try: metadata = self._parse_metadata() if not metadata: report = self.progress_report.build_final_report() return report self._import_modules(metadata) finally: # One final progress update before finishing self.progress_report.update_progress() report = self.progress_report.build_final_report() return report def _parse_metadata(self): """ Takes the necessary actions (according to the run configuration) to retrieve and parse the repository's metadata. This call will return either the successfully parsed metadata or None if it could not be retrieved or parsed. The progress report will be updated with the appropriate description of what went wrong in the event of an error, so the caller should interpret a None return as an error occurring and not continue the sync.
:return: object representation of the metadata :rtype: RepositoryMetadata """ _LOG.info('Beginning metadata retrieval for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_RUNNING self.progress_report.update_progress() start_time = datetime.now() # Retrieve the metadata from the source try: downloader = self._create_downloader() metadata_json_docs = downloader.retrieve_metadata( self.progress_report) except Exception, e: _LOG.exception( 'Exception while retrieving metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Error downloading metadata') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None # Parse the retrieved metadata documents try: metadata = RepositoryMetadata() for doc in metadata_json_docs: metadata.update_from_json(doc) except Exception, e: _LOG.exception('Exception parsing metadata for repository <%s>' % self.repo.id) self.progress_report.metadata_state = STATE_FAILED self.progress_report.metadata_error_message = _( 'Error parsing repository modules metadata document') self.progress_report.metadata_exception = e self.progress_report.metadata_traceback = sys.exc_info()[2] end_time = datetime.now() duration = end_time - start_time self.progress_report.metadata_execution_time = duration.seconds self.progress_report.update_progress() return None