def importer_config_to_nectar_config(importer_config, working_dir=None,
                                     download_config_kwargs=None):
    """
    DEPRECATED. Use importer_to_nectar_config instead.

    Translates the Pulp standard importer configuration into a DownloaderConfig instance.

    :param importer_config: use the PluginCallConfiguration.flatten method to retrieve a
           single dict view on the configuration
    :type importer_config: dict
    :param working_dir: Allow the caller to override the working directory used
    :type working_dir: str
    :param download_config_kwargs: Any additional keyword arguments you would like to
           include in the download config.
    :type download_config_kwargs: dict

    :rtype: nectar.config.DownloaderConfig
    """
    if download_config_kwargs is None:
        download_config_kwargs = {}

    if working_dir is None:
        working_dir = common_utils.get_working_directory()
    download_config_kwargs['working_dir'] = working_dir

    adder = partial(_safe_add_arg, importer_config, download_config_kwargs)
    map(adder, IMPORTER_DOWNLOADER_CONFIG_MAP)

    download_config = DownloaderConfig(**download_config_kwargs)
    return download_config

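# A minimal, self-contained sketch of the translate-by-mapping pattern used above. The
# real _safe_add_arg and IMPORTER_DOWNLOADER_CONFIG_MAP are not shown in this section,
# so the names and key pairs below are illustrative assumptions, not Pulp's API.
from functools import partial

# (importer key, downloader key) pairs; hypothetical subset.
CONFIG_MAP = [('max_speed', 'max_speed'), ('proxy_host', 'proxy_url')]

def safe_add_arg(src, dest, key_pair):
    """Copy a value from src to dest only when the source key is present."""
    src_key, dest_key = key_pair
    if src_key in src:
        dest[dest_key] = src[src_key]

importer_config = {'max_speed': '1024', 'unrelated': True}
kwargs = {}
adder = partial(safe_add_arg, importer_config, kwargs)
# list() forces evaluation on Python 3; on Python 2, map() is already eager.
list(map(adder, CONFIG_MAP))
print(kwargs)  # {'max_speed': '1024'}
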
def sync(repo_id, sync_config_override=None):
    """
    Performs a synchronize operation on the given repository and triggers publishes for
    distributors with autopublish enabled. The given repo must have an importer configured.

    This method is intentionally limited to synchronizing a single repo. Performing
    multiple repository syncs concurrently will require a more global view of the server
    and must be handled outside the scope of this class.

    :param repo_id: identifies the repo to sync
    :type repo_id: str
    :param sync_config_override: optional config containing values to use for this sync only
    :type sync_config_override: dict

    :return: TaskResult containing sync results and a list of spawned tasks
    :rtype: pulp.server.async.tasks.TaskResult

    :raise pulp_exceptions.MissingResource: if specified repo does not exist, or it does
           not have an importer and associated plugin
    :raise pulp_exceptions.PulpExecutionException: if the task fails.
    """
    repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo_id)
    transfer_repo = repo_obj.to_transfer_repo()

    importer_collection = RepoImporter.get_collection()
    repo_importer = importer_collection.find_one({'repo_id': repo_obj.repo_id})
    if repo_importer is None:
        raise pulp_exceptions.MissingResource(repository=repo_id)

    try:
        importer, imp_config = plugin_api.get_importer_by_id(
            repo_importer['importer_type_id'])
    except plugin_exceptions.PluginNotFound:
        raise pulp_exceptions.MissingResource(repository=repo_id)

    call_config = PluginCallConfiguration(imp_config, repo_importer['config'],
                                          sync_config_override)
    transfer_repo.working_dir = common_utils.get_working_directory()
    conduit = RepoSyncConduit(repo_id, repo_importer['id'])
    sync_result_collection = RepoSyncResult.get_collection()

    # Fire events around the call
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_sync_started(repo_id)

    # Perform the sync
    sync_start_timestamp = _now_timestamp()
    sync_result = None

    try:
        # Replace the Importer's sync_repo() method with our register_sigterm_handler
        # decorator, which will set up cancel_sync_repo() as the target for the signal
        # handler
        sync_repo = register_sigterm_handler(importer.sync_repo, importer.cancel_sync_repo)
        sync_report = sync_repo(transfer_repo, conduit, call_config)
    except Exception, e:
        sync_end_timestamp = _now_timestamp()
        sync_result = RepoSyncResult.error_result(
            repo_obj.repo_id, repo_importer['id'], repo_importer['importer_type_id'],
            sync_start_timestamp, sync_end_timestamp, e, sys.exc_info()[2])
        raise

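# Hedged sketch of the register_sigterm_handler technique referenced above: wrap a
# long-running call so that SIGTERM invokes a cancel callback instead of killing the
# worker mid-task. This illustrates the pattern only; it is not Pulp's implementation.
import signal

def register_sigterm_handler(func, cancel_func):
    def wrapper(*args, **kwargs):
        def handler(signum, frame):
            cancel_func()
        old_handler = signal.signal(signal.SIGTERM, handler)
        try:
            return func(*args, **kwargs)
        finally:
            # Always restore the previous handler once the call returns.
            signal.signal(signal.SIGTERM, old_handler)
    return wrapper
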
def __init__(self, sync_conduit, config):
    """
    Initialize an ISOSyncRun.

    :param sync_conduit: the sync conduit to use for this sync run.
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    """
    self.sync_conduit = sync_conduit
    self.config = config
    self._remove_missing_units = config.get(
        importer_constants.KEY_UNITS_REMOVE_MISSING,
        default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
    self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                          default=constants.CONFIG_VALIDATE_DEFAULT)
    self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
    # The _repo_url must end in a trailing slash, because we will use urljoin to
    # determine the path to PULP_MANIFEST later
    if self._repo_url[-1] != '/':
        self._repo_url = self._repo_url + '/'

    # Cast our config parameters to the correct types and use them to build a Downloader
    max_speed = config.get(importer_constants.KEY_MAX_SPEED)
    if max_speed is not None:
        max_speed = float(max_speed)
    max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    else:
        max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
    ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
    ssl_validation = ssl_validation if ssl_validation is not None else \
        constants.CONFIG_VALIDATE_DEFAULT
    downloader_config = {
        'max_speed': max_speed,
        'max_concurrent': max_downloads,
        'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
        'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
        'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
        'ssl_validation': ssl_validation,
        'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
        'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
        'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
        'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
        'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
        'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
        'working_dir': common_utils.get_working_directory()}
    downloader_config = DownloaderConfig(**downloader_config)

    # We will pass self as the event_listener, so that we can receive the callbacks in
    # this class
    if self._repo_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(downloader_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(downloader_config, self)
    self.progress_report = SyncProgressReport(sync_conduit)

    self.repo_units = []

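# Why the trailing slash on _repo_url matters: urljoin drops the last path segment of a
# base URL that does not end in '/'. A small, runnable illustration (on Python 2 the
# same urljoin lives in the urlparse module).
from urllib.parse import urljoin

print(urljoin('http://example.com/repo/', 'PULP_MANIFEST'))
# -> http://example.com/repo/PULP_MANIFEST
print(urljoin('http://example.com/repo', 'PULP_MANIFEST'))
# -> http://example.com/PULP_MANIFEST  (the 'repo' segment is lost)
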
def _download_isos(self, manifest):
    """
    Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
    recording them in the Pulp database.

    :param manifest: The manifest containing a list of ISOs we want to download.
    :type manifest: pulp_rpm.plugins.db.models.ISOManifest
    """
    self.progress_report.total_bytes = 0
    self.progress_report.num_isos = len(manifest)
    # For each ISO in the manifest, we need to determine a relative path where we want
    # it to be stored, and initialize the Unit that will represent it
    for iso in manifest:
        iso.bytes_downloaded = 0
        # Set the total bytes onto the report
        self.progress_report.total_bytes += iso.size
    self.progress_report.update_progress()
    # We need to build a list of DownloadRequests
    download_directory = common_utils.get_working_directory()
    download_requests = []
    for iso in manifest:
        iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
        iso_name = os.path.basename(iso.url)
        iso_download_path = os.path.join(iso_tmp_dir, iso_name)
        download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
    self.downloader.download(download_requests)

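# Hedged sketch of the per-ISO scratch layout built above: each download gets its own
# unique temporary directory under the task's working directory, so concurrent
# downloads of identically named files cannot collide. Stdlib-only; the URLs are
# made-up examples.
import os
import tempfile

working_dir = tempfile.mkdtemp()  # stands in for common_utils.get_working_directory()
urls = ['http://example.com/isos/disc1.iso', 'http://example.com/other/disc1.iso']

paths = []
for url in urls:
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    paths.append(os.path.join(tmp_dir, os.path.basename(url)))

# Same basename, two distinct destination paths.
assert len(set(paths)) == len(paths)
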
def publish_repo(self, repo, publish_conduit, config):
    """
    Publish the repository.

    :param repo: metadata describing the repo
    :type repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    :return: report describing the publish operation
    :rtype: pulp.plugins.model.PublishReport
    """
    progress_report = FilePublishProgressReport(publish_conduit)
    _logger.info(_('Beginning publish for repository <%(repo)s>') % {'repo': repo.id})

    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        repo_model = repo.repo_obj
        units = repo_controller.find_repo_content_units(repo_model, yield_content_unit=True)

        # Set up an empty build_dir
        working_dir = common_utils.get_working_directory()
        build_dir = os.path.join(working_dir, BUILD_DIRNAME)
        os.makedirs(build_dir)

        self.initialize_metadata(build_dir)

        try:
            # process each unit
            for unit in units:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Let's unpublish, and then republish
        self.unpublish_repo(repo, config)

        hosting_locations = self.get_hosting_locations(repo_model, config)
        for location in hosting_locations:
            shutil.copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo_model, config)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    except Exception, e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report

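# Hedged sketch of the build-then-copy publish flow above: stage symlinks in a scratch
# build directory, then mirror it into a hosting location with copytree(symlinks=True)
# so the links themselves (not the file contents) are copied. Stdlib-only illustration
# with made-up paths.
import os
import shutil
import tempfile

working_dir = tempfile.mkdtemp()
build_dir = os.path.join(working_dir, 'build')
os.makedirs(build_dir)

# Stage a symlink to a (hypothetical) unit's storage path.
storage_path = os.path.join(working_dir, 'unit.dat')
open(storage_path, 'w').close()
os.symlink(storage_path, os.path.join(build_dir, 'unit.dat'))

hosting_location = os.path.join(working_dir, 'published')
shutil.copytree(build_dir, hosting_location, symlinks=True)
assert os.path.islink(os.path.join(hosting_location, 'unit.dat'))
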
def publish(repo_id, distributor_id, publish_config_override=None):
    """
    Requests the given distributor publish the repository it is configured on.

    The publish operation is executed synchronously in the caller's thread and will block
    until it is completed. The caller must take the necessary steps to address the fact
    that a publish call may be time intensive.

    :param repo_id: identifies the repo being published
    :type repo_id: str
    :param distributor_id: identifies the repo's distributor to publish
    :type distributor_id: str
    :param publish_config_override: optional config values to use for this publish call only
    :type publish_config_override: dict, None

    :return: report of the details of the publish
    :rtype: pulp.server.db.model.repository.RepoPublishResult
    """
    repo_coll = Repo.get_collection()
    distributor_coll = RepoDistributor.get_collection()

    # Validation
    repo = repo_coll.find_one({'id': repo_id})
    if repo is None:
        raise MissingResource(repo_id)

    repo_distributor = distributor_coll.find_one({'repo_id': repo_id,
                                                  'id': distributor_id})
    if repo_distributor is None:
        raise MissingResource(repository=repo_id, distributor=distributor_id)

    distributor_instance, distributor_config = RepoPublishManager.\
        _get_distributor_instance_and_config(repo_id, distributor_id)

    # Assemble the data needed for the publish
    conduit = RepoPublishConduit(repo_id, distributor_id)
    call_config = PluginCallConfiguration(distributor_config, repo_distributor['config'],
                                          publish_config_override)
    transfer_repo = common_utils.to_transfer_repo(repo)
    transfer_repo.working_dir = common_utils.get_working_directory()

    # Fire events describing the publish state
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_publish_started(repo_id, distributor_id)
    result = RepoPublishManager._do_publish(repo, distributor_id, distributor_instance,
                                            transfer_repo, conduit, call_config)
    fire_manager.fire_repo_publish_finished(result)

    return result

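# Hedged sketch of the layered lookup that PluginCallConfiguration provides when
# assembled as above: a per-call override wins over the repo's stored config, which
# wins over the plugin's defaults. The helper below illustrates that precedence; it is
# not Pulp's implementation.
def layered_get(key, override, repo_config, plugin_defaults, default=None):
    """Return the first value found, searching the most specific layer first."""
    for layer in (override or {}, repo_config or {}, plugin_defaults or {}):
        if key in layer:
            return layer[key]
    return default

plugin_defaults = {'validate': True, 'max_downloads': 5}
repo_config = {'max_downloads': 2}
override = {'validate': False}

assert layered_get('validate', override, repo_config, plugin_defaults) is False
assert layered_get('max_downloads', override, repo_config, plugin_defaults) == 2
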
def test_get_working_directory_new(self, mock_pulp_config_get, mock_mkdir,
                                   mock_path_exists, mock_celery_current_task):
    mock_pulp_config_get.return_value = '/var/cache/pulp'
    mock_celery_current_task.request = mock.Mock(id='mock-task-id', hostname='mock-host')
    working_directory_path = get_working_directory()
    mock_pulp_config_get.assert_called_with('server', 'working_directory')
    mock_mkdir.assert_called_with('/var/cache/pulp/mock-host/mock-task-id')
    self.assertEqual(working_directory_path, '/var/cache/pulp/mock-host/mock-task-id')

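# The assertions above pin down the layout get_working_directory() is expected to
# produce: <configured working_directory>/<worker hostname>/<task id>. A hedged,
# stdlib-only sketch of composing that path (not Pulp's implementation):
import os

def working_directory_path(configured_root, hostname, task_id):
    return os.path.join(configured_root, hostname, task_id)

assert (working_directory_path('/var/cache/pulp', 'mock-host', 'mock-task-id')
        == '/var/cache/pulp/mock-host/mock-task-id')
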
def sync(repo_id, sync_config_override=None, scheduled_call_id=None):
    """
    Performs a synchronize operation on the given repository and triggers publishes for
    distributors with auto-publish enabled. The given repo must have an importer
    configured.

    This method is intentionally limited to synchronizing a single repo. Performing
    multiple repository syncs concurrently will require a more global view of the server
    and must be handled outside the scope of this class.

    :param repo_id: identifies the repo to sync
    :type repo_id: str
    :param sync_config_override: optional config containing values to use for this sync only
    :type sync_config_override: dict
    :param scheduled_call_id: id of scheduled call that dispatched this task
    :type scheduled_call_id: str

    :return: TaskResult containing sync results and a list of spawned tasks
    :rtype: pulp.server.async.tasks.TaskResult

    :raise pulp_exceptions.MissingResource: if specified repo does not exist, or it does
           not have an importer and associated plugin
    :raise pulp_exceptions.PulpExecutionException: if the task fails.
    """
    repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo_id)
    transfer_repo = repo_obj.to_transfer_repo()

    repo_importer = model.Importer.objects.get_or_404(repo_id=repo_id)
    try:
        importer, imp_config = plugin_api.get_importer_by_id(repo_importer.importer_type_id)
    except plugin_exceptions.PluginNotFound:
        raise pulp_exceptions.MissingResource(repository=repo_id)

    call_config = PluginCallConfiguration(imp_config, repo_importer.config,
                                          sync_config_override)
    transfer_repo.working_dir = common_utils.get_working_directory()
    conduit = RepoSyncConduit(repo_id, repo_importer.importer_type_id, repo_importer.id)
    sync_result_collection = RepoSyncResult.get_collection()

    # Fire events around the call
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_sync_started(repo_id)

    # Perform the sync
    sync_start_timestamp = _now_timestamp()
    sync_result = None

    try:
        # Replace the Importer's sync_repo() method with our register_sigterm_handler
        # decorator, which will set up cancel_sync_repo() as the target for the signal
        # handler
        sync_repo = register_sigterm_handler(importer.sync_repo, importer.cancel_sync_repo)
        sync_report = sync_repo(transfer_repo, conduit, call_config)
    except Exception, e:
        sync_end_timestamp = _now_timestamp()
        # repo_importer is a model.Importer document here, so attribute access is used
        # (the original subscripted it like a dict, which would raise TypeError).
        sync_result = RepoSyncResult.error_result(
            repo_obj.repo_id, repo_importer.id, repo_importer.importer_type_id,
            sync_start_timestamp, sync_end_timestamp, e, sys.exc_info()[2])
        raise

def __init__(self, sync_conduit, config):
    """
    Initialize an ISOSyncRun.

    :param sync_conduit: the sync conduit to use for this sync run.
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    """
    self.sync_conduit = sync_conduit
    self.config = config
    self._remove_missing_units = config.get(
        importer_constants.KEY_UNITS_REMOVE_MISSING,
        default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
    self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                          default=constants.CONFIG_VALIDATE_DEFAULT)
    self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
    # The _repo_url must end in a trailing slash, because we will use urljoin to
    # determine the path to PULP_MANIFEST later
    if self._repo_url[-1] != '/':
        self._repo_url = self._repo_url + '/'

    # Cast our config parameters to the correct types and use them to build a Downloader
    max_speed = config.get(importer_constants.KEY_MAX_SPEED)
    if max_speed is not None:
        max_speed = float(max_speed)
    max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    else:
        max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
    ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
    ssl_validation = ssl_validation if ssl_validation is not None else \
        constants.CONFIG_VALIDATE_DEFAULT
    downloader_config = {
        'max_speed': max_speed,
        'max_concurrent': max_downloads,
        'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
        'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
        'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
        'ssl_validation': ssl_validation,
        'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
        'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
        'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
        'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
        'working_dir': common_utils.get_working_directory()}
    downloader_config = DownloaderConfig(**downloader_config)

    # We will pass self as the event_listener, so that we can receive the callbacks in
    # this class
    if self._repo_url.lower().startswith('file'):
        self.downloader = LocalFileDownloader(downloader_config, self)
    else:
        self.downloader = HTTPThreadedDownloader(downloader_config, self)
    self.progress_report = SyncProgressReport(sync_conduit)

def __init__(self, repo, conduit, config):
    """
    :param repo: the repository to sync
    :type repo: pulp.server.db.model.Repository
    :param conduit: provides access to relevant Pulp functionality
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    """
    self.cancelled = False
    self.working_dir = common_utils.get_working_directory()
    self.content_report = ContentReport()
    self.distribution_report = DistributionReport()
    self.progress_report = {
        'metadata': {'state': 'NOT_STARTED'},
        'content': self.content_report,
        'distribution': self.distribution_report,
        'errata': {'state': 'NOT_STARTED'},
        'comps': {'state': 'NOT_STARTED'},
        'purge_duplicates': {'state': 'NOT_STARTED'},
    }
    self.conduit = conduit
    self.set_progress()
    self.repo = repo

    self.config = config

    self.nectar_config = nectar_utils.importer_config_to_nectar_config(config.flatten())

    self.skip_repomd_steps = False
    self.current_revision = 0

    self.downloader = None
    self.tmp_dir = None

    url_modify_config = {}
    if config.get('query_auth_token'):
        url_modify_config['query_auth_token'] = config.get('query_auth_token')
        skip_config = self.config.get(constants.CONFIG_SKIP, [])
        for type_id in ids.QUERY_AUTH_TOKEN_UNSUPPORTED:
            if type_id not in skip_config:
                skip_config.append(type_id)
        self.config.override_config[constants.CONFIG_SKIP] = skip_config
        _logger.info(
            _('The following unit types do not support query auth tokens and will be '
              'skipped: {skipped_types}').format(
                skipped_types=ids.QUERY_AUTH_TOKEN_UNSUPPORTED))

    self._url_modify = RepoURLModifier(**url_modify_config)

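# Hedged sketch of the skip-list merge above: unit types that cannot carry a query auth
# token are appended to the configured skip list exactly once, preserving whatever the
# user already chose to skip. The type names here are illustrative, not Pulp constants.
user_skip = ['erratum']
token_unsupported = ['distribution', 'erratum']

skip_config = list(user_skip)
for type_id in token_unsupported:
    if type_id not in skip_config:
        skip_config.append(type_id)

assert skip_config == ['erratum', 'distribution']
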
def _create_download_requests(content_units):
    """
    Make a list of Nectar DownloadRequests for the given content units using the lazy
    catalog.

    :param content_units: The content units to build a list of DownloadRequests for.
    :type content_units: list of pulp.server.db.model.FileContentUnit

    :return: A list of DownloadRequests; each request includes a ``data`` instance
             variable which is a dict containing the FileContentUnit, the list of files
             in the unit, and the downloaded file's storage path.
    :rtype: list of nectar.request.DownloadRequest
    """
    requests = []
    working_dir = get_working_directory()
    signing_key = Key.load(pulp_conf.get('authentication', 'rsa_key'))

    for content_unit in content_units:
        # All files in the unit; every request for a unit has a reference to this dict.
        unit_files = {}
        unit_working_dir = os.path.join(working_dir, content_unit.id)
        for file_path in content_unit.list_files():
            qs = LazyCatalogEntry.objects.filter(
                unit_id=content_unit.id,
                unit_type_id=content_unit.type_id,
                path=file_path
            )
            catalog_entry = qs.order_by('revision').first()
            if catalog_entry is None:
                continue
            signed_url = _get_streamer_url(catalog_entry, signing_key)
            temporary_destination = os.path.join(
                unit_working_dir,
                os.path.basename(catalog_entry.path)
            )
            mkdir(unit_working_dir)
            unit_files[temporary_destination] = {
                CATALOG_ENTRY: catalog_entry,
                PATH_DOWNLOADED: None,
            }

            request = DownloadRequest(signed_url, temporary_destination)
            # For memory reasons, only hold onto the id and type_id so we can reload the
            # unit once it's successfully downloaded.
            request.data = {
                TYPE_ID: content_unit.type_id,
                UNIT_ID: content_unit.id,
                UNIT_FILES: unit_files,
            }
            requests.append(request)

    return requests

def publish(repo_id, dist_id, publish_config_override=None, scheduled_call_id=None):
    """
    Uses the given distributor to publish the repository.

    The publish operation is executed synchronously in the caller's thread and will block
    until it is completed. The caller must take the necessary steps to address the fact
    that a publish call may be time intensive.

    :param repo_id: identifies the repo being published
    :type repo_id: str
    :param dist_id: identifies the repo's distributor to publish
    :type dist_id: str
    :param publish_config_override: optional config values to use for this publish call only
    :type publish_config_override: dict, None
    :param scheduled_call_id: id of scheduled call that dispatched this task
    :type scheduled_call_id: str

    :return: report of the details of the publish
    :rtype: pulp.server.db.model.repository.RepoPublishResult

    :raises pulp_exceptions.MissingResource: if distributor/repo pair does not exist
    """
    distributor_coll = RepoDistributor.get_collection()
    repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo_id)
    repo_distributor = distributor_coll.find_one({'repo_id': repo_id, 'id': dist_id})
    if repo_distributor is None:
        raise pulp_exceptions.MissingResource(repository=repo_id, distributor=dist_id)

    dist_inst, dist_conf = _get_distributor_instance_and_config(repo_id, dist_id)

    # Assemble the data needed for the publish
    conduit = RepoPublishConduit(repo_id, dist_id)
    call_config = PluginCallConfiguration(dist_conf, repo_distributor['config'],
                                          publish_config_override)
    transfer_repo = repo_obj.to_transfer_repo()
    transfer_repo.working_dir = common_utils.get_working_directory()

    # Fire events describing the publish state
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_publish_started(repo_id, dist_id)
    result = _do_publish(repo_obj, dist_id, dist_inst, transfer_repo, conduit, call_config)
    fire_manager.fire_repo_publish_finished(result)

    return result

def __init__(self, repo, conduit, config):
    """
    :param repo: the repository to sync
    :type repo: pulp.server.db.model.Repository
    :param conduit: provides access to relevant Pulp functionality
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    """
    self.working_dir = common_utils.get_working_directory()
    self.content_report = ContentReport()
    self.distribution_report = DistributionReport()
    self.progress_report = {
        'metadata': {'state': 'NOT_STARTED'},
        'content': self.content_report,
        'distribution': self.distribution_report,
        'errata': {'state': 'NOT_STARTED'},
        'comps': {'state': 'NOT_STARTED'},
        'purge_duplicates': {'state': 'NOT_STARTED'},
    }
    self.conduit = conduit
    self.set_progress()
    self.repo = repo

    self.config = config

    self.nectar_config = nectar_utils.importer_config_to_nectar_config(config.flatten())

    self.skip_repomd_steps = False
    self.current_revision = 0

    self.downloader = None
    self.tmp_dir = None

    # Was any repo metadata found? Includes either yum metadata or a treeinfo file. If
    # this is False at the end of the sync, then an error will be presented to the user.
    self.metadata_found = False
    # Store the reason that yum repo metadata was not found. In case a treeinfo file is
    # also not found, this error will be the one presented to the user. That preserves
    # pre-existing behavior that is yum-centric.
    self.repomd_not_found_reason = ''

    url_modify_config = {}
    if config.get('query_auth_token'):
        url_modify_config['query_auth_token'] = config.get('query_auth_token')
        skip_config = self.config.get(constants.CONFIG_SKIP, [])
        for type_id in ids.QUERY_AUTH_TOKEN_UNSUPPORTED:
            if type_id not in skip_config:
                skip_config.append(type_id)
        self.config.override_config[constants.CONFIG_SKIP] = skip_config
        _logger.info(
            _('The following unit types do not support query auth tokens and will be '
              'skipped: {skipped_types}').format(
                skipped_types=ids.QUERY_AUTH_TOKEN_UNSUPPORTED))

    self._url_modify = RepoURLModifier(**url_modify_config)

def get_working_dir(self):
    """
    Return the working directory. The step's own working dir is used if it has been set;
    otherwise the parent step is asked for its working dir. If there is no parent, a new
    working directory is created and cached on this step.

    :returns: the working directory
    :rtype: str
    """
    if self.working_dir:
        return self.working_dir
    elif self.parent:
        return self.parent.get_working_dir()
    else:
        self.working_dir = common_utils.get_working_directory()
        return self.working_dir

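# Hedged sketch of the lookup order implemented above: own value, then the parent
# chain, then create-and-cache at the root. Minimal, self-contained illustration with
# a hypothetical Step class standing in for the publish step hierarchy.
class Step(object):
    def __init__(self, parent=None, working_dir=None):
        self.parent = parent
        self.working_dir = working_dir

    def get_working_dir(self):
        if self.working_dir:
            return self.working_dir
        elif self.parent:
            return self.parent.get_working_dir()
        else:
            # stands in for common_utils.get_working_directory()
            self.working_dir = '/tmp/new-working-dir'
            return self.working_dir

root = Step()
child = Step(parent=root)
assert child.get_working_dir() == '/tmp/new-working-dir'
assert root.working_dir == '/tmp/new-working-dir'  # cached at the root, not the child
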
def __init__(self, repo, sync_conduit, call_config):
    """
    :param repo: the repository to sync
    :type repo: pulp.server.db.model.Repository
    :param sync_conduit: provides access to relevant Pulp functionality
    :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :param call_config: plugin configuration
    :type call_config: pulp.plugins.config.PluginCallConfiguration
    """
    self.cancelled = False
    self.working_dir = common_utils.get_working_directory()
    self.content_report = ContentReport()
    self.distribution_report = DistributionReport()
    self.progress_status = {
        'metadata': {'state': 'NOT_STARTED'},
        'content': self.content_report,
        'distribution': self.distribution_report,
        'errata': {'state': 'NOT_STARTED'},
        'comps': {'state': 'NOT_STARTED'},
    }
    self.sync_conduit = sync_conduit
    self.set_progress()
    self.repo = repo

    self.call_config = call_config

    flat_call_config = call_config.flatten()
    self.nectar_config = nectar_utils.importer_config_to_nectar_config(flat_call_config)

    self.skip_repomd_steps = False
    self.current_revision = 0

    url_modify_config = {}
    if call_config.get('query_auth_token'):
        url_modify_config['query_auth_token'] = call_config.get('query_auth_token')
        skip_config = self.call_config.get(constants.CONFIG_SKIP, [])
        for type_id in ids.QUERY_AUTH_TOKEN_UNSUPPORTED:
            if type_id not in skip_config:
                skip_config.append(type_id)
        self.call_config.override_config[constants.CONFIG_SKIP] = skip_config
        _logger.info(
            _('The following unit types do not support query auth tokens and will be '
              'skipped: {skipped_types}').format(
                skipped_types=ids.QUERY_AUTH_TOKEN_UNSUPPORTED))

    self._url_modify = utils.RepoURLModifier(**url_modify_config)

def publish(group_id, distributor_id, publish_config_override=None):
    """
    Requests the given distributor publish the repository group.

    :param group_id: identifies the repo group
    :type group_id: str
    :param distributor_id: identifies the group's distributor
    :type distributor_id: str
    :param publish_config_override: values to pass the plugin for this publish call alone
    :type publish_config_override: dict
    """
    distributor_manager = manager_factory.repo_group_distributor_manager()
    distributor = distributor_manager.get_distributor(group_id, distributor_id)
    distributor_type_id = distributor['distributor_type_id']
    distributor_instance, plugin_config = plugin_api.get_group_distributor_by_id(
        distributor_type_id)

    group_query_manager = manager_factory.repo_group_query_manager()

    # Validation
    group = group_query_manager.get_group(group_id)

    # Assemble the data needed for publish
    conduit = RepoGroupPublishConduit(group_id, distributor)
    call_config = PluginCallConfiguration(plugin_config, distributor['config'],
                                          publish_config_override)
    transfer_group = common_utils.to_transfer_repo_group(group)
    transfer_group.working_dir = common_utils.get_working_directory()

    # TODO: Add events for group publish start/complete
    RepoGroupPublishManager._do_publish(transfer_group, distributor_id,
                                        distributor_instance, conduit, call_config)

def publish_repo(self, repo, publish_conduit, config):
    """
    Publish the repository.

    :param repo: metadata describing the repo
    :type repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginCallConfiguration
    :return: report describing the publish operation
    :rtype: pulp.plugins.model.PublishReport
    """
    progress_report = FilePublishProgressReport(publish_conduit)
    _logger.info(_('Beginning publish for repository <%(repo)s>') % {'repo': repo.id})

    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        units = publish_conduit.get_units()

        # Set up an empty build_dir
        working_dir = common_utils.get_working_directory()
        build_dir = os.path.join(working_dir, BUILD_DIRNAME)
        os.makedirs(build_dir)

        self.initialize_metadata(build_dir)

        try:
            # process each unit
            for unit in units:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Let's unpublish, and then republish
        self.unpublish_repo(repo, config)

        hosting_locations = self.get_hosting_locations(repo, config)
        for location in hosting_locations:
            copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo, config)

        # Clean up our build_dir
        self._rmtree_if_exists(build_dir)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    except Exception, e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report

def sync(repo_id, sync_config_override=None):
    """
    Performs a synchronize operation on the given repository.

    The given repo must have an importer configured. The identity of the importer is not
    a parameter to this call; if multiple importers are eventually supported this will
    have to change to indicate which importer to use.

    This method is intentionally limited to synchronizing a single repo. Performing
    multiple repository syncs concurrently will require a more global view of the server
    and must be handled outside the scope of this class.

    @param repo_id: identifies the repo to sync
    @type repo_id: str

    @param sync_config_override: optional config containing values to use for this sync only
    @type sync_config_override: dict

    @return: The synchronization report.
    @rtype: L{pulp.server.plugins.model.SyncReport}

    @raise MissingResource: if repo_id does not refer to a valid repo
    @raise OperationFailed: if the given repo does not have an importer set
    """
    repo_coll = Repo.get_collection()

    # Validation
    repo = repo_coll.find_one({'id': repo_id})
    if repo is None:
        raise MissingResource(repo_id)

    importer_instance, importer_config = RepoSyncManager._get_importer_instance_and_config(
        repo_id)

    if importer_instance is None:
        raise MissingResource(repo_id)

    importer_manager = manager_factory.repo_importer_manager()
    repo_importer = importer_manager.get_importer(repo_id)

    # Assemble the data needed for the sync
    conduit = RepoSyncConduit(repo_id, repo_importer['id'])
    call_config = PluginCallConfiguration(importer_config, repo_importer['config'],
                                          sync_config_override)
    transfer_repo = common_utils.to_transfer_repo(repo)
    transfer_repo.working_dir = common_utils.get_working_directory()

    # Fire events around the call
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_sync_started(repo_id)
    sync_result = RepoSyncManager._do_sync(repo, importer_instance, transfer_repo,
                                           conduit, call_config)
    fire_manager.fire_repo_sync_finished(sync_result)

    if sync_result['result'] == RepoSyncResult.RESULT_FAILED:
        raise PulpExecutionException(_('Importer indicated a failed response'))

    repo_publish_manager = manager_factory.repo_publish_manager()
    auto_distributors = repo_publish_manager.auto_distributors(repo_id)
    spawned_tasks = []
    for distributor in auto_distributors:
        distributor_id = distributor['id']
        spawned_tasks.append(
            repo_publish_manager.queue_publish(repo_id, distributor_id).task_id)
    return TaskResult(sync_result, spawned_tasks=spawned_tasks)

def publish_repo_fast_forward(self, repo, publish_conduit, config):
    """
    Publish the repository.

    :param repo: metadata describing the repo
    :type repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type config: pulp.plugins.config.PluginConfiguration
    :return: report describing the publish operation
    :rtype: pulp.plugins.model.PublishReport
    """
    progress_report = FilePublishProgressReport(publish_conduit)
    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        units = publish_conduit.get_units()

        # Set up an empty build_dir
        working_dir = common_utils.get_working_directory()
        build_dir = os.path.join(working_dir, BUILD_DIRNAME)
        self._rmtree_if_exists(build_dir)
        misc.mkdir(build_dir)

        self.initialize_metadata(build_dir)
        unit_checksum_set = set()

        try:
            # process each unit
            for unit in units:
                unit_checksum_set.add(unit.unit_key['checksum'])
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Only generate the incremental files and copy them to the publishing directories
        hosting_locations = self.get_hosting_locations(repo, config)
        for location in hosting_locations:
            unit_checksum_old_set = set()
            unit_over_path_map = {}
            metadata_filename = os.path.join(location, MANIFEST_FILENAME)
            if os.path.exists(metadata_filename):
                with open(metadata_filename, 'r') as metadata_file:
                    for line in metadata_file:
                        fields = line.split(',')
                        checksum = fields[1]
                        unit_checksum_old_set.add(checksum)
                        if checksum not in unit_checksum_set:
                            unit_over_path_map[checksum] = fields[0]
            _logger.debug("%d items were in MANIFEST %s, which exists? %s." % (
                len(unit_checksum_old_set), metadata_filename,
                os.path.exists(metadata_filename)))

            # Copy incremental files into publishing directories
            checksum_absent_set = unit_checksum_set - unit_checksum_old_set
            _logger.debug("Increasing %d units" % len(checksum_absent_set))
            # If too many units were added, fall back to a full (force_full) publish
            max_increase_units = min(50000, len(units) / len(hosting_locations))
            if len(checksum_absent_set) > max_increase_units:
                self._rmtree_if_exists(build_dir)
                raise FastForwardUnavailable

            criteria = UnitAssociationCriteria(
                unit_filters={'checksum': {"$in": list(checksum_absent_set)}},
                unit_fields={'name', 'checksum', '_storage_path', 'size'})
            unit_absent_set = publish_conduit.get_units(criteria=criteria)
            for unit in unit_absent_set:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)

            # Remove modified and deleted files from publishing directories
            for checksum, unit_path in unit_over_path_map.items():
                unit_path = os.path.join(location, unit_path)
                if os.path.exists(unit_path):
                    os.remove(unit_path)
                    dir_name = os.path.dirname(unit_path)
                    if not os.listdir(dir_name):
                        os.removedirs(dir_name)
                elif os.path.islink(unit_path):
                    os.unlink(unit_path)

            if len(unit_absent_set) > 0 or len(unit_over_path_map) > 0:
                if os.path.exists(metadata_filename):
                    os.remove(metadata_filename)
                copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo, config)

        # Clean up our build_dir
        self._rmtree_if_exists(build_dir)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    except Exception, e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report

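# Hedged sketch of the fast-forward diff above: compare the checksums recorded in the
# previously published MANIFEST with the current unit set to find what must be added
# and what must be removed. Stdlib-only; the (path, checksum) rows mirror the CSV
# parsing in the function, but the data is made up.
old_manifest_rows = [('a/1.iso', 'aaa'), ('b/2.iso', 'bbb')]
current_checksums = {'bbb', 'ccc'}

old_checksums = set()
stale_paths = {}
for path, checksum in old_manifest_rows:
    old_checksums.add(checksum)
    if checksum not in current_checksums:
        stale_paths[checksum] = path

to_add = current_checksums - old_checksums
assert to_add == {'ccc'}
assert stale_paths == {'aaa': 'a/1.iso'}
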
def sync(repo_id, sync_config_override=None):
    """
    Performs a synchronize operation on the given repository.

    The given repo must have an importer configured. The identity of the importer is not
    a parameter to this call; if multiple importers are eventually supported this will
    have to change to indicate which importer to use.

    This method is intentionally limited to synchronizing a single repo. Performing
    multiple repository syncs concurrently will require a more global view of the server
    and must be handled outside the scope of this class.

    @param repo_id: identifies the repo to sync
    @type repo_id: str

    @param sync_config_override: optional config containing values to use for this sync only
    @type sync_config_override: dict

    @return: The synchronization report.
    @rtype: L{pulp.server.plugins.model.SyncReport}

    @raise MissingResource: if repo_id does not refer to a valid repo
    @raise OperationFailed: if the given repo does not have an importer set
    """
    repo_coll = Repo.get_collection()

    # Validation
    repo = repo_coll.find_one({'id': repo_id})
    if repo is None:
        raise MissingResource(repo_id)

    importer_instance, importer_config = RepoSyncManager._get_importer_instance_and_config(
        repo_id)

    if importer_instance is None:
        raise MissingResource(repo_id)

    importer_manager = manager_factory.repo_importer_manager()
    repo_importer = importer_manager.get_importer(repo_id)

    # Assemble the data needed for the sync
    conduit = RepoSyncConduit(repo_id, repo_importer['id'],
                              RepoContentUnit.OWNER_TYPE_IMPORTER, repo_importer['id'])
    call_config = PluginCallConfiguration(importer_config, repo_importer['config'],
                                          sync_config_override)
    transfer_repo = common_utils.to_transfer_repo(repo)
    transfer_repo.working_dir = common_utils.get_working_directory()

    # Fire events around the call
    fire_manager = manager_factory.event_fire_manager()
    fire_manager.fire_repo_sync_started(repo_id)
    sync_result = RepoSyncManager._do_sync(repo, importer_instance, transfer_repo,
                                           conduit, call_config)
    fire_manager.fire_repo_sync_finished(sync_result)

    if sync_result['result'] == RepoSyncResult.RESULT_FAILED:
        raise PulpExecutionException(_('Importer indicated a failed response'))

    repo_publish_manager = manager_factory.repo_publish_manager()
    auto_distributors = repo_publish_manager.auto_distributors(repo_id)
    spawned_tasks = []
    for distributor in auto_distributors:
        distributor_id = distributor['id']
        spawned_tasks.append(
            repo_publish_manager.queue_publish(repo_id, distributor_id).task_id)
    return TaskResult(sync_result, spawned_tasks=spawned_tasks)