def _get_and_save_file_units(filename, processing_function, tag, conduit, repo):
    """
    Parse units of one type out of an uploaded comps.xml file and save them.

    Each parsed unit is saved; if a unit with the same unit key already exists,
    the existing unit is reused. Every unit is then associated with the repo.

    :param filename: open file-like object containing metadata
    :type  filename: file
    :param processing_function: method to use for generating the units
    :type  processing_function: function
    :param tag: XML tag that identifies each unit
    :type  tag: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    """
    # Bind the repo id so the generator can hand each XML element to the
    # processing function with the repo context already applied.
    process = functools.partial(processing_function, repo.repo_id)
    for unit in packages.package_list_generator(filename, tag, process):
        try:
            unit.save()
        except NotUniqueError:
            # A unit with this key already exists; fetch and reuse it.
            unit = unit.__class__.objects.filter(**unit.unit_key).first()
        repo_controller.associate_single_unit(repo, unit)
def download_succeeded(self, report):
    """
    This is the callback that we will get from the downloader library when it
    succeeds in downloading a file. This method will check to see if we are in
    the ISO downloading stage, and if we are, it will add the new ISO to the
    database.

    :param report: The report of the file we downloaded
    :type  report: nectar.report.DownloadReport
    """
    # If we are in the isos stage, then this must be one of our ISOs.
    if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
        # This will update our bytes downloaded
        self.download_progress(report)
        iso = report.data
        iso.set_storage_path(os.path.basename(report.destination))
        try:
            if self._validate_downloads:
                iso.validate_iso(report.destination)
            # BUGFIX: save() used to be implicit in save_and_import_content(),
            # which let NotUniqueError escape (it is not a ValueError) when an
            # ISO with the same unit key already existed. Save first and fall
            # back to the existing unit on a duplicate, then import content.
            try:
                iso.save()
            except NotUniqueError:
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            repo_controller.associate_single_unit(self.sync_conduit.repo, iso)
            iso.safe_import_content(report.destination)
            # We can drop this ISO from the url --> ISO map
            self.progress_report.num_isos_finished += 1
            self.progress_report.update_progress()
        except ValueError:
            self.download_failed(report)
def _get_and_save_file_units(filename, processing_function, tag, conduit, repo):
    """
    Given a comps.xml file, decide which groups/categories to extract and save
    the parsed units, associating each one with the repository.

    :param filename: open file-like object containing metadata
    :type  filename: file
    :param processing_function: method to use for generating the units
    :type  processing_function: function
    :param tag: XML tag that identifies each unit
    :type  tag: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    """
    repo_id = repo.repo_id
    generator = packages.package_list_generator(
        filename, tag, functools.partial(processing_function, repo_id))
    for model in generator:
        try:
            model.save()
        except NotUniqueError:
            # Duplicate unit key: look up the already-saved unit instead.
            model = model.__class__.objects.filter(**model.unit_key).first()
        repo_controller.associate_single_unit(repo, model)
def _associate_unit(dest_repo, unit):
    """
    Associate one particular unit with the destination repository. There are
    behavioral exceptions based on type:

    Group, Category, Environment and Yum Metadata File units need to have their
    "repo_id" attribute set, so they are copied rather than shared.

    RPMs are convenient to do all as one block, for the purpose of dependency
    resolution. So this method skips RPMs and lets them be done together by
    other means.

    :param dest_repo: destination repo
    :type  dest_repo: pulp.server.db.model.Repository
    :param unit: Unit to be copied
    :type  unit: pulp.server.db.model.ContentUnit
    :return: copied unit
    :rtype:  pulp.server.db.model.ContentUnit
    """
    copy_types = (models.PackageGroup, models.PackageCategory,
                  models.PackageEnvironment, models.PackageLangpacks)
    if isinstance(unit, copy_types):
        return associate_copy_for_repo(unit, dest_repo)
    if isinstance(unit, models.RPM):
        # RPM copies happen in one batch elsewhere; leave this one untouched.
        return unit
    if isinstance(unit, models.YumMetadataFile):
        # Metadata files also need their content set on the copied unit.
        return associate_copy_for_repo(unit, dest_repo, True)
    # Default: the unit can be shared directly between repositories.
    repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
    return unit
def _handle_yum_metadata_file(repo, type_id, unit_key, metadata, file_path, conduit, config): """ Handles the upload for a yum repository metadata file. :type repo: pulp.server.db.model.Repository :type type_id: str :type unit_key: dict :type metadata: dict or None :type file_path: str :type conduit: pulp.plugins.conduits.upload.UploadConduit :type config: pulp.plugins.config.PluginCallConfiguration """ # Validate the user specified data by instantiating the model model_data = dict() model_data.update(unit_key) if metadata: model_data.update(metadata) # Replicates the logic in yum/sync.py.import_unknown_metadata_files. # The local_path variable is removed since it's not included in the metadata when # synchronized. file_relative_path = model_data.pop('local_path') translated_data = models.YumMetadataFile.SERIALIZER().from_representation(model_data) model = models.YumMetadataFile(**translated_data) model.set_content(file_relative_path) model.save() # Move the file to its final storage location in Pulp repo_controller.associate_single_unit(conduit.repo, model)
def _add_new_module(self, downloader, module):
    """
    Performs the tasks for downloading and saving a new unit in Pulp.

    This method entirely skips modules that are already in the repository.

    :param downloader: downloader instance to use for retrieving the unit
    :type  downloader: child of
           pulp_puppet.plugins.importers.downloaders.base.BaseDownloader
    :param module: module to download and add
    :type  module: pulp_puppet.plugins.db.models.Module
    """
    try:
        # Download the bits
        downloaded_filename = downloader.retrieve_module(
            self.progress_report, module)

        # Extract the extra metadata into the module
        metadata = metadata_module.extract_metadata(
            downloaded_filename, self.repo.working_dir)

        # Overwrite the author and name, which are derived from the filename
        metadata.update(Module.split_filename(metadata['name']))

        # Create and save the Module built from the extracted metadata
        module = Module.from_metadata(metadata)
        module.set_storage_path(os.path.basename(downloaded_filename))
        try:
            module.save_and_import_content(downloaded_filename)
        except NotUniqueError:
            # The module already exists in Pulp; reuse the stored unit.
            module = module.__class__.objects.get(**module.unit_key)

        # Associate the module with the repo
        repo_controller.associate_single_unit(self.repo.repo_obj, module)
    finally:
        # Always clean up the downloader's temporary artifacts, even on error.
        downloader.cleanup_module(module)
def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when it succeeds in downloading a file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add the new ISO to the database. :param report: The report of the file we downloaded :type report: nectar.report.DownloadReport """ # If we are in the isos stage, then this must be one of our ISOs. if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: # This will update our bytes downloaded self.download_progress(report) iso = report.data iso.set_storage_path(os.path.basename(report.destination)) try: if self._validate_downloads: iso.validate_iso(report.destination) try: iso.save() except NotUniqueError: iso = iso.__class__.objects.filter(**iso.unit_key).first() repo_controller.associate_single_unit(self.sync_conduit.repo, iso) iso.safe_import_content(report.destination) # We can drop this ISO from the url --> ISO map self.progress_report.num_isos_finished += 1 self.progress_report.update_progress() except ValueError: self.download_failed(report)
def process_main(self, item=None):
    """
    Given the available unit keys, determine which of them already exist in
    Pulp: re-associate those with the repository, and queue the rest for
    download.

    :param item: The item to process or None if get_iterator is not defined
    :param item: object or None
    """
    # Hashes of units that were found to already exist in Pulp.
    seen = set()

    # Prefer available_units given to the constructor; otherwise fall back to
    # the parent step's list.
    available_units = self.available_units or self.parent.available_units

    for batch in misc.paginate(available_units, self.unit_pagination_size):
        # Look up this page of units in the database.
        for found in units_controller.find_units(batch):
            seen.add(hash(found))
            repo_controller.associate_single_unit(self.get_repo().repo_obj, found)

        # Anything not found (in this or an earlier page) must be downloaded.
        for candidate in batch:
            if hash(candidate) not in seen:
                self.units_to_download.append(candidate)
def _associate_unit(dest_repo, unit):
    """
    Associate one particular unit with the destination repository. There are
    behavioral exceptions based on type:

    Group, Category, Environment and Yum Metadata File units need to have
    their "repo_id" attribute set, so a per-repo copy is made.

    RPMs are convenient to do all as one block, for the purpose of dependency
    resolution. So this method skips RPMs and lets them be done together by
    other means.

    :param dest_repo: destination repo
    :type  dest_repo: pulp.server.db.model.Repository
    :param unit: Unit to be copied
    :type  unit: pulp.server.db.model.ContentUnit
    :return: copied unit
    :rtype:  pulp.server.db.model.ContentUnit
    """
    per_repo_types = (models.PackageGroup, models.PackageCategory,
                      models.PackageEnvironment)
    if isinstance(unit, per_repo_types):
        return associate_copy_for_repo(unit, dest_repo)
    if isinstance(unit, models.RPM):
        # copy will happen in one batch elsewhere
        return unit
    if isinstance(unit, models.YumMetadataFile):
        # set_content=True: the copied metadata file also needs its content.
        return associate_copy_for_repo(unit, dest_repo, True)
    # Any other type can be shared directly with the destination repo.
    repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
    return unit
def _resolve_new_units(self, existing, wanted):
    """
    Decide what units are needed to be downloaded.

    Filter out units which are already in a repository,
    associate units which are already downloaded,

    :param existing: units which are already in a repository
    :type  existing: list of unit keys as namedtuples
    :param wanted: units which should be imported into a repository
    :type  wanted: list of unit keys as namedtuples

    :return: list of unit keys to download; empty list if all units are already downloaded
    :rtype:  list of unit keys as namedtuples
    """
    model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
    # Lazily build model instances from the wanted unit-key tuples; the
    # generator is consumed by find_units below.
    unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in wanted)
    still_wanted = set(wanted)
    for unit in units_controller.find_units(unit_generator):
        # Only a unit whose content file is actually on disk counts as
        # already downloaded.
        file_exists = unit._storage_path is not None and os.path.isfile(unit._storage_path)
        if file_exists:
            if unit.unit_key_as_named_tuple not in existing:
                # Known to Pulp and downloaded, but not yet in this repo:
                # associate it rather than re-downloading.
                repo_controller.associate_single_unit(self.repo.repo_obj, unit)
            still_wanted.discard(unit.unit_key_as_named_tuple)
    return list(still_wanted)
def import_unknown_metadata_files(self, metadata_files):
    """
    Import metadata files whose type is not known to us. These are any files
    that we are not already parsing.

    :param metadata_files: object containing access to all metadata files
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    """
    for metadata_type, file_info in metadata_files.metadata.iteritems():
        if metadata_type not in metadata_files.KNOWN_TYPES:
            file_path = file_info['local_path']
            checksum_type = file_info['checksum']['algorithm']
            checksum_type = util.sanitize_checksum_type(checksum_type)
            checksum = file_info['checksum']['hex_digest']
            # Find an existing model
            model = models.YumMetadataFile.objects.filter(
                data_type=metadata_type,
                repo_id=self.repo.repo_id).first()
            # If an existing model, update its checksum fields in place
            if model:
                model.checksum = checksum
                model.checksum_type = checksum_type
            else:
                # Else, create a new model
                model = models.YumMetadataFile(
                    data_type=metadata_type,
                    repo_id=self.repo.repo_id,
                    checksum=checksum,
                    checksum_type=checksum_type)
            model.set_storage_path(os.path.basename(file_path))
            model.save_and_import_content(file_path)

            # associate/re-associate model to the repo
            repo_controller.associate_single_unit(self.repo, model)
def associate_copy_for_repo(unit, dest_repo, set_content=False):
    """
    Associate a unit where it is required to make a copy of the unit first, and
    where the unit key includes the repo ID.

    :param unit: Unit to be copied
    :type  unit: pulp_rpm.plugins.db.models.Package
    :param dest_repo: destination repo
    :type  dest_repo: pulp.server.db.model.Repository
    :param set_content: if True, the set_unit() method will be called on the
                        new unit. Default is False.
    :type  set_content: bool

    :return: new unit that was saved and associated
    :rtype:  pulp_rpm.plugins.db.models.Package
    """
    copied = unit.clone()
    copied.repo_id = dest_repo.repo_id
    try:
        copied.save()
    except mongoengine.NotUniqueError:
        # It is possible that a previous copy exists as an orphan, in which
        # case it can safely be deleted and replaced with this new version.
        _LOGGER.debug(_('replacing pre-existing copy of %(u)s' % {'u': copied}))
        copied.__class__.objects.filter(**copied.unit_key).delete()
        copied.save()
    if set_content:
        copied.set_storage_path(os.path.basename(unit._storage_path))
        copied.safe_import_content(unit._storage_path)
    repo_controller.associate_single_unit(repository=dest_repo, unit=copied)
    return copied
def import_unknown_metadata_files(self, metadata_files):
    """
    Import metadata files whose type is not known to us. These are any files
    that we are not already parsing.

    :param metadata_files: object containing access to all metadata files
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    """
    for metadata_type, file_info in metadata_files.metadata.iteritems():
        if metadata_type not in metadata_files.KNOWN_TYPES:
            file_path = file_info['local_path']
            checksum_type = file_info['checksum']['algorithm']
            checksum_type = verification.sanitize_checksum_type(checksum_type)
            checksum = file_info['checksum']['hex_digest']
            # Find an existing model
            model = models.YumMetadataFile.objects.filter(
                data_type=metadata_type,
                repo_id=self.repo.repo_id).first()
            # If an existing model, update its checksum fields in place
            if model:
                model.checksum = checksum
                model.checksum_type = checksum_type
            else:
                # Else, create a new model
                model = models.YumMetadataFile(
                    data_type=metadata_type,
                    repo_id=self.repo.repo_id,
                    checksum=checksum,
                    checksum_type=checksum_type)
            model.set_storage_path(os.path.basename(file_path))
            model.save_and_import_content(file_path)

            # associate/re-associate model to the repo
            repo_controller.associate_single_unit(self.repo, model)
def _import_manifest(conduit, unit, dest_repo):
    """
    Import a Manifest and its referenced Blobs.

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param unit: The Manifest to import
    :type  unit: pulp_docker.plugins.Model.Manifest
    :param dest_repo: The destination repository that the Manifest is being
                      imported to.
    :type  dest_repo: pulp.server.db.model.Repository

    :return: list of Units that were copied to the destination repository
    :rtype:  list
    """
    units_added = set()

    # Collect the digests of every blob this manifest references.
    blob_digests = {layer.blob_sum for layer in unit.fs_layers}
    # In manifest schema version 2 there is an additional blob layer called
    # config_layer.
    if unit.config_layer:
        blob_digests.add(unit.config_layer)

    # Import each referenced blob into the destination repository.
    for blob in models.Blob.objects.filter(digest__in=sorted(blob_digests)):
        units_added |= set(DockerImporter._import_blob(conduit, blob, dest_repo))

    # Finally, add the manifest itself.
    repository.associate_single_unit(dest_repo, unit)
    units_added.add(unit)
    return units_added
def process_main(self, item):
    """
    This method gets called with each Unit that was downloaded from the parent
    step. It moves each Unit's files into permanent storage, and saves each
    Unit into the database and into the repository.

    :param item: The Image to save in Pulp
    :type  item: pulp_docker.plugins.models.Image
    """
    with open(os.path.join(self.get_working_dir(), item.image_id, 'json')) as json_file:
        metadata = json.load(json_file)
    # at least one old docker image did not have a size specified in
    # its metadata
    size = metadata.get('Size')
    # an older version of docker used a lowercase "p"
    parent = metadata.get('parent', metadata.get('Parent'))
    item.parent_id = parent
    item.size = size

    try:
        item.save()
    except NotUniqueError:
        # This image already exists in Pulp; reuse the stored unit and skip
        # re-importing its files.
        item = item.__class__.objects.get(**item.unit_key)
    else:
        # New unit: move each downloaded file into permanent storage.
        tmp_dir = os.path.join(self.get_working_dir(), item.image_id)
        for name in os.listdir(tmp_dir):
            path = os.path.join(tmp_dir, name)
            item.safe_import_content(path, location=os.path.basename(path))

    repo_controller.associate_single_unit(self.get_repo().repo_obj, item)
    return item
def _import_manifest_list(conduit, unit, dest_repo):
    """
    Import a Manifest List and its referenced image manifests.

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param unit: The Manifest List to import
    :type  unit: pulp_docker.plugins.Model.ManifestList
    :param dest_repo: The destination repository that the ManifestList is being
                      imported to.
    :type  dest_repo: pulp.server.db.model.Repository

    :return: list of Units that were copied to the destination repository
    :rtype:  list
    """
    units_added = set()

    # Gather the digests of all image manifests this list references.
    manifest_digests = set(unit.manifests)
    if unit.amd64_digest:
        manifest_digests.add(unit.amd64_digest)

    # Import each referenced manifest (which in turn imports its blobs).
    for manifest in models.Manifest.objects.filter(digest__in=sorted(manifest_digests)):
        units_added |= set(DockerImporter._import_manifest(conduit, manifest, dest_repo))

    # Finally, add the manifest list itself.
    repository.associate_single_unit(dest_repo, unit)
    units_added.add(unit)
    return units_added
def _add_new_module(self, downloader, module):
    """
    Performs the tasks for downloading and saving a new unit in Pulp.

    This method entirely skips modules that are already in the repository.

    :param downloader: downloader instance to use for retrieving the unit
    :type  downloader: child of
           pulp_puppet.plugins.importers.downloaders.base.BaseDownloader
    :param module: module to download and add
    :type  module: pulp_puppet.plugins.db.models.Module
    """
    try:
        # Download the bits
        downloaded_filename = downloader.retrieve_module(self.progress_report, module)

        # Extract the extra metadata into the module
        metadata = metadata_module.extract_metadata(downloaded_filename, self.repo.working_dir)

        # Overwrite the author and name, which are derived from the filename
        metadata.update(Module.split_filename(metadata['name']))

        # Create and save the Module built from the extracted metadata
        module = Module.from_metadata(metadata)
        module.set_storage_path(os.path.basename(downloaded_filename))
        try:
            module.save_and_import_content(downloaded_filename)
        except NotUniqueError:
            # The module already exists in Pulp; reuse the stored unit.
            module = module.__class__.objects.get(**module.unit_key)

        # Associate the module with the repo
        repo_controller.associate_single_unit(self.repo.repo_obj, module)
    finally:
        # Always clean up the downloader's temporary artifacts, even on error.
        downloader.cleanup_module(module)
def process_main(self, item):
    """
    This method gets called with each Unit that was downloaded from the parent
    step. It moves each Unit's files into permanent storage, and saves each
    Unit into the database and into the repository.

    :param item: The Image to save in Pulp
    :type  item: pulp_docker.plugins.models.Image
    """
    with open(os.path.join(self.get_working_dir(), item.image_id, 'json')) as json_file:
        metadata = json.load(json_file)
    # at least one old docker image did not have a size specified in
    # its metadata
    size = metadata.get('Size')
    # an older version of docker used a lowercase "p"
    parent = metadata.get('parent', metadata.get('Parent'))
    item.parent_id = parent
    item.size = size

    tmp_dir = os.path.join(self.get_working_dir(), item.image_id)
    # BUGFIX: a bare save() raised an unhandled NotUniqueError when the image
    # already existed in Pulp, and import_content() failed on re-imported
    # files. Handle the duplicate by reusing the stored unit and only import
    # content for genuinely new units (same pattern as the sibling
    # implementation of this step).
    try:
        item.save()
    except NotUniqueError:
        item = item.__class__.objects.get(**item.unit_key)
    else:
        for name in os.listdir(tmp_dir):
            path = os.path.join(tmp_dir, name)
            item.safe_import_content(path, location=os.path.basename(path))

    repo_controller.associate_single_unit(self.get_repo().repo_obj, item)
def _import_manifest(conduit, unit, dest_repo):
    """
    Import a Manifest and its referenced Blobs.

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param unit: The Manifest to import
    :type  unit: pulp_docker.plugins.Model.Manifest
    :param dest_repo: The destination repository that the Manifest is being
                      imported to.
    :type  dest_repo: pulp.server.db.model.Repository

    :return: list of Units that were copied to the destination repository
    :rtype:  list
    """
    units_added = set()

    # Add the manifest itself first, then catalog its referenced blobs.
    repository.associate_single_unit(dest_repo, unit)
    units_added.add(unit)
    blob_digests = {layer.blob_sum for layer in unit.fs_layers}

    # Import each referenced blob into the destination repository.
    for blob in models.Blob.objects.filter(digest__in=sorted(blob_digests)):
        units_added |= set(DockerImporter._import_blob(conduit, blob, dest_repo))
    return units_added
def download_succeeded(self, report):
    """
    The callback when a download succeeds.

    :param report: the report for the succeeded download.
    :type  report: nectar.report.DownloadReport
    """
    model = report.data
    try:
        self._verify_size(model, report)
        self._verify_checksum(model, report)
    except verification.VerificationException:
        # The verify methods populate the error details of the progress report.
        # There is also no need to clean up the bad file as the sync will blow
        # away the temp directory after it finishes. Simply punch out so the
        # good unit handling below doesn't run.
        return
    except verification.InvalidChecksumType:
        return

    # these are the only types we store repo metadata snippets on in the DB
    if isinstance(model, (models.RPM, models.SRPM)):
        self.metadata_files.add_repodata(model)

    # Remove any pre-existing unit with the same NEVRA before saving this one.
    purge.remove_unit_duplicate_nevra(model, self.sync_conduit.repo)

    model.set_content(report.destination)
    model.save()

    repo_controller.associate_single_unit(self.sync_conduit.repo, model)

    # TODO consider that if an exception occurs before here maybe it shouldn't call success?
    self.progress_report['content'].success(model)
    self.sync_conduit.set_progress(self.progress_report)
def upload_unit(self, repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles a user request to upload a unit into a repository. This call
    should use the data provided to add the unit as if it were synchronized
    from an external source. This includes:

    * Initializing the unit through the conduit which populates the final
      destination of the unit.
    * Moving the unit from the provided temporary location into the unit's
      final destination.
    * Saving the unit in Pulp, which both adds the unit to Pulp's database and
      associates it to the repository.

    This call may be invoked for either units that do not already exist as
    well as re-uploading an existing unit.

    The metadata parameter is variable in its usage. In some cases, the unit
    may be almost exclusively metadata driven in which case the contents of
    this parameter will be used directly as the unit's metadata. In others, it
    may function to remove the importer's need to derive the unit's metadata
    from the uploaded unit file. In still others, it may be extraneous
    user-specified information that should be merged in with any derived unit
    metadata.

    Depending on the unit type, it is possible that this call will create
    multiple units within Pulp. It is also possible that this call will create
    one or more relationships between existing units.

    :param repo: metadata describing the repository
    :type  repo: pulp.plugins.model.Repository
    :param type_id: type of unit being uploaded
    :type  type_id: str
    :param unit_key: identifier for the unit, specified by the user
    :type  unit_key: dict
    :param metadata: any user-specified metadata for the unit
    :type  metadata: dict
    :param file_path: path on the Pulp server's filesystem to the temporary
           location of the uploaded file; may be None in the event that a
           unit is comprised entirely of metadata and has no bits associated
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_add.UnitAddConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :return: A dictionary describing the success or failure of the upload. It
             must contain the following keys:
               'success_flag': bool. Indicates whether the upload was
                               successful
               'summary':      json-serializable object, providing summary
               'details':      json-serializable object, providing details
    :rtype: dict
    """
    # Derive the unit's metadata directly from the uploaded archive.
    package = models.Package.from_archive(file_path)
    try:
        package.save_and_import_content(file_path)
    except NotUniqueError:
        # Re-upload of an existing package: reuse the stored unit.
        package = package.__class__.objects.get(**package.unit_key)
    repo_controller.associate_single_unit(repo.repo_obj, package)
    return {'success_flag': True, 'summary': {}, 'details': {}}
def perform_sync(self):
    """
    Perform the sync operation according to the config, and return a report.
    The sync progress will be reported through the sync_conduit.

    :return: The sync report
    :rtype:  pulp.plugins.model.SyncReport
    """
    # Get the manifest and download the ISOs that we are missing
    self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
    try:
        manifest = self._download_manifest()
    except (IOError, ValueError):
        # The IOError will happen if the file can't be retrieved at all, and
        # the ValueError will happen if the PULP_MANIFEST file isn't in the
        # expected format.
        return self.progress_report.build_final_report()

    # Discover what files we need to download and what we already have
    filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
    local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

    # Associate units that are already in Pulp
    if local_available_isos:
        search_dicts = [unit.unit_key for unit in local_available_isos]
        self.sync_conduit.associate_existing(models.ISO._content_type_id.default, search_dicts)

    # Deferred downloading (Lazy) entries.
    self.add_catalog_entries(local_missing_isos)

    self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

    # Download files and add units.
    if self.download_deferred:
        # Lazy sync: record the units as not-downloaded instead of fetching.
        for iso in local_missing_isos:
            iso.downloaded = False
            try:
                iso.save()
            except NotUniqueError:
                # The ISO already exists in Pulp; reuse the stored unit.
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            repo_controller.associate_single_unit(self.sync_conduit.repo, iso)
    else:
        self._download_isos(local_missing_isos)

    # Remove unwanted iso units
    if self._remove_missing_units:
        repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)

    # Report that we are finished. Note that setting the state to
    # STATE_ISOS_COMPLETE will automatically set the state to
    # STATE_ISOS_FAILED if the progress report has collected any errors. See
    # the progress_report's _set_state() method for the implementation of this
    # logic.
    self.progress_report.state = self.progress_report.STATE_COMPLETE
    report = self.progress_report.build_final_report()
    return report
def _handle_group_category_comps(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the creation of a package group, category or environment.

    If a file was uploaded, treat this as upload of a comps.xml file. If no
    file was uploaded, the process only creates the unit.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    if file_path is not None and os.path.getsize(file_path) > 0:
        # uploading a comps.xml: extract and save each supported unit type
        _get_and_save_file_units(file_path, group.process_group_element,
                                 group.GROUP_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_category_element,
                                 group.CATEGORY_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_environment_element,
                                 group.ENVIRONMENT_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_langpacks_element,
                                 group.LANGPACKS_TAG, conduit, repo)
    else:
        # uploading a package group, package category or package environment
        # unit_key fields take precedence over metadata fields
        unit_data = {}
        unit_data.update(metadata or {})
        unit_data.update(unit_key or {})
        try:
            unit = model_class(**unit_data)
        except TypeError:
            raise ModelInstantiationError()
        try:
            unit.save()
        except NotUniqueError:
            # The unit already exists in Pulp; reuse the stored unit.
            unit = unit.__class__.objects.filter(**unit.unit_key).first()
        repo_controller.associate_single_unit(repo, unit)
def _handle_group_category_comps(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the creation of a package group or category.

    If a file was uploaded, treat this as upload of a comps.xml file. If no
    file was uploaded, the process only creates the unit.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    if file_path is not None and os.path.getsize(file_path) > 0:
        # uploading a comps.xml
        repo_id = repo.repo_id
        # NOTE(review): this variant passes repo_id as the last argument while
        # sibling implementations pass the repo object — confirm against the
        # matching _get_and_save_file_units signature in this version.
        _get_and_save_file_units(file_path, group.process_group_element,
                                 group.GROUP_TAG, conduit, repo_id)
        _get_and_save_file_units(file_path, group.process_category_element,
                                 group.CATEGORY_TAG, conduit, repo_id)
        _get_and_save_file_units(file_path, group.process_environment_element,
                                 group.ENVIRONMENT_TAG, conduit, repo_id)
    else:
        # uploading a package group or package category
        # unit_key fields take precedence over metadata fields
        unit_data = {}
        unit_data.update(metadata or {})
        unit_data.update(unit_key or {})
        try:
            unit = model_class(**unit_data)
        except TypeError:
            raise ModelInstantiationError()
        unit.save()
        if file_path:
            unit.set_storage_path(os.path.basename(file_path))
            unit.safe_import_content(file_path)
        repo_controller.associate_single_unit(repo, unit)
def _handle_erratum(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an erratum. There is no file uploaded so the only
    steps are to save the metadata and optionally link the erratum to RPMs in
    the repository.

    NOTE: For now errata is handled differently than other units. Uploaded
    erratum should not overwrite the existing one if the latter exists, they
    should be merged. This is only because of the way erratum is stored in the
    MongoDB and it is in `our plans`_ to re-think how to do it correctly.

    .. _our plans: https://pulp.plan.io/issues/1803

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # unit_key fields take precedence over metadata fields
    unit_data = {}
    unit_data.update(metadata or {})
    unit_data.update(unit_key or {})

    existing_unit = model_class.objects.filter(**unit_key).first()
    new_unit = model_class(**unit_data)

    # Add repo_id to each collection of the pkglist of the new erratum
    for collection in new_unit.pkglist:
        collection['_pulp_repo_id'] = repo.repo_id

    unit = new_unit
    if existing_unit:
        # Merge into the existing erratum rather than overwriting it.
        existing_unit.merge_errata(new_unit)
        unit = existing_unit

    unit.save()

    if not config.get_boolean(CONFIG_SKIP_ERRATUM_LINK):
        repo_controller.associate_single_unit(repo, unit)
def _handle_erratum(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handle the upload of an erratum.

    No file content accompanies an erratum upload; this saves the metadata
    and, unless disabled by configuration, links the erratum to the repo.

    NOTE: Uploaded errata are merged into an existing erratum with the same
    unit key instead of overwriting it. This is only because of the way
    erratum is stored in the MongoDB and it is in `our plans`_ to re-think
    how to do it correctly.

    .. _our plans: https://pulp.plan.io/issues/1803

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # unit_key fields win over metadata fields of the same name
    data = dict(metadata or {})
    data.update(unit_key or {})

    existing = model_class.objects.filter(**unit_key).first()
    uploaded = model_class(**data)

    # Stamp every pkglist collection of the uploaded erratum with this repo's id
    for collection in uploaded.pkglist:
        collection['_pulp_repo_id'] = repo.repo_id

    if existing:
        existing.merge_errata(uploaded)
        unit = existing
    else:
        unit = uploaded

    unit.save()
    if not config.get_boolean(CONFIG_SKIP_ERRATUM_LINK):
        repo_controller.associate_single_unit(repo, unit)
def upload_unit(self, repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles a user request to upload a unit into a repository. This call
    should use the data provided to add the unit as if it were synchronized
    from an external source. This includes:

    * Initializing the unit through the conduit which populates the final
      destination of the unit.
    * Moving the unit from the provided temporary location into the unit's
      final destination.
    * Saving the unit in Pulp, which both adds the unit to Pulp's database
      and associates it to the repository.

    This call may be invoked for either units that do not already exist as
    well as re-uploading an existing unit.

    The metadata parameter is variable in its usage. In some cases, the unit
    may be almost exclusively metadata driven in which case the contents of
    this parameter will be used directly as the unit's metadata. In others,
    it may function to remove the importer's need to derive the unit's
    metadata from the uploaded unit file. In still others, it may be
    extraneous user-specified information that should be merged in with any
    derived unit metadata.

    Depending on the unit type, it is possible that this call will create
    multiple units within Pulp. It is also possible that this call will
    create one or more relationships between existing units.

    :param repo: metadata describing the repository
    :type  repo: pulp.plugins.model.Repository
    :param type_id: type of unit being uploaded
    :type  type_id: str
    :param unit_key: identifier for the unit, specified by the user
    :type  unit_key: dict
    :param metadata: any user-specified metadata for the unit
    :type  metadata: dict
    :param file_path: path on the Pulp server's filesystem to the temporary
           location of the uploaded file; may be None in the event that a
           unit is comprised entirely of metadata and has no bits associated
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_add.UnitAddConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :return: A dictionary describing the success or failure of the upload.
             It must contain the following keys:
               'success_flag': bool. Indicates whether the upload was successful
               'summary': json-serializable object, providing summary
               'details': json-serializable object, providing details
    :rtype: dict
    """
    # The unit is derived entirely from the uploaded archive; the unit_key
    # and metadata arguments are not consulted for this unit type.
    package = models.Package.from_archive(file_path)
    package.save_and_import_content(file_path)
    repo_controller.associate_single_unit(repo.repo_obj, package)
    return {'success_flag': True, 'summary': {}, 'details': {}}
def copy_rpms(units, source_repo, dest_repo, import_conduit, copy_deps, solver=None):
    """
    Copy RPMs from the source repo to the destination repo, and optionally
    copy dependencies as well. Dependencies are resolved recursively.

    :param units: iterable of Units
    :type  units: iterable of pulp_rpm.plugins.db.models.RPM
    :param source_repo: The repository we are copying units from.
    :type  source_repo: pulp.server.db.model.Repository
    :param dest_repo: The repository we are copying units to
    :type  dest_repo: pulp.server.db.model.Repository
    :param import_conduit: import conduit passed to the Importer
    :type  import_conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param copy_deps: if True, copies dependencies as specified in "Requires"
                      lines in the RPM metadata. Matches against NEVRAs and
                      Provides declarations that are found in the source
                      repository. Silently skips any dependencies that cannot
                      be resolved within the source repo.
    :param solver: an object that can be used for dependency solving. this is
                   useful so that data can be cached in the depsolving object
                   and re-used by each iteration of this method.
    :type  solver: pulp_rpm.plugins.importers.yum.depsolve.Solver

    :return: set of pulp.plugins.models.Unit that were copied
    :rtype:  set
    """
    unit_set = set()

    for unit in units:
        # we are passing in units that may have flattened "provides" metadata.
        # This flattened field is not used by associate_single_unit().
        repo_controller.associate_single_unit(dest_repo, unit)
        unit_set.add(unit)

    if copy_deps and unit_set:
        if solver is None:
            # created lazily so recursive calls can reuse the same cache
            solver = depsolve.Solver(source_repo)

        # This returns units that have a flattened 'provides' metadata field
        # for memory purposes (RHBZ #1185868)
        deps = solver.find_dependent_rpms(unit_set)

        # remove rpms already in the destination repo
        existing_units = set(existing.get_existing_units([dep.unit_key for dep in deps],
                                                         models.RPM, dest_repo))
        # the hash comparison for Units is unit key + type_id, the metadata
        # field is not used.
        to_copy = deps - existing_units

        _LOGGER.debug('Copying deps: %s' % str(sorted([x.name for x in to_copy])))
        if to_copy:
            # recurse to copy newly-discovered deps (and their deps in turn)
            unit_set |= copy_rpms(to_copy, source_repo, dest_repo, import_conduit,
                                  copy_deps, solver)

    return unit_set
def _import_modules(self, module_paths):
    """
    Import the puppet modules (tarballs) at the specified paths. This will
    also handle removing any modules in the local repository if they are no
    longer present on remote repository and the 'remove_missing' config value
    is True.

    :param module_paths: A list of paths to puppet module files.
    :type  module_paths: list
    """
    # map unit_key_str -> unit id for everything already in the database
    existing_module_ids_by_key = {}
    for module in Module.objects.only(*Module.unit_key_fields).all():
        existing_module_ids_by_key[module.unit_key_str] = module.id

    # build Module objects from each tarball's metadata, remembering which
    # local path each unit key came from
    remote_paths = {}
    list_of_modules = []
    for module_path in module_paths:
        puppet_manifest = self._extract_metadata(module_path)
        puppet_manifest.update(Module.split_filename(puppet_manifest['name']))
        module = Module.from_metadata(puppet_manifest)
        remote_paths[module.unit_key_str] = module_path
        list_of_modules.append(module)

    # let the step figure out which of the available units must be downloaded
    pub_step = publish_step.GetLocalUnitsStep(constants.IMPORTER_TYPE_ID,
                                              available_units=list_of_modules,
                                              repo=self.repo)
    pub_step.process_main()
    self.report.modules_total_count = len(pub_step.units_to_download)

    for module in pub_step.units_to_download:
        remote_path = remote_paths[module.unit_key_str]
        if self.canceled:
            return
        _logger.debug(IMPORT_MODULE, dict(mod=remote_path))
        module.set_storage_path(os.path.basename(remote_path))
        try:
            module.save_and_import_content(remote_path)
        except NotUniqueError:
            # the unit already exists in the DB; fetch the canonical copy so
            # the association below points at it
            module = module.__class__.objects.get(**module.unit_key)
        repo_controller.associate_single_unit(self.repo.repo_obj, module)

        self.report.modules_finished_count += 1
        self.report.update_progress()

    # Write the report, making sure we don't overwrite a failure in _fetch_modules
    if self.report.modules_state not in constants.COMPLETE_STATES:
        self.report.modules_state = constants.STATE_SUCCESS
    self.report.modules_execution_time = time() - self.started_fetch_modules
    self.report.update_progress()

    remove_missing = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
    if remove_missing is None:
        remove_missing = constants.DEFAULT_REMOVE_MISSING
    if remove_missing:
        self._remove_missing(existing_module_ids_by_key, remote_paths.keys())
def perform_sync(self):
    """
    Perform the sync operation according to the config, and return a report.
    The sync progress will be reported through the sync_conduit.

    :return: The sync report
    :rtype:  pulp.plugins.model.SyncReport
    """
    # Get the manifest and download the ISOs that we are missing
    self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
    try:
        manifest = self._download_manifest()
    except (IOError, ValueError):
        # The IOError will happen if the file can't be retrieved at all, and
        # the ValueError will happen if the PULP_MANIFEST file isn't in the
        # expected format.
        return self.progress_report.build_final_report()

    # Discover what files we need to download and what we already have
    filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
    local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

    # Associate units that are already in Pulp
    if local_available_isos:
        search_dicts = [unit.unit_key for unit in local_available_isos]
        self.sync_conduit.associate_existing(models.ISO._content_type_id.default,
                                             search_dicts)
        # Deferred downloading (Lazy) entries.
        self.add_catalog_entries(local_available_isos)

    self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

    # Download files and add units.
    if self.download_deferred:
        # lazy sync: record the units and catalog entries without fetching bits
        for iso in local_missing_isos:
            iso.downloaded = False
            try:
                iso.save()
            except NotUniqueError:
                # already in the DB; associate the stored copy instead
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            else:
                self.add_catalog_entries([iso])
            repo_controller.associate_single_unit(self.sync_conduit.repo, iso)
    else:
        self._download_isos(local_missing_isos)

    # Remove unwanted iso units
    if self._remove_missing_units:
        repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)

    # Report that we are finished. Note that setting the state to
    # STATE_ISOS_COMPLETE will automatically set the state to
    # STATE_ISOS_FAILED if the progress report has collected any errors.
    # See the progress_report's _set_state() method for the implementation
    # of this logic.
    self.progress_report.state = self.progress_report.STATE_COMPLETE
    report = self.progress_report.build_final_report()
    return report
def process_main(self, item=None):
    """
    Update tags based on the parent metadata.

    :param item: Not used by this step
    :type  item: None

    :raises PulpCodedValidationException: if the metadata lacks a 'name' or a
        digest, or if no manifest/manifest list with the given digest is
        associated with the repo
    """
    md = self.parent.metadata
    tag = md.get('name')
    if tag is None:
        raise PulpCodedValidationException(error_code=error_codes.DKR1019, field='name')

    # https://pulp.plan.io/issues/3250 - use manifest_digest if available
    digest = md.get('manifest_digest', md.get('digest'))
    if digest is None:
        raise PulpCodedValidationException(error_code=error_codes.DKR1019,
                                           field='manifest_digest')

    pulp_user_metadata = md.get('pulp_user_metadata')
    repo_id = self.parent.repo.id
    manifest_type_id = models.Manifest._content_type_id.default
    repo_manifest_ids = repository.get_associated_unit_ids(repo_id, manifest_type_id)

    # check if there is manifest with such id within the queried repo
    # since we don't know if the provided digest is of an image manifest or
    # manifest list we need to try both.
    manifests = models.Manifest.objects.filter(digest=digest, id__in=repo_manifest_ids)
    manifest_type = constants.MANIFEST_IMAGE_TYPE
    if manifests.count() == 0:
        # no image manifest matched; retry as a manifest list
        manifest_list_type_id = models.ManifestList._content_type_id.default
        repo_manifest_list_ids = repository.get_associated_unit_ids(
            repo_id, manifest_list_type_id)
        manifests = models.ManifestList.objects.filter(
            digest=digest, id__in=repo_manifest_list_ids)
        manifest_type = constants.MANIFEST_LIST_TYPE
        if manifests.count() == 0:
            raise PulpCodedValidationException(
                error_code=error_codes.DKR1010, digest=digest, repo_id=repo_id)

    new_tag = models.Tag.objects.tag_manifest(
        repo_id=self.parent.repo.id, tag_name=tag, manifest_digest=digest,
        schema_version=manifests[0].schema_version, manifest_type=manifest_type,
        pulp_user_metadata=pulp_user_metadata)
    if new_tag:
        # only associate when tag_manifest handed back a tag object
        repository.associate_single_unit(self.parent.repo.repo_obj, new_tag)
    self.parent.uploaded_unit = new_tag
def associate_rpm_unit(self, unit):
    """
    Associate a unit with the repo and record it as successfully synced.

    Intended to be the final step in the sync of a single unit.

    :param unit: A content unit
    :type  unit: pulp_rpm.plugins.db.models.RpmBase
    """
    conduit = self.conduit
    progress = self.progress_report
    repo_controller.associate_single_unit(conduit.repo, unit)
    progress['content'].success(unit)
    conduit.set_progress(progress)
def process_main(self):
    """
    Point every remote tag at its manifest in this repository.

    For each tag found in the remote repository, if a Tag object exists in
    this repository we need to make sure its manifest_digest attribute points
    at this Manifest. If not, we need to create one. We'll rely on the
    uniqueness constraint in MongoDB to allow us to try to create it, and if
    that fails we'll fall back to updating the existing one.
    """
    for tag_name, manifest in self.tagged_manifests.items():
        repo_obj = self.get_repo().repo_obj
        created_tag = models.Tag.objects.tag_manifest(repo_id=repo_obj.repo_id,
                                                      tag_name=tag_name,
                                                      manifest_digest=manifest.digest)
        if created_tag:
            repository.associate_single_unit(repo_obj, created_tag)
def process_main(self, item):
    """
    This method gets called with each Unit that was downloaded from the
    parent step. It moves each Unit's files into permanent storage, and saves
    each Unit into the database and into the repository.

    :param item: The Unit to save in Pulp.
    :type  item: pulp.server.db.model.FileContentUnit
    """
    # local import: keeps this fix self-contained in files that did not
    # previously import the exception
    from mongoengine import NotUniqueError

    item.set_storage_path(item.digest)
    try:
        item.save_and_import_content(os.path.join(self.get_working_dir(), item.digest))
    except NotUniqueError:
        # An identical unit already exists in the database (its unit key
        # collided on save). Fetch the canonical stored copy so the
        # association below points at it instead of crashing — this mirrors
        # the sibling implementation of this step elsewhere in the codebase.
        item = item.__class__.objects.get(**item.unit_key)
    repository.associate_single_unit(self.get_repo().repo_obj, item)
def migrate(*args, **kwargs):
    """
    For each puppet module check and if needed update module name format.

    There was a discrepancy in the way puppet module's name was stored in
    pulp, depending if it was synced from filesystem or uploaded. This
    migration finds puppet module units that have wrong format name and
    replaces it with a correct format name.
    """
    modules = Module.objects.filter(
        Q(name__contains='/') | Q(name__contains='-'))
    repos_to_rebuild = set()
    for puppet_unit in modules:
        try:
            author, name = puppet_unit['name'].split('-', 1)
        except ValueError:
            # This is the forge format, but Puppet still allows it
            author, name = puppet_unit['name'].split('/', 1)
        try:
            puppet_unit.name = name
            puppet_unit.save()
        except NotUniqueError:
            # find all repos that have this unit
            repos_with_unit = model.RepositoryContentUnit.objects.filter(
                unit_id=puppet_unit.id)
            repos_to_rebuild.update(repos_with_unit)
            # find unit with correct name
            correct_unit = Module.objects.filter(name=name).first()
            for repo in repos_with_unit:
                # unassociate wrong unit
                repo_controller.disassociate_units(repo, [puppet_unit])
                # associate correct unit to the list of the repos
                repo_controller.associate_single_unit(repo, correct_unit)

    repo_list = []
    for repo in repos_to_rebuild:
        repo_obj = model.Repository.objects.get_repo_or_missing_resource(
            repo.repo_id)
        repo_controller.rebuild_content_unit_counts(repo_obj)
        repo_list.append(repo.repo_id)

    repos_to_republish = model.Distributor.objects.filter(
        repo_id__in=repo_list, last_publish__ne=None)

    # redirect output to file; the context manager guarantees the handle is
    # closed even if the write raises (the original open/close leaked on error)
    path = os.path.join('/var/lib/pulp', '0005_puppet_module_name_change.txt')
    with open(path, 'w') as f:
        f.write(str([repo.repo_id for repo in repos_to_republish]))

    # interpolate AFTER the gettext lookup: translating an already-formatted
    # string can never match a catalog entry
    msg = _(
        '***Note. You may want to re-publish the list of repos found in %s.\n'
        ' This migration fixed an issue with modules installation related to wrong '
        'puppet_module name.') % path
    _log.info(msg)
def get_or_create_and_associate(cls, repo, release_unit, name):
    """
    Get or create a unit with the given name for a repo/release, associate it
    with the repository and return it.

    :param repo: the repository the unit belongs to; provides .id and .repo_obj
    :param release_unit: unit whose codename is recorded on the new unit
    :param name: name for the unit
    :return: the saved unit, or the pre-existing unit with the same unit key
    """
    unit = cls()
    unit.name = name
    unit.repoid = repo.id
    unit.release = release_unit.codename
    try:
        unit.save()
    except NotUniqueError:
        # a unit with this unit_key already exists; use the stored copy
        unit = cls.objects.filter(**unit.unit_key).first()
    repo_controller.associate_single_unit(repository=repo.repo_obj, unit=unit)
    return unit
def import_units(self, source_repo, dest_repo, import_conduit, config, units=None):
    """
    Import content units into the given repository.

    When ``units`` is None, every Package unit in the source repository is
    imported; otherwise only the supplied pre-filtered units are. Each unit
    is associated with the destination repository directly (a shallow
    association — changes to the unit are reflected in all repositories that
    reference it).

    :param source_repo: metadata describing the repository containing the units to import
    :type  source_repo: pulp.plugins.model.Repository
    :param dest_repo: metadata describing the repository to import units into
    :type  dest_repo: pulp.plugins.model.Repository
    :param import_conduit: provides access to relevant Pulp functionality
    :type  import_conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param config: plugin configuration
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :param units: optional list of pre-filtered units to import
    :type  units: list of pulp.plugins.model.Unit

    :return: list of Unit instances that were saved to the destination repository
    :rtype:  list
    """
    if units is None:
        querysets = repo_controller.get_unit_model_querysets(
            source_repo.repo_obj.repo_id, models.Package)
        units = chain(*querysets)

    imported = list(units)
    for unit in imported:
        repo_controller.associate_single_unit(dest_repo.repo_obj, unit)
    return imported
def _associate_unit(dest_repo, unit, config):
    """
    Associate one particular unit with the destination repository. There are
    behavioral exceptions based on type:

    Group, Category, Environment and Yum Metadata File units need to have
    their "repo_id" attribute set.

    RPMs are convenient to do all as one block, for the purpose of dependency
    resolution. So this method skips RPMs and lets them be done together by
    other means.

    :param dest_repo: destination repo
    :type  dest_repo: pulp.server.db.model.Repository
    :param unit: Unit to be copied
    :type  unit: pulp.server.db.model.ContentUnit
    :param config: configuration instance passed to the importer of the
                   destination repo
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :return: copied unit or None if the unit was not copied
    :rtype:  pulp.server.db.model.ContentUnit
    """
    # unit types that carry a repo_id and therefore need a per-repo copy
    types_to_be_copied = (
        models.PackageGroup,
        models.PackageCategory,
        models.PackageEnvironment,
        models.PackageLangpacks
    )
    if isinstance(unit, types_to_be_copied):
        return associate_copy_for_repo(unit, dest_repo)
    elif isinstance(unit, models.RPM):
        # copy will happen in one batch
        return unit
    elif isinstance(unit, models.YumMetadataFile):
        return associate_copy_for_repo(unit, dest_repo, True)
    elif isinstance(unit, (models.DRPM, models.SRPM)):
        if rpm_parse.signature_enabled(config):
            if unit.downloaded:
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    # unit failed the signature filter: log and skip the copy
                    _LOGGER.debug(e)
                    return
        repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
        return unit
    else:
        repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
        return unit
def test_unit_association(self, mock_get_timestamp, mock_rcu_objects):
    """associate_single_unit() should upsert a RepositoryContentUnit record."""
    mock_get_timestamp.return_value = 'foo_tstamp'
    unit = DemoModel(id='bar', key_field='baz')
    mock_repo = MagicMock(repo_id='foo')

    repo_controller.associate_single_unit(mock_repo, unit)

    mock_rcu_objects.assert_called_once_with(
        repo_id='foo',
        unit_id='bar',
        unit_type_id=DemoModel.unit_type_id)
    mock_rcu_objects.return_value.update_one.assert_called_once_with(
        set_on_insert__created='foo_tstamp',
        set__updated='foo_tstamp',
        upsert=True)
def migrate(*args, **kwargs):
    """
    For each puppet module check and if needed update module name format.

    There was a discrepancy in the way puppet module's name was stored in
    pulp, depending if it was synced from filesystem or uploaded. This
    migration finds puppet module units that have wrong format name and
    replaces it with a correct format name.
    """
    modules = Module.objects.filter(Q(name__contains="/") | Q(name__contains="-"))
    repos_to_rebuild = set()
    for puppet_unit in modules:
        try:
            author, name = puppet_unit["name"].split("-", 1)
        except ValueError:
            # This is the forge format, but Puppet still allows it
            author, name = puppet_unit["name"].split("/", 1)
        try:
            puppet_unit.name = name
            puppet_unit.save()
        except NotUniqueError:
            # find all repos that have this unit
            repos_with_unit = model.RepositoryContentUnit.objects.filter(unit_id=puppet_unit.id)
            repos_to_rebuild.update(repos_with_unit)
            # find unit with correct name
            correct_unit = Module.objects.filter(name=name).first()
            for repo in repos_with_unit:
                # unassociate wrong unit
                repo_controller.disassociate_units(repo, [puppet_unit])
                # associate correct unit to the list of the repos
                repo_controller.associate_single_unit(repo, correct_unit)

    repo_list = []
    for repo in repos_to_rebuild:
        repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo.repo_id)
        repo_controller.rebuild_content_unit_counts(repo_obj)
        repo_list.append(repo.repo_id)

    repos_to_republish = model.Distributor.objects.filter(repo_id__in=repo_list,
                                                          last_publish__ne=None)

    # redirect output to file; the context manager guarantees the handle is
    # closed even if the write raises (the original open/close leaked on error)
    path = os.path.join("/var/lib/pulp", "0005_puppet_module_name_change.txt")
    with open(path, "w") as f:
        f.write(str([repo.repo_id for repo in repos_to_republish]))

    # interpolate AFTER the gettext lookup: translating an already-formatted
    # string can never match a catalog entry
    msg = _(
        "***Note. You may want to re-publish the list of repos found in %s.\n"
        " This migration fixed an issue with modules installation related to wrong "
        "puppet_module name."
    ) % path
    _log.info(msg)
def _import_modules(self, module_paths):
    """
    Import the puppet modules (tarballs) at the specified paths. This will
    also handle removing any modules in the local repository if they are no
    longer present on remote repository and the 'remove_missing' config value
    is True.

    :param module_paths: A list of paths to puppet module files.
    :type  module_paths: list
    """
    # map unit_key_str -> unit id for everything already in the database
    existing_module_ids_by_key = {}
    for module in Module.objects.only(*Module.unit_key_fields).all():
        existing_module_ids_by_key[module.unit_key_str] = module.id

    remote_unit_keys = []
    for module_path in module_paths:
        if self.canceled:
            return
        puppet_manifest = self._extract_metadata(module_path)
        module = Module.from_metadata(puppet_manifest)
        remote_unit_keys.append(module.unit_key_str)

        # Even though we've already basically processed this unit, not doing
        # this makes the progress reporting confusing because it shows Pulp
        # always importing all the modules.
        if module.unit_key_str in existing_module_ids_by_key:
            self.report.modules_total_count -= 1
            continue
        _logger.debug(IMPORT_MODULE, dict(mod=module_path))

        module.set_storage_path(os.path.basename(module_path))
        module.save_and_import_content(module_path)
        repo_controller.associate_single_unit(self.repo.repo_obj, module)

        self.report.modules_finished_count += 1
        self.report.update_progress()

    # Write the report, making sure we don't overwrite a failure in _fetch_modules
    if self.report.modules_state not in constants.COMPLETE_STATES:
        self.report.modules_state = constants.STATE_SUCCESS
    self.report.modules_execution_time = time() - self.started_fetch_modules
    self.report.update_progress()

    remove_missing = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
    if remove_missing is None:
        remove_missing = constants.DEFAULT_REMOVE_MISSING
    if remove_missing:
        self._remove_missing(existing_module_ids_by_key, remote_unit_keys)
def _import_modules(self, module_paths):
    """
    Import the puppet modules (tarballs) at the specified paths. This will
    also handle removing any modules in the local repository if they are no
    longer present on remote repository and the 'remove_missing' config value
    is True.

    :param module_paths: A list of paths to puppet module files.
    :type  module_paths: list
    """
    # map unit_key_str -> unit id for everything already in the database
    existing_module_ids_by_key = {}
    for module in Module.objects.only(*Module.unit_key_fields).all():
        existing_module_ids_by_key[module.unit_key_str] = module.id

    remote_unit_keys = []
    for module_path in module_paths:
        if self.canceled:
            return
        puppet_manifest = self._extract_metadata(module_path)
        module = Module.from_metadata(puppet_manifest)
        remote_unit_keys.append(module.unit_key_str)

        # Even though we've already basically processed this unit, not doing
        # this makes the progress reporting confusing because it shows Pulp
        # always importing all the modules.
        if module.unit_key_str in existing_module_ids_by_key:
            self.report.modules_total_count -= 1
            continue
        _logger.debug(IMPORT_MODULE, dict(mod=module_path))

        module.set_content(module_path)
        module.save()
        repo_controller.associate_single_unit(self.repo.repo_obj, module)

        self.report.modules_finished_count += 1
        self.report.update_progress()

    # Write the report, making sure we don't overwrite a failure in _fetch_modules
    if self.report.modules_state not in constants.COMPLETE_STATES:
        self.report.modules_state = constants.STATE_SUCCESS
    self.report.modules_execution_time = time() - self.started_fetch_modules
    self.report.update_progress()

    remove_missing = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
    if remove_missing is None:
        remove_missing = constants.DEFAULT_REMOVE_MISSING
    if remove_missing:
        self._remove_missing(existing_module_ids_by_key, remote_unit_keys)

    # keep the repo's cached unit counts in sync with the changes made above
    repo_controller.rebuild_content_unit_counts(self.repo.repo_obj)
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.
    This checks for the unit in the db as well as for the actual file on the
    filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the
    actual file is performed only for the supported unit types.

    :param wanted: iterable of units as namedtuples
    :type  wanted: iterable
    :param conduit: repo sync conduit
    :type  conduit: pulp.plugins.conduits.repo_sync.RepoSync
    :param config: configuration instance passed to the importer
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog: Deferred downloading catalog.
    :type  catalog: pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found on the server.
    :rtype:  set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            # Existing RPMs, DRPMs and SRPMs are disqualified when the
            # associated package file does not exist and downloading is not
            # deferred.
            if not download_deferred and unit_type in (
                    ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM):
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            catalog.add(unit)
            if rpm_parse.signature_enabled(config):
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    # unit failed the signature filter: skip association,
                    # leaving it in the still-wanted set
                    _LOGGER.debug(e)
                    continue
            repo_controller.associate_single_unit(conduit.repo, unit)
            # found and associated, so no longer wanted
            values.discard(unit.unit_key_as_named_tuple)
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted
def add_rpm_unit(self, metadata_files, unit):
    """
    Add the specified RPM unit.

    Records the unit's repodata, stores it at its storage path, saves it to
    the database, associates it with the repo being synced and reports the
    unit as successfully processed.

    :param metadata_files: metadata files object.
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    :param unit: A content unit.
    :type  unit: pulp_rpm.plugins.db.models.RpmBase
    """
    metadata_files.add_repodata(unit)
    unit.set_storage_path(unit.filename)
    unit.save()
    repo_controller.associate_single_unit(self.conduit.repo, unit)
    self.progress_report['content'].success(unit)
    self.conduit.set_progress(self.progress_report)
def add_rpm_unit(self, metadata_files, unit):
    """
    Save and associate the given RPM unit with the repo being synced.

    :param metadata_files: metadata files object.
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    :param unit: A content unit.
    :type  unit: pulp_rpm.plugins.db.models.RpmBase
    """
    metadata_files.add_repodata(unit)
    unit.set_storage_path(unit.filename)
    unit.save()

    conduit = self.conduit
    progress = self.progress_report
    repo_controller.associate_single_unit(conduit.repo, unit)
    progress['content'].success(unit)
    conduit.set_progress(progress)
def _import_blob(conduit, unit, dest_repo):
    """
    Import a single Blob into the destination repository.

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param unit: The Blob to import
    :type  unit: pulp_docker.plugins.Model.Blob
    :param dest_repo: The destination repository that the Blob is being imported to.
    :type  dest_repo: pulp.server.db.model.Repository

    :return: list containing the Blob that was copied to the destination repository
    :rtype:  list
    """
    repository.associate_single_unit(dest_repo, unit)
    copied = [unit]
    return copied
def process_main(self, item):
    """
    This method gets called with each Unit that was downloaded from the
    parent step. It moves each Unit's files into permanent storage, and saves
    each Unit into the database and into the repository.

    :param item: The Unit to save in Pulp.
    :type  item: pulp.server.db.model.FileContentUnit
    """
    item.set_storage_path(item.digest)
    try:
        item.save_and_import_content(os.path.join(self.get_working_dir(), item.digest))
    except NotUniqueError:
        # an identical unit already exists; associate the stored copy instead
        item = item.__class__.objects.get(**item.unit_key)
    repository.associate_single_unit(self.get_repo().repo_obj, item)
def _import_blob(conduit, unit, dest_repo):
    """
    Import a Blob.

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param unit: The Blob to import
    :type  unit: pulp_docker.plugins.Model.Blob
    :param dest_repo: The destination repository that the Blob is being imported to.
    :type  dest_repo: pulp.server.db.model.Repository

    :return: list containing the Blob that was copied to the destination repository
    :rtype:  list
    """
    # a simple association is all that is needed; the conduit is unused here
    repository.associate_single_unit(dest_repo, unit)
    return [unit]
def import_units(self, source_repo, dest_repo, import_conduit, config, units=None):
    """
    Import content units into the given repository.

    Called when a user copies a unit between repositories or adds an orphaned
    unit to a repository. This implementation performs a simple association:
    the existing units are linked directly to the destination repository, so
    changes to a unit are reflected in every repository that references it.

    If ``units`` is None, every Package unit in the source repository is
    imported; otherwise only the pre-filtered units supplied by the caller
    are imported.

    :param source_repo:    metadata describing the repository containing the units to import
    :type  source_repo:    pulp.plugins.model.Repository
    :param dest_repo:      metadata describing the repository to import units into
    :type  dest_repo:      pulp.plugins.model.Repository
    :param import_conduit: provides access to relevant Pulp functionality
    :type  import_conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param config:         plugin configuration
    :type  config:         pulp.plugins.config.PluginCallConfiguration
    :param units:          optional list of pre-filtered units to import
    :type  units:          list of pulp.plugins.model.Unit
    :return:               list of Unit instances that were saved to the destination repository
    :rtype:                list
    """
    if units is None:
        querysets = repo_controller.get_unit_model_querysets(
            source_repo.repo_obj.repo_id, models.Package)
        units = chain(*querysets)
    imported = list(units)
    for unit in imported:
        repo_controller.associate_single_unit(dest_repo.repo_obj, unit)
    return imported
def handle_uploaded_unit(repo, type_id, unit_key, metadata, file_path, conduit):
    """
    Handle an upload request for a single puppet module.

    Moves the uploaded file from its temporary location into permanent Pulp
    storage and records the unit and its repository association in the
    database.

    :param repo:      repository into which the unit is being uploaded
    :type  repo:      pulp.plugins.model.Repository
    :param type_id:   type of unit being uploaded
    :type  type_id:   str
    :param unit_key:  unique identifier for the unit
    :type  unit_key:  dict
    :param metadata:  extra data about the unit
    :type  metadata:  dict
    :param file_path: temporary location of the uploaded file
    :type  file_path: str
    :param conduit:   for calls back into Pulp
    :type  conduit:   pulp.plugins.conduit.upload.UploadConduit
    """
    if type_id != constants.TYPE_PUPPET_MODULE:
        raise NotImplementedError()

    # Read the module metadata out of the uploaded archive, then overwrite
    # the author and name fields derived from the filename.
    extracted_data = metadata_parser.extract_metadata(file_path, repo.working_dir)
    extracted_data.update(Module.split_filename(extracted_data['name']))
    uploaded_module = Module.from_metadata(extracted_data)

    # Rename the file so it carries the original module name.
    upload_dir = os.path.dirname(file_path)
    new_file_path = os.path.join(upload_dir, uploaded_module.puppet_standard_filename())
    shutil.move(file_path, new_file_path)

    uploaded_module.set_storage_path(os.path.basename(new_file_path))
    try:
        uploaded_module.save_and_import_content(new_file_path)
    except NotUniqueError:
        # The module already exists; reuse the stored unit for association.
        uploaded_module = uploaded_module.__class__.objects.get(
            **uploaded_module.unit_key)
    repo_controller.associate_single_unit(repo.repo_obj, uploaded_module)

    return {'success_flag': True, 'summary': '', 'details': {}}
def process_main(self):
    """
    For each tag found in the remote repository, make sure a Tag object in
    this repository has its manifest_digest pointing at the tagged Manifest.

    tag_manifest() leans on the MongoDB uniqueness constraint: it tries to
    create the Tag and falls back to updating the existing one, returning a
    new Tag object only when one was created.
    """
    self.total_units = len(self.tagged_manifests)
    for tag_name, manifest, manifest_type in self.tagged_manifests:
        created = models.Tag.objects.tag_manifest(
            repo_id=self.get_repo().repo_obj.repo_id,
            tag_name=tag_name,
            manifest_digest=manifest.digest,
            schema_version=manifest.schema_version,
            manifest_type=manifest_type)
        if created:
            # Only newly created Tag units need a repository association.
            repository.associate_single_unit(self.get_repo().repo_obj, created)
        self.progress_successes += 1
def _associate_unit(dest_repo, unit, config):
    """
    Associate one particular unit with the destination repository. There are
    behavioral exceptions based on type:

    Group, Category, Environment and Yum Metadata File units need to have
    their "repo_id" attribute set.

    RPMs are convenient to do all as one block, for the purpose of dependency
    resolution. So this method skips RPMs and lets them be done together by
    other means

    :param dest_repo: destination repo
    :type  dest_repo: pulp.server.db.model.Repository
    :param unit: Unit to be copied
    :type  unit: pulp.server.db.model.ContentUnit
    :param config: configuration instance passed to the importer of the destination repo
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :return: copied unit or None if the unit was not copied
    :rtype:  pulp.server.db.model.ContentUnit
    """
    # Types that get a per-repo copy (their repo_id attribute must be set).
    types_to_be_copied = (models.PackageGroup,
                          models.PackageCategory,
                          models.PackageEnvironment,
                          models.PackageLangpacks)
    if isinstance(unit, types_to_be_copied):
        return associate_copy_for_repo(unit, dest_repo)
    elif isinstance(unit, models.RPM):
        # copy will happen in one batch
        return unit
    elif isinstance(unit, models.YumMetadataFile):
        return associate_copy_for_repo(unit, dest_repo, True)
    elif isinstance(unit, (models.DRPM, models.SRPM)):
        if rpm_parse.signature_enabled(config):
            if unit.downloaded:
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    # Signature filtering rejected the unit: log and return
                    # None so the caller treats it as "not copied".
                    _LOGGER.debug(e)
                    return
            # NOTE(review): nesting reconstructed from a collapsed source
            # line; association is assumed to proceed when filtering passed
            # (or the unit is not downloaded) — confirm against upstream.
            repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
            return unit
        else:
            repo_controller.associate_single_unit(repository=dest_repo, unit=unit)
            return unit
def download_succeeded(self, report):
    """
    Process a downloaded Python package.

    The package archive is opened and its PKG-INFO metadata file is read to
    determine all of its metadata. This can be slow for larger packages since
    they must be decompressed, but it is the only reliable way to get the
    metadata for older versions (the metadata file downloaded earlier does
    not carry them), and it reuses the same code path as uploads.

    The downloaded file's checksum is verified against the value from the
    manifest; on mismatch the report is marked failed and handed off to
    download_failed(). Otherwise the package is saved (or, if it already
    exists, fetched), its content imported, and the unit associated with the
    repository.

    :param report: The report that details the download
    :type  report: nectar.report.DownloadReport
    """
    _logger.info(
        _('Processing package retrieved from %(url)s.') % {'url': report.url})

    actual_checksum = models.Package.checksum(report.destination,
                                              report.data._checksum_type)
    if actual_checksum != report.data._checksum:
        report.state = 'failed'
        report.error_report = {
            'expected_checksum': report.data._checksum,
            'actual_checksum': actual_checksum,
        }
        return self.download_failed(report)

    package = models.Package.from_archive(report.destination)
    package.set_storage_path(os.path.basename(report.destination))
    try:
        package.save()
    except mongoengine.NotUniqueError:
        # Saved concurrently by another task; load the existing unit.
        package = models.Package.objects.get(name=package.name,
                                             version=package.version)
    package.import_content(report.destination)
    repo_controller.associate_single_unit(self.get_repo().repo_obj, package)
    super(DownloadPackagesStep, self).download_succeeded(report)
def handle_uploaded_unit(repo, type_id, unit_key, metadata, file_path, conduit):
    """
    Handle an upload unit request to the importer.

    Responsible for moving the unit from the temporary location where Pulp
    stored the upload to the final storage location dictated by Pulp, and for
    updating the database to reflect the unit and its repository association.

    :param repo:      repository into which the unit is being uploaded
    :type  repo:      pulp.plugins.model.Repository
    :param type_id:   type of unit being uploaded
    :type  type_id:   str
    :param unit_key:  unique identifier for the unit
    :type  unit_key:  dict
    :param metadata:  extra data about the unit
    :type  metadata:  dict
    :param file_path: temporary location of the uploaded file
    :type  file_path: str
    :param conduit:   for calls back into Pulp
    :type  conduit:   pulp.plugins.conduit.upload.UploadConduit
    """
    if type_id != constants.TYPE_PUPPET_MODULE:
        raise NotImplementedError()

    # Build the module from the metadata embedded in the archive; the author
    # and name fields come from the filename and win over the archive values.
    module_data = metadata_parser.extract_metadata(file_path, repo.working_dir)
    module_data.update(Module.split_filename(module_data['name']))
    module = Module.from_metadata(module_data)

    # Give the file back its standard puppet module filename.
    destination = os.path.join(os.path.dirname(file_path),
                               module.puppet_standard_filename())
    shutil.move(file_path, destination)
    module.set_storage_path(os.path.basename(destination))

    try:
        module.save_and_import_content(destination)
    except NotUniqueError:
        # Unit already present; associate the stored one instead.
        module = module.__class__.objects.get(**module.unit_key)
    repo_controller.associate_single_unit(repo.repo_obj, module)

    return {'success_flag': True, 'summary': '', 'details': {}}
def check_all_and_associate(wanted, conduit, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file on the
    filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the
    actual file is performed only for the supported unit types.

    :param wanted: iterable of units as namedtuples
    :type  wanted: iterable
    :param conduit: repo sync conduit
    :type  conduit: pulp.plugins.conduits.repo_sync.RepoSync
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog: Deferred downloading catalog.
    :type  catalog: pulp_rpm.plugins.importers.yum.sync.PackageCatalog
    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found on the server.
    :rtype: set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            # Existing RPMs, DRPMs and SRPMs are disqualified when the
            # associated package file does not exist and downloading is not
            # deferred.
            if not download_deferred and unit_type in (
                    ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM):
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            catalog.add(unit)
            repo_controller.associate_single_unit(conduit.repo, unit)
            # This unit was found and associated, so it is no longer wanted.
            values.discard(unit.unit_key_as_named_tuple)
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted
def process_main(self, item=None):
    """
    Update tags based on the parent metadata.

    The digest supplied in the metadata may belong to either an image
    manifest or a manifest list, so both collections are queried (restricted
    to units already associated with the repo). If neither contains the
    digest, a validation error is raised.

    :param item: Not used by this step
    :type  item: None
    :raises PulpCodedValidationException: when the digest matches neither a
        manifest nor a manifest list in the repository
    """
    tag_name = self.parent.metadata['name']
    digest = self.parent.metadata['digest']
    repo_id = self.parent.repo.id

    image_type_id = models.Manifest._content_type_id.default
    associated_ids = repository.get_associated_unit_ids(repo_id, image_type_id)
    manifests = models.Manifest.objects.filter(digest=digest, id__in=associated_ids)
    manifest_type = constants.MANIFEST_IMAGE_TYPE

    if manifests.count() == 0:
        # Not an image manifest in this repo; try manifest lists next.
        list_type_id = models.ManifestList._content_type_id.default
        associated_ids = repository.get_associated_unit_ids(repo_id, list_type_id)
        manifests = models.ManifestList.objects.filter(digest=digest,
                                                       id__in=associated_ids)
        manifest_type = constants.MANIFEST_LIST_TYPE
        if manifests.count() == 0:
            raise PulpCodedValidationException(
                error_code=error_codes.DKR1010, digest=digest, repo_id=repo_id)

    new_tag = models.Tag.objects.tag_manifest(
        repo_id=self.parent.repo.id,
        tag_name=tag_name,
        manifest_digest=digest,
        schema_version=manifests[0].schema_version,
        manifest_type=manifest_type)
    if new_tag:
        repository.associate_single_unit(self.parent.repo.repo_obj, new_tag)
def add_rpm_unit(self, metadata_files, unit):
    """
    Add the specified RPM, SRPM or DRPM unit.

    :param metadata_files: metadata files object.
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    :param unit: A content unit.
    :type  unit: pulp_rpm.plugins.db.models.RpmBase
    """
    # Repodata is only collected for RPM and SRPM units.
    if isinstance(unit, (models.RPM, models.SRPM)):
        metadata_files.add_repodata(unit)
    unit.set_storage_path(unit.filename)
    try:
        unit.save()
    except NotUniqueError:
        # Unit already saved by a concurrent task; use the stored copy.
        unit = unit.__class__.objects.filter(**unit.unit_key).first()
    repo_controller.associate_single_unit(self.conduit.repo, unit)
    self.progress_report['content'].success(unit)
    self.conduit.set_progress(self.progress_report)
def associate(self, repo):
    """
    Associate this unit with ``repo``.

    Updates the copy of this unit that already belongs to the repository, or
    creates a new copy when the repository has none.

    :param repo: repository to associate the unit with
    :return: the unit that was associated (self, an existing copy, or a
             freshly created one)
    """
    unit = self
    if unit.repoid != repo.repo_id:
        # Look for the repository's own copy of this codename.
        unit = self.__class__.objects.filter(
            repoid=repo.repo_id, codename=self.codename).first()
        if unit is None:
            # No copy exists yet; create one.
            unit = self.__class__()
            unit.repoid = repo.repo_id
            unit.codename = self.codename
    # Refresh the mutable data and persist before associating.
    unit.suite = self.suite
    unit.save()
    repo_controller.associate_single_unit(repository=repo, unit=unit)
    return unit
def download_succeeded(self, report):
    """
    Finish processing a successfully downloaded package.

    The downloaded bits are checksum-verified against the upstream value; on
    mismatch the report is marked failed and passed to download_failed().
    Otherwise the checksum is normalized to the default type if needed, the
    package is moved to its final location, saved (or fetched when a
    concurrent task already saved it), and associated with the repository.

    :param report: The report that details the download
    :type  report: nectar.report.DownloadReport
    """
    _logger.info(
        _('Processing package retrieved from %(url)s.') % {'url': report.url})
    package = report.data

    computed = models.Package.checksum(report.destination, package._checksum_type)
    if computed != package._checksum:
        report.state = 'failed'
        report.error_report = {
            'expected_checksum': package._checksum,
            'actual_checksum': computed,
        }
        return self.download_failed(report)

    # Unless the upstream checksum is already of the default type,
    # recalculate it with the default algorithm.
    if package._checksum_type != models.CHECKSUM_TYPE:
        package._checksum = models.Package.checksum(report.destination,
                                                    models.CHECKSUM_TYPE)
        package._checksum_type = models.CHECKSUM_TYPE

    package.set_storage_path(os.path.basename(report.destination))
    # If the same package was simultaneously created by another task, saving
    # raises NotUniqueError; in that case fetch the existing unit and
    # associate that one to this repo.
    try:
        package.save()
    except mongoengine.NotUniqueError:
        package = models.Package.objects.get(filename=package.filename)
    package.import_content(report.destination)
    repo_controller.associate_single_unit(self.get_repo().repo_obj, package)
    super(DownloadPackagesStep, self).download_succeeded(report)