def process_repomd_data_element(data_element):
    """
    Process the data elements of the repomd.xml file.

    This returns a file information dictionary with the following keys:

     * `name`: name of the element
     * `relative_path`: the path of the metadata file, relative to the repository URL
     * `checksum`: dictionary of `algorithm` and `hex_digest` keys and values
     * `size`: size of the metadata file, in bytes
     * `timestamp`: unix timestamp of the file's creation, as a float
     * `open_checksum`: optional checksum dictionary of uncompressed metadata file
     * `open_size`: optional size of the uncompressed metadata file, in bytes

    :param data_element: XML data element parsed from the repomd.xml file
    :return: file_info dictionary
    :rtype: dict
    """
    file_info = deepcopy(FILE_INFO_SKEL)
    file_info['name'] = data_element.attrib['type']

    location_element = data_element.find(LOCATION_TAG)
    if location_element is not None:
        file_info['relative_path'] = location_element.attrib['href']

    checksum_element = data_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        file_info['checksum']['algorithm'] = checksum_type
        file_info['checksum']['hex_digest'] = checksum_element.text

    size_element = data_element.find(SIZE_TAG)
    if size_element is not None:
        file_info['size'] = int(size_element.text)

    timestamp_element = data_element.find(TIMESTAMP_TAG)
    if timestamp_element is not None:
        file_info['timestamp'] = float(timestamp_element.text)

    open_checksum_element = data_element.find(OPEN_CHECKSUM_TAG)
    if open_checksum_element is not None:
        checksum_type = util.sanitize_checksum_type(open_checksum_element.attrib['type'])
        file_info['open_checksum']['algorithm'] = checksum_type
        file_info['open_checksum']['hex_digest'] = open_checksum_element.text

    open_size_element = data_element.find(OPEN_SIZE_TAG)
    if open_size_element is not None:
        file_info['open_size'] = int(open_size_element.text)

    # Release memory held by the already-processed XML. Iterating the element
    # directly replaces the deprecated getchildren() call, which was removed
    # in Python 3.9; behavior is otherwise identical.
    for child in list(data_element):
        child.clear()
    data_element.clear()

    return file_info
def process_repomd_data_element(data_element):
    """
    Process the data elements of the repomd.xml file.

    This returns a file information dictionary with the following keys:

     * `name`: name of the element
     * `relative_path`: the path of the metadata file, relative to the repository URL
     * `checksum`: dictionary of `algorithm` and `hex_digest` keys and values
     * `size`: size of the metadata file, in bytes
     * `timestamp`: unix timestamp of the file's creation, as a float
     * `open_checksum`: optional checksum dictionary of uncompressed metadata file
     * `open_size`: optional size of the uncompressed metadata file, in bytes

    :param data_element: XML data element parsed from the repomd.xml file
    :return: file_info dictionary
    :rtype: dict
    """
    file_info = deepcopy(FILE_INFO_SKEL)
    file_info['name'] = data_element.attrib['type']

    location_element = data_element.find(LOCATION_TAG)
    if location_element is not None:
        file_info['relative_path'] = location_element.attrib['href']

    checksum_element = data_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        file_info['checksum']['algorithm'] = checksum_type
        file_info['checksum']['hex_digest'] = checksum_element.text

    size_element = data_element.find(SIZE_TAG)
    if size_element is not None:
        file_info['size'] = int(size_element.text)

    timestamp_element = data_element.find(TIMESTAMP_TAG)
    if timestamp_element is not None:
        file_info['timestamp'] = float(timestamp_element.text)

    open_checksum_element = data_element.find(OPEN_CHECKSUM_TAG)
    if open_checksum_element is not None:
        checksum_type = util.sanitize_checksum_type(open_checksum_element.attrib['type'])
        file_info['open_checksum']['algorithm'] = checksum_type
        file_info['open_checksum']['hex_digest'] = open_checksum_element.text

    open_size_element = data_element.find(OPEN_SIZE_TAG)
    if open_size_element is not None:
        file_info['open_size'] = int(open_size_element.text)

    # Clear children then the element itself so a streaming parse does not
    # accumulate memory. getchildren() is deprecated (removed in Python 3.9);
    # iterating the element yields the same children.
    for child in list(data_element):
        child.clear()
    data_element.clear()

    return file_info
def validate(self, value):
    """
    Ensure that *value* names a checksum type known to the pulp platform.

    :param value: The value to validate
    :type  value: basestring
    :return: None
    """
    # Run the generic string-field validation from the base class first.
    super(ChecksumTypeStringField, self).validate(value)
    # Then sanitize the value; presumably this raises for unrecognized
    # checksum types (see platform util) — the return value is discarded.
    util.sanitize_checksum_type(value)
def test_SHA256_to_sha256(self):
    """Verify that the uppercase name "SHA256" sanitizes to "sha256"."""
    self.assertEqual(util.sanitize_checksum_type('SHA256'), 'sha256')
def process_package_element(element):
    """
    Build a models.DRPM from one "drpm" block of a prestodelta.xml file.

    :param element: object representing one "DRPM" block from the XML file
    :type  element: xml.etree.ElementTree.Element

    :return: models.DRPM instance for the XML block
    :rtype: pulp_rpm.plugins.db.models.DRPM
    """
    delta = element.find('delta')
    checksum = delta.find('checksum')

    # Assemble all model fields in one mapping: the new package's NEVRA comes
    # from the outer element, the old package's EVR and the payload details
    # come from the nested <delta> element.
    fields = {
        'new_package': element.attrib['name'],
        'epoch': element.attrib['epoch'],
        'version': element.attrib['version'],
        'release': element.attrib['release'],
        'arch': element.attrib['arch'],
        'oldepoch': delta.attrib['oldepoch'],
        'oldversion': delta.attrib['oldversion'],
        'oldrelease': delta.attrib['oldrelease'],
        'filename': delta.find('filename').text,
        'sequence': delta.find('sequence').text,
        'size': int(delta.find('size').text),
        'checksum': checksum.text,
        # normalize e.g. "SHA256" -> "sha256" before storing
        'checksumtype': util.sanitize_checksum_type(checksum.attrib['type']),
    }
    return models.DRPM(**fields)
def test_ShA_to_sha1(self):
    """Verify that the mixed-case name "ShA" sanitizes to "sha1"."""
    self.assertEqual(util.sanitize_checksum_type('ShA'), 'sha1')
def import_unknown_metadata_files(self, metadata_files):
    """
    Import metadata files whose type is not known to us. These are any files
    that we are not already parsing.

    :param metadata_files: object containing access to all metadata files
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    """
    for metadata_type, file_info in metadata_files.metadata.iteritems():
        if metadata_type not in metadata_files.KNOWN_TYPES:
            file_path = file_info['local_path']
            # normalize the checksum algorithm name before storing it
            checksum_type = file_info['checksum']['algorithm']
            checksum_type = util.sanitize_checksum_type(checksum_type)
            checksum = file_info['checksum']['hex_digest']
            # Find an existing model for this (data_type, repo) pair
            model = models.YumMetadataFile.objects.filter(
                data_type=metadata_type,
                repo_id=self.repo.repo_id).first()
            # If an existing model was found, update its checksum fields
            if model:
                model.checksum = checksum
                model.checksum_type = checksum_type
            else:
                # Else, create a new model
                model = models.YumMetadataFile(
                    data_type=metadata_type,
                    repo_id=self.repo.repo_id,
                    checksum=checksum,
                    checksum_type=checksum_type)
            # store/import must happen before association so the unit exists
            model.set_storage_path(os.path.basename(file_path))
            model.save_and_import_content(file_path)

            # associate/re-associate model to the repo
            repo_controller.associate_single_unit(self.repo, model)
def process_package_element(element):
    """
    Convert one "drpm" XML block from prestodelta.xml into a DRPM model.

    :param element: object representing one "DRPM" block from the XML file
    :type  element: xml.etree.ElementTree.Element

    :return: models.DRPM instance for the XML block
    :rtype: pulp_rpm.plugins.db.models.DRPM
    """
    delta_el = element.find('delta')
    checksum_el = delta_el.find('checksum')
    # normalize e.g. "SHA256" -> "sha256" before storing
    checksum_type = util.sanitize_checksum_type(checksum_el.attrib['type'])

    return models.DRPM(
        new_package=element.attrib['name'],
        epoch=element.attrib['epoch'],
        version=element.attrib['version'],
        release=element.attrib['release'],
        arch=element.attrib['arch'],
        oldepoch=delta_el.attrib['oldepoch'],
        oldversion=delta_el.attrib['oldversion'],
        oldrelease=delta_el.attrib['oldrelease'],
        filename=delta_el.find('filename').text,
        sequence=delta_el.find('sequence').text,
        size=int(delta_el.find('size').text),
        checksum=checksum_el.text,
        checksumtype=checksum_type)
def _migrate_rpmlike_units(unit_type):
    """
    This function performs the migration on RPMs, DRPMs, and SRPMs. These all have the same
    schema when it comes to checksumtype, so they can be treated the same way.

    :param unit_type: The unit_type_id, as found in pulp_rpm.common.ids.
    :type  unit_type: basestring
    """
    repos = connection.get_collection('repos')
    repo_content_units = connection.get_collection('repo_content_units')
    unit_collection = connection.get_collection('units_%s' % unit_type)

    for unit in unit_collection.find():
        try:
            sanitized_type = util.sanitize_checksum_type(unit['checksumtype'])
            if sanitized_type != unit['checksumtype']:
                # Let's see if we can get away with changing its checksumtype to the sanitized
                # value. If this works, we won't have to do anything else.
                # NOTE(review): checksumtype is presumably part of this unit's unique index,
                # which is what makes DuplicateKeyError possible here — confirm against the
                # collection's indexes.
                unit_collection.update({'_id': unit['_id']},
                                       {'$set': {'checksumtype': sanitized_type}})
        except errors.DuplicateKeyError:
            # Looks like there is already an identical unit with the sanitized checksum type.
            # This means we need to remove the current unit, but first we will need to change
            # any references to this unit to point to the other.
            conflicting_unit = unit_collection.find_one(
                {'name': unit['name'], 'epoch': unit['epoch'], 'version': unit['version'],
                 'release': unit['release'], 'arch': unit['arch'], 'checksum': unit['checksum'],
                 'checksumtype': sanitized_type})
            for rcu in repo_content_units.find({'unit_type_id': unit_type,
                                                'unit_id': unit['_id']}):
                # Now we must either switch the rcu from pointing to unit to pointing to
                # conflicting_unit, or delete the rcu if there is already one in the same repo.
                try:
                    msg = _('Updating %(repo_id)s to contain %(type)s %(conflicting)s instead of '
                            '%(old_id)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'],
                                 'old_id': unit['_id']}
                    _logger.debug(msg)
                    repo_content_units.update({'_id': rcu['_id']},
                                              {'$set': {'unit_id': conflicting_unit['_id']}})
                except errors.DuplicateKeyError:
                    # We will delete this RepoContentUnit since the sha1 RPM is already in the
                    # repository.
                    msg = _('Removing %(type)s %(old_id)s from repo %(repo_id)s since it '
                            'conflicts with %(conflicting)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'],
                                 'old_id': unit['_id']}
                    _logger.debug(msg)
                    repo_content_units.remove({'_id': rcu['_id']})
                    # In this case, we now need to decrement the repository's
                    # "content_unit_counts" for this unit_type by one, since we removed a unit
                    # from a repository.
                    repos.update(
                        {'id': rcu['repo_id']},
                        {'$inc': {'content_unit_counts.%s' % unit_type: -1}})
            # Now that we have removed or altered all references to the "sha" Unit, we need to
            # remove it since it is a duplicate.
            unit_collection.remove({'_id': unit['_id']})
def _migrate_yum_metadata_files():
    """
    Sanitize the checksum_type on every YumMetadataFile unit.

    This is mostly similar to _migrate_rpmlike_units, except that the checksum type field
    name is checksum_type instead of checksumtype, and there can't be any collisions since
    the checksum type isn't part of this unit's unit_key. This means we don't have to worry
    about the repo_content_units table.
    """
    collection = connection.get_collection('units_yum_repo_metadata_file')
    for unit in collection.find():
        original_type = unit['checksum_type']
        sanitized = util.sanitize_checksum_type(original_type)
        # skip the write entirely when the stored value is already sanitized
        if sanitized == original_type:
            continue
        collection.update({'_id': unit['_id']},
                          {'$set': {'checksum_type': sanitized}})
def _migrate_yum_metadata_files():
    """
    Migrate each YumMetadataFile to the sanitized form of its checksum_type.

    Unlike _migrate_rpmlike_units, the field here is named checksum_type (not checksumtype)
    and it is not part of the unit_key, so collisions cannot occur and repo_content_units
    never needs to be touched.
    """
    units = connection.get_collection('units_yum_repo_metadata_file')
    for doc in units.find():
        clean = util.sanitize_checksum_type(doc['checksum_type'])
        if clean != doc['checksum_type']:
            # only write back documents whose stored type actually changed
            units.update({'_id': doc['_id']}, {'$set': {'checksum_type': clean}})
def update_unit_files(unit, files):
    """
    Update the *files* list on the unit.

    :param unit: A distribution model object.
    :type  unit: pulp_rpm.plugins.db.models.Distribution
    :param files: List of distribution files.
    :type  files: list
    """
    # Seed from the existing entries only when unit.files is not already a
    # plain list (e.g. a mongoengine list proxy); otherwise start empty.
    # NOTE(review): this mirrors the original behavior — confirm the intent of
    # discarding existing entries when unit.files is a plain list.
    updated = list(unit.files) if not isinstance(unit.files, list) else []

    for entry in files:
        if entry[CHECKSUM_TYPE] is not None:
            # normalize the checksum type name in place before copying it out
            entry[CHECKSUM_TYPE] = util.sanitize_checksum_type(entry[CHECKSUM_TYPE])
        updated.append({
            RELATIVE_PATH: entry[RELATIVE_PATH],
            CHECKSUM: entry[CHECKSUM],
            CHECKSUM_TYPE: entry[CHECKSUM_TYPE]})

    unit.files = updated
def save_default_metadata_checksum_on_repo(self, metadata_files):
    """
    Determine the default checksum that should be used for metadata files and
    save it in the repo scratchpad.

    There is no good way to order a preference on the checksum type, so the
    first one found is used.

    :param metadata_files: object containing access to all metadata files
    :type  metadata_files: pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
    """
    checksum_type = None
    # take the algorithm of the first metadata entry that carries a checksum
    for file_name, file_info in metadata_files.metadata.iteritems():
        if 'checksum' in file_info:
            checksum_type = file_info['checksum']['algorithm']
            break

    if checksum_type:
        checksum_type = util.sanitize_checksum_type(checksum_type)
        scratchpad = self.conduit.get_repo_scratchpad()
        scratchpad[constants.SCRATCHPAD_DEFAULT_METADATA_CHECKSUM] = checksum_type
        self.conduit.set_repo_scratchpad(scratchpad)
def _migrate_errata():
    """
    Sanitize the checksum type recorded for each erratum's referenced RPMs.

    Since these sums aren't part of the unit key for erratum, this will not cause any
    collisions. The erratum also do not reference RPMs by unit_id, but by unit_key, so
    this is important.
    """
    errata = connection.get_collection('units_erratum')
    for erratum in errata.find():
        pkglist = erratum.get('pkglist', [])
        dirty = False
        for pkg_collection in pkglist:
            for pkg in pkg_collection.get('packages', []):
                checksum = pkg.get('sum')
                if not checksum:
                    # no recorded sum (missing key or empty value) — nothing to fix
                    continue
                clean_type = util.sanitize_checksum_type(checksum[0])
                if clean_type != checksum[0]:
                    # mutate the embedded list in place; the whole pkglist is
                    # written back below in a single update
                    checksum[0] = clean_type
                    dirty = True
        if dirty:
            errata.update({'_id': erratum['_id']}, {'$set': {'pkglist': pkglist}})
def process_successful_download_reports(unit, reports):
    """
    Once downloading is complete, add information about each file to this
    model instance. This is required before saving the new unit.

    :param unit: A distribution model object.
    :type  unit: pulp_rpm.plugins.db.models.Distribution
    :param reports: list of successful pulp.common.download.report.DownloadReport
    :type  reports: list
    """
    # Keep the existing entries only when unit.files is not already a plain
    # list (e.g. a mongoengine list proxy); otherwise start from scratch.
    # NOTE(review): mirrors the original behavior — confirm that dropping
    # entries for a plain-list unit.files is intentional.
    collected = list(unit.files) if not isinstance(unit.files, list) else []

    for report in reports:
        data = report.data
        if data[CHECKSUM_TYPE] is not None:
            # normalize the checksum type name in place before copying it out
            data[CHECKSUM_TYPE] = util.sanitize_checksum_type(data[CHECKSUM_TYPE])
        collected.append({
            RELATIVE_PATH: data[RELATIVE_PATH],
            CHECKSUM: data[CHECKSUM],
            CHECKSUM_TYPE: data[CHECKSUM_TYPE]})

    unit.files = collected
def _migrate_errata():
    """
    Visit each erratum and sanitize the checksum type of its referenced RPMs where needed.

    These sums are not part of the erratum unit key, so no collisions are possible.
    Errata reference RPMs by unit_key rather than unit_id, which is why the stored
    type must be kept in sync with the sanitized RPM checksumtype.
    """
    errata_collection = connection.get_collection('units_erratum')
    for erratum in errata_collection.find():
        pkglist = erratum.get('pkglist', [])
        needs_update = False
        for collection in pkglist:
            for package in collection.get('packages', []):
                if 'sum' in package and package['sum']:
                    original = package['sum'][0]
                    sanitized = util.sanitize_checksum_type(original)
                    if sanitized != original:
                        package['sum'][0] = sanitized
                        needs_update = True
        if needs_update:
            # one write per erratum, replacing the whole embedded pkglist
            errata_collection.update({'_id': erratum['_id']},
                                     {'$set': {'pkglist': pkglist}})
def process_package_element(package_element):
    """
    Process a parsed primary.xml package element into a model instance.

    In addition to parsing the data, this templatizes the raw XML that gets added:
    the checksum element and the location href are modified IN PLACE on the passed
    element before it is serialized into ``raw_xml``.

    :param package_element: parsed primary.xml package element
    :return: package information dictionary
    :rtype: pulp_rpm.plugins.db.models.RPM
    """
    # (a stale comment about deepcopy was removed here; this function does not
    # use deepcopy)
    package_info = dict()

    name_element = package_element.find(NAME_TAG)
    if name_element is not None:
        package_info['name'] = name_element.text

    arch_element = package_element.find(ARCH_TAG)
    if arch_element is not None:
        package_info['arch'] = arch_element.text

    version_element = package_element.find(VERSION_TAG)
    if version_element is not None:
        package_info['version'] = version_element.attrib['ver']
        # 'rel' and 'epoch' are optional attributes on the version element
        package_info['release'] = version_element.attrib.get('rel', None)
        package_info['epoch'] = version_element.attrib.get('epoch', None)

    checksum_element = package_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        # normalize e.g. "SHA256" -> "sha256" before storing
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        package_info['checksumtype'] = checksum_type
        package_info['checksum'] = checksum_element.text

        # convert these to template targets that will be rendered at publish time
        checksum_element.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum_element.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    summary_element = package_element.find(SUMMARY_TAG)
    if summary_element is not None:
        package_info['summary'] = summary_element.text

    description_element = package_element.find(DESCRIPTION_TAG)
    if description_element is not None:
        package_info['description'] = description_element.text

    url_element = package_element.find(URL_TAG)
    if url_element is not None:
        package_info['url'] = url_element.text

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        package_info['time'] = int(time_element.attrib['file'])
        package_info['build_time'] = int(time_element.attrib['build'])

    size_element = package_element.find(SIZE_TAG)
    if size_element is not None:
        package_info['size'] = int(size_element.attrib['package'])

    location_element = package_element.find(LOCATION_TAG)
    if location_element is not None:
        href = location_element.attrib['href']
        base_url = None
        # the base attribute may appear plain or namespace-qualified ('}base')
        for attribute, value in location_element.items():
            if attribute == 'base' or attribute.endswith('}base'):
                base_url = value
        package_info['base_url'] = base_url
        filename = os.path.basename(href)
        package_info['relativepath'] = href
        package_info['filename'] = filename
        # we don't make any attempt to preserve the original directory structure
        # this element will end up being converted back to XML and stuffed into
        # the DB on the unit object, so this is our chance to modify it.
        location_element.attrib['href'] = filename

    format_element = package_element.find(FORMAT_TAG)
    package_info.update(_process_format_element(format_element))

    if package_info['arch'].lower() == 'src':
        model = models.SRPM(**package_info)
    else:
        model = models.RPM(**package_info)

    # add the raw XML so it can be saved in the database later
    rpm_namespace = utils.Namespace('rpm', RPM_SPEC_URL)
    model.raw_xml = utils.element_to_raw_xml(package_element, [rpm_namespace], COMMON_SPEC_URL)

    return model
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        if type_id == models.DRPM._content_type_id.default:
            unit = models.DRPM(**_extract_drpm_data(file_path))
        else:
            repodata = rpm_parse.get_package_xml(file_path, sumtype=util.TYPE_SHA256)
            package_xml = (utils.fake_xml_element(
                repodata['primary'],
                constants.COMMON_NAMESPACE).find(primary.PACKAGE_TAG))
            unit = primary.process_package_element(package_xml)
    except Exception:
        # deliberately broad: any extraction/parsing failure is reported to
        # the user as a single "invalid package" error code
        raise PulpCodedException(error_codes.RPM1016)

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        # note: pop() removes checksum_type so it is not later set on the unit
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009, checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    unit.checksumtype = util.TYPE_SHA256
    unit.checksum = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    unit.checksums = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    for key, value in metadata.items():
        setattr(unit, key, value)
    for key, value in unit_key.items():
        setattr(unit, key, value)

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets
        unit.signing_key = rpm_parse.package_signature(
            rpm_parse.package_headers(file_path))
        # construct filename from metadata (BZ #1101168)
        if type_id == models.SRPM._content_type_id.default:
            rpm_basefilename = "%s-%s-%s.src.rpm" % (unit.name, unit.version, unit.release)
        else:
            rpm_basefilename = "%s-%s-%s.%s.rpm" % (unit.name, unit.version,
                                                    unit.release, unit.arch)
        unit.relativepath = rpm_basefilename
        unit.filename = rpm_basefilename
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except TypeError:
        raise ModelInstantiationError()
    except NotUniqueError:
        # an identical unit already exists; reuse it for the association below
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        if type_id == models.DRPM._content_type_id.default:
            rpm_data = _extract_drpm_data(file_path)
        else:
            rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # was a bare ``except:``, which would also trap SystemExit and
        # KeyboardInterrupt; Exception is broad enough to log any real
        # extraction failure, and we re-raise either way.
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        # pop() removes checksum_type so it is not later copied onto the unit
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata or {})
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets
        repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
        _update_provides_requires(unit, repodata)
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # an identical unit already exists; reuse it for the association below
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
def parse_treeinfo_file(path):
    """
    The treefile seems to be approximately in INI format, which can be read
    by the standard library's ConfigParser.

    :param path: The absolute path to the treefile
    :return: instance of Distribution model, and a list of dicts describing
             the distribution's files
    :rtype: (pulp_rpm.plugins.db.models.Distribution, list of dict)

    :raises ValueError: if the file cannot be parsed or lacks required options
    """
    parser = ConfigParser.RawConfigParser()
    # the default implementation of this method makes all option names lowercase,
    # which we don't want. This is the suggested solution in the python.org docs.
    parser.optionxform = str

    with open(path) as fp:
        try:
            parser.readfp(fp)
        except ConfigParser.ParsingError:
            # wouldn't need this if ParsingError subclassed ValueError.
            raise ValueError(_('could not parse treeinfo file'))

    # apparently the 'variant' is optional. for example, it does not appear
    # in the RHEL 5.9 treeinfo file. This is how the previous importer
    # handled that.
    try:
        variant = parser.get(SECTION_GENERAL, 'variant')
    except ConfigParser.NoOptionError:
        variant = None
    # packagedir is likewise optional
    try:
        packagedir = parser.get(SECTION_GENERAL, KEY_PACKAGEDIR)
    except ConfigParser.NoOptionError:
        packagedir = None

    try:
        new_dist = Distribution(
            family=parser.get(SECTION_GENERAL, 'family'),
            variant=variant,
            version=parser.get(SECTION_GENERAL, 'version'),
            arch=parser.get(SECTION_GENERAL, 'arch'),
            packagedir=packagedir,
            timestamp=float(parser.get(SECTION_GENERAL, KEY_TIMESTAMP)))
        # Look for an existing distribution with the same unit key fields
        existing_dist = Distribution.objects.filter(
            family=new_dist.family,
            variant=new_dist.variant,
            version=new_dist.version,
            arch=new_dist.arch).first()
        if existing_dist:
            # update with the new information:
            existing_dist.packagedir = packagedir
            existing_dist.timestamp = new_dist.timestamp
            unit = existing_dist
        else:
            unit = new_dist
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        raise ValueError(
            'invalid treefile: could not find unit key components')

    files = {}
    # this section is likely to have all the files we care about listed with
    # checksums. But, it might not. Other sections checked below will only add
    # files to the "files" dict if they are not already present. For those cases,
    # there will not be checksums available.
    if parser.has_section(SECTION_CHECKSUMS):
        for item in parser.items(SECTION_CHECKSUMS):
            relativepath = item[0]
            # option values look like "<checksumtype>:<checksum>"
            checksumtype, checksum = item[1].split(':')
            checksumtype = util.sanitize_checksum_type(checksumtype)
            files[relativepath] = {
                RELATIVE_PATH: relativepath,
                CHECKSUM: checksum,
                CHECKSUM_TYPE: checksumtype
            }

    for section_name in parser.sections():
        if section_name.startswith(
                'images-') or section_name == SECTION_STAGE2:
            for item in parser.items(section_name):
                if item[1] not in files:
                    relativepath = item[1]
                    files[relativepath] = {
                        RELATIVE_PATH: relativepath,
                        CHECKSUM: None,
                        CHECKSUM_TYPE: None,
                    }

    return unit, files.values()
def process_package_element(package_element):
    """
    Process a parsed primary.xml package element into a model instance.

    In addition to parsing the data, this templatizes the raw XML that gets added;
    the checksum element and the location href are mutated IN PLACE on the passed
    element before serialization.

    :param package_element: parsed primary.xml package element
    :return: package information dictionary
    :rtype: pulp_rpm.plugins.db.models.RPM
    """
    package_info = dict()

    name_element = package_element.find(NAME_TAG)
    if name_element is not None:
        package_info['name'] = name_element.text

    arch_element = package_element.find(ARCH_TAG)
    if arch_element is not None:
        package_info['arch'] = arch_element.text

    version_element = package_element.find(VERSION_TAG)
    if version_element is not None:
        package_info['version'] = version_element.attrib['ver']
        # 'rel' and 'epoch' are optional on the version element
        package_info['release'] = version_element.attrib.get('rel', None)
        package_info['epoch'] = version_element.attrib.get('epoch', None)

    checksum_element = package_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        # normalize e.g. "SHA256" -> "sha256" before storing
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        package_info['checksumtype'] = checksum_type
        package_info['checksum'] = checksum_element.text

        # convert these to template targets that will be rendered at publish time
        checksum_element.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum_element.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    summary_element = package_element.find(SUMMARY_TAG)
    if summary_element is not None:
        package_info['summary'] = summary_element.text

    description_element = package_element.find(DESCRIPTION_TAG)
    if description_element is not None:
        package_info['description'] = description_element.text

    url_element = package_element.find(URL_TAG)
    if url_element is not None:
        package_info['url'] = url_element.text

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        package_info['time'] = int(time_element.attrib['file'])
        package_info['build_time'] = int(time_element.attrib['build'])

    size_element = package_element.find(SIZE_TAG)
    if size_element is not None:
        package_info['size'] = int(size_element.attrib['package'])

    location_element = package_element.find(LOCATION_TAG)
    if location_element is not None:
        href = location_element.attrib['href']
        base_url = None
        # the base attribute may be plain or namespace-qualified ('}base')
        for attribute, value in location_element.items():
            if attribute == 'base' or attribute.endswith('}base'):
                base_url = value
        package_info['base_url'] = base_url
        filename = os.path.basename(href)
        package_info['relativepath'] = href
        package_info['filename'] = filename
        # we don't make any attempt to preserve the original directory structure
        # this element will end up being converted back to XML and stuffed into
        # the DB on the unit object, so this is our chance to modify it.
        location_element.attrib['href'] = filename

    format_element = package_element.find(FORMAT_TAG)
    package_info.update(_process_format_element(format_element))

    if package_info['arch'].lower() == 'src':
        model = models.SRPM(**package_info)
    else:
        model = models.RPM(**package_info)

    # add the raw XML so it can be saved in the database later
    rpm_namespace = utils.Namespace('rpm', RPM_SPEC_URL)
    model.raw_xml = utils.element_to_raw_xml(package_element, [rpm_namespace], COMMON_SPEC_URL)

    return model
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM or SRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # log with the traceback, then let the caller deal with the failure
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata)

    # checksums are computed over the raw package bytes, so the file must be
    # opened in binary mode; text mode would corrupt the digest wherever the
    # platform or interpreter translates the stream
    with open(file_path, 'rb') as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        # the user may specify the digest type; default to sha256
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009, checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata)
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    # Extract/adjust the repodata snippets
    unit.repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
    _update_provides_requires(unit)
    unit.modify_xml()

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # an identical unit was saved concurrently; reuse the existing one
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    repo_controller.associate_single_unit(repo, unit)
def _migrate_rpmlike_units(unit_type):
    """
    This function performs the migration on RPMs, DRPMs, and SRPMs. These all have the same
    schema when it comes to checksumtype, so they can be treated the same way.

    For each unit whose stored checksumtype differs from its sanitized form, the unit is
    updated in place. If that update collides with an existing unit (DuplicateKeyError),
    all repo associations are repointed at the existing unit and the duplicate is removed.

    :param unit_type: The unit_type_id, as found in pulp_rpm.common.ids.
    :type  unit_type: basestring
    """
    repos = connection.get_collection('repos')
    repo_content_units = connection.get_collection('repo_content_units')
    unit_collection = connection.get_collection('units_%s' % unit_type)
    for unit in unit_collection.find():
        try:
            sanitized_type = util.sanitize_checksum_type(unit['checksumtype'])
            if sanitized_type != unit['checksumtype']:
                # Let's see if we can get away with changing its checksumtype to the sanitized
                # value. If this works, we won't have to do anything else.
                unit_collection.update({'_id': unit['_id']},
                                       {'$set': {'checksumtype': sanitized_type}})
        except errors.DuplicateKeyError:
            # Looks like there is already an identical unit with the sanitized checksum type. This
            # means we need to remove the current unit, but first we will need to change any
            # references to this unit to point to the other.
            conflicting_unit = unit_collection.find_one(
                {'name': unit['name'], 'epoch': unit['epoch'], 'version': unit['version'],
                 'release': unit['release'], 'arch': unit['arch'], 'checksum': unit['checksum'],
                 'checksumtype': sanitized_type})
            for rcu in repo_content_units.find({'unit_type_id': unit_type,
                                                'unit_id': unit['_id']}):
                # Now we must either switch the rcu from pointing to unit to pointing to
                # conflicting_unit, or delete the rcu if there is already one in the same repo.
                try:
                    msg = _('Updating %(repo_id)s to contain %(type)s %(conflicting)s instead of '
                            '%(old_id)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'],
                                 'old_id': unit['_id']}
                    _logger.debug(msg)
                    # repointing the association may itself collide if the repo already
                    # references conflicting_unit — handled below
                    repo_content_units.update({'_id': rcu['_id']},
                                              {'$set': {'unit_id': conflicting_unit['_id']}})
                except errors.DuplicateKeyError:
                    # We will delete this RepoContentUnit since the sha1 RPM is already in the
                    # repository.
                    msg = _('Removing %(type)s %(old_id)s from repo %(repo_id)s since it conflicts '
                            'with %(conflicting)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'],
                                 'old_id': unit['_id']}
                    _logger.debug(msg)
                    repo_content_units.remove({'_id': rcu['_id']})
                    # In this case, we now need to decrement the repository's "content_unit_counts"
                    # for this unit_type by one, since we removed a unit from a repository.
                    repos.update({'id': rcu['repo_id']},
                                 {'$inc': {'content_unit_counts.%s' % unit_type: -1}})
            # Now that we have removed or altered all references to the "sha" Unit, we need to
            # remove it since it is a duplicate.
            unit_collection.remove({'_id': unit['_id']})
def parse_treeinfo_file(path):
    """
    The treefile seems to be approximately in INI format, which can be read
    by the standard library's ConfigParser.

    :param path: The absolute path to the treefile
    :return: instance of Distribution model, and a list of dicts
             describing the distribution's files
    :rtype: (pulp_rpm.plugins.db.models.Distribution, list of dict)

    :raises ValueError: if the file cannot be parsed, or if the required unit
                        key options are missing from the [general] section
    """
    parser = ConfigParser.RawConfigParser()
    # the default implementation of this method makes all option names lowercase,
    # which we don't want. This is the suggested solution in the python.org docs.
    parser.optionxform = str

    with open(path) as fp:
        try:
            parser.readfp(fp)
        except ConfigParser.ParsingError:
            # wouldn't need this if ParsingError subclassed ValueError.
            raise ValueError(_('could not parse treeinfo file'))

    # apparently the 'variant' is optional. for example, it does not appear
    # in the RHEL 5.9 treeinfo file. This is how the previous importer
    # handled that.
    try:
        variant = parser.get(SECTION_GENERAL, 'variant')
    except ConfigParser.NoOptionError:
        variant = None
    try:
        packagedir = parser.get(SECTION_GENERAL, KEY_PACKAGEDIR)
    except ConfigParser.NoOptionError:
        packagedir = None

    try:
        new_dist = Distribution(
            family=parser.get(SECTION_GENERAL, 'family'),
            variant=variant,
            version=parser.get(SECTION_GENERAL, 'version'),
            arch=parser.get(SECTION_GENERAL, 'arch'),
            packagedir=packagedir,
            timestamp=float(parser.get(SECTION_GENERAL, KEY_TIMESTAMP))
        )
        # Look for an existing distribution
        existing_dist = Distribution.objects.filter(
            family=new_dist.family,
            variant=new_dist.variant,
            version=new_dist.version,
            arch=new_dist.arch
        ).first()
        if existing_dist:
            # update with the new information:
            existing_dist.packagedir = packagedir
            existing_dist.timestamp = new_dist.timestamp
            unit = existing_dist
        else:
            unit = new_dist
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        raise ValueError('invalid treefile: could not find unit key components')

    files = {}
    # this section is likely to have all the files we care about listed with
    # checksums. But, it might not. Other sections checked below will only add
    # files to the "files" dict if they are not already present. For those cases,
    # there will not be checksums available.
    if parser.has_section(SECTION_CHECKSUMS):
        for item in parser.items(SECTION_CHECKSUMS):
            relativepath = item[0]
            # the value is "<checksumtype>:<hex digest>"; split on the first
            # colon only so a malformed value with extra colons cannot raise
            # ValueError during tuple unpacking
            checksumtype, checksum = item[1].split(':', 1)
            checksumtype = util.sanitize_checksum_type(checksumtype)
            files[relativepath] = {
                RELATIVE_PATH: relativepath,
                CHECKSUM: checksum,
                CHECKSUM_TYPE: checksumtype
            }

    for section_name in parser.sections():
        if section_name.startswith('images-') or section_name == SECTION_STAGE2:
            for item in parser.items(section_name):
                if item[1] not in files:
                    relativepath = item[1]
                    files[relativepath] = {
                        RELATIVE_PATH: relativepath,
                        CHECKSUM: None,
                        CHECKSUM_TYPE: None,
                    }

    return unit, files.values()