Beispiel #1
0
def process_repomd_data_element(data_element):
    """
    Extract file metadata from a single <data> element of repomd.xml.

    Builds and returns a file information dictionary with these keys:

     * `name`: name of the element
     * `relative_path`: path of the metadata file, relative to the repository URL
     * `checksum`: dictionary of `algorithm` and `hex_digest` keys and values
     * `size`: size of the metadata file, in bytes
     * `timestamp`: unix timestamp of the file's creation, as a float
     * `open_checksum`: optional checksum dictionary of uncompressed metadata file
     * `open_size`: optional size of the uncompressed metadata file, in bytes

    :param data_element: XML data element parsed from the repomd.xml file
    :return: file_info dictionary
    :rtype: dict
    """
    file_info = deepcopy(FILE_INFO_SKEL)
    file_info['name'] = data_element.attrib['type']

    location = data_element.find(LOCATION_TAG)
    if location is not None:
        file_info['relative_path'] = location.attrib['href']

    # The compressed and (optional) uncompressed checksums share one shape:
    # a nested dict of algorithm + hex digest.
    for tag, key in ((CHECKSUM_TAG, 'checksum'),
                     (OPEN_CHECKSUM_TAG, 'open_checksum')):
        element = data_element.find(tag)
        if element is None:
            continue
        file_info[key]['algorithm'] = util.sanitize_checksum_type(
            element.attrib['type'])
        file_info[key]['hex_digest'] = element.text

    # Simple scalar fields: each is the element's text run through a converter.
    for tag, key, convert in ((SIZE_TAG, 'size', int),
                              (TIMESTAMP_TAG, 'timestamp', float),
                              (OPEN_SIZE_TAG, 'open_size', int)):
        element = data_element.find(tag)
        if element is not None:
            file_info[key] = convert(element.text)

    # Clear out the parsed XML to release the memory it holds.
    for child in data_element.getchildren():
        child.clear()
    data_element.clear()

    return file_info
Beispiel #2
0
def process_repomd_data_element(data_element):
    """
    Parse one <data> element from repomd.xml into a file-info dict.

    The returned dictionary carries these keys:

     * `name`: name of the element
     * `relative_path`: path of the metadata file, relative to the repository URL
     * `checksum`: dictionary of `algorithm` and `hex_digest` keys and values
     * `size`: size of the metadata file, in bytes
     * `timestamp`: unix timestamp of the file's creation, as a float
     * `open_checksum`: optional checksum dictionary of uncompressed metadata file
     * `open_size`: optional size of the uncompressed metadata file, in bytes

    :param data_element: XML data element parsed from the repomd.xml file
    :return: file_info dictionary
    :rtype: dict
    """
    # Bind the lookup once; every field below is an optional child element.
    find = data_element.find

    file_info = deepcopy(FILE_INFO_SKEL)
    file_info['name'] = data_element.attrib['type']

    location_el = find(LOCATION_TAG)
    if location_el is not None:
        file_info['relative_path'] = location_el.attrib['href']

    checksum_el = find(CHECKSUM_TAG)
    if checksum_el is not None:
        file_info['checksum']['algorithm'] = util.sanitize_checksum_type(
            checksum_el.attrib['type'])
        file_info['checksum']['hex_digest'] = checksum_el.text

    size_el = find(SIZE_TAG)
    if size_el is not None:
        file_info['size'] = int(size_el.text)

    timestamp_el = find(TIMESTAMP_TAG)
    if timestamp_el is not None:
        file_info['timestamp'] = float(timestamp_el.text)

    open_checksum_el = find(OPEN_CHECKSUM_TAG)
    if open_checksum_el is not None:
        file_info['open_checksum']['algorithm'] = util.sanitize_checksum_type(
            open_checksum_el.attrib['type'])
        file_info['open_checksum']['hex_digest'] = open_checksum_el.text

    open_size_el = find(OPEN_SIZE_TAG)
    if open_size_el is not None:
        file_info['open_size'] = int(open_size_el.text)

    # Release the memory held by the parsed XML tree.
    for child in data_element.getchildren():
        child.clear()
    data_element.clear()

    return file_info
Beispiel #3
0
    def validate(self, value):
        """
        Validate that ``value`` names a checksum type known to the Pulp platform.

        :param value: The value to validate
        :type  value: basestring

        :return: None
        """
        # Run the parent class's generic string-field validation first.
        super(ChecksumTypeStringField, self).validate(value)
        # The return value is discarded; presumably this call raises for
        # unrecognized checksum types — confirm against util's implementation.
        util.sanitize_checksum_type(value)
Beispiel #4
0
    def test_SHA256_to_sha256(self):
        """
        Assert that the uppercase name "SHA256" is normalized to "sha256".
        """
        self.assertEqual(util.sanitize_checksum_type('SHA256'), 'sha256')
Beispiel #5
0
def process_package_element(element):
    """
    Build a models.DRPM from one "drpm" block of prestodelta.xml.

    :param element: object representing one "DRPM" block from the XML file
    :type  element: xml.etree.ElementTree.Element

    :return:    models.DRPM instance for the XML block
    :rtype:     pulp_rpm.plugins.db.models.DRPM
    """
    delta = element.find('delta')
    checksum = delta.find('checksum')

    # Collect every constructor argument before instantiating the model.
    fields = {
        'new_package': element.attrib['name'],
        'epoch': element.attrib['epoch'],
        'version': element.attrib['version'],
        'release': element.attrib['release'],
        'arch': element.attrib['arch'],
        'oldepoch': delta.attrib['oldepoch'],
        'oldversion': delta.attrib['oldversion'],
        'oldrelease': delta.attrib['oldrelease'],
        'filename': delta.find('filename').text,
        'sequence': delta.find('sequence').text,
        'size': int(delta.find('size').text),
        'checksum': checksum.text,
        'checksumtype': util.sanitize_checksum_type(checksum.attrib['type']),
    }
    return models.DRPM(**fields)
Beispiel #6
0
    def test_ShA_to_sha1(self):
        """
        Assert that the legacy mixed-case name "ShA" is normalized to "sha1".
        """
        self.assertEqual(util.sanitize_checksum_type('ShA'), 'sha1')
Beispiel #7
0
    def import_unknown_metadata_files(self, metadata_files):
        """
        Import metadata files whose type we do not already parse.

        :param metadata_files:  object containing access to all metadata files
        :type  metadata_files:  pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
        """
        for metadata_type, file_info in metadata_files.metadata.iteritems():
            # Skip anything we already know how to parse.
            if metadata_type in metadata_files.KNOWN_TYPES:
                continue
            local_path = file_info['local_path']
            checksum = file_info['checksum']['hex_digest']
            checksum_type = util.sanitize_checksum_type(
                file_info['checksum']['algorithm'])
            # Reuse an existing unit for this repo/type when one exists,
            # otherwise create a fresh one.
            unit = models.YumMetadataFile.objects.filter(
                data_type=metadata_type,
                repo_id=self.repo.repo_id).first()
            if unit:
                unit.checksum = checksum
                unit.checksum_type = checksum_type
            else:
                unit = models.YumMetadataFile(
                    data_type=metadata_type,
                    repo_id=self.repo.repo_id,
                    checksum=checksum,
                    checksum_type=checksum_type)

            unit.set_storage_path(os.path.basename(local_path))
            unit.save_and_import_content(local_path)

            # associate/re-associate the unit with the repo
            repo_controller.associate_single_unit(self.repo, unit)
Beispiel #8
0
    def test_ShA_to_sha1(self):
        """
        Assert that "ShA" maps to the canonical name "sha1".
        """
        sanitized = util.sanitize_checksum_type('ShA')
        self.assertEqual('sha1', sanitized)
Beispiel #9
0
    def import_unknown_metadata_files(self, metadata_files):
        """
        Import any metadata files whose type is not one we already parse.

        :param metadata_files:  object containing access to all metadata files
        :type  metadata_files:  pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
        """
        known = metadata_files.KNOWN_TYPES
        for data_type, info in metadata_files.metadata.iteritems():
            if data_type in known:
                continue
            path = info['local_path']
            digest = info['checksum']['hex_digest']
            algorithm = util.sanitize_checksum_type(info['checksum']['algorithm'])
            # Look for an already-stored unit for this repo and data type.
            model = models.YumMetadataFile.objects.filter(
                data_type=data_type,
                repo_id=self.repo.repo_id).first()
            if not model:
                # No existing unit — create one.
                model = models.YumMetadataFile(
                    data_type=data_type,
                    repo_id=self.repo.repo_id,
                    checksum=digest,
                    checksum_type=algorithm)
            else:
                # Update the existing unit in place.
                model.checksum = digest
                model.checksum_type = algorithm

            model.set_storage_path(os.path.basename(path))
            model.save_and_import_content(path)
            # associate/re-associate model to the repo
            repo_controller.associate_single_unit(self.repo, model)
Beispiel #10
0
def process_package_element(element):
    """
    Convert one "drpm" element from prestodelta.xml into a models.DRPM.

    :param element: object representing one "DRPM" block from the XML file
    :type  element: xml.etree.ElementTree.Element

    :return:    models.DRPM instance for the XML block
    :rtype:     pulp_rpm.plugins.db.models.DRPM
    """
    # New-package attributes live on the element; old-package ones on <delta>.
    attrs = element.attrib
    delta = element.find('delta')
    old = delta.attrib
    checksum_element = delta.find('checksum')
    checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])

    return models.DRPM(
        new_package=attrs['name'],
        epoch=attrs['epoch'],
        version=attrs['version'],
        release=attrs['release'],
        arch=attrs['arch'],
        oldepoch=old['oldepoch'],
        oldversion=old['oldversion'],
        oldrelease=old['oldrelease'],
        filename=delta.find('filename').text,
        sequence=delta.find('sequence').text,
        size=int(delta.find('size').text),
        checksum=checksum_element.text,
        checksumtype=checksum_type)
Beispiel #11
0
    def test_SHA256_to_sha256(self):
        """
        Assert that "SHA256" maps to the canonical name "sha256".
        """
        sanitized = util.sanitize_checksum_type('SHA256')
        self.assertEqual('sha256', sanitized)
Beispiel #12
0
def _migrate_rpmlike_units(unit_type):
    """
    This function performs the migration on RPMs, DRPMs, and SRPMs. These all have the same schema
    when it comes to checksumtype, so they can be treated the same way.

    For each unit, the checksumtype is sanitized in place. If sanitizing collides
    with an already-existing unit (DuplicateKeyError), references in
    repo_content_units are repointed to the surviving unit and the duplicate is
    removed, decrementing the owning repo's content_unit_counts where a reference
    had to be dropped.

    :param unit_type:          The unit_type_id, as found in pulp_rpm.common.ids.
    :type  unit_type:          basestring
    """
    repos = connection.get_collection('repos')
    repo_content_units = connection.get_collection('repo_content_units')
    unit_collection = connection.get_collection('units_%s' % unit_type)

    for unit in unit_collection.find():
        try:
            sanitized_type = util.sanitize_checksum_type(unit['checksumtype'])
            if sanitized_type != unit['checksumtype']:
                # Let's see if we can get away with changing its checksumtype to the sanitized
                # value. If this works, we won't have to do anything else.
                unit_collection.update({'_id': unit['_id']},
                                       {'$set': {'checksumtype': sanitized_type}})
        except errors.DuplicateKeyError:
            # Looks like there is already an identical unit with the sanitized checksum type. This
            # means we need to remove the current unit, but first we will need to change any
            # references to this unit to point to the other.
            # NOTE(review): this handler assumes the DuplicateKeyError came from the update()
            # above, so sanitized_type is bound by the time we get here — confirm that
            # util.sanitize_checksum_type cannot raise this error type.
            conflicting_unit = unit_collection.find_one(
                {'name': unit['name'], 'epoch': unit['epoch'], 'version': unit['version'],
                 'release': unit['release'], 'arch': unit['arch'], 'checksum': unit['checksum'],
                 'checksumtype': sanitized_type})
            for rcu in repo_content_units.find({'unit_type_id': unit_type, 'unit_id': unit['_id']}):
                # Now we must either switch the rcu from pointing to unit to pointing to
                # conflicting_unit, or delete the rcu if there is already one in the same repo.
                try:
                    msg = _('Updating %(repo_id)s to contain %(type)s %(conflicting)s instead of '
                            '%(old_id)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'], 'old_id': unit['_id']}
                    _logger.debug(msg)
                    repo_content_units.update({'_id': rcu['_id']},
                                              {'$set': {'unit_id': conflicting_unit['_id']}})
                except errors.DuplicateKeyError:
                    # We will delete this RepoContentUnit since the sha1 RPM is already in the
                    # repository.
                    msg = _('Removing %(type)s %(old_id)s from repo %(repo_id)s since it conflicts '
                            'with %(conflicting)s.')
                    msg = msg % {'repo_id': rcu['repo_id'], 'type': unit_type,
                                 'conflicting': conflicting_unit['_id'], 'old_id': unit['_id']}
                    _logger.debug(msg)
                    repo_content_units.remove({'_id': rcu['_id']})
                    # In this case, we now need to decrement the repository's "content_unit_counts"
                    # for this unit_type by one, since we removed a unit from a repository.
                    repos.update(
                        {'id': rcu['repo_id']},
                        {'$inc': {'content_unit_counts.%s' % unit_type: -1}})
            # Now that we have removed or altered all references to the "sha" Unit, we need to
            # remove it since it is a duplicate.
            unit_collection.remove({'_id': unit['_id']})
Beispiel #13
0
def _migrate_yum_metadata_files():
    """
    Rewrite each YumMetadataFile's checksum_type to its sanitized form.

    This is mostly similar to _migrate_rpmlike_units, except that the field is
    named checksum_type rather than checksumtype, and no collisions can occur
    because checksum_type is not part of this unit's unit_key — so the
    repo_content_units table never needs touching.
    """
    collection = connection.get_collection('units_yum_repo_metadata_file')
    for doc in collection.find():
        current = doc['checksum_type']
        sanitized = util.sanitize_checksum_type(current)
        if sanitized == current:
            continue
        collection.update({'_id': doc['_id']},
                          {'$set': {'checksum_type': sanitized}})
Beispiel #14
0
def _migrate_yum_metadata_files():
    """
    Sanitize the checksum_type field on every YumMetadataFile unit.

    Similar in spirit to _migrate_rpmlike_units, but the field here is
    checksum_type (not checksumtype) and it is not part of the unit_key, so
    no unit-key collisions are possible and repo_content_units is untouched.
    """
    units = connection.get_collection('units_yum_repo_metadata_file')
    for record in units.find():
        fixed = util.sanitize_checksum_type(record['checksum_type'])
        # Only issue a write when the stored value actually changes.
        if fixed != record['checksum_type']:
            units.update(
                {'_id': record['_id']},
                {'$set': {'checksum_type': fixed}})
Beispiel #15
0
    def update_unit_files(unit, files):
        """
        Update the *files* list on the unit.

        :param unit: A distribution model object.
        :type unit: pulp_rpm.plugins.db.models.Distribution
        :param files: List of distribution files.
        :type files: list
        """
        # Seed from the unit's current files only when they are stored as a
        # non-list iterable; when they are already a list, start empty.
        updated = list(unit.files) if not isinstance(unit.files, list) else []
        for entry in files:
            if entry[CHECKSUM_TYPE] is not None:
                entry[CHECKSUM_TYPE] = util.sanitize_checksum_type(entry[CHECKSUM_TYPE])
            updated.append({
                RELATIVE_PATH: entry[RELATIVE_PATH],
                CHECKSUM: entry[CHECKSUM],
                CHECKSUM_TYPE: entry[CHECKSUM_TYPE]})
        unit.files = updated
Beispiel #16
0
    def update_unit_files(unit, files):
        """
        Replace the unit's *files* list with sanitized entries built from *files*.

        :param unit: A distribution model object.
        :type unit: pulp_rpm.plugins.db.models.Distribution
        :param files: List of distribution files.
        :type files: list
        """
        if isinstance(unit.files, list):
            result = []
        else:
            # Existing files are not a list — carry them over as the seed.
            result = list(unit.files)
        for item in files:
            checksum_type = item[CHECKSUM_TYPE]
            if checksum_type is not None:
                checksum_type = util.sanitize_checksum_type(checksum_type)
                item[CHECKSUM_TYPE] = checksum_type
            result.append({
                RELATIVE_PATH: item[RELATIVE_PATH],
                CHECKSUM: item[CHECKSUM],
                CHECKSUM_TYPE: checksum_type})
        unit.files = result
Beispiel #17
0
    def save_default_metadata_checksum_on_repo(self, metadata_files):
        """
        Pick a default checksum type for metadata files and store it on the
        repo scratchpad.

        There is no good way to order a preference on the checksum type, so
        the first one found is used.

        :param metadata_files:  object containing access to all metadata files
        :type  metadata_files:  pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
        """
        checksum_type = None
        for _name, file_info in metadata_files.metadata.iteritems():
            if 'checksum' in file_info:
                checksum_type = file_info['checksum']['algorithm']
                break
        if checksum_type:
            checksum_type = util.sanitize_checksum_type(checksum_type)
            scratchpad = self.conduit.get_repo_scratchpad()
            scratchpad[constants.SCRATCHPAD_DEFAULT_METADATA_CHECKSUM] = checksum_type
            self.conduit.set_repo_scratchpad(scratchpad)
Beispiel #18
0
    def save_default_metadata_checksum_on_repo(self, metadata_files):
        """
        Record a default metadata checksum type on the repo scratchpad.

        No preference ordering exists among checksum types; the first metadata
        entry that carries a checksum wins.

        :param metadata_files:  object containing access to all metadata files
        :type  metadata_files:  pulp_rpm.plugins.importers.yum.repomd.metadata.MetadataFiles
        """
        # First algorithm found among entries that have a checksum, else None.
        checksum_type = next(
            (info['checksum']['algorithm']
             for _type, info in metadata_files.metadata.iteritems()
             if 'checksum' in info),
            None)
        if checksum_type:
            sanitized = util.sanitize_checksum_type(checksum_type)
            scratchpad = self.conduit.get_repo_scratchpad()
            scratchpad[constants.SCRATCHPAD_DEFAULT_METADATA_CHECKSUM] = sanitized
            self.conduit.set_repo_scratchpad(scratchpad)
Beispiel #19
0
def _migrate_errata():
    """
    Sanitize the checksum type stored on each erratum's referenced packages.

    These sums are not part of the erratum unit key, so sanitizing cannot
    create collisions. Errata reference RPMs by unit_key rather than unit_id,
    which is why the stored type strings themselves must be corrected.
    """
    errata = connection.get_collection('units_erratum')
    for erratum in errata.find():
        pkglist = erratum.get('pkglist', [])
        dirty = False
        for collection in pkglist:
            for package in collection.get('packages', []):
                checksum = package.get('sum')
                if not checksum:
                    continue
                sanitized = util.sanitize_checksum_type(checksum[0])
                if sanitized != checksum[0]:
                    checksum[0] = sanitized
                    dirty = True
        # Write back only errata whose pkglist actually changed.
        if dirty:
            errata.update({'_id': erratum['_id']},
                          {'$set': {'pkglist': pkglist}})
Beispiel #20
0
    def process_successful_download_reports(unit, reports):
        """
        Fold the file details from each successful download report into the
        unit's *files* list. Required before saving the new unit.

        :param unit:    A distribution model object.
        :type  unit:    pulp_rpm.plugins.db.models.Distribution
        :param reports: list of successful pulp.common.download.report.DownloadReport
        :type  reports: list
        """
        # Seed from the existing files only when they are not already a list.
        collected = [] if isinstance(unit.files, list) else list(unit.files)
        for report in reports:
            data = report.data
            if data[CHECKSUM_TYPE] is not None:
                data[CHECKSUM_TYPE] = util.sanitize_checksum_type(data[CHECKSUM_TYPE])
            collected.append({
                RELATIVE_PATH: data[RELATIVE_PATH],
                CHECKSUM: data[CHECKSUM],
                CHECKSUM_TYPE: data[CHECKSUM_TYPE]})
        unit.files = collected
Beispiel #21
0
    def process_successful_download_reports(unit, reports):
        """
        Append file information from each download report to the unit.

        Must be called after downloading completes and before the new unit
        is saved.

        :param unit:    A distribution model object.
        :type  unit:    pulp_rpm.plugins.db.models.Distribution
        :param reports: list of successful pulp.common.download.report.DownloadReport
        :type  reports: list
        """
        if isinstance(unit.files, list):
            files = []
        else:
            files = list(unit.files)
        for report in reports:
            _file = report.data
            checksum_type = _file[CHECKSUM_TYPE]
            if checksum_type is not None:
                checksum_type = util.sanitize_checksum_type(checksum_type)
                _file[CHECKSUM_TYPE] = checksum_type
            files.append({
                RELATIVE_PATH: _file[RELATIVE_PATH],
                CHECKSUM: _file[CHECKSUM],
                CHECKSUM_TYPE: checksum_type})
        unit.files = files
Beispiel #22
0
def _migrate_errata():
    """
    Walk every erratum and sanitize the checksum type of its referenced RPMs.

    The sums are not part of the erratum unit key, so sanitizing cannot
    create collisions; errata reference RPMs by unit_key, not unit_id.
    """
    errata = connection.get_collection('units_erratum')
    for erratum in errata.find():
        pkglist = erratum.get('pkglist', [])
        modified = False
        for entry in pkglist:
            for package in entry.get('packages', []):
                has_sum = 'sum' in package and package['sum']
                if not has_sum:
                    continue
                original = package['sum'][0]
                replacement = util.sanitize_checksum_type(original)
                if replacement != original:
                    package['sum'][0] = replacement
                    modified = True
        # Persist the pkglist only if at least one sum type changed.
        if modified:
            errata.update(
                {'_id': erratum['_id']},
                {'$set': {'pkglist': pkglist}})
Beispiel #23
0
def process_package_element(package_element):
    """
    Process a parsed primary.xml package element into a model instance.

    In addition to parsing the data, this templatizes the raw XML that gets added:
    the checksum value/type and the location href are rewritten in place on the
    element before it is serialized onto the model as raw_xml.

    :param package_element: parsed primary.xml package element
    :return: package model built from the element
    :rtype: pulp_rpm.plugins.db.models.SRPM when the arch is "src",
            otherwise pulp_rpm.plugins.db.models.RPM
    """
    package_info = dict()

    name_element = package_element.find(NAME_TAG)
    if name_element is not None:
        package_info['name'] = name_element.text

    arch_element = package_element.find(ARCH_TAG)
    if arch_element is not None:
        package_info['arch'] = arch_element.text

    version_element = package_element.find(VERSION_TAG)
    if version_element is not None:
        # 'ver' is required; 'rel' and 'epoch' default to None when absent.
        package_info['version'] = version_element.attrib['ver']
        package_info['release'] = version_element.attrib.get('rel', None)
        package_info['epoch'] = version_element.attrib.get('epoch', None)

    checksum_element = package_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        package_info['checksumtype'] = checksum_type
        package_info['checksum'] = checksum_element.text

        # convert these to template targets that will be rendered at publish time
        checksum_element.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum_element.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    summary_element = package_element.find(SUMMARY_TAG)
    if summary_element is not None:
        package_info['summary'] = summary_element.text

    description_element = package_element.find(DESCRIPTION_TAG)
    if description_element is not None:
        package_info['description'] = description_element.text

    url_element = package_element.find(URL_TAG)
    if url_element is not None:
        package_info['url'] = url_element.text

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        package_info['time'] = int(time_element.attrib['file'])
        package_info['build_time'] = int(time_element.attrib['build'])

    size_element = package_element.find(SIZE_TAG)
    if size_element is not None:
        package_info['size'] = int(size_element.attrib['package'])

    location_element = package_element.find(LOCATION_TAG)
    if location_element is not None:
        href = location_element.attrib['href']
        # The base URL may appear as a plain or namespace-qualified attribute,
        # so scan all attributes for either form.
        base_url = None
        for attribute, value in location_element.items():
            if attribute == 'base' or attribute.endswith('}base'):
                base_url = value
        package_info['base_url'] = base_url
        filename = os.path.basename(href)
        package_info['relativepath'] = href
        package_info['filename'] = filename
        # we don't make any attempt to preserve the original directory structure
        # this element will end up being converted back to XML and stuffed into
        # the DB on the unit object, so this is our chance to modify it.
        location_element.attrib['href'] = filename

    format_element = package_element.find(FORMAT_TAG)
    package_info.update(_process_format_element(format_element))

    # Source packages get the SRPM model; everything else is a regular RPM.
    if package_info['arch'].lower() == 'src':
        model = models.SRPM(**package_info)
    else:
        model = models.RPM(**package_info)
    # add the raw XML so it can be saved in the database later
    rpm_namespace = utils.Namespace('rpm', RPM_SPEC_URL)
    model.raw_xml = utils.element_to_raw_xml(package_element, [rpm_namespace], COMMON_SPEC_URL)
    return model
Beispiel #24
0
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit,
                    config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        if type_id == models.DRPM._content_type_id.default:
            unit = models.DRPM(**_extract_drpm_data(file_path))
        else:
            repodata = rpm_parse.get_package_xml(file_path,
                                                 sumtype=util.TYPE_SHA256)
            package_xml = (utils.fake_xml_element(
                repodata['primary'],
                constants.COMMON_NAMESPACE).find(primary.PACKAGE_TAG))
            unit = primary.process_package_element(package_xml)
    except Exception:
        raise PulpCodedException(error_codes.RPM1016)

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp,
                                        models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    unit.checksumtype = util.TYPE_SHA256
    unit.checksum = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    unit.checksums = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    for key, value in metadata.items():
        setattr(unit, key, value)
    for key, value in unit_key.items():
        setattr(unit, key, value)

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets
        unit.signing_key = rpm_parse.package_signature(
            rpm_parse.package_headers(file_path))
        # construct filename from metadata (BZ #1101168)
        if type_id == models.SRPM._content_type_id.default:
            rpm_basefilename = "%s-%s-%s.src.rpm" % (unit.name, unit.version,
                                                     unit.release)
        else:
            rpm_basefilename = "%s-%s-%s.%s.rpm" % (unit.name, unit.version,
                                                    unit.release, unit.arch)
        unit.relativepath = rpm_basefilename
        unit.filename = rpm_basefilename
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except TypeError:
        raise ModelInstantiationError()
    except NotUniqueError:
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
Beispiel #25
0
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        if type_id == models.DRPM._content_type_id.default:
            rpm_data = _extract_drpm_data(file_path)
        else:
            rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # 'except Exception' rather than a bare 'except' so KeyboardInterrupt and
        # SystemExit are not logged as extraction failures; lazy %-args defer the
        # string formatting to the logging framework.
        _LOGGER.exception('Error extracting RPM metadata for [%s]', file_path)
        raise

    # metadata can be None; normalize it once so later uses need no 'or {}' guard
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata)

    # open in binary mode: the package is binary data, and text mode would
    # corrupt the digests on platforms that translate line endings
    with open(file_path, 'rb') as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009, checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata)
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets (not applicable to DRPMs)
        repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
        _update_provides_requires(unit, repodata)
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # an identical unit already exists -- reuse it instead of failing the upload
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
Beispiel #26
0
    def parse_treeinfo_file(path):
        """
        Read a treeinfo file, which is approximately in INI format and can be
        parsed by the standard library's ConfigParser.

        :param path: The absolute path to the treefile
        :return: instance of Distribution model, and a list of dicts
            describing the distribution's files
        :rtype: (pulp_rpm.plugins.db.models.Distribution, list of dict)
        """
        parser = ConfigParser.RawConfigParser()
        # ConfigParser lowercases option names by default, which would mangle
        # the file paths used as option names; this override (suggested by the
        # python.org docs) preserves the original case.
        parser.optionxform = str
        with open(path) as handle:
            try:
                parser.readfp(handle)
            except ConfigParser.ParsingError:
                # ParsingError does not subclass ValueError, so translate it.
                raise ValueError(_('could not parse treeinfo file'))

        def read_optional(option):
            # Some [general] options are optional -- e.g. 'variant' is absent
            # from the RHEL 5.9 treeinfo file. Missing means None.
            try:
                return parser.get(SECTION_GENERAL, option)
            except ConfigParser.NoOptionError:
                return None

        variant = read_optional('variant')
        packagedir = read_optional(KEY_PACKAGEDIR)

        try:
            candidate = Distribution(
                family=parser.get(SECTION_GENERAL, 'family'),
                variant=variant,
                version=parser.get(SECTION_GENERAL, 'version'),
                arch=parser.get(SECTION_GENERAL, 'arch'),
                packagedir=packagedir,
                timestamp=float(parser.get(SECTION_GENERAL, KEY_TIMESTAMP)))
            # Prefer an already-stored distribution with the same unit key.
            match = Distribution.objects.filter(
                family=candidate.family,
                variant=candidate.variant,
                version=candidate.version,
                arch=candidate.arch).first()
            if match is None:
                unit = candidate
            else:
                # refresh the mutable fields on the stored unit
                match.packagedir = packagedir
                match.timestamp = candidate.timestamp
                unit = match
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            raise ValueError(
                'invalid treefile: could not find unit key components')

        files = {}
        # The checksums section usually lists every file we care about along
        # with its digest -- but it might not. Sections scanned afterwards only
        # contribute entries that are not already present, and those carry no
        # checksum information.
        if parser.has_section(SECTION_CHECKSUMS):
            for relativepath, digest_spec in parser.items(SECTION_CHECKSUMS):
                checksumtype, checksum = digest_spec.split(':')
                checksumtype = util.sanitize_checksum_type(checksumtype)
                files[relativepath] = {
                    RELATIVE_PATH: relativepath,
                    CHECKSUM: checksum,
                    CHECKSUM_TYPE: checksumtype
                }
        for section_name in parser.sections():
            is_images = section_name.startswith('images-')
            if is_images or section_name == SECTION_STAGE2:
                # in these sections the option *value* is the file path
                for _option, relativepath in parser.items(section_name):
                    if relativepath not in files:
                        files[relativepath] = {
                            RELATIVE_PATH: relativepath,
                            CHECKSUM: None,
                            CHECKSUM_TYPE: None,
                        }

        return unit, files.values()
Beispiel #27
0
def process_package_element(package_element):
    """
    Process a parsed primary.xml package element into a model instance.

    In addition to parsing the data, this templatizes the raw XML that gets added:
    the checksum sub-element and the location href are mutated in place on the
    element before it is serialized, so the stored XML carries publish-time
    template targets instead of the original values.

    :param package_element: parsed primary.xml package element
    :return: SRPM model when the package arch is 'src', otherwise an RPM model
    :rtype: pulp_rpm.plugins.db.models.RPM
    """
    package_info = dict()

    # each sub-element is optional; a key is only set when its element exists
    name_element = package_element.find(NAME_TAG)
    if name_element is not None:
        package_info['name'] = name_element.text

    arch_element = package_element.find(ARCH_TAG)
    if arch_element is not None:
        package_info['arch'] = arch_element.text

    version_element = package_element.find(VERSION_TAG)
    if version_element is not None:
        # 'ver' is accessed directly (required); 'rel' and 'epoch' may be absent
        package_info['version'] = version_element.attrib['ver']
        package_info['release'] = version_element.attrib.get('rel', None)
        package_info['epoch'] = version_element.attrib.get('epoch', None)

    checksum_element = package_element.find(CHECKSUM_TAG)
    if checksum_element is not None:
        # normalize checksum type aliases before storing
        checksum_type = util.sanitize_checksum_type(checksum_element.attrib['type'])
        package_info['checksumtype'] = checksum_type
        package_info['checksum'] = checksum_element.text

        # convert these to template targets that will be rendered at publish time
        checksum_element.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum_element.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    summary_element = package_element.find(SUMMARY_TAG)
    if summary_element is not None:
        package_info['summary'] = summary_element.text

    description_element = package_element.find(DESCRIPTION_TAG)
    if description_element is not None:
        package_info['description'] = description_element.text

    url_element = package_element.find(URL_TAG)
    if url_element is not None:
        package_info['url'] = url_element.text

    time_element = package_element.find(TIME_TAG)
    if time_element is not None:
        package_info['time'] = int(time_element.attrib['file'])
        package_info['build_time'] = int(time_element.attrib['build'])

    size_element = package_element.find(SIZE_TAG)
    if size_element is not None:
        package_info['size'] = int(size_element.attrib['package'])

    location_element = package_element.find(LOCATION_TAG)
    if location_element is not None:
        href = location_element.attrib['href']
        base_url = None
        # the 'base' attribute may appear namespace-qualified, so accept both
        # the bare name and any '{namespace}base' spelling
        for attribute, value in location_element.items():
            if attribute == 'base' or attribute.endswith('}base'):
                base_url = value
        package_info['base_url'] = base_url
        filename = os.path.basename(href)
        package_info['relativepath'] = href
        package_info['filename'] = filename
        # we don't make any attempt to preserve the original directory structure
        # this element will end up being converted back to XML and stuffed into
        # the DB on the unit object, so this is our chance to modify it.
        location_element.attrib['href'] = filename

    format_element = package_element.find(FORMAT_TAG)
    # NOTE(review): format_element may be None when the tag is absent;
    # presumably _process_format_element tolerates that -- confirm.
    package_info.update(_process_format_element(format_element))

    # NOTE(review): this assumes an <arch> element was found above; a package
    # element without one would raise KeyError here -- confirm primary.xml
    # always provides it.
    if package_info['arch'].lower() == 'src':
        model = models.SRPM(**package_info)
    else:
        model = models.RPM(**package_info)
    # add the raw XML so it can be saved in the database later
    rpm_namespace = utils.Namespace('rpm', RPM_SPEC_URL)
    model.raw_xml = utils.element_to_raw_xml(package_element, [rpm_namespace], COMMON_SPEC_URL)
    return model
Beispiel #28
0
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit,
                    config):
    """
    Handles the upload for an RPM or SRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # 'except Exception' rather than a bare 'except' so KeyboardInterrupt and
        # SystemExit are not logged as extraction failures; lazy %-args defer the
        # string formatting to the logging framework.
        _LOGGER.exception('Error extracting RPM metadata for [%s]', file_path)
        raise

    # metadata can be None; normalize it once so later uses need no 'or {}' guard
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata)

    # open in binary mode: the package is binary data, and text mode would
    # corrupt the digests on platforms that translate line endings
    with open(file_path, 'rb') as fp:
        sums = util.calculate_checksums(fp,
                                        models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata)
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    # Extract/adjust the repodata snippets
    unit.repodata = rpm_parse.get_package_xml(file_path,
                                              sumtype=unit.checksumtype)
    _update_provides_requires(unit)
    unit.modify_xml()

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # an identical unit already exists -- reuse it instead of failing the upload
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    repo_controller.associate_single_unit(repo, unit)
Beispiel #29
0
def _migrate_rpmlike_units(unit_type):
    """
    This function performs the migration on RPMs, DRPMs, and SRPMs. These all have the same schema
    when it comes to checksumtype, so they can be treated the same way.

    For each unit: if sanitizing its checksumtype changes the value, the unit is
    updated in place. If that update collides with an existing unit (duplicate
    unit key), every repo_content_units reference to the old unit is repointed
    to the existing one (or removed when the repo already references it, with
    the repo's content_unit_counts decremented), and the old unit is deleted.

    :param unit_type:          The unit_type_id, as found in pulp_rpm.common.ids.
    :type  unit_type:          basestring
    """
    repos = connection.get_collection('repos')
    repo_content_units = connection.get_collection('repo_content_units')
    unit_collection = connection.get_collection('units_%s' % unit_type)

    for unit in unit_collection.find():
        try:
            sanitized_type = util.sanitize_checksum_type(unit['checksumtype'])
            if sanitized_type != unit['checksumtype']:
                # Let's see if we can get away with changing its checksumtype to the sanitized
                # value. If this works, we won't have to do anything else.
                # A DuplicateKeyError here means another unit already owns the
                # sanitized unit key; the except branch below handles that.
                unit_collection.update(
                    {'_id': unit['_id']},
                    {'$set': {
                        'checksumtype': sanitized_type
                    }})
        except errors.DuplicateKeyError:
            # Looks like there is already an identical unit with the sanitized checksum type. This
            # means we need to remove the current unit, but first we will need to change any
            # references to this unit to point to the other.
            # NOTE(review): the DuplicateKeyError implies this find_one matches a
            # unit; if it ever returned None the subscripts below would fail --
            # confirm against the collection's unique index definition.
            conflicting_unit = unit_collection.find_one({
                'name':
                unit['name'],
                'epoch':
                unit['epoch'],
                'version':
                unit['version'],
                'release':
                unit['release'],
                'arch':
                unit['arch'],
                'checksum':
                unit['checksum'],
                'checksumtype':
                sanitized_type
            })
            for rcu in repo_content_units.find({
                    'unit_type_id': unit_type,
                    'unit_id': unit['_id']
            }):
                # Now we must either switch the rcu from pointing to unit to pointing to
                # conflicting_unit, or delete the rcu if there is already one in the same repo.
                try:
                    msg = _(
                        'Updating %(repo_id)s to contain %(type)s %(conflicting)s instead of '
                        '%(old_id)s.')
                    msg = msg % {
                        'repo_id': rcu['repo_id'],
                        'type': unit_type,
                        'conflicting': conflicting_unit['_id'],
                        'old_id': unit['_id']
                    }
                    _logger.debug(msg)
                    # can raise DuplicateKeyError if the repo already holds an
                    # rcu for conflicting_unit -- handled just below
                    repo_content_units.update(
                        {'_id': rcu['_id']},
                        {'$set': {
                            'unit_id': conflicting_unit['_id']
                        }})
                except errors.DuplicateKeyError:
                    # We will delete this RepoContentUnit since the sha1 RPM is already in the
                    # repository.
                    msg = _(
                        'Removing %(type)s %(old_id)s from repo %(repo_id)s since it conflicts '
                        'with %(conflicting)s.')
                    msg = msg % {
                        'repo_id': rcu['repo_id'],
                        'type': unit_type,
                        'conflicting': conflicting_unit['_id'],
                        'old_id': unit['_id']
                    }
                    _logger.debug(msg)
                    repo_content_units.remove({'_id': rcu['_id']})
                    # In this case, we now need to decrement the repository's "content_unit_counts"
                    # for this unit_type by one, since we removed a unit from a repository.
                    repos.update(
                        {'id': rcu['repo_id']},
                        {'$inc': {
                            'content_unit_counts.%s' % unit_type: -1
                        }})
            # Now that we have removed or altered all references to the "sha" Unit, we need to
            # remove it since it is a duplicate.
            unit_collection.remove({'_id': unit['_id']})
Beispiel #30
0
    def parse_treeinfo_file(path):
        """
        The treefile seems to be approximately in INI format, which can be read
        by the standard library's ConfigParser.

        Builds a new Distribution from the [general] section, or updates and
        reuses an existing one with the same unit key, and collects the file
        listing from the checksums / images-* / stage2 sections.

        :param path: The absolute path to the treefile
        :return: instance of Distribution model, and a list of dicts
            describing the distribution's files
        :rtype: (pulp_rpm.plugins.db.models.Distribution, list of dict)

        :raises ValueError: if the file cannot be parsed or a required
            [general] option is missing
        """
        parser = ConfigParser.RawConfigParser()
        # the default implementation of this method makes all option names lowercase,
        # which we don't want. This is the suggested solution in the python.org docs.
        parser.optionxform = str
        with open(path) as fp:
            try:
                parser.readfp(fp)
            except ConfigParser.ParsingError:
                # wouldn't need this if ParsingError subclassed ValueError.
                raise ValueError(_('could not parse treeinfo file'))

        # apparently the 'variant' is optional. for example, it does not appear
        # in the RHEL 5.9 treeinfo file. This is how the previous importer
        # handled that.
        try:
            variant = parser.get(SECTION_GENERAL, 'variant')
        except ConfigParser.NoOptionError:
            variant = None
        # packagedir is likewise optional; missing means None
        try:
            packagedir = parser.get(SECTION_GENERAL, KEY_PACKAGEDIR)
        except ConfigParser.NoOptionError:
            packagedir = None

        try:
            new_dist = Distribution(
                family=parser.get(SECTION_GENERAL, 'family'),
                variant=variant,
                version=parser.get(SECTION_GENERAL, 'version'),
                arch=parser.get(SECTION_GENERAL, 'arch'),
                packagedir=packagedir,
                timestamp=float(parser.get(SECTION_GENERAL, KEY_TIMESTAMP))
            )
            # Look for an existing distribution with the same unit key
            existing_dist = Distribution.objects.filter(
                family=new_dist.family,
                variant=new_dist.variant,
                version=new_dist.version,
                arch=new_dist.arch
            ).first()
            if existing_dist:
                # update with the new information:
                existing_dist.packagedir = packagedir
                existing_dist.timestamp = new_dist.timestamp
                unit = existing_dist
            else:
                unit = new_dist
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            # any missing required [general] option makes the treefile unusable
            raise ValueError('invalid treefile: could not find unit key components')

        files = {}
        # this section is likely to have all the files we care about listed with
        # checksums. But, it might not. Other sections checked below will only add
        # files to the "files" dict if they are not already present. For those cases,
        # there will not be checksums available.
        if parser.has_section(SECTION_CHECKSUMS):
            for item in parser.items(SECTION_CHECKSUMS):
                # each item is (relative path, 'checksumtype:checksum')
                relativepath = item[0]
                checksumtype, checksum = item[1].split(':')
                checksumtype = util.sanitize_checksum_type(checksumtype)
                files[relativepath] = {
                    RELATIVE_PATH: relativepath,
                    CHECKSUM: checksum,
                    CHECKSUM_TYPE: checksumtype
                }
        for section_name in parser.sections():
            if section_name.startswith('images-') or section_name == SECTION_STAGE2:
                # in these sections the option *value* (item[1]) is the file path
                for item in parser.items(section_name):
                    if item[1] not in files:
                        relativepath = item[1]
                        files[relativepath] = {
                            RELATIVE_PATH: relativepath,
                            CHECKSUM: None,
                            CHECKSUM_TYPE: None,
                        }

        return unit, files.values()