def test_rpm(self):
    # Render the fixture element to raw XML, declaring only the "rpm" prefix,
    # then check the shape of the resulting string.
    rpm_namespace = utils.Namespace('rpm', primary.RPM_SPEC_URL)
    rendered = utils.element_to_raw_xml(self.rpm_element, [rpm_namespace],
                                        primary.COMMON_SPEC_URL)

    # the output must begin with the package element itself: no namespace
    # declarations and no enclosing root element
    opening = re.match(r'^<package +type="rpm">', rendered)
    self.assertTrue(opening is not None)

    # nothing may trail the package element, e.g. a stray </metadata>
    trimmed = rendered.rstrip()
    self.assertTrue(trimmed.endswith('</package>'))

    # the "rpm" prefix has to survive serialization
    license_match = re.search(r'<rpm:license *>GPLv2</rpm:license>', rendered)
    self.assertTrue(license_match is not None)

    # the requires and provides entries must be present
    for expected_name in ('dolphin', 'penguin'):
        self.assertTrue(expected_name in rendered)

    # every namespace declaration should have been stripped out
    self.assertTrue('xmlns' not in rendered)

    # python 2.6 used to treat the default namespace as a namespace with
    # prefix '', which produced tags starting with "<:"
    self.assertTrue('<:' not in rendered)

    # Re-parse the output to prove it is well-formed. The wrapping fake tag
    # exists only to declare the "rpm" prefix so that the parser accepts it.
    wrapped = '<fake xmlns:rpm="http://pulpproject.org">%s</fake>' % rendered
    ET.fromstring(wrapped)
def process_package_element(package_element):
    """
    Build a model instance from one parsed primary.xml package element.

    Besides extracting the metadata, this modifies the element in place: the
    checksum text and checksum type are replaced with template placeholders
    that get rendered at publish time, and the location href is reduced to the
    bare filename. The modified element is then serialized and attached to the
    model as ``raw_xml``.

    :param package_element: parsed primary.xml package element
    :return: model built from the element; an SRPM when the package arch is
             "src" (compared case-insensitively), otherwise an RPM
    :rtype:  models.RPM or models.SRPM
    """
    # NOTE(review): the comment previously at this spot said that a use of
    # deepcopy relies on cpython never duplicating string literals, which may
    # not hold up in other implementations of the python interpreter. No
    # deepcopy is performed inside this function; confirm where it applies.
    info = {}

    # plain-text children that map straight onto a field
    for tag, field in ((NAME_TAG, 'name'), (ARCH_TAG, 'arch')):
        child = package_element.find(tag)
        if child is not None:
            info[field] = child.text

    version = package_element.find(VERSION_TAG)
    if version is not None:
        version_attrs = version.attrib
        # "ver" is mandatory; "rel" and "epoch" fall back to None
        info['version'] = version_attrs['ver']
        info['release'] = version_attrs.get('rel')
        info['epoch'] = version_attrs.get('epoch')

    checksum = package_element.find(CHECKSUM_TAG)
    if checksum is not None:
        info['checksumtype'] = util.sanitize_checksum_type(checksum.attrib['type'])
        info['checksum'] = checksum.text
        # swap the real values for template targets rendered at publish time
        checksum.text = models.RpmBase.CHECKSUM_TEMPLATE
        checksum.attrib['type'] = models.RpmBase.CHECKSUMTYPE_TEMPLATE

    for tag, field in ((SUMMARY_TAG, 'summary'),
                       (DESCRIPTION_TAG, 'description'),
                       (URL_TAG, 'url')):
        child = package_element.find(tag)
        if child is not None:
            info[field] = child.text

    timestamps = package_element.find(TIME_TAG)
    if timestamps is not None:
        info['time'] = int(timestamps.attrib['file'])
        info['build_time'] = int(timestamps.attrib['build'])

    size = package_element.find(SIZE_TAG)
    if size is not None:
        info['size'] = int(size.attrib['package'])

    location = package_element.find(LOCATION_TAG)
    if location is not None:
        href = location.attrib['href']
        # the base attribute may be namespace-qualified ("{...}base"); when
        # several attributes match, the last one wins
        base_candidates = [
            value for attr_name, value in location.items()
            if attr_name == 'base' or attr_name.endswith('}base')
        ]
        info['base_url'] = base_candidates[-1] if base_candidates else None
        basename = os.path.basename(href)
        info['relativepath'] = href
        info['filename'] = basename
        # The original directory structure is deliberately not preserved.
        # This element is converted back to XML and stored in the DB on the
        # unit object, so this is the place to rewrite the href.
        location.attrib['href'] = basename

    info.update(_process_format_element(package_element.find(FORMAT_TAG)))

    # source packages get their own model class
    is_source = info['arch'].lower() == 'src'
    model_class = models.SRPM if is_source else models.RPM
    model = model_class(**info)

    # attach the raw XML so it can be saved in the database later
    declared_namespaces = [utils.Namespace('rpm', RPM_SPEC_URL)]
    model.raw_xml = utils.element_to_raw_xml(package_element, declared_namespaces,
                                             COMMON_SPEC_URL)
    return model