def test_serialize_meta(self):
    """Serialize a populated Metadata object and check every <meta> child."""
    # Build the metadata attached to the LGR under test.
    meta = Metadata()
    meta.version = Version('1.0', comment='First version')
    meta.date = '2017-09-01'
    meta.description = Description('The LGR description',
                                   description_type='text/plain')
    meta.scopes = [Scope('.', scope_type='domain')]
    self.lgr.metadata = meta

    # Serialize into a fresh <meta> node hanging off the test root.
    node = etree.SubElement(self.root, 'meta')
    _serialize_meta(self.lgr, node)

    version_elem = node.find('version', namespaces=NSMAP)
    self.assertEqual(version_elem.text, '1.0')
    # LXML can return strings as bytestring in python2...
    # See https://mailman-mail5.webfaction.com/pipermail/lxml/2011-December/006239.html
    self.assertEqual('' + version_elem.get('comment'), 'First version')

    date_elem = node.find('date', namespaces=NSMAP)
    self.assertEqual(date_elem.text, '2017-09-01')

    description_elem = node.find('description', namespaces=NSMAP)
    self.assertEqual(description_elem.text, 'The LGR description')
    self.assertEqual(description_elem.get('type'), 'text/plain')

    scope_elems = node.findall('scope', namespaces=NSMAP)
    self.assertEqual(len(scope_elems), 1)
    only_scope = scope_elems[0]
    self.assertEqual(only_scope.text, '.')
    self.assertEqual(only_scope.get('type'), 'domain')
def merge_metadata(lgr_set):
    """
    Build one metadata object out of all the LGRs of a set.

    Version and description are delegated to ``merge_version`` and
    ``merge_description``. Scopes and languages are accumulated without
    duplicates, keeping first-seen order. Validity start, validity end and
    Unicode version are folded through ``compare_objects`` with ``max``,
    ``min`` and ``max`` respectively. The date is set to today's ISO date.

    :param lgr_set: The LGRs in the set
    :return: The merged metadata object
    """
    logger.debug("Merge metadata")

    merged = Metadata()
    merged.version = merge_version(lgr_set)
    merged.description = merge_description(lgr_set)

    # OrderedDicts are used as ordered sets: re-assigning an existing key
    # leaves its position untouched, so first-seen order is preserved.
    seen_scopes = OrderedDict()
    seen_languages = OrderedDict()
    for current in [lgr.metadata for lgr in lgr_set]:
        for scope in current.scopes:
            seen_scopes[scope] = None
        for language in current.languages:
            seen_languages[language] = None

        merged.validity_start = compare_objects(merged.validity_start,
                                                current.validity_start,
                                                max)
        merged.validity_end = compare_objects(merged.validity_end,
                                              current.validity_end,
                                              min)
        merged.unicode_version = compare_objects(merged.unicode_version,
                                                 current.unicode_version,
                                                 max)

    merged.scopes = list(seen_scopes)
    merged.languages = list(seen_languages)
    merged.date = date.today().isoformat()

    return merged
def union_metadata(first, second):
    """
    Union two metadata.

    The version and description are only computed when ``first`` defines
    them. Scopes and languages are the set union of both inputs. Validity
    start, validity end and Unicode version are chosen by
    ``compare_objects`` with ``max``, ``min`` and ``max`` respectively.
    The date is set to today's ISO date.

    :param first: The first metadata object to union.
    :param second: The other metadata object to union.
    :return: A new metadata object.
    """
    logger.debug("Union of metadata")

    output = Metadata()
    if first.version is not None:
        output.version = union_version(first.version, second.version)
    if first.description is not None:
        output.description = union_description(first.description,
                                               second.description)

    # The scopes of *both* operands must be combined; the previous code
    # united first.scopes with itself and silently dropped second.scopes.
    output.scopes = set(first.scopes) | set(second.scopes)
    output.languages = set(first.languages) | set(second.languages)

    output.date = date.today().isoformat()

    output.validity_start = compare_objects(first.validity_start,
                                            second.validity_start,
                                            max)
    output.validity_end = compare_objects(first.validity_end,
                                          second.validity_end,
                                          min)
    output.unicode_version = compare_objects(first.unicode_version,
                                             second.unicode_version,
                                             max)

    return output
def intersect_metadata(first, second):
    """
    Intersect two metadata.

    Version and description are delegated to ``intersect_version`` and
    ``intersect_description``. Scopes and languages are the set
    intersection of both inputs. Validity start, validity end and Unicode
    version are chosen by ``compare_objects`` with ``max``, ``min`` and
    ``max`` respectively. The date is set to today's ISO date.

    :param first: The first metadata object to intersect with.
    :param second: The other metadata object to intersect with.
    :return: A new metadata object.
    """
    output = Metadata()
    output.version = intersect_version(first.version, second.version)
    output.description = intersect_description(first.description,
                                               second.description)

    # Only scopes present in *both* operands are kept; the previous code
    # intersected first.scopes with itself, which returned every scope of
    # first regardless of second.
    output.scopes = set(first.scopes) & set(second.scopes)
    output.languages = set(first.languages) & set(second.languages)

    output.date = date.today().isoformat()

    output.validity_start = compare_objects(first.validity_start,
                                            second.validity_start,
                                            max)
    output.validity_end = compare_objects(first.validity_end,
                                          second.validity_end,
                                          min)
    output.unicode_version = compare_objects(first.unicode_version,
                                             second.unicode_version,
                                             max)

    return output
def _process_meta(self, elem):
    """
    Process the <meta> element of an LGR XML file.

    Each direct child of ``elem`` is dispatched on its tag to fill a fresh
    ``Metadata`` and ``ReferenceManager``. Unknown tags are logged and
    reported through ``self.rfc7940_checks``. Once all children are
    handled, the result of the explicit Unicode version check is recorded
    and a new ``LGR`` built from the collected data is stored in
    ``self._lgr``.

    :param elem: The <meta> element to process.
    """
    metadata = Metadata(self.rfc7940_checks)
    reference_manager = ReferenceManager()

    # Tags whose text is handed straight to a Metadata setter.
    MAPPER = {
        DATE_TAG: lambda d: metadata.set_date(d, force=self.force_mode),
        VALIDITY_START_TAG: lambda d: metadata.set_validity_start(d, force=self.force_mode),
        VALIDITY_END_TAG: lambda d: metadata.set_validity_end(d, force=self.force_mode),
        UNICODE_VERSION_TAG: lambda d: metadata.set_unicode_version(d, force=self.force_mode),
    }
    unicode_version_tag_found = False
    for child in elem:
        tag = child.tag
        logger.debug("Got '%s' element", tag)
        if tag in MAPPER:
            MAPPER[tag](child.text)
            if tag == UNICODE_VERSION_TAG:
                unicode_version_tag_found = True
        elif tag == VERSION_TAG:
            metadata.version = Version(child.text,
                                       child.get('comment', None))
        elif tag == LANGUAGE_TAG:
            metadata.add_language(child.text, force=self.force_mode)
        elif tag == SCOPE_TAG:
            metadata.scopes.append(
                Scope(child.text, child.get('type', None)))
        elif tag == DESCRIPTION_TAG:
            # lxml reports the text of an empty element as None; fall back
            # to an empty string so an empty <description/> does not raise
            # AttributeError on .replace().
            description_text = child.text or ''
            # Seems to be an issue with CDATA/iterparse: https://bugs.launchpad.net/lxml/+bug/1788449
            # For now, manually replace CRLF with LF
            metadata.description = Description(
                description_text.replace('\r\n', '\n'),
                child.get('type', None))
        elif tag == REFERENCES_TAG:
            for reference in child:
                value = reference.text
                # Don't convert it to an int since ref_id may be a string
                ref_id = reference.get('id')
                comment = reference.get('comment', None)
                reference_manager.add_reference(value,
                                                comment=comment,
                                                ref_id=ref_id)
            # Since we have processed <reference> elements here, let's clean-up
            child.clear()
        else:
            logger.warning("Unhandled '%s' element in <meta> section", tag)
            self.rfc7940_checks.error('parse_xml')
        # The child has been handled: clear it.
        child.clear()

    self.rfc7940_checks.add_test_result('explicit_unicode_version',
                                        unicode_version_tag_found)
    self._lgr = LGR(name=self.filename,
                    metadata=metadata,
                    reference_manager=reference_manager,
                    unicode_database=self._unicode_database)
def test_serialize_meta_unicode(self):
    """Check that non-ASCII metadata values survive serialization unchanged."""
    meta = Metadata()
    meta.version = Version('1.0 日本', comment='First version (はじめて)')
    meta.description = Description(
        'The LGR description containing Unicode characters: ΘΞΠ',
        description_type='text/plain')
    self.lgr.metadata = meta

    # Serialize into a fresh <meta> node hanging off the test root.
    node = etree.SubElement(self.root, 'meta')
    _serialize_meta(self.lgr, node)

    version_elem = node.find('version', namespaces=NSMAP)
    self.assertEqual(version_elem.text, '1.0 日本')
    self.assertEqual(version_elem.get('comment'), 'First version (はじめて)')

    description_elem = node.find('description', namespaces=NSMAP)
    self.assertEqual(
        description_elem.text,
        'The LGR description containing Unicode characters: ΘΞΠ')
    self.assertEqual(description_elem.get('type'), 'text/plain')