def test_serialize_meta(self):
        """
        Serialize metadata holding a version, a date, a description and one
        scope, then check every child written under the <meta> node.
        """
        meta = Metadata()
        meta.version = Version('1.0', comment='First version')
        meta.date = '2017-09-01'
        meta.description = Description('The LGR description',
                                       description_type='text/plain')
        meta.scopes = [Scope('.', scope_type='domain')]
        self.lgr.metadata = meta

        meta_node = etree.SubElement(self.root, 'meta')
        _serialize_meta(self.lgr, meta_node)

        # <version>: text plus 'comment' attribute
        version_node = meta_node.find('version', namespaces=NSMAP)
        self.assertEqual(version_node.text, '1.0')
        # LXML can return strings as bytestring in python2, hence the
        # concatenation with an empty string before comparing.
        # See https://mailman-mail5.webfaction.com/pipermail/lxml/2011-December/006239.html
        version_comment = version_node.get('comment')
        self.assertEqual('' + version_comment, 'First version')

        # <date>
        date_node = meta_node.find('date', namespaces=NSMAP)
        self.assertEqual(date_node.text, '2017-09-01')

        # <description>: text plus 'type' attribute
        description_node = meta_node.find('description', namespaces=NSMAP)
        self.assertEqual(description_node.text, 'The LGR description')
        self.assertEqual(description_node.get('type'), 'text/plain')

        # <scope>: exactly one element, with its text and 'type' attribute
        scope_nodes = meta_node.findall('scope', namespaces=NSMAP)
        self.assertEqual(len(scope_nodes), 1)
        only_scope = scope_nodes[0]
        self.assertEqual(only_scope.text, '.')
        self.assertEqual(only_scope.get('type'), 'domain')
예제 #2
0
    def _process_meta(self, elem):
        """
        Process the <meta> element of an LGR XML file.

        Every child of `elem` is dispatched on its tag to fill a new Metadata
        object and a new ReferenceManager. Unknown tags are logged and reported
        to `self.rfc7940_checks` as a 'parse_xml' error. Once all children are
        handled, `self._lgr` is replaced by a new LGR built from the collected
        data.

        :param elem: The <meta> element. Its children are cleared as they are
                     processed.
        """
        metadata = Metadata(self.rfc7940_checks)
        reference_manager = ReferenceManager()
        # Tags whose text is handed straight to a Metadata setter
        MAPPER = {
            DATE_TAG:
            lambda d: metadata.set_date(d, force=self.force_mode),
            VALIDITY_START_TAG:
            lambda d: metadata.set_validity_start(d, force=self.force_mode),
            VALIDITY_END_TAG:
            lambda d: metadata.set_validity_end(d, force=self.force_mode),
            UNICODE_VERSION_TAG:
            lambda d: metadata.set_unicode_version(d, force=self.force_mode),
        }
        unicode_version_tag_found = False
        for child in elem:
            tag = child.tag
            logger.debug("Got '%s' element", tag)
            if tag in MAPPER:
                MAPPER[tag](child.text)
                if tag == UNICODE_VERSION_TAG:
                    unicode_version_tag_found = True
            elif tag == VERSION_TAG:
                metadata.version = Version(child.text,
                                           child.get('comment', None))
            elif tag == LANGUAGE_TAG:
                metadata.add_language(child.text, force=self.force_mode)
            elif tag == SCOPE_TAG:
                metadata.scopes.append(
                    Scope(child.text, child.get('type', None)))
            elif tag == DESCRIPTION_TAG:
                # An empty <description/> element has no text at all (None):
                # treat it as an empty string instead of crashing on .replace
                text = child.text if child.text is not None else ''
                # Seems to be an issue with CDATA/iterparse: https://bugs.launchpad.net/lxml/+bug/1788449
                # For now, manually replace CRLF with LF
                metadata.description = Description(
                    text.replace('\r\n', '\n'), child.get('type', None))
            elif tag == REFERENCES_TAG:
                for reference in child:
                    value = reference.text
                    # Don't convert it to an int since ref_id may be a string
                    ref_id = reference.get('id')
                    comment = reference.get('comment', None)
                    reference_manager.add_reference(value,
                                                    comment=comment,
                                                    ref_id=ref_id)
                # The processed <reference> elements are cleaned up by the
                # unconditional clear() at the end of this iteration
            else:
                logger.warning("Unhandled '%s' element in <meta> section", tag)
                self.rfc7940_checks.error('parse_xml')
            # Clean up the element now that it has been handled
            child.clear()

        # Record whether the document declared its Unicode version explicitly
        self.rfc7940_checks.add_test_result('explicit_unicode_version',
                                            unicode_version_tag_found)
        self._lgr = LGR(name=self.filename,
                        metadata=metadata,
                        reference_manager=reference_manager,
                        unicode_database=self._unicode_database)
예제 #3
0
파일: api.py 프로젝트: ptudor/lgr-django
 def create(cls, name, unicode_version, validating_repertoire_name):
     """
     Build a new instance of `cls` wrapping a freshly created LGR.

     The LGR gets version '1', the given Unicode version and the matching
     Unicode database obtained from `unidb.manager`.

     :param name: Name given to both the LGR and the returned object.
     :param unicode_version: Unicode version set on the metadata and used to
                             look up the Unicode database.
     :param validating_repertoire_name: Name passed to `get_by_name`; when
                                        falsy, no validating repertoire is
                                        looked up and None is used.
     :return: The new `cls` instance.
     """
     meta = Metadata()
     meta.version = Version('1')
     meta.set_unicode_version(unicode_version)

     new_lgr = LGR(name, metadata=meta)
     new_lgr.unicode_database = unidb.manager.get_db_by_version(unicode_version)

     if validating_repertoire_name:
         repertoire = get_by_name(validating_repertoire_name)
     else:
         repertoire = None

     return cls(name, lgr=new_lgr, validating_repertoire=repertoire)
예제 #4
0
파일: union.py 프로젝트: j-bernard/lgr-core
def union_version(first, second):
    """
    Union two version objects.

    When one of the two objects is None, the other one is returned as-is
    (which is None too when both are missing). Otherwise the value and the
    comment of the result are each picked through `let_user_choose`.

    :param first: First version object to union.
    :param second: Other version object to union.
    :return: New object, or the only non-None input.
    """
    # A missing side leaves nothing to combine: hand back the other one
    if first is None or second is None:
        return second if first is None else first

    return Version(
        let_user_choose(first.value, second.value),
        let_user_choose(first.comment, second.comment),
    )
예제 #5
0
파일: merge_set.py 프로젝트: g11r/lgr-core
def merge_version(lgr_set):
    """
    Merge versions from LGR set.

    Distinct, non-empty version values and comments are collected in order
    of first appearance and joined with '|'.

    :param lgr_set: The LGRs in the set
    :return: The merged version object
    """
    # Ordered dicts are used as insertion-ordered sets (keys only)
    seen_values = OrderedDict()
    seen_comments = OrderedDict()

    all_versions = [lgr.metadata.version for lgr in lgr_set]
    for current in all_versions:
        if current:
            if current.value:
                seen_values[current.value] = None
            if current.comment:
                seen_comments[current.comment] = None

    merged_value = '|'.join(seen_values)
    merged_comment = '|'.join(seen_comments)
    return Version(merged_value, merged_comment)
    def test_serialize_meta_unicode(self):
        """
        Serialize metadata whose version, comment and description contain
        non-ASCII characters, and check they come back unchanged.
        """
        meta = Metadata()
        meta.version = Version('1.0 日本', comment='First version (はじめて)')
        meta.description = Description(
            'The LGR description containing Unicode characters: ΘΞΠ',
            description_type='text/plain')
        self.lgr.metadata = meta

        meta_node = etree.SubElement(self.root, 'meta')
        _serialize_meta(self.lgr, meta_node)

        # <version>: text plus 'comment' attribute
        version_node = meta_node.find('version', namespaces=NSMAP)
        self.assertEqual(version_node.text, '1.0 日本')
        self.assertEqual(version_node.get('comment'), 'First version (はじめて)')

        # <description>: text plus 'type' attribute
        description_node = meta_node.find('description', namespaces=NSMAP)
        description_text = description_node.text
        self.assertEqual(
            description_text,
            'The LGR description containing Unicode characters: ΘΞΠ')
        self.assertEqual(description_node.get('type'), 'text/plain')
예제 #7
0
    def _parse_doc(self, rule_file):
        """
        Actual parsing of document.

        :param rule_file: Content of the rule, as a file-like object.
        """
        line_num = 0
        for line in rule_file:
            line_num += 1

            line = line.strip()
            if len(line) == 0:
                continue
            if line[0] == '#':
                continue

            reference = REFERENCE_RE.match(line)
            if reference is not None:
                ref_id = reference.group('ref_id')
                value = reference.group('value')
                comment = reference.group('comment')
                try:
                    self._lgr.add_reference(value,
                                            ref_id=ref_id,
                                            comment=comment)
                except LGRException:
                    logger.error("Invalid reference '%s' on line %d", line,
                                 line_num)
                continue

            version = VERSION_RE.match(line)
            if version is not None:
                version_no = version.group('version_no')
                date = version.group('date')
                comment = version.group('comment')

                try:
                    self._lgr.metadata.version = Version(version_no,
                                                         comment=comment)
                    self._lgr.metadata.date = date
                except LGRException:
                    logger.error("Invalid version '%s' on line %d", line,
                                 line_num)
                continue

            if UNICODE_CODEPOINT_RE.match(line) is None:
                logger.debug("Skipping non-parsable line %d:\n%s", line_num,
                             line)
                # Line is not starting with a valid unicode code point, skip
                continue

            # Split base character from variant(s)
            char_variant = line.split(';')
            char = char_variant[0]

            try:
                [(codepoints, references)] = parse_char(char)
                self._lgr.add_cp(codepoints, ref=references)
            except ValueError:
                logger.error("Invalid character '%s' at line %d", char,
                             line_num)
            except LGRException as exc:
                logger.error("Cannot add code point '%s' at line %d: %s",
                             format_cp(codepoints), line_num, exc)

            if len(char_variant) > 1:
                preferred_variants = char_variant[1].strip()
                if len(preferred_variants
                       ) > 0 and preferred_variants[0] != '#':
                    # From RFC7940, Section 7.3. Recommended Disposition Values:
                    # activated  The resulting string should be activated for use.  (This
                    # is the same as a Preferred Variant [RFC3743].)
                    var_type = "activated"
                    self.insert_variant(line_num, codepoints,
                                        preferred_variants, var_type)

            if len(char_variant) > 2:
                variants = char_variant[2].strip()
                if len(variants) > 0 and variants[0] != '#':
                    self.insert_variant(line_num, codepoints, variants)