Example #1
0
    def add_language(self, language, force=False):
        """
        Append one language tag to the metadata's list of languages.

        The tag is checked with ``rfc5646.check`` before being stored:

        3.3.3.  The language Element
        The value of the "language"
        element MUST be a valid language tag as described in [RFC5646].

        :param str language: Language tag to append to ``self.languages``.
        :param bool force: If True, an invalid tag is logged as a warning
                           and appended anyway.
        :raises LGRFormatException: if the tag is invalid and ``force`` is
                                    not set, or if a UnicodeEncodeError is
                                    raised while processing the tag
                                    (whatever the value of ``force``).
        """
        try:
            tag_is_valid = rfc5646.check(language)
            if not tag_is_valid:
                # A forced addition downgrades the problem to a warning.
                severity = logging.ERROR
                if force:
                    severity = logging.WARNING
                logger.log(severity, "Invalid language: '%s'", language)
                if severity == logging.ERROR:
                    raise LGRFormatException(
                        LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG
                    )
            self.languages.append(language)
        except UnicodeEncodeError:
            # 'force' does not apply here: such a tag is always rejected.
            logger.error("Invalid non-ASCII language tag '%s'", language)
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)
Example #2
0
    def validate(self, parents, rules_lookup, classes_lookup):
        """
        Check that this class definition is well-formed.

        Three checks are made, in order: a 'by-ref' must name a known class,
        the 'name' attribute must be set exactly when there is no parent,
        and an unknown 'from-tag' is reported as a warning only.

        :param parents: List of parent objects of this instance.
        :param rules_lookup: Dictionary of defined rules in the LGR. Not used.
        :param classes_lookup: Dictionary of defined classes in the LGR to use
                               for by-ref classes.
        :raises LGRFormatException: On an unknown 'by-ref' target or on a
                                    misplaced/missing 'name' attribute.
        """
        logger.debug('Validate %s', self)

        referenced = self.by_ref
        if referenced is not None and referenced not in classes_lookup:
            # From RFC7940, section 6.2.1. Declaring and Invoking Named Classes
            # It is an error to reference a named class for which the
            # definition has not been seen.
            logger.error("Class cannot reference inexisting class '%s'",
                         referenced)
            raise LGRFormatException(LGRFormatException.LGRFormatReason.INVALID_BY_REF)

        # From RFC7940, section 6.2.1. Declaring and Invoking Named Classes
        # The "name" attribute MUST be present if and only if the class
        # is a direct child element of the "rules" element.
        # Hence "has a parent" and "has a name" must never agree.
        is_nested = len(parents) != 0
        is_named = self.name is not None
        if is_nested == is_named:
            logger.error("'name' attribute MUST be present only and only if "
                         "class is a direct child of the 'rules' element")
            raise LGRFormatException(LGRFormatException.LGRFormatReason.INVALID_TOP_LEVEL_NAME)

        tag = self.from_tag
        if tag is None:
            return

        # An unknown tag is not fatal: it is only reported.
        if TAG_CLASSNAME_PREFIX + tag not in classes_lookup:
            logger.warning("Undefined tag '%s' in current LGR",
                           tag)
Example #3
0
    def __init__(self,
                 name=None,
                 comment=None,
                 ref=None,
                 from_tag=None,
                 unicode_property=None,
                 codepoints=None,
                 by_ref=None):
        """
        Build a class object.

        A class created with ``by_ref`` may not also carry a name, a tag, a
        Unicode property or references.

        :param name: Name of the class.
        :param comment: Optional comment associated to the class.
        :param ref: Optional list of references.
        :param from_tag: Define a tag-based class.
        :param unicode_property: Define a Unicode property-based class.
        :param codepoints: Initial sequence of code points
                           (stored as a set; None means empty).
        :param by_ref: Name of the referenced class.
        :raises LGRFormatException: If ``by_ref`` is combined with ``name``,
                                    ``from_tag``, ``unicode_property``
                                    or ``ref``.
        """
        self.name = name
        self.comment = comment
        self.ref = ref
        self.from_tag = from_tag
        self.unicode_property = unicode_property
        self.codepoints = set(codepoints or [])
        self.by_ref = by_ref

        if by_ref is None:
            return

        # Attributes incompatible with 'by-ref', in the order they are
        # checked: only the first offender is reported.
        incompatible = (
            (name,
             "Cannot create a class with both a 'by-ref' and a 'name'"),
            (from_tag,
             "Cannot create a class with both a 'by-ref' and a 'from-tag'"),
            (unicode_property,
             "Cannot create a class with both a 'by-ref' and a 'property'"),
            (ref,
             "Cannot create a class with both a 'by-ref' and a 'ref'"),
        )
        for value, message in incompatible:
            if value is not None:
                logger.error(message)
                raise LGRFormatException(
                    LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
Example #4
0
    def set_unicode_version(self, unicode_version, force=False):
        """
        Set the unicode-version of the LGR.

        Ensure the unicode_version is a valid x.y.z string.

        3.3.7.  The unicode-version Element
        the version number used in creating the LGR
        MUST be listed in the form x.y.z, where x, y, and z are positive,
        decimal integers (see [Unicode-Versions]).

        :param str unicode_version: The Unicode version.
        :param bool force: If True, set the version even if format is invalid
                           (the problem is then only logged as a warning).
        :raises LGRFormatException: If the unicode_version parameter
                                    has an invalid format and ``force``
                                    is not set.
        """
        # re.match() only anchors the beginning of the string; the explicit
        # \Z makes sure nothing follows the third number, so values such as
        # '6.3.0.1' or '6.3.0-beta' are no longer accepted as x.y.z.
        if re.match(r'\d+\.\d+\.\d+\Z', unicode_version) is None:
            logger.log(logging.WARNING if force else logging.ERROR,
                       "Invalid Unicode version: '%s'", unicode_version)
            if not force:
                raise LGRFormatException(LGRFormatException.LGRFormatReason.
                                         INVALID_UNICODE_VERSION_TAG)

        self.unicode_version = unicode_version
Example #5
0
    def set_languages(self, languages, force=False):
        """
        Convenience function to update the languages in the metadata.

        Every tag is checked first; ``self.languages`` is only replaced once
        the whole collection has been examined. The ``languages`` argument
        itself is never modified, so any iterable (list, tuple, set,
        generator...) is accepted.

        :param iterable languages: a collection of language tags as described in [RFC5646].
        :param bool force: If True, add the languages even if format is invalid.
        :raises LGRFormatException: if the language parameter
                                    has an invalid format.
        """
        # check all languages
        found_error = False
        # Tags to store. Built separately instead of removing entries from
        # 'languages' while iterating over it, which skipped the element
        # following each removal and required a mutable list.
        kept_languages = []
        for language in languages:
            try:
                if not rfc5646.check(language):
                    logger.log(logging.WARNING if force else logging.ERROR,
                               "Invalid language: '%s'", language)
                    found_error = True
            except UnicodeEncodeError:
                # Such a tag is dropped, even when 'force' is set
                logger.error("Invalid non-ASCII language tag '%s'", language)
                continue
            kept_languages.append(language)

        if found_error and not force:
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)

        self.languages = kept_languages
Example #6
0
def _validate_date(date, force):
    """
    Ensure the date is a valid ISO 8601 "full-date" string.

    The string is split on '-', its three fields are converted to integers
    and used to build a ``datetime.date``; the date is accepted only if the
    ISO representation of that object is exactly the input string.

    :param str date: Date to validate, as a string.
    :param force: If True, do not raise exception on error
                  (the problem is logged as a warning instead).
    :return: date input.
    :raises LGRFormatException: If the date parameter
                                has an invalid format.

    >>> _validate_date('2015-06-25', False) == '2015-06-25'
    True
    >>> _validate_date('2015-13-26', False) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    LGRFormatException
    >>> _validate_date('2015', False) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    LGRFormatException
    """

    # Date should be date-fullyear "-" date-month "-" date-mday
    date_elements = date.split('-')
    if len(date_elements) == 3:
        try:
            year, month, day = [int(d) for d in date_elements]
            # Comparing with the canonical form rejects inputs that are
            # numerically fine but not zero-padded (e.g. '2015-6-25').
            if datetime.date(year, month, day).isoformat() == date:
                return date
        except (ValueError, OverflowError):
            # ValueError: non-numeric field or value out of range.
            # OverflowError: a field too large to be handled by
            # datetime.date. Both are reported below as an invalid date.
            pass

    # Single failure path, shared by all the invalid cases.
    logger.log(logging.WARNING if force else logging.ERROR,
               "Invalid date format: '%s'", date)
    if force:
        return date
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.INVALID_DATE_TAG)
Example #7
0
    def validate(self, parents, rules_lookup, classes_lookup):
        """
        Run the parent class validation, then check the number of children.

        :param parents: List of parent objects, passed to the parent class.
        :param rules_lookup: Passed unchanged to the parent class.
        :param classes_lookup: Passed unchanged to the parent class.
        :raises LGRFormatException: If this class does not hold exactly
                                    one child.
        """
        super(ComplementClass, self).validate(
            parents, rules_lookup, classes_lookup)

        child_count = len(self._children)
        if child_count == 1:
            logger.debug('%s is valid', self)
            return

        logger.error("'Complement' class MUST contain one element")
        raise LGRFormatException(LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
Example #8
0
    def validate(self, parents, rules_lookup, classes_lookup):
        """
        Run the parent class validation, then check the number of children.

        :param parents: List of parent objects, passed to the parent class.
        :param rules_lookup: Passed unchanged to the parent class.
        :param classes_lookup: Passed unchanged to the parent class.
        :raises LGRFormatException: If this class does not hold exactly
                                    two children.
        """
        super(DifferenceClass, self).validate(
            parents, rules_lookup, classes_lookup)

        child_count = len(self._children)
        if child_count == 2:
            logger.debug('%s is valid', self)
            return

        logger.error("'Difference' class MUST contain two elements")
        raise LGRFormatException(LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
Example #9
0
    def add_child(self, cls):
        """
        Append a child class, honouring the ``MAX_CHILDREN`` limit.

        A ``MAX_CHILDREN`` that is zero or negative disables the limit.

        :param cls: Child to append to the list of children.
        :raises LGRFormatException: If the limit is already reached.
        """
        limit = self.MAX_CHILDREN
        has_room = limit <= 0 or len(self._children) < limit
        if has_room:
            self._children.append(cls)
            return

        logger.error("Cannot have more than %s children",
                     limit)
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
Example #10
0
    def add_codepoint(self, cp):
        """
        Insert one or several code points into this class.

        :param cp: Code point(s) to add. An integer or a sequence of code points.
        :raises LGRFormatException: If the class is a 'by-ref' class.
        """
        if self.by_ref is not None:
            logger.error("Cannot add code point to a 'by-ref' class")
            raise LGRFormatException(LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)

        # A lone integer is handled as a one-element collection.
        new_points = [cp] if isinstance(cp, int) else cp
        self.codepoints.update(new_points)
Example #11
0
    def del_codepoint(self, cp):
        """
        Remove one or several code points from this class.

        Code points that are not part of the class are silently ignored.

        :param cp: Code point(s) to delete. An integer or a sequence of code points.
        :raises LGRFormatException: If the class is a 'by-ref' class.
        """
        if self.by_ref is not None:
            logger.error("Cannot delete code point from a 'by-ref' class")
            raise LGRFormatException(LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)

        # A lone integer is handled as a one-element collection.
        doomed = [cp] if isinstance(cp, int) else cp
        for point in doomed:
            self.codepoints.discard(point)
Example #12
0
    def validate(self, parents, rules_lookup, classes_lookup):
        """
        Check that this rule, and then each of its children, is well-formed.

        :param parents: List of parent objects of this instance.
        :param rules_lookup: Dictionary of defined rules in the LGR, used to
                             resolve a 'by-ref' rule.
        :param classes_lookup: Dictionary of defined classes in the LGR;
                               only handed over to the children here.
        :raises LGRFormatException: On an unknown 'by-ref' target or on a
                                    misplaced/missing 'name' attribute.
        """
        logger.debug('Validate %s', self)

        referenced = self.by_ref
        if referenced is not None and referenced not in rules_lookup:
            # From RFC7940, section 6.3.4. The "name" and "by-ref" Attributes
            # It is an error to reference a rule or class for which
            # the definition has not been seen.
            logger.error("Rule cannot reference inexisting rule '%s'",
                         referenced)
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_BY_REF)

        # From RFC7940, section 6.3.4. The name and by-ref Attributes
        # rules declared as immediate child elements of the "rules" element
        # MUST be named using a unique "name" attribute,
        # and all other instances MUST NOT be named.
        # Hence "has a parent" and "has a name" must never agree.
        is_nested = len(parents) != 0
        is_named = self.name is not None
        if is_nested == is_named:
            logger.error("'name' attribute MUST be present only and only if "
                         "rule is a direct child of the 'rules' element")
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_TOP_LEVEL_NAME)

        logger.debug('Validate %s children', self)
        for element in self.children:
            # Each child gets its own parent chain, ending with this rule.
            element.validate(parents + [self], rules_lookup, classes_lookup)

        logger.debug('%s is valid', self)
Example #13
0
    def __init__(self, name=None, comment=None, by_ref=None):
        """
        Build a rule, initially without any child.

        :param name: Name of the rule.
        :param comment: Optional comment.
        :param by_ref: Name of the referenced rule.
        :raises LGRFormatException: If both ``name`` and ``by_ref``
                                    are provided.
        """
        self.name, self.comment, self.by_ref = name, comment, by_ref
        self.children = []

        # Nothing to reject unless both attributes are given.
        if name is None or by_ref is None:
            return

        logger.error(
            "Cannot create a rule with both a 'name' and a 'by-ref'")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
Example #14
0
    def __init__(self,
                 disp,
                 comment=None,
                 ref=None,
                 match=None,
                 not_match=None,
                 any_variant=None,
                 all_variants=None,
                 only_variants=None):
        """
        Build an action element.

        The three variant arguments are stored as frozensets; an empty or
        missing value is stored as None.

        :param disp: Disposition of the action.
        :param comment: Optional comment.
        :param ref: Optional list of references (stored as ``references``).
        :param match: Name of a rule that must be matched.
        :param not_match: Name of a rule that must not be matched.
        :param any_variant: Sequence of disposition to match to trigger action.
        :param all_variants: Sequence of disposition to match to trigger action.
        :param only_variants: Sequence of disposition to match to trigger action.
        :raises LGRFormatException: If both ``match`` and ``not_match``
                                    are provided.
        """
        def as_frozenset(dispositions):
            # Empty/missing input is represented by None, not an empty set.
            if dispositions:
                return frozenset(dispositions)
            return None

        self.disp = disp
        self.comment = comment
        self.references = ref if ref else []
        self.match = match
        self.not_match = not_match
        self.any_variant = as_frozenset(any_variant)
        self.all_variants = as_frozenset(all_variants)
        self.only_variants = as_frozenset(only_variants)

        if match is None or not_match is None:
            return

        # From RFC7940, section 7.1. The "match" and "not-match" Attributes
        # An action MUST NOT contain both a "match" and a "not-match" attribute
        logger.error("Action contains both 'match' and 'not-match' "
                     "attributes")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.MATCH_NOT_MATCH)
Example #15
0
 def del_variant(self, *args, **kwargs):
     """
     Always fail: a range holds no variant, so none can be deleted.

     From draft-davies-idntables-09, section 4. Code Points and Variants:
     a "range" element has no child elements.

     :param args: Ignored.
     :param kwargs: Ignored.
     :raises LGRFormatException: Always, with reason RANGE_NO_CHILD.
     """
     logger.error("%r: Range has no variant", self)
     no_child = LGRFormatException.LGRFormatReason.RANGE_NO_CHILD
     raise LGRFormatException(no_child)
Example #16
0
def get_script(lgr):
    """
    Return the first entry of ``lgr.metadata.languages``.

    :param lgr: Object exposing ``metadata.languages`` as an indexable
                collection.
    :return: The first language of the metadata, returned as stored.
    :raises LGRFormatException: If the collection is empty (IndexError),
                                with reason INVALID_LANGUAGE_TAG.
    """
    try:
        first_language = lgr.metadata.languages[0]
    except IndexError:
        raise LGRFormatException(
            reason=LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)
    else:
        return first_language
Example #17
0
 def add_child(self, child):
     """
     Append a child element to this rule.

     :param child: Element to append to ``self.children``.
     :raises LGRFormatException: If the rule is a 'by-ref' rule.
     """
     if self.by_ref is None:
         self.children.append(child)
         return

     logger.error("Cannot add child to a 'by-ref' rule")
     raise LGRFormatException(
         LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
Example #18
0
 def del_variant(self, *args, **kwargs):
     """
     Always fail: a range holds no variant, so none can be deleted.

     From RFC7940, section 5. Code Points and Variants:
     a "range" element has no child elements.

     :param args: Ignored.
     :param kwargs: Ignored.
     :raises LGRFormatException: Always, with reason RANGE_NO_CHILD.
     """
     logger.error("%r: Range has no variant", self)
     no_child = LGRFormatException.LGRFormatReason.RANGE_NO_CHILD
     raise LGRFormatException(no_child)