def add_language(self, language, force=False):
    """
    Append a language tag to the languages stored in the metadata.

    The tag is checked with ``rfc5646.check`` before being appended.

    3.3.3.  The language Element
    The value of the "language" element MUST be a valid language tag
    as described in [RFC5646].

    :param str language: Language tag to add.
    :param bool force: If True, an invalid tag is only reported as a
                       warning and is still appended.
    :raises LGRFormatException: If the tag is invalid and ``force`` is
                                False, or if checking the tag raises
                                ``UnicodeEncodeError`` (whatever the
                                value of ``force``).
    """
    try:
        is_valid = rfc5646.check(language)
    except UnicodeEncodeError:
        # Can't skip this one, even when force is set
        logger.error("Invalid non-ASCII language tag '%s'", language)
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)

    if not is_valid:
        level = logging.WARNING if force else logging.ERROR
        logger.log(level, "Invalid language: '%s'", language)
        if not force:
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG
            )

    self.languages.append(language)
def validate(self, parents, rules_lookup, classes_lookup):
    """
    Check that this class definition is well-formed.

    :param parents: List of parent objects of this instance. An empty
                    list means the class is a top-level one.
    :param rules_lookup: Dictionary of defined rules in the LGR.
                         Not used.
    :param classes_lookup: Dictionary of defined classes in the LGR,
                           used to resolve by-ref and tag-based classes.
    :raises LGRFormatException: If the class references an unknown class,
                                or if the presence of its name does not
                                match its position.
    """
    logger.debug('Validate %s', self)

    referenced = self.by_ref
    if referenced is not None and referenced not in classes_lookup:
        # From RFC7940, section 6.2.1. Declaring and Invoking Named Classes
        # It is an error to reference a named class for which the
        # definition has not been seen.
        logger.error("Class cannot reference inexisting class '%s'",
                     referenced)
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_BY_REF)

    # From RFC7940, section 6.2.1. Declaring and Invoking Named Classes
    # The "name" attribute MUST be present if and only if the class
    # is a direct child element of the "rules" element.
    # => a top-level class must be named, a nested one must not be.
    is_top_level = len(parents) == 0
    is_anonymous = self.name is None
    if is_top_level == is_anonymous:
        logger.error("'name' attribute MUST be present only and only if "
                     "class is a direct child of the 'rules' element")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_TOP_LEVEL_NAME)

    if self.from_tag is None:
        return

    # An unknown tag is only reported, it does not invalidate the class
    if TAG_CLASSNAME_PREFIX + self.from_tag not in classes_lookup:
        logger.warning("Undefined tag '%s' in current LGR", self.from_tag)
def __init__(self, name=None, comment=None, ref=None, from_tag=None,
             unicode_property=None, codepoints=None, by_ref=None):
    """
    Create a class.

    :param name: Name of the class.
    :param comment: Optional comment associated to the class.
    :param ref: Optional list of references.
    :param from_tag: Define a tag-based class.
    :param unicode_property: Define a Unicode property-based class.
    :param codepoints: Initial sequence of code points, stored as a set.
    :param by_ref: Name of the referenced class.
    :raises LGRFormatException: If ``by_ref`` is combined with a name,
                                a tag, a property or references.
    """
    self.name = name
    self.comment = comment
    self.ref = ref
    self.from_tag = from_tag
    self.unicode_property = unicode_property
    self.codepoints = set(codepoints or [])
    self.by_ref = by_ref

    if by_ref is None:
        return

    # Attributes which cannot be combined with 'by-ref', checked in order;
    # the first one which is set is reported.
    exclusive_attributes = (
        (name,
         "Cannot create a class with both a 'by-ref' and a 'name'"),
        (from_tag,
         "Cannot create a class with both a 'by-ref' and a 'from-tag'"),
        (unicode_property,
         "Cannot create a class with both a 'by-ref' and a 'property'"),
        (ref,
         "Cannot create a class with both a 'by-ref' and a 'ref'"),
    )
    for value, message in exclusive_attributes:
        if value is not None:
            logger.error(message)
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
def set_unicode_version(self, unicode_version, force=False):
    """
    Set the unicode-version of the LGR.

    Ensure the unicode_version is a valid x.y.z string.

    3.3.7.  The unicode-version Element
    the version number used in creating the LGR MUST be listed
    in the form x.y.z, where x, y, and z are positive, decimal integers
    (see [Unicode-Versions]).

    :param str unicode_version: The Unicode version.
    :param bool force: If True, set the version even if format is invalid.
    :raises LGRFormatException: If the unicode_version parameter
                                has an invalid format.
    """
    # re.match() only anchors the pattern at the start of the string:
    # the trailing \Z is needed to reject values which merely *start* with
    # x.y.z, such as "6.3.0.1" or "6.3.0-beta".
    if re.match(r'\d+\.\d+\.\d+\Z', unicode_version) is None:
        logger.log(logging.WARNING if force else logging.ERROR,
                   "Invalid Unicode version: '%s'", unicode_version)
        if not force:
            raise LGRFormatException(
                LGRFormatException.LGRFormatReason.INVALID_UNICODE_VERSION_TAG)
    self.unicode_version = unicode_version
def set_languages(self, languages, force=False):
    """
    Convenience function to update the languages in the metadata.

    Every tag is checked with ``rfc5646.check``. Tags for which the check
    raises ``UnicodeEncodeError`` are reported and left out of the result,
    whatever the value of ``force``. The input collection is never modified.

    :param iterable languages: a collection of language tags
                               as described in [RFC5646].
    :param bool force: If True, add the languages even if format is
                       invalid.
    :raises LGRFormatException: if a language has an invalid format
                                and ``force`` is False. The stored
                                languages are left untouched in that case.
    """
    # Collect the retained tags in a new list instead of removing entries
    # from the input while iterating over it: removing during iteration
    # skips the following element (which was then never checked), mutates
    # the caller's data and does not work for tuples or one-shot iterables.
    accepted = []
    found_error = False
    for language in languages:
        try:
            if not rfc5646.check(language):
                logger.log(logging.WARNING if force else logging.ERROR,
                           "Invalid language: '%s'", language)
                found_error = True
        except UnicodeEncodeError:
            # Can't skip this one: dropped even when force is set
            logger.error("Invalid non-ASCII language tag '%s'", language)
            continue
        accepted.append(language)

    if found_error and not force:
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)

    self.languages = accepted
def _validate_date(date, force):
    """
    Check that a string is an ISO 8601 "full-date" (YYYY-MM-DD).

    The string must be made of three integer fields separated by dashes,
    must designate an existing calendar day, and must be identical to the
    ISO rendering of that day (so zero-padding is mandatory).

    :param str date: Date to validate, as a string.
    :param force: If True, an invalid date is only logged as a warning
                  and is returned unchanged.
    :return: date input.
    :raises LGRFormatException: If the date parameter has an invalid format
                                and ``force`` is False.

    >>> _validate_date('2015-06-25', False) == '2015-06-25'
    True
    >>> _validate_date('2015-13-26', False) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    LGRFormatException
    >>> _validate_date('2015', False) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    LGRFormatException
    """
    # Date should be date-fullyear "-" date-month "-" date-mday
    fields = date.split('-')
    is_full_date = False
    if len(fields) == 3:
        try:
            year, month, day = [int(field) for field in fields]
            # Comparing with the ISO rendering rejects non-padded values
            is_full_date = datetime.date(year, month, day).isoformat() == date
        except ValueError:
            # Non-numeric field or non-existing calendar day
            is_full_date = False

    if is_full_date:
        return date

    logger.log(logging.WARNING if force else logging.ERROR,
               "Invalid date format: '%s'", date)
    if force:
        return date
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.INVALID_DATE_TAG)
def validate(self, parents, rules_lookup, classes_lookup):
    """
    Run the parent class validation, then require exactly one child.

    :param parents: List of parent objects of this instance.
    :param rules_lookup: Dictionary of defined rules in the LGR.
    :param classes_lookup: Dictionary of defined classes in the LGR.
    :raises LGRFormatException: If the parent validation fails or if the
                                number of children is not one.
    """
    super(ComplementClass, self).validate(parents,
                                          rules_lookup,
                                          classes_lookup)

    child_count = len(self._children)
    if child_count == 1:
        logger.debug('%s is valid', self)
        return

    logger.error("'Complement' class MUST contain one element")
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
def validate(self, parents, rules_lookup, classes_lookup):
    """
    Run the parent class validation, then require exactly two children.

    :param parents: List of parent objects of this instance.
    :param rules_lookup: Dictionary of defined rules in the LGR.
    :param classes_lookup: Dictionary of defined classes in the LGR.
    :raises LGRFormatException: If the parent validation fails or if the
                                number of children is not two.
    """
    super(DifferenceClass, self).validate(parents,
                                          rules_lookup,
                                          classes_lookup)

    child_count = len(self._children)
    if child_count == 2:
        logger.debug('%s is valid', self)
        return

    logger.error("'Difference' class MUST contain two elements")
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
def add_child(self, cls):
    """
    Append a child to this instance.

    :param cls: Child to append to the list of children.
    :raises LGRFormatException: If ``MAX_CHILDREN`` is positive and that
                                number of children is already reached.
    """
    limit = self.MAX_CHILDREN
    # A limit which is not strictly positive means "no limit"
    is_full = limit > 0 and len(self._children) >= limit
    if not is_full:
        self._children.append(cls)
        return

    logger.error("Cannot have more than %s children", limit)
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.INVALID_CHILDREN_NUMBER)
def add_codepoint(self, cp):
    """
    Insert one or several code points in the set of code points.

    :param cp: Code point(s) to add.
               An integer or a sequence of code points.
    :raises LGRFormatException: If the class is a 'by-ref' one.
    """
    if self.by_ref is not None:
        logger.error("Cannot add code point to a 'by-ref' class")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)

    # A lone integer is handled as a one-element sequence
    new_codepoints = [cp] if isinstance(cp, int) else cp
    self.codepoints.update(new_codepoints)
def del_codepoint(self, cp):
    """
    Remove one or several code points from the set of code points.

    Code points which are not in the set are silently ignored.

    :param cp: Code point(s) to delete.
               An integer or a sequence of code points.
    :raises LGRFormatException: If the class is a 'by-ref' one.
    """
    if self.by_ref is not None:
        logger.error("Cannot delete code point from a 'by-ref' class")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)

    # A lone integer is handled as a one-element sequence
    to_remove = [cp] if isinstance(cp, int) else cp
    for codepoint in to_remove:
        self.codepoints.discard(codepoint)
def validate(self, parents, rules_lookup, classes_lookup):
    """
    Check that this rule and all of its children are well-formed.

    :param parents: List of parent objects of this instance. An empty
                    list means the rule is a top-level one.
    :param rules_lookup: Dictionary of defined rules in the LGR,
                         used to resolve by-ref rules.
    :param classes_lookup: Dictionary of defined classes in the LGR,
                           handed over to the children.
    :raises LGRFormatException: If the rule references an unknown rule,
                                if the presence of its name does not match
                                its position, or if a child is invalid.
    """
    logger.debug('Validate %s', self)

    referenced = self.by_ref
    if referenced is not None and referenced not in rules_lookup:
        # From RFC7940, section 6.3.4. The "name" and "by-ref" Attributes
        # It is an error to reference a rule or class for which
        # the definition has not been seen.
        logger.error("Rule cannot reference inexisting rule '%s'",
                     referenced)
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_BY_REF)

    # From RFC7940, section 6.3.4. The "name" and "by-ref" Attributes
    # rules declared as immediate child elements of the "rules" element
    # MUST be named using a unique "name" attribute,
    # and all other instances MUST NOT be named.
    # => a top-level rule must be named, a nested one must not be.
    is_top_level = len(parents) == 0
    is_anonymous = self.name is None
    if is_top_level == is_anonymous:
        logger.error("'name' attribute MUST be present only and only if "
                     "rule is a direct child of the 'rules' element")
        raise LGRFormatException(
            LGRFormatException.LGRFormatReason.INVALID_TOP_LEVEL_NAME)

    logger.debug('Validate %s children', self)
    # Children are validated with this rule appended to their parents
    lineage = parents + [self]
    for child in self.children:
        child.validate(lineage, rules_lookup, classes_lookup)

    logger.debug('%s is valid', self)
def __init__(self, name=None, comment=None, by_ref=None):
    """
    Create a new rule, with an empty list of children.

    :param name: Name of the rule.
    :param comment: Optional comment.
    :param by_ref: Name of the referenced rule.
    :raises LGRFormatException: If both a name and a referenced rule
                                are provided.
    """
    self.name = name
    self.comment = comment
    self.by_ref = by_ref
    self.children = []

    # A rule is either named or a reference to another rule, never both
    if name is None or by_ref is None:
        return

    logger.error(
        "Cannot create a rule with both a 'name' and a 'by-ref'")
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
def __init__(self, disp, comment=None, ref=None,
             match=None, not_match=None,
             any_variant=None, all_variants=None, only_variants=None):
    """
    Create the action element.

    :param disp: Disposition of the action.
    :param comment: Optional comment.
    :param ref: Optional list of references (stored as an empty list
                when not provided).
    :param match: Name of a rule that must be matched.
    :param not_match: Name of a rule that must not be matched.
    :param any_variant: Sequence of disposition to match
                        to trigger action.
    :param all_variants: Sequence of disposition to match
                         to trigger action.
    :param only_variants: Sequence of disposition to match
                          to trigger action.
    :raises LGRFormatException: If both ``match`` and ``not_match``
                                are provided.
    """
    def _frozen_or_none(dispositions):
        # Missing or empty sequences are stored as None
        if dispositions:
            return frozenset(dispositions)
        return None

    self.disp = disp
    self.comment = comment
    self.references = ref or []
    self.match = match
    self.not_match = not_match
    self.any_variant = _frozen_or_none(any_variant)
    self.all_variants = _frozen_or_none(all_variants)
    self.only_variants = _frozen_or_none(only_variants)

    if match is None or not_match is None:
        return

    # From RFC7940, section 7.1. The "match" and "not-match" Attributes
    # An action MUST NOT contain both a "match" and a "not-match" attribute
    logger.error("Action contains both 'match' and 'not-match' "
                 "attributes")
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.MATCH_NOT_MATCH)
def del_variant(self, *args, **kwargs):
    """
    Refuse to delete a variant: the operation always fails.

    Any positional or keyword argument is accepted and ignored.

    :raises LGRFormatException: Always, with reason RANGE_NO_CHILD.
    """
    # From draft-davies-idntables-09, section 4. Code Points and Variants
    # A "range" element has no child elements.
    logger.error("%r: Range has no variant", self)
    reason = LGRFormatException.LGRFormatReason.RANGE_NO_CHILD
    raise LGRFormatException(reason)
def get_script(lgr):
    """
    Return the first language listed in the metadata of an LGR.

    NOTE(review): the function name suggests that this first language tag
    is used as the script of the LGR -- confirm against the callers.

    :param lgr: Object exposing ``metadata.languages``.
    :return: First item of ``lgr.metadata.languages``.
    :raises LGRFormatException: If the list of languages is empty.
    """
    try:
        first_language = lgr.metadata.languages[0]
    except IndexError:
        raise LGRFormatException(
            reason=LGRFormatException.LGRFormatReason.INVALID_LANGUAGE_TAG)
    return first_language
def add_child(self, child):
    """
    Append a child to the rule.

    :param child: Child to append to the list of children.
    :raises LGRFormatException: If the rule is a 'by-ref' one.
    """
    if self.by_ref is None:
        self.children.append(child)
        return

    logger.error("Cannot add child to a 'by-ref' rule")
    raise LGRFormatException(
        LGRFormatException.LGRFormatReason.BY_REF_AND_OTHER)
def del_variant(self, *args, **kwargs):
    """
    Refuse to delete a variant: the operation always fails.

    Any positional or keyword argument is accepted and ignored.

    :raises LGRFormatException: Always, with reason RANGE_NO_CHILD.
    """
    # From RFC7940, section 5. Code Points and Variants
    # A "range" element has no child elements.
    logger.error("%r: Range has no variant", self)
    reason = LGRFormatException.LGRFormatReason.RANGE_NO_CHILD
    raise LGRFormatException(reason)