def normalize(self, text, locale=None, max_length=MAX_LENGTH):
    """
    Returns a normalized text. text has to be a unicode string and locale
    should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'

    When a locale is given, an IIDNormalizer utility registered under that
    name is looked up and, if found, applied to the text first. If none is
    registered for a 'language_COUNTRY' locale, the utility registered for
    the part before the first underscore is tried instead.

    This variant does not lowercase the text. max_length is passed on to
    cropName as maxLength.
    """
    if locale is not None:
        # Try to get a normalizer for the locale
        util = queryUtility(IIDNormalizer, name=locale)
        parts = locale.split('_')
        if util is None and len(parts) > 1:
            # Try to get a normalizer for the base language if we asked
            # for one for a language/country combination and found none
            util = queryUtility(IIDNormalizer, name=parts[0])
        if util is not None:
            text = util.normalize(text, locale=locale)

    text = baseNormalize(text)

    # Start from the whole text so that ``base`` is always bound, even when
    # the text has no filename extension (FILENAME_REGEX does not match).
    base = text
    ext = ''

    # replace whitespace and punctuation, but preserve filename extensions
    m = FILENAME_REGEX.match(text)
    if m is not None:
        base = m.groups()[0]
        ext = m.groups()[1]

    base = IGNORE_REGEX.sub('', base)
    base = NON_WORD_REGEX.sub('-', base)
    base = MULTIPLE_DASHES_REGEX.sub('-', base)
    base = EXTRA_DASHES_REGEX.sub('', base)

    # Only the base name is cropped; the extension is re-attached afterwards.
    base = cropName(base, maxLength=max_length)

    if ext != '':
        base = base + '.' + ext

    return base
def normalize(self, text, locale=None, max_length=MAX_LENGTH):
    """
    Returns a normalized, lowercased text. text has to be a unicode string
    and locale should be a normal locale, for example: 'pt_BR', 'sr@Latn'
    or 'de'

    When a locale is given, an IIDNormalizer utility registered under that
    name is looked up and, if found, applied to the text first. If none is
    registered for a 'language_COUNTRY' locale, the utility registered for
    the part before the first underscore is tried instead.

    max_length is passed on to cropName as maxLength.
    """
    if locale is not None:
        # Try to get a normalizer for the locale
        util = queryUtility(IIDNormalizer, name=locale)
        parts = locale.split('_')
        if util is None and len(parts) > 1:
            # Try to get a normalizer for the base language if we asked
            # for one for a language/country combination and found none
            util = queryUtility(IIDNormalizer, name=parts[0])
        if util is not None:
            text = util.normalize(text, locale=locale)

    text = baseNormalize(text)

    # lowercase text
    base = text.lower()
    ext = ''

    # replace whitespace and punctuation, but preserve filename extensions.
    # Match against the lowercased text: matching the original text would
    # overwrite ``base`` with a non-lowercased name whenever an extension
    # is present.
    m = FILENAME_REGEX.match(base)
    if m is not None:
        base = m.groups()[0]
        ext = m.groups()[1]

    base = IGNORE_REGEX.sub('', base)
    base = NON_WORD_REGEX.sub('-', base)
    base = MULTIPLE_DASHES_REGEX.sub('-', base)
    base = EXTRA_DASHES_REGEX.sub('', base)

    # Only the base name is cropped; the extension is re-attached afterwards.
    base = cropName(base, maxLength=max_length)

    if ext != '':
        base = base + '.' + ext

    return base
def normalize(self, text, locale=None, max_length=MAX_URL_LENGTH):
    """
    Returns a normalized text. text has to be a unicode string and locale
    should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'

    A locale-specific IURLNormalizer utility is applied first when one is
    registered for the locale, or for the part of the locale before the
    first underscore. The result is then run through baseNormalize, split
    into base name and extension, cleaned with the module's regexes and
    cropped via cropName with maxLength=max_length.
    """
    if locale is not None:
        # Look for a normalizer registered for the exact locale first
        normalizer = queryUtility(IURLNormalizer, name=locale)
        locale_parts = locale.split('_')
        if normalizer is None and len(locale_parts) > 1:
            # Nothing for the language/country combination: fall back to
            # a normalizer registered for the base language only
            normalizer = queryUtility(IURLNormalizer, name=locale_parts[0])
        if normalizer is not None:
            text = normalizer.normalize(text, locale=locale)

    base = baseNormalize(text)
    ext = ''

    # Keep a filename extension apart so it is neither cleaned nor cropped
    matched = FILENAME_REGEX.match(base)
    if matched is not None:
        pieces = matched.groups()
        base = pieces[0]
        ext = pieces[1]

    # Substitutions are applied in this exact order
    substitutions = (
        (IGNORE_REGEX, ''),
        (DANGEROUS_CHARS_REGEX, '-'),
        (EXTRA_DASHES_REGEX, ''),
        (MULTIPLE_DASHES_REGEX, '-'),
    )
    for pattern, replacement in substitutions:
        base = pattern.sub(replacement, base)

    base = cropName(base, maxLength=max_length)

    if ext == '':
        return base
    return base + '.' + ext