Example #1
0
    def normalize(self, text, locale=None, max_length=MAX_LENGTH):
        """
        Returns a normalized text. text has to be a unicode string and locale
        should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'
        """
        if locale is not None:
            # Try to get a normalizer for the locale
            util = queryUtility(IIDNormalizer, name=locale)
            parts = locale.split('_')
            if util is None and len(parts) > 1:
                # Try to get a normalizer for the base language if we asked
                # for one for a language/country combination and found none
                util = queryUtility(IIDNormalizer, name=parts[0])
            if util is not None:
                text = util.normalize(text, locale=locale)

        text = baseNormalize(text)

        # lowercase text
        # base = text.lower()
        ext = ''

        # replace whitespace and punctuation, but preserve filename extensions
        m = FILENAME_REGEX.match(text)
        if m is not None:
            base = m.groups()[0]
            ext = m.groups()[1]

        base = IGNORE_REGEX.sub('', base)
        base = NON_WORD_REGEX.sub('-', base)
        base = MULTIPLE_DASHES_REGEX.sub('-', base)
        base = EXTRA_DASHES_REGEX.sub('', base)

        base = cropName(base, maxLength=max_length)

        if ext != '':
            base = base + '.' + ext

        return base
    def normalize(self, text, locale=None, max_length=MAX_LENGTH):
        """
        Returns a normalized text. text has to be a unicode string and locale
        should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'
        """
        if locale is not None:
            # Try to get a normalizer for the locale
            util = queryUtility(IIDNormalizer, name=locale)
            parts = locale.split('_')
            if util is None and len(parts) > 1:
                # Try to get a normalizer for the base language if we asked
                # for one for a language/country combination and found none
                util = queryUtility(IIDNormalizer, name=parts[0])
            if util is not None:
                text = util.normalize(text, locale=locale)

        text = baseNormalize(text)

        # lowercase text
        base = text.lower()
        ext  = ''

        # replace whitespace and punctuation, but preserve filename extensions
        m = FILENAME_REGEX.match(text)
        if m is not None:
            base = m.groups()[0]
            ext  = m.groups()[1]

        base = IGNORE_REGEX.sub('', base)
        base = NON_WORD_REGEX.sub('-', base)
        base = MULTIPLE_DASHES_REGEX.sub('-', base)
        base = EXTRA_DASHES_REGEX.sub('', base)

        base = cropName(base, maxLength=max_length)
        
        if ext != '':
            base = base + '.' + ext

        return base
Example #3
0
    def normalize(self, text, locale=None, max_length=MAX_URL_LENGTH):
        """
        Returns a normalized text. text has to be a unicode string and locale
        should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'
        """
        if locale is not None:
            # Try to get a normalizer for the locale
            util = queryUtility(IURLNormalizer, name=locale)
            parts = locale.split('_')
            if util is None and len(parts) > 1:
                # Try to get a normalizer for the base language if we asked
                # for one for a language/country combination and found none
                util = queryUtility(IURLNormalizer, name=parts[0])
            if util is not None:
                text = util.normalize(text, locale=locale)

        base = baseNormalize(text)
        ext = ''

        m = FILENAME_REGEX.match(base)
        if m is not None:
            base = m.groups()[0]
            ext = m.groups()[1]

        #base = base.replace(' ', '-')
        base = IGNORE_REGEX.sub('', base)
        base = DANGEROUS_CHARS_REGEX.sub('-', base)
        base = EXTRA_DASHES_REGEX.sub('', base)
        base = MULTIPLE_DASHES_REGEX.sub('-', base)

        base = cropName(base, maxLength=max_length)

        if ext != '':
            base = base + '.' + ext

        return base
    def normalize(self, text, locale=None, max_length=MAX_URL_LENGTH):
        """
        Returns a normalized text. text has to be a unicode string and locale
        should be a normal locale, for example: 'pt_BR', 'sr@Latn' or 'de'
        """
        if locale is not None:
            # Try to get a normalizer for the locale
            util = queryUtility(IURLNormalizer, name=locale)
            parts = locale.split('_')
            if util is None and len(parts) > 1:
                # Try to get a normalizer for the base language if we asked
                # for one for a language/country combination and found none
                util = queryUtility(IURLNormalizer, name=parts[0])
            if util is not None:
                text = util.normalize(text, locale=locale)

        base = baseNormalize(text)
        ext  = ''

        m = FILENAME_REGEX.match(base)
        if m is not None:
            base = m.groups()[0]
            ext  = m.groups()[1]

        #base = base.replace(' ', '-')
        base = IGNORE_REGEX.sub('', base)
        base = DANGEROUS_CHARS_REGEX.sub('-', base)
        base = EXTRA_DASHES_REGEX.sub('', base)
        base = MULTIPLE_DASHES_REGEX.sub('-', base)

        base = cropName(base, maxLength=max_length)

        if ext != '':
            base = base + '.' + ext

        return base