Example #1
0
def slugify(s, delete_chars=SlugDeleteChars, subst_char=SubstChar):
    """
    Convert (unicode) string to slug.

    * This only handles Western-language strings with very basic
      accents.

    The text is passed through ``strip_accents``, ``unescape`` and
    ``strip_tags``; then ``&``, ``/``, spaces and underscores are
    normalised, every character in ``delete_chars`` is replaced by
    ``subst_char``, runs of ``.`` / ``-`` are collapsed, and the result
    is lower-cased.

    :param s: text to convert.
    :param delete_chars: characters to be replaced by ``subst_char``.
    :param subst_char: replacement used for each deleted character.
    :returns: lower-cased slug without leading or trailing ``.`` / ``-``.
    """
    from garage.html_utils import strip_tags, unescape

    s = s.strip("\r\n")
    s = s.replace("\n", " ")
    s = strip_accents(s)
    s = strip_tags(unescape(s))
    # Possessives: "John's" -> "Johns" (straight or typographic apostrophe).
    s = re.sub(r"['’]s", 's', s)
    # "50%" -> "50-percent"
    s = re.sub(r'([0-9\.]+)%', '\\1-percent', s)
    # A single pass is enough: the replacement text contains no "&".
    s = s.replace("&", " and ")
    s = s.replace("/", " ")
    s = s.replace(" ", "-")
    s = s.replace("_", "-")
    s = ''.join(subst_char if ch in delete_chars else ch for ch in s)
    s = re.sub(r'\.\.+', '.', s)
    s = re.sub(r'--+', '-', s)
    # Strip both characters together: stripping "." and then "-" in
    # separate passes leaves a stray "." on input ending in ".-".
    s = s.strip('.-')
    return s.lower()
Example #2
0
def slugify(s, delete_chars=SlugDeleteChars, subst_char=SubstChar):
    """
    Convert (unicode) string to slug.

    Byte strings are decoded as UTF-8 first; text that is already
    unicode is used as-is.

    The text is passed through ``strip_accents``, ``unescape`` and
    ``strip_tags``; then ``&``, ``/``, spaces and underscores are
    normalised, every character in ``delete_chars`` is replaced by
    ``subst_char``, runs of ``.`` / ``-`` are collapsed, and the result
    is lower-cased.

    :param s: text (or UTF-8 encoded bytes) to convert.
    :param delete_chars: characters to be replaced by ``subst_char``.
    :param subst_char: replacement used for each deleted character.
    :returns: lower-cased slug without leading or trailing ``.`` / ``-``.
    """
    # Decode only real byte strings. Calling .decode() on text that is
    # already unicode fails on Python 3 (str has no such method) and on
    # Python 2 triggers an implicit ASCII encode that raises for any
    # non-ASCII character.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    s = s.strip(u"\r\n")
    s = s.replace(u"\n", u" ")
    s = strip_accents(s)
    s = strip_tags(unescape(s))
    # Possessives: "John's" -> "Johns"
    s = re.sub(r"[']s", u's', s)
    # "50%" -> "50-percent"
    s = re.sub(r'([0-9\.]+)%', u'\\1-percent', s)
    # A single pass is enough: the replacement text contains no "&".
    s = s.replace(u"&", u" and ")
    s = s.replace(u"/", u" ")
    s = s.replace(u" ", u"-")
    s = s.replace(u"_", u"-")
    s = u''.join(subst_char if ch in delete_chars else ch for ch in s)
    s = re.sub(r'\.\.+', u'.', s)
    s = re.sub(r'--+', u'-', s)
    # Strip both characters together: stripping "." and then "-" in
    # separate passes leaves a stray "." on input ending in ".-".
    s = s.strip(u'.-')
    return s.lower()
Example #3
0
    def test_unescape(self):
        """
        Check that unescape decodes named and numeric HTML entities
        while leaving the surrounding markup in place.
        """
        from garage.html_utils import unescape
        self._msg('test', 'unescape', first=True)

        source = '<em>he said, &quot;q &amp; a&quot;</em> &lt;abc&gt; &eacute;criture &#23531;&#20316;'
        wanted = '<em>he said, "q & a"</em> <abc> écriture 寫作'
        actual = unescape(source)
        # Report input, output and expectation in that order before asserting.
        for label, value in (('text', source), ('result', actual), ('expected', wanted)):
            self._msg(label, value)
        self.assertEqual(actual, wanted)