コード例 #1
0
ファイル: utils.py プロジェクト: khertan/pelican
def slugify(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode

    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode("ascii")
    # still unicode
    value = unicodedata.normalize("NFKD", value)
    value = re.sub("[^\w\s-]", "", value).strip().lower()
    value = re.sub("[-\s]+", "-", value)
    # we want only ASCII chars
    value = value.encode("ascii", "ignore")
    # but Pelican should generally use only unicode
    return value.decode("ascii")
コード例 #2
0
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
    value = re.sub('[^\w\s-]', '', value).strip()
    value = re.sub('[-\s]+', '-', value)
    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
コード例 #3
0
ファイル: utils.py プロジェクト: maxhomicide/n
def slugify(value, regex_subs=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """

    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    value = unidecode(value)
    if isinstance(value, bytes):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value)

    for src, dst in regex_subs:
        value = re.sub(src, dst, value, flags=re.IGNORECASE)

    # convert to lowercase
    value = value.lower()

    # we want only ASCII chars
    value = value.encode('ascii', 'ignore').strip()
    # but Pelican should generally use only unicode
    return value.decode('ascii')
コード例 #4
0
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()

    # backward compatible covert from 2-tuples to 3-tuples
    new_subs = []
    for tpl in substitutions:
        try:
            src, dst, skip = tpl
        except ValueError:
            src, dst = tpl
            skip = False
        new_subs.append((src, dst, skip))
    substitutions = tuple(new_subs)

    # by default will replace non-alphanum characters
    replace = True
    for src, dst, skip in substitutions:
        orig_value = value
        value = value.replace(src.lower(), dst.lower())
        # if replacement was made then skip non-alphanum
        # replacement if instructed to do so
        if value != orig_value:
            replace = replace and not skip

    if replace:
        value = re.sub(r'[^\w\s-]', '', value).strip()
        value = re.sub(r'[-\s]+', '-', value)
    else:
        value = value.strip()

    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
コード例 #5
0
ファイル: utils.py プロジェクト: Starch/pelican
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()

    # backward compatible covert from 2-tuples to 3-tuples
    new_subs = []
    for tpl in substitutions:
        try:
            src, dst, skip = tpl
        except ValueError:
            src, dst = tpl
            skip = False
        new_subs.append((src, dst, skip))
    substitutions = tuple(new_subs)

    # by default will replace non-alphanum characters
    replace = True
    for src, dst, skip in substitutions:
        orig_value = value
        value = value.replace(src.lower(), dst.lower())
        # if replacement was made then skip non-alphanum
        # replacement if instructed to do so
        if value != orig_value:
            replace = replace and not skip

    if replace:
        value = re.sub(r'[^\w\s-]', '', value).strip()
        value = re.sub(r'[-\s]+', '-', value)
    else:
        value = value.strip()

    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')