Exemple #1
0
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
    value = re.sub('[^\w\s-]', '', value).strip()
    value = re.sub('[-\s]+', '-', value)
    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
Exemple #2
0
def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    #changed regex subs
    #imported re
    #changed unicode
    if regex_subs == ():
        regex_subs = (DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS'])

    import unicodedata
    import unidecode
    import re

    def normalize_unicode(text):
        # normalize text by compatibility composition
        # see: https://en.wikipedia.org/wiki/Unicode_equivalence
        return unicodedata.normalize('NFKC', text)

    # strip tags from value
    value = Markup(value).striptags()

    # normalization
    value = normalize_unicode(value)

    if not use_unicode:
        # ASCII-fy
        value = unidecode.unidecode(value)

    # perform regex substitutions
    for src, dst in regex_subs:
        value = re.sub(normalize_unicode(src),
                       normalize_unicode(dst),
                       value,
                       flags=re.IGNORECASE)

    if not preserve_case:
        value = value.lower()
    value.replace(" ", "-")

    #test
    return value.strip()
Exemple #3
0
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()

    # backward compatible covert from 2-tuples to 3-tuples
    new_subs = []
    for tpl in substitutions:
        try:
            src, dst, skip = tpl
        except ValueError:
            src, dst = tpl
            skip = False
        new_subs.append((src, dst, skip))
    substitutions = tuple(new_subs)

    # by default will replace non-alphanum characters
    replace = True
    for src, dst, skip in substitutions:
        orig_value = value
        value = value.replace(src.lower(), dst.lower())
        # if replacement was made then skip non-alphanum
        # replacement if instructed to do so
        if value != orig_value:
            replace = replace and not skip

    if replace:
        value = re.sub(r'[^\w\s-]', '', value).strip()
        value = re.sub(r'[-\s]+', '-', value)
    else:
        value = value.strip()

    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
Exemple #4
0
def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()

    # backward compatible covert from 2-tuples to 3-tuples
    new_subs = []
    for tpl in substitutions:
        try:
            src, dst, skip = tpl
        except ValueError:
            src, dst = tpl
            skip = False
        new_subs.append((src, dst, skip))
    substitutions = tuple(new_subs)

    # by default will replace non-alphanum characters
    replace = True
    for src, dst, skip in substitutions:
        orig_value = value
        value = value.replace(src.lower(), dst.lower())
        # if replacement was made then skip non-alphanum
        # replacement if instructed to do so
        if value != orig_value:
            replace = replace and not skip

    if replace:
        value = re.sub(r'[^\w\s-]', '', value).strip()
        value = re.sub(r'[-\s]+', '-', value)
    else:
        value = value.strip()

    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
Exemple #5
0
def convert_markdown(text, convert_url=None, *, inline=False):
    convert_url = convert_url if convert_url else lambda x: x

    # Workaround for https://github.com/lepture/mistune/issues/125
    NBSP_REPLACER = '\uf8ff'
    text = text.replace('\N{NO-BREAK SPACE}', NBSP_REPLACER)

    text = dedent(text)

    markdown = Markdown(
        escape=False,
        block=BlockLexer(),
        renderer=Renderer(convert_url),
    )
    result = Markup(markdown(text)).strip()

    if inline and result.startswith('<p>') and result.endswith('</p>'):
        result = result[len('<p>'):-len('</p>')]

    # Workaround for https://github.com/lepture/mistune/issues/125
    result = result.replace(NBSP_REPLACER, '\N{NO-BREAK SPACE}')
    return result