def command_name(cls):
    '''Return the command-line name of the command class `cls`.

    By default the name is the class name in lower case, with a hyphen
    inserted before every capital letter except the first one.  For example
    "MyCommand" is exposed as "my-command".

    This is a plain function rather than a class method because a class
    method would not be available on minimal commands (those that only
    define the "run" method).

    Example::

      >>> class SomeCommand(object):
      ...     pass

      >>> command_name(SomeCommand) == 'some-command'
      True

    If the command class has an attribute `command_cli_name`, its value is
    returned instead::

      >>> class SomeCommand(object):
      ...     command_cli_name = 'adduser'

      >>> command_name(SomeCommand) == 'adduser'
      True

    A non-string `command_cli_name` attribute is an error::

      >>> class SomeCommand(object):
      ...     command_cli_name = None

      >>> command_name(SomeCommand)  # doctest: +ELLIPSIS
      Traceback (most recent call last):
         ...
      TypeError: Attribute 'command_cli_name' must be a string.

    '''
    # A private sentinel lets an explicit ``None`` be told apart from a
    # missing attribute (``None`` must be rejected, not ignored).
    missing = object()
    explicit = getattr(cls, 'command_cli_name', missing)
    if explicit is not missing:
        from xoutil.eight import string_types
        if isinstance(explicit, string_types):
            return explicit
        raise TypeError("Attribute 'command_cli_name' must be a string.")

    from io import StringIO
    from xoutil.string import safe_decode
    with StringIO() as out:
        for index, char in enumerate(cls.__name__):
            if char.isupper():
                # No leading hyphen for the very first character.
                if index > 0:
                    out.write(safe_decode('-'))
                char = char.lower()
            out.write(safe_decode(char))
        return out.getvalue()
def build_documentation(cls, get_doc=None, deep=1):
    '''Assemble the documentation of class `cls` from its MRO.

    The classes in ``cls.mro()`` are visited in order and each one that
    contributes a new, non-empty documentation adds a section headed by
    ``=== <ClassName> ===``.  Visiting stops once `deep` sections have been
    collected (values below 1 are treated as 1) or the MRO is exhausted.

    `get_doc` is a callable that receives a class and returns its
    documentation.  When it is not given, the ``__doc__`` attribute is used.

    Returns the sections joined by a blank line, or an empty string when no
    class contributed any documentation.

    '''
    from xoutil.string import safe_decode
    assert isinstance(cls, type), _INVALID_CLASS_TYPE_MSG
    deep = max(deep, 1)
    if not get_doc:
        def get_doc(klass):
            return klass.__doc__
    sections = []
    seen = {}    # class name -> set of documentation texts already emitted
    level = 0
    for klass in cls.mro():
        if level >= deep:
            break
        text = get_doc(klass)
        if not text:
            continue
        text = safe_decode(text).strip()
        name = klass.__name__
        known = seen.setdefault(name, set())
        if text in known:
            # Same text already emitted under this class name.
            continue
        known.add(text)
        sections.append('=== <%s> ===\n\n%s' % (name, text))
        level += 1
    return '\n\n'.join(sections)
def escape(s, quote=True):
    """Translate the special characters of `s` using the escape tables.

    The text is translated with ``_escape_map_full`` when `quote` is true
    (the default) and with ``_escape_map`` otherwise.  Per the original
    description, these cover "&", "<" and ">", plus the double (") and
    single (') quotation marks when `quote` is true.

    A value that is not text is first passed through `safe_decode`.  If the
    translated result is not an instance of the type of `s`, it is passed
    through `safe_encode` before being returned.

    """
    from xoutil.eight import text_type
    from xoutil.string import safe_decode, safe_encode
    text = s if isinstance(s, text_type) else safe_decode(s)
    table = _escape_map_full if quote else _escape_map
    escaped = text.translate(table)
    # Hand back the same kind of string the caller passed in.
    return escaped if isinstance(escaped, type(s)) else safe_encode(escaped)
def ppformat(obj):
    '''Pretty-print `obj` like :func:`pprint`, but return the text.

    Nothing is written to an external stream; the output is captured in an
    in-memory buffer instead.

    :returns: The pretty formatted text.

    :rtype: `unicode` in Python 2, `str` in Python 3.

    '''
    import io
    from six import PY3, text_type
    # A text buffer is used on Python 3 and a bytes buffer on Python 2.
    buf = io.StringIO() if PY3 else io.BytesIO()
    pprint(obj, stream=buf)
    text = buf.getvalue()
    if not isinstance(text, text_type):
        from xoutil.string import safe_decode
        text = safe_decode(text)
    return text
def test_safe_decode_dont_fail_uppon_invalid_encoding(s):
    '''Check `safe_decode` with an unknown encoding name.

    Calling it with the encoding 'i-dont-exist' must give the same result
    as calling it with no encoding at all.

    '''
    from xoutil.string import safe_decode
    with_unknown_encoding = safe_decode(s, 'i-dont-exist')
    with_default_encoding = safe_decode(s)
    assert with_unknown_encoding == with_default_encoding
# Warn that this module is deprecated.
warnings.warn('xoutil.html is deprecated')

# Bind the names `entities` and `parser` to the ``html`` submodules when
# `_py3` is true, or to ``htmlentitydefs`` / ``HTMLParser`` otherwise.
if _py3:
    from html import entities
    from html import parser
else:
    import htmlentitydefs as entities
    import HTMLParser as parser

# Attach two extra tables to the `entities` module, both keyed by entity
# name: the definition passed through `safe_decode` with 'latin-1', and that
# same text encoded as UTF-8.
entities.entitydefs_unicode = {}
entities.entitydefs_utf8 = {}

for name, entity in iteritems_(entities.entitydefs):
    text = entities.entitydefs_unicode[name] = safe_decode(entity, 'latin-1')
    entities.entitydefs_utf8[name] = text.encode('utf-8')

# Remove the loop variables and the `safe_decode` / `iteritems_` names from
# this namespace.
del name, entity, safe_decode, iteritems_


def _further_escape(s):
    '''Write control and high bytes of `s` as decimal ``&#N;`` references.

    Every single byte of `s` in the ranges 0x00-0x1F and 0x80-0xFF is
    matched.  For each match, the slice ``s[pos:start]`` followed by
    ``&#N;`` (with N the ordinal of the matched byte) is appended to a
    bytes accumulator.

    NOTE(review): in the lines visible here `pos` is never advanced past a
    match and the accumulator is never returned.  The body looks truncated;
    confirm against the complete source before relying on this function.

    '''
    import re
    from xoutil.string import safe_encode
    # ``re.ASCII`` is looked up dynamically; 0 (no flag) is used where the
    # attribute does not exist.
    ASCII = getattr(re, 'ASCII', 0)  # Py3k
    what = re.compile(br'[\x00-\x1F\x80-\xFF]', ASCII)
    res, pos = b'', 0
    for match in what.finditer(s):
        char, start, end = match.group(), match.start(), match.end()
        # The pattern is a single character class, so a match is one byte.
        assert start + 1 == end
        res += s[pos:start]
        res += b'&#' + safe_encode(str(ord(char))) + b';'
def slugify(s, entities=True, decimal=True, hexadecimal=True):
    '''Build a slug from `s`.

    Normalizes the string, converts it to lower-case, removes non-alpha
    characters, and converts spaces to hyphens.

    Parts from http://www.djangosnippets.org/snippets/369/

      >>> slugify("Manuel Vázquez Acosta")    # doctest: +SKIP
      'manuel-vazquez-acosta'

    If `entities` is True (the default) all named HTML entities are
    replaced by their equivalent character before normalization::

      >>> slugify("Manuel V&aacute;zquez Acosta")   # doctest: +SKIP
      'manuel-vazquez-acosta'

    If `entities` is False, then no HTML-entities substitution is made::

      >>> value = "Manuel V&aacute;zquez Acosta"
      >>> slugify(value, entities=False)  # doctest: +SKIP
      'manuel-v-aacute-zquez-acosta'

    If `decimal` is True, then all references of the form ``&#nnnn;`` where
    `nnnn` is a decimal number deemed as a unicode codepoint, are replaced
    by the corresponding unicode character::

      >>> slugify('Manuel V&#225;zquez Acosta')  # doctest: +SKIP
      'manuel-vazquez-acosta'

      >>> value = 'Manuel V&#225;zquez Acosta'
      >>> slugify(value, decimal=False)  # doctest: +SKIP
      'manuel-v-225-zquez-acosta'

    If `hexadecimal` is True, then all references of the form ``&#xnnnn;``
    where `nnnn` is a hexadecimal number deemed as a unicode codepoint, are
    replaced by the corresponding unicode character::

      >>> slugify('Manuel V&#x00e1;zquez Acosta')  # doctest: +SKIP
      'manuel-vazquez-acosta'

      >>> slugify('Manuel V&#x00e1;zquez Acosta', hexadecimal=False)  # doctest: +SKIP
      'manuel-v-x00e1-zquez-acosta'

    A numeric reference whose number is not a valid code point is left
    untouched; the remaining references in the string are still replaced.

    :param s: The value to slugify.  A value that is not text is passed
              through `safe_decode` first.

    :returns: The result of ``normalize_slug`` applied to the processed
              text, using ``'-'`` as the separator.

    '''
    import re
    from xoutil.eight import unichr, text_type
    from xoutil.string import safe_decode, normalize_slug

    def _numeric_reference(base):
        # Build the `re.sub` callback decoding a reference written in `base`.
        def replace(match):
            try:
                return unichr(int(match.group(1), base))
            except (ValueError, OverflowError):
                # Not a valid code point: keep this reference verbatim
                # instead of giving up on the whole substitution.
                return match.group(0)
        return replace

    if not isinstance(s, text_type):
        s = safe_decode(s)
    if entities:
        try:
            from htmlentitydefs import name2codepoint
        except ImportError:
            # Py3k: The ``htmlentitydefs`` module has been renamed to
            # ``html.entities`` in Python 3
            from html.entities import name2codepoint
        s = re.sub(str('&(%s);') % str('|').join(name2codepoint),
                   lambda m: unichr(name2codepoint[m.group(1)]), s)
    if decimal:
        s = re.sub(r'&#(\d+);', _numeric_reference(10), s)
    if hexadecimal:
        s = re.sub(r'&#x([\da-fA-F]+);', _numeric_reference(16), s)
    return normalize_slug(s, '-')