Example #1
def write(self, msg):
    if self.redirect is not None:
        self.redirect.write(msg)
    if six.PY2:
        from xdoctest.utils.util_str import ensure_unicode
        msg = ensure_unicode(msg)
    super(TeeStringIO, self).write(msg)
Example #2
def write(self, msg):
    """
    Write to this and the redirected stream
    """
    if self.redirect is not None:
        self.redirect.write(msg)
    if PY2:
        from xdoctest.utils.util_str import ensure_unicode
        msg = ensure_unicode(msg)
    return super(TeeStringIO, self).write(msg)
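Both variants above tee a message to an optional redirect stream before writing it into the in-memory buffer. The following self-contained sketch shows how such a class might be used; the redirect constructor argument and the class layout are assumptions for illustration, and only the write logic is taken from the examples above.

import io
import sys


class TeeStringIO(io.StringIO):
    # Minimal sketch: a StringIO that also forwards writes to another stream.
    # The ``redirect`` argument is assumed for illustration.

    def __init__(self, redirect=None):
        self.redirect = redirect
        super(TeeStringIO, self).__init__()

    def write(self, msg):
        # Write to the redirect stream (if any) and to the internal buffer.
        if self.redirect is not None:
            self.redirect.write(msg)
        return super(TeeStringIO, self).write(msg)


# Usage: everything written to ``tee`` is captured and echoed to stdout.
tee = TeeStringIO(redirect=sys.stdout)
tee.write('hello\n')
assert tee.getvalue() == 'hello\n'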
Example #3
# Imports used below (assumed to live at module scope):
import re
import textwrap
import collections
import six
from xdoctest import exceptions
from xdoctest.utils.util_str import ensure_unicode


def split_google_docblocks(docstr):
    """ Breaks a docstring into parts defined by google style

    Args:
        docstr (str): a docstring

    Returns:
        List[Tuple]: list of 2-tuples where the first item is a google style
            docstring tag and the second item is the block corresponding to that
            tag.

    CommandLine:
        xdoctest xdoctest.docstr.docscrape_google split_google_docblocks:2

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...     one line description
        ...
        ...     multiline
        ...     description
        ...
        ...     Args:
        ...         foo: bar
        ...
        ...     Returns:
        ...         None
        ...
        ...     Example:
        ...         >>> print('eg1')
        ...         eg1
        ...
        ...     Example:
        ...         >>> print('eg2')
        ...         eg2
        ...     ''')
        >>> groups = split_google_docblocks(docstr)
        >>> assert len(groups) == 5
        >>> [g[0] for g in groups]
        ['__DOC__', 'Args', 'Returns', 'Example', 'Example']

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> docstr = split_google_docblocks.__doc__
        >>> groups = split_google_docblocks(docstr)

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...      a description with a leading space
        ...
        ...     Example:
        ...         >>> foobar
        ...     ''')
        >>> groups = split_google_docblocks(docstr)
        >>> print('groups = {!r}'.format(groups))

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...     Example:
        ...         >>> foobar
        ...     ''')
        >>> # Check that line offsets are valid if the first line is not blank
        >>> groups = split_google_docblocks(docstr)
        >>> offset = groups[0][1][1]
        >>> print('offset = {!r}'.format(offset))
        >>> assert offset == 0
        >>> # Check that line offsets are valid if the first line is blank
        >>> groups = split_google_docblocks(chr(10) + docstr)
        >>> offset = groups[0][1][1]
        >>> print('offset = {!r}'.format(offset))
        >>> assert offset == 1
    """
    if not isinstance(docstr, six.string_types):
        raise TypeError('Input docstr must be a string. Got {} instead'.format(
            type(docstr)))

    def get_indentation(line_):
        """ returns number of preceding spaces """
        return len(line_) - len(line_.lstrip())

    # Parse out initial documentation lines
    # Then parse out the blocked lines.
    docstr = ensure_unicode(docstr)

    docstr = textwrap.dedent(docstr)
    docstr_lines = docstr.split('\n')
    line_indent = [get_indentation(line) for line in docstr_lines]
    line_len = [len(line) for line in docstr_lines]

    # The first line may not have the correct indentation if it starts
    # right after the triple quotes. Adjust it in this case to ensure that
    # base indent is always 0
    adjusted = False
    is_nonzero = [len_ > 0 for len_ in line_len]
    if len(line_indent) >= 2:
        if line_len[0] != 0:
            indents = [x for x, f in zip(line_indent, is_nonzero) if f]
            if len(indents) >= 2:
                indent_adjust = min(indents[1:])
                line_indent[0] += indent_adjust
                line_len[0] += indent_adjust
                docstr_lines[0] = (' ' * indent_adjust) + docstr_lines[0]
                adjusted = True
    if adjusted:
        # Redo preprocessing, but this time on the rectified input
        docstr = textwrap.dedent('\n'.join(docstr_lines))
        docstr_lines = docstr.split('\n')
        line_indent = [get_indentation(line) for line in docstr_lines]
        line_len = [len(line) for line in docstr_lines]

    indents = [x for x, f in zip(line_indent, is_nonzero) if f]
    # NOTE: this indentation sanity check is currently disabled
    # (the ``if False`` short-circuits it); it is kept for debugging.
    if False and len(indents) >= 1:
        if indents[0] != 0:
            # debug info
            print('INDENTATION ERROR IN PARSING DOCSTRING')
            print('CHECK TO MAKE SURE YOU USED A RAW STRING IF YOU USE "\\n"')
            # TODO: Report this error with line number and file information
            print('Docstring:')
            print('----------')
            print(docstr)
            print('----------')
            raise exceptions.MalformedDocstr('malformed google docstr')

    base_indent = 0
    # We will group lines by their indentation.
    # Rectify empty lines by giving them their parent's indentation.
    true_indent = []
    prev_indent = None
    for indent_, len_ in zip(line_indent, line_len):
        if len_ == 0:
            # Empty lines take on their parent's indentation
            indent_ = prev_indent
        true_indent.append(indent_)
        prev_indent = indent_

    # List of google style tags grouped by alias
    tag_groups = [
        ['Args', 'Arguments', 'Parameters', 'Other Parameters'],
        ['Kwargs', 'Keyword Args', 'Keyword Arguments'],
        ['Warns', 'Warning', 'Warnings'],
        ['Returns', 'Return'],
        ['Example', 'Examples'],
        ['Doctest'],
        ['Note', 'Notes'],
        ['Yields', 'Yield'],
        ['Attributes'],
        ['Methods'],
        ['Raises'],
        ['References'],
        ['See Also'],
        ['Todo'],
    ]
    # Map aliased tags to a canonical name (the first item in the group).
    tag_aliases = dict([(item, group[0]) for group in tag_groups
                        for item in group])
    # Allow for a single or double colon (supports PyTorch-style docstrings)
    tag_pattern = '^' + '(' + '|'.join(tag_aliases.keys()) + ') *::? *$'

    # Label lines by their group-id
    group_id = 0
    prev_indent = 0
    group_list = []
    in_tag = False
    for line_num, (line, indent_) in enumerate(zip(docstr_lines, true_indent)):
        if re.match(tag_pattern, line):
            # Check if we can look ahead
            if line_num + 1 < len(docstr_lines):
                # A tag is only valid if its next line is properly indented,
                # empty, or is a tag itself.
                indent_increase = true_indent[line_num + 1] > base_indent
                indent_zero = line_len[line_num + 1] == 0
                matches_tag = re.match(tag_pattern, docstr_lines[line_num + 1])
                if (indent_increase or indent_zero or matches_tag):
                    group_id += 1
                    in_tag = True
            else:
                group_id += 1
                in_tag = True
        # If the indentation goes back to the base, then we have left the tag
        elif in_tag and indent_ != prev_indent and indent_ == base_indent:
            group_id += 1
            in_tag = False
        group_list.append(group_id)
        prev_indent = indent_

    assert len(docstr_lines) == len(group_list)

    # Group docstr lines by group list
    groups_ = collections.defaultdict(list)
    for groupid, line in zip(group_list, docstr_lines):
        groups_[groupid].append(line)

    groups = []
    line_offset = 0
    for k, lines in groups_.items():
        if len(lines) == 0 or (len(lines) == 1 and len(lines[0]) == 0):
            line_offset += len(lines)
            continue
        elif len(lines) >= 1 and re.match(tag_pattern, lines[0]):
            # An encoded google sub-block
            key = lines[0].strip().rstrip(':')
            val = lines[1:]
            subblock = textwrap.dedent('\n'.join(val))
        else:
            # A top level text documentation block
            key = '__DOC__'
            val = lines[:]
            subblock = '\n'.join(val)

        key = tag_aliases.get(key, key)
        block = (subblock, line_offset)
        groups.append((key, block))
        line_offset += len(lines)
    return groups
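
Each returned group pairs a canonical tag with a (subblock, line_offset) tuple, where line_offset is the line in the dedented docstring at which that block starts. A small usage sketch consuming that structure to pick out the Example blocks (the import path follows the doctests above; iterating the result this way is an illustration, not part of the function itself):

from xdoctest.docstr.docscrape_google import split_google_docblocks

docstr = split_google_docblocks.__doc__
groups = split_google_docblocks(docstr)

# Each group is (tag, (subblock, line_offset)).
example_blocks = [
    (block, offset) for tag, (block, offset) in groups if tag == 'Example'
]
for block, offset in example_blocks:
    print('Example block starting at docstring line {}:'.format(offset))
    print(block)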