def write(self, msg):
    """
    Write to this and the redirected stream

    Args:
        msg (str): the text to write. On Python 2 it is coerced to unicode
            before being handed to the parent class.

    Returns:
        whatever the parent class's ``write`` returns (presumably the number
        of characters written, if the parent is an ``io.StringIO`` -- confirm
        against the class definition).
    """
    if self.redirect is not None:
        # The redirected stream receives the message exactly as given
        self.redirect.write(msg)
    if six.PY2:
        from xdoctest.utils.util_str import ensure_unicode
        msg = ensure_unicode(msg)
    # Propagate the parent's return value so callers relying on the usual
    # file-like ``write`` contract are not handed ``None``.
    return super(TeeStringIO, self).write(msg)
def write(self, msg):
    """
    Tee ``msg`` to the redirected stream (when one is set) and to this stream.

    Args:
        msg (str): the text to write

    Returns:
        the result of the parent class's ``write`` call
    """
    target = self.redirect
    if target is not None:
        # The redirect target sees the message before any unicode coercion
        target.write(msg)

    if PY2:
        from xdoctest.utils.util_str import ensure_unicode
        msg = ensure_unicode(msg)

    written = super(TeeStringIO, self).write(msg)
    return written
def split_google_docblocks(docstr):
    """
    Breaks a docstring into parts defined by google style

    Args:
        docstr (str): a docstring

    Returns:
        List[Tuple]: list of 2-tuples where the first item is a google style
            docstring tag and the second item is the block corresponding to
            that tag. Each block is itself a 2-tuple of the block text and
            the line offset of the group within the (dedented) docstring.
            Text that does not belong to any tag is reported under the
            special tag ``'__DOC__'``.

    Raises:
        TypeError: if ``docstr`` is not a string

    CommandLine:
        xdoctest xdoctest.docstr.docscrape_google split_google_docblocks:2

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...     one line description
        ...
        ...     multiline
        ...     description
        ...
        ...     Args:
        ...         foo: bar
        ...
        ...     Returns:
        ...         None
        ...
        ...     Example:
        ...         >>> print('eg1')
        ...         eg1
        ...
        ...     Example:
        ...         >>> print('eg2')
        ...         eg2
        ...     ''')
        >>> groups = split_google_docblocks(docstr)
        >>> assert len(groups) == 5
        >>> [g[0] for g in groups]
        ['__DOC__', 'Args', 'Returns', 'Example', 'Example']

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> docstr = split_google_docblocks.__doc__
        >>> groups = split_google_docblocks(docstr)

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...      a description with a leading space
        ...
        ...     Example:
        ...         >>> foobar
        ...     ''')
        >>> groups = split_google_docblocks(docstr)
        >>> print('groups = {!r}'.format(groups))

    Example:
        >>> from xdoctest.docstr.docscrape_google import *  # NOQA
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...     '''
        ...     Example:
        ...         >>> foobar
        ...     ''')
        >>> # Check that line offsets are valid if the first line is not blank
        >>> groups = split_google_docblocks(docstr)
        >>> offset = groups[0][1][1]
        >>> print('offset = {!r}'.format(offset))
        >>> assert offset == 0
        >>> # Check that line offsets are valid if the first line is blank
        >>> groups = split_google_docblocks(chr(10) + docstr)
        >>> offset = groups[0][1][1]
        >>> print('offset = {!r}'.format(offset))
        >>> assert offset == 1
    """
    if not isinstance(docstr, six.string_types):
        raise TypeError('Input docstr must be a string. Got {} instead'.format(
            type(docstr)))

    def get_indentation(line_):
        """ returns number of leading whitespace characters """
        return len(line_) - len(line_.lstrip())

    # Parse out initial documentation lines
    # Then parse out the blocked lines.
    docstr = ensure_unicode(docstr)
    docstr = textwrap.dedent(docstr)
    docstr_lines = docstr.split('\n')
    line_indent = [get_indentation(line) for line in docstr_lines]
    line_len = [len(line) for line in docstr_lines]

    # The first line may not have the correct indentation if it starts
    # right after the triple quotes. Adjust it in this case to ensure that
    # base indent is always 0
    if len(docstr_lines) >= 2 and line_len[0] != 0:
        nonempty_indents = [
            indent_ for indent_, len_ in zip(line_indent, line_len)
            if len_ > 0
        ]
        if len(nonempty_indents) >= 2:
            # Give the first line the smallest indentation found among the
            # remaining non-empty lines, then redo the preprocessing on the
            # rectified input so the common indentation can be stripped.
            indent_adjust = min(nonempty_indents[1:])
            docstr_lines[0] = (' ' * indent_adjust) + docstr_lines[0]
            docstr = textwrap.dedent('\n'.join(docstr_lines))
            docstr_lines = docstr.split('\n')
            line_indent = [get_indentation(line) for line in docstr_lines]
            line_len = [len(line) for line in docstr_lines]

    base_indent = 0
    # We will group lines by their indentation.
    # Rectify empty lines by giving them their parent's indentation.
    # (Leading empty lines have no parent and get an indentation of None.)
    true_indent = []
    prev_indent = None
    for indent_, len_ in zip(line_indent, line_len):
        if len_ == 0:
            # Empty lines take on their parents indentation
            indent_ = prev_indent
        true_indent.append(indent_)
        prev_indent = indent_

    # List of google style tags grouped by alias
    tag_groups = [
        ['Args', 'Arguments', 'Parameters', 'Other Parameters'],
        ['Kwargs', 'Keyword Args', 'Keyword Arguments'],
        ['Warns', 'Warning', 'Warnings'],
        ['Returns', 'Return'],
        ['Example', 'Examples'],
        ['Doctest'],
        ['Note', 'Notes'],
        ['Yields', 'Yield'],
        ['Attributes'],
        ['Methods'],
        ['Raises'],
        ['References'],
        ['See Also'],
        ['Todo'],
    ]
    # Map aliased tags to a canonical name (the first item in the group).
    tag_aliases = {item: group[0] for group in tag_groups for item in group}
    # Allow for single or double colon (support for pytorch)
    tag_pattern = '^' + '(' + '|'.join(tag_aliases.keys()) + ') *::? *$'
    # Compile once; the pattern is applied to every line of the docstring.
    tag_regex = re.compile(tag_pattern)

    # Label lines by their group-id
    num_lines = len(docstr_lines)
    group_id = 0
    prev_indent = 0
    group_list = []
    in_tag = False
    for line_num, (line, indent_) in enumerate(zip(docstr_lines, true_indent)):
        if tag_regex.match(line):
            # Check if we can look ahead
            if line_num + 1 < num_lines:
                # A tag is only valid if its next line is properly indented,
                # empty, or is a tag itself.
                indent_increase = true_indent[line_num + 1] > base_indent
                indent_zero = line_len[line_num + 1] == 0
                matches_tag = tag_regex.match(docstr_lines[line_num + 1])
                if (indent_increase or indent_zero or matches_tag):
                    group_id += 1
                    in_tag = True
            else:
                # A tag on the very last line always starts a new group
                group_id += 1
                in_tag = True
        # If the indentation goes back to the base, then we have left the tag
        elif in_tag and indent_ != prev_indent and indent_ == base_indent:
            group_id += 1
            in_tag = False
        group_list.append(group_id)
        prev_indent = indent_
    assert len(docstr_lines) == len(group_list)

    # Group docstr lines by group list
    groups_ = collections.defaultdict(list)
    for groupid, line in zip(group_list, docstr_lines):
        groups_[groupid].append(line)

    groups = []
    line_offset = 0
    # Group ids increase with line number, so visiting them in sorted order
    # keeps ``line_offset`` correct even where dicts do not preserve
    # insertion order.
    for groupid in sorted(groups_):
        lines = groups_[groupid]
        if not lines or (len(lines) == 1 and len(lines[0]) == 0):
            # Skip groups that consist of a single blank line
            line_offset += len(lines)
            continue

        tag_match = tag_regex.match(lines[0])
        if tag_match:
            # An encoded google sub-block. Take the tag name from the regex
            # capture so any spaces between the name and the colon(s) do not
            # leak into the key and defeat the alias lookup.
            key = tag_match.group(1)
            subblock = textwrap.dedent('\n'.join(lines[1:]))
        else:
            # A top level text documentation block
            key = '__DOC__'
            subblock = '\n'.join(lines)

        key = tag_aliases.get(key, key)
        block = (subblock, line_offset)
        groups.append((key, block))
        line_offset += len(lines)
    return groups