def build(mcs, p_doc: str, c_doc: str) -> str:
    """
    Merge a parent docstring into a child docstring (numpydoc format).

    Parameters
    ----------
    mcs : object
        Not used by this function.
    p_doc : str
        Parent docstring.
    c_doc : str
        Child docstring.

    Returns
    -------
    str
        ``p_doc or c_doc`` when either docstring is empty/missing,
        otherwise the rendered child docstring after merging.
    """
    # With at most one usable docstring there is nothing to merge.
    if not (p_doc and c_doc):
        return p_doc or c_doc

    parent = NumpyDocString(p_doc)
    child = NumpyDocString(c_doc)

    # reuse parents' doc except for `Extended Summary`
    for section, content in child.items():
        if section == "Extended Summary":
            continue
        if not content and parent[section]:
            child[section] = parent[section]

    # merge parameters: the child's own come first, followed by the
    # parent's parameters that the child does not define itself
    merged = list(child['Parameters'])
    known_names = {param.name for param in merged}
    merged.extend(
        param for param in parent['Parameters']
        if param.name not in known_names
    )
    child['Parameters'] = merged
    return str(child)
def check_docstring_indention(doc: Docstring) -> list:
    """
    Check indention of docstring since numpydoc reports weird results.

    Parameters
    ----------
    doc : numpydoc.validate.Docstring
        Docstring handler.

    Returns
    -------
    list
        List of tuples with Modin error code and its description.
    """
    from modin.utils import _get_indent

    parsed = NumpyDocString(doc.clean_doc)
    # Rewind the reader and consume the summary before reading the sections.
    parsed._doc.reset()
    parsed._parse_summary()
    sections = list(parsed._read_sections())

    # A section whose joined body has a non-zero indent is reported as MD03.
    return [
        ("MD03", MODIN_ERROR_CODES["MD03"].format(section=section[0]))
        for section in sections
        if _get_indent("\n".join(section[1])) != 0
    ]
def __init__(self, name):
    """
    Load the object identified by ``name`` and parse its docstring.

    Sets ``name``, ``obj``, ``code_obj``, ``raw_doc`` (the unprocessed
    ``__doc__`` or ``''``), ``clean_doc`` (``pydoc.getdoc`` output) and
    ``doc`` (``NumpyDocString`` built from ``clean_doc``).
    """
    self.name = name
    target = self._load_obj(name)
    self.obj = target
    self.code_obj = self._to_original_callable(target)

    unprocessed = target.__doc__
    self.raw_doc = unprocessed if unprocessed else ''
    self.clean_doc = pydoc.getdoc(target)
    self.doc = NumpyDocString(self.clean_doc)
def parse_methods(self, key, item):
    """
    Render the docstrings of a sequence of method entries.

    Each entry of ``item`` is a 3-tuple ``(name, <ignored>, lines)``. The
    lines are joined and parsed as a numpydoc docstring, the parsed
    signature is replaced by ``name``, and the result is rendered with
    ``self.get_doctext``. ``key`` is not used here.

    Returns the rendered texts joined with newlines.
    """
    # todo
    rendered = []
    for name, _unused, body_lines in item:
        method_doc = NumpyDocString('\n'.join(body_lines))
        method_doc._parsed_data['Signature'] = name
        rendered.append(self.get_doctext(method_doc))
    return '\n'.join(rendered)
def merge_docs(add_method, layer_string):
    """
    Build a combined docstring from ``add_method``'s parameters and
    ``VIEW_PARAMS``.

    The 'Parameters' section of ``add_method.__doc__`` is rendered,
    ``VIEW_PARAMS`` is appended, and the result is substituted into the
    ``DOC`` template together with ``layer_string`` (as ``name``) and
    ``n`` — ``'n'`` when ``layer_string`` starts with a lowercase vowel,
    otherwise ``''``.
    """
    # create combined docstring with parameters from add_* and Viewer methods
    parsed = NumpyDocString(add_method.__doc__)
    param_text = "\n".join(parsed._str_param_list('Parameters')) + VIEW_PARAMS

    # this ugliness is because the indentation of the parsed numpydocstring
    # is different for the first parameter :(
    all_lines = param_text.splitlines()
    head, tail = all_lines[:3], all_lines[3:]
    dedented_tail = textwrap.dedent("\n".join(tail)).splitlines()
    param_text = "\n".join(head + dedented_tail)

    if layer_string.startswith(('a', 'e', 'i', 'o', 'u')):
        article_suffix = 'n'
    else:
        article_suffix = ''
    return DOC.format(n=article_suffix, name=layer_string, params=param_text)
# Checks the rendered string of a NumpyDocString: "index" must be absent
# when the docstring has no ``.. index ::`` directive and present when it
# has one (two variants are exercised).
# NOTE(review): the triple-quoted literals appear whitespace-collapsed in
# this view; left untouched because their line breaks cannot be recovered.
def test_no_index_in_str(): assert "index" not in str(NumpyDocString("""Test idx """)) assert "index" in str(NumpyDocString("""Test idx .. index :: random """)) assert "index" in str(NumpyDocString("""Test idx .. index :: foo """))
# Checks that an unknown section produces exactly one UserWarning:
#   * parsing ``doc_text`` with NumpyDocString warns "Unknown section Mope";
#   * SphinxClassDoc(BadSection) warns with a message that also names the
#     class ("... Nope in the docstring of BadSection").
# NOTE(review): the triple-quoted literals appear whitespace-collapsed in
# this view; left untouched because their line breaks cannot be recovered.
def test_unknown_section(): doc_text = """ Test having an unknown section Mope ---- This should be ignored and warned about """ class BadSection: """Class with bad section. Nope ---- This class has a nope section. """ pass with pytest.warns(UserWarning, match="Unknown section Mope") as record: NumpyDocString(doc_text) assert len(record) == 1 # SphinxClassDoc has _obj.__name__ == "BadSection". Test that this is # included in the message msg_match = "Unknown section Nope in the docstring of BadSection" with pytest.warns(UserWarning, match=msg_match) as record: SphinxClassDoc(BadSection) assert len(record) == 1
# Variant of the unknown-section test built on ``warnings.catch_warnings``:
#   * parsing ``doc_text`` must record exactly one warning whose message is
#     "Unknown section Mope";
#   * SphinxClassDoc(BadSection) must record exactly one warning whose
#     message contains one of the two accepted qualified names of BadSection.
# NOTE(review): the triple-quoted literals appear whitespace-collapsed in
# this view; left untouched because their line breaks cannot be recovered.
def test_unknown_section(): doc_text = """ Test having an unknown section Mope ---- This should be ignored and warned about """ class BadSection(object): """Class with bad section. Nope ---- This class has a nope section. """ pass with warnings.catch_warnings(record=True) as w: NumpyDocString(doc_text) assert len(w) == 1 assert "Unknown section Mope" == str(w[0].message) with warnings.catch_warnings(record=True) as w: SphinxClassDoc(BadSection) assert len(w) == 1 assert_true('test_docscrape.test_unknown_section.<locals>.BadSection' in str(w[0].message) or 'test_docscrape.BadSection' in str(w[0].message))
def extract_section(obj, section):
    """
    Return one parsed list-style section of ``obj``'s numpydoc docstring.

    Parameters
    ----------
    obj : object
        Object whose ``__doc__`` is parsed.
    section : str
        Section name, matched case-insensitively against 'Parameters',
        'Returns', 'Yields', 'Other Parameters', 'Raises' and 'Warns'.

    Returns
    -------
    list
        The parsed section, or ``[]`` when the section is not one of the
        supported names or ``obj`` has no docstring.
    """
    supported = ('Parameters', 'Returns', 'Yields', 'Other Parameters',
                 'Raises', 'Warns')
    # ``str.capitalize`` lower-cases every character after the first, so
    # "Other Parameters" could never be matched; look the name up
    # case-insensitively instead.
    canonical = {name.lower(): name for name in supported}.get(section.lower())
    if canonical is None:
        return []

    docstring = obj.__doc__
    if not docstring:
        # Nothing to parse (``__doc__`` is None or empty).
        return []
    return NumpyDocString(docstring)[canonical]
def enum_doc(name, enum, header_level, source_location):
    """
    Generate markdown for an enum

    Parameters
    ----------
    name : str
        Name of the thing being documented
    enum : EnumMeta
        Enum to document
    header_level : int
        Heading level
    source_location : str
        URL of repo containing source code

    Returns
    -------
    list
        Markdown fragments: heading, code fence with the name, source link,
        docstring summary (when it can be produced) and one bullet per
        member.
    """
    lines = [f"{'#'*header_level} Enum **{name}**\n\n"]
    lines.append(f"```python\n{name}\n```\n")
    lines.append(get_source_link(enum, source_location))

    try:
        doc = NumpyDocString(inspect.getdoc(enum))._parsed_data
        lines += summary(doc)
    except Exception:
        # The summary is best-effort: a missing or unparsable docstring
        # must not abort generation. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        pass

    lines.append(f"{'#'*(header_level + 1)} Members\n\n")
    lines += [f"- `{str(v).split('.').pop()}`: `{v.value}` \n\n" for v in enum]
    return lines
def _docspec_comments(obj):
    u"""
    Map each documented parameter name of ``obj`` to its description text.

    The class docstring is preferred; the ``__init__`` docstring is the
    fallback, and an empty docstring is parsed when neither exists.
    """
    # Sometimes our docstring is on the class, and sometimes it's on the
    # initializer, so we've got to check both.
    class_docstring = getattr(obj, u'__doc__', None)
    if hasattr(obj, u'__init__'):
        init_docstring = getattr(obj.__init__, u'__doc__', None)
    else:
        init_docstring = None

    parsed = NumpyDocString(class_docstring or init_docstring or u'')

    # It looks like when there's not a space after the parameter name,
    # numpydocstring parses it incorrectly, so keep only the text before the
    # first colon. Sometimes an entry has 3 fields, sometimes 4; the
    # description is always the last one.
    return {
        entry[0].split(u":")[0]: u"\n".join(entry[-1])
        for entry in parsed[u"Parameters"]
    }
def _search_param_in_numpydocstr(docstr, param_str):
    r"""
    Search `docstr` (in numpydoc format) for type(-s) of `param_str`.

    >>> from jedi.evaluate.docstrings import *  # NOQA
    >>> from jedi.evaluate.docstrings import _search_param_in_numpydocstr
    >>> docstr = (
    ...    'Parameters\n'
    ...    '----------\n'
    ...    'x : ndarray\n'
    ...    'y : int or str or list\n'
    ...    'z : {"foo", "bar", 100500}, optional\n'
    ... )
    >>> _search_param_in_numpydocstr(docstr, 'x')
    ['ndarray']
    >>> sorted(_search_param_in_numpydocstr(docstr, 'y'))
    ['int', 'list', 'str']
    >>> sorted(_search_param_in_numpydocstr(docstr, 'z'))
    ['int', 'str']
    """
    documented = NumpyDocString(docstr)._parsed_data['Parameters']
    for p_name, p_type, p_descr in documented:
        if p_name != param_str:
            continue
        # Drop a trailing ", optional" marker when the pattern matches.
        m = re.match(r'([^,]+(,[^,]+)*?)(,[ ]*optional)?$', p_type)
        type_text = m.group(1) if m else p_type
        return _expand_typestr(type_text)
    # The parameter is not documented.
    return []
def _search_return_in_numpydocstr(docstr):
    r"""
    Search `docstr` (in numpydoc format) for type(-s) of the values listed
    in its 'Returns' and 'Yields' sections.

    >>> from jedi.evaluate.docstrings import *  # NOQA
    >>> from jedi.evaluate.docstrings import _search_return_in_numpydocstr
    >>> from jedi.evaluate.docstrings import _expand_typestr
    >>> docstr = (
    ...    'Returns\n'
    ...    '----------\n'
    ...    'int\n'
    ...    '    can return an anonymous integer\n'
    ...    'out : ndarray\n'
    ...    '    can return a named value\n'
    ... )
    >>> _search_return_in_numpydocstr(docstr)
    ['int', 'ndarray']
    """
    parsed = NumpyDocString(docstr)._parsed_data
    entries = list(parsed['Returns'])
    entries.extend(parsed['Yields'])

    found = []
    for p_name, p_type, p_descr in entries:
        # An entry without a type carries the type in the name position.
        type_text = p_type if p_type else p_name
        m = re.match(r'([^,]+(,[^,]+)*?)$', type_text)
        if m:
            type_text = m.group(1)
        found.extend(_expand_typestr(type_text))
    return found
def check_function(self, dir_name, file_name, func_name, func, failures):
    """
    Perform docstring checks on a function.

    Parameters
    ----------
    dir_name : str
        The name of the directory in which the method is defined.
    file_name : str
        The name of the file in which the method is defined.
    func_name : str
        The name of the function being checked
    func : function
        The function being tested.
    failures : dict
        The failures encountered by the method. These are all stored
        so that we can fail once at the end of the check_method method
        with information about every failure. Form is
        { 'dir_name/file_name:class_name.method_name': [ messages ] }
    """
    if PY3:
        argspec = inspect.getfullargspec(func)
    else:
        argspec = inspect.getargspec(func)
    doc = inspect.getdoc(func)

    new_failures = []

    # If the method is decorated with @contextmanager, skip it for now
    if _is_context_manager(func):
        return

    if doc is None:
        # Fall through to the recording step below: returning here would
        # silently drop the failure, so missing docstrings were never
        # reported.
        new_failures.append('is missing docstring')
    else:
        # ``inspect.getdoc`` may return an inherited docstring while the
        # function's own ``__doc__`` is None; only check text that exists.
        own_doc = func.__doc__
        if own_doc is not None and not own_doc.startswith('\n'):
            new_failures.append('docstring should start with a new line')

        # Check if docstring references another function
        if doc[:3] == 'See':
            return

        nds = NumpyDocString(doc)

        new_failures.extend(self.check_summary(nds))
        new_failures.extend(self.check_parameters(argspec, nds))
        new_failures.extend(self.check_returns(func, nds))

    if new_failures:
        key = '{0}/{1}:{2}'.format(dir_name, file_name, func_name)
        failures.setdefault(key, []).extend(new_failures)
def getDicoParams(instanceClassifier):
    """
    Retrieve, for each documented parameter of an estimator, its accepted
    type, its default value and its description.

    :param instanceClassifier: the estimator class (it is instantiated
        without arguments to read ``get_params()``, and its ``__doc__`` is
        parsed)
    :return: A dictionary keyed by parameter name. Each value is a tuple
        (python type or None, default value of the parameter, description).
        Parameters whose documented type is not recognised, that are absent
        from ``get_params()``, or whose default is "Infinity" are left out.
    """
    dico = {}
    # regex to find type
    # NOTE(review): the trailing ``?`` in ``dict?|list?|array?`` makes the
    # last letter optional, so e.g. "dic" matches but maps to None below.
    types_re = re.compile(
        r"(?P<type>(float|int(eger)?|str(ing)?|bool(ean)?|dict?|list?|array?))"
    )
    # Map string to object
    type_map = {
        'string': str,
        'str': str,
        'boolean': bool,
        'bool': bool,
        'int': int,
        'integer': int,
        'float': float,
        'dict': dict,
        'array': list,
        'list': list,
    }
    # Usage hint inserted into the description of container parameters.
    examples = {
        dict: "Example : {'key':45} ",
        list: "Example : [value1,value2] ",
    }

    classifierTemp = instanceClassifier().get_params()
    doc = NumpyDocString(
        " " + instanceClassifier.__doc__)  # hack, get the doc for the classifier

    # For each documented parameter: name, type row, description lines.
    for name, type_, descriptions in doc['Parameters']:
        types = types_re.match(type_)
        if types is None:
            # No recognised type at the start of the type row.
            continue
        if name not in classifierTemp:
            # Documented but not reported by get_params(): there is no
            # default value to expose, so skip instead of raising KeyError.
            continue
        if classifierTemp[name] == "Infinity":
            continue

        py_type = type_map.get(types.group())
        parts = [str(type_)]
        if py_type in examples:
            parts.append(examples[py_type])
        parts.append(" ".join(str(e) for e in descriptions))
        completeDescription = "\n \n".join(parts)

        # (instance of type, default value of params, description)
        dico[name] = (py_type, classifierTemp[name], completeDescription)
    return dico
def __init__(self, name):
    """
    Resolve ``name`` to an object via ``self._load_obj`` and keep both the
    unprocessed docstring and its parsed numpydoc form.

    ``code_obj`` is whatever ``self._to_original_callable`` returns for the
    loaded object; ``raw_doc`` falls back to ``''`` when the object has no
    docstring.
    """
    self.name = name
    loaded = self._load_obj(name)
    self.obj = loaded
    self.code_obj = self._to_original_callable(loaded)
    self.raw_doc = loaded.__doc__ or ''

    cleaned = pydoc.getdoc(loaded)
    self.clean_doc = cleaned
    self.doc = NumpyDocString(cleaned)
def _get_numpydoc_obj(py_obj:Any):
    """
    Build the numpydoc wrapper that matches the kind of ``py_obj``.

    Returns an empty ``NumpyDocString`` when ``__doc__`` is None, a
    ``ClassDoc`` for classes, a ``FunctionDoc`` for functions and methods,
    and a ``NumpyDocString`` of ``__doc__`` for anything else that has one.
    Raises ``TypeError`` for an object without a ``__doc__`` attribute.
    """
    has_doc_attr = hasattr(py_obj, '__doc__')

    # special case where __doc__ is None e.g. with non overwritten dunder
    # methods such as "__dict__"
    if has_doc_attr and py_obj.__doc__ is None:
        return NumpyDocString('')

    # "normal" cases
    if inspect.isclass(py_obj):
        return ClassDoc(py_obj)
    if inspect.isfunction(py_obj) or inspect.ismethod(py_obj):
        return FunctionDoc(py_obj)
    if not has_doc_attr:  # pragma: no cover
        raise TypeError(f'The object {py_obj} is not a class, function, method '
                        'or any other Python object that has a __doc__ attribute')
    return NumpyDocString(py_obj.__doc__)
def _is_deprecated(real_name):
    """
    Tell whether the object named ``real_name`` is marked as deprecated.

    Returns False when the name cannot be imported; otherwise True when
    the joined summary and extended summary of its docstring contain
    ``.. deprecated::``.
    """
    try:
        obj, parent, modname = _import_by_name(real_name)
    except ImportError:
        return False

    parsed = NumpyDocString(obj.__doc__ or "")
    summary_lines = parsed["Summary"] + parsed["Extended Summary"]
    return ".. deprecated::" in "".join(summary_lines)
def _is_deprecated(real_name):
    """
    Report whether ``real_name`` refers to an object documented as
    deprecated.

    An unimportable name yields False. Otherwise the docstring (or ``''``
    when there is none) is parsed and the concatenated 'Summary' and
    'Extended Summary' lines are searched for ``.. deprecated::``.
    """
    try:
        obj, _parent, _modname = _import_by_name(real_name)
    except ImportError:
        return False

    docstring = obj.__doc__ or ''
    parsed = NumpyDocString(docstring)
    text = ''.join(parsed['Summary'] + parsed['Extended Summary'])
    return '.. deprecated::' in text
# Checks that a 'See Also' entry consisting only of ":func:`~foo`" makes
# NumpyDocString raise ValueError with a message matching
# "See Also entry ':func:`~foo`'".
# NOTE(review): the triple-quoted literal appears whitespace-collapsed in
# this view; left untouched because its line breaks cannot be recovered.
def test_see_also_parse_error(): text = (""" z(x,theta) See Also -------- :func:`~foo` """) with pytest.raises(ValueError, match="See Also entry ':func:`~foo`'"): NumpyDocString(text)
def _search_param_in_numpydocstr(docstr, param_str):
    """Search `docstr` (in numpydoc format) for type(-s) of `param_str`.

    Returns the result of ``_expand_typestr`` for the first parameter entry
    named ``param_str`` (with a trailing ", optional" removed from its type
    when the pattern matches), or ``[]`` when no entry has that name.
    """
    for entry in NumpyDocString(docstr)._parsed_data['Parameters']:
        p_name, p_type, p_descr = entry
        if p_name == param_str:
            matched = re.match('([^,]+(,[^,]+)*?)(,[ ]*optional)?$', p_type)
            if matched:
                return _expand_typestr(matched.group(1))
            return _expand_typestr(p_type)
    return []
# Decorator body: takes ``func``, derives a new docstring for it and
# returns ``func`` itself.
#   * ``func.__doc__`` is stored on ``self.origin``.
#   * Without a docstring, ``self.from_func.__doc__`` is used when
#     ``self.from_func`` is callable, otherwise an empty string.
#   * With a docstring, ``self.subs`` is updated from
#     ``get_subs(docstring, self.from_func)`` and applied with
#     ``recipes.string.sub``.
#   * The text is parsed with NumpyDocString, passed through ``self.insert``
#     and written back to ``func.__doc__`` only when ``self.subs`` or
#     ``self.insertion`` is truthy; otherwise a warning is issued.
# NOTE(review): the 'Docstring for function {func} identical ...' warning
# string has no ``f`` prefix, so ``{func}`` is emitted literally — confirm
# whether an f-string was intended.
# NOTE(review): ``new`` is assigned only under ``if self.subs:``; the
# nesting of the following ``if new == docstring`` / ``docstring = new``
# cannot be recovered from this whitespace-collapsed view, so the code is
# left untouched — verify against the properly indented file.
def splice(self, func): """ Main work of creating or updating the function docstring happens here. This is a signature preserving decorator that only alters the `__doc__` attribute of the object. """ # avoid circular import :| from recipes.string import sub # docstring of decorated function. The one to be adapted. self.origin = docstring = func.__doc__ # make substitutions. verbatim substitutions happen if docstring is None: # if decorated function has no docstring carbon copy the # docstring from input source if callable(self.from_func): docstring = self.from_func.__doc__ else: # splicing from multiple sources. Start from a blank slate docstring = '' else: # decorated function has a docstring. Look for directives and # substitute them if callable(self.from_func): # new = self.sub(docstring) self.subs.update(get_subs(docstring, self.from_func)) if self.subs: new = sub(docstring, self.subs) # TODO: do things in the order in which arguments were passed if new == docstring: wrn.warn('Docstring for function {func} identical ' 'after substitution') docstring = new else: 'multi-source without explicit mapping. might be ambiguous' # parse the update docstring doc = NumpyDocString(docstring) # parse directives and insert doc = self.insert(func, doc) # remove omitted sections / parameters self.get_remove(self.from_func) # write the new docstring if (self.subs or self.insertion): func.__doc__ = str(doc) else: wrn.warn( f'{self.__class__.__name__} did not alter docstring for {func}.' ) return func
# Checks that a docstring containing the 'Notes' section twice is rejected:
#   * NumpyDocString(doc_text) raises ValueError matching
#     "The section Notes appears twice";
#   * SphinxClassDoc(Dummy) and SphinxFunctionDoc(dummy_func) raise
#     ValueError whose message matches "Dummy class" / "dummy_func", i.e.
#     it identifies the offending object.
# NOTE(review): the triple-quoted literals appear whitespace-collapsed in
# this view; left untouched because their line breaks cannot be recovered.
def test_section_twice(): doc_text = """ Test having a section Notes twice Notes ----- See the next note for more information Notes ----- That should break... """ with pytest.raises(ValueError, match="The section Notes appears twice"): NumpyDocString(doc_text) # if we have a numpydoc object, we know where the error came from class Dummy: """ Dummy class. Notes ----- First note. Notes ----- Second note. """ def spam(self, a, b): """Spam\n\nSpam spam.""" pass def ham(self, c, d): """Cheese\n\nNo cheese.""" pass def dummy_func(arg): """ Dummy function. Notes ----- First note. Notes ----- Second note. """ with pytest.raises(ValueError, match="Dummy class"): SphinxClassDoc(Dummy) with pytest.raises(ValueError, match="dummy_func"): SphinxFunctionDoc(dummy_func)
# Parse and enrich the docstring of ``obj``, memoised in the
# ``load_docstring.cache`` mapping (keyed by ``obj`` itself).
#   * The normalised ``__doc__`` is parsed with NumpyDocString and copied
#     into a plain dict.
#   * 'Summary' is reduced to its first element; any remaining elements are
#     prepended to 'Extended Summary'.
#   * For classes (``isinstance(obj, type)``), 'Attributes' is enriched from
#     ``__annotations__`` and completed with attributes documented on the
#     bases in ``obj.__mro__[1:-1]``.
#   * Otherwise 'Parameters' is enriched from ``__annotations__`` and
#     completed from same-named attributes found on the classes in the
#     defining class's MRO; 'Returns' and 'Raises' are enriched as well.
#   * A non-empty 'Examples' section is joined into a single string.
# NOTE(review): the exact nesting of the ``else`` branch and of the
# trailing ``enhance_*`` / 'Examples' statements cannot be recovered from
# this whitespace-collapsed view, so the code is left untouched — verify
# against the properly indented file.
def load_docstring(obj): if obj not in load_docstring.cache: docstring = normalize_docstring(getattr(obj, "__doc__", None) or "") doc = dict(NumpyDocString(docstring)) enhance_multilines_doc(doc, "Summary") enhance_multilines_doc(doc, "Extended Summary") if doc["Extended Summary"] == [[]]: doc["Extended Summary"] = [] doc["Summary"], *more_doc = doc["Summary"] if more_doc: doc["Extended Summary"] = more_doc + doc["Extended Summary"] if isinstance(obj, type): enhance_attributes(doc, "Attributes", getattr(obj, "__annotations__", {})) for base in obj.__mro__[1:-1]: base_doc = load_docstring(base) for name in base_doc.get("Attributes", {}).keys(): if name not in doc["Attributes"]: doc["Attributes"][name] = base_doc["Attributes"][name] else: annotations = getattr(obj, '__annotations__', {}) enhance_attributes(doc, "Parameters", annotations) klass = get_class_that_defined_method(obj) if klass: for base in klass.__mro__[:-1]: try: base_obj = getattr(base, obj.__name__) except AttributeError: continue if base_obj is obj: continue try: if base_obj.__func__ is obj.__func__: continue except AttributeError: pass base_doc = load_docstring(base_obj) for name in base_doc["Parameters"].keys(): if name not in doc["Parameters"]: doc["Parameters"][name] = base_doc["Parameters"][ name] enhance_return(doc, "Returns", annotations) enhance_raises(doc, "Raises") if doc.get("Examples"): doc["Examples"] = "\n".join(doc["Examples"]) load_docstring.cache[obj] = doc return load_docstring.cache[obj]
def getNames():
    """
    Return the set of known numpydoc section names, computing it once.

    The set is cached in the module-level ``_sectnames``. It is built from
    the keys of an empty ``NumpyDocString``, minus 'index', plus
    'Parameters', 'Returns' and 'Raises'.
    """
    # for reference: at the time of this writing, the list of names was:
    # Methods, Parameters, Warnings, Warns, Other Parameters, Summary,
    # Returns, References, Examples, Signature, Raises, Attributes,
    # See Also, Notes, Extended Summary
    global _sectnames
    if _sectnames is not None:
        return _sectnames

    names = set(NumpyDocString('')._parsed_data.keys())
    names.discard('index')
    names.update(('Parameters', 'Returns', 'Raises'))
    _sectnames = names
    return _sectnames
def parse_docstring(doc: str, module: str, nested: bool) -> str:
    """Parse docstring and returns formatted markdown

    Parameters
    ----------
    doc : str
        The docstring of a python object
    module : str
        Forwarded to ``replace_links``.
    nested : bool
        Forwarded to ``replace_links``.

    Returns
    -------
    str
        Formatted markdown; ``''`` when ``doc`` is empty.
    """
    if not doc:
        return ''

    parsed = NumpyDocString(replace_links(doc, module, nested))
    out = []

    # Free-text sections are emitted as-is, each followed by a blank line.
    for key in ('Summary', 'Extended Summary'):
        if parsed.get(key):
            out.append('{}'.format('\n'.join(parsed[key])))
            out.append('')

    if parsed.get('Examples'):
        out.append('__Examples__\n\n')
        out.extend(parsed['Examples'])
        out.append('')

    # Attributes and parameters share the "- `name` (`type`): text" layout.
    typed_sections = (
        ('Attributes', '__Attributes__\n\n'),
        ('Parameters', '__Args__\n\n'),
    )
    for key, heading in typed_sections:
        if parsed.get(key):
            out.append(heading)
            for name, dtype, description in parsed[key]:
                out.append('- `{}` (`{}`): {}'.format(
                    name, dtype, '\n\t'.join(description)))
            out.append('')

    if parsed.get('Returns'):
        out.append('__Returns__\n\n')
        for name, dtype, description in parsed['Returns']:
            out.append('- `{}`: {}'.format(name, '\n'.join(description)))
        out.append('')

    return '\n'.join(out)
# Checks that when a docstring carries two signature lines, the parsed
# 'Signature' (after stripping) is the second one, 'z(a, theta)'.
# NOTE(review): the triple-quoted literal appears whitespace-collapsed in
# this view; left untouched because its line breaks cannot be recovered.
def test_duplicate_signature(): # Duplicate function signatures occur e.g. in ufuncs, when the # automatic mechanism adds one, and a more detailed comes from the # docstring itself. doc = NumpyDocString(""" z(x1, x2) z(a, theta) """) assert doc['Signature'].strip() == 'z(a, theta)'
def check_dist_keyword_names():
    """
    Look for collisions between names of distribution shape parameters and
    keywords of distribution methods. See gh-5982.

    Returns a list of ``(name, passed, message)`` tuples, one per public
    method checked. Methods of distributions without shapes are skipped
    because they cannot collide.
    """
    results = []
    for distname in {distdata[0] for distdata in distcont + distdiscrete}:
        dist = getattr(stats, distname)
        public_methods = [
            member_name
            for member_name, _ in inspect.getmembers(
                dist, predicate=inspect.ismethod)
            if not member_name.startswith('_')
        ]
        for methodname in public_methods:
            method = getattr(dist, methodname)
            try:
                params = NumpyDocString(method.__doc__)['Parameters']
            except TypeError:
                results.append((f'stats.{distname}.{methodname}', False,
                                "Method parameters are not documented properly."))
                continue

            if not dist.shapes:  # can't have collision if there are no shapes
                continue
            shape_names = dist.shapes.split(', ')

            documented = {param.name for param in params}
            accepted = set(inspect.signature(method).parameters)
            # NOTE: reporting parameters that are accepted but undocumented
            # was deliberately left out here; those discrepancies are a
            # separate issue.

            collisions = (documented | accepted).intersection(shape_names)
            if collisions:
                message = f"Distribution/method keyword collision: {collisions} "
                results.append((f'stats.{distname}.{methodname}', False, message))
            else:
                results.append((f'stats.{distname}.{methodname}', True, ''))

    return results
def check_docstring(f):
    """
    Compare the parameters documented in ``f``'s numpydoc docstring with
    its signature and print any mismatch.

    Returns
    -------
    False or None
        False when ``f`` is a builtin, its docstring cannot be parsed (or
        emits a warning while parsing), it documents no parameters, its
        signature cannot be inspected, or it documents extra names while
        accepting ``*args``/``**kwargs``. None otherwise, after printing
        undocumented arguments and documented non-arguments, if any.
    """
    # can't inspect builtins
    if inspect.isbuiltin(f):
        return False

    with warnings.catch_warnings():
        # Promote parser warnings to errors so they are treated as failures.
        warnings.simplefilter('error')
        try:
            parsed = NumpyDocString(inspect.getdoc(f))
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate.
            print('ERROR PARSING DOCSTRING: %s' % fullname(f))
            print('')
            return False

    if len(parsed['Parameters']) == 0:
        return False

    def iter_docargs():
        # One entry may document several comma-separated names.
        for item in chain(parsed['Parameters'], parsed['Other Parameters']):
            for rep in item[0].split(','):
                yield rep.strip()
    doc_args = set(iter_docargs())

    try:
        # ``inspect.getargspec`` was removed in Python 3.11; prefer
        # ``getfullargspec`` when available and fall back otherwise.
        if hasattr(inspect, 'getfullargspec'):
            argspec = inspect.getfullargspec(f)
            varkw = argspec.varkw
        else:
            argspec = inspect.getargspec(f)
            varkw = argspec.keywords
    except TypeError:
        return False

    # ignore 'self' or 'cls' in the signature for instance methods or
    # class methods.
    args = set(argspec.args)
    args.discard('cls')
    args.discard('self')

    # the docstring might legitimately mention parameters that aren't in
    # the signature if the function takes *args, or **kwargs
    if args != doc_args and len(doc_args) > len(args) and (
            (argspec.varargs is not None) or (varkw is not None)):
        return False

    if args != doc_args:
        print('%s ( %s )' % (fullname(f), inspect.getfile(f)))
        undoc_args = args.difference(doc_args)
        doc_nonargs = doc_args.difference(args)
        if undoc_args:
            print(' Undocumented arguments: ', undoc_args)
        if doc_nonargs:
            print(' Documented non-arguments: ', doc_nonargs)
        print()
# Checks parsing of a 'See Also' section with many entry shapes:
#   * the parsed section must have 10 entries;
#   * entries listed without a description must have an empty ``desc``,
#     all others a non-empty one;
#   * the role is 'meth' for func_h/func_h1/func_h2, 'obj' for
#     baz.obj_q/~baz.obj_r, 'class' for class_j and None otherwise;
#   * the descriptions of func_d, foo.func_e, class_j and the
#     func_f2..func_j2 group are compared against exact line lists.
# NOTE(review): the triple-quoted literal appears whitespace-collapsed in
# this view; left untouched because its line breaks cannot be recovered.
def test_see_also(): doc6 = NumpyDocString(""" z(x,theta) See Also -------- func_a, func_b, func_c func_d : some equivalent func foo.func_e : some other func over multiple lines func_f, func_g, :meth:`func_h`, func_j, func_k func_f1, func_g1, :meth:`func_h1`, func_j1 func_f2, func_g2, :meth:`func_h2`, func_j2 : description of multiple :obj:`baz.obj_q` :obj:`~baz.obj_r` :class:`class_j`: fubar foobar """) assert len(doc6['See Also']) == 10 for funcs, desc in doc6['See Also']: for func, role in funcs: if func in ('func_a', 'func_b', 'func_c', 'func_f', 'func_g', 'func_h', 'func_j', 'func_k', 'baz.obj_q', 'func_f1', 'func_g1', 'func_h1', 'func_j1', '~baz.obj_r'): assert not desc, str([func, desc]) elif func in ('func_f2', 'func_g2', 'func_h2', 'func_j2'): assert desc, str([func, desc]) else: assert desc, str([func, desc]) if func == 'func_h': assert role == 'meth' elif func == 'baz.obj_q' or func == '~baz.obj_r': assert role == 'obj' elif func == 'class_j': assert role == 'class' elif func in ['func_h1', 'func_h2']: assert role == 'meth' else: assert role is None, str([func, role]) if func == 'func_d': assert desc == ['some equivalent func'] elif func == 'foo.func_e': assert desc == ['some other func over', 'multiple lines'] elif func == 'class_j': assert desc == ['fubar', 'foobar'] elif func in ['func_f2', 'func_g2', 'func_h2', 'func_j2']: assert desc == ['description of multiple' ], str([desc, ['description of multiple']])
# Variant of the 'See Also' parse-error test built on ``assert_raises``:
# parsing a 'See Also' entry consisting only of ":func:`~foo`" must raise
# ParseError, and the string form of the exception is compared against the
# exact expected message (which embeds the docstring text).
# NOTE(review): the triple-quoted literal appears whitespace-collapsed in
# this view; left untouched because its line breaks cannot be recovered.
def test_see_also_parse_error(): text = (""" z(x,theta) See Also -------- :func:`~foo` """) with assert_raises(ParseError) as err: NumpyDocString(text) assert_equal( str(r":func:`~foo` is not a item name in '\n z(x,theta)\n\n See Also\n --------\n :func:`~foo`\n '" ), str(err.exception))
def __init__(self, method_name, method_obj):
    """
    Store the documented object together with its docstring in three
    forms: ``raw_doc`` (``__doc__`` or ``''``), ``clean_doc``
    (``pydoc.getdoc`` output) and ``doc`` (``NumpyDocString`` of
    ``clean_doc``).
    """
    self.method_name = method_name
    self.method_obj = method_obj

    unprocessed = method_obj.__doc__
    self.raw_doc = unprocessed if unprocessed else ''

    cleaned = pydoc.getdoc(method_obj)
    self.clean_doc = cleaned
    self.doc = NumpyDocString(cleaned)
class Docstring:
    """
    Wrap a documented object and expose facts about its numpydoc docstring.

    ``raw_doc`` holds the ``pydoc.getdoc`` text of the object and ``doc``
    the ``NumpyDocString`` parsed from it.
    """

    def __init__(self, method_name, method_obj):
        self.method_name = method_name
        self.method_obj = method_obj
        self.raw_doc = pydoc.getdoc(method_obj)
        self.doc = NumpyDocString(self.raw_doc)

    def __len__(self):
        """Length of the docstring text."""
        return len(self.raw_doc)

    @property
    def source_file_name(self):
        """Source file of the object, relative to ``BASE_PATH`` when known."""
        fname = inspect.getsourcefile(self.method_obj)
        if fname:
            fname = os.path.relpath(fname, BASE_PATH)
        return fname

    @property
    def source_file_def_line(self):
        """Line number of the definition, or None when the source is unavailable."""
        try:
            return inspect.getsourcelines(self.method_obj)[-1]
        except OSError:
            pass

    @property
    def github_url(self):
        """URL pointing at the definition in the pandas GitHub repository."""
        url = 'https://github.com/pandas-dev/pandas/blob/master/'
        url += '{}#L{}'.format(self.source_file_name,
                               self.source_file_def_line)
        return url

    @property
    def first_line_blank(self):
        """True when the first docstring line is blank; None without a docstring."""
        if self.raw_doc:
            return not bool(self.raw_doc.split('\n')[0].strip())

    @property
    def summary(self):
        """
        One-line summary; ``''`` when a multi-line 'Summary' is really an
        extended summary (no separate 'Extended Summary' was parsed).
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ''
        return ' '.join(self.doc['Summary'])

    @property
    def extended_summary(self):
        """Extended summary, falling back to a multi-line 'Summary'."""
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ' '.join(self.doc['Summary'])
        return ' '.join(self.doc['Extended Summary'])

    @property
    def needs_summary(self):
        """True unless both the summary and the extended summary are present."""
        return not (bool(self.summary) and bool(self.extended_summary))

    @property
    def doc_parameters(self):
        """Parsed 'Parameters' section."""
        return self.doc['Parameters']

    @property
    def signature_parameters(self):
        """Parameter names from the signature, without a leading self/cls."""
        if not (inspect.isfunction(self.method_obj) or
                inspect.isclass(self.method_obj)):
            return tuple()
        if (inspect.isclass(self.method_obj) and
                self.method_name.split('.')[-1] in {'dt', 'str', 'cat'}):
            # accessor classes have a signature, but don't want to show this
            return tuple()
        params = tuple(inspect.signature(self.method_obj).parameters.keys())
        if params and params[0] in ('self', 'cls'):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """Messages describing differences between signature and documentation."""
        errs = []
        signature_params = self.signature_parameters
        if self.doc_parameters:
            # First field of every parsed entry is the parameter name.
            doc_params = list(zip(*self.doc_parameters))[0]
        else:
            doc_params = []

        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append('Parameters {!r} not documented'.format(missing))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append('Unknown parameters {!r}'.format(extra))
        if (not missing and not extra and
                signature_params != doc_params and
                not (not signature_params and not doc_params)):
            errs.append('Wrong parameters order. ' +
                        'Actual: {!r}. '.format(signature_params) +
                        'Documented: {!r}'.format(doc_params))

        return errs

    @property
    def correct_parameters(self):
        """True when no parameter mismatch was found."""
        return not bool(self.parameter_mismatches)

    @property
    def see_also(self):
        """Parsed 'See Also' section."""
        return self.doc['See Also']

    @property
    def examples(self):
        """Parsed 'Examples' section."""
        return self.doc['Examples']

    @property
    def first_line_ends_in_dot(self):
        """
        True when the first docstring line ends with a period; None without
        a docstring.
        """
        # ``self.doc`` is the parsed docstring and has no ``split``; the
        # text lives in ``raw_doc``. ``endswith`` also copes with an empty
        # first line, where indexing ``[-1]`` would raise IndexError.
        if self.raw_doc:
            return self.raw_doc.split('\n')[0].endswith('.')

    @property
    def deprecated(self):
        """True for ``pandas.Panel*`` names or docstrings carrying the marker."""
        # Literal substring test: as a regex the dots of the marker would
        # match any character.
        marker = '.. deprecated:: '
        return (self.method_name.startswith('pandas.Panel') or
                marker in self.summary or
                marker in self.extended_summary)

    @property
    def mentioned_private_classes(self):
        """Names from ``PRIVATE_CLASSES`` that occur in the docstring."""
        return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc]

    @property
    def examples_errors(self):
        """Captured doctest output produced by running the docstring examples."""
        flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
        finder = doctest.DocTestFinder()
        runner = doctest.DocTestRunner(optionflags=flags)
        context = {'np': numpy, 'pd': pandas}
        error_msgs = ''
        for test in finder.find(self.raw_doc, self.method_name, globs=context):
            f = StringIO()
            with contextlib.redirect_stdout(f):
                runner.run(test)
            error_msgs += f.getvalue()
        return error_msgs
def __init__(self, method_name, method_obj):
    """
    Remember the documented object and parse its docstring.

    ``raw_doc`` is the text returned by ``pydoc.getdoc`` for
    ``method_obj``; ``doc`` is the ``NumpyDocString`` built from it.
    """
    self.method_name = method_name
    self.method_obj = method_obj

    text = pydoc.getdoc(method_obj)
    self.raw_doc = text
    self.doc = NumpyDocString(text)
class Docstring(object):
    """
    Wrap a documented object and expose facts about its numpydoc docstring.

    ``raw_doc`` is the unprocessed ``__doc__`` (or ``''``), ``clean_doc``
    the ``pydoc.getdoc`` text and ``doc`` the ``NumpyDocString`` parsed
    from ``clean_doc``.
    """

    def __init__(self, method_name, method_obj):
        self.method_name = method_name
        self.method_obj = method_obj
        self.raw_doc = method_obj.__doc__ or ''
        self.clean_doc = pydoc.getdoc(self.method_obj)
        self.doc = NumpyDocString(self.clean_doc)

    def __len__(self):
        """Length of the unprocessed docstring."""
        return len(self.raw_doc)

    @property
    def is_function_or_method(self):
        # TODO(py27): remove ismethod
        return (inspect.isfunction(self.method_obj)
                or inspect.ismethod(self.method_obj))

    @property
    def source_file_name(self):
        """Source file of the object, relative to ``BASE_PATH`` when known."""
        fname = inspect.getsourcefile(self.method_obj)
        if fname:
            fname = os.path.relpath(fname, BASE_PATH)
        return fname

    @property
    def source_file_def_line(self):
        """Line number of the definition, or None when the source is unavailable."""
        try:
            return inspect.getsourcelines(self.method_obj)[-1]
        except OSError:
            pass

    @property
    def github_url(self):
        """URL pointing at the definition in the pandas GitHub repository."""
        url = 'https://github.com/pandas-dev/pandas/blob/master/'
        url += '{}#L{}'.format(self.source_file_name,
                               self.source_file_def_line)
        return url

    @property
    def start_blank_lines(self):
        """Index of the first non-blank line of ``raw_doc``; None when empty."""
        i = None
        if self.raw_doc:
            for i, row in enumerate(self.raw_doc.split('\n')):
                if row.strip():
                    break
        return i

    @property
    def end_blank_lines(self):
        """Index, counted from the end, of the last non-blank line; None when empty."""
        i = None
        if self.raw_doc:
            for i, row in enumerate(reversed(self.raw_doc.split('\n'))):
                if row.strip():
                    break
        return i

    @property
    def double_blank_lines(self):
        """True when ``raw_doc`` contains two consecutive blank lines."""
        # Starts truthy so that a leading blank line alone is not counted.
        prev = True
        for row in self.raw_doc.split('\n'):
            if not prev and not row.strip():
                return True
            prev = row.strip()
        return False

    @property
    def summary(self):
        """
        One-line summary; ``''`` when a multi-line 'Summary' is really an
        extended summary (no separate 'Extended Summary' was parsed).
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ''
        return ' '.join(self.doc['Summary'])

    @property
    def extended_summary(self):
        """Extended summary, falling back to a multi-line 'Summary'."""
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ' '.join(self.doc['Summary'])
        return ' '.join(self.doc['Extended Summary'])

    @property
    def needs_summary(self):
        """True unless both the summary and the extended summary are present."""
        return not (bool(self.summary) and bool(self.extended_summary))

    @property
    def doc_parameters(self):
        """Ordered mapping ``name -> (type, description)`` of documented parameters."""
        return collections.OrderedDict((name, (type_, ''.join(desc)))
                                       for name, type_, desc
                                       in self.doc['Parameters'])

    @property
    def signature_parameters(self):
        """
        Parameter names from the signature (``*``/``**`` prefixed for
        var-args), without a leading self/cls; empty when unavailable.
        """
        if inspect.isclass(self.method_obj):
            if hasattr(self.method_obj, '_accessors') and (
                    self.method_name.split('.')[-1] in
                    self.method_obj._accessors):
                # accessor classes have a signature but don't want to show this
                return tuple()
        try:
            sig = signature(self.method_obj)
        except (TypeError, ValueError):
            # Some objects, mainly in C extensions do not support introspection
            # of the signature
            return tuple()
        # Copy so the signature object's own argument list is not mutated.
        params = list(sig.args)
        if sig.varargs:
            params.append("*" + sig.varargs)
        if sig.keywords:
            params.append("**" + sig.keywords)
        params = tuple(params)
        if params and params[0] in ('self', 'cls'):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """Messages describing differences between signature and documentation."""
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(
                'Parameters {} not documented'.format(pprint_thing(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append('Unknown parameters {}'.format(pprint_thing(extra)))
        if (not missing and not extra and signature_params != doc_params
                and not (not signature_params and not doc_params)):
            errs.append('Wrong parameters order. ' +
                        'Actual: {!r}. '.format(signature_params) +
                        'Documented: {!r}'.format(doc_params))

        return errs

    @property
    def correct_parameters(self):
        """True when no parameter mismatch was found."""
        return not bool(self.parameter_mismatches)

    def parameter_type(self, param):
        """Documented type of ``param``."""
        return self.doc_parameters[param][0]

    def parameter_desc(self, param):
        """Documented description of ``param``."""
        return self.doc_parameters[param][1]

    @property
    def see_also(self):
        """Ordered mapping ``name -> description`` of the 'See Also' entries."""
        return collections.OrderedDict((name, ''.join(desc))
                                       for name, desc, _
                                       in self.doc['See Also'])

    @property
    def examples(self):
        """Parsed 'Examples' section."""
        return self.doc['Examples']

    @property
    def returns(self):
        """Parsed 'Returns' section."""
        return self.doc['Returns']

    @property
    def yields(self):
        """Parsed 'Yields' section."""
        return self.doc['Yields']

    @property
    def method_source(self):
        """Source code of the object."""
        return inspect.getsource(self.method_obj)

    @property
    def first_line_ends_in_dot(self):
        """
        True when the first line of the cleaned docstring ends with a
        period; None without a docstring.
        """
        # ``self.doc`` is the parsed docstring and has no ``split``; use the
        # cleaned text it was parsed from. ``endswith`` also copes with an
        # empty first line, where indexing ``[-1]`` would raise IndexError.
        if self.clean_doc:
            return self.clean_doc.split('\n')[0].endswith('.')

    @property
    def deprecated(self):
        """True for ``pandas.Panel*`` names or docstrings carrying the marker."""
        # Literal substring test: as a regex the dots of the marker would
        # match any character.
        marker = '.. deprecated:: '
        return (self.method_name.startswith('pandas.Panel') or
                marker in self.summary or
                marker in self.extended_summary)

    @property
    def mentioned_private_classes(self):
        """Names from ``PRIVATE_CLASSES`` that occur in the docstring."""
        return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc]

    @property
    def examples_errors(self):
        """Captured doctest output produced by running the docstring examples."""
        flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
        finder = doctest.DocTestFinder()
        runner = doctest.DocTestRunner(optionflags=flags)
        context = {'np': numpy, 'pd': pandas}
        error_msgs = ''
        for test in finder.find(self.raw_doc, self.method_name, globs=context):
            f = StringIO()
            runner.run(test, out=f.write)
            error_msgs += f.getvalue()
        return error_msgs
class Docstring(object):
    """
    Load an object from its dotted name and expose its docstring.

    Parameters
    ----------
    name : str
        Dotted name of the object (e.g. pandas.Series.str.upper).
    """

    def __init__(self, name):
        self.name = name
        obj = self._load_obj(name)
        self.obj = obj
        # Object that holds the source code, or None if it can't be found.
        self.code_obj = self._to_original_callable(obj)
        # Docstring as stored on the object ('' when there is none).
        self.raw_doc = obj.__doc__ or ''
        # Docstring as returned by pydoc.getdoc.
        self.clean_doc = pydoc.getdoc(obj)
        self.doc = NumpyDocString(self.clean_doc)

    def __len__(self):
        """Return the length of the raw docstring."""
        return len(self.raw_doc)

    @staticmethod
    def _load_obj(name):
        """
        Import Python object from its name as string.

        Parameters
        ----------
        name : str
            Object name to import (e.g. pandas.Series.str.upper)

        Returns
        -------
        object
            Python object that can be a class, method, function...

        Raises
        ------
        ImportError
            If no prefix of `name` can be imported as a module.

        Examples
        --------
        >>> Docstring._load_obj('pandas.Series')
        <class 'pandas.core.series.Series'>
        """
        for maxsplit in range(1, name.count('.') + 1):
            # TODO when py3 only replace by: module, *func_parts = ...
            func_name_split = name.rsplit('.', maxsplit)
            module = func_name_split[0]
            func_parts = func_name_split[1:]
            try:
                obj = importlib.import_module(module)
            except ImportError:
                continue
            # Stop at the first (longest) prefix that is importable.
            break
        else:
            # Reached when no prefix could be imported, so `obj` was never
            # bound and the attribute lookup below can't be attempted.
            raise ImportError('No module can be imported '
                              'from "{}"'.format(name))

        for part in func_parts:
            obj = getattr(obj, part)
        return obj

    @staticmethod
    def _to_original_callable(obj):
        """
        Find the Python object that contains the source code of the object.

        This is useful to find the place in the source code (file and line
        number) where a docstring is defined. It does not currently work for
        all cases, but it should help find some (properties...).

        Parameters
        ----------
        obj : object
            Object to unwrap.

        Returns
        -------
        object or None
            The function or class that holds the code, or None when it
            can't be determined.
        """
        while True:
            if inspect.isfunction(obj) or inspect.isclass(obj):
                f = inspect.getfile(obj)
                # Names such as '<stdin>' do not refer to a real file.
                if f.startswith('<') and f.endswith('>'):
                    return None
                return obj
            if inspect.ismethod(obj):
                obj = obj.__func__
            elif isinstance(obj, functools.partial):
                obj = obj.func
            elif isinstance(obj, property):
                obj = obj.fget
            else:
                return None

    @property
    def type(self):
        """Name of the type of the loaded object."""
        return type(self.obj).__name__

    @property
    def is_function_or_method(self):
        """Whether the loaded object is a function or a method."""
        # TODO(py27): remove ismethod
        return (inspect.isfunction(self.obj)
                or inspect.ismethod(self.obj))

    @property
    def source_file_name(self):
        """
        File name where the object is implemented (e.g. pandas/core/frame.py).
        """
        try:
            fname = inspect.getsourcefile(self.code_obj)
        except TypeError:
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the source code file of the object as None, than crash
            return None
        if fname:
            fname = os.path.relpath(fname, BASE_PATH)
        return fname

    @property
    def source_file_def_line(self):
        """
        Number of line where the object is defined in its file.
        """
        try:
            return inspect.getsourcelines(self.code_obj)[-1]
        except (OSError, TypeError):
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the line number as None, than crash
            return None

    @property
    def github_url(self):
        """URL built from the source file name and the definition line."""
        url = 'https://github.com/pandas-dev/pandas/blob/master/'
        url += '{}#L{}'.format(self.source_file_name,
                               self.source_file_def_line)
        return url

    @property
    def start_blank_lines(self):
        """
        Index of the first non-blank line of the raw docstring.

        None when the raw docstring is empty; the index of the last line
        when every line is blank.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(self.raw_doc.split('\n')):
                if row.strip():
                    break
        return i

    @property
    def end_blank_lines(self):
        """
        Index, counting from the end, of the last non-blank line of the raw
        docstring.

        None when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(reversed(self.raw_doc.split('\n'))):
                if row.strip():
                    break
        return i

    @property
    def double_blank_lines(self):
        """Whether the raw docstring has two consecutive blank lines."""
        # Starts truthy so a single leading blank line is not reported.
        prev = True
        for row in self.raw_doc.split('\n'):
            if not prev and not row.strip():
                return True
            prev = row.strip()
        return False

    @property
    def summary(self):
        """Lines of the Summary section joined by a space."""
        return ' '.join(self.doc['Summary'])

    @property
    def num_summary_lines(self):
        """Number of lines in the Summary section."""
        return len(self.doc['Summary'])

    @property
    def extended_summary(self):
        """
        Lines of the Extended Summary section joined by a space.

        When that section is empty and the Summary has more than one line,
        the joined Summary lines are returned instead.
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ' '.join(self.doc['Summary'])
        return ' '.join(self.doc['Extended Summary'])

    @property
    def needs_summary(self):
        """Whether the summary or the extended summary is empty."""
        return not (bool(self.summary) and bool(self.extended_summary))

    @property
    def doc_parameters(self):
        """
        Documented parameters, in order.

        Returns
        -------
        collections.OrderedDict
            Maps each parameter name to a ``(type, description)`` tuple,
            where the description lines are concatenated.
        """
        return collections.OrderedDict((name, (type_, ''.join(desc)))
                                       for name, type_, desc
                                       in self.doc['Parameters'])

    @property
    def signature_parameters(self):
        """
        Parameter names taken from the signature of the object.

        Returns
        -------
        tuple of str
            Argument names, with ``*``/``**`` prefixes for variadic ones
            and without a leading ``self`` or ``cls``. Empty when the
            signature can't be obtained.
        """
        if inspect.isclass(self.obj):
            if hasattr(self.obj, '_accessors') and (
                    self.name.split('.')[-1] in self.obj._accessors):
                # accessor classes have a signature but don't want to show this
                return tuple()
        try:
            sig = signature(self.obj)
        except (TypeError, ValueError):
            # Some objects, mainly in C extensions do not support introspection
            # of the signature
            return tuple()
        # Copy so the list owned by the signature object is not modified.
        params = list(sig.args)
        if sig.varargs:
            params.append("*" + sig.varargs)
        if sig.keywords:
            params.append("**" + sig.keywords)
        params = tuple(params)
        if params and params[0] in ('self', 'cls'):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """
        Differences between documented and actual parameters.

        Returns
        -------
        list of str
            One message for undocumented parameters, one for unknown
            parameters, and, only when the names match, one for wrong order.
        """
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(
                'Parameters {} not documented'.format(pprint_thing(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append('Unknown parameters {}'.format(pprint_thing(extra)))
        if (not missing and not extra and signature_params != doc_params
                and not (not signature_params and not doc_params)):
            errs.append('Wrong parameters order. ' +
                        'Actual: {!r}. '.format(signature_params) +
                        'Documented: {!r}'.format(doc_params))

        return errs

    @property
    def correct_parameters(self):
        """Whether there are no parameter mismatches."""
        return not bool(self.parameter_mismatches)

    def parameter_type(self, param):
        """Return the documented type of parameter `param`."""
        return self.doc_parameters[param][0]

    def parameter_desc(self, param):
        """
        Return the documented description of parameter `param`.

        Anything from the first occurrence of a sphinx directive listed in
        ``DIRECTIVES`` onwards is dropped.
        """
        desc = self.doc_parameters[param][1]
        # Find and strip out any sphinx directives
        for directive in DIRECTIVES:
            full_directive = '.. {}'.format(directive)
            if full_directive in desc:
                # Only retain any description before the directive
                desc = desc[:desc.index(full_directive)]
        return desc

    @property
    def see_also(self):
        """Ordered mapping of See Also names to their descriptions."""
        return collections.OrderedDict((name, ''.join(desc))
                                       for name, desc, _
                                       in self.doc['See Also'])

    @property
    def examples(self):
        """Content of the Examples section."""
        return self.doc['Examples']

    @property
    def returns(self):
        """Content of the Returns section."""
        return self.doc['Returns']

    @property
    def yields(self):
        """Content of the Yields section."""
        return self.doc['Yields']

    @property
    def method_source(self):
        """Source code of the object, or '' when it can't be retrieved."""
        try:
            return inspect.getsource(self.obj)
        except TypeError:
            return ''

    @property
    def first_line_ends_in_dot(self):
        """
        Whether the first line of the cleaned docstring ends with a period.

        Returns None when there is no docstring.
        """
        # The parsed `self.doc` is not a string, so the check is done on the
        # cleaned text; endswith also copes with an empty first line.
        if self.clean_doc:
            return self.clean_doc.split('\n')[0].endswith('.')
        return None

    @property
    def deprecated(self):
        """
        Whether the object is flagged as deprecated.

        True when the name starts with ``pandas.Panel`` or the literal
        ``.. deprecated:: `` directive is in the summary or extended summary.
        """
        # Plain substring test: as a regular expression the dots of the
        # directive would match any character.
        directive = '.. deprecated:: '
        return (self.name.startswith('pandas.Panel')
                or directive in self.summary
                or directive in self.extended_summary)

    @property
    def mentioned_private_classes(self):
        """Entries of ``PRIVATE_CLASSES`` found in the raw docstring."""
        return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc]

    @property
    def examples_errors(self):
        """
        Run the doctests of the raw docstring and return the runner output.

        Returns
        -------
        str
            Concatenation of everything the doctest runner wrote.
        """
        flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
        finder = doctest.DocTestFinder()
        runner = doctest.DocTestRunner(optionflags=flags)
        # Names made available to the examples without importing them.
        context = {'np': numpy, 'pd': pandas}
        outputs = []
        for test in finder.find(self.raw_doc, self.name, globs=context):
            f = StringIO()
            runner.run(test, out=f.write)
            outputs.append(f.getvalue())
        return ''.join(outputs)
class Docstring:
    """
    Load an object from its dotted name and expose its docstring.

    Parameters
    ----------
    name : str
        Dotted name of the object (e.g. pandas.Series.str.upper).
    """

    def __init__(self, name):
        self.name = name
        obj = self._load_obj(name)
        self.obj = obj
        # Object that holds the source code, or None if it can't be found.
        self.code_obj = self._to_original_callable(obj)
        # Docstring as stored on the object ('' when there is none).
        self.raw_doc = obj.__doc__ or ''
        # Docstring as returned by pydoc.getdoc.
        self.clean_doc = pydoc.getdoc(obj)
        self.doc = NumpyDocString(self.clean_doc)

    def __len__(self):
        """Return the length of the raw docstring."""
        return len(self.raw_doc)

    @staticmethod
    def _load_obj(name):
        """
        Import Python object from its name as string.

        Parameters
        ----------
        name : str
            Object name to import (e.g. pandas.Series.str.upper)

        Returns
        -------
        object
            Python object that can be a class, method, function...

        Raises
        ------
        ImportError
            If no prefix of `name` can be imported as a module.

        Examples
        --------
        >>> Docstring._load_obj('pandas.Series')
        <class 'pandas.core.series.Series'>
        """
        for maxsplit in range(1, name.count('.') + 1):
            module, *func_parts = name.rsplit('.', maxsplit)
            try:
                obj = importlib.import_module(module)
            except ImportError:
                continue
            # Stop at the first (longest) prefix that is importable.
            break
        else:
            # Reached when no prefix could be imported.
            raise ImportError('No module can be imported '
                              'from "{}"'.format(name))

        for part in func_parts:
            obj = getattr(obj, part)
        return obj

    @staticmethod
    def _to_original_callable(obj):
        """
        Find the Python object that contains the source code of the object.

        This is useful to find the place in the source code (file and line
        number) where a docstring is defined. It does not currently work for
        all cases, but it should help find some (properties...).

        Parameters
        ----------
        obj : object
            Object to unwrap.

        Returns
        -------
        object or None
            The function or class that holds the code, or None when it
            can't be determined.
        """
        while True:
            if inspect.isfunction(obj) or inspect.isclass(obj):
                f = inspect.getfile(obj)
                # Names such as '<stdin>' do not refer to a real file.
                if f.startswith('<') and f.endswith('>'):
                    return None
                return obj
            if inspect.ismethod(obj):
                obj = obj.__func__
            elif isinstance(obj, functools.partial):
                obj = obj.func
            elif isinstance(obj, property):
                obj = obj.fget
            else:
                return None

    @property
    def type(self):
        """Name of the type of the loaded object."""
        return type(self.obj).__name__

    @property
    def is_function_or_method(self):
        """Whether the loaded object is a function or a method."""
        # TODO(py27): remove ismethod
        return (inspect.isfunction(self.obj)
                or inspect.ismethod(self.obj))

    @property
    def source_file_name(self):
        """
        File name where the object is implemented (e.g. pandas/core/frame.py).
        """
        try:
            fname = inspect.getsourcefile(self.code_obj)
        except TypeError:
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the source code file of the object as None, than crash
            return None
        if fname:
            fname = os.path.relpath(fname, BASE_PATH)
        return fname

    @property
    def source_file_def_line(self):
        """
        Number of line where the object is defined in its file.
        """
        try:
            return inspect.getsourcelines(self.code_obj)[-1]
        except (OSError, TypeError):
            # In some cases the object is something complex like a cython
            # object that can't be easily introspected. And it's better to
            # return the line number as None, than crash
            return None

    @property
    def github_url(self):
        """URL built from the source file name and the definition line."""
        url = 'https://github.com/pandas-dev/pandas/blob/master/'
        url += '{}#L{}'.format(self.source_file_name,
                               self.source_file_def_line)
        return url

    @property
    def start_blank_lines(self):
        """
        Index of the first non-blank line of the raw docstring.

        None when the raw docstring is empty; the index of the last line
        when every line is blank.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(self.raw_doc.split('\n')):
                if row.strip():
                    break
        return i

    @property
    def end_blank_lines(self):
        """
        Index, counting from the end, of the last non-blank line of the raw
        docstring.

        None when the raw docstring is empty.
        """
        i = None
        if self.raw_doc:
            for i, row in enumerate(reversed(self.raw_doc.split('\n'))):
                if row.strip():
                    break
        return i

    @property
    def double_blank_lines(self):
        """Whether the raw docstring has two consecutive blank lines."""
        # Starts truthy so a single leading blank line is not reported.
        prev = True
        for row in self.raw_doc.split('\n'):
            if not prev and not row.strip():
                return True
            prev = row.strip()
        return False

    @property
    def section_titles(self):
        """
        Titles of the sections found in the docstring.

        A title is a line immediately followed by a line of dashes of the
        same length.
        """
        sections = []
        self.doc._doc.reset()
        while not self.doc._doc.eof():
            content = self.doc._read_to_next_section()
            if (len(content) > 1
                    and len(content[0]) == len(content[1])
                    and set(content[1]) == {'-'}):
                sections.append(content[0])
        return sections

    @property
    def summary(self):
        """Lines of the Summary section joined by a space."""
        return ' '.join(self.doc['Summary'])

    @property
    def num_summary_lines(self):
        """Number of lines in the Summary section."""
        return len(self.doc['Summary'])

    @property
    def extended_summary(self):
        """
        Lines of the Extended Summary section joined by a space.

        When that section is empty and the Summary has more than one line,
        the joined Summary lines are returned instead.
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ' '.join(self.doc['Summary'])
        return ' '.join(self.doc['Extended Summary'])

    @property
    def needs_summary(self):
        """Whether the summary or the extended summary is empty."""
        return not (bool(self.summary) and bool(self.extended_summary))

    @property
    def doc_parameters(self):
        """
        Documented parameters, in order.

        Returns
        -------
        collections.OrderedDict
            Maps each parameter name to a ``(type, description)`` tuple,
            where the description lines are concatenated.
        """
        return collections.OrderedDict((name, (type_, ''.join(desc)))
                                       for name, type_, desc
                                       in self.doc['Parameters'])

    @property
    def signature_parameters(self):
        """
        Parameter names taken from the signature of the object.

        Returns
        -------
        tuple of str
            Argument names, with ``*``/``**`` prefixes for variadic ones
            and without a leading ``self`` or ``cls``. Empty when the
            signature can't be obtained.
        """
        if inspect.isclass(self.obj):
            if hasattr(self.obj, '_accessors') and (
                    self.name.split('.')[-1] in self.obj._accessors):
                # accessor classes have a signature but don't want to show this
                return tuple()
        try:
            sig = inspect.getfullargspec(self.obj)
        except (TypeError, ValueError):
            # Some objects, mainly in C extensions do not support introspection
            # of the signature
            return tuple()
        # Copy so the list owned by the argspec is not modified.
        params = list(sig.args)
        if sig.varargs:
            params.append("*" + sig.varargs)
        if sig.varkw:
            params.append("**" + sig.varkw)
        params = tuple(params)
        if params and params[0] in ('self', 'cls'):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """
        Differences between documented and actual parameters.

        Returns
        -------
        list
            Results of ``error`` for codes PR01 (undocumented parameters),
            PR02 (unknown parameters) and, only when the names match, PR03
            (wrong order).
        """
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(error('PR01', missing_params=pprint_thing(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append(error('PR02', unknown_params=pprint_thing(extra)))
        if (not missing and not extra and signature_params != doc_params
                and not (not signature_params and not doc_params)):
            errs.append(error('PR03',
                              actual_params=signature_params,
                              documented_params=doc_params))

        return errs

    @property
    def correct_parameters(self):
        """Whether there are no parameter mismatches."""
        return not bool(self.parameter_mismatches)

    def parameter_type(self, param):
        """Return the documented type of parameter `param`."""
        return self.doc_parameters[param][0]

    def parameter_desc(self, param):
        """
        Return the documented description of parameter `param`.

        Anything from the first occurrence of a sphinx directive listed in
        ``DIRECTIVES`` onwards is dropped.
        """
        desc = self.doc_parameters[param][1]
        # Find and strip out any sphinx directives
        for directive in DIRECTIVES:
            full_directive = '.. {}'.format(directive)
            if full_directive in desc:
                # Only retain any description before the directive
                desc = desc[:desc.index(full_directive)]
        return desc

    @property
    def see_also(self):
        """Ordered mapping of each See Also name to its description."""
        result = collections.OrderedDict()
        for funcs, desc in self.doc['See Also']:
            for func, _ in funcs:
                result[func] = ''.join(desc)

        return result

    @property
    def examples(self):
        """Content of the Examples section."""
        return self.doc['Examples']

    @property
    def returns(self):
        """Content of the Returns section."""
        return self.doc['Returns']

    @property
    def yields(self):
        """Content of the Yields section."""
        return self.doc['Yields']

    @property
    def method_source(self):
        """
        Dedented source code of the object, or '' when it can't be
        retrieved.
        """
        try:
            source = inspect.getsource(self.obj)
        except TypeError:
            return ''
        return textwrap.dedent(source)

    @property
    def method_returns_something(self):
        """
        Check if the docstrings method can return something.

        Bare returns, returns valued None and returns from nested functions
        are disconsidered.

        Returns
        -------
        bool
            Whether the docstrings method can return something.
        """

        def get_returns_not_on_nested_functions(node):
            returns = [node] if isinstance(node, ast.Return) else []
            for child in ast.iter_child_nodes(node):
                # Ignore nested functions (sync or async) and their subtrees.
                if not isinstance(child, (ast.FunctionDef,
                                          ast.AsyncFunctionDef)):
                    child_returns = get_returns_not_on_nested_functions(child)
                    returns.extend(child_returns)
            return returns

        def returns_a_value(return_node):
            value = return_node.value
            if value is None:
                # Bare ``return``.
                return False
            # A literal None is a ``Constant`` node on recent Python versions
            # and a ``NameConstant`` on older ones; matching on the class
            # name avoids touching the deprecated ``ast.NameConstant``.
            is_constant = type(value).__name__ in ('Constant', 'NameConstant')
            return not (is_constant and value.value is None)

        tree = ast.parse(self.method_source).body
        if not tree:
            return False
        returns = get_returns_not_on_nested_functions(tree[0])
        return any(returns_a_value(node) for node in returns)

    @property
    def first_line_ends_in_dot(self):
        """
        Whether the first line of the cleaned docstring ends with a period.

        Returns None when there is no docstring.
        """
        # The parsed `self.doc` is not a string, so the check is done on the
        # cleaned text; endswith also copes with an empty first line.
        if self.clean_doc:
            return self.clean_doc.split('\n')[0].endswith('.')
        return None

    @property
    def deprecated(self):
        """
        Whether the ``.. deprecated:: `` directive is in the summary or the
        extended summary.
        """
        return '.. deprecated:: ' in (self.summary + self.extended_summary)

    @property
    def mentioned_private_classes(self):
        """Entries of ``PRIVATE_CLASSES`` found in the raw docstring."""
        return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc]

    @property
    def examples_errors(self):
        """
        Run the doctests of the raw docstring and return the runner output.

        Returns
        -------
        str
            Concatenation of everything the doctest runner wrote.
        """
        flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
        finder = doctest.DocTestFinder()
        runner = doctest.DocTestRunner(optionflags=flags)
        # Names made available to the examples without importing them.
        context = {'np': numpy, 'pd': pandas}
        outputs = []
        for test in finder.find(self.raw_doc, self.name, globs=context):
            f = StringIO()
            runner.run(test, out=f.write)
            outputs.append(f.getvalue())
        return ''.join(outputs)

    @property
    def examples_source_code(self):
        """Source of every doctest example found in the raw docstring."""
        lines = doctest.DocTestParser().get_examples(self.raw_doc)
        return [line.source for line in lines]

    def validate_pep8(self):
        """
        Run flake8 on the code of the docstring examples.

        Yields
        ------
        object
            Entries of ``application.guide.stats.statistics_for('')`` after
            the checks have run. Nothing is yielded when there is no
            Examples section.
        """
        if not self.examples:
            return

        # F401 is needed to not generate flake8 errors in examples
        # that do not use numpy or pandas
        content = ''.join(('import numpy as np  # noqa: F401\n',
                           'import pandas as pd  # noqa: F401\n',
                           *self.examples_source_code))

        application = flake8.main.application.Application()
        application.initialize(["--quiet"])

        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8') as file:
            file.write(content)
            # Flush so the checker reads the full content from disk.
            file.flush()
            application.run_checks([file.name])

        # We need this to avoid flake8 printing the names of the files to
        # the standard output
        application.formatter.write = lambda line, source: None
        application.report()

        yield from application.guide.stats.statistics_for('')