def dateutil_parse(timestr, default, ignoretz=False, tzinfos=None, **kwargs): """ lifted from dateutil to get resolution""" from dateutil import tz import time fobj = StringIO(str(timestr)) res = DEFAULTPARSER._parse(fobj, **kwargs) # dateutil 2.2 compat if isinstance(res, tuple): res, _ = res if res is None: raise ValueError("unknown string format") repl = {} reso = None for attr in [ "year", "month", "day", "hour", "minute", "second", "microsecond" ]: value = getattr(res, attr) if value is not None: repl[attr] = value reso = attr if reso is None: raise ValueError("Cannot parse date.") if reso == 'microsecond': if repl['microsecond'] == 0: reso = 'second' elif repl['microsecond'] % 1000 == 0: reso = 'millisecond' ret = default.replace(**repl) if res.weekday is not None and not res.day: ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: if callable(tzinfos) or tzinfos and res.tzname in tzinfos: if callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata elif isinstance(tzdata, compat.string_types): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) else: raise ValueError("offset must be tzinfo subclass, " "tz string, or int offset") ret = ret.replace(tzinfo=tzinfo) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=tz.tzlocal()) elif res.tzoffset == 0: ret = ret.replace(tzinfo=tz.tzutc()) elif res.tzoffset: ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) return ret, reso
def dateutil_parse(timestr, default, ignoretz=False, tzinfos=None, **kwargs): """ lifted from dateutil to get resolution""" from dateutil import tz import time fobj = StringIO(str(timestr)) res = DEFAULTPARSER._parse(fobj, **kwargs) # dateutil 2.2 compat if isinstance(res, tuple): res, _ = res if res is None: raise ValueError("unknown string format") repl = {} reso = None for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: value = getattr(res, attr) if value is not None: repl[attr] = value reso = attr if reso is None: raise ValueError("Cannot parse date.") if reso == "microsecond": if repl["microsecond"] == 0: reso = "second" elif repl["microsecond"] % 1000 == 0: reso = "millisecond" ret = default.replace(**repl) if res.weekday is not None and not res.day: ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: if callable(tzinfos) or tzinfos and res.tzname in tzinfos: if callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata elif isinstance(tzdata, compat.string_types): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) else: raise ValueError("offset must be tzinfo subclass, " "tz string, or int offset") ret = ret.replace(tzinfo=tzinfo) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=tz.tzlocal()) elif res.tzoffset == 0: ret = ret.replace(tzinfo=tz.tzutc()) elif res.tzoffset: ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) return ret, reso
def test_NaT_docstrings(): # GH#17327 nat_names = dir(NaT) # NaT should have *most* of the Timestamp methods, with matching # docstrings. The attributes that are not expected to be present in NaT # are private methods plus `ts_expected` below. ts_names = dir(Timestamp) ts_missing = [ x for x in ts_names if x not in nat_names and not x.startswith('_') ] ts_missing.sort() ts_expected = [ 'freqstr', 'normalize', 'offset', 'to_julian_date', 'to_period', 'tz' ] assert ts_missing == ts_expected ts_overlap = [ x for x in nat_names if x in ts_names and not x.startswith('_') and callable(getattr(Timestamp, x)) ] for name in ts_overlap: tsdoc = getattr(Timestamp, name).__doc__ natdoc = getattr(NaT, name).__doc__ assert tsdoc == natdoc # NaT should have *most* of the Timedelta methods, with matching # docstrings. The attributes that are not expected to be present in NaT # are private methods plus `td_expected` below. # For methods that are both Timestamp and Timedelta methods, the # Timestamp docstring takes priority. td_names = dir(Timedelta) td_missing = [ x for x in td_names if x not in nat_names and not x.startswith('_') ] td_missing.sort() td_expected = [ 'components', 'delta', 'is_populated', 'to_pytimedelta', 'to_timedelta64', 'view' ] assert td_missing == td_expected td_overlap = [ x for x in nat_names if x in td_names and x not in ts_names and # Timestamp __doc__ takes priority not x.startswith('_') and callable(getattr(Timedelta, x)) ] assert td_overlap == ['total_seconds'] for name in td_overlap: tddoc = getattr(Timedelta, name).__doc__ natdoc = getattr(NaT, name).__doc__ assert tddoc == natdoc
def assertRaisesRegexp(exception, regexp, callable, *args, **kwargs): """ Port of assertRaisesRegexp from unittest in Python 2.7 - used in with statement. Explanation from standard library: Like assertRaises() but also tests that regexp matches on the string representation of the raised exception. regexp may be a regular expression object or a string containing a regular expression suitable for use by re.search(). You can pass either a regular expression or a compiled regular expression object. >>> assertRaisesRegexp(ValueError, 'invalid literal for.*XYZ', ... int, 'XYZ') >>> import re >>> assertRaisesRegexp(ValueError, re.compile('literal'), int, 'XYZ') If an exception of a different type is raised, it bubbles up. >>> assertRaisesRegexp(TypeError, 'literal', int, 'XYZ') Traceback (most recent call last): ... ValueError: invalid literal for int() with base 10: 'XYZ' >>> dct = {} >>> assertRaisesRegexp(KeyError, 'pear', dct.__getitem__, 'apple') Traceback (most recent call last): ... AssertionError: "pear" does not match "'apple'" >>> assertRaisesRegexp(KeyError, 'apple', dct.__getitem__, 'apple') >>> assertRaisesRegexp(Exception, 'operand type.*int.*dict', lambda : 2 + {}) """ import re try: callable(*args, **kwargs) except Exception as e: if not issubclass(e.__class__, exception): # mimics behavior of unittest raise # don't recompile if hasattr(regexp, "search"): expected_regexp = regexp else: expected_regexp = re.compile(regexp) if not expected_regexp.search(str(e)): raise AssertionError('"%s" does not match "%s"' % (expected_regexp.pattern, str(e))) else: # Apparently some exceptions don't have a __name__ attribute? Just aping unittest library here name = getattr(exception, "__name__", str(exception)) raise AssertionError("{0} not raised".format(name))
def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): """ re-evaluate the obj with a groupby aggregation """ if grouper is None: self._set_binner() grouper = self.grouper obj = self._selected_obj try: grouped = groupby(obj, by=None, grouper=grouper, axis=self.axis) except TypeError: # panel grouper grouped = PanelGroupBy(obj, grouper=grouper, axis=self.axis) try: if isinstance(obj, ABCDataFrame) and compat.callable(how): # Check if the function is reducing or not. result = grouped._aggregate_item_by_item(how, *args, **kwargs) else: result = grouped.aggregate(how, *args, **kwargs) except Exception: # we have a non-reducing function # try to evaluate result = grouped.apply(how, *args, **kwargs) result = self._apply_loffset(result) return self._wrap_result(result)
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): """ Load msgpack pandas object from the specified file path THIS IS AN EXPERIMENTAL LIBRARY and the storage format may not be stable until a future release. Parameters ---------- path_or_buf : string File path, BytesIO like or string encoding: Encoding for decoding msgpack str type iterator : boolean, if True, return an iterator to the unpacker (default is False) Returns ------- obj : type of object stored in file """ path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) def read(fh): l = list(unpack(fh, encoding=encoding, **kwargs)) if len(l) == 1: return l[0] return l # see if we have an actual file if isinstance(path_or_buf, compat.string_types): try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): exists = False if exists: with open(path_or_buf, 'rb') as fh: return read(fh) if isinstance(path_or_buf, compat.binary_type): # treat as a binary-like fh = None try: # We can't distinguish between a path and a buffer of bytes in # Python 2 so instead assume the first byte of a valid path is # less than 0x80. if compat.PY3 or ord(path_or_buf[0]) >= 0x80: fh = compat.BytesIO(path_or_buf) return read(fh) finally: if fh is not None: fh.close() elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): # treat as a buffer like return read(path_or_buf) raise ValueError('path_or_buf needs to be a string file path or file-like')
def test_NaT_docstrings(): # GH#17327 nat_names = dir(NaT) # NaT should have *most* of the Timestamp methods, with matching # docstrings. The attributes that are not expected to be present in NaT # are private methods plus `ts_expected` below. ts_names = dir(Timestamp) ts_missing = [x for x in ts_names if x not in nat_names and not x.startswith('_')] ts_missing.sort() ts_expected = ['freqstr', 'normalize', 'to_julian_date', 'to_period', 'tz'] assert ts_missing == ts_expected ts_overlap = [x for x in nat_names if x in ts_names and not x.startswith('_') and callable(getattr(Timestamp, x))] for name in ts_overlap: tsdoc = getattr(Timestamp, name).__doc__ natdoc = getattr(NaT, name).__doc__ assert tsdoc == natdoc # NaT should have *most* of the Timedelta methods, with matching # docstrings. The attributes that are not expected to be present in NaT # are private methods plus `td_expected` below. # For methods that are both Timestamp and Timedelta methods, the # Timestamp docstring takes priority. td_names = dir(Timedelta) td_missing = [x for x in td_names if x not in nat_names and not x.startswith('_')] td_missing.sort() td_expected = ['components', 'delta', 'is_populated', 'to_pytimedelta', 'to_timedelta64', 'view'] assert td_missing == td_expected td_overlap = [x for x in nat_names if x in td_names and x not in ts_names and # Timestamp __doc__ takes priority not x.startswith('_') and callable(getattr(Timedelta, x))] assert td_overlap == ['total_seconds'] for name in td_overlap: tddoc = getattr(Timedelta, name).__doc__ natdoc = getattr(NaT, name).__doc__ assert tddoc == natdoc
def read_msgpack(path_or_buf, encoding="utf-8", iterator=False, **kwargs): """ Load msgpack pandas object from the specified file path THIS IS AN EXPERIMENTAL LIBRARY and the storage format may not be stable until a future release. Parameters ---------- path_or_buf : string File path, BytesIO like or string encoding: Encoding for decoding msgpack str type iterator : boolean, if True, return an iterator to the unpacker (default is False) Returns ------- obj : type of object stored in file """ path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) def read(fh): l = list(unpack(fh, encoding=encoding, **kwargs)) if len(l) == 1: return l[0] return l # see if we have an actual file if isinstance(path_or_buf, compat.string_types): try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): exists = False if exists: with open(path_or_buf, "rb") as fh: return read(fh) # treat as a binary-like if isinstance(path_or_buf, compat.binary_type): fh = None try: fh = compat.BytesIO(path_or_buf) return read(fh) finally: if fh is not None: fh.close() # a buffer like if hasattr(path_or_buf, "read") and compat.callable(path_or_buf.read): return read(path_or_buf) raise ValueError("path_or_buf needs to be a string file path or file-like")
def read_msgpack(path_or_buf, iterator=False, **kwargs): """ Load msgpack pandas object from the specified file path THIS IS AN EXPERIMENTAL LIBRARY and the storage format may not be stable until a future release. Parameters ---------- path_or_buf : string File path, BytesIO like or string iterator : boolean, if True, return an iterator to the unpacker (default is False) Returns ------- obj : type of object stored in file """ path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) def read(fh): l = list(unpack(fh, **kwargs)) if len(l) == 1: return l[0] return l # see if we have an actual file if isinstance(path_or_buf, compat.string_types): try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): exists = False if exists: with open(path_or_buf, 'rb') as fh: return read(fh) # treat as a binary-like if isinstance(path_or_buf, compat.binary_type): fh = None try: fh = compat.BytesIO(path_or_buf) return read(fh) finally: if fh is not None: fh.close() # a buffer like if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): return read(path_or_buf) raise ValueError('path_or_buf needs to be a string file path or file-like')
def wrapper(*args, **kwargs): def dec(f): return decorator(f, *args, **kwargs) is_decorating = not kwargs and len(args) == 1 and callable(args[0]) if is_decorating: f = args[0] args = [] return dec(f) else: return dec
def knownfailureif(fail_condition, msg=None): """ Make function raise KnownFailureTest exception if given condition is true. If the condition is a callable, it is used at runtime to dynamically make the decision. This is useful for tests that may require costly imports, to delay the cost until the test suite is actually executed. Parameters ---------- fail_condition : bool or callable Flag to determine whether to mark the decorated test as a known failure (if True) or not (if False). msg : str, optional Message to give on raising a KnownFailureTest exception. Default is None. Returns ------- decorator : function Decorator, which, when applied to a function, causes SkipTest to be raised when `skip_condition` is True, and the function to be called normally otherwise. Notes ----- The decorator itself is decorated with the ``nose.tools.make_decorator`` function in order to transmit function name, and various other metadata. """ if msg is None: msg = "Test skipped due to known failure" # Allow for both boolean or callable known failure conditions. if callable(fail_condition): fail_val = fail_condition else: fail_val = lambda: fail_condition def knownfail_decorator(f): # Local import to avoid a hard nose dependency and only incur the # import time overhead at actual test-time. import nose def knownfailer(*args, **kwargs): if fail_val(): raise KnownFailureTest(msg) else: return f(*args, **kwargs) return nose.tools.make_decorator(f)(knownfailer) return knownfail_decorator
def knownfailureif(fail_condition, msg=None): """ Make function raise KnownFailureTest exception if given condition is true. If the condition is a callable, it is used at runtime to dynamically make the decision. This is useful for tests that may require costly imports, to delay the cost until the test suite is actually executed. Parameters ---------- fail_condition : bool or callable Flag to determine whether to mark the decorated test as a known failure (if True) or not (if False). msg : str, optional Message to give on raising a KnownFailureTest exception. Default is None. Returns ------- decorator : function Decorator, which, when applied to a function, causes SkipTest to be raised when `skip_condition` is True, and the function to be called normally otherwise. Notes ----- The decorator itself is decorated with the ``nose.tools.make_decorator`` function in order to transmit function name, and various other metadata. """ if msg is None: msg = 'Test skipped due to known failure' # Allow for both boolean or callable known failure conditions. if callable(fail_condition): fail_val = fail_condition else: fail_val = lambda: fail_condition def knownfail_decorator(f): # Local import to avoid a hard nose dependency and only incur the # import time overhead at actual test-time. import nose def knownfailer(*args, **kwargs): if fail_val(): raise KnownFailureTest(msg) else: return f(*args, **kwargs) return nose.tools.make_decorator(f)(knownfailer) return knownfail_decorator
def register_writer(klass): """Adds engine to the excel writer registry. You must use this method to integrate with ``to_excel``. Also adds config options for any new ``supported_extensions`` defined on the writer.""" if not compat.callable(klass): raise ValueError("Can only register callables as engines") engine_name = klass.engine _writers[engine_name] = klass for ext in klass.supported_extensions: if ext.startswith("."): ext = ext[1:] if ext not in _writer_extensions: config.register_option("io.excel.%s.writer" % ext, engine_name, validator=str) _writer_extensions.append(ext)
def mangle_signature(app, what, name, obj, options, sig, retann): # Do not try to inspect classes that don't define `__init__` if inspect.isclass(obj) and (not hasattr(obj, "__init__") or "initializes x; see " in pydoc.getdoc(obj.__init__)): return "", "" if not (callable(obj) or hasattr(obj, "__argspec_is_invalid_")): return if not hasattr(obj, "__doc__"): return doc = SphinxDocString(pydoc.getdoc(obj)) if doc["Signature"]: sig = re.sub(u("^[^(]*"), u(""), doc["Signature"]) return sig, u("")
def register_writer(klass): """Adds engine to the excel writer registry. You must use this method to integrate with ``to_excel``. Also adds config options for any new ``supported_extensions`` defined on the writer.""" if not compat.callable(klass): raise ValueError("Can only register callables as engines") engine_name = klass.engine _writers[engine_name] = klass for ext in klass.supported_extensions: if ext.startswith('.'): ext = ext[1:] if ext not in _writer_extensions: config.register_option("io.excel.%s.writer" % ext, engine_name, validator=str) _writer_extensions.append(ext)
def flatten(*stack): """ helper function for flattening iterables of generators in a sensible way. """ stack = list(stack) while stack: try: x = next(stack[0]) except StopIteration: stack.pop(0) continue if hasattr(x,'next') and callable(getattr(x,'next')): stack.insert(0, x) #if isinstance(x, (GeneratorType,listerator)): else: yield x
def get_doc_object(obj, what=None, doc=None, config={}): if what is None: if inspect.isclass(obj): what = 'class' elif inspect.ismodule(obj): what = 'module' elif callable(obj): what = 'function' else: what = 'object' if what == 'class': return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, config=config) elif what in ('function', 'method'): return SphinxFunctionDoc(obj, doc=doc, config=config) else: if doc is None: doc = pydoc.getdoc(obj) return SphinxObjDoc(obj, doc, config=config)
def get_doc_object(obj, what=None, config=None): if what is None: if inspect.isclass(obj): what = 'class' elif inspect.ismodule(obj): what = 'module' elif callable(obj): what = 'function' else: what = 'object' if what == 'class': doc = SphinxTraitsDoc(obj, '', func_doc=SphinxFunctionDoc, config=config) if looks_like_issubclass(obj, 'HasTraits'): for name, trait, comment in comment_eater.get_class_traits(obj): # Exclude private traits. if not name.startswith('_'): doc['Traits'].append((name, trait, comment.splitlines())) return doc elif what in ('function', 'method'): return SphinxFunctionDoc(obj, '', config=config) else: return SphinxDocString(pydoc.getdoc(obj), config=config)
def methods(self): if self._cls is None: return [] return [name for name,func in inspect.getmembers(self._cls) if not name.startswith('_') and callable(func)]
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, observed=False, mutated=False, validate=True): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. This may be composed of multiple Grouping objects, indicating multiple groupers Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers Groupers enable local references to axis,level,sort, while the passed in axis, level, and sort are 'global'. This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into a BaseGrouper. If observed & we have a categorical grouper, only show the observed values If validate, then check for key/level overlaps """ group_axis = obj._get_axis(axis) # validate that the passed single level is compatible with the passed # axis of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are # some processes only for non-MultiIndex in else-block, # eg. `obj.index.name != level`. We have to consider carefully whether # these are applicable for MultiIndex. Even if these are applicable, # we need to check if it makes no side effect to subsequent processes # on the outside of this condition. # (GH 17621) if isinstance(group_axis, MultiIndex): if is_list_like(level) and len(level) == 1: level = level[0] if key is None and is_scalar(level): # Get the level values from group_axis key = group_axis.get_level_values(level) level = None else: # allow level to be a length-one list-like object # (e.g., level=[0]) # GH 13901 if is_list_like(level): nlevels = len(level) if nlevels == 1: level = level[0] elif nlevels == 0: raise ValueError('No group keys passed!') else: raise ValueError('multiple levels only valid with ' 'MultiIndex') if isinstance(level, compat.string_types): if obj.index.name != level: raise ValueError('level name %s is not the name of the ' 'index' % level) elif level > 0 or level < -1: raise ValueError('level > 0 or level < -1 only valid with ' ' MultiIndex') # NOTE: `group_axis` and `group_axis.get_level_values(level)` # are same in this section. level = None key = group_axis # a passed-in Grouper, directly convert if isinstance(key, Grouper): binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: return grouper, {key.key}, obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): return key, [], obj # In the future, a tuple key will always mean an actual key, # not an iterable of keys. In the meantime, we attempt to provide # a warning. We can assume that the user wanted a list of keys when # the key is not in the index. We just have to be careful with # unhashble elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 is_tuple = isinstance(key, tuple) all_hashable = is_tuple and is_hashable(key) if is_tuple: if ((all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable): # column names ('a', 'b') -> ['a', 'b'] # arrays like (a, b) -> [a, b] msg = ("Interpreting tuple 'by' as a list of keys, rather than " "a single key. Use 'by=[...]' instead of 'by=(...)'. In " "the future, a tuple will always mean a single key.") warnings.warn(msg, FutureWarning, stacklevel=5) key = list(key) if not isinstance(key, list): keys = [key] match_axis_length = False else: keys = key match_axis_length = len(keys) == len(group_axis) # what are we after, exactly? any_callable = any(callable(g) or isinstance(g, dict) for g in keys) any_groupers = any(isinstance(g, Grouper) for g in keys) any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) try: if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) else: all_in_columns_index = False except Exception: all_in_columns_index = False if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com.asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) levels = level else: levels = [level] * len(keys) groupings = [] exclusions = [] # if the actual grouper should be obj[key] def is_in_axis(key): if not _is_label_like(key): try: obj._data.items.get_loc(key) except Exception: return False return True # if the grouper is obj[name] def is_in_obj(gpr): try: return id(gpr) == id(obj[gpr.name]) except Exception: return False for i, (gpr, level) in enumerate(zip(keys, levels)): if is_in_obj(gpr): # df.groupby(df['name']) in_axis, name = True, gpr.name exclusions.append(name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: obj._check_label_or_level_ambiguity(gpr) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif obj._is_level_reference(gpr): in_axis, name, level, gpr = False, None, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.append(gpr.key) in_axis, name = False, None else: in_axis, name = False, None if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( ("Length of grouper ({len_gpr}) and axis ({len_axis})" " must be same length" .format(len_gpr=len(gpr), len_axis=obj.shape[axis]))) # create the Grouping # allow us to passing the actual Grouping as the gpr ping = Grouping(group_axis, gpr, obj=obj, name=name, level=level, sort=sort, observed=observed, in_axis=in_axis) \ if not isinstance(gpr, Grouping) else gpr groupings.append(ping) if len(groupings) == 0: raise ValueError('No group keys passed!') # create the internals grouper grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) return grouper, exclusions, obj
def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): """Decorator to deprecate a keyword argument of a function Parameters ---------- old_arg_name : str Name of argument in function to deprecate new_arg_name : str Name of prefered argument in function mapping : dict or callable If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; values not found in a dict will be forwarded unchanged. Examples -------- The following deprecates 'cols', using 'columns' instead >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') ... def f(columns=''): ... print(columns) ... >>> f(columns='should work ok') should work ok >>> f(cols='should raise warning') FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning >>> f(cols='should error', columns="can\'t pass do both") TypeError: Can only specify 'cols' or 'columns', not both >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) ... def f(new=False): ... print('yes!' if new else 'no!') ... >>> f(old='yes') FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! """ if mapping is not None and not hasattr(mapping, 'get') and \ not callable(mapping): raise TypeError("mapping from old to new argument values " "must be dict or callable!") def _deprecate_kwarg(func): @wraps(func) def wrapper(*args, **kwargs): old_arg_value = kwargs.pop(old_arg_name, None) if old_arg_value is not None: if mapping is not None: if hasattr(mapping, 'get'): new_arg_value = mapping.get(old_arg_value, old_arg_value) else: new_arg_value = mapping(old_arg_value) msg = "the %s=%r keyword is deprecated, " \ "use %s=%r instead" % \ (old_arg_name, old_arg_value, new_arg_name, new_arg_value) else: new_arg_value = old_arg_value msg = "the '%s' keyword is deprecated, " \ "use '%s' instead" % (old_arg_name, new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: msg = ("Can only specify '%s' or '%s', not both" % (old_arg_name, new_arg_name)) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value return func(*args, **kwargs) return wrapper return _deprecate_kwarg
def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): """ Decorator to deprecate a keyword argument of a function. Parameters ---------- old_arg_name : str Name of argument in function to deprecate new_arg_name : str or None Name of preferred argument in function. Use None to raise warning that ``old_arg_name`` keyword is deprecated. mapping : dict or callable If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; values not found in a dict will be forwarded unchanged. Examples -------- The following deprecates 'cols', using 'columns' instead >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') ... def f(columns=''): ... print(columns) ... >>> f(columns='should work ok') should work ok >>> f(cols='should raise warning') FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning >>> f(cols='should error', columns="can\'t pass do both") TypeError: Can only specify 'cols' or 'columns', not both >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) ... def f(new=False): ... print('yes!' if new else 'no!') ... >>> f(old='yes') FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! To raise a warning that a keyword will be removed entirely in the future >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) ... def f(cols='', another_param=''): ... print(cols) ... >>> f(cols='should raise warning') FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please takes steps to stop use of 'cols' should raise warning >>> f(another_param='should not raise warning') should not raise warning >>> f(cols='should raise warning', another_param='') FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please takes steps to stop use of 'cols' should raise warning """ if mapping is not None and not hasattr(mapping, 'get') and \ not callable(mapping): raise TypeError("mapping from old to new argument values " "must be dict or callable!") def _deprecate_kwarg(func): @wraps(func) def wrapper(*args, **kwargs): old_arg_value = kwargs.pop(old_arg_name, None) if new_arg_name is None and old_arg_value is not None: msg = ( "the '{old_name}' keyword is deprecated and will be " "removed in a future version. " "Please take steps to stop the use of '{old_name}'" ).format(old_name=old_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) kwargs[old_arg_name] = old_arg_value return func(*args, **kwargs) if old_arg_value is not None: if mapping is not None: if hasattr(mapping, 'get'): new_arg_value = mapping.get(old_arg_value, old_arg_value) else: new_arg_value = mapping(old_arg_value) msg = ("the {old_name}={old_val!r} keyword is deprecated, " "use {new_name}={new_val!r} instead" ).format(old_name=old_arg_name, old_val=old_arg_value, new_name=new_arg_name, new_val=new_arg_value) else: new_arg_value = old_arg_value msg = ("the '{old_name}' keyword is deprecated, " "use '{new_name}' instead" ).format(old_name=old_arg_name, new_name=new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: msg = ("Can only specify '{old_name}' or '{new_name}', " "not both").format(old_name=old_arg_name, new_name=new_arg_name) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value return func(*args, **kwargs) return wrapper return _deprecate_kwarg
def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): """ Decorator to deprecate a keyword argument of a function. Parameters ---------- old_arg_name : str Name of argument in function to deprecate new_arg_name : str or None Name of preferred argument in function. Use None to raise warning that ``old_arg_name`` keyword is deprecated. mapping : dict or callable If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; values not found in a dict will be forwarded unchanged. Examples -------- The following deprecates 'cols', using 'columns' instead >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') ... def f(columns=''): ... print(columns) ... >>> f(columns='should work ok') should work ok >>> f(cols='should raise warning') FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning >>> f(cols='should error', columns="can\'t pass do both") TypeError: Can only specify 'cols' or 'columns', not both >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) ... def f(new=False): ... print('yes!' if new else 'no!') ... >>> f(old='yes') FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! To raise a warning that a keyword will be removed entirely in the future >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) ... def f(cols='', another_param=''): ... print(cols) ... >>> f(cols='should raise warning') FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please takes steps to stop use of 'cols' should raise warning >>> f(another_param='should not raise warning') should not raise warning >>> f(cols='should raise warning', another_param='') FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please takes steps to stop use of 'cols' should raise warning """ if mapping is not None and not hasattr(mapping, 'get') and \ not callable(mapping): raise TypeError("mapping from old to new argument values " "must be dict or callable!") def _deprecate_kwarg(func): @wraps(func) def wrapper(*args, **kwargs): old_arg_value = kwargs.pop(old_arg_name, None) if new_arg_name is None and old_arg_value is not None: msg = ("the '{old_name}' keyword is deprecated and will be " "removed in a future version. " "Please take steps to stop the use of '{old_name}'" ).format(old_name=old_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) kwargs[old_arg_name] = old_arg_value return func(*args, **kwargs) if old_arg_value is not None: if mapping is not None: if hasattr(mapping, 'get'): new_arg_value = mapping.get(old_arg_value, old_arg_value) else: new_arg_value = mapping(old_arg_value) msg = ("the {old_name}={old_val!r} keyword is deprecated, " "use {new_name}={new_val!r} instead").format( old_name=old_arg_name, old_val=old_arg_value, new_name=new_arg_name, new_val=new_arg_value) else: new_arg_value = old_arg_value msg = ("the '{old_name}' keyword is deprecated, " "use '{new_name}' instead").format( old_name=old_arg_name, new_name=new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: msg = ("Can only specify '{old_name}' or '{new_name}', " "not both").format(old_name=old_arg_name, new_name=new_arg_name) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value return func(*args, **kwargs) return wrapper return _deprecate_kwarg
def _get_grouper(obj, key=None, axis=0, level=None, sort=True, observed=False, mutated=False, validate=True): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. This may be composed of multiple Grouping objects, indicating multiple groupers Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers Groupers enable local references to axis,level,sort, while the passed in axis, level, and sort are 'global'. This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into a BaseGrouper. If observed & we have a categorical grouper, only show the observed values If validate, then check for key/level overlaps """ group_axis = obj._get_axis(axis) # validate that the passed single level is compatible with the passed # axis of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are # some processes only for non-MultiIndex in else-block, # eg. `obj.index.name != level`. We have to consider carefully whether # these are applicable for MultiIndex. Even if these are applicable, # we need to check if it makes no side effect to subsequent processes # on the outside of this condition. # (GH 17621) if isinstance(group_axis, MultiIndex): if is_list_like(level) and len(level) == 1: level = level[0] if key is None and is_scalar(level): # Get the level values from group_axis key = group_axis.get_level_values(level) level = None else: # allow level to be a length-one list-like object # (e.g., level=[0]) # GH 13901 if is_list_like(level): nlevels = len(level) if nlevels == 1: level = level[0] elif nlevels == 0: raise ValueError('No group keys passed!') else: raise ValueError('multiple levels only valid with ' 'MultiIndex') if isinstance(level, compat.string_types): if obj.index.name != level: raise ValueError('level name %s is not the name of the ' 'index' % level) elif level > 0 or level < -1: raise ValueError('level > 0 or level < -1 only valid with ' ' MultiIndex') # NOTE: `group_axis` and `group_axis.get_level_values(level)` # are same in this section. level = None key = group_axis # a passed-in Grouper, directly convert if isinstance(key, Grouper): binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: return grouper, set([key.key]), obj # already have a BaseGrouper, just return it elif isinstance(key, BaseGrouper): return key, [], obj # In the future, a tuple key will always mean an actual key, # not an iterable of keys. In the meantime, we attempt to provide # a warning. We can assume that the user wanted a list of keys when # the key is not in the index. We just have to be careful with # unhashble elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 is_tuple = isinstance(key, tuple) all_hashable = is_tuple and is_hashable(key) if is_tuple: if ((all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable): # column names ('a', 'b') -> ['a', 'b'] # arrays like (a, b) -> [a, b] msg = ("Interpreting tuple 'by' as a list of keys, rather than " "a single key. Use 'by=[...]' instead of 'by=(...)'. In " "the future, a tuple will always mean a single key.") warnings.warn(msg, FutureWarning, stacklevel=5) key = list(key) if not isinstance(key, list): keys = [key] match_axis_length = False else: keys = key match_axis_length = len(keys) == len(group_axis) # what are we after, exactly? any_callable = any(callable(g) or isinstance(g, dict) for g in keys) any_groupers = any(isinstance(g, Grouper) for g in keys) any_arraylike = any( isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) try: if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in obj.index.names for g in keys) else: all_in_columns_index = False except Exception: all_in_columns_index = False if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) levels = level else: levels = [level] * len(keys) groupings = [] exclusions = [] # if the actual grouper should be obj[key] def is_in_axis(key): if not _is_label_like(key): try: obj._data.items.get_loc(key) except Exception: return False return True # if the grouper is obj[name] def is_in_obj(gpr): try: return id(gpr) == id(obj[gpr.name]) except Exception: return False for i, (gpr, level) in enumerate(zip(keys, levels)): if is_in_obj(gpr): # df.groupby(df['name']) in_axis, name = True, gpr.name exclusions.append(name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: stacklevel = 5 # Number of stack levels from df.groupby obj._check_label_or_level_ambiguity(gpr, stacklevel=stacklevel) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif obj._is_level_reference(gpr): in_axis, name, level, gpr = False, None, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.append(gpr.key) in_axis, name = False, None else: in_axis, name = False, None if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( ("Length of grouper ({len_gpr}) and axis ({len_axis})" " must be same length".format(len_gpr=len(gpr), len_axis=obj.shape[axis]))) # create the Grouping # allow us to passing the actual Grouping as the gpr ping = Grouping(group_axis, gpr, obj=obj, name=name, level=level, sort=sort, observed=observed, in_axis=in_axis) \ if not isinstance(gpr, Grouping) else gpr groupings.append(ping) if len(groupings) == 0: raise ValueError('No group keys passed!') # create the internals grouper grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) return grouper, exclusions, obj