def _getitem_axis(self, key, axis=0):
    """Index along a single axis with a scalar, slice, or list-like key.

    Dispatches to slice/iterable handlers; for scalar keys, integer
    positions are translated to labels on non-integer indexes.
    """
    labels = self.obj._get_axis(axis)
    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)
    elif _is_list_like(key) and not (isinstance(key, tuple) and
                                     isinstance(labels, MultiIndex)):
        # a tuple against a MultiIndex is a label lookup, not list indexing
        if hasattr(key, 'ndim') and key.ndim > 1:
            raise ValueError('Cannot index with multidimensional key')
        return self._getitem_iterable(key, axis=axis)
    elif axis == 0:
        is_int_index = _is_integer_index(labels)

        idx = key
        if com.is_integer(key):
            if isinstance(labels, MultiIndex):
                try:
                    return self._get_label(key, axis=0)
                except (KeyError, TypeError):
                    # only re-raise when the first level is integer-based,
                    # i.e. a positional fallback would be ambiguous
                    if _is_integer_index(self.obj.index.levels[0]):
                        raise
            if not is_int_index:
                # positional key on a non-integer index -> translate to label
                idx = labels[key]

        return self._get_label(idx, axis=0)
    else:
        labels = self.obj._get_axis(axis)
        lab = key
        if com.is_integer(key) and not _is_integer_index(labels):
            # positional key on a non-integer axis -> translate to label
            lab = labels[key]
        return self._get_label(lab, axis=axis)
def _getitem_axis(self, key, axis=0):
    """Index along `axis` with a scalar, slice, or list-like key.

    Scalar integer keys on axis 0 are translated from positions to labels
    when the index is not integer-based.
    """
    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)
    elif _is_list_like(key):
        return self._getitem_iterable(key, axis=axis)
    elif axis == 0:
        labels = self.obj._get_axis(0)
        is_int_index = _is_integer_index(labels)

        idx = key
        if com.is_integer(key):
            if isinstance(labels, MultiIndex):
                try:
                    return self._get_label(key, axis=0)
                except (KeyError, TypeError):
                    # only re-raise when a positional fallback would be
                    # ambiguous (first level is integer-based)
                    if _is_integer_index(self.obj.index.levels[0]):
                        raise
            if not is_int_index:
                # positional key on a non-integer index -> translate to label
                idx = labels[key]

        return self._get_label(idx, axis=0)
    else:
        labels = self.obj._get_axis(axis)
        lab = key
        if com.is_integer(key) and not _is_integer_index(labels):
            # positional key on a non-integer axis -> translate to label
            lab = labels[key]
        return self._get_label(lab, axis=axis)
def get_freq_code(freqstr):
    """
    Return frequency code and stride for a freq string, tuple or DateOffset.

    Parameters
    ----------
    freqstr : str, tuple or DateOffset

    Returns
    -------
    tuple of (base frequency code, stride)
    """
    if isinstance(freqstr, DateOffset):
        freqstr = (get_offset_name(freqstr), freqstr.n)

    if isinstance(freqstr, tuple):
        if (com.is_integer(freqstr[0]) and
                com.is_integer(freqstr[1])):
            # e.g., freqstr = (2000, 1)
            return freqstr
        else:
            # e.g., freqstr = ('T', 5)
            try:
                code = _period_str_to_code(freqstr[0])
                stride = freqstr[1]
            except:
                # e.g. ('bad', 5): don't reinterpret the integer stride as
                # the frequency -- propagate the original error instead
                # (consistent with the newer variant of this function)
                if com.is_integer(freqstr[1]):
                    raise
                code = _period_str_to_code(freqstr[1])
                stride = freqstr[0]
            return code, stride

    if com.is_integer(freqstr):
        return (freqstr, 1)

    base, stride = _base_and_stride(freqstr)
    code = _period_str_to_code(base)

    return code, stride
def _get_formatter(self, i):
    """Look up the formatter for column `i`, given as a label or position."""
    formatters = self.formatters
    if isinstance(formatters, (list, tuple)):
        # positional container: only integer keys can index into it
        return formatters[i] if com.is_integer(i) else None

    # dict-like: translate a position to its label, unless the integer
    # itself happens to be a column label
    key = i
    if com.is_integer(key) and key not in self.columns:
        key = self.columns[key]
    return formatters.get(key, None)
def get_freq_code(freqstr):
    """
    Convert a freq string, tuple or DateOffset to (code, stride).

    Parameters
    ----------
    freqstr : str or tuple

    Returns
    -------
    return : tuple of base frequency code and stride (mult)

    Example
    -------
    >>> get_freq_code('3D')
    (6000, 3)

    >>> get_freq_code('D')
    (6000, 1)

    >>> get_freq_code(('D', 3))
    (6000, 3)
    """
    if isinstance(freqstr, DateOffset):
        freqstr = (freqstr.rule_code, freqstr.n)

    if isinstance(freqstr, tuple):
        first, second = freqstr
        if com.is_integer(first) and com.is_integer(second):
            # already a (code, stride) pair, e.g. (2000, 1)
            return freqstr
        # a mixed tuple, e.g. ('T', 5) or (5, 'T')
        try:
            code = _period_str_to_code(first)
            stride = second
        except:
            if com.is_integer(second):
                raise
            code = _period_str_to_code(second)
            stride = first
        return code, stride

    if com.is_integer(freqstr):
        return (freqstr, 1)

    base, stride = _base_and_stride(freqstr)
    return _period_str_to_code(base), stride
def _evaluate_numeric_binop(self, other):
    """Evaluate a numeric binop on a RangeIndex, preserving RangeIndex
    representation when possible, else falling back to plain Index ops.

    NOTE: `op`, `opstr`, `step` and `reversed` are closed over from the
    enclosing factory scope.
    """
    other = self._validate_for_numeric_binop(other, op, opstr)

    attrs = self._get_attributes_dict()
    attrs = self._maybe_update_attributes(attrs)

    if reversed:
        self, other = other, self

    try:
        # apply if we have an override
        if step:
            rstep = step(self._step, other)

            # we don't have a representable op
            # so return a base index
            if not com.is_integer(rstep) or not rstep:
                raise ValueError
        else:
            rstep = self._step

        rstart = op(self._start, other)
        rstop = op(self._stop, other)

        result = RangeIndex(rstart, rstop, rstep, **attrs)

        # for compat with numpy / Int64Index
        # even if we can represent as a RangeIndex, return
        # as a Float64Index if we have float-like descriptors
        if not all([com.is_integer(x) for x in
                    [rstart, rstop, rstep]]):
            result = result.astype('float64')

        return result

    except (ValueError, TypeError, AttributeError):
        pass

    # convert to Int64Index ops
    if isinstance(self, RangeIndex):
        self = self.values
    if isinstance(other, RangeIndex):
        other = other.values

    return Index(op(self, other), **attrs)
def test_quantile_interpolation_dtype(self):
    # GH #10174
    if _np_version_under1p9:
        raise nose.SkipTest("Numpy version is under 1.9")

    from numpy import percentile

    # interpolation = lower (median of [1, 3, 4] is exact, so the result
    # matches np.percentile and stays an integer)
    q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
    self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
    self.assertTrue(com.is_integer(q))

    # interpolation = higher
    q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
    self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
    self.assertTrue(com.is_integer(q))
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
            periods=None, copy=False, name=None, tz=None, **kwargs):
    """Construct a new index from data, ordinals, or a start/end/periods
    range specification."""
    freq = frequencies.get_standard_freq(freq)

    if periods is not None:
        if com.is_float(periods):
            # allow floats that represent whole numbers
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError("Periods must be a number, got %s" %
                             str(periods))

    if data is None:
        if ordinal is not None:
            data = np.asarray(ordinal, dtype=np.int64)
        else:
            data, freq = cls._generate_range(start, end, periods,
                                             freq, kwargs)
    else:
        ordinal, freq = cls._from_arraylike(data, freq, tz)
        data = np.array(ordinal, dtype=np.int64, copy=False)

    return cls._simple_new(data, name=name, freq=freq)
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    Validate a slice bound for a numeric index.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : string / None

    Returns
    -------
    label : object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    # numeric indexes accept integer/float bounds directly; anything else
    # is rejected via _invalid_indexer
    bound_is_numeric = com.is_integer(label) or com.is_float(label)
    if not bound_is_numeric:
        self._invalid_indexer('slice', label)

    return label
def delete(self, loc):
    """
    Make a new TimedeltaIndex with passed location(s) deleted.

    Parameters
    ----------
    loc: int, slice or array of ints
        Indicate which sub-arrays to remove.

    Returns
    -------
    new_index : TimedeltaIndex
    """
    new_tds = np.delete(self.asi8, loc)

    freq = 'infer'
    if is_integer(loc):
        # deleting either endpoint keeps the spacing of the remainder
        if loc in (0, -len(self), -1, len(self) - 1):
            freq = self.freq
    else:
        if com.is_list_like(loc):
            loc = lib.maybe_indices_to_slice(
                com._ensure_int64(np.array(loc)), len(self))
        if isinstance(loc, slice) and loc.step in (1, None):
            # a contiguous run touching either end also preserves freq
            if (loc.start in (0, None) or loc.stop in (len(self), None)):
                freq = self.freq

    return TimedeltaIndex(new_tds, name=self.name, freq=freq)
def _getitem_axis(self, key, axis=0):
    """Label-based indexing along one axis, after validating the key."""
    self._has_valid_type(key, axis)

    labels = self.obj._get_axis(axis)
    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)
    elif _is_list_like(key) and not (isinstance(key, tuple) and
                                     isinstance(labels, MultiIndex)):
        # a tuple against a MultiIndex is a label, not a list of keys
        if hasattr(key, 'ndim') and key.ndim > 1:
            raise ValueError('Cannot index with multidimensional key')
        return self._getitem_iterable(key, axis=axis)
    else:
        if com.is_integer(key):
            if axis == 0 and isinstance(labels, MultiIndex):
                try:
                    return self._get_label(key, axis=axis)
                except (KeyError, TypeError):
                    # only re-raise when a positional fallback would be
                    # ambiguous (integer first level)
                    if self.obj.index.levels[0].is_integer():
                        raise

            # this is the fallback! (for a non-float, non-integer index)
            if not labels.is_floating() and not labels.is_integer():
                return self._get_loc(key, axis=axis)

        return self._get_label(key, axis=axis)
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    If label is a string, cast it to timedelta according to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'ix', 'loc', 'getitem'}

    Returns
    -------
    label : object
    """
    assert kind in ['ix', 'loc', 'getitem', None]

    if isinstance(label, compat.string_types):
        parsed = _coerce_scalar_to_timedelta_type(label, box=True)
        # round down to the parsed resolution to get the lower bound
        lbound = parsed.round(parsed.resolution)
        if side == 'left':
            return lbound
        else:
            # right bound is inclusive: last ns within the resolution bucket
            return (lbound + to_offset(parsed.resolution) -
                    Timedelta(1, 'ns'))
    elif is_integer(label) or is_float(label):
        # numeric bounds make no sense on a timedelta index
        self._invalid_indexer('slice', label)

    return label
def _convert_index(index): inferred_type = lib.infer_dtype(index) # Let's assume the index is homogeneous values = np.asarray(index) if inferred_type == 'datetime64': converted = values.view('i8') return converted, 'datetime64', _tables().Int64Col() elif isinstance(values[0], datetime): converted = np.array([(time.mktime(v.timetuple()) + v.microsecond / 1E6) for v in values], dtype=np.float64) return converted, 'datetime', _tables().Time64Col() elif isinstance(values[0], date): converted = np.array([time.mktime(v.timetuple()) for v in values], dtype=np.int32) return converted, 'date', _tables().Time32Col() elif isinstance(values[0], basestring): converted = np.array(list(values), dtype=np.str_) itemsize = converted.dtype.itemsize return converted, 'string', _tables().StringCol(itemsize) elif com.is_integer(values[0]): # take a guess for now, hope the values fit atom = _tables().Int64Col() return np.asarray(values, dtype=np.int64), 'integer', atom elif com.is_float(values[0]): atom = _tables().Float64Col() return np.asarray(values, dtype=np.float64), 'float', atom else: # pragma: no cover atom = _tables().ObjectAtom() return np.asarray(values, dtype='O'), 'object', atom
def _convert_index(index):
    """Convert an index to (values, kind, PyTables column atom) for HDF
    storage, sniffing the type of the first element.
    """
    # Let's assume the index is homogeneous
    values = np.asarray(index)

    if isinstance(values[0], (datetime, date)):
        if isinstance(values[0], datetime):
            kind = 'datetime'
        else:
            kind = 'date'
        # NOTE(review): mktime keeps only whole seconds, so sub-second
        # precision of datetimes is dropped here -- confirm intended
        converted = np.array([time.mktime(v.timetuple())
                              for v in values], dtype=np.int64)
        return converted, kind, _tables().Time64Col()
    elif isinstance(values[0], basestring):
        converted = np.array(list(values), dtype=np.str_)
        itemsize = converted.dtype.itemsize
        return converted, 'string', _tables().StringCol(itemsize)
    elif com.is_integer(values[0]):
        # take a guess for now, hope the values fit
        atom = _tables().Int64Col()
        return np.asarray(values, dtype=np.int64), 'integer', atom
    elif com.is_float(values[0]):
        atom = _tables().Float64Col()
        return np.asarray(values, dtype=np.float64), 'float', atom
    else:  # pragma: no cover
        atom = _tables().ObjectAtom()
        return np.asarray(values, dtype='O'), 'object', atom
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label

    Returns
    -------
    loc : int
    """
    try:
        return self._engine.get_loc(key)
    except KeyError:
        if is_integer(key):
            # integer keys are ordinals; a miss is a genuine miss
            raise

        # fall back to parsing the key as a datetime string
        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            key = asdt
        except TypeError:
            pass

        try:
            key = Period(key, freq=self.freq)
        except ValueError:
            # we cannot construct the Period
            # as we have an invalid type
            raise KeyError(key)

        try:
            return Index.get_loc(self, key.ordinal, method, tolerance)
        except KeyError:
            raise KeyError(key)
def __add__(self, other):
    """Datetime-like Index addition dispatch (order of checks matters)."""
    from pandas.core.index import Index
    from pandas.tseries.tdi import TimedeltaIndex
    from pandas.tseries.offsets import DateOffset

    if isinstance(other, TimedeltaIndex):
        return self._add_delta(other)
    elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
        if hasattr(other, '_add_delta'):
            # let the datetime-like `other` absorb the delta
            return other._add_delta(self)
        raise TypeError("cannot add TimedeltaIndex and {typ}"
                        .format(typ=type(other)))
    elif isinstance(other, Index):
        # legacy set-operation behaviour of '+'
        warnings.warn("using '+' to provide set union with "
                      "datetimelike Indexes is deprecated, "
                      "use .union()", FutureWarning, stacklevel=2)
        return self.union(other)
    elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
                            tslib.Timedelta)):
        return self._add_delta(other)
    elif com.is_integer(other):
        # integer -> shift by that many periods of our freq
        return self.shift(other)
    elif isinstance(other, (tslib.Timestamp, datetime)):
        return self._add_datelike(other)
    else:  # pragma: no cover
        return NotImplemented
def _convert_scalar_indexer(self, key, kind=None):
    """
    we don't allow integer or float indexing on datetime-like when using loc

    Parameters
    ----------
    key : label of the slice bound
    kind : {'ix', 'loc', 'getitem', 'iloc'} or None
    """
    assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

    if lib.isscalar(key):
        # .loc rejects both ints and floats; .ix/.getitem reject only floats
        numeric = is_integer(key) or is_float(key)
        if kind == 'loc' and numeric:
            self._invalid_indexer('index', key)
        elif kind in ('ix', 'getitem') and is_float(key):
            self._invalid_indexer('index', key)

    return (super(DatetimeIndexOpsMixin, self)
            ._convert_scalar_indexer(key, kind=kind))
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    Cast a string slice bound to a timedelta at the parsed resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : string / None

    Returns
    -------
    label : object
    """
    if isinstance(label, compat.string_types):
        parsed = _coerce_scalar_to_timedelta_type(label, box=True)
        lower = parsed.round(parsed.resolution)
        if side == "left":
            return lower
        # inclusive right bound: last nanosecond of the resolution bucket
        return (lower + _resolution_map[parsed.resolution]() -
                Timedelta(1, "ns"))

    if is_integer(label) or is_float(label):
        self._invalid_indexer("slice", label)

    return label
def __sub__(self, other):
    """Datetime-like Index subtraction dispatch (order of checks matters)."""
    from pandas.core.index import Index
    from pandas.tseries.tdi import TimedeltaIndex
    from pandas.tseries.offsets import DateOffset

    if isinstance(other, TimedeltaIndex):
        return self._add_delta(-other)
    elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
        if not isinstance(other, TimedeltaIndex):
            raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                            .format(typ=type(other)))
        return self._add_delta(-other)
    elif isinstance(other, Index):
        # legacy set-operation behaviour of '-'
        warnings.warn("using '-' to provide set differences with "
                      "datetimelike Indexes is deprecated, "
                      "use .difference()", FutureWarning, stacklevel=2)
        return self.difference(other)
    elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
                            tslib.Timedelta)):
        return self._add_delta(-other)
    elif com.is_integer(other):
        # integer -> shift back by that many periods of our freq
        return self.shift(-other)
    elif isinstance(other, (tslib.Timestamp, datetime)):
        return self._sub_datelike(other)
    elif isinstance(other, prlib.Period):
        return self._sub_period(other)
    else:  # pragma: no cover
        return NotImplemented
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
            periods=None, copy=False, name=None, year=None, month=None,
            quarter=None, day=None, hour=None, minute=None, second=None):
    """Construct an index from data, ordinals, a range spec, or
    field arrays (year/month/.../second)."""
    freq = _freq_mod.get_standard_freq(freq)

    if periods is not None:
        if com.is_float(periods):
            # allow floats that represent whole numbers
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if data is None:
        if ordinal is not None:
            data = np.asarray(ordinal, dtype=np.int64)
        else:
            fields = [year, month, quarter, day, hour, minute, second]
            data, freq = cls._generate_range(start, end, periods,
                                             freq, fields)
    else:
        ordinal, freq = cls._from_arraylike(data, freq)
        data = np.array(ordinal, dtype=np.int64, copy=False)

    subarr = data.view(cls)
    subarr.name = name
    subarr.freq = freq

    return subarr
def convert(values, unit, axis):
    """Convert datetime-like values to matplotlib float date ordinals."""
    def try_parse(values):
        # best-effort parse; fall back to the raw input
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        # already a numeric ordinal
        return values
    elif isinstance(values, compat.string_types):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            # numeric arrays pass through unchanged
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # best effort: leave the input unconverted
            pass

    return values
def convert(values, unit, axis):
    """Convert datetime-like values to matplotlib float date ordinals."""
    from pandas.tseries.index import DatetimeIndex

    def try_parse(values):
        # best-effort parse; fall back to the raw input
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif com.is_integer(values) or com.is_float(values):
        # already a numeric ordinal
        return values
    elif isinstance(values, str):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = np.array(values, dtype="O")
        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # best effort: leave the input unconverted
            pass

    return values
def get_loc(self, key, method=None):
    """
    Get integer location for requested label

    Returns
    -------
    loc : int
    """
    try:
        return self._engine.get_loc(key)
    except KeyError:
        if is_integer(key):
            # integer keys are ordinals; a miss is a genuine miss
            raise

        # fall back to parsing the key as a datetime string
        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            key = asdt
        except TypeError:
            pass

        key = Period(key, self.freq)
        try:
            return Index.get_loc(self, key.ordinal, method=method)
        except KeyError:
            raise KeyError(key)
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    If label is a string or a datetime, cast it to Period.ordinal according
    to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : string / None

    Returns
    -------
    bound : Period or object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    if isinstance(label, datetime):
        return Period(label, freq=self.freq)
    elif isinstance(label, compat.string_types):
        try:
            _, parsed, reso = parse_time_string(label, self.freq)
            bounds = self._parsed_string_to_bounds(reso, parsed)
            # left slice uses the lower bound, right slice the upper
            return bounds[0 if side == 'left' else 1]
        except Exception:
            raise KeyError(label)
    elif is_integer(label) or is_float(label):
        # numeric bounds make no sense on a period index
        self._invalid_indexer('slice', label)

    return label
def _coerce_scalar_to_timedelta_type(r, unit='ns'):
    """Coerce a scalar (string, integer, np.timedelta64 or iNaT) to a
    timedelta64[ns] scalar, with workarounds for numpy < 1.7."""
    # kludgy here until we have a timedelta scalar
    # handle the numpy < 1.7 case

    def conv(v):
        # numpy < 1.7 lacks usable timedelta64 scalars
        if _np_version_under1p7:
            return timedelta(microseconds=v / 1000.0)
        return np.timedelta64(v)

    if isinstance(r, compat.string_types):
        converter = _get_string_converter(r, unit=unit)
        r = converter()
        r = conv(r)
    elif r == tslib.iNaT:
        # NaT passes through untouched
        return r
    elif isinstance(r, np.timedelta64):
        r = r.astype("m8[{0}]".format(unit.lower()))
    elif is_integer(r):
        # interpret the integer as a multiple of `unit`
        r = tslib.cast_from_unit(r, unit)
        r = conv(r)

    if _np_version_under1p7:
        if not isinstance(r, timedelta):
            raise AssertionError("Invalid type for timedelta scalar: %s" %
                                 type(r))
        if compat.PY3:
            # convert to microseconds in timedelta64
            r = np.timedelta64(int(r.total_seconds() * 1e9 +
                                   r.microseconds * 1000))
        else:
            return r

    if isinstance(r, timedelta):
        r = np.timedelta64(r)
    elif not isinstance(r, np.timedelta64):
        raise AssertionError("Invalid type for timedelta scalar: %s" %
                             type(r))
    return r.astype('timedelta64[ns]')
def _ensure_datetime64(other):
    """Coerce `other` to a np.datetime64, accepting integers as M8[us]."""
    if isinstance(other, np.datetime64):
        return other
    if com.is_integer(other):
        # reinterpret the integer's bits as a microsecond datetime64
        return np.int64(other).view("M8[us]")
    raise TypeError(other)
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
    """Resolve a partial string `key` into a slice over this index."""
    # prefer the explicit freq string, falling back to the inferred one
    freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None))
    if is_integer(key) or is_float(key):
        # numeric keys are not valid partial-string slices
        self._invalid_indexer('slice', key)
    return self._partial_td_slice(key, freq,
                                  use_lhs=use_lhs, use_rhs=use_rhs)
def _maybe_get_tz(tz):
    """Coerce a tz name or integer offset to a tzinfo object."""
    if isinstance(tz, compat.string_types):
        import pytz
        tz = pytz.timezone(tz)
    elif com.is_integer(tz):
        # FixedOffset takes minutes; looks like tz is expected in
        # seconds here -- TODO confirm against callers
        import pytz
        tz = pytz.FixedOffset(tz / 60)
    return tz
def _convert_key(self, key):
    """ require integer args (and convert to label arguments) """
    ckey = []
    for axis, pos in zip(self.obj.axes, key):
        if not com.is_integer(pos):
            raise ValueError("iAt based indexing can only have integer indexers")
        # translate the position into the axis label
        ckey.append(axis[pos])
    return ckey
def _offset(window, center):
    """Return the label offset for a window: (window - 1) / 2 truncated to
    int when centered, 0 otherwise.

    Parameters
    ----------
    window : int length, or an array of weights whose len() is used
    center : boolean
    """
    if not com.is_integer(window):
        window = len(window)
    offset = (window - 1) / 2. if center else 0
    try:
        return int(offset)
    except (TypeError, ValueError):
        # narrowed from a bare except: fall back for array-like scalars
        # that don't support int() directly
        return offset.astype(int)
def rolling_window(arg, window=None, win_type=None, min_periods=None,
                   freq=None, center=False, mean=True,
                   time_rule=None, axis=0, **kwargs):
    """
    Applies a moving window of type ``window_type`` and size ``window``
    on the data.

    Parameters
    ----------
    arg : Series, DataFrame
    window : int or ndarray
        Weighting window specification. If the window is an integer, then it is
        treated as the window length and win_type is required
    win_type : str, default None
        Window type (see Notes)
    min_periods : int, default None
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency to conform the data to before computing the statistic.
        Specified as a frequency string or DateOffset object. `time_rule` is a
        legacy alias for `freq`.
    center : boolean, default False
        Whether the label should correspond with center of window
    mean : boolean, default True
        If True computes weighted mean, else weighted sum
    axis : {0, 1}, default 0

    Returns
    -------
    y : type of input argument

    Notes
    -----
    The recognized window types are:

    * ``boxcar``
    * ``triang``
    * ``blackman``
    * ``hamming``
    * ``bartlett``
    * ``parzen``
    * ``bohman``
    * ``blackmanharris``
    * ``nuttall``
    * ``barthann``
    * ``kaiser`` (needs beta)
    * ``gaussian`` (needs std)
    * ``general_gaussian`` (needs power, width)
    * ``slepian`` (needs width).

    By default, the result is set to the right edge of the window. This can be
    changed to the center of the window by setting ``center=True``.

    The `freq` keyword is used to conform time series data to a specified
    frequency by resampling the data. This is done with the default parameters
    of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
    """
    if isinstance(window, (list, tuple, np.ndarray)):
        # explicit weights supplied directly
        if win_type is not None:
            raise ValueError(('Do not specify window type if using custom '
                              'weights'))
        window = com._asarray_tuplesafe(window).astype(float)
    elif com.is_integer(window):  # window size
        if win_type is None:
            raise ValueError('Must specify window type')
        try:
            import scipy.signal as sig
        except ImportError:
            raise ImportError('Please install scipy to generate window weight')
        # may pop from kwargs
        win_type = _validate_win_type(win_type, kwargs)
        window = sig.get_window(win_type, window).astype(float)
    else:
        raise ValueError('Invalid window %s' % str(window))

    minp = _use_window(min_periods, len(window))

    arg = _conv_timerule(arg, freq, time_rule)
    return_hook, values = _process_data_structure(arg)

    # apply the weighted roll along the requested axis
    f = lambda x: algos.roll_window(x, window, minp, avg=mean)
    result = np.apply_along_axis(f, axis, values)

    rs = return_hook(result)
    if center:
        rs = _center_window(rs, len(window), axis)
    return rs
def plot_lines(df, x_col, y_col, colorby_col=None, splitby_col=None,
               color_map=None, use_suptitle=True, use_subplots=False):
    """Plot a metric of mapreduce as a function of parameters.

    Data are first grouped by split_by to produce one figure per value. In
    each figure, data are grouped by colorby_col to create one line per
    value. If you have other parameters you must first group the results
    according to this parameter and then pass them to this function.

    We don't allow a lot of plot options but they can be changed with
    matplotlib API. The only option is the colormap (because line plot don't
    use it so we emulate that functionality). However we try to be a bit
    smart on axis labels.

    Parameters
    ----------
    df: the dataframe containing the results.
    x_col: column name (or index level) of the x-axis.
    y_col: column name of the y-axis (it makes no sense to have an index
        level here).
    colorby_col: column name (or index level) that define colored lines.
        One line per value or level.
    splitby_col: column name (or index level) that define how to split
        figures. One figure per value or level (see use_subplots).
    color_map: color map to use (if None, use default colors). It's a
        dictionary whose key are the values of colorby_col and the values a
        matplotlib color.
    use_suptitle: if True, use values of splitby_col as suptitle (it's hard
        to tune).
    use_subplots: create a subplot instead of a figure for each value of
        splitby_col

    Returns
    -------
    handles: dictionary of figure handles. The key is the value of
        splitby_col and the value is the figure handler (or the subplot if
        use_subplots is True).
    """
    # pandas.DataFrame.plot don't work properly if x_col is an index
    import pandas.core.common as com
    x_col_is_index = com.is_integer(x_col)
    # x_name is the name of the column/index used for x axis
    x_name = df.index.names[x_col] if x_col_is_index \
        else x_col
    colorby_col_is_index = com.is_integer(colorby_col)
    # colorby_name is the name of the column/index used for lines
    colorby_name = df.index.names[colorby_col] if colorby_col_is_index \
        else colorby_col
    # Not useful for now
    splitby_col_is_index = com.is_integer(splitby_col)

    # Labels (note that y_col is supposed to be a good label here)
    x_label = x_name
    colorby_label = colorby_name

    try:
        # Try to group by column name
        fig_groups = df.groupby(splitby_col)
    except KeyError:
        try:
            # Try to group by index level
            fig_groups = df.groupby(level=splitby_col)
        except:
            raise Exception("Cannot group by splitby_col.")

    handles = {}
    fig_created = False
    for i, (splitby_col_val, splitby_col_group) in enumerate(fig_groups, 1):
        if use_subplots:
            # a single figure is created lazily and shared by all subplots
            if not fig_created:
                h = plt.figure()
                n_plots = len(fig_groups)
                n_rows = math.floor(math.sqrt(n_plots))
                n_cols = math.ceil(float(n_plots) / float(n_rows))
                fig_created = True
            handles[splitby_col_val] = plt.subplot(n_rows, n_cols, i)
        else:
            h = plt.figure()
            handles[splitby_col_val] = h
        if use_suptitle:
            plt.suptitle(splitby_col_val)
        try:
            # Try to group by column name
            color_groups = splitby_col_group.groupby(colorby_col)
        except KeyError:
            try:
                # Try to group by index level
                color_groups = splitby_col_group.groupby(level=colorby_col)
            except:
                raise Exception("Cannot group by colorby_col.")
        for colorby_col_val, colorby_col_group in color_groups:
            if x_col_is_index:
                # Remove index: x_col is now referred as x_name
                colorby_col_group.reset_index(level=x_col, inplace=True)
            colorby_col_group.sort(x_name, inplace=True)
            if color_map is None:
                colorby_col_group.plot(x=x_name, y=y_col,
                                       label=colorby_col_val)
            else:
                colorby_col_group.plot(x=x_name, y=y_col,
                                       label=colorby_col_val,
                                       color=color_map[colorby_col_val])
    plt.xlabel(x_label)
    plt.ylabel(y_col)
    plt.legend(title=colorby_label)
    return handles
def __add__(self, other):
    """Shift this Period forward by an integer number of freq units."""
    if not com.is_integer(other):  # pragma: no cover
        raise TypeError(other)
    return Period(ordinal=self.ordinal + other, freq=self.freq)
def __init__(self, value=None, freq=None, ordinal=None, year=None, month=1,
             quarter=None, day=1, hour=0, minute=0, second=0):
    """Construct a Period from a value, an ordinal, or date fields.

    Raises ValueError when both `value` and `ordinal` are given, when a
    required `freq` is missing, or when the value type is unsupported.
    """
    # freq points to a tuple (base, mult); base is one of the defined
    # periods such as A, Q, etc. Every five minutes would be, e.g.,
    # ('T', 5) but may be passed in as a string like '5T'
    self.freq = None

    # ordinal is the period offset from the gregorian proleptic epoch
    self.ordinal = None

    if ordinal is not None and value is not None:
        # fixed: message previously ended with a duplicated "but not both"
        raise ValueError("Only value or ordinal but not both should be "
                         "given")
    elif ordinal is not None:
        if not com.is_integer(ordinal):
            raise ValueError("Ordinal must be an integer")
        if freq is None:
            raise ValueError('Must supply freq for ordinal value')
        self.ordinal = ordinal
    elif value is None:
        if freq is None:
            raise ValueError("If value is None, freq cannot be None")
        self.ordinal = _ordinal_from_fields(year, month, quarter, day,
                                            hour, minute, second, freq)
    elif isinstance(value, Period):
        other = value
        if freq is None or _gfc(freq) == _gfc(other.freq):
            self.ordinal = other.ordinal
            freq = other.freq
        else:
            converted = other.asfreq(freq)
            self.ordinal = converted.ordinal
    elif isinstance(value, compat.string_types) or com.is_integer(value):
        if com.is_integer(value):
            value = str(value)
        dt, freq = _get_date_and_freq(value, freq)
    elif isinstance(value, datetime):
        dt = value
        if freq is None:
            raise ValueError('Must supply freq for datetime value')
    elif isinstance(value, date):
        dt = datetime(year=value.year, month=value.month, day=value.day)
        if freq is None:
            raise ValueError('Must supply freq for datetime value')
    else:
        msg = "Value must be Period, string, integer, or datetime"
        raise ValueError(msg)

    base, mult = _gfc(freq)
    if mult != 1:
        # TODO: Better error message - this is slightly confusing
        raise ValueError('Only mult == 1 supported')

    if self.ordinal is None:
        self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day,
                                            dt.hour, dt.minute, dt.second,
                                            dt.microsecond, 0, base)

    self.freq = _freq_mod._get_freq_str(base)
def __new__(cls, data=None, freq=None, start=None, end=None, periods=None,
            copy=False, name=None, tz=None, verify_integrity=True,
            normalize=False, **kwds):
    """Construct a DatetimeIndex from data or a start/end/periods range,
    coercing inputs to datetime64[ns] and handling timezone localization.
    """
    dayfirst = kwds.pop('dayfirst', None)
    yearfirst = kwds.pop('yearfirst', None)

    # legacy 'offset' keyword is an alias for freq
    warn = False
    if 'offset' in kwds and kwds['offset']:
        freq = kwds['offset']
        warn = True

    freq_infer = False
    if not isinstance(freq, DateOffset):
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None

    if warn:
        import warnings
        warnings.warn("parameter 'offset' is deprecated, "
                      "please use 'freq' instead",
                      FutureWarning)

    offset = freq

    if periods is not None:
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    if data is None:
        # pure range construction
        return cls._generate(start, end, periods, name, offset,
                             tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError('DatetimeIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype='O')

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                    yearfirst=yearfirst)
        else:
            data = tools.to_datetime(data)
            data.offset = offset
            if isinstance(data, DatetimeIndex):
                if name is not None:
                    data.name = name
                if tz is not None:
                    return data.tz_localize(tz)
                return data

    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                  yearfirst=yearfirst)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            if tz is None:
                tz = data.tz
            subarr = data.values
            if offset is None:
                # inherit the existing offset; no need to re-verify
                offset = data.offset
                verify_integrity = False
        else:
            if data.dtype != _NS_DTYPE:
                subarr = lib.cast_to_nanoseconds(data)
            else:
                subarr = data
    elif data.dtype == _INT64_DTYPE:
        if isinstance(data, Int64Index):
            raise TypeError('cannot convert Int64Index->DatetimeIndex')
        if copy:
            subarr = np.asarray(data, dtype=_NS_DTYPE)
        else:
            # zero-copy reinterpretation of the i8 values
            subarr = data.view(_NS_DTYPE)
    else:
        try:
            subarr = tools.to_datetime(data)
        except ValueError:
            # tz aware
            subarr = tools.to_datetime(data, utc=True)

        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError('Unable to convert %s to datetime dtype'
                            % str(data))

    if isinstance(subarr, DatetimeIndex):
        if tz is None:
            tz = subarr.tz
    else:
        if tz is not None:
            tz = tools._maybe_get_tz(tz)

            if (not isinstance(data, DatetimeIndex) or
                    getattr(data, 'tz', None) is None):
                # Convert tz-naive to UTC
                ints = subarr.view('i8')
                subarr = lib.tz_localize_to_utc(ints, tz)

            subarr = subarr.view(_NS_DTYPE)

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    if verify_integrity and len(subarr) > 0:
        if offset is not None and not freq_infer:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError('Dates do not conform to passed '
                                 'frequency')

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr
def _setitem_with_indexer(self, indexer, value):
    """
    Set `value` into self.obj at the location described by `indexer`,
    enlarging the object when the indexer contains missing labels
    (wrapped as dicts by _convert_missing_indexer).

    Takes a "split path" (set column-by-column via `setter`) when the
    object is mixed-type or any axis is a MultiIndex; otherwise sets
    the whole block at once through obj._data.setitem.
    """
    self._has_valid_setitem_indexer(indexer)

    # also has the side effect of consolidating in-place
    from pandas import Panel, DataFrame, Series

    # maybe partial set
    take_split_path = self.obj._is_mixed_type
    if isinstance(indexer, tuple):
        nindexer = []
        for i, idx in enumerate(indexer):
            if isinstance(idx, dict):

                # reindex the axis to the new value
                # and set inplace
                key, _ = _convert_missing_indexer(idx)

                # if this is the items axes, then take the main missing path
                # first; this correctly sets the dtype and avoids cache issues
                # essentially this separates out the block that is needed to possibly
                # be modified
                if self.ndim > 1 and i == self.obj._info_axis_number:

                    # add the new item, and set the value
                    new_indexer = _convert_from_missing_indexer_tuple(indexer)
                    self.obj[key] = np.nan
                    self.obj.loc[new_indexer] = value
                    return self.obj

                # reindex the axis
                index = self.obj._get_axis(i)
                labels = _safe_append_to_index(index, key)
                self.obj._data = self.obj.reindex_axis(labels, i)._data

                if isinstance(labels, MultiIndex):
                    # keep the MultiIndex sorted so get_loc below is valid
                    self.obj.sortlevel(inplace=True)
                    labels = self.obj._get_axis(i)

                nindexer.append(labels.get_loc(key))

            else:
                nindexer.append(idx)

        indexer = tuple(nindexer)
    else:

        indexer, missing = _convert_missing_indexer(indexer)

        if missing:

            # reindex the axis to the new value
            # and set inplace
            if self.ndim == 1:
                index = self.obj.index
                if len(index) == 0:
                    new_index = Index([indexer])
                else:
                    new_index = _safe_append_to_index(index, indexer)

                new_values = np.concatenate([self.obj.values, [value]])
                self.obj._data = self.obj._constructor(
                    new_values, index=new_index, name=self.obj.name)
                return self.obj

            elif self.ndim == 2:
                index = self.obj._get_axis(0)
                labels = _safe_append_to_index(index, indexer)
                self.obj._data = self.obj.reindex_axis(labels, 0)._data
                return getattr(self.obj, self.name).__setitem__(indexer,
                                                                value)

            # set using setitem (Panel and > dims)
            elif self.ndim >= 3:
                return self.obj.__setitem__(indexer, value)

    # set
    info_axis = self.obj._info_axis_number
    item_labels = self.obj._get_axis(info_axis)

    # if we have a complicated setup, take the split path
    if isinstance(indexer, tuple) and any(
            [isinstance(ax, MultiIndex) for ax in self.obj.axes]):
        take_split_path = True

    # align and set the values
    if take_split_path:

        if not isinstance(indexer, tuple):
            indexer = self._tuplify(indexer)

        if isinstance(value, ABCSeries):
            value = self._align_series(indexer, value)

        info_idx = indexer[info_axis]
        if com.is_integer(info_idx):
            info_idx = [info_idx]
        labels = item_labels[info_idx]

        # if we have a partial multiindex, then need to adjust the plane indexer here
        if len(labels) == 1 and isinstance(self.obj[labels[0]].index,
                                           MultiIndex):
            index = self.obj[labels[0]].index
            idx = indexer[:info_axis][0]
            try:
                if idx in index:
                    idx = index.get_loc(idx)
            except:
                # NOTE(review): best-effort translation of a label to a
                # positional loc; failures deliberately fall through
                pass
            plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
            lplane_indexer = _length_of_indexer(plane_indexer[0], index)

            if is_list_like(value) and lplane_indexer != len(value):
                raise ValueError(
                    "cannot set using a multi-index selection indexer with a different length than the value"
                )

        # non-mi
        else:
            plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
            if info_axis > 0:
                plane_axis = self.obj.axes[:info_axis][0]
                lplane_indexer = _length_of_indexer(plane_indexer[0],
                                                    plane_axis)
            else:
                lplane_indexer = 0

        def setter(item, v):
            # write `v` into column `item` along the plane indexer
            s = self.obj[item]
            pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

            # set the item, possibly having a dtype change
            s = s.copy()
            s._data = s._data.setitem(pi, v)
            self.obj[item] = s

        def can_do_equal_len():
            """ return True if we have an equal len settable """
            if not len(labels) == 1:
                return False

            l = len(value)
            item = labels[0]
            index = self.obj[item].index

            # equal len list/ndarray
            if len(index) == l:
                return True
            elif lplane_indexer == l:
                return True

            return False

        if _is_list_like(value):

            # we have an equal len Frame
            if isinstance(value, ABCDataFrame) and value.ndim > 1:
                for item in labels:

                    # align to
                    if item in value:
                        v = value[item]
                        v = v.reindex(self.obj[item].index & v.index)
                        setter(item, v.values)
                    else:
                        setter(item, np.nan)

            # we have an equal len ndarray to our labels
            elif isinstance(value, np.ndarray) and value.ndim == 2:
                if len(labels) != value.shape[1]:
                    raise ValueError('Must have equal len keys and value when'
                                     ' setting with an ndarray')

                for i, item in enumerate(labels):
                    setter(item, value[:, i])

            # we have an equal len list/ndarray
            elif can_do_equal_len():
                setter(labels[0], value)

            # per label values
            else:
                for item, v in zip(labels, value):
                    setter(item, v)

        else:

            # scalar
            for item in labels:
                setter(item, value)

    else:
        if isinstance(indexer, tuple):
            indexer = _maybe_convert_ix(*indexer)

        if isinstance(value, ABCSeries):
            value = self._align_series(indexer, value)

        elif isinstance(value, ABCDataFrame):
            value = self._align_frame(indexer, value)

        if isinstance(value, ABCPanel):
            value = self._align_panel(indexer, value)

        # single-block fast path
        self.obj._data = self.obj._data.setitem(indexer, value)
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    When `is_setter` is True, a missing key is returned wrapped as
    {'key': obj} so the caller can treat it as an enlargement.
    """
    labels = self.obj._get_axis(axis)

    # if we are a scalar indexer and not type correct raise
    obj = self._convert_scalar_indexer(obj, axis)

    # see if we are positional in nature
    is_int_index = labels.is_integer()
    is_int_positional = com.is_integer(obj) and not is_int_index

    # if we are a label return me
    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass
    except (ValueError):
        # ValueError from get_loc is only tolerated for positional ints
        if not is_int_positional:
            raise

    # a positional
    if is_int_positional:

        # if we are setting and its not a valid location
        # its an insert which fails by definition
        if is_setter:
            if obj >= len(self.obj) and not isinstance(labels, MultiIndex):
                raise ValueError("cannot set by positional indexing with "
                                 "enlargement")

        return obj

    if isinstance(obj, slice):
        return self._convert_slice_indexer(obj, axis)

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            obj = _check_bool_indexer(labels, obj)
            inds, = obj.nonzero()
            return inds
        else:
            if isinstance(obj, Index):
                objarr = obj.values
            else:
                objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if is_integer_dtype(objarr) and not is_int_index:
                # translate negative positions to positive ones
                if labels.inferred_type != 'integer':
                    objarr = np.where(objarr < 0,
                                      len(labels) + objarr, objarr)
                return objarr

            # this is not the most robust, but...
            if (isinstance(labels, MultiIndex) and
                    not isinstance(objarr[0], tuple)):
                level = 0
                _, indexer = labels.reindex(objarr, level=level)
                check = labels.levels[0].get_indexer(objarr)
            else:
                level = None

                # unique index
                if labels.is_unique:
                    indexer = check = labels.get_indexer(objarr)

                # non-unique (dups)
                else:
                    indexer, missing = labels.get_indexer_non_unique(objarr)
                    check = indexer

            mask = check == -1
            if mask.any():

                # mi here
                if isinstance(obj, tuple) and is_setter:
                    return {'key': obj}
                raise KeyError('%s not in index' % objarr[mask])

            return indexer

    else:
        try:
            return labels.get_loc(obj)
        except (KeyError):

            # allow a not found key only if we are a setter
            if not is_list_like(obj) and is_setter:
                return {'key': obj}
            raise
def _has_valid_type(self, key, axis):
    """Accept slices, integers, boolean indexers and list-like keys."""
    if isinstance(key, slice):
        return True
    if com.is_integer(key):
        return True
    return com._is_bool_indexer(key) or _is_list_like(key)
def __init__(self, value=None, freq=None, ordinal=None,
             year=None, month=1, quarter=None, day=1,
             hour=0, minute=0, second=0):
    """
    Represents an period of time

    Parameters
    ----------
    value : Period or basestring, default None
        The time period represented (e.g., '4Q2005')
    freq : str, default None
        e.g., 'B' for businessday, ('T', 5) or '5T' for 5 minutes
    year : int, default None
    month : int, default 1
    quarter : int, default None
    day : int, default 1
    hour : int, default 0
    minute : int, default 0
    second : int, default 0

    Raises
    ------
    ValueError
        If both `value` and `ordinal` are given, if `freq` is required
        but missing, if mult != 1, or if `value` has an unsupported type.
    """
    # freq points to a tuple (base, mult);  base is one of the defined
    # periods such as A, Q, etc. Every five minutes would be, e.g.,
    # ('T', 5) but may be passed in as a string like '5T'

    self.freq = None

    # ordinal is the period offset from the gregorian proleptic epoch
    self.ordinal = None

    if ordinal is not None and value is not None:
        raise ValueError(("Only value or ordinal but not both should be "
                          "given but not both"))
    elif ordinal is not None:
        # construct directly from an ordinal; freq is mandatory
        if not com.is_integer(ordinal):
            raise ValueError("Ordinal must be an integer")
        if freq is None:
            raise ValueError('Must supply freq for ordinal value')
        self.ordinal = ordinal
    elif value is None:
        # construct from the individual date/time fields
        if freq is None:
            raise ValueError("If value is None, freq cannot be None")
        self.ordinal = _ordinal_from_fields(year, month, quarter, day,
                                            hour, minute, second, freq)
    elif isinstance(value, Period):
        other = value
        if freq is None or _gfc(freq) == _gfc(other.freq):
            # same (or unspecified) freq: copy the ordinal directly
            self.ordinal = other.ordinal
            freq = other.freq
        else:
            converted = other.asfreq(freq)
            self.ordinal = converted.ordinal
    elif isinstance(value, basestring) or com.is_integer(value):
        if com.is_integer(value):
            value = str(value)
        # parse the string; may also derive the freq from it
        dt, freq = _get_date_and_freq(value, freq)
    elif isinstance(value, datetime):
        dt = value
        if freq is None:
            raise ValueError('Must supply freq for datetime value')
    elif isinstance(value, date):
        dt = datetime(year=value.year, month=value.month, day=value.day)
        if freq is None:
            raise ValueError('Must supply freq for datetime value')
    else:
        msg = "Value must be Period, string, integer, or datetime"
        raise ValueError(msg)

    base, mult = _gfc(freq)
    if mult != 1:
        raise ValueError('Only mult == 1 supported')

    if self.ordinal is None:
        # ordinal not set by a branch above: derive it from the datetime
        self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day,
                                            dt.hour, dt.minute, dt.second,
                                            base)

    self.freq = _freq_mod._get_freq_str(base)
def _is_valid_index(x):
    """True for integers, or floats that are (within tolerance) integral."""
    if com.is_integer(x):
        return True
    # a float qualifies only when it is numerically equal to its int cast
    return com.is_float(x) and np.allclose(x, int(x), rtol=_eps, atol=0)
def _is_valid_index(x):
    """Only plain integer values are acceptable as index positions here."""
    ok = com.is_integer(x)
    return ok
def __init__(self, f, delimiter=None, dialect=None, names=None, header=0,
             index_col=None, na_values=None, keep_default_na=True,
             thousands=None, comment=None, parse_dates=False,
             keep_date_col=False, date_parser=None, dayfirst=False,
             chunksize=None, skiprows=None, skip_footer=0, converters=None,
             verbose=False, encoding=None, squeeze=False):
    """
    Workhorse function for processing nested list into DataFrame

    Should be replaced by np.genfromtxt eventually?
    """
    self.data = None
    self.buf = []
    self.pos = 0

    self.names = list(names) if names is not None else names
    self.header = header
    self.index_col = index_col
    self.chunksize = chunksize
    self.passed_names = names is not None
    self.encoding = encoding

    self.parse_dates = parse_dates
    self.keep_date_col = keep_date_col
    self.date_parser = date_parser
    self.dayfirst = dayfirst

    # an integer skiprows means "skip the first N rows"
    if com.is_integer(skiprows):
        skiprows = range(skiprows)
    self.skiprows = set() if skiprows is None else set(skiprows)

    self.skip_footer = skip_footer
    self.delimiter = delimiter
    self.dialect = dialect
    self.verbose = verbose

    if converters is not None:
        assert(isinstance(converters, dict))
        self.converters = converters
    else:
        self.converters = {}

    assert(self.skip_footer >= 0)

    self.keep_default_na = keep_default_na
    if na_values is None and keep_default_na:
        self.na_values = _NA_VALUES
    elif isinstance(na_values, dict):
        # per-column NA values, optionally unioned with the defaults
        if keep_default_na:
            for k, v in na_values.iteritems():
                v = set(list(v)) | _NA_VALUES
                na_values[k] = v
        self.na_values = na_values
    else:
        na_values = set(list(na_values))
        if keep_default_na:
            na_values = na_values | _NA_VALUES
        self.na_values = na_values

    self.thousands = thousands
    self.comment = comment
    self._comment_lines = []

    # file-like objects get a reader; anything else is treated as data
    if hasattr(f, 'readline'):
        self._make_reader(f)
    else:
        self.data = f
    self.columns = self._infer_columns()

    # needs to be cleaned/refactored
    # multiple date column thing turning into a real sphaghetti factory

    # get popped off for index
    self.orig_columns = list(self.columns)

    self.index_name = None
    self._name_processed = False
    if not self._has_complex_date_col:
        self.index_name = self._get_index_name()
        self._name_processed = True

    self._first_chunk = True

    self.squeeze = squeeze
def _can_hold_element(self, element):
    """This block type accepts integers and datetime values."""
    if isinstance(element, datetime):
        return True
    return com.is_integer(element)
def test_is_integer(self):
    """Exercise com.is_integer against accepted and rejected values."""
    # questionable: np.timedelta64 is currently treated as an integer
    accepted = [1, np.int64(1), np.timedelta64(1, 'D')]
    for val in accepted:
        self.assertTrue(com.is_integer(val))

    rejected = [
        True,
        1.1,
        1 + 3j,
        np.bool(False),
        np.bool_(False),
        np.float64(1.1),
        np.complex128(1 + 3j),
        np.nan,
        None,
        'x',
        datetime(2011, 1, 1),
        np.datetime64('2011-01-01'),
        pd.Timestamp('2011-01-01'),
        pd.Timestamp('2011-01-01', tz='US/Eastern'),
        timedelta(1000),
        pd.Timedelta('1 days'),
    ]
    for val in rejected:
        self.assertFalse(com.is_integer(val))
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, copy=False, name=None, closed=None,
            verify_integrity=True, **kwargs):
    """
    Construct a new TimedeltaIndex from `data`, or generate one from
    start/end/periods when `data` is None.

    Notes (from the visible code)
    -----------------------------
    - freq='infer' defers frequency determination to inferred_freq.
    - When `verify_integrity` is True and an explicit freq was passed,
      the data must regenerate identically under that freq or ValueError
      is raised.
    """
    if isinstance(data, TimedeltaIndex) and freq is None:
        # fast path: already the right type, just honor `copy`
        if copy:
            data = data.copy()
        return data

    freq_infer = False
    if not isinstance(freq, DateOffset):

        # if a passed freq is None, don't infer automatically
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None

    if periods is not None:
        if is_float(periods):
            periods = int(periods)
        elif not is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if data is None and freq is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    # generation path: no data, build from start/end/periods
    if data is None:
        return cls._generate(start, end, periods, name, freq,
                             closed=closed)

    if unit is not None:
        data = to_timedelta(data, unit=unit, box=False)

    if not isinstance(data, (np.ndarray, Index, ABCSeries)):
        if np.isscalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        data = np.array(data, copy=True)

    # check that we are matching freqs
    if verify_integrity and len(data) > 0:
        if freq is not None and not freq_infer:
            index = cls._simple_new(data, name=name)
            inferred = index.inferred_freq
            if inferred != freq.freqstr:
                # regenerate from the first element and compare the i8
                # values; a mismatch means the data truly don't conform
                on_freq = cls._generate(
                    index[0], None, len(index), name, freq)
                if not np.array_equal(index.asi8, on_freq.asi8):
                    raise ValueError(
                        'Inferred frequency {0} from passed timedeltas does not '
                        'conform to passed frequency {1}'.format(
                            inferred, freq.freqstr))
            index.freq = freq
            return index

    if freq_infer:
        index = cls._simple_new(data, name=name)
        inferred = index.inferred_freq
        if inferred:
            index.freq = to_offset(inferred)
        return index

    return cls._simple_new(data, name=name, freq=freq)
def _setitem_with_indexer(self, indexer, value):
    """
    Set `value` into self.obj at `indexer`.

    Mixed-type objects are set column-by-column through `setter` (so each
    column can change dtype independently); homogeneous objects go
    through obj._data.setitem in one shot.
    """
    # also has the side effect of consolidating in-place

    # mmm, spaghetti

    if self.obj._is_mixed_type:
        if not isinstance(indexer, tuple):
            indexer = self._tuplify(indexer)

        if isinstance(value, ABCSeries):
            value = self._align_series(indexer, value)

        info_axis = self.obj._info_axis_number
        info_idx = indexer[info_axis]
        if com.is_integer(info_idx):
            info_idx = [info_idx]

        # the indexer over the non-item axes
        plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
        item_labels = self.obj._get_axis(info_axis)

        def setter(item, v):
            # write `v` into column `item` along the plane indexer
            s = self.obj[item]
            pi = plane_indexer[0] if len(
                plane_indexer) == 1 else plane_indexer

            # set the item, possibly having a dtype change
            s = s.copy()
            s._data = s._data.setitem(pi, v)
            self.obj[item] = s

        labels = item_labels[info_idx]

        if _is_list_like(value):

            # we have an equal len Frame
            if isinstance(value, ABCDataFrame) and value.ndim > 1:
                for item in labels:

                    # align to
                    if item in value:
                        v = value[item]
                        v = v.reindex(self.obj[item].index & v.index)
                        setter(item, v.values)
                    else:
                        setter(item, np.nan)

            # we have an equal len ndarray to our labels
            elif isinstance(value, np.ndarray) and value.ndim == 2:
                if len(labels) != value.shape[1]:
                    raise ValueError('Must have equal len keys and value when'
                                     ' setting with an ndarray')

                for i, item in enumerate(labels):
                    setter(item, value[:, i])

            # we have an equal len list/ndarray
            elif len(labels) == 1 and (
                    len(self.obj[labels[0]]) == len(value) or
                    len(plane_indexer[0]) == len(value)):
                setter(labels[0], value)

            # per label values
            else:
                for item, v in zip(labels, value):
                    setter(item, v)

        else:

            # scalar
            for item in labels:
                setter(item, value)

    else:
        if isinstance(indexer, tuple):
            indexer = _maybe_convert_ix(*indexer)

        if isinstance(value, ABCSeries):
            value = self._align_series(indexer, value)

        elif isinstance(value, ABCDataFrame):
            value = self._align_frame(indexer, value)

        if isinstance(value, ABCPanel):
            value = self._align_panel(indexer, value)

        # single-block fast path
        self.obj._data = self.obj._data.setitem(indexer, value)
def _convert_to_indexer(self, obj, axis=0):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing
    """
    labels = self.obj._get_axis(axis)
    is_int_index = _is_integer_index(labels)

    # an integer key on a non-integer index is taken positionally
    if com.is_integer(obj) and not is_int_index:
        return obj

    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        ltype = labels.inferred_type

        if ltype == 'floating':
            int_slice = _is_int_slice(obj)
        else:
            # floats that are within tolerance of int used
            int_slice = _is_index_slice(obj)

        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex
        position_slice = (int_slice
                          and not ltype == 'integer'
                          and not isinstance(labels, MultiIndex))

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers
        try:
            if 'mixed' in ltype and int_slice:
                if start is not None:
                    i = labels.get_loc(start)
                if stop is not None:
                    j = labels.get_loc(stop)
                # both ends resolve as labels: use label-based slicing
                position_slice = False
        except KeyError:
            if ltype == 'mixed-integer-float':
                raise

        if null_slice or position_slice:
            slicer = obj
        else:
            try:
                i, j = labels.slice_locs(start, stop)
                slicer = slice(i, j, obj.step)
            except Exception:
                if _is_index_slice(obj):
                    if labels.inferred_type == 'integer':
                        raise
                    slicer = obj
                else:
                    raise

        return slicer

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            objarr = _check_bool_indexer(labels, obj)
            return objarr
        else:
            if isinstance(obj, Index):
                objarr = obj.values
            else:
                objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if _is_integer_dtype(objarr) and not is_int_index:
                return objarr

            # this is not the most robust, but...
            if (isinstance(labels, MultiIndex) and
                    not isinstance(objarr[0], tuple)):
                level = 0
                _, indexer = labels.reindex(objarr, level=level)
                check = labels.levels[0].get_indexer(objarr)
            else:
                level = None

                # XXX
                if labels.is_unique:
                    indexer = check = labels.get_indexer(objarr)
                else:
                    # non-unique index: brute-force an OR-mask of matches
                    mask = np.zeros(len(labels), dtype=bool)
                    lvalues = labels.values
                    for x in objarr:
                        # ugh
                        to_or = lib.map_infer(lvalues, x.__eq__)
                        if not to_or.any():
                            raise KeyError('%s not in index' % str(x))
                        mask |= to_or

                    indexer = check = mask.nonzero()[0]

            mask = check == -1
            if mask.any():
                raise KeyError('%s not in index' % objarr[mask])

            return indexer
    else:
        return labels.get_loc(obj)
def _setitem_with_indexer(self, indexer, value): from pandas.core.frame import DataFrame, Series # also has the side effect of consolidating in-place # mmm, spaghetti if self.obj._is_mixed_type: if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) if isinstance(value, Series): value = self._align_series(indexer, value) het_axis = self.obj._het_axis het_idx = indexer[het_axis] if com.is_integer(het_idx): het_idx = [het_idx] plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:] item_labels = self.obj._get_axis(het_axis) def setter(item, v): data = self.obj[item] values = data.values if np.prod(values.shape): result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None)) self.obj[item] = result labels = item_labels[het_idx] if _is_list_like(value): # we have an equal len Frame if isinstance(value, DataFrame) and value.ndim > 1: for item in labels: # align to if item in value: v = value[item] v = v.reindex(self.obj[item].index & v.index) setter(item, v.values) else: setter(item, np.nan) # we have an equal len ndarray elif isinstance(value, np.ndarray) and value.ndim == 2: if len(labels) != value.shape[1]: raise ValueError('Must have equal len keys and value when' ' setting with an ndarray') for i, item in enumerate(labels): setter(item, value[:,i]) # we have an equal len list/ndarray elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value): setter(labels[0], value) # per label values else: for item, v in zip(labels, value): setter(item, v) else: # scalar for item in labels: setter(item, value) else: if isinstance(indexer, tuple): indexer = _maybe_convert_ix(*indexer) if isinstance(value, Series): value = self._align_series(indexer, value) if isinstance(value, DataFrame): value = self._align_frame(indexer, value) # 2096 values = self.obj.values if np.prod(values.shape): values[indexer] = value
def _convert_to_indexer(self, obj, axis=0):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing
    """
    labels = self.obj._get_axis(axis)
    is_int_index = _is_integer_index(labels)

    # an integer key on a non-integer index is taken positionally
    if com.is_integer(obj) and not is_int_index:
        return obj

    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        ltype = labels.inferred_type

        # in case of providing all floats, use label-based indexing
        float_slice = (labels.inferred_type == 'floating'
                       and _is_float_slice(obj))

        # floats that are within tolerance of int used as positions
        int_slice = _is_index_slice(obj)

        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex,
        # in which case we wouldn't want to do position-based slicing
        position_slice = (int_slice
                          and not ltype == 'integer'
                          and not isinstance(labels, MultiIndex)
                          and not float_slice)

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers
        try:
            if position_slice and 'mixed' in ltype:
                if start is not None:
                    i = labels.get_loc(start)
                if stop is not None:
                    j = labels.get_loc(stop)
                # both ends resolve as labels: use label-based slicing
                position_slice = False
        except KeyError:
            if ltype == 'mixed-integer-float':
                raise

        if null_slice or position_slice:
            indexer = obj
        else:
            try:
                indexer = labels.slice_indexer(start, stop, obj.step)
            except Exception:
                if _is_index_slice(obj):
                    if ltype == 'integer':
                        raise
                    indexer = obj
                else:
                    raise

        return indexer

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            obj = _check_bool_indexer(labels, obj)
            inds, = obj.nonzero()
            return inds
        else:
            if isinstance(obj, Index):
                objarr = obj.values
            else:
                objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if _is_integer_dtype(objarr) and not is_int_index:
                # translate negative positions to positive ones
                if labels.inferred_type != 'integer':
                    objarr = np.where(objarr < 0,
                                      len(labels) + objarr, objarr)
                return objarr

            # this is not the most robust, but...
            if (isinstance(labels, MultiIndex) and
                    not isinstance(objarr[0], tuple)):
                level = 0
                _, indexer = labels.reindex(objarr, level=level)
                check = labels.levels[0].get_indexer(objarr)
            else:
                level = None

                # unique index
                if labels.is_unique:
                    indexer = check = labels.get_indexer(objarr)

                # non-unique (dups)
                else:
                    indexer, missing = labels.get_indexer_non_unique(objarr)
                    check = indexer

            mask = check == -1
            if mask.any():
                raise KeyError('%s not in index' % objarr[mask])

            return indexer
    else:
        return labels.get_loc(obj)
def crit(x):
    """Reject keys already present in `labels`; otherwise accept None
    or integer values."""
    try:
        labels.get_loc(x)
    except KeyError:
        # not an existing label: acceptable when None or an integer
        return com.is_integer(x) or x is None
    return False
def _crit(v):
    """True for None or any integer value."""
    if v is None:
        return True
    return com.is_integer(v)
def __add__(self, other):
    """Integer addition shifts the ordinals; everything else defers to
    the superclass implementation."""
    if not com.is_integer(other):
        return super(PeriodIndex, self).__add__(other)
    return PeriodIndex(ordinal=self.values + other, freq=self.freq)
def _can_hold_element(self, element):
    """Only plain integer values can be stored in this block."""
    is_ok = com.is_integer(element)
    return is_ok
def _convert_to_indexer(self, obj, axis=0):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing
    """
    labels = self.obj._get_axis(axis)

    # label lookup first; fall through for keys that are not labels
    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    is_int_index = _is_integer_index(labels)
    if isinstance(obj, slice):
        int_slice = _is_integer_slice(obj)
        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex
        position_slice = (int_slice
                          and not labels.inferred_type == 'integer'
                          and not isinstance(labels, MultiIndex))
        if null_slice or position_slice:
            slicer = obj
        else:
            try:
                i, j = labels.slice_locs(obj.start, obj.stop)
                slicer = slice(i, j, obj.step)
            except Exception:
                if _is_integer_slice(obj):
                    if labels.inferred_type == 'integer':
                        raise
                    slicer = obj
                else:
                    raise

        return slicer

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            objarr = _check_bool_indexer(labels, obj)
            return objarr
        else:
            objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if _is_integer_dtype(objarr) and not is_int_index:
                return objarr

            indexer = labels.get_indexer(objarr)
            mask = indexer == -1
            if mask.any():
                raise KeyError('%s not in index' % objarr[mask])

            return indexer
    else:
        # an integer key on a non-integer index is taken positionally
        if com.is_integer(obj) and not is_int_index:
            return obj

        return labels.get_loc(obj)
def __add__(self, other):
    """Shift this Period's ordinal by an integer; reject anything else."""
    if not com.is_integer(other):
        raise ValueError("Cannot add with non-integer value")
    return Period(ordinal=self.ordinal + other, freq=self.freq)