def compare(s, name):
    """Check that ``Series.dt.<name>`` matches the Index-derived expectation.

    Scalar results are compared with ``assertEqual``; list-like results with
    ``assert_series_equal``.  Relies on the enclosing test method's closure
    for ``self``, ``get_expected``, ``com`` and ``tm``.
    """
    # BUG FIX: the original read the enclosing loop variable ``prop``
    # instead of the ``name`` parameter, so the argument was ignored.
    a = getattr(s.dt, name)
    b = get_expected(s, name)
    if not (com.is_list_like(a) and com.is_list_like(b)):
        self.assertEqual(a, b)
    else:
        tm.assert_series_equal(a, b)
def test_is_list_like():
    """Containers, Series and .str accessors are list-like; scalars are not."""
    list_like = (
        [], [1], (1,), (1, 2), {"a": 1}, set([1, "a"]),
        Series([1]), Series([]), Series(["a"]).str,
    )
    not_list_like = (1, "2", object())
    for obj in list_like:
        assert com.is_list_like(obj)
    for obj in not_list_like:
        assert not com.is_list_like(obj)
def test_is_list_like():
    """is_list_like accepts containers/Series and rejects scalars."""
    passing_cases = [[], [1], (1,), (1, 2), {'a': 1}, set([1, 'a']),
                     Series([1]), Series([]), Series(['a']).str]
    failing_cases = [1, '2', object()]
    assert all(com.is_list_like(case) for case in passing_cases)
    assert not any(com.is_list_like(case) for case in failing_cases)
def test_is_list_like():
    """Exercise is_list_like on representative positives and negatives."""
    for candidate in ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']),
                      Series([1]), Series([]), Series(['a']).str):
        assert com.is_list_like(candidate)
    for candidate in (1, '2', object()):
        assert not com.is_list_like(candidate)
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """
    if not com.is_list_like(comps):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(values).__name__))

    # BUG FIX (consistency with the sibling isin implementation):
    # materialize non-ndarray values (sets, generators, ...) once so they
    # survive the repeated consumption below (set(y), to_datetime, ...).
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_int64_dtype(comps):
        pass
    else:
        # non-int64 dtypes fall back to object-level membership testing
        f = lambda x, y: lib.ismember(x, set(values))

    return f(comps, values)
def _maybe_convert_data(self, data, target, *args, **kwargs):
    """
    Internal function to instantiate data and target

    Parameters
    ----------
    data : instance
        converted to ``pandas.DataFrame``
    target : instance
        converted to ``pandas.Series``
    args : argument passed from ``__init__``
    kwargs : argument passed from ``__init__``
    """
    init_df = isinstance(data, pd.DataFrame)
    init_target = isinstance(target, (pd.Series, pd.DataFrame))

    def _maybe_convert_target(data, target, index=None):
        # borrow data's index when available so target aligns with it
        if data is not None:
            index = data.index
        target = np.array(target)
        # 1-dim -> Series, 2-dim -> DataFrame
        if len(target.shape) == 1:
            target = pd.Series(target, index=index)
        else:
            target = pd.DataFrame(target, index=index)
        return target

    if not init_df and not init_target:
        # neither is a pandas object yet: convert both
        if data is not None:
            data = pd.DataFrame(data, *args, **kwargs)
        if com.is_list_like(target):
            target = _maybe_convert_target(data, target)
    elif not init_df:
        # target is already pandas; align data to target's index by default
        if data is not None:
            index = kwargs.pop('index', target.index)
            data = pd.DataFrame(data, index=index, *args, **kwargs)
    elif not init_target:
        # data is already pandas; only target needs conversion
        if com.is_list_like(target):
            target = _maybe_convert_target(data, target)
    else:
        # no conversion required
        pass

    # give an unnamed target the class default name
    if isinstance(target, pd.Series) and target.name is None:
        target = pd.Series(target, name=self._TARGET_NAME)

    return data, target
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """
    if not com.is_list_like(comps):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(values).__name__))

    # materialize non-ndarray values once so sets/iterators survive the
    # repeated consumption below
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif com.is_int64_dtype(comps):
        pass
    else:
        # non-int64 dtypes fall back to object-level membership testing
        f = lambda x, y: lib.ismember(x, set(values))

    return f(comps, values)
def _is_dtype_compat(self, other):
    """
    *this is an internal non-public method*

    provide a comparison between the dtype of self and other (coercing if
    needed)

    Raises
    ------
    TypeError if the dtypes are not compatible
    """
    if com.is_categorical_dtype(other):
        if isinstance(other, CategoricalIndex):
            other = other._values
        if not other.is_dtype_equal(self):
            raise TypeError("categories must match existing categories "
                            "when appending")
    else:
        # other is a scalar or plain list-like: coerce it onto our
        # categories, then verify every element is a known category
        values = other
        if not com.is_list_like(values):
            values = [values]
        other = CategoricalIndex(
            self._create_categorical(self, other,
                                     categories=self.categories,
                                     ordered=self.ordered))
        if not other.isin(values).all():
            raise TypeError("cannot append a non-category item to a "
                            "CategoricalIndex")
    return other
def conform(self, rhs):
    """ inplace conform rhs """
    if not com.is_list_like(rhs):
        rhs = [rhs]
    # BUG FIX: the original tested ``hasattr(self.rhs, 'ravel')`` (an
    # attribute on self) but then called ``rhs.ravel()`` on the local value,
    # which crashes when rhs was just wrapped in a list above.  Only actual
    # ndarrays can (and should) be raveled, mirroring the sibling
    # implementation of ``conform``.
    if isinstance(rhs, np.ndarray):
        rhs = rhs.ravel()
    return rhs
def _create_table_setup(self):
    """Build the SQLAlchemy Table object for this frame's schema.

    Returns a Table attached to a fresh MetaData object; it is only
    attached to ``self.meta`` once the table is actually created.
    """
    from sqlalchemy import Table, Column, PrimaryKeyConstraint

    column_names_and_types = \
        self._get_column_names_and_types(self._sqlalchemy_type)

    columns = [Column(name, typ, index=is_index)
               for name, typ, is_index in column_names_and_types]

    if self.keys is not None:
        # normalize a single key to a list so PrimaryKeyConstraint(*keys)
        # works for both cases
        if not com.is_list_like(self.keys):
            keys = [self.keys]
        else:
            keys = self.keys
        pkc = PrimaryKeyConstraint(*keys, name=self.name + '_pk')
        columns.append(pkc)

    schema = self.schema or self.pd_sql.meta.schema

    # At this point, attach to new metadata, only attach to self.meta
    # once table is created.
    from sqlalchemy.schema import MetaData
    meta = MetaData(self.pd_sql, schema=schema)

    # BUG FIX (was marked "FIX HERE"): the original passed
    # prefixes=['TEMPORARY'], which creates a session-scoped TEMPORARY
    # table that disappears when the connection closes; create a regular
    # persistent table instead.
    return Table(self.name, meta, *columns, schema=schema)
def _gotitem(self, key, ndim, subset=None): """ sub-classes to define return a sliced object Parameters ---------- key : string / list of selections ndim : 1,2 requested ndim of result subset : object, default None subset to act on """ # create a new object to prevent aliasing if subset is None: subset = self.obj # we need to make a shallow copy of ourselves # with the same groupby kwargs = dict([(attr, getattr(self, attr)) for attr in self._attributes]) self = self.__class__(subset, groupby=self._groupby[key], parent=self, **kwargs) self._reset_cache() if subset.ndim == 2: if lib.isscalar(key) and key in subset or com.is_list_like(key): self._selection = key return self
def conform(self, rhs):
    """ inplace conform rhs """
    if com.is_list_like(rhs):
        value = rhs
    else:
        value = [rhs]
    # flatten genuine ndarrays to 1-d; wrapped scalars stay as a list
    if isinstance(value, np.ndarray):
        value = value.ravel()
    return value
def wrapper(self, other):
    """Comparison wrapper for TimedeltaIndex: dispatches to the parent
    class's ``opname`` method and patches NaT positions in the result."""
    func = getattr(super(TimedeltaIndex, self), opname)
    if _is_convertible_to_td(other):
        # scalar timedelta-like: compare against its m8 representation
        other = _to_m8(other)
        result = func(other)
        if com.isnull(other):
            # comparisons against NaT yield the op-specific NaT result
            result.fill(nat_result)
    else:
        if not com.is_list_like(other):
            raise TypeError("cannot compare a TimedeltaIndex with type "
                            "{0}".format(type(other)))
        other = TimedeltaIndex(other).values
        result = func(other)
        result = _values_from_object(result)

        # mask positions where the *other* side is NaT
        if isinstance(other, Index):
            o_mask = other.values.view('i8') == tslib.iNaT
        else:
            o_mask = other.view('i8') == tslib.iNaT

        if o_mask.any():
            result[o_mask] = nat_result

    # mask positions where *self* is NaT
    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    if com.is_bool_dtype(result):
        return result
    return Index(result)
def _length_of_indexer(indexer, target=None): """ return the length of a single non-tuple indexer which could be a slice """ if target is not None and isinstance(indexer, slice): l = len(target) start = indexer.start stop = indexer.stop step = indexer.step if start is None: start = 0 elif start < 0: start += l if stop is None or stop > l: stop = l elif stop < 0: stop += l if step is None: step = 1 elif step < 0: step = abs(step) return (stop - start) / step elif isinstance(indexer, (ABCSeries, np.ndarray, list)): return len(indexer) elif not is_list_like(indexer): return 1 raise AssertionError("cannot find the length of the indexer")
def cdd(self, periods, unit, base, per_day=False):
    """The total cooling degree days observed during each time period.

    Parameters
    ----------
    periods : list of eemeter.evaluation.Period objects
        Time periods over which cooling degree days will be calculated and
        collected. A single Period may be given.
    unit : {"degC", "degF"}
        The temperature unit to be used
    base : int or float
        The base of the cooling degree day
    per_day : bool, default=False
        If True, the total should be returned as an average instead of a
        sum.

    Returns
    -------
    out : np.ndarray
        Array of cooling degree days observed during each time period.
    """
    # single Period: delegate directly and return the scalar result
    if not is_list_like(periods):
        return self._period_cdd(periods, unit, base, per_day)
    per_period = [self._period_cdd(p, unit, base, per_day)
                  for p in periods]
    return np.array(per_period)
def hourly_temperatures(self, periods, unit):
    """The hourly observed temperatures for each period.

    Parameters
    ----------
    periods : [list of] eemeter.evaluation.Period
        Time periods over which temperatures will be collected. A single
        datetime period may be given.
    unit : {"degC", "degF"}
        The unit in which temperatures should be returned.

    Returns
    -------
    out : np.ndarray
        Array of arrays of observed_daily temperatures observed during
        each period. Note: array is not guaranteed to be rectangular.
        If a single datetime period is given, a single numpy array of
        temperatures will be returned.
    """
    # single period: delegate with the target unit directly
    if not is_list_like(periods):
        return self._period_hourly_temperatures(periods, unit)
    # list of periods: collect raw values, then convert units in bulk
    raw = [self._period_hourly_temperatures(p, None) for p in periods]
    return self._unit_convert(np.array(raw), unit)
def average_temperature(self, periods, unit):
    """The average temperatures during each period as calculated by taking
    the mean of all available daily average temperatures during that
    period.

    Parameters
    ----------
    periods : [list of] eemeter.evaluation.Period
        Time periods over which temperatures will be aggregated. A single
        datetime period may be given.
    unit : {"degC", "degF"}
        The unit in which average temperatures should be returned.

    Returns
    -------
    out : np.ndarray
        Array of average temperatures observed during each period. If a
        single datetime period is given, a single temperature will be
        returned as a float.
    """
    # single period: delegate with the target unit directly
    if not is_list_like(periods):
        return self._period_average_temperature(periods, unit)
    # list of periods: collect raw values, then convert units in bulk
    raw = [self._period_average_temperature(p, None) for p in periods]
    return self._unit_convert(np.array(raw), unit)
def read(self):
    """Fetch the requested FRED series as a DataFrame: one column per
    symbol, outer-joined on date, truncated to [self.start, self.end]."""
    if not is_list_like(self.symbols):
        names = [self.symbols]
    else:
        names = self.symbols

    urls = [self.url + '%s' % n + '/downloaddata/%s' % n + '.csv'
            for n in names]

    def fetch_data(url, name):
        # download one series as CSV and truncate to the requested window
        resp = self._read_url_as_StringIO(url)
        data = read_csv(resp, index_col=0, parse_dates=True,
                        header=None, skiprows=1, names=["DATE", name],
                        na_values='.')
        try:
            return data.truncate(self.start, self.end)
        except KeyError:  # pragma: no cover
            # FRED serves an HTML error page for unknown series; detect it
            if data.ix[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that "
                              "{0!r} is a valid FRED series.".format(name))
            raise
    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df
def _convert_to_array(self, values, name=None, other=None):
    """Coerce ``values`` to an ndarray suitable for datetime/timedelta
    arithmetic, inferring the kind of data and converting accordingly.

    Raises TypeError for types incompatible with a datetime/timedelta
    operation.
    """
    from pandas.tseries.timedeltas import to_timedelta

    coerce = True
    if not is_list_like(values):
        values = np.array([values])
    inferred_type = lib.infer_dtype(values)

    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # if we have an other of timedelta, but use pd.NaT here,
        # we are in the wrong path
        if (other is not None and other.dtype == 'timedelta64[ns]' and
                all(isnull(v) for v in values)):
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT

        # a datelike
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
        elif not (isinstance(values, (np.ndarray, pd.Series)) and
                  com.is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to ns here
        values = to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # plain integers are only valid for scaling operations
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only fixed-duration ones (with .delta) work
        os = np.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError(
                "cannot use a non-absolute DateOffset in "
                "datetime/timedelta operations [{0}]".format(', '.join(
                    [com.pprint_thing(v) for v in values[mask]])))
        values = to_timedelta(os, coerce=coerce)
    elif inferred_type == 'floating':
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = tslib.iNaT
        else:
            raise TypeError(
                'incompatible type [{0}] for a datetime/timedelta '
                'operation'.format(np.array(values).dtype))
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(np.array(values).dtype))

    return values
def _has_valid_positional_setitem_indexer(self, indexer):
    """ validate that a positional indexer cannot enlarge its target
    will raise if needed, does not modify the indexer externally
    """
    if isinstance(indexer, dict):
        raise IndexError("{0} cannot enlarge its target object".format(
            self.name))
    else:
        if not isinstance(indexer, tuple):
            indexer = self._tuplify(indexer)
        # check each per-axis component against that axis
        for ax, i in zip(self.obj.axes, indexer):
            if isinstance(i, slice):
                # should check the stop slice?
                pass
            elif is_list_like(i):
                # should check the elements?
                pass
            elif com.is_integer(i):
                # an out-of-bounds integer position would enlarge
                if i >= len(ax):
                    raise IndexError(
                        "{0} cannot enlarge its target object".format(
                            self.name))
            elif isinstance(i, dict):
                raise IndexError(
                    "{0} cannot enlarge its target object".format(
                        self.name))
    return True
def _get_skiprows(skiprows):
    """Get an iterator given an integer, slice or container.

    Parameters
    ----------
    skiprows : int, slice, container
        The iterator to use to skip rows; can also be a slice.

    Raises
    ------
    TypeError
        * If `skiprows` is not a slice, integer, or Container

    Returns
    -------
    it : iterable
        A proper iterator to use to skip rows of a DataFrame.
    """
    if skiprows is None:
        return 0
    if isinstance(skiprows, slice):
        return lrange(skiprows.start or 0, skiprows.stop,
                      skiprows.step or 1)
    if isinstance(skiprows, numbers.Integral) or com.is_list_like(skiprows):
        return skiprows
    raise TypeError('%r is not a valid type for skipping rows' %
                    type(skiprows).__name__)
def _sanitize_values(arr):
    """
    return an ndarray for our input,
    in a platform independent manner
    """
    if hasattr(arr, 'values'):
        # pandas objects: unwrap to the underlying array
        arr = arr.values
    else:

        # scalar
        if lib.isscalar(arr):
            arr = [arr]

        # ndarray
        if isinstance(arr, np.ndarray):
            pass

        elif com.is_list_like(arr) and len(arr) > 0:
            # non-empty list-likes get platform-dependent int/float widths
            # normalized
            arr = com._possibly_convert_platform(arr)

        else:
            arr = np.asarray(arr)

    return arr
def _ixs(self, i, axis=0):
    """
    Positional indexing along an axis.

    i : int, slice, or sequence of integers
    axis : int
    """
    ax = self._get_axis(axis)
    key = ax[i]

    # xs cannot handle a non-scalar key, so just reindex here
    # if we have a multi-index and a single tuple, then its a reduction
    # (GH 7516)
    if not (isinstance(ax, MultiIndex) and isinstance(key, tuple)):
        if is_list_like(key):
            indexer = {self._get_axis_name(axis): key}
            return self.reindex(**indexer)

    # a reduction
    if axis == 0:
        values = self._data.iget(i)
        return self._box_item_values(key, values)

    # xs by position
    self._consolidate_inplace()
    new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
    return self._construct_return_type(new_data)
def _evaluate_compare(self, other, op): """ We have been called because a comparison between 8 aware arrays. numpy >= 1.11 will now warn about NaT comparisons """ # coerce to a similar object if not isinstance(other, type(self)): if not com.is_list_like(other): # scalar other = [other] elif lib.isscalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] other = type(self)(other) # compare result = getattr(self.asi8, op)(other.asi8) # technically we could support bool dtyped Index # for now just return the indexing array directly mask = (self._isnan) | (other._isnan) if is_bool_dtype(result): result[mask] = False return result try: result[mask] = tslib.iNaT return Index(result) except TypeError: return result
def _length_of_indexer(indexer,target=None): """ return the length of a single non-tuple indexer which could be a slice """ if target is not None and isinstance(indexer, slice): l = len(target) start = indexer.start stop = indexer.stop step = indexer.step if start is None: start = 0 elif start < 0: start += l if stop is None or stop > l: stop = l elif stop < 0: stop += l if step is None: step = 1 elif step < 0: step = abs(step) return (stop-start) / step elif isinstance(indexer, (ABCSeries, np.ndarray, list)): return len(indexer) elif not is_list_like(indexer): return 1 raise AssertionError("cannot find the length of the indexer")
def get_data_fred(name, start=dt.datetime(2010, 1, 1),
                  end=dt.datetime.today()):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indicies of each series.

    NOTE(review): the default ``end=dt.datetime.today()`` is evaluated once
    at import time, not per call — confirm callers pass an explicit end.
    """
    start, end = _sanitize_dates(start, end)

    if not is_list_like(name):
        names = [name]
    else:
        names = name

    urls = [_FRED_URL + "%s" % n + "/downloaddata/%s" % n + ".csv"
            for n in names]

    def fetch_data(url, name):
        # download one series as CSV and truncate to the requested window
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1, names=["DATE", name],
                            na_values=".")
        try:
            return data.truncate(start, end)
        except KeyError:
            # FRED serves an HTML error page for unknown series; detect it
            if data.ix[3].name[7:12] == "Error":
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise
    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join="outer")
    return df
def _possibly_cast_to_timedelta(value, coerce=True): """ try to cast to timedelta64, if already a timedeltalike, then make sure that we are [ns] (as numpy 1.6.2 is very buggy in this regards, don't force the conversion unless coerce is True if coerce='compat' force a compatibilty coercerion (to timedeltas) if needeed """ # coercion compatability if coerce == 'compat' and _np_version_under1p7: def convert(td, dtype): # we have an array with a non-object dtype if hasattr(td, 'item'): td = td.astype(np.int64).item() if td == tslib.iNaT: return td if dtype == 'm8[us]': td *= 1000 return td if td == tslib.compat_NaT: return tslib.iNaT # convert td value to a nanosecond value d = td.days s = td.seconds us = td.microseconds if dtype == 'object' or dtype == 'm8[ns]': td = 1000 * us + (s + d * 24 * 3600) * 10**9 else: raise ValueError( "invalid conversion of dtype in np < 1.7 [%s]" % dtype) return td # < 1.7 coercion if not is_list_like(value): value = np.array([value]) dtype = value.dtype return np.array([convert(v, dtype) for v in value], dtype='m8[ns]') # deal with numpy not being able to handle certain timedelta operations if isinstance(value, (ABCSeries, np.ndarray)) and value.dtype.kind == 'm': if value.dtype != 'timedelta64[ns]': value = value.astype('timedelta64[ns]') return value # we don't have a timedelta, but we want to try to convert to one (but # don't force it) if coerce: new_value = tslib.array_to_timedelta64( _values_from_object(value).astype(object), coerce=False) if new_value.dtype == 'i8': value = np.array(new_value, dtype='timedelta64[ns]') return value
def _delegate_property_get(self, name):
    """Fetch datetimelike property ``name`` from the underlying values and
    box the result as a Series aligned to the original index."""
    from pandas import Series

    result = getattr(self.values, name)

    # maybe need to upcast (ints)
    if isinstance(result, np.ndarray):
        if is_integer_dtype(result):
            result = result.astype('int64')
    elif not is_list_like(result):
        # scalar properties are returned as-is
        return result

    # blow up if we operate on categories
    if self.orig is not None:
        result = take_1d(result, self.orig.cat.codes)

    # return the result as a Series, which is by definition a copy
    result = Series(result, index=self.index, name=self.name)

    # setting this object will show a SettingWithCopyWarning/Error
    result.is_copy = ("modifications to a property of a datetimelike "
                      "object are not supported and are discarded. "
                      "Change values on the original.")

    return result
def _is_dtype_compat(self, other):
    """
    *this is an internal non-public method*

    provide a comparison between the dtype of self and other (coercing if
    needed)

    Raises
    ------
    TypeError if the dtypes are not compatible
    """
    if com.is_categorical_dtype(other):
        # categorical vs categorical: the category dtypes must agree
        if isinstance(other, CategoricalIndex):
            other = other._values
        if not other.is_dtype_equal(self):
            raise TypeError("categories must match existing categories "
                            "when appending")
    else:
        # scalar or plain list-like: map onto our categories and require
        # every element to be a known category
        values = other
        if not com.is_list_like(values):
            values = [values]
        other = CategoricalIndex(self._create_categorical(
            self, other, categories=self.categories, ordered=self.ordered))
        if not other.isin(values).all():
            raise TypeError("cannot append a non-category item to a "
                            "CategoricalIndex")

    return other
def delete(self, loc):
    """
    Make a new DatetimeIndex with passed location(s) deleted.

    Parameters
    ----------
    loc: int, slice or array of ints
        Indicate which sub-arrays to remove.

    Returns
    -------
    new_index : TimedeltaIndex
    """
    new_tds = np.delete(self.asi8, loc)

    freq = 'infer'
    if is_integer(loc):
        # deleting an endpoint keeps the remaining values evenly spaced,
        # so the original frequency is preserved
        if loc in (0, -len(self), -1, len(self) - 1):
            freq = self.freq
    else:
        if com.is_list_like(loc):
            # contiguous positions can be collapsed to a slice
            loc = lib.maybe_indices_to_slice(
                com._ensure_int64(np.array(loc)), len(self))
        if isinstance(loc, slice) and loc.step in (1, None):
            # a unit-step slice touching either end preserves the frequency
            if (loc.start in (0, None) or loc.stop in (len(self), None)):
                freq = self.freq

    return TimedeltaIndex(new_tds, name=self.name, freq=freq)
def _get_skiprows(skiprows):
    """Get an iterator given an integer, slice or container.

    Parameters
    ----------
    skiprows : int, slice, container
        The iterator to use to skip rows; can also be a slice.

    Raises
    ------
    TypeError
        * If `skiprows` is not a slice, integer, or Container

    Returns
    -------
    it : iterable
        A proper iterator to use to skip rows of a DataFrame.
    """
    if isinstance(skiprows, slice):
        first = skiprows.start or 0
        stride = skiprows.step or 1
        return lrange(first, skiprows.stop, stride)
    if isinstance(skiprows, numbers.Integral) or com.is_list_like(skiprows):
        return skiprows
    if skiprows is None:
        return 0
    raise TypeError("%r is not a valid type for skipping rows" %
                    type(skiprows).__name__)
def _convert_to_array(self, values, name=None, other=None):
    """Coerce ``values`` to an ndarray suitable for datetime/timedelta
    arithmetic, inferring the kind of data and converting accordingly.

    Raises TypeError for types incompatible with a datetime/timedelta
    operation.
    """
    from pandas.tseries.timedeltas import to_timedelta

    coerce = True
    if not is_list_like(values):
        values = np.array([values])
    inferred_type = lib.infer_dtype(values)

    if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
        # if we have an other of timedelta, but use pd.NaT here,
        # we are in the wrong path
        if (other is not None and other.dtype == 'timedelta64[ns]' and
                all(isnull(v) for v in values)):
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT

        # a datelike
        elif isinstance(values, pd.DatetimeIndex):
            values = values.to_series()
        elif not (isinstance(values, (np.ndarray, pd.Series)) and
                  is_datetime64_dtype(values)):
            values = tslib.array_to_datetime(values)
    elif inferred_type in ('timedelta', 'timedelta64'):
        # have a timedelta, convert to ns here
        values = to_timedelta(values, coerce=coerce)
    elif inferred_type == 'integer':
        # py3 compat where dtype is 'm' but is an integer
        if values.dtype.kind == 'm':
            values = values.astype('timedelta64[ns]')
        elif isinstance(values, pd.PeriodIndex):
            values = values.to_timestamp().to_series()
        elif name not in ('__truediv__', '__div__', '__mul__'):
            # plain integers are only valid for scaling operations
            raise TypeError("incompatible type for a datetime/timedelta "
                            "operation [{0}]".format(name))
    elif isinstance(values[0], pd.DateOffset):
        # handle DateOffsets: only fixed-duration ones (with .delta) work
        os = np.array([getattr(v, 'delta', None) for v in values])
        mask = isnull(os)
        if mask.any():
            raise TypeError("cannot use a non-absolute DateOffset in "
                            "datetime/timedelta operations [{0}]".format(
                                ', '.join([com.pprint_thing(v)
                                           for v in values[mask]])))
        values = to_timedelta(os, coerce=coerce)
    elif inferred_type == 'floating':
        # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
        if isnull(values).all():
            values = np.empty(values.shape, dtype=other.dtype)
            values[:] = iNaT
        else:
            raise TypeError(
                'incompatible type [{0}] for a datetime/timedelta '
                'operation'.format(np.array(values).dtype))
    else:
        raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                        " operation".format(np.array(values).dtype))

    return values
def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
    """
    Convert argument to timedelta

    Parameters
    ----------
    arg : string, timedelta, array of strings (with possible NAs)
    unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
        integer/float number
    box : boolean, default True
        - If True returns a Timedelta/TimedeltaIndex of the results
        - if False returns a np.timedelta64 or ndarray of values of dtype
          timedelta64[ns]
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : timedelta64/arrays of timedelta64 if parsing succeeded
    """
    unit = _validate_timedelta_unit(unit)

    def _convert_listlike(arg, box, unit):
        # normalize non-array iterables into an object ndarray
        if isinstance(arg, (list, tuple)) or ((hasattr(arg, '__iter__') and
                                               not hasattr(arg, 'dtype'))):
            arg = np.array(list(arg), dtype='O')

        # these are shortcutable
        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):
            # integers are interpreted in the given unit
            value = arg.astype('timedelta64[{0}]'.format(unit)).astype(
                'timedelta64[ns]', copy=False)
        else:
            value = tslib.array_to_timedelta64(_ensure_object(arg),
                                               unit=unit, errors=errors)
            value = value.astype('timedelta64[ns]', copy=False)

        if box:
            from pandas import TimedeltaIndex
            value = TimedeltaIndex(value, unit='ns')
        return value

    if arg is None:
        return arg
    elif isinstance(arg, ABCSeries):
        # convert the values and re-box preserving index and name
        from pandas import Series
        values = _convert_listlike(arg.values, box=False, unit=unit)
        return Series(values, index=arg.index, name=arg.name,
                      dtype='m8[ns]')
    elif is_list_like(arg):
        return _convert_listlike(arg, box=box, unit=unit)

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box,
                                            errors=errors)
def _is_offset(self, arr_or_obj): """ check if obj or all elements of list-like is DateOffset """ if isinstance(arr_or_obj, pd.DateOffset): return True elif is_list_like(arr_or_obj): return all(isinstance(x, pd.DateOffset) for x in arr_or_obj) else: return False
def get_expected(s, name):
    """Build the expected result for a ``.dt`` property by applying the
    same accessor to an Index of the Series' underlying values; scalar
    results pass through, array results are boxed as a Series."""
    # BUG FIX: the original read the enclosing loop variable ``prop``
    # instead of the ``name`` parameter, silently ignoring the argument.
    result = getattr(Index(s._values), name)
    if isinstance(result, np.ndarray):
        if com.is_integer_dtype(result):
            result = result.astype('int64')
    elif not com.is_list_like(result):
        return result
    return Series(result, index=s.index, name=s.name)
def to_timedelta(arg, unit='ns', box=True, coerce=False):
    """
    Convert argument to timedelta

    Parameters
    ----------
    arg : string, timedelta, array of strings (with possible NAs)
    unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
        integer/float number
    box : boolean, default True
        If True returns a Timedelta/TimedeltaIndex of the results
        if False returns a np.timedelta64 or ndarray of values of dtype
        timedelta64[ns]
    coerce : force errors to NaT (False by default)

    Returns
    -------
    ret : timedelta64/arrays of timedelta64 if parsing succeeded
    """
    unit = _validate_timedelta_unit(unit)

    def _convert_listlike(arg, box, unit):
        # normalize non-array iterables into an object ndarray
        if isinstance(arg, (list, tuple)) or ((hasattr(arg, '__iter__') and
                                               not hasattr(arg, 'dtype'))):
            arg = np.array(list(arg), dtype='O')

        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):
            # these are shortcutable
            value = arg.astype('timedelta64[{0}]'.format(unit)).astype(
                'timedelta64[ns]')
        else:
            try:
                value = tslib.array_to_timedelta64(_ensure_object(arg),
                                                   unit=unit, coerce=coerce)
            except:
                # try to process strings fast; may need to fallback
                try:
                    value = np.array([_get_string_converter(r, unit=unit)()
                                      for r in arg], dtype='m8[ns]')
                except:
                    # last resort: element-wise scalar coercion
                    value = np.array([_coerce_scalar_to_timedelta_type(
                        r, unit=unit, coerce=coerce) for r in arg])
            value = value.astype('timedelta64[ns]', copy=False)

        if box:
            from pandas import TimedeltaIndex
            value = TimedeltaIndex(value, unit='ns')
        return value

    if arg is None:
        return arg
    elif isinstance(arg, ABCSeries):
        # convert the values and re-box preserving index and name
        from pandas import Series
        values = _convert_listlike(arg.values, box=False, unit=unit)
        return Series(values, index=arg.index, name=arg.name,
                      dtype='m8[ns]')
    elif is_list_like(arg):
        return _convert_listlike(arg, box=box, unit=unit)

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box,
                                            coerce=coerce)
def to_timedelta(arg, box=True, unit='ns'):
    """
    Convert argument to timedelta

    Parameters
    ----------
    arg : string, timedelta, array of strings (with possible NAs)
    box : boolean, default True
        If True returns a Series of the results, if False returns ndarray
        of values
    unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
        integer/float number

    Returns
    -------
    ret : timedelta64/arrays of timedelta64 if parsing succeeded
    """
    if _np_version_under1p7:
        raise ValueError("to_timedelta is not support for numpy < 1.7")

    unit = _validate_timedelta_unit(unit)

    def _convert_listlike(arg, box, unit):
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        if is_timedelta64_dtype(arg):
            value = arg.astype('timedelta64[ns]')
        elif is_integer_dtype(arg):
            # these are shortcutable
            value = arg.astype(
                'timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
        else:
            try:
                value = tslib.array_to_timedelta64(_ensure_object(arg),
                                                   unit=unit)
            except:
                # fallback: element-wise scalar coercion
                value = np.array([_coerce_scalar_to_timedelta_type(
                    r, unit=unit) for r in arg])

        if box:
            from pandas import Series
            value = Series(value, dtype='m8[ns]')
        return value

    if arg is None:
        return arg
    elif isinstance(arg, ABCSeries):
        # convert the values and re-box preserving index and name
        from pandas import Series
        values = _convert_listlike(arg.values, box=False, unit=unit)
        return Series(values, index=arg.index, name=arg.name,
                      dtype='m8[ns]')
    elif is_list_like(arg):
        return _convert_listlike(arg, box=box, unit=unit)

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit)
def check_len(item, name):
    """Validate that a list-like ``item`` has one entry per column being
    encoded (``columns_to_encode`` comes from the enclosing scope); scalar
    items pass through unchecked."""
    length_msg = ("Length of '{0}' ({1}) did "
                  "not match the length of the columns "
                  "being encoded ({2}).")

    if com.is_list_like(item):
        if not len(item) == len(columns_to_encode):
            raise ValueError(length_msg.format(name, len(item),
                                               len(columns_to_encode)))
def __init__(self, data, target=None, *args, **kwargs):
    """Build the frame from ``data`` and/or ``target``, resolving the
    target column name and delegating storage to pd.DataFrame."""
    if data is None and target is None:
        msg = '{0} must have either data or target'
        raise ValueError(msg.format(self.__class__.__name__))
    elif data is None and not com.is_list_like(target):
        msg = 'target must be list-like when data is None'
        raise ValueError(msg)

    # unwrap scikit-learn / statsmodels native containers first
    data, target = skaccessors._maybe_sklearn_data(data, target)
    data, target = smaccessors._maybe_statsmodels_data(data, target)

    # retrieve target_name
    if isinstance(data, ModelFrame):
        target_name = data.target_name

    data, target = self._maybe_convert_data(data, target, *args, **kwargs)

    if target is not None and not com.is_list_like(target):
        # target given as a column label: the column stays in data
        if target in data.columns:
            target_name = target
            df = data
        else:
            msg = "Specified target '{0}' is not included in data"
            raise ValueError(msg.format(target))
        self._target_name = target_name
    else:
        # target is list-like (or None): concatenate it alongside data
        df, target = self._concat_target(data, target)
        if isinstance(target, pd.Series):
            self._target_name = target.name
        elif isinstance(target, pd.DataFrame):
            # multi-column targets keep the whole column index as the name
            if len(target.columns) > 1:
                self._target_name = target.columns
            else:
                self._target_name = target.columns[0]
        else:
            # target may be None
            self._target_name = self._TARGET_NAME

    pd.DataFrame.__init__(self, df)