def test_is_datetimetz():
    """Check which inputs ``com.is_datetimetz`` reports as tz-aware."""
    # A plain list and a tz-naive DatetimeIndex are both rejected.
    plain_list = [1, 2, 3]
    assert not com.is_datetimetz(plain_list)

    naive_index = pd.DatetimeIndex([1, 2, 3])
    assert not com.is_datetimetz(naive_index)

    # A DatetimeIndex localized to a timezone is accepted.
    aware_index = pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")
    assert com.is_datetimetz(aware_index)

    # An empty Series carrying a DatetimeTZDtype is accepted as well.
    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    empty_series = pd.Series([], dtype=tz_dtype)
    assert com.is_datetimetz(empty_series)
def test_dst(self):
    """January and August US/Eastern Series are datetimetz with equal dtype."""
    january = Series(
        date_range('2013-01-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    assert is_datetimetz(january)

    august = Series(
        date_range('2013-08-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    assert is_datetimetz(august)

    # Both ranges use the same timezone, so the dtypes must compare equal.
    assert january.dtype == august.dtype
def test_is_datetimetz():
    """Check ``com.is_datetimetz`` results while expecting a FutureWarning."""
    # Every call to the checked function happens inside the warning context.
    with tm.assert_produces_warning(FutureWarning):
        plain_list = [1, 2, 3]
        assert not com.is_datetimetz(plain_list)

        naive_index = pd.DatetimeIndex([1, 2, 3])
        assert not com.is_datetimetz(naive_index)

        aware_index = pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")
        assert com.is_datetimetz(aware_index)

        tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
        empty_series = pd.Series([], dtype=tz_dtype)
        assert com.is_datetimetz(empty_series)
def test_dst(self):
    """January and August US/Eastern Series are datetimetz with equal dtype."""
    january = Series(
        date_range('2013-01-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    self.assertTrue(is_datetimetz(january))

    august = Series(
        date_range('2013-08-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    self.assertTrue(is_datetimetz(august))

    # Both ranges use the same timezone, so the dtypes must compare equal.
    self.assertEqual(january.dtype, august.dtype)
def test_dst(self):
    """January and August US/Eastern Series are tz-aware with equal dtype.

    ``is_datetimetz`` is called inside ``tm.assert_produces_warning`` because
    each call is expected to emit a FutureWarning.
    """
    january = Series(
        date_range('2013-01-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    assert is_datetime64tz_dtype(january)
    with tm.assert_produces_warning(FutureWarning):
        assert is_datetimetz(january)

    august = Series(
        date_range('2013-08-01', periods=3, tz='US/Eastern'),
        name='A',
    )
    assert is_datetime64tz_dtype(august)
    with tm.assert_produces_warning(FutureWarning):
        assert is_datetimetz(august)

    # Both ranges use the same timezone, so the dtypes must compare equal.
    assert january.dtype == august.dtype
def test_basic(self):
    """Exercise both tz-aware dtype predicates on matching and other inputs."""
    assert is_datetime64tz_dtype(self.dtype)

    rng = date_range('20130101', periods=3, tz='US/Eastern')
    ser = Series(rng, name='A')
    float_dtype = np.dtype('float64')

    # is_datetime64tz_dtype: accepts the dtype and the Series itself,
    # rejects a float dtype and a bare float.
    assert is_datetime64tz_dtype(ser.dtype)
    assert is_datetime64tz_dtype(ser)
    assert not is_datetime64tz_dtype(float_dtype)
    assert not is_datetime64tz_dtype(1.0)

    # is_datetimetz: same expectations.
    assert is_datetimetz(ser)
    assert is_datetimetz(ser.dtype)
    assert not is_datetimetz(float_dtype)
    assert not is_datetimetz(1.0)
def get_reindexed_values(self, empty_dtype, upcasted_na):
    """
    Return this unit's values, reindexed along every axis in ``self.indexers``.

    Parameters
    ----------
    empty_dtype : dtype
        dtype used when an all-NA unit is materialised as a filled array.
    upcasted_na : scalar or None
        Fill value to use; ``None`` means no upcasting is necessary and the
        block's own ``fill_value`` is used.

    Returns
    -------
    array-like
        A filled array for an all-NA unit, the block's raw values for a
        non-consolidatable block without indexers, a view of the values when
        there are no indexers, or the result of ``algos.take_nd`` otherwise.
    """
    block = self.block

    if upcasted_na is None:
        # No upcasting is necessary
        fill_value = block.fill_value
        values = block.get_values()
    else:
        fill_value = upcasted_na

        if self.is_na:
            if getattr(block, 'is_object', False):
                # we want to avoid filling with np.nan if we are
                # using None; we already know that we are all
                # nulls
                values = block.values.ravel(order='K')
                if len(values) and values[0] is None:
                    fill_value = None

            # datetimetz, categorical and sparse units are not replaced by
            # a filled ndarray; everything else is.
            keeps_block_values = (
                getattr(block, 'is_datetimetz', False)
                or is_datetimetz(empty_dtype)
                or getattr(block, 'is_categorical', False)
                or getattr(block, 'is_sparse', False)
            )
            if not keeps_block_values:
                missing_arr = np.empty(self.shape, dtype=empty_dtype)
                missing_arr.fill(fill_value)
                return missing_arr

        if not self.indexers and not block._can_consolidate:
            # preserve these for validation in _concat_compat
            return block.values

        if block.is_bool and not block.is_categorical:
            # External code requested filling/upcasting, bool values must
            # be upcasted to object to avoid being upcasted to numeric.
            values = block.astype(np.object_).values
        elif block.is_extension:
            values = block.values
        else:
            # No dtype upcasting is done here, it will be performed during
            # concatenation itself.
            values = block.get_values()

    if self.indexers:
        for ax, indexer in self.indexers.items():
            values = algos.take_nd(values, indexer, axis=ax,
                                   fill_value=fill_value)
    else:
        # If there's no indexing to be done, we want to signal outside
        # code that this array must be copied explicitly. This is done
        # by returning a view and checking `retval.base`.
        values = values.view()

    return values
def test_basic(self):
    """Exercise both tz-aware dtype predicates on matching and other inputs."""
    self.assertTrue(is_datetime64tz_dtype(self.dtype))

    rng = date_range('20130101', periods=3, tz='US/Eastern')
    ser = Series(rng, name='A')
    float_dtype = np.dtype('float64')

    # is_datetime64tz_dtype: accepts the dtype and the Series itself,
    # rejects a float dtype and a bare float.
    self.assertTrue(is_datetime64tz_dtype(ser.dtype))
    self.assertTrue(is_datetime64tz_dtype(ser))
    self.assertFalse(is_datetime64tz_dtype(float_dtype))
    self.assertFalse(is_datetime64tz_dtype(1.0))

    # is_datetimetz: same expectations.
    self.assertTrue(is_datetimetz(ser))
    self.assertTrue(is_datetimetz(ser.dtype))
    self.assertFalse(is_datetimetz(float_dtype))
    self.assertFalse(is_datetimetz(1.0))
def test_basic(self):
    """Exercise both tz-aware dtype predicates on matching and other inputs.

    The ``is_datetimetz`` calls run inside ``tm.assert_produces_warning``
    because they are expected to emit a FutureWarning.
    """
    assert is_datetime64tz_dtype(self.dtype)

    rng = date_range("20130101", periods=3, tz="US/Eastern")
    ser = Series(rng, name="A")

    # is_datetime64tz_dtype: accepts the dtype and the Series itself,
    # rejects a float dtype and a bare float.
    assert is_datetime64tz_dtype(ser.dtype)
    assert is_datetime64tz_dtype(ser)
    assert not is_datetime64tz_dtype(np.dtype("float64"))
    assert not is_datetime64tz_dtype(1.0)

    with tm.assert_produces_warning(FutureWarning):
        assert is_datetimetz(ser)
        assert is_datetimetz(ser.dtype)
        assert not is_datetimetz(np.dtype("float64"))
        assert not is_datetimetz(1.0)
def test_value_counts_unique_nunique(self):
    """Check value_counts/unique/nunique on objects with repeated values.

    For every object in ``self.objs`` the n-th element is repeated n+1
    times, then the counts, the unique values and the number of unique
    values are compared against expectations built from the original.
    """
    for orig in self.objs:
        o = orig.copy()
        klass = type(o)
        values = o._values

        if isinstance(values, Index):
            # reset name not to affect latter process
            values.name = None

        # create repeated values, 'n'th element is repeated by n+1 times
        # skip boolean, because it only has 2 values at most
        is_index = isinstance(o, Index)
        if is_index and o.is_boolean():
            continue

        if is_index:
            expected_index = pd.Index(o[::-1])
            expected_index.name = None
            repeats = range(1, len(o) + 1)
            o = o.repeat(repeats)
            o.name = 'a'
        else:
            expected_index = pd.Index(values[::-1])
            repeats = range(1, len(o) + 1)
            idx = o.index.repeat(repeats)
            rep = np.repeat(values, repeats)
            o = klass(rep, index=idx, name='a')

        # check values has the same dtype as the original
        self.assertEqual(o.dtype, orig.dtype)

        expected_s = Series(range(10, 0, -1), index=expected_index,
                            dtype='int64', name='a')

        counts = o.value_counts()
        tm.assert_series_equal(counts, expected_s)
        self.assertTrue(counts.index.name is None)
        self.assertEqual(counts.name, 'a')

        uniques = o.unique()
        if isinstance(o, Index):
            self.assertTrue(isinstance(uniques, o.__class__))
            tm.assert_index_equal(uniques, orig)
        elif is_datetimetz(o):
            # datetimetz Series returns array of Timestamp
            self.assertEqual(uniques[0], orig[0])
            for r in uniques:
                assert isinstance(r, pd.Timestamp)
            tm.assert_numpy_array_equal(uniques,
                                        orig._values.asobject.values)
        else:
            tm.assert_numpy_array_equal(uniques, orig.values)

        self.assertEqual(o.nunique(), len(np.unique(o.values)))
def test_value_counts_unique_nunique(self):
    """Check value_counts/unique/nunique on objects with repeated values.

    For every object in ``self.objs`` the n-th element is repeated n+1
    times, then the counts, the unique values and the number of unique
    values are compared against expectations built from the original.
    """
    for orig in self.objs:
        o = orig.copy()
        klass = type(o)
        values = o._values

        if isinstance(values, Index):
            # reset name not to affect latter process
            values.name = None

        # create repeated values, 'n'th element is repeated by n+1 times
        # skip boolean, because it only has 2 values at most
        is_index = isinstance(o, Index)
        if is_index and o.is_boolean():
            continue

        if is_index:
            expected_index = Index(o[::-1])
            expected_index.name = None
            repeats = range(1, len(o) + 1)
            o = o.repeat(repeats)
            o.name = 'a'
        else:
            expected_index = Index(values[::-1])
            repeats = range(1, len(o) + 1)
            idx = o.index.repeat(repeats)
            rep = np.repeat(values, repeats)
            o = klass(rep, index=idx, name='a')

        # check values has the same dtype as the original
        assert o.dtype == orig.dtype

        expected_s = Series(range(10, 0, -1), index=expected_index,
                            dtype='int64', name='a')

        counts = o.value_counts()
        tm.assert_series_equal(counts, expected_s)
        assert counts.index.name is None
        assert counts.name == 'a'

        uniques = o.unique()
        if isinstance(o, Index):
            assert isinstance(uniques, o.__class__)
            tm.assert_index_equal(uniques, orig)
        elif is_datetimetz(o):
            # datetimetz Series returns array of Timestamp
            assert uniques[0] == orig[0]
            for r in uniques:
                assert isinstance(r, Timestamp)
            tm.assert_numpy_array_equal(
                uniques, orig._values.astype(object).values)
        else:
            tm.assert_numpy_array_equal(uniques, orig.values)

        assert o.nunique() == len(np.unique(o.values))
def test_none_comparison(self):
    """Compare each valid Series against ``None`` for equality and ordering.

    Equality comparisons return element-wise booleans. Ordering comparisons
    raise ``TypeError`` for datetime64 / datetimetz Series and return
    all-False results otherwise.
    """
    # bug brought up by #1079
    # changed from TypeError in 0.17.0
    for ser in self.is_valid_objs:
        if not isinstance(ser, Series):
            continue

        ser[0] = np.nan

        # noinspection PyComparisonWithNone
        eq_none = ser == None  # noqa
        assert not eq_none.iat[0]
        assert not eq_none.iat[1]

        # noinspection PyComparisonWithNone
        ne_none = ser != None  # noqa
        assert ne_none.iat[0]
        assert ne_none.iat[1]

        none_eq = None == ser  # noqa
        assert not none_eq.iat[0]
        assert not none_eq.iat[1]

        # this fails for numpy < 1.9
        # and oddly for *some* platforms
        # result = None != o  # noqa
        # assert result.iat[0]
        # assert result.iat[1]

        if is_datetime64_dtype(ser) or is_datetimetz(ser):
            # Following DatetimeIndex (and Timestamp) convention,
            # inequality comparisons with Series[datetime64] raise
            with pytest.raises(TypeError):
                None > ser
            with pytest.raises(TypeError):
                ser > None
        else:
            none_gt = None > ser
            assert not none_gt.iat[0]
            assert not none_gt.iat[1]

            lt_none = ser < None
            assert not lt_none.iat[0]
            assert not lt_none.iat[1]
def _dtype_kind(arr):
    """Return the kind label that ``get_dtype_kinds`` assigns to one array."""
    dtype = arr.dtype

    if is_categorical_dtype(dtype):
        return 'category'
    if is_sparse(arr):
        return 'sparse'
    if isinstance(arr, ABCRangeIndex):
        return 'range'
    if is_datetimetz(arr):
        # if to_concat contains different tz,
        # the result must be object dtype
        return str(arr.dtype)
    if is_datetime64_dtype(dtype):
        return 'datetime'
    if is_timedelta64_dtype(dtype):
        return 'timedelta'
    if is_object_dtype(dtype):
        return 'object'
    if is_bool_dtype(dtype):
        return 'bool'
    if is_period_dtype(dtype):
        return str(arr.dtype)
    if is_interval_dtype(dtype):
        return str(arr.dtype)
    # Anything else falls back to the dtype's own kind code.
    return dtype.kind


def get_dtype_kinds(l):
    """
    Collect the distinct dtype kinds found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """
    return {_dtype_kind(arr) for arr in l}
def test_value_counts_unique_nunique_null(self):
    """Check value_counts/unique/nunique when the data contains nulls.

    For each null marker (``np.nan`` and ``None``) and each object in
    ``self.objs`` that allows NA operations, the first two values are
    replaced by a null, the n-th element is repeated n+1 times, and the
    counts (with and without ``dropna``), the unique values and the number
    of unique values are compared against expectations.
    """
    for null_obj in [np.nan, None]:
        for orig in self.objs:
            o = orig.copy()
            klass = type(o)
            values = o._ndarray_values

            if not self._allow_na_ops(o):
                continue

            # special assign to the numpy array
            if is_datetimetz(o):
                if isinstance(o, DatetimeIndex):
                    v = o.asi8
                    v[0:2] = iNaT
                    values = o._shallow_copy(v)
                else:
                    o = o.copy()
                    o[0:2] = iNaT
                    values = o._values
            elif needs_i8_conversion(o):
                values[0:2] = iNaT
                values = o._shallow_copy(values)
            else:
                values[0:2] = null_obj

            # check values has the same dtype as the original
            assert values.dtype == o.dtype

            # create repeated values, 'n'th element is repeated by n+1
            # times
            if isinstance(o, (DatetimeIndex, PeriodIndex)):
                expected_index = o.copy()
                expected_index.name = None

                # attach name to klass
                o = klass(values.repeat(range(1, len(o) + 1)))
                o.name = 'a'
            else:
                if is_datetimetz(o):
                    expected_index = orig._values._shallow_copy(values)
                else:
                    expected_index = Index(values)
                expected_index.name = None
                o = o.repeat(range(1, len(o) + 1))
                o.name = 'a'

            # check values has the same dtype as the original
            assert o.dtype == orig.dtype

            # check values correctly have NaN
            # ``np.bool`` was only an alias of the builtin and has been
            # removed from NumPy, so use the builtin ``bool`` directly.
            nanloc = np.zeros(len(o), dtype=bool)
            nanloc[:3] = True
            if isinstance(o, Index):
                tm.assert_numpy_array_equal(pd.isna(o), nanloc)
            else:
                exp = Series(nanloc, o.index, name='a')
                tm.assert_series_equal(pd.isna(o), exp)

            expected_s_na = Series(list(range(10, 2, -1)) + [3],
                                   index=expected_index[9:0:-1],
                                   dtype='int64', name='a')
            expected_s = Series(list(range(10, 2, -1)),
                                index=expected_index[9:1:-1],
                                dtype='int64', name='a')

            result_s_na = o.value_counts(dropna=False)
            tm.assert_series_equal(result_s_na, expected_s_na)
            assert result_s_na.index.name is None
            assert result_s_na.name == 'a'

            # Compare the stored result rather than calling value_counts()
            # a second time, so the same object is checked throughout.
            result_s = o.value_counts()
            tm.assert_series_equal(result_s, expected_s)
            assert result_s.index.name is None
            assert result_s.name == 'a'

            result = o.unique()
            if isinstance(o, Index):
                tm.assert_index_equal(result,
                                      Index(values[1:], name='a'))
            elif is_datetimetz(o):
                # unable to compare NaT / nan
                vals = values[2:].astype(object).values
                tm.assert_numpy_array_equal(result[1:], vals)
                assert result[0] is pd.NaT
            else:
                tm.assert_numpy_array_equal(result[1:], values[2:])

                assert pd.isna(result[0])
                assert result.dtype == orig.dtype

            assert o.nunique() == 8
            assert o.nunique(dropna=False) == 9
def test_value_counts_unique_nunique_null(self):
    """Check value_counts/unique/nunique when the data contains nulls.

    For each null marker (``np.nan`` and ``None``) and each object in
    ``self.objs`` that allows NA operations, the first two values are
    replaced by a null, the n-th element is repeated n+1 times, and the
    counts (with and without ``dropna``), the unique values and the number
    of unique values are compared against expectations.
    """
    for null_obj in [np.nan, None]:
        for orig in self.objs:
            o = orig.copy()
            klass = type(o)
            values = o._values

            if not self._allow_na_ops(o):
                continue

            # special assign to the numpy array
            if is_datetimetz(o):
                if isinstance(o, DatetimeIndex):
                    v = o.asi8
                    v[0:2] = iNaT
                    values = o._shallow_copy(v)
                else:
                    o = o.copy()
                    o[0:2] = iNaT
                    values = o._values
            elif needs_i8_conversion(o):
                values[0:2] = iNaT
                values = o._shallow_copy(values)
            else:
                values[0:2] = null_obj

            # check values has the same dtype as the original
            self.assertEqual(values.dtype, o.dtype)

            # create repeated values, 'n'th element is repeated by n+1
            # times
            if isinstance(o, (DatetimeIndex, PeriodIndex)):
                expected_index = o.copy()
                expected_index.name = None

                # attach name to klass
                o = klass(values.repeat(range(1, len(o) + 1)))
                o.name = 'a'
            else:
                if is_datetimetz(o):
                    expected_index = orig._values._shallow_copy(values)
                else:
                    expected_index = pd.Index(values)
                expected_index.name = None
                o = o.repeat(range(1, len(o) + 1))
                o.name = 'a'

            # check values has the same dtype as the original
            self.assertEqual(o.dtype, orig.dtype)

            # check values correctly have NaN
            # ``np.bool`` was only an alias of the builtin and has been
            # removed from NumPy, so use the builtin ``bool`` directly.
            nanloc = np.zeros(len(o), dtype=bool)
            nanloc[:3] = True
            if isinstance(o, Index):
                tm.assert_numpy_array_equal(pd.isnull(o), nanloc)
            else:
                exp = pd.Series(nanloc, o.index, name='a')
                tm.assert_series_equal(pd.isnull(o), exp)

            expected_s_na = Series(list(range(10, 2, -1)) + [3],
                                   index=expected_index[9:0:-1],
                                   dtype='int64', name='a')
            expected_s = Series(list(range(10, 2, -1)),
                                index=expected_index[9:1:-1],
                                dtype='int64', name='a')

            result_s_na = o.value_counts(dropna=False)
            tm.assert_series_equal(result_s_na, expected_s_na)
            self.assertTrue(result_s_na.index.name is None)
            self.assertEqual(result_s_na.name, 'a')

            # Compare the stored result rather than calling value_counts()
            # a second time, so the same object is checked throughout.
            result_s = o.value_counts()
            tm.assert_series_equal(result_s, expected_s)
            self.assertTrue(result_s.index.name is None)
            self.assertEqual(result_s.name, 'a')

            result = o.unique()
            if isinstance(o, Index):
                tm.assert_index_equal(result,
                                      Index(values[1:], name='a'))
            elif is_datetimetz(o):
                # unable to compare NaT / nan
                tm.assert_numpy_array_equal(result[1:],
                                            values[2:].asobject.values)
                self.assertIs(result[0], pd.NaT)
            else:
                tm.assert_numpy_array_equal(result[1:], values[2:])

                self.assertTrue(pd.isnull(result[0]))
                self.assertEqual(result.dtype, orig.dtype)

            self.assertEqual(o.nunique(), 8)
            self.assertEqual(o.nunique(dropna=False), 9)
def get_empty_dtype_and_na(join_units):
    """
    Choose the dtype and N/A value to use when concatenating join units.

    Parameters
    ----------
    join_units : sequence of join units
        Each unit is read through its ``block``, ``dtype`` and ``is_na``
        attributes.

    Returns
    -------
    dtype
    na
        May be None, which means there was no casting involved.

    Raises
    ------
    AssertionError
        If no dtype could be determined from the units.
    """
    if len(join_units) == 1 and join_units[0].block is None:
        return np.float64, np.nan

    if is_uniform_reindex(join_units):
        # XXX: integrate property
        first_block = join_units[0].block
        return first_block.dtype, first_block.fill_value

    # Units without a block contribute no dtype but are remembered, because
    # they force an upcast for bool and non-float numeric results below.
    has_none_blocks = False
    dtypes = []
    for unit in join_units:
        if unit.block is None:
            has_none_blocks = True
            dtypes.append(None)
        else:
            dtypes.append(unit.dtype)

    upcast_classes = defaultdict(list)
    null_upcast_classes = defaultdict(list)
    for dtype, unit in zip(dtypes, join_units):
        if dtype is None:
            continue

        if is_categorical_dtype(dtype):
            upcast_cls = 'category'
        elif is_datetimetz(dtype):
            upcast_cls = 'datetimetz'
        elif issubclass(dtype.type, np.bool_):
            upcast_cls = 'bool'
        elif issubclass(dtype.type, np.object_):
            upcast_cls = 'object'
        elif is_datetime64_dtype(dtype):
            upcast_cls = 'datetime'
        elif is_timedelta64_dtype(dtype):
            upcast_cls = 'timedelta'
        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
            upcast_cls = dtype.name
        else:
            upcast_cls = 'float'

        # Null blocks should not influence upcast class selection, unless there
        # are only null blocks, when same upcasting rules must be applied to
        # null upcast classes.
        bucket = null_upcast_classes if unit.is_na else upcast_classes
        bucket[upcast_cls].append(dtype)

    if not upcast_classes:
        upcast_classes = null_upcast_classes

    # create the result
    if 'object' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'bool' in upcast_classes:
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        return np.dtype(np.bool_), None
    if 'category' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'datetimetz' in upcast_classes:
        tz_dtypes = upcast_classes['datetimetz']
        return tz_dtypes[0], tslibs.iNaT
    if 'datetime' in upcast_classes:
        return np.dtype('M8[ns]'), tslibs.iNaT
    if 'timedelta' in upcast_classes:
        return np.dtype('m8[ns]'), tslibs.iNaT

    # pragma
    g = np.find_common_type(upcast_classes, [])
    if is_float_dtype(g):
        return g, g.type(np.nan)
    if is_numeric_dtype(g):
        if has_none_blocks:
            return np.float64, np.nan
        return g, None

    msg = "invalid dtype determination in get_concat_dtype"
    raise AssertionError(msg)
def get_empty_dtype_and_na(join_units):
    """
    Choose the dtype and N/A value to use when concatenating join units.

    Parameters
    ----------
    join_units : sequence of join units
        Each unit is read through its ``block``, ``dtype`` and ``is_na``
        attributes.

    Returns
    -------
    dtype
    na
        May be None, which means there was no casting involved.

    Raises
    ------
    AssertionError
        If no dtype could be determined from the units.
    """
    if len(join_units) == 1 and join_units[0].block is None:
        return np.float64, np.nan

    if is_uniform_reindex(join_units):
        # XXX: integrate property
        first_block = join_units[0].block
        return first_block.dtype, first_block.fill_value

    # Units without a block contribute no dtype but are remembered, because
    # they force an upcast for bool and non-float numeric results below.
    has_none_blocks = False
    dtypes = []
    for unit in join_units:
        if unit.block is None:
            has_none_blocks = True
            dtypes.append(None)
        else:
            dtypes.append(unit.dtype)

    upcast_classes = defaultdict(list)
    null_upcast_classes = defaultdict(list)
    for dtype, unit in zip(dtypes, join_units):
        if dtype is None:
            continue

        if is_categorical_dtype(dtype):
            upcast_cls = 'category'
        elif is_datetimetz(dtype):
            upcast_cls = 'datetimetz'
        elif issubclass(dtype.type, np.bool_):
            upcast_cls = 'bool'
        elif issubclass(dtype.type, np.object_):
            upcast_cls = 'object'
        elif is_datetime64_dtype(dtype):
            upcast_cls = 'datetime'
        elif is_timedelta64_dtype(dtype):
            upcast_cls = 'timedelta'
        elif is_sparse(dtype):
            upcast_cls = dtype.subtype.name
        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
            upcast_cls = dtype.name
        else:
            upcast_cls = 'float'

        # Null blocks should not influence upcast class selection, unless there
        # are only null blocks, when same upcasting rules must be applied to
        # null upcast classes.
        bucket = null_upcast_classes if unit.is_na else upcast_classes
        bucket[upcast_cls].append(dtype)

    if not upcast_classes:
        upcast_classes = null_upcast_classes

    # create the result
    if 'object' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'bool' in upcast_classes:
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        return np.dtype(np.bool_), None
    if 'category' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'datetimetz' in upcast_classes:
        tz_dtypes = upcast_classes['datetimetz']
        return tz_dtypes[0], tslibs.iNaT
    if 'datetime' in upcast_classes:
        return np.dtype('M8[ns]'), tslibs.iNaT
    if 'timedelta' in upcast_classes:
        return np.dtype('m8[ns]'), tslibs.iNaT

    # pragma
    try:
        g = np.find_common_type(upcast_classes, [])
    except TypeError:
        # At least one is an ExtensionArray
        return np.dtype(np.object_), np.nan

    if is_float_dtype(g):
        return g, g.type(np.nan)
    if is_numeric_dtype(g):
        if has_none_blocks:
            return np.float64, np.nan
        return g, None

    msg = "invalid dtype determination in get_concat_dtype"
    raise AssertionError(msg)