def test_construction_from_string(self):
    """The full dtype string is accepted by both construction paths."""
    spec = 'datetime64[ns, US/Eastern]'

    # direct constructor accepts the full string form
    self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype(spec)))

    # the explicit parser yields an equivalent dtype
    self.assertTrue(
        is_dtype_equal(self.dtype,
                       DatetimeTZDtype.construct_from_string(spec)))

    # unparseable specs are rejected
    self.assertRaises(TypeError,
                      lambda: DatetimeTZDtype.construct_from_string('foo'))
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array, categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        If any of the categoricals are ordered or all do not
        have the same dtype
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]
    if any(c.ordered for c in to_union):
        raise TypeError("Can only combine unordered Categoricals")

    if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
               for c in to_union):
        raise TypeError("dtype of categories must be the same")

    # union of all categories, in first-appearance order
    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    # remap each array's codes into the combined category space
    new_codes = []
    for c in to_union:
        indexer = categories.get_indexer(c.categories)
        new_codes.append(indexer.take(c.codes))
    codes = np.concatenate(new_codes)
    return Categorical(codes, categories=categories, ordered=False,
                       fastpath=True)
def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, "datetime64[ns, US/Eastern]")) self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype("ns", "US/Eastern"))) self.assertFalse(is_dtype_equal(self.dtype, "foo")) self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype("ns", "CET"))) self.assertFalse(is_dtype_equal(DatetimeTZDtype("ns", "US/Eastern"), DatetimeTZDtype("ns", "US/Pacific"))) # numpy compat self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]"))
def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')) self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype('ns','US/Eastern'))) self.assertFalse(is_dtype_equal(self.dtype, 'foo')) self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns','CET'))) self.assertFalse(is_dtype_equal(DatetimeTZDtype('ns','US/Eastern'), DatetimeTZDtype('ns','US/Pacific'))) # numpy compat self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"),"datetime64[ns]"))
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array, categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        If any of the categoricals are ordered or all do not
        have the same dtype
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]
    if any(c.ordered for c in to_union):
        raise TypeError("Can only combine unordered Categoricals")

    if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
               for c in to_union):
        raise TypeError("dtype of categories must be the same")

    # union of all categories, in first-appearance order
    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    # remap each array's codes into the combined category space
    new_codes = []
    for c in to_union:
        indexer = categories.get_indexer(c.categories)
        new_codes.append(indexer.take(c.codes))
    codes = np.concatenate(new_codes)
    return Categorical(codes, categories=categories, ordered=False,
                       fastpath=True)
def equals(self, other):
    """
    Determines if two Index objects contain the same elements.
    """
    if self is other:
        return True

    # NaNs never compare equal element-wise, so their locations are
    # compared explicitly via the _isnan masks below.
    try:
        if not isinstance(other, Float64Index):
            other = self._constructor(other)

        same_dtype = is_dtype_equal(self.dtype, other.dtype)
        if not same_dtype or self.shape != other.shape:
            return False

        lvals, rvals = self._values, other._values
        # equal where values match, or where both sides are NaN
        return ((lvals == rvals) | (self._isnan & other._isnan)).all()
    except (TypeError, ValueError):
        # coercion of ``other`` failed -> not comparable -> not equal
        return False
def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=False):
    # fastpath: trust the caller's array and skip all validation
    if fastpath:
        return cls._simple_new(data, name=name)

    # isscalar, generators handled in coerce_to_ndarray
    data = cls._coerce_to_ndarray(data)

    if issubclass(data.dtype.type, compat.string_types):
        cls._string_data_error(data)

    if not copy and is_dtype_equal(data.dtype, cls._default_dtype):
        # already the right dtype and no copy requested -> use as-is
        values = data
    else:
        values = np.array(data, dtype=cls._default_dtype, copy=copy)
        cls._assert_safe_casting(data, values)

    if name is None and hasattr(data, 'name'):
        name = data.name
    return cls._simple_new(values, name=name)
def test_construction_from_string(self): result = CategoricalDtype.construct_from_string("category") self.assertTrue(is_dtype_equal(self.dtype, result)) self.assertRaises(TypeError, lambda: CategoricalDtype.construct_from_string("foo"))
def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, "category")) self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype())) self.assertFalse(is_dtype_equal(self.dtype, "foo"))
def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') self.assertTrue(is_dtype_equal(self.dtype, result)) self.assertRaises(TypeError, lambda : CategoricalDtype.construct_from_string('foo'))
def test_equality(self): self.assertTrue(is_dtype_equal(self.dtype, 'category')) self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype())) self.assertFalse(is_dtype_equal(self.dtype, 'foo'))
def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
    # Restore/fill join-key columns in ``result`` after the merge.
    # For each fillable key, values missing on one side (where the
    # indexer is -1) are patched in from the other side's key values.
    # NOTE(review): assumes ``left_indexer``/``right_indexer`` use -1
    # for unmatched rows -- consistent with the take_1d fill logic below.

    # lazily computed: whether each indexer contains any -1 entries
    left_has_missing = None
    right_has_missing = None

    keys = zip(self.join_names, self.left_on, self.right_on)
    for i, (name, lname, rname) in enumerate(keys):
        # skip keys that do not require filling
        if not _should_fill(lname, rname):
            continue

        take_left, take_right = None, None

        if name in result:
            if left_indexer is not None and right_indexer is not None:
                if name in self.left:
                    # key came from the left frame; fill its missing
                    # rows from the right join key
                    if left_has_missing is None:
                        left_has_missing = any(left_indexer == -1)

                    if left_has_missing:
                        take_right = self.right_join_keys[i]

                        # re-take the left values if the result column's
                        # dtype no longer matches the original left dtype
                        if not com.is_dtype_equal(result[name].dtype,
                                                  self.left[name].dtype):
                            take_left = self.left[name]._values

                elif name in self.right:
                    # mirror of the branch above for right-side keys
                    if right_has_missing is None:
                        right_has_missing = any(right_indexer == -1)

                    if right_has_missing:
                        take_left = self.left_join_keys[i]

                        if not com.is_dtype_equal(result[name].dtype,
                                                  self.right[name].dtype):
                            take_right = self.right[name]._values

        elif left_indexer is not None \
                and isinstance(self.left_join_keys[i], np.ndarray):
            # key column is absent from result; rebuild it from both sides
            take_left = self.left_join_keys[i]
            take_right = self.right_join_keys[i]

        if take_left is not None or take_right is not None:
            # materialize left-side values, filling unmatched rows with
            # the NA value appropriate for the dtype
            if take_left is None:
                lvals = result[name]._values
            else:
                lfill = na_value_for_dtype(take_left.dtype)
                lvals = algos.take_1d(take_left, left_indexer,
                                      fill_value=lfill)

            # same for the right-side values
            if take_right is None:
                rvals = result[name]._values
            else:
                rfill = na_value_for_dtype(take_right.dtype)
                rvals = algos.take_1d(take_right, right_indexer,
                                      fill_value=rfill)

            # if we have an all missing left_indexer
            # make sure to just use the right values
            mask = left_indexer == -1
            if mask.all():
                key_col = rvals
            else:
                # take left values where matched, right values elsewhere
                key_col = Index(lvals).where(~mask, rvals)

            if name in result:
                result[name] = key_col
            else:
                # unnamed keys get a positional placeholder name
                result.insert(i, name or 'key_%d' % i, key_col)