def test_setdiff2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''Property test: setdiff2d returns a 2D array whose row count equals the set difference of the inputs' tuple-rows.'''
    result = util.setdiff2d(arrays[0], arrays[1], assume_unique=False)
    self.assertTrue(result.ndim == 2)
    rows_a = set(util.array2d_to_tuples(arrays[0]))
    rows_b = set(util.array2d_to_tuples(arrays[1]))
    self.assertTrue(len(result) == len(rows_a - rows_b))
def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''Property test: intersect2d returns a 2D array whose row count equals the set intersection of the inputs' tuple-rows; datetime64 unit mismatches are skipped.'''
    if datetime64_not_aligned(arrays[0], arrays[1]):
        return
    result = util.intersect2d(arrays[0], arrays[1], assume_unique=False)
    self.assertTrue(result.ndim == 2)
    rows_a = set(util.array2d_to_tuples(arrays[0]))
    rows_b = set(util.array2d_to_tuples(arrays[1]))
    self.assertTrue(len(result) == len(rows_a.intersection(rows_b)))
def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''Property test: intersect2d is 1D when the result dtype is object (rows held as tuples), otherwise 2D; row count equals the set intersection of tuple-rows.'''
    result = util.intersect2d(arrays[0], arrays[1], assume_unique=False)
    expected_ndim = 1 if result.dtype == object else 2
    self.assertTrue(result.ndim == expected_ndim)
    rows_a = set(util.array2d_to_tuples(arrays[0]))
    rows_b = set(util.array2d_to_tuples(arrays[1]))
    self.assertTrue(len(result) == len(rows_a & rows_b))
def test_setdiff2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''Property test: setdiff2d row count equals the set difference of tuple-rows; skips datetime64 unit mismatches and NaN-bearing float/complex inputs.'''
    if datetime64_not_aligned(arrays[0], arrays[1]):
        return
    # NaN != NaN breaks set-based comparison, so skip inputs containing NaN
    if any(a.dtype.kind in ('f', 'c') and np.isnan(a).any() for a in arrays):
        return
    result = util.setdiff2d(arrays[0], arrays[1], assume_unique=False)
    self.assertTrue(result.ndim == 2)
    rows_a = set(util.array2d_to_tuples(arrays[0]))
    rows_b = set(util.array2d_to_tuples(arrays[1]))
    self.assertTrue(len(result) == len(rows_a.difference(rows_b)))
def test_union2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''Property test: union2d returns a 2D array whose row count equals the set union of the inputs' tuple-rows; skips datetime64 unit mismatches and NaN results.'''
    if datetime64_not_aligned(arrays[0], arrays[1]):
        return
    result = util.union2d(arrays[0], arrays[1], assume_unique=False)
    self.assertTrue(result.ndim == 2)
    # NaN != NaN breaks set-based length comparison, so skip such results
    if result.dtype.kind in ('f', 'c') and np.isnan(result).any():
        return
    rows_a = set(util.array2d_to_tuples(arrays[0]))
    rows_b = set(util.array2d_to_tuples(arrays[1]))
    self.assertTrue(len(result) == len(rows_a.union(rows_b)))
def _update_array_cache(self) -> None:
    '''Rebuild the attributes cached from ``self._levels`` (depth, labels array, tuple-key view, length), then clear the recache flag.
    '''
    # take the first depth yielded by the levels' depths() iterator
    self._depth = next(self._levels.depths())
    # store both NP array of labels, as well as KeysView of hashable tuples
    self._labels = self._levels.get_labels()
    # note: this does not retain order in 3.5
    self._keys = KeysView._from_iterable(array2d_to_tuples(self._labels))
    # if we get labels, faster to get that length than to call self._levels.__len__()
    self._length = len(self._labels)
    self._recache = False
def to_pairs(self) -> tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]]:
    '''
    Return a tuple of tuples, where each inner tuple is a pair of index label, value.
    '''
    # hierarchical indices expose 2D values; convert each row to a hashable tuple label
    labels = (
            list(array2d_to_tuples(self._index.values))
            if isinstance(self._index, IndexHierarchy)
            else self._index.values)
    return tuple(zip(labels, self.values))
def __init__(self,
        labels: IndexInitializer,
        *,
        loc_is_iloc: bool = False,
        name: tp.Hashable = None,
        dtype: DtypeSpecifier = None
        ) -> None:
    '''Initialize an Index from labels.

    Args:
        labels: an IndexBase subclass, an object exposing ``.values`` (a Series or similar), or an iterable of labels.
        loc_is_iloc: when True, loc labels are taken to be identical to iloc positions.
        name: optional hashable name; if None and ``labels`` is an IndexBase with a name, that name is adopted.
        dtype: optional target dtype; raises if it conflicts with a class-level ``_DTYPE``.

    Raises:
        RuntimeError: if ``dtype`` conflicts with ``_DTYPE``, or the extracted labels' dtype does not match ``_DTYPE``.
        KeyError: if labels contain non-unique values.
    '''
    self._recache = False
    self._map = None
    positions = None

    # resolve the targeted labels dtype, by looking at the class attr _DTYPE and/or the passed dtype argument
    if dtype is None:
        dtype_extract = self._DTYPE # set in some specialized Index classes
    else: # passed dtype is not None
        if self._DTYPE is not None and dtype != self._DTYPE:
            raise RuntimeError('invalid dtype argument for this Index', dtype, self._DTYPE)
        # self._DTYPE is None, passed dtype is not None, use dtype
        dtype_extract = dtype

    # handle all Index subclasses
    # check isinstance(labels, IndexBase)
    if issubclass(labels.__class__, IndexBase):
        if labels._recache:
            labels._update_array_cache()
        if name is None and labels.name is not None:
            name = labels.name
        # immutable, so no copy necessary
        if labels.depth == 1: # not an IndexHierarchy
            if labels.STATIC: # can take the map
                self._map = labels._map
            # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date
            positions = labels._positions
            loc_is_iloc = labels._loc_is_iloc
            labels = labels._labels
        else: # IndexHierarchy
            # will be a generator of tuples; already updated caches
            labels = array2d_to_tuples(labels._labels)
    elif hasattr(labels, 'values'):
        # it is a Series or similar
        array = labels.values
        if array.ndim == 1:
            labels = array
        else:
            labels = array2d_to_tuples(array)

    if self._DTYPE is not None:
        # arrays are not checked here; dtype is compared against dtype_extract in _extract_labels
        if not isinstance(labels, np.ndarray):
            # for now, assume that if _DTYPE is defined, we have a date
            labels = (to_datetime64(v, dtype_extract) for v in labels)
        else: # coerce to target type
            labels = labels.astype(dtype_extract)

    self._name = name if name is None else name_filter(name)

    if self._map is None:
        self._map = self._get_map(labels, positions)

    # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
    self._labels = self._extract_labels(self._map, labels, dtype_extract)
    self._positions = self._extract_positions(self._map, positions)

    if self._DTYPE and self._labels.dtype != self._DTYPE:
        raise RuntimeError('invalid label dtype for this Index', self._labels.dtype, self._DTYPE)
    # a map smaller than the labels means duplicate labels collapsed into one key
    if len(self._map) != len(self._labels):
        raise KeyError(f'labels ({len(self._labels)}) have non-unique values ({len(self._map)})')

    # NOTE: automatic discovery is possible, but not yet implemented
    self._loc_is_iloc = loc_is_iloc
def flat(self):
    '''Return a flat, one-dimensional index of tuples for each level.
    '''
    tuple_iter = array2d_to_tuples(iter(self))
    return self._INDEX_CONSTRUCTOR(tuple_iter)
def from_correspondence(cls, src_index: 'Index', dst_index: 'Index') -> 'IndexCorrespondence':
    '''
    Return an IndexCorrespondence instance from the correspondence of two Index or IndexHierarchy objects.

    Args:
        src_index: the index mapped from.
        dst_index: the index mapped to; its length defines the correspondence size.

    Returns:
        An IndexCorrespondence recording whether the indices share labels, whether dst is a subset/reordering of src, and the iloc arrays relating them.
    '''
    mixed_depth = False
    if src_index.depth == dst_index.depth:
        depth = src_index.depth
    else:
        # if dimensions are mixed, the only way there can be a match is if the 1D index is of object type (so it can hold a tuple); otherwise, there can be no matches;
        if src_index.depth == 1 and src_index.values.dtype.kind == 'O':
            depth = dst_index.depth
            mixed_depth = True
        elif dst_index.depth == 1 and dst_index.values.dtype.kind == 'O':
            depth = src_index.depth
            mixed_depth = True
        else:
            depth = 0  # incompatible depths: no correspondence possible

    # need to use lower level array methods to get intersection, rather than Index methods, as need arrays, not Index objects
    if depth == 1:
        # NOTE: this can fail in some cases: comparing two object arrays with NaNs and strings.
        common_labels = intersect1d(src_index.values, dst_index.values, assume_unique=True)
        has_common = len(common_labels) > 0
        assert not mixed_depth
    elif depth > 1:
        # if either values arrays are object, we have to convert all values to tuples
        common_labels = intersect2d(src_index.values, dst_index.values, assume_unique=True)
        if mixed_depth:
            # when mixed, on the 1D index we have to use loc_to_iloc with tuples
            common_labels = list(array2d_to_tuples(common_labels))
        has_common = len(common_labels) > 0
    else:
        has_common = False

    size = len(dst_index.values)

    # either a reordering or a subset
    if has_common:
        if len(common_labels) == len(dst_index):
            # use new index to retain order
            values_dst = dst_index.values
            if values_dst.dtype == DTYPE_BOOL:
                # if the index values are a Boolean array, loc_to_iloc will try to do a Boolean selection, which is incorrect. Using a list avoids this problem.
                iloc_src = src_index.loc_to_iloc(values_dst.tolist())
            else:
                iloc_src = src_index.loc_to_iloc(values_dst)
            iloc_dst = np.arange(size)
            return cls(has_common=has_common,
                    is_subset=True,
                    iloc_src=iloc_src,
                    iloc_dst=iloc_dst,
                    size=size)

        # these will be equal sized
        iloc_src = src_index.loc_to_iloc(common_labels)
        iloc_dst = dst_index.loc_to_iloc(common_labels)
        return cls(has_common=has_common,
                is_subset=False,
                iloc_src=iloc_src,
                iloc_dst=iloc_dst,
                size=size)

    # no common labels at all
    return cls(has_common=has_common,
            is_subset=False,
            iloc_src=None,
            iloc_dst=None,
            size=size)
def __init__(self,
        labels: IndexInitializer,
        *,
        loc_is_iloc: bool = False,
        name: NameType = NAME_DEFAULT,
        dtype: DtypeSpecifier = None
        ) -> None:
    '''Initialize an Index from labels.

    Args:
        labels: an IndexBase subclass, a ContainerOperand (Series or similar), or an iterable of labels.
        loc_is_iloc: when True, loc labels are taken to be identical to iloc positions, and no map is built.
        name: optional name; NAME_DEFAULT sentinel distinguishes "not provided" from an explicit None.
        dtype: optional target dtype; conflicts with a class-level ``_DTYPE`` raise.

    Raises:
        ErrorInitIndex: on dtype conflict, non-unique labels, or extracted label dtype mismatch.
    '''
    self._recache: bool = False
    self._map: tp.Optional[FrozenAutoMap] = None

    positions = None
    is_typed = self._DTYPE is not None

    # resolve the targeted labels dtype, by looking at the class attr _DTYPE and/or the passed dtype argument
    if dtype is None:
        dtype_extract = self._DTYPE # set in some specialized Index classes
    else: # passed dtype is not None
        if is_typed and dtype != self._DTYPE:
            # NOTE: should never get to this branch, as derived Index classes that set _DTYPE remove dtype from __init__
            raise ErrorInitIndex('invalid dtype argument for this Index', dtype, self._DTYPE) #pragma: no cover
        # self._DTYPE is None, passed dtype is not None, use dtype
        dtype_extract = dtype

    #-----------------------------------------------------------------------
    # handle all Index subclasses
    if isinstance(labels, IndexBase):
        if labels._recache:
            labels._update_array_cache()
        if name is NAME_DEFAULT:
            name = labels.name
        # immutable, so no copy necessary
        if isinstance(labels, Index): # not an IndexHierarchy
            if (labels.STATIC and self.STATIC and dtype is None):
                if not is_typed or (is_typed and self._DTYPE == labels.dtype):
                    # can take the map if static and if types in the dict are the same as those in the labels (or to become the labels after conversion)
                    self._map = labels._map
            # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date; for datetime64 indices, we might need to translate to a different type
            positions = labels._positions
            loc_is_iloc = labels._map is None
            labels = labels._labels
        else: # IndexHierarchy
            # will be a generator of tuples; already updated caches
            labels = array2d_to_tuples(labels.__iter__())
    elif isinstance(labels, ContainerOperand):
        # it is a Series or similar
        array = labels.values
        if array.ndim == 1:
            labels = array
        else:
            labels = array2d_to_tuples(array)
    # else: assume an iterable suitable for labels usage

    #-----------------------------------------------------------------------
    if is_typed:
        # arrays are not checked here; dtype is compared against dtype_extract in _extract_labels
        if not isinstance(labels, np.ndarray):
            # for now, assume that if _DTYPE is defined, we have a date
            labels = (to_datetime64(v, dtype_extract) for v in labels)
        # coerce to target type
        elif labels.dtype != dtype_extract:
            labels = labels.astype(dtype_extract)
            labels.flags.writeable = False #type: ignore

    self._name = None if name is NAME_DEFAULT else name_filter(name)

    if self._map is None: # if _map not shared from another Index
        if not loc_is_iloc:
            try:
                self._map = FrozenAutoMap(labels) if self.STATIC else AutoMap(labels)
            except ValueError:
                # Automap will raise ValueError if non-unique values are encountered
                pass
            if self._map is None:
                raise ErrorInitIndex(
                        f'labels ({len(tuple(labels))}) have non-unique values ({len(set(labels))})'
                        )
            size = len(self._map)
        else: # must assume labels are unique
            # labels must not be a generator, but we assume that internal clients that provided loc_is_iloc will not give a generator
            size = len(labels) #type: ignore
            if positions is None:
                positions = PositionsAllocator.get(size)
    else: # map shared from another Index
        size = len(self._map)

    # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
    self._labels = self._extract_labels(self._map, labels, dtype_extract)
    self._positions = self._extract_positions(size, positions)

    if self._DTYPE and self._labels.dtype != self._DTYPE:
        raise ErrorInitIndex(
                'invalid label dtype for this Index', #pragma: no cover
                self._labels.dtype, self._DTYPE)