def process_iloc_indexes(inp, indexes):
    """Normalize and validate positional (``iloc``) indexers for ``inp``.

    A non-tuple indexer is wrapped into a tuple, then padded with full
    slices up to ``inp.ndim``.  Each per-axis indexer is validated and,
    for array-likes, converted to a 1-d bool/int64 array (or tensor).

    Returns the list of validated per-axis indexers; raises
    ``IndexingError``/``TypeError``/``ValueError``/``IndexError``/
    ``NotImplementedError`` on invalid input.
    """
    ndim = inp.ndim
    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    padding = ndim - len(indexes)
    if padding > 0:
        indexes = indexes + (slice(None),) * padding
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    processed = []
    for ax, index in enumerate(indexes):
        if isinstance(index, tuple):
            # a tuple at this point means more indexers than axes
            raise IndexingError("Too many indexers")
        if isinstance(index, slice):
            parts = [index.start, index.stop, index.step]
            if any(v is not None for v in parts):
                # validate the slice bounds against the real pandas index
                source = inp.index_value if ax == 0 else inp.columns_value
                pd_index = source.to_pandas()
                for val in parts:
                    if val is None:
                        continue
                    try:
                        pd_index[val]  # check on the pandas
                    except IndexError:
                        pass
                    except TypeError:
                        raise TypeError(f'cannot do slice indexing on {type(pd_index)} '
                                        f'with these indexers [{val}] of {type(val)}')
            processed.append(index)
        elif isinstance(index, (list, np.ndarray, pd.Series, ENTITY_TYPE)):
            if isinstance(index, ENTITY_TYPE):
                index = asarray(index)
                if ax == 1:
                    # do not support tensor index on axis 1
                    # because if so, the dtypes and columns_value would be unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError('indexer on axis columns cannot be '
                                                  'non-executed tensor')
            else:
                index = np.asarray(index)
            if index.dtype != np.bool_:
                index = index.astype(np.int64)
            if index.ndim != 1:
                raise ValueError('Buffer has wrong number of dimensions '
                                 f'(expected 1, got {index.ndim})')
            processed.append(index)
        elif isinstance(index, Integral):
            axis_size = inp.shape[ax]
            # only enforce bounds when the axis length is actually known
            if not np.isnan(axis_size) and not (-axis_size <= index < axis_size):
                raise IndexError('single positional indexer is out-of-bounds')
            processed.append(index)
        else:
            raise ValueError(_ILOC_ERROR_MSG)
    return processed
def process_iloc_indexes(inp, indexes):
    """Normalize and validate positional (``iloc``) indexers for ``inp``.

    A non-tuple indexer is wrapped into a tuple, then padded with full
    slices up to ``inp.ndim``.  Each per-axis indexer is validated and,
    for array-likes, converted to a 1-d bool/int64 array (or tensor).

    Returns the list of validated per-axis indexers; raises
    ``IndexingError``/``TypeError``/``ValueError``/``IndexError``/
    ``NotImplementedError`` on invalid input.
    """
    ndim = inp.ndim
    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    if len(indexes) < ndim:
        indexes += (slice(None),) * (ndim - len(indexes))
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    new_indexes = []
    # check each index
    for ax, index in enumerate(indexes):
        if isinstance(index, tuple):
            # a tuple should already have been caught by this point
            # so don't treat a tuple as a valid indexer
            raise IndexingError("Too many indexers")
        elif isinstance(index, slice):
            # FIX: only materialize the pandas index when the slice actually
            # carries bounds; the padded slice(None) needs no validation and
            # to_pandas() may be expensive (matches the sibling variant above).
            if any(v is not None for v in (index.start, index.stop, index.step)):
                pd_index = (inp.index_value if ax == 0 else inp.columns_value).to_pandas()
                for val in (index.start, index.stop, index.step):
                    if val is not None:
                        try:
                            pd_index[val]  # check on the pandas
                        except IndexError:
                            pass
                        except TypeError:
                            raise TypeError(
                                'cannot do slice indexing on {} '
                                'with these indexers [{}] '
                                'of {}'.format(type(pd_index), val, type(val)))
            new_indexes.append(index)
        elif isinstance(index, (list, np.ndarray, Base, Entity)):
            if not isinstance(index, (Base, Entity)):
                index = np.asarray(index)
            else:
                index = asarray(index)
                if ax == 1:
                    # do not support tensor index on axis 1
                    # because if so, the dtypes and columns_value would be unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError('indexer on axis columns cannot be '
                                                  'non-executed tensor')
            if index.dtype != np.bool_:
                index = index.astype(np.int64)
            if index.ndim != 1:
                raise ValueError('Buffer has wrong number of dimensions '
                                 '(expected 1, got {})'.format(index.ndim))
            new_indexes.append(index)
        elif isinstance(index, Integral):
            # FIX: reject out-of-bounds scalar indexers eagerly (pandas iloc
            # semantics) when the axis length is known, matching the sibling
            # variant above; NaN shape means "unknown", so skip the check then.
            shape = inp.shape[ax]
            if not np.isnan(shape) and (index < -shape or index >= shape):
                raise IndexError('single positional indexer is out-of-bounds')
            new_indexes.append(index)
        else:
            raise ValueError(_ILOC_ERROR_MSG)
    return new_indexes
def _parse_tuple(tup):
    """Unpack the user input for getitem and setitem and compute ndim

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D
    """
    col_loc = slice(None)
    if is_tuple(tup):
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]
    else:
        row_loc = tup

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    # scalars are boxed into single-element lists for downstream lookup code
    if row_scaler:
        row_loc = [row_loc]
    if col_scaler:
        col_loc = [col_loc]
    return row_loc, col_loc, ndim, row_scaler, col_scaler
def process_loc_indexes(inp, indexes):
    """Normalize label-based (``loc``) indexers for ``inp``.

    A non-tuple indexer is wrapped into a tuple and padded with full
    slices up to ``inp.ndim``.  Array-like indexers are converted to
    ndarrays (or tensors); tensor indexers on the columns axis must be
    already executed, otherwise ``NotImplementedError`` is raised.
    Returns the list of normalized per-axis indexers.
    """
    ndim = inp.ndim
    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    padding = ndim - len(indexes)
    if padding > 0:
        indexes = indexes + (slice(None),) * padding
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    processed = []
    for ax, index in enumerate(indexes):
        if isinstance(index, (list, np.ndarray, pd.Series, ENTITY_TYPE)):
            if isinstance(index, ENTITY_TYPE):
                index = asarray(index)
                if ax == 1:
                    # do not support tensor index on axis 1
                    # because if so, the dtypes and columns_value would be unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError(
                            'indexer on axis columns cannot be '
                            'non-executed tensor')
            else:
                index = np.asarray(index)
        processed.append(index)
    return processed
def __getitem__(self, key):
    """Retrieve a selection from ``self.df`` by label-based indexing.

    ``key`` is either a single-axis indexer or a (row, column) tuple.
    Returns a DataFrame, Series, or scalar depending on the computed
    dimensionality of the result.
    """
    # When getting along a single axis,
    if not isinstance(key, tuple):
        # Try to fasttrack the code through already optimized path
        try:
            return self.df.__getitem__(key)
        # This can happen if it is a list of rows
        except KeyError:
            pass
    else:
        if len(key) > self.df.ndim:
            raise IndexingError("Too many indexers")
        # If we're only slicing columns, handle the case with `__getitem__`
        if isinstance(key[0], slice) and key[0] == slice(None):
            if not isinstance(key[1], slice):
                # Boolean indexers can just be sliced into the columns object and
                # then passed to `__getitem__`
                if is_boolean_array(key[1]):
                    return self.df.__getitem__(self.df.columns[key[1]])
                return self.df.__getitem__(key[1])
            else:
                # Column slice: resolve labels to positions and defer to iloc.
                result_slice = self.df.columns.slice_locs(key[1].start, key[1].stop)
                return self.df.iloc[:, slice(*result_slice)]
    row_loc, col_loc, ndim, self.row_scaler, self.col_scaler = _parse_tuple(key)
    row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
    # Check that the row_lookup/col_lookup is longer than 1 or that the
    # row_loc/col_loc is not boolean list to determine the ndim of the
    # result properly for multiindex.
    ndim = (0 if len(row_lookup) == 1 and not is_boolean_array(row_loc) else 1) + \
           (0 if len(col_lookup) == 1 and not is_boolean_array(col_loc) else 1)
    result = super(_LocIndexer, self).__getitem__(row_lookup, col_lookup, ndim)
    # Pandas drops the levels that are in the `loc`, so we have to as well.
    if hasattr(result, "index") and isinstance(result.index, pandas.MultiIndex):
        # A Series produced by a column locator drops the column levels from
        # its index; otherwise the row locator's levels are dropped.
        if (isinstance(result, Series) and not isinstance(col_loc, slice)
                and all(col_loc[i] in result.index.levels[i]
                        for i in range(len(col_loc)))):
            result.index = result.index.droplevel(list(range(len(col_loc))))
        elif all(row_loc[i] in result.index.levels[i] for i in range(len(row_loc))):
            result.index = result.index.droplevel(list(range(len(row_loc))))
    if (hasattr(result, "columns") and isinstance(result.columns, pandas.MultiIndex)
            and all(col_loc[i] in result.columns.levels[i]
                    for i in range(len(col_loc)))):
        result.columns = result.columns.droplevel(list(range(len(col_loc))))
    return result
def sdc_reindex_series_impl(arr, index, name, by_index):
    """Numba-compiled reindex: align ``arr`` (indexed by ``index``) to ``by_index``.

    NOTE: ``range_indexes``, ``int64_indexes``, ``data_is_str_arr``,
    ``data_dtype`` and ``index_dtype`` are closure variables captured at
    compile time; the ``== True  # noqa`` comparisons are deliberate so that
    numba can resolve them as literals — do not "simplify" them.
    Raises ValueError on duplicate source index labels and IndexingError
    when any target label is missing from the source index.
    """
    # no reindexing is needed if indexes are equal
    if range_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    elif int64_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    else:
        equal_indexes = False
    if (index is by_index or equal_indexes):
        # fast path: same (or equal) index, just rewrap the data
        return pandas.Series(data=arr, index=by_index, name=name)

    if data_is_str_arr == True:  # noqa
        # string data needs a list buffer plus an explicit NaN mask
        _res_data = [''] * len(by_index)
        res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
    else:
        _res_data = numpy.empty(len(by_index), dtype=data_dtype)

    # build a dict of self.index values to their positions:
    map_index_to_position = Dict.empty(key_type=index_dtype, value_type=types.int32)
    for i, value in enumerate(index):
        if value in map_index_to_position:
            raise ValueError("cannot reindex from a duplicate axis")
        else:
            map_index_to_position[value] = i

    # parallel gather; index_mismatch is a prange reduction counter
    index_mismatch = 0
    for i in numba.prange(len(by_index)):
        val = by_index[i]
        if val in map_index_to_position:
            pos_in_self = map_index_to_position[val]
            _res_data[i] = arr[pos_in_self]
            if data_is_str_arr == True:  # noqa
                # NOTE(review): this reads NaN-ness at target position ``i``;
                # looks like it should be ``isna(arr, pos_in_self)`` — TODO confirm
                res_data_nan_mask[i] = isna(arr, i)
        else:
            index_mismatch += 1
    if index_mismatch:
        msg = "Unalignable boolean Series provided as indexer " + \
              "(index of the boolean Series and of the indexed object do not match)."
        raise IndexingError(msg)

    if data_is_str_arr == True:  # noqa
        res_data = create_str_arr_from_list(_res_data)
        str_arr_set_na_by_mask(res_data, res_data_nan_mask)
    else:
        res_data = _res_data

    return pandas.Series(data=res_data, index=by_index, name=name)
def _parse_tuple(tup):
    """
    Unpack a getitem/setitem key into row and column locators.

    loc[a]      -> ([a], :),   1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b]    -> ([a], [b]), 0D

    Parameters
    ----------
    tup : tuple
        User input to unpack.

    Returns
    -------
    row_loc : list
        List of row locators.
    col_list : list
        List of column locators.
    ndim : {0, 1, 2}
        Number of dimensions of located dataset.
    row_scaler : bool
        True if `row_loc` is a scalar, False otherwise.
    col_scaler : bool
        True if `col_loc` is a scalar, False otherwise.
    """
    col_loc = slice(None)
    if not is_tuple(tup):
        row_loc = tup
    else:
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    # box scalar locators so downstream code always sees list-likes
    if row_scaler:
        row_loc = [row_loc]
    if col_scaler:
        col_loc = [col_loc]
    return row_loc, col_loc, ndim, row_scaler, col_scaler
def __getitem__(self, key):
    """Retrieve a selection from ``self.df`` by label-based indexing.

    ``key`` is either a single-axis indexer or a (row, column) tuple.
    Handles index enlargement before computing lookups.  Returns a
    DataFrame, Series, or scalar depending on the result's ndim.
    """
    # When getting along a single axis,
    if not isinstance(key, tuple):
        # Try to fasttrack the code through already optimized path
        try:
            return self.df.__getitem__(key)
        # This can happen if it is a list of rows
        except KeyError:
            pass
    else:
        if len(key) > self.df.ndim:
            raise IndexingError("Too many indexers")
        # Full-row slice: defer column selection to plain __getitem__.
        if key[0] == slice(None):
            return self.df.__getitem__(key[1])
    row_loc, col_loc, ndim, self.row_scaler, self.col_scaler = _parse_tuple(key)
    # Grow the frame first if the locators reference not-yet-existing labels.
    self._handle_enlargement(row_loc, col_loc)
    row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
    # Single-position lookups on both axes collapse the result's ndim.
    ndim = (0 if len(row_lookup) == 1 else 1) + (0 if len(col_lookup) == 1 else 1)
    result = super(_LocIndexer, self).__getitem__(row_lookup, col_lookup, ndim)
    # Pandas drops the levels that are in the `loc`, so we have to as well.
    if hasattr(result, "index") and isinstance(result.index, pandas.MultiIndex):
        # A Series produced by a column locator drops the column levels from
        # its index; otherwise the row locator's levels are dropped.
        if (isinstance(result, Series) and not isinstance(col_loc, slice)
                and all(col_loc[i] in result.index.levels[i]
                        for i in range(len(col_loc)))):
            result.index = result.index.droplevel(list(range(len(col_loc))))
        elif all(row_loc[i] in result.index.levels[i] for i in range(len(row_loc))):
            result.index = result.index.droplevel(list(range(len(row_loc))))
    if (hasattr(result, "columns") and isinstance(result.columns, pandas.MultiIndex)
            and all(col_loc[i] in result.columns.levels[i]
                    for i in range(len(col_loc)))):
        result.columns = result.columns.droplevel(list(range(len(col_loc))))
    return result
def sdc_reindex_series_impl(arr, index, name, by_index):
    """Numba-compiled reindex: align ``arr`` (indexed by ``index``) to ``by_index``.

    Delegates the alignment to ``index.reindex``, which returns the new
    positional order (``None`` means the order is unchanged, so the data
    can be reused as-is).  Raises IndexingError when any target label is
    missing from the source index (marked by -1 in ``new_order``).
    """
    _, new_order = index.reindex(by_index)
    if new_order is not None:
        # unwrap numba Optional so it can be indexed inside prange
        new_order_as_array = _nonoptional(new_order)
        # parallel scan; index_mismatch is a prange reduction counter
        index_mismatch = 0
        for i in numba.prange(len(by_index)):
            if new_order_as_array[i] == -1:
                index_mismatch += 1
        if index_mismatch:
            # TO-DO: seems it covers only specific series reindex case, generalize?
            msg = "Unalignable boolean Series provided as indexer " + \
                  "(index of the boolean Series and of the indexed object do not match)."
            raise IndexingError(msg)
        res_data = numpy_like.take(arr, new_order_as_array)
    else:
        res_data = arr

    return pandas.Series(data=res_data, index=by_index, name=name)
def _parse_tuple(tup):
    """
    Unpack the user input for getitem and setitem and compute ndim.

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D

    Parameters
    ----------
    tup : tuple
        User input to unpack; a non-tuple is treated as a row locator.

    Returns
    -------
    row_loc : list
        Row locator(s); scalars are boxed into a single-element list.
    col_loc : list
        Column locator(s); scalars are boxed into a single-element list.
    ndim : {0, 1, 2}
        Number of dimensions of the located dataset.
    row_scaler : bool
        True if the row locator was a scalar, False otherwise.
    col_scaler : bool
        True if the column locator was a scalar, False otherwise.
    """
    row_loc, col_loc = slice(None), slice(None)
    if is_tuple(tup):
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
    else:
        row_loc = tup

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    row_loc = [row_loc] if row_scaler else row_loc
    col_loc = [col_loc] if col_scaler else col_loc
    return row_loc, col_loc, ndim, row_scaler, col_scaler
def _parse_row_and_column_locators(self, tup):
    """
    Unpack a getitem/setitem key into row/column locators and compute ndim.

    loc[a]     -> ([a], :),   1D
    loc[[a,b]] -> ([a,b], :),
    loc[a,b]   -> ([a], [b]), 0D

    Parameters
    ----------
    tup : tuple
        User input to unpack.

    Returns
    -------
    row_loc : scalar or list
        Row locator(s) as a scalar or List.
    col_list : scalar or list
        Column locator(s) as a scalar or List.
    ndim : {0, 1, 2}
        Number of dimensions of located dataset.
    """
    col_loc = slice(None)
    if not is_tuple(tup):
        row_loc = tup
    else:
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]

    # callable locators are evaluated against the frame being indexed
    if callable(row_loc):
        row_loc = row_loc(self.df)
    if callable(col_loc):
        col_loc = col_loc(self.df)
    return row_loc, col_loc, _compute_ndim(row_loc, col_loc)