def test_searchsorted(self, string_dtype):
    """Scalar lookups on a sorted string array return scalar positions."""
    values = pd.array(['a', 'b', 'c'], dtype=string_dtype)

    # 'left' reports the slot of the existing 'a'; 'right' the slot after it.
    for side, position in (('left', 0), ('right', 1)):
        found = values.searchsorted('a', side=side)
        assert is_scalar(found)
        assert found == position
def test_searchsorted(self):
    """Exercise ``searchsorted`` on a Categorical and the Series wrapping it.

    References:
    https://github.com/pandas-dev/pandas/issues/8420
    https://github.com/pandas-dev/pandas/issues/14522
    """
    values = ['cheese', 'milk', 'apple', 'bread', 'bread']
    categories = ['cheese', 'milk', 'apple', 'bread']
    c1 = Categorical(values, categories=categories, ordered=True)
    s1 = Series(c1)
    c2 = Categorical(values, categories=categories, ordered=False)
    s2 = Series(c2)
    ordered_objs = (c1, s1)

    # A single scalar needle (default side='left') yields a scalar position.
    for obj in ordered_objs:
        position = obj.searchsorted('apple')
        assert position == 2
        assert is_scalar(position)

    # A one-element list needle (default side='left') yields an array.
    expected = np.array([3], dtype=np.intp)
    for obj in ordered_objs:
        tm.assert_numpy_array_equal(obj.searchsorted(['bread']), expected)

    # Several needles at once, side='right'.
    expected = np.array([3, 5], dtype=np.intp)
    for obj in ordered_objs:
        positions = obj.searchsorted(['apple', 'bread'], side='right')
        tm.assert_numpy_array_equal(positions, expected)

    # Needles outside the categories are rejected, both for a lone value
    # and for a list in which only one of the values is unknown.
    msg = r"Value\(s\) to be inserted must be in categories"
    for needle in ('cucumber', ['bread', 'cucumber']):
        for obj in ordered_objs:
            with pytest.raises(KeyError, match=msg):
                obj.searchsorted(needle)

    # An unordered Categorical refuses searchsorted altogether.
    msg = "Categorical not ordered"
    for obj in (c2, s2):
        with pytest.raises(ValueError, match=msg):
            obj.searchsorted('apple')
def make_meta(x, index=None):
    """Build an empty pandas object (or a scalar) describing ``x``.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        * ``dict`` of ``{name: dtype}`` or an iterable of ``(name, dtype)``
          tuples: an empty DataFrame is produced.
        * a single ``(name, dtype)`` tuple: an empty Series is produced.
        * a pandas object: a zero-length slice of it is returned.
        * an object exposing ``_meta``: that attribute is returned as is.
        * a dtype or scalar: a scalar of that dtype is returned.
    index : pd.Index, optional
        Index whose zero-length slice is used for the result. When omitted,
        ``None`` is passed on to the constructors.

    Raises
    ------
    ValueError
        If a list/tuple is given whose items are not all 2-tuples.
    TypeError
        If no rule applies to ``x``.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    """
    if hasattr(x, '_meta'):
        return x._meta

    # Existing pandas containers: a zero-length slice keeps all metadata.
    if isinstance(x, (pd.Series, pd.DataFrame)):
        return x.iloc[0:0]
    if isinstance(x, pd.Index):
        return x[0:0]

    index = None if index is None else index[0:0]

    if isinstance(x, dict):
        columns = {
            name: _empty_series(name, dtype, index=index)
            for name, dtype in x.items()
        }
        return pd.DataFrame(columns, index=index)

    # A lone 2-tuple is read as (name, dtype) of a Series.
    if isinstance(x, tuple) and len(x) == 2:
        return _empty_series(x[0], x[1], index=index)

    if isinstance(x, (list, tuple)):
        if any(not isinstance(item, tuple) or len(item) != 2 for item in x):
            raise ValueError("Expected iterable of tuples of (name, dtype), "
                             "got {0}".format(x))
        columns = {
            name: _empty_series(name, dtype, index=index)
            for name, dtype in x
        }
        return pd.DataFrame(columns,
                            columns=[name for name, _ in x],
                            index=index)

    if x is not None and not hasattr(x, 'dtype'):
        # Could be a string, a dtype object, or a python type. ``None`` is
        # skipped because it would implicitly become ``dtype('f8')``, which
        # is not wanted here.
        try:
            return _scalar_from_dtype(np.dtype(x))
        except Exception:
            # Not interpretable as a dtype; fall through to the scalar check.
            pass

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError("Don't know how to create metadata from {0}".format(x))
def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype):
    """A scalar needle gives a scalar; a one-element list gives an array."""
    data = pd.array([1, 3, 90], dtype=any_real_dtype)

    position = data.searchsorted(30)
    assert is_scalar(position)
    assert position == 2

    positions = data.searchsorted([30])
    tm.assert_numpy_array_equal(positions, np.array([2], dtype=np.intp))
def __setitem__(self, key, value):
    """Store ``value`` at ``key`` after converting it to ``decimal.Decimal``.

    A list-like ``value`` is converted element by element and may only be
    assigned through a non-scalar ``key``; anything else is converted as a
    single ``Decimal``.

    Raises
    ------
    ValueError
        If a list-like ``value`` is assigned to a scalar ``key``.
    """
    if not is_list_like(value):
        value = decimal.Decimal(value)
    elif is_scalar(key):
        raise ValueError("setting an array element with a sequence.")
    else:
        value = [decimal.Decimal(item) for item in value]

    key = check_array_indexer(self, key)
    self._data[key] = value
def test_iloc_setitem_with_scalar_index(self, indexer, value):
    """GH #19474: positional assignment with a scalar row indexer.

    An assignment such as ``df.iloc[0, [0]] = ['Z']`` has to be applied
    element-wise rather than through ``setter('A', ['Z'])``.
    """
    frame = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
    frame.iloc[0, indexer] = value

    cell = frame.iloc[0, 0]
    assert is_scalar(cell)
    assert cell == "Z"
def test_loc_setitem_with_scalar_index(self, indexer, value):
    """GH #19474: label-based assignment with a scalar row indexer.

    An assignment such as ``df.loc[0, ['A']] = ['Z']`` has to be applied
    element-wise rather than through ``setter('A', ['Z'])``.
    """
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
    frame.loc[0, indexer] = value

    cell = frame.loc[0, 'A']
    assert is_scalar(cell)
    assert cell == 'Z'
def _copy_if_else(
    self,
    cond,
    other=None,
    inplace=False,
    axis=None,
    level=None,
    errors="raise",
    try_cast=False,
    negate=False,
):
    """Validate ``cond``/``other`` and delegate to the frame's ``copy_if_else``.

    ``cond`` is aligned to ``self`` and must contain only boolean columns.
    ``other`` is either a scalar (sanitized) or a frame-like that is aligned
    to ``self`` and must have the same dtypes. ``level``, a non-zero ``axis``
    and ``try_cast`` are rejected as unsupported. ``errors`` is accepted but
    not used here.

    Raises
    ------
    ValueError
        If ``cond`` or ``other`` has more dimensions than ``self``, if
        ``cond`` is not boolean, or if ``other`` has different dtypes.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    axis = self._get_axis_number(axis, 0)

    # Reject options that are not implemented.
    if level is not None:
        raise err._unsupported_error("level", level)
    for option, given, supported in (
        ("axis", axis, 0),
        ("try_cast", try_cast, False),
    ):
        if given not in (supported,):
            raise err._unsupported_error(option, given)

    # Checks on cond
    cond = self._ensure_valid_frame(cond)
    if self.ndim < cond.ndim:
        raise ValueError(
            "cannot use the higher dimensional dataframe for 'cond'")
    _, cond = self._align_frame(cond, join="left", broadcast_axis=1)
    if not all(is_bool_dtype(dtype) for dtype in cond._get_dtypes()):
        raise ValueError("'cond' must have only boolean values")

    # Checks on other
    if is_scalar(other):
        other = util.sanitize_scalar(other)
    else:
        other = self._ensure_valid_frame(other)
        if self.ndim < other.ndim:
            raise ValueError(
                "cannot use the higher dimensional dataframe for 'other'")
        _, other = self._align_frame(other, join="left", broadcast_axis=1)

        dtype_pairs = zip(self._get_dtypes(), other._get_dtypes())
        if any(mine != theirs for mine, theirs in dtype_pairs):
            raise ValueError("'other' must have the same type as self")
        other = other._frame

    frame = self._frame.copy_if_else(cond._frame, other, negate=negate)
    return self._create_or_update_frame(frame, inplace)
def _check_op_integer(self, result, expected, mask, s, op_name, other):
    """Massage ``expected`` into ``s.dtype`` and compare it with ``result``.

    ``expected`` and ``mask`` are modified in place along the way. The
    final comparison is ``tm.assert_series_equal(result, expected)``; for
    dtypes that are not unsigned an additional float round-trip check of
    the cast is made first.
    """
    # check comparisons that are resulting in integer dtypes

    # to compare properly, we convert the expected
    # to float, mask to nans and convert infs
    # if we have uints then we process as uints
    # then convert to float
    # and we ultimately want to create an IntArray
    # for comparisons

    fill_value = 0

    # mod/rmod turn floating 0 into NaN while
    # integer works as expected (no nan)
    if op_name in ['__mod__', '__rmod__']:
        if is_scalar(other):
            if other == 0:
                expected[s.values == 0] = 0
            else:
                expected = expected.fillna(0)
        else:
            expected[(s.values == 0) & ((expected == 0) | expected.isna())] = 0
    try:
        expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
        original = expected
        expected = expected.astype(s.dtype)
    except ValueError:
        # retry after an explicit cast to float
        expected = expected.astype(float)
        expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
        original = expected
        expected = expected.astype(s.dtype)

    expected[mask] = np.nan

    # assert that the expected astype is ok
    # (skip for unsigned as they have wrap around)
    if not s.dtype.is_unsigned_integer:
        original = pd.Series(original)

        # we need to fill with 0's to emulate what an astype('int') does
        # (truncation) for certain ops
        if op_name in ['__rtruediv__', '__rdiv__']:
            mask |= original.isna()
            original = original.fillna(0).astype('int')

        original = original.astype('float')
        original[mask] = np.nan
        tm.assert_series_equal(original, expected.astype('float'))

    # assert our expected result
    tm.assert_series_equal(result, expected)
def construct_result(self, result, columns, out_ndim, row_scalar):
    """Wrap a raw ``result`` according to the requested dimensionality.

    For ``out_ndim > 0`` the result is wrapped via ``self.df.__ctor__`` and,
    when ``out_ndim == 1``, squeezed along axis 1. Otherwise it is converted
    with ``to_pandas()`` and squeezed.

    Raises
    ------
    _NotFoundError
        If ``row_scalar`` is set and the outcome is an empty non-scalar.
    """
    if out_ndim <= 0:
        result = result.to_pandas().squeeze()
    else:
        result = self.df.__ctor__(frame=result, columns=columns)
        if out_ndim == 1:
            result = result.squeeze(axis=1)

    # A scalar row lookup that produced nothing means the label is absent.
    lookup_missed = row_scalar and not is_scalar(result) and len(result) == 0
    if lookup_missed:
        raise _NotFoundError()
    return result
def meta_nonempty_object(x):
    """Create a nonempty pandas object from the given metadata.

    This fallback only handles scalars, which are passed to
    ``_nonempty_scalar``; every other input is rejected.

    Raises
    ------
    TypeError
        If ``x`` is not a scalar.
    """
    if not is_scalar(x):
        raise TypeError("Expected Index, Series, DataFrame, or scalar, "
                        "got {0}".format(type(x).__name__))
    return _nonempty_scalar(x)
def construct_result(self, result, out_ndim, row_scalar):
    """Wrap a raw ``result`` as a series (``out_ndim == 1``) or a scalar.

    For ``out_ndim == 1`` the result is wrapped via ``self.sr.__ctor__``
    under the series' name. Any other value must be ``0``; the result is
    then converted with ``to_pandas()`` and squeezed.

    Raises
    ------
    _NotFoundError
        If ``row_scalar`` is set and the outcome is an empty non-scalar.
    """
    if out_ndim != 1:
        assert out_ndim == 0
        result = result.to_pandas().squeeze()
    else:
        result = self.sr.__ctor__(frame=result, name=self.sr.name)

    # A scalar row lookup that produced nothing means the label is absent.
    lookup_missed = row_scalar and not is_scalar(result) and len(result) == 0
    if lookup_missed:
        raise _NotFoundError()
    return result
def create_column_from_scalar(self, value):
    """Return a one-column ``Table`` whose column is filled with ``value``.

    The column is created on this table's index storage with the dtype
    inferred from ``value`` (nullable when ``value`` is ``None``) and is
    filled to the index volume.
    """
    assert is_scalar(value)
    assert self._index is not None

    filled = _create_column(
        self._index.storage,
        ty.infer_dtype(value),
        nullable=value is None,
    )
    filled.fill(value, self._index.volume)
    return Table(self._runtime, self._index, [filled])
def insert(self, loc, value):
    """Return the table produced by placing one column at position ``loc``.

    A scalar ``value`` is first expanded with ``create_column_from_scalar``.
    After that, ``value`` must hold exactly one column. The result comes
    from ``replace_columns`` with the new column spliced in.
    """
    assert 0 <= loc <= len(self._columns)

    if is_scalar(value):
        value = self.create_column_from_scalar(value)
    assert len(value._columns) == 1

    new_column = value._columns[0]
    before, after = self._columns[:loc], self._columns[loc:]
    return self.replace_columns(before + [new_column] + after)
def meta_nonempty_object(x):
    """Create a nonempty pandas object from the given metadata.

    This fallback only handles scalars, which are passed to
    ``_nonempty_scalar``; every other input is rejected.

    Raises
    ------
    TypeError
        If ``x`` is not a scalar.
    """
    if not is_scalar(x):
        raise TypeError(
            "Expected Pandas-like Index, Series, DataFrame, or scalar, "
            f"got {typename(type(x))}")
    return _nonempty_scalar(x)
def flatten_preds_if_necessary(df):
    """Flatten prediction columns whose cells are a list wrapped in a list.

    This is necessary because of an issue with the predict.py script prior
    to the update performed on 15-09-2021.

    Every column whose name contains ``'pred'`` is inspected through its
    first row: if the first element of that cell is not a scalar, each cell
    of the column is replaced by its first element. ``df`` is modified in
    place and also returned.

    A frame without rows has nothing to inspect and is returned unchanged
    (previously this raised ``IndexError``).
    """
    if len(df) == 0:
        return df

    pred_cols = [col for col in df.columns if 'pred' in col]
    for col in pred_cols:
        first_cell = df[col].iloc[0]
        if is_scalar(first_cell[0]):
            # Already flat: the cell directly holds the values.
            continue
        df[col] = df[col].str[0]
    return df
def __drop_na(self, df, index, columns):
    """Return a copy of ``df`` with missing values dropped or relabelled.

    When ``self.na == 'drop'`` rows with missing values in ``columns`` are
    dropped. Otherwise the ``columns`` and ``index`` items get
    ``self.na_cat`` added as a category and their missing values filled
    with it.
    """
    add_cat = series.add_category

    def fill_missing(s):
        return add_cat(s, self.na_cat).fillna(self.na_cat)

    def as_list(x):
        if x is None:
            return []
        return [x] if is_scalar(x) else x

    df = df.copy()
    if self.na == 'drop':
        return df.dropna(subset=as_list(columns))

    candidates = as_list(columns) + as_list(index)
    selection = [item for item in candidates if item is not None]
    df[selection] = df[selection].apply(fill_missing)
    return df
def grouper_match(grp1: GroupByAgg, grp2):
    """Pair an aggregated groupby with a scalar or another grouped series.

    Returns a 3-tuple ``(left, right, groupby)``. For a scalar ``grp2`` this
    is ``(grp1.obj, grp2, grp1)``; otherwise ``grp1`` is broadcast and the
    tuple is ``(broadcast_agg(grp1), grp2.obj, grp2)``.

    Raises
    ------
    TypeError
        If ``grp2`` is neither a scalar nor a ``SeriesGroupBy``.
    ValueError
        If the two groupbys are not compatible.
    """
    if not is_scalar(grp2):
        # Broadcasting requires a non-agg groupby with the same grouper.
        if not isinstance(grp2, SeriesGroupBy):
            raise TypeError("grp2 must be a scalar or SeriesGroupBy")
        if not is_compatible(grp1, grp2):
            raise ValueError("groups must have matching groupers")
        return broadcast_agg(grp1), grp2.obj, grp2

    # No need to broadcast against a scalar (pandas will handle it).
    return grp1.obj, grp2, grp1
def fillna(self, value=None, method=None, limit=None):
    """
    Fill NA/NaN values using the specified method.

    Parameters
    ----------
    value : scalar, array-like
        A scalar is used for every missing entry. An array-like must have
        the same length as ``self``; only its entries at missing positions
        are used.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes:
        pad / ffill propagate the last valid observation forward,
        backfill / bfill use the next valid observation.
    limit : int, default None
        Passed on to the pad/backfill routine when ``method`` is given.
        It is not used when filling with ``value``.

    Returns
    -------
    filled : ExtensionArray with NA/NaN filled

    Raises
    ------
    ValueError
        If an array-like ``value`` differs in length from ``self``.
    """
    from pandas.api.types import is_scalar
    from pandas.util._validators import validate_fillna_kwargs
    from pandas.core.missing import pad_1d, backfill_1d

    value, method = validate_fillna_kwargs(value, method)
    mask = self.isna()

    if not is_scalar(value):
        if len(value) != len(self):
            raise ValueError("Length of 'value' does not match. Got ({}) "
                             " expected {}".format(len(value), len(self)))
        value = value[mask]

    # Nothing is missing: hand back a plain copy.
    if not mask.any():
        return self.copy()

    if method is None:
        # fill with value
        filled = self.copy()
        filled[mask] = value
        return filled

    filler = pad_1d if method == 'pad' else backfill_1d
    filled = filler(self.astype(object), limit=limit, mask=mask)
    return self._constructor_from_sequence(filled)
def __setitem__(self, key, item):
    """Assign ``item`` to the rows of the wrapped series selected by ``key``.

    Three kinds of row locators are handled: a scalar (``item`` must be a
    scalar too), a slice, and a 1-dimensional boolean indexer. The updated
    column is installed with ``update_column``.

    Raises
    ------
    ValueError
        If a scalar locator gets a non-scalar ``item`` or the indexer is
        not 1-dimensional.
    """
    row_loc, row_scalar, _ = self._validate_locator(key)
    sr = self.sr
    self._validate_lhs(sr)

    if row_scalar:
        if not is_scalar(item):
            raise ValueError("Value must be a scalar")
        updated = sr._frame.write_at(row_loc, item)

    elif isinstance(row_loc, slice):
        if row_loc == slice(None):
            # Full slice: the aligned right-hand side replaces everything.
            updated = self._align_rhs(sr, sr._frame._index, item)
        else:
            index, bounds = sr._frame.slice_index_by_slice(row_loc, False)
            item = self._align_rhs(sr, index, item)
            updated = sr._frame.scatter_by_slice(index, bounds, item)

    else:
        row_loc = sr._ensure_valid_frame(row_loc)
        if not row_loc._is_series:
            raise ValueError("indexer must be 1-dimensional")
        if not is_bool_dtype(row_loc.dtype):
            raise err._unsupported_error(
                "only boolean indexers are supported now")

        # This may raise an exception if the indexer size doesn't match
        # with the index of the LHS.
        row_loc = row_loc._frame.update_legate_index(sr._raw_index)
        index = sr._frame.slice_index_by_boolean_mask(row_loc)
        item = self._align_rhs(sr, index, item)
        updated = sr._frame.scatter_by_boolean_mask(row_loc, index, item)

    self.update_column(updated)
def _parse_tuple(tup):
    """Split a getitem/setitem argument into row and column locators.

    Returns ``(row_loc, col_loc, ndim)``, where a scalar locator is wrapped
    in a one-element list and ``ndim`` comes from ``_compute_ndim``:

    loc[a]       -> ([a], :), 1D
    loc[[a,b],]  -> ([a,b], :),
    loc[a,b]     -> ([a], [b]), 0D

    Raises ``IndexingError`` when more than two indexers are given.
    """
    col_loc = slice(None)
    if not is_tuple(tup):
        row_loc = tup
    else:
        row_loc = tup[0]
        n_indexers = len(tup)
        if n_indexers == 2:
            col_loc = tup[1]
        if n_indexers > 2:
            raise IndexingError('Too many indexers')

    # ndim is computed on the raw locators, before scalars are wrapped.
    ndim = _compute_ndim(row_loc, col_loc)
    if is_scalar(row_loc):
        row_loc = [row_loc]
    if is_scalar(col_loc):
        col_loc = [col_loc]
    return row_loc, col_loc, ndim
def _align_rhs(self, lhs, align_index, rhs):
    """Align a right-hand side to ``align_index`` and the columns of ``lhs``.

    Scalars are returned untouched. Anything else is converted to a frame,
    left-aligned against an empty frame built from ``align_index`` and
    ``lhs.columns``, and the aligned raw frame is returned.
    """
    if is_scalar(rhs):
        return rhs

    to_align = self.df.__ctor__(index=align_index, columns=lhs.columns)
    rhs = to_align._ensure_valid_frame(rhs)
    _, aligned = to_align._align_frame(rhs, join="left", broadcast_axis=1)

    # FIXME: For now we allow only aligned frames.
    columns_match = rhs._is_series or rhs.columns.equals(aligned.columns)
    if not columns_match:
        raise err._unsupported_error(
            "Unaligned frames cannot be used for in-place updates")
    return aligned._frame
def infer_dtype_bydata(data):
    """Infer a ``DataType`` for ``data``.

    The checks run in this order:

    1. A scalar is delegated to ``infer_dtype_by_scaladata``.
    2. For list-like or array-like data, ``infer_dtype`` is consulted. If it
       succeeds, a numeric mapping yields ``DataType.FLOAT_VECTOR`` and
       everything else ``DataType.UNKNOWN``.
    3. If ``infer_dtype`` raised ``TypeError`` (or the data is neither
       list-like nor array-like), the first element is tried as a scalar.
    4. As a last resort a ``dtype`` attribute on ``data`` is mapped.

    Returns ``DataType.UNKNOWN`` when nothing matches.
    """
    d_type = DataType.UNKNOWN
    if is_scalar(data):
        return infer_dtype_by_scaladata(data)

    if is_list_like(data) or is_array_like(data):
        try:
            type_str = infer_dtype(data)
        except TypeError:
            # Fall through to the element-based and dtype-based checks.
            pass
        else:
            d_type = dtype_str_map.get(type_str, DataType.UNKNOWN)
            if is_numeric_datatype(d_type):
                return DataType.FLOAT_VECTOR
            return DataType.UNKNOWN

    if d_type == DataType.UNKNOWN:
        try:
            elem = data[0]
        except Exception:
            # Was a bare ``except:``, which would also have swallowed
            # KeyboardInterrupt and SystemExit.
            elem = None

        if elem is not None and is_scalar(elem):
            d_type = infer_dtype_by_scaladata(elem)

    if d_type == DataType.UNKNOWN:
        _dtype = getattr(data, "dtype", None)
        if _dtype is not None:
            d_type = map_numpy_dtype_to_datatype(_dtype)

    return d_type
def _validate_locators(self, tup):
    """Split and validate the row/column locators of an indexing call.

    Returns ``(row_loc, col_loc, row_scalar, col_scalar, ndim)``. A scalar
    (or tuple) column locator is returned wrapped in a one-element list.

    Raises
    ------
    ValueError
        For more than two indexers, and, in ``at``/``iat`` mode, when there
        are not exactly two indexers or they have the wrong kind.
    """
    row_loc, col_loc = tup, slice(None)
    if util.is_tuple(tup) and len(tup) >= 1:
        if len(tup) > 2:
            raise ValueError("Too many indexers")
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]

    if isinstance(row_loc, slice) and row_loc.step is not None:
        raise err._unsupported_error(
            "row slicer cannot have a step for now")

    # Tuples count as scalar locators as well.
    row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
    col_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)

    if self.is_at:
        if not (util.is_tuple(tup) and len(tup) == 2):
            raise ValueError("Need two indexers")

        if self.is_loc:
            if not (row_scalar and col_scalar):
                raise ValueError(
                    "At based indexing can only have scalar indexers")
        elif not (is_integer(row_loc) and is_integer(col_loc)):
            raise ValueError(
                "iAt based indexing can only have integer indexers")

    ndim = _compute_ndim(row_loc, col_loc)
    wrapped_col_loc = [col_loc] if col_scalar else col_loc
    return (row_loc, wrapped_col_loc, row_scalar, col_scalar, ndim)
def _binary_op(self, op, other, axis=None, level=None, fill_value=None):
    """Apply the binary operator ``op`` between ``self`` and ``other``.

    A scalar ``other`` is sanitized and applied directly. Anything else is
    outer-aligned with ``self`` (using ``fill_value``) before the frames
    are combined. The result is a ``Series`` when the left operand is a
    series and a ``DataFrame`` otherwise.
    """
    # Retrieve arguments and convert them to default ones if necessary
    axis = self._get_axis_number(axis)

    # Raise an exception for cases that are not implemented yet
    if level is not None:
        raise err._unsupported_error("level", level)

    other = self._ensure_valid_frame(other)
    other_is_scalar = is_scalar(other)

    if not (self._is_series or other_is_scalar):
        if other._is_series and axis not in (0, ):
            raise err._unsupported_error("axis", axis)

    # Convert the RHS to a frame unless it's a scalar
    if other_is_scalar:
        lhs = self
        other = util.sanitize_scalar(other)
    else:
        lhs, other = self._align_frame(other,
                                       join="outer",
                                       fill_value=fill_value,
                                       broadcast_axis=1)
        other = other._frame

    new_frame = lhs._frame.binary_op(op, other)

    if not lhs._is_series:
        from .dataframe import DataFrame

        return DataFrame(frame=new_frame, columns=lhs.columns)

    from .series import Series

    return Series(frame=new_frame, name=lhs.name)
def __getitem__(self, key):
    """Select from the frame according to the kind of ``key``.

    * scalar or tuple: column lookup by label
    * slice: positional row selection via ``iloc``
    * DataFrame: ``where`` with the key as condition
    * Series: row selection via ``loc``
    * other list-like: ``loc`` for boolean indexers, otherwise column
      lookup by labels

    Raises
    ------
    ValueError
        If ``key`` is of none of the kinds above.
    """
    if is_scalar(key) or isinstance(key, tuple):
        return self._get_columns_by_labels(key)
    elif isinstance(key, slice):
        return self.iloc[key]
    elif isinstance(key, (DataFrame, pandas.DataFrame)):
        return self.where(key)
    elif isinstance(key, Series):
        return self.loc[key]
    elif is_list_like(key):
        if is_bool_indexer(key):
            return self.loc[key]
        else:
            return self._get_columns_by_labels(key)
    else:
        # ``__name__`` was misspelled as ``__name``, which raised
        # AttributeError before the intended ValueError could be built.
        raise ValueError(f"Unsupported key type '{type(key).__name__}'")
def method(self, other):
    """Apply ``op`` chunk by chunk between this array and ``other``.

    ``other`` may be a scalar or an array-like of the same length. ``op``
    is only evaluated at positions where neither operand is missing.

    Arithmetic operators (those listed in ``ops.ARITHMETIC_BINOPS``) return
    an ``ArrowStringArray``; all others return a masked
    ``pd.arrays.BooleanArray``.
    """
    is_arithmetic = op.__name__ in ops.ARITHMETIC_BINOPS

    is_other_array = not is_scalar(other)
    if is_other_array:
        other = np.asarray(other)

    mask = self.isna() | pd.isna(other)

    chunks = []
    mask_chunks = []
    start = 0
    for chunk_array in self._arrow_array.chunks:
        chunk_array = np.asarray(chunk_array.to_pandas())
        end = start + len(chunk_array)
        chunk_mask = mask[start:end]
        chunk_valid = ~chunk_mask

        if is_arithmetic:
            result = np.empty(chunk_array.shape, dtype=object)
        else:
            result = np.zeros(chunk_array.shape, dtype=bool)

        chunk_other = other
        if is_other_array:
            chunk_other = other[start:end]
            chunk_other = chunk_other[chunk_valid]

        # calculate only for both not None
        result[chunk_valid] = op(chunk_array[chunk_valid], chunk_other)

        if is_arithmetic:
            chunks.append(
                pa.array(result, type=pa.string(), from_pandas=True))
        else:
            chunks.append(result)
            mask_chunks.append(chunk_mask)

        # Advance the window. Without this every chunk after the first
        # reused the first chunk's slice of ``mask`` and ``other``.
        start = end

    if is_arithmetic:
        return ArrowStringArray(pa.chunked_array(chunks))
    else:
        return pd.arrays.BooleanArray(np.concatenate(chunks),
                                      np.concatenate(mask_chunks))
def is_scalar(val):
    """Return True if given object is scalar.

    Parameters
    ----------
    val : object
        Possibly scalar object.

    Returns
    -------
    bool
        True for a cudf ``DeviceScalar``, ``Scalar`` or ``DateOffset``, and
        for anything ``pd_types.is_scalar`` accepts.
    """
    if isinstance(val, cudf._lib.scalar.DeviceScalar):
        return True
    if isinstance(val, cudf.Scalar):
        return True
    if isinstance(val, cudf.core.tools.datetimes.DateOffset):
        return True
    return pd_types.is_scalar(val)
def _get_columns_by_labels(self, key):
    """Select columns by label.

    A scalar or tuple ``key`` names a single column and the result is
    squeezed along axis 1; otherwise the sliced frame is returned.

    Raises
    ------
    KeyError
        For the first requested label that is not among the columns.
    """
    single_label = is_scalar(key) or isinstance(key, tuple)
    labels = util.to_list_if_scalar(key)
    columns = self.columns

    # Validate keys
    for label in labels:
        if label not in columns:
            raise KeyError(label)

    selected = self._slice_columns(columns.get_indexer_for(labels))
    if not single_label:
        return selected

    assert len(selected.columns) == 1
    return selected.squeeze(axis=1)
def __getitem__(self, key):
    """Select rows of the wrapped series by scalar, slice or boolean mask.

    Raises
    ------
    KeyError
        If a non-scalar locator is used as a scalar on a single-level
        index, or if a scalar lookup finds nothing.
    """
    row_loc, row_scalar, out_ndim = self._validate_locator(key)
    sr = self.sr

    if row_scalar:
        index = sr._raw_index
        if index.nlevels == 1 and not is_scalar(row_loc):
            raise KeyError("row indexer must be a scalar")

        result = sr._frame.select(index == row_loc)

        # If the frame has a multi-index, we need to check if it was
        # a partial match and handle the output accordingly (only to
        # make the output the same as Pandas' and for no other reason...)
        matched_levels = len(util.to_tuple_if_scalar(row_loc))
        if index.nlevels > matched_levels:
            # A partial match must not be squeezed down to a scalar,
            out_ndim += 1
            # and the matched levels are dropped from the result.
            result = result.droplevel(range(matched_levels))

    elif isinstance(row_loc, slice):
        if row_loc == slice(None):
            result = sr._frame
        else:
            result = sr._frame.slice_rows_by_slice(row_loc, True)

    else:
        row_loc = sr._ensure_valid_frame(row_loc)
        _, row_loc = sr._align_frame(row_loc, join="left", axis=0)
        if not is_bool_dtype(row_loc.dtype):
            raise err._unsupported_error(
                "only boolean indexers are supported now")
        result = sr._frame.select(row_loc._frame)

    try:
        return super().construct_result(result, out_ndim, row_scalar)
    except _NotFoundError:
        raise KeyError(row_loc)
def _write_one_pair(key, value):
    """Write one ``key``/``value`` pair using a writer that fits ``value``.

    Scalars are collected in ``scalar_dict`` (numpy scalars are converted
    with ``item()`` first). Arrays, data frames, mappings and sparse
    matrices go to the matching ``self.write_*`` method on ``sub_group``.
    Anything else is converted to an array and written as such.
    """
    if is_scalar(value):
        if type(value).__module__ == 'numpy':
            value = value.item()
        scalar_dict[key] = value
        return

    if isinstance(value, np.ndarray):
        self.write_array(sub_group, key, value)
        return

    if isinstance(value, pd.DataFrame):
        self.write_dataframe(sub_group, key, value)
        return

    if is_dict_like(value):
        self.write_mapping(sub_group, key, value)
        return

    if issparse(value):
        assert isinstance(value, csr_matrix)
        self.write_csr(sub_group, key, value)
        return

    # assume value is either list or tuple, converting it to np.ndarray
    if is_categorical_dtype(value):
        as_array = value.astype(str)
    else:
        as_array = np.array(value)
    self.write_array(sub_group, key, as_array)
def __setitem__(self, key, value):
    """Assign ``value`` at ``key`` and rebuild the backing arrow array.

    With a scalar ``key`` the new item must be list-like or a missing
    value; a missing value is stored as ``None``.

    Raises
    ------
    ValueError
        If a scalar ``key`` is given a value that is neither list-like nor
        missing.
    """
    if isinstance(value, (pd.Index, pd.Series)):
        value = value.to_numpy()

    key = check_array_indexer(self, key)

    # validate new items
    # The list-like test has to come first: ``pd.isna`` on a list returns
    # an array, and taking its truth value raises for two or more items.
    if is_scalar(key) and not is_list_like(value):
        if pd.isna(value):
            value = None
        else:
            raise ValueError('Must provide list.')

    array = np.asarray(self._arrow_array.to_pandas())
    array[key] = value
    self._arrow_array = pa.chunked_array(
        [pa.array(array, type=self.dtype.arrow_type)])
def __call__(self, arg):
    """Build the datetime conversion of ``arg``.

    A scalar is converted eagerly with ``pd.to_datetime`` and wrapped as a
    tensor. Series, dict-like/DataFrame, Index and other array-like inputs
    each produce a new lazy object with ``datetime64[ns]`` dtype.

    Raises
    ------
    ValueError
        If a dict-like/DataFrame does not have exactly the columns
        ``year``, ``month`` and ``day``.
    TypeError
        If a generic array-like input is not 1-dimensional.
    """
    if is_scalar(arg):
        options = dict(
            errors=self._errors, dayfirst=self._dayfirst,
            yearfirst=self._yearfirst, utc=self._utc,
            format=self._format, exact=self._exact, unit=self._unit,
            infer_datetime_format=self._infer_datetime_format,
            origin=self._origin, cache=self._cache)
        return astensor(pd.to_datetime(arg, **options))

    dtype = np.datetime64(1, 'ns').dtype

    if isinstance(arg, (pd.Series, SERIES_TYPE)):
        arg = asseries(arg)
        self._object_type = ObjectType.series
        return self.new_series([arg], shape=arg.shape, dtype=dtype,
                               index_value=arg.index_value, name=arg.name)

    if is_dict_like(arg) or isinstance(arg, DATAFRAME_TYPE):
        arg = asdataframe(arg)
        columns = arg.columns_value.to_pandas().tolist()
        if sorted(columns) != sorted(['year', 'month', 'day']):
            missing = ','.join(c for c in ['day', 'month', 'year']
                               if c not in columns)
            raise ValueError('to assemble mappings requires at least '
                             'that [year, month, day] be specified: [{}] is missing'.format(missing))
        self._object_type = ObjectType.series
        return self.new_series([arg], shape=(arg.shape[0],), dtype=dtype,
                               index_value=arg.index_value)

    if isinstance(arg, (pd.Index, INDEX_TYPE)):
        arg = asindex(arg)
        self._object_type = ObjectType.index
        # NOTE(review): this branch sets the object type to ``index`` but
        # builds the result with ``new_series`` - confirm this is intended.
        empty_index = pd.Index([], dtype=dtype)
        return self.new_series([arg], shape=arg.shape, dtype=dtype,
                               index_value=parse_index(empty_index,
                                                       self._params, arg),
                               name=arg.name)

    arg = astensor(arg)
    if arg.ndim != 1:
        raise TypeError('arg must be a string, datetime, '
                        'list, tuple, 1-d tensor, or Series')
    self._object_type = ObjectType.index
    empty_index = pd.Index([], dtype=dtype)
    return self.new_index([arg], shape=arg.shape, dtype=dtype,
                          index_value=parse_index(empty_index,
                                                  self._params, arg))
def _ensure_valid_frame(self, data, copy=False):
    """Convert ``data`` into something usable as an operand of ``self``.

    * scalars and tuples are returned unchanged
    * a ``Frame`` is copied (deeply when ``copy`` is true)
    * pandas DataFrame/Series are wrapped in the matching class
    * a 1-d ndarray becomes a ``Series`` on this object's index, a 2-d
      ndarray a ``DataFrame`` with this object's columns and index
    * any other list-like is converted through ``np.array``

    Raises
    ------
    ValueError
        If a 1-d array's length differs from ``len(self)``, if an array has
        more than two dimensions, or if the type is unsupported.
    Exception
        If a 2-d array is given while ``self`` is a series.
    """
    if is_scalar(data) or util.is_tuple(data):
        return data
    elif isinstance(data, Frame):
        return data.copy(deep=copy)
    elif isinstance(data, pandas.DataFrame):
        from .dataframe import DataFrame

        return DataFrame(data)
    elif isinstance(data, pandas.Series):
        from .series import Series

        return Series(data)
    elif isinstance(data, np.ndarray):
        # TODO: Here we assume that the axis to which we align the ndarray
        #       is the index, but we really should be choosing between
        #       the index and the columns, depending on the axis argument.
        if data.ndim == 1:
            if len(self) != len(data):
                # The passed values are ``data`` and the index belongs to
                # ``self``; the two lengths were reported swapped before.
                raise ValueError(
                    f"Length of passed values is {len(data)}, "
                    f"index implies {len(self)}.")

            from .series import Series

            name = self.name if self._is_series else None
            return Series(data, name=name, index=self._raw_index)
        elif data.ndim == 2:
            if self._is_series:
                raise Exception("Data must be 1-dimensional")

            from .dataframe import DataFrame

            return DataFrame(data,
                             columns=self.columns,
                             index=self._raw_index)
        else:
            raise ValueError("array must be either 1-d or 2-d")
    elif is_list_like(data):
        return self._ensure_valid_frame(np.array(data))
    else:
        raise ValueError(f"unsupported value type '{type(data)}'")
def __setitem__(self, key, value):
    """Store ``value`` at ``key``, converting quantities to ``self.units``.

    A ``_Quantity`` (or a list-like whose first item is one) is converted
    to this array's units and reduced to its magnitude before it is
    written to ``self._data``. An empty list or tuple is ignored.
    """
    # need to not use `not value` on numpy arrays
    if isinstance(value, (list, tuple)) and (not value):
        # doing nothing here seems to be ok
        return

    if isinstance(value, _Quantity):
        value = value.to(self.units).magnitude
    elif (is_list_like(value) and len(value) > 0
            and isinstance(value[0], _Quantity)):
        # The length guard keeps an empty list-like (e.g. an empty numpy
        # array) from failing on ``value[0]``.
        value = [item.to(self.units).magnitude for item in value]

    # The former wrap-in-a-list / unwrap-again step for scalars left the
    # value unchanged and has been removed.
    self._data[key] = value
def test_searchsorted_monotonic(indices):
    """GH17271: ``_searchsorted_monotonic`` and ``searchsorted`` agree.

    Tuple searches in MultiIndex and Interval searches in IntervalIndex are
    not implemented, and an empty index has nothing to test, so those
    inputs are skipped.
    """
    if isinstance(indices, (MultiIndex, IntervalIndex)) or indices.empty:
        return

    value = indices[0]

    # determine the expected results (handle dupes for 'right')
    expected = {'left': 0, 'right': (indices == value).argmin()}
    if expected['right'] == 0:
        # all values are the same, so 'right' lands at the very end
        expected['right'] = len(indices)

    increasing = indices.is_monotonic_increasing
    if not (increasing or indices.is_monotonic_decreasing):
        # non-monotonic should raise.
        with pytest.raises(ValueError):
            indices._searchsorted_monotonic(value, side='left')
        return

    # _searchsorted_monotonic is checked for both directions ...
    for side in ('left', 'right'):
        position = indices._searchsorted_monotonic(value, side=side)
        assert is_scalar(position)
        assert expected[side] == position

    # ... searchsorted itself only for increasing indexes.
    if increasing:
        for side in ('left', 'right'):
            position = indices.searchsorted(value, side=side)
            assert is_scalar(position)
            assert expected[side] == position
def meta_nonempty(x):
    """Create a nonempty pandas object from the given metadata.

    Index, Series and DataFrame inputs are rebuilt from the ``_nonempty_*``
    helpers; a scalar goes through ``_nonempty_scalar``.

    Raises
    ------
    TypeError
        If ``x`` is none of the supported kinds.
    """
    if isinstance(x, pd.Index):
        return _nonempty_index(x)

    if isinstance(x, pd.Series):
        return _nonempty_series(x, _nonempty_index(x.index))

    if isinstance(x, pd.DataFrame):
        idx = _nonempty_index(x.index)
        n_columns = len(x.columns)
        # Columns are built by position and labelled afterwards.
        data = {pos: _nonempty_series(x.iloc[:, pos], idx)
                for pos in range(n_columns)}
        res = pd.DataFrame(data, index=idx, columns=np.arange(n_columns))
        res.columns = x.columns
        return res

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError("Expected Index, Series, DataFrame, or scalar, "
                    "got {0}".format(type(x).__name__))
def test_search_sorted_datetime64_scalar(self, arr, val):
    """A scalar datetime needle yields the scalar position ``1``."""
    position = pd.array(arr).searchsorted(val)
    assert is_scalar(position)
    assert position == 1