def _parse_no_numpy(self):
    """
    Decode ``self.json`` with ``loads`` and store the built object on ``self.obj``.

    When ``self.orient`` is ``"split"`` the decoded payload is used as a
    mapping of constructor keyword arguments: its keys are coerced to ``str``,
    checked by ``self.check_keys_split`` and unpacked into
    ``create_series_with_explicit_dtype``. For every other orient the decoded
    payload is passed as the single positional argument together with
    ``dtype_if_empty=object``.
    """
    payload = loads(self.json, precise_float=self.precise_float)

    if self.orient != "split":
        self.obj = create_series_with_explicit_dtype(payload, dtype_if_empty=object)
        return

    # "split" payloads carry the constructor arguments themselves.
    ctor_kwargs = {str(key): value for key, value in payload.items()}
    self.check_keys_split(ctor_kwargs)
    self.obj = create_series_with_explicit_dtype(**ctor_kwargs)
def wrap_results(
    self, results: ResType, res_index: "Index"
) -> Union["Series", "DataFrame"]:
    """
    Turn the collected ``results`` into the final return object.

    If ``results`` is non-empty, contains the key ``0`` and its first entry
    is a sequence, the work is delegated to ``self.wrap_results_for_axis``.
    Otherwise ``results`` is treated as a dict of scalars and wrapped with
    the sliced constructor of ``self.obj``; the wrapped object's index is
    then replaced by ``res_index``.
    """
    from pandas import Series

    # see if we can infer the results
    first_is_sequence = (
        len(results) > 0 and 0 in results and is_sequence(results[0])
    )
    if first_is_sequence:
        return self.wrap_results_for_axis(results, res_index)

    # dict of scalars
    sliced_ctor = self.obj._constructor_sliced
    if sliced_ctor is not Series:
        wrapped = sliced_ctor(results)
    else:
        # An empty Series defaults to `object` dtype, but this path can be
        # reached by df.mean(), whose result should be float64 even when it
        # is empty.
        wrapped = create_series_with_explicit_dtype(
            results, dtype_if_empty=np.float64
        )

    wrapped.index = res_index
    return wrapped
def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
    """
    Convert every element of ``data`` to an array and collect them in a list.

    Series elements are cast to ``dtype`` (when one is given), reindexed to
    ``index`` unless they already hold that very index object, and reduced to
    their ``_values``. Any other element is passed through ``sanitize_array``;
    dicts are first turned into values via
    ``create_series_with_explicit_dtype(..., index=index)``.

    Returns a list with one entry per element of ``data``, in the same order.
    """
    arrays = []

    for item in data:
        if not isinstance(item, ABCSeries):
            if isinstance(item, dict):
                # see test_constructor_subclass_dict
                # test_constructor_dict_datetime64_index
                item = create_series_with_explicit_dtype(item, index=index)._values
            converted = sanitize_array(
                item, index, dtype=dtype, copy=False, raise_cast_failure=False
            )
        else:
            if dtype is not None:
                item = item.astype(dtype, copy=False)
            if item.index is not index:
                # Forces alignment. No need to copy data since we
                # are putting it into an ndarray later
                item = item.reindex(index, copy=False)
            converted = item._values

        arrays.append(converted)

    return arrays
def _expand_elements(body): data = [len(elem) for elem in body] lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) lens_max = lens.max() not_max = lens[lens != lens_max] empty = [""] for ind, length in not_max.items(): body[ind] += empty * (lens_max - length)
def _parse_numpy(self):
    """
    Decode ``self.json`` with ``loads`` in numpy mode and set ``self.obj``.

    ``loads`` is called with ``dtype=None``, ``numpy=True`` and the
    configured ``precise_float``; for the ``"columns"`` and ``"index"``
    orients ``labelled=True`` is added as well.

    The decoded payload is then handed to
    ``create_series_with_explicit_dtype``:

    * ``"split"``: keys are coerced to ``str``, checked by
      ``self.check_keys_split`` and unpacked as keyword arguments;
    * ``"columns"`` / ``"index"``: the payload is unpacked as positional
      arguments, with ``dtype_if_empty=object``;
    * anything else: the payload is the single positional argument, with
      ``dtype_if_empty=object``.
    """
    options = {
        "dtype": None,
        "numpy": True,
        "precise_float": self.precise_float,
    }
    labelled = self.orient in ["columns", "index"]
    if labelled:
        options["labelled"] = True

    payload = loads(self.json, **options)

    if self.orient == "split":
        ctor_kwargs = {str(key): value for key, value in payload.items()}
        self.check_keys_split(ctor_kwargs)
        built = create_series_with_explicit_dtype(**ctor_kwargs)
    elif labelled:
        built = create_series_with_explicit_dtype(*payload, dtype_if_empty=object)
    else:
        built = create_series_with_explicit_dtype(payload, dtype_if_empty=object)

    self.obj = built
def _map_values(self, mapper, na_action=None):
    """
    Map the values of this object through ``mapper``.

    Parameters
    ----------
    mapper : function, dict, or Series
        The input correspondence object.
    na_action : {None, 'ignore'}
        If 'ignore', NA positions are masked out when the mapping function
        is applied (only used on the function path for non-extension data).

    Returns
    -------
    The mapped values. Depending on the path taken this is the result of the
    underlying values' own ``map`` (categorical or extension data), of
    ``algorithms.take_nd`` (dict / Series mappers), or of
    ``lib.map_infer`` / ``lib.map_infer_mask`` (plain functions).

    Raises
    ------
    NotImplementedError
        If the values are an extension array with a ``map`` method and
        ``na_action`` is not None.
    ValueError
        If, on the function path for non-extension data, ``na_action`` is
        neither 'ignore' nor None.
    """
    # Dict-likes without a default are turned into a Series so that the
    # lookup below can be done with an indexer instead of a Python-level call.
    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # A dict subclass that defines a default value method must be
            # queried key by key, so wrap it in a lookup function
            # (GH #15999).
            defaulting_dict = mapper
            mapper = lambda x: defaulting_dict[x]
        else:
            # Mapping with an empty mapper is expected to give
            # pd.Series(np.nan, ...); np.nan is float64, so an empty mapper
            # is built as float64 as well. Passing the dict itself also
            # copes with tuple keys.
            mapper = create_series_with_explicit_dtype(
                mapper, dtype_if_empty=np.float64
            )

    if isinstance(mapper, ABCSeries):
        # We came from a dict or a Series: look the values up in its index.
        if is_categorical_dtype(self.dtype):
            # The categorical mapper maps the categories instead of every
            # single value.
            # error: Incompatible types in assignment (expression has type
            # "Categorical", variable has type "IndexOpsMixin")
            self = cast("Categorical", self)  # type: ignore[assignment]
            # error: Item "ExtensionArray" of "Union[ExtensionArray, Any]" has no
            # attribute "map"
            return self._values.map(mapper)  # type: ignore[union-attr]

        positions = mapper.index.get_indexer(self._values)
        return algorithms.take_nd(mapper._values, positions)

    # From here on ``mapper`` is a function.
    if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
        # GH#23179 some EAs do not have `map`
        ea_values = self._values
        if na_action is not None:
            raise NotImplementedError
        return ea_values.map(mapper)

    # we must convert to python types
    # error: "IndexOpsMixin" has no attribute "astype"
    object_values = self.astype(object)._values  # type: ignore[attr-defined]

    if na_action == "ignore":
        na_mask = isna(object_values).view(np.uint8)
        return lib.map_infer_mask(object_values, mapper, na_mask)
    if na_action is None:
        return lib.map_infer(object_values, mapper)

    msg = (
        "na_action must either be 'ignore' or None, "
        f"{na_action} was passed"
    )
    raise ValueError(msg)
def test_is_unique(data, expected):
    """``is_unique`` of the object built from ``data`` must be ``expected``."""
    # GH#11946 / GH#25180
    built = create_series_with_explicit_dtype(data, dtype_if_empty=object)
    unique_flag = built.is_unique
    assert unique_flag is expected
def test_constructor_list_of_series(self):
    """
    Build DataFrames from lists of Series and compare each of them with the
    frame produced by ``DataFrame.from_dict(..., orient="index")``.

    Covered cases: all Series named, some unnamed, none named (with differing
    keys and one empty input), a single empty Series, and unnamed Series that
    share one index.
    """
    row_dicts = [
        OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
        OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
    ]
    by_label = OrderedDict(zip(["x", "y"], row_dicts))
    labels = Index(["a", "b", "c"])

    # all named
    named_rows = [
        Series([1.5, 3, 4], labels, dtype="O", name="x"),
        Series([1.5, 3, 6], labels, name="y"),
    ]
    frame = DataFrame(named_rows)
    tm.assert_frame_equal(frame, DataFrame.from_dict(by_label, orient="index"))

    # some unnamed
    partly_named_rows = [
        Series([1.5, 3, 4], labels, dtype="O", name="x"),
        Series([1.5, 3, 6], labels),
    ]
    frame = DataFrame(partly_named_rows)

    by_label = OrderedDict(zip(["x", "Unnamed 0"], row_dicts))
    tm.assert_frame_equal(frame, DataFrame.from_dict(by_label, orient="index"))

    # none named
    ragged_dicts = [
        OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
        OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
        OrderedDict([["a", 1.5], ["d", 6]]),
        OrderedDict(),
        OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
        OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
    ]
    ragged_rows = []
    for mapping in ragged_dicts:
        ragged_rows.append(
            create_series_with_explicit_dtype(mapping, dtype_if_empty=object)
        )

    frame = DataFrame(ragged_rows)
    by_position = OrderedDict(enumerate(ragged_rows))
    reference = DataFrame.from_dict(by_position, orient="index")
    tm.assert_frame_equal(frame, reference.reindex(frame.index))

    # An explicit positional index must give the same frame.
    frame_with_index = DataFrame(ragged_rows, index=np.arange(6))
    tm.assert_frame_equal(frame, frame_with_index)

    # A single empty Series gives one row and no columns.
    frame = DataFrame([Series(dtype=object)])
    tm.assert_frame_equal(frame, DataFrame(index=[0]))

    # Unnamed Series sharing one index are labelled by position.
    row_dicts = [
        OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
        OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
    ]
    by_position = OrderedDict(enumerate(row_dicts))

    labels = Index(["a", "b", "c"])
    unnamed_rows = [
        Series([1.5, 3, 4], labels, dtype="O"),
        Series([1.5, 3, 6], labels),
    ]
    frame = DataFrame(unnamed_rows)
    tm.assert_frame_equal(frame, DataFrame.from_dict(by_position, orient="index"))