Exemple #1
0
    def _parse_no_numpy(self):
        """
        Decode ``self.json`` and store the constructed object on ``self.obj``.

        For ``orient == "split"`` the keys of the decoded mapping are
        converted to ``str``, validated with ``check_keys_split`` and passed
        on as keyword arguments.  For every other orient the decoded data is
        passed positionally with ``dtype_if_empty=object``.
        """
        payload = loads(self.json, precise_float=self.precise_float)

        if self.orient != "split":
            self.obj = create_series_with_explicit_dtype(
                payload, dtype_if_empty=object
            )
            return

        # JSON keys may decode as non-str objects; keyword names must be str.
        kwargs = {str(key): value for key, value in payload.items()}
        self.check_keys_split(kwargs)
        self.obj = create_series_with_explicit_dtype(**kwargs)
Exemple #2
0
    def wrap_results(
        self, results: ResType, res_index: "Index"
    ) -> Union["Series", "DataFrame"]:
        """
        Wrap the collected ``results`` and label them with ``res_index``.

        When ``results`` has an entry under key ``0`` that is a sequence, the
        work is delegated to ``wrap_results_for_axis``.  Otherwise the results
        are treated as a dict of scalars and handed to the sliced constructor
        of ``self.obj``, whose ``index`` is then replaced by ``res_index``.
        """
        from pandas import Series

        # try to infer the shape of the results from the first entry
        if len(results) > 0 and 0 in results and is_sequence(results[0]):
            return self.wrap_results_for_axis(results, res_index)

        # dict of scalars
        sliced_ctor = self.obj._constructor_sliced
        if sliced_ctor is not Series:
            wrapped = sliced_ctor(results)
        else:
            # An empty Series defaults to `object` dtype, but this path is
            # reachable from reductions such as df.mean(), where even an
            # empty result is expected to be float64.
            wrapped = create_series_with_explicit_dtype(
                results, dtype_if_empty=np.float64
            )
        wrapped.index = res_index

        return wrapped
def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
    homogenized = []

    for val in data:
        if isinstance(val, ABCSeries):
            if dtype is not None:
                val = val.astype(dtype, copy=False)
            if val.index is not index:
                # Forces alignment. No need to copy data since we
                # are putting it into an ndarray later
                val = val.reindex(index, copy=False)

            val = val._values
        else:
            if isinstance(val, dict):
                # see test_constructor_subclass_dict
                #  test_constructor_dict_datetime64_index
                val = create_series_with_explicit_dtype(val,
                                                        index=index)._values

            val = sanitize_array(val,
                                 index,
                                 dtype=dtype,
                                 copy=False,
                                 raise_cast_failure=False)

        homogenized.append(val)

    return homogenized
Exemple #4
0
def _expand_elements(body):
    data = [len(elem) for elem in body]
    lens = create_series_with_explicit_dtype(data, dtype_if_empty=object)
    lens_max = lens.max()
    not_max = lens[lens != lens_max]

    empty = [""]
    for ind, length in not_max.items():
        body[ind] += empty * (lens_max - length)
Exemple #5
0
    def _parse_numpy(self):
        """
        Decode ``self.json`` with ``numpy=True`` and store the result on
        ``self.obj``.

        ``labelled=True`` is additionally requested from ``loads`` for the
        "columns" and "index" orients.  How the decoded data is handed to
        ``create_series_with_explicit_dtype`` depends on ``self.orient``:

        * "split": keys are stringified, validated with ``check_keys_split``
          and passed as keyword arguments;
        * "columns" / "index": the decoded data is unpacked into positional
          arguments;
        * anything else: the decoded data is passed as a single argument.
        """
        labelled_orients = ["columns", "index"]

        load_kwargs = {
            "dtype": None,
            "numpy": True,
            "precise_float": self.precise_float,
        }
        if self.orient in labelled_orients:
            load_kwargs["labelled"] = True
        payload = loads(self.json, **load_kwargs)

        if self.orient == "split":
            kwargs = {str(key): value for key, value in payload.items()}
            self.check_keys_split(kwargs)
            self.obj = create_series_with_explicit_dtype(**kwargs)
            return

        if self.orient in labelled_orients:
            self.obj = create_series_with_explicit_dtype(
                *payload, dtype_if_empty=object
            )
            return

        self.obj = create_series_with_explicit_dtype(payload, dtype_if_empty=object)
Exemple #6
0
    def _map_values(self, mapper, na_action=None):
        """
        Apply ``mapper`` (a dict, Series, or function) to the underlying
        values and return the mapped values.

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        The mapped values, as produced by whichever branch handles
        ``mapper``: a ``take_nd`` lookup for dict/Series mappers, the
        values' own ``map`` for categoricals and extension arrays that
        provide one, or ``lib.map_infer`` / ``lib.map_infer_mask`` otherwise.

        Raises
        ------
        NotImplementedError
            If the values are an extension array with a ``map`` method and
            ``na_action`` is not None.
        ValueError
            If, on the generic path, ``na_action`` is neither 'ignore' nor
            None.
        """
        # dict/Series mappers take a fast path: they are turned into an
        # indexed lookup, so no python-level function has to be called
        # per element
        if is_dict_like(mapper):
            has_default = isinstance(mapper, dict) and hasattr(mapper, "__missing__")
            if has_default:
                # A dict subclass defining a default value method must be
                # consulted per key, so wrap it in a lookup function
                # (GH #15999).
                lookup = mapper
                mapper = lambda x: lookup[x]
            else:
                # No default: safe to convert to a Series for efficiency.
                # Going through the helper keeps tuple keys intact.
                #
                # Mapping with an empty mapper is expected to give
                # pd.Series(np.nan, ...); np.nan is float64, hence the
                # float64 dtype for the empty case.
                mapper = create_series_with_explicit_dtype(
                    mapper, dtype_if_empty=np.float64
                )

        if isinstance(mapper, ABCSeries):
            # The mapper came from a dict or a Series; its index holds the
            # keys to look the values up in.
            if is_categorical_dtype(self.dtype):
                # The categorical mapper maps the categories rather than
                # every single value, which saves time.

                # error: Incompatible types in assignment (expression has type
                # "Categorical", variable has type "IndexOpsMixin")
                self = cast("Categorical", self)  # type: ignore[assignment]
                # error: Item "ExtensionArray" of "Union[ExtensionArray, Any]" has no
                # attribute "map"
                return self._values.map(mapper)  # type: ignore[union-attr]

            positions = mapper.index.get_indexer(self._values)
            return algorithms.take_nd(mapper._values, positions)

        # mapper is a function from here on; python types are required
        if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
            # GH#23179 some EAs do not have `map`
            ea_values = self._values
            if na_action is not None:
                raise NotImplementedError
            return ea_values.map(mapper)

        # error: "IndexOpsMixin" has no attribute "astype"
        obj_values = self.astype(object)._values  # type: ignore[attr-defined]
        if na_action == "ignore":
            na_mask = isna(obj_values).view(np.uint8)
            return lib.map_infer_mask(obj_values, mapper, na_mask)
        if na_action is None:
            return lib.map_infer(obj_values, mapper)

        msg = ("na_action must either be 'ignore' or None, "
               f"{na_action} was passed")
        raise ValueError(msg)
Exemple #7
0
def test_is_unique(data, expected):
    """Check that ``is_unique`` of the Series built from ``data`` is ``expected``."""
    # GH#11946 / GH#25180
    series = create_series_with_explicit_dtype(data, dtype_if_empty=object)
    observed = series.is_unique
    assert observed is expected
Exemple #8
0
    def test_constructor_list_of_series(self):
        """
        A DataFrame built from a list of Series must equal the frame built
        with ``DataFrame.from_dict(..., orient="index")`` from the same rows.
        """
        labels = Index(["a", "b", "c"])
        rows = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]

        # all named
        all_named = [
            Series([1.5, 3, 4], labels, dtype="O", name="x"),
            Series([1.5, 3, 6], labels, name="y"),
        ]
        by_name = OrderedDict(zip(["x", "y"], rows))
        expected = DataFrame.from_dict(by_name, orient="index")
        tm.assert_frame_equal(DataFrame(all_named), expected)

        # some unnamed
        some_named = [
            Series([1.5, 3, 4], labels, dtype="O", name="x"),
            Series([1.5, 3, 6], labels),
        ]
        by_name = OrderedDict(zip(["x", "Unnamed 0"], rows))
        expected = DataFrame.from_dict(by_name, orient="index")
        tm.assert_frame_equal(DataFrame(some_named), expected)

        # none named
        ragged_rows = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]
        unnamed = [
            create_series_with_explicit_dtype(row, dtype_if_empty=object)
            for row in ragged_rows
        ]

        result = DataFrame(unnamed)
        by_position = OrderedDict(zip(range(len(unnamed)), unnamed))
        expected = DataFrame.from_dict(by_position, orient="index")
        tm.assert_frame_equal(result, expected.reindex(result.index))

        # passing the positional index explicitly must not change anything
        with_index = DataFrame(unnamed, index=np.arange(6))
        tm.assert_frame_equal(result, with_index)

        # a single empty Series
        result = DataFrame([Series(dtype=object)])
        expected = DataFrame(index=[0])
        tm.assert_frame_equal(result, expected)

        # unnamed Series sharing one index; same rows and labels as above
        shared_index = [
            Series([1.5, 3, 4], labels, dtype="O"),
            Series([1.5, 3, 6], labels),
        ]
        by_position = OrderedDict(zip(range(len(rows)), rows))
        expected = DataFrame.from_dict(by_position, orient="index")
        tm.assert_frame_equal(DataFrame(shared_index), expected)