Example #1
0
def test_empty_str_methods(any_string_dtype):
    """
    Exercise the ``.str`` accessor on an empty Series.

    Each call is compared against an empty Series/DataFrame of the dtype
    the method is expected to return for the string dtype under test.
    """
    empty = Series(dtype=any_string_dtype)
    empty_str = empty

    # Integer / boolean results use nullable dtypes unless the input is object.
    uses_object = any_string_dtype == "object"
    empty_int = Series(dtype="int64" if uses_object else "Int64")
    empty_bool = Series(dtype=bool if uses_object else "boolean")
    empty_object = Series(dtype=object)
    empty_bytes = Series(dtype=object)
    empty_df = DataFrame()

    # Flags handed to ``maybe_perf_warn``; only pyarrow-backed strings qualify.
    is_pyarrow = any_string_dtype == "string[pyarrow]"
    warn_below_4 = is_pyarrow and pa_version_under4p0
    warn_below_2 = is_pyarrow and pa_version_under2p0

    series_eq = tm.assert_series_equal
    frame_eq = tm.assert_frame_equal

    # GH7241
    # (extract) on empty series

    series_eq(empty_str, empty.str.cat(empty))
    assert "" == empty.str.cat()
    series_eq(empty_str, empty.str.title())
    series_eq(empty_int, empty.str.count("a"))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_bool, empty.str.contains("a"))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_bool, empty.str.startswith("a"))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_bool, empty.str.endswith("a"))
    series_eq(empty_str, empty.str.lower())
    series_eq(empty_str, empty.str.upper())
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_str, empty.str.replace("a", "b"))
    series_eq(empty_str, empty.str.repeat(3))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_bool, empty.str.match("^a"))

    # extract: one and two capture groups, expanded and not expanded
    frame_eq(
        DataFrame(columns=[0], dtype=any_string_dtype),
        empty.str.extract("()", expand=True),
    )
    frame_eq(
        DataFrame(columns=[0, 1], dtype=any_string_dtype),
        empty.str.extract("()()", expand=True),
    )
    series_eq(empty_str, empty.str.extract("()", expand=False))
    frame_eq(
        DataFrame(columns=[0, 1], dtype=any_string_dtype),
        empty.str.extract("()()", expand=False),
    )

    frame_eq(empty_df, empty.str.get_dummies())
    series_eq(empty_str, empty_str.str.join(""))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_int, empty.str.len())
    series_eq(empty_object, empty_str.str.findall("a"))
    series_eq(empty_int, empty.str.find("a"))
    series_eq(empty_int, empty.str.rfind("a"))
    series_eq(empty_str, empty.str.pad(42))
    series_eq(empty_str, empty.str.center(42))
    series_eq(empty_object, empty.str.split("a"))
    series_eq(empty_object, empty.str.rsplit("a"))
    series_eq(empty_object, empty.str.partition("a", expand=False))
    frame_eq(empty_df, empty.str.partition("a"))
    series_eq(empty_object, empty.str.rpartition("a", expand=False))
    frame_eq(empty_df, empty.str.rpartition("a"))
    series_eq(empty_str, empty.str.slice(stop=1))
    series_eq(empty_str, empty.str.slice(step=1))
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_str, empty.str.strip())
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_str, empty.str.lstrip())
    with maybe_perf_warn(warn_below_4):
        series_eq(empty_str, empty.str.rstrip())
    series_eq(empty_str, empty.str.wrap(42))
    series_eq(empty_str, empty.str.get(0))
    series_eq(empty_object, empty_bytes.str.decode("ascii"))
    series_eq(empty_bytes, empty.str.encode("ascii"))

    # ismethods should always return boolean (GH 29624)
    series_eq(empty_bool, empty.str.isalnum())
    series_eq(empty_bool, empty.str.isalpha())
    series_eq(empty_bool, empty.str.isdigit())
    with maybe_perf_warn(warn_below_2):
        series_eq(empty_bool, empty.str.isspace())
    series_eq(empty_bool, empty.str.islower())
    series_eq(empty_bool, empty.str.isupper())
    series_eq(empty_bool, empty.str.istitle())
    series_eq(empty_bool, empty.str.isnumeric())
    series_eq(empty_bool, empty.str.isdecimal())
    series_eq(empty_str, empty.str.capitalize())
    series_eq(empty_str, empty.str.swapcase())
    series_eq(empty_str, empty.str.normalize("NFC"))

    table = str.maketrans("a", "b")
    series_eq(empty_str, empty.str.translate(table))
Example #2
0
 def setup(self, keep):
     self.s = Series(np.random.randint(1, 10, 100000))
Example #3
0
 def time_constructor(self, data):
     """Construct a Series from ``self.data`` and ``self.idx`` and discard it.

     The ``data`` argument is not read; the values come from ``self.data``.
     """
     values, labels = self.data, self.idx
     Series(data=values, index=labels)
Example #4
0
    def test_replace_datetimetz(self):
        """Check ``replace``/``fillna`` on a frame holding a tz-aware datetime column.

        Column 'A' is a ``US/Eastern`` date range and column 'B' is numeric
        with one NaN; each step below compares the outcome against an
        explicitly built expected frame.
        """

        # GH 11326
        # behaving poorly when presented with a datetime64[ns, tz]
        df = DataFrame({
            'A': date_range('20130101', periods=3, tz='US/Eastern'),
            'B': [0, np.nan, 2]
        })
        # Replacing NaN only touches the numeric column; 'A' is unchanged.
        result = df.replace(np.nan, 1)
        expected = DataFrame({
            'A':
            date_range('20130101', periods=3, tz='US/Eastern'),
            'B':
            Series([0, 1, 2], dtype='float64')
        })
        assert_frame_equal(result, expected)

        # fillna(1) is expected to give the same frame as replace(np.nan, 1).
        result = df.fillna(1)
        assert_frame_equal(result, expected)

        # Replacing 0 with NaN again leaves the datetime column alone.
        result = df.replace(0, np.nan)
        expected = DataFrame({
            'A':
            date_range('20130101', periods=3, tz='US/Eastern'),
            'B': [np.nan, np.nan, 2]
        })
        assert_frame_equal(result, expected)

        # Replace one tz-aware timestamp with another of the same timezone.
        result = df.replace(Timestamp('20130102', tz='US/Eastern'),
                            Timestamp('20130104', tz='US/Eastern'))
        expected = DataFrame({
            'A': [
                Timestamp('20130101', tz='US/Eastern'),
                Timestamp('20130104', tz='US/Eastern'),
                Timestamp('20130103', tz='US/Eastern')
            ],
            'B': [0, np.nan, 2]
        })
        assert_frame_equal(result, expected)

        # Blank out one datetime cell, then fill it through a per-column
        # replace of NaT; the expected frame is the one built just above.
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': pd.NaT},
                                Timestamp('20130104', tz='US/Eastern'))
        assert_frame_equal(result, expected)

        # coerce to object
        # (the replacement value carries a different timezone than the column)
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': pd.NaT},
                                Timestamp('20130104', tz='US/Pacific'))
        expected = DataFrame({
            'A': [
                Timestamp('20130101', tz='US/Eastern'),
                Timestamp('20130104', tz='US/Pacific'),
                Timestamp('20130103', tz='US/Eastern')
            ],
            'B': [0, np.nan, 2]
        })
        assert_frame_equal(result, expected)

        # Same again, but keyed on np.nan and with a tz-naive replacement.
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': np.nan}, Timestamp('20130104'))
        expected = DataFrame({
            'A': [
                Timestamp('20130101', tz='US/Eastern'),
                Timestamp('20130104'),
                Timestamp('20130103', tz='US/Eastern')
            ],
            'B': [0, np.nan, 2]
        })
        assert_frame_equal(result, expected)
Example #5
0
def hash_pandas_object(
    obj,
    index: bool = True,
    encoding: str = "utf8",
    hash_key: Optional[str] = _default_hash_key,
    categorize: bool = True,
):
    """
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
        The object to hash.
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode. ``None`` falls back to the default.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Raises
    ------
    TypeError
        If ``obj`` is not an Index, Series or DataFrame.
    """
    from pandas import Series

    if hash_key is None:
        hash_key = _default_hash_key

    if isinstance(obj, ABCMultiIndex):
        return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)

    elif isinstance(obj, ABCIndexClass):
        h = hash_array(obj.values, encoding, hash_key, categorize).astype(
            "uint64", copy=False
        )
        h = Series(h, index=obj, dtype="uint64", copy=False)

    elif isinstance(obj, ABCSeries):
        h = hash_array(obj.values, encoding, hash_key, categorize).astype(
            "uint64", copy=False
        )
        if index:
            # Single-item generator so the index hash is computed lazily,
            # only when the combiner consumes it.
            index_iter = (
                hash_pandas_object(
                    obj.index,
                    index=False,
                    encoding=encoding,
                    hash_key=hash_key,
                    categorize=categorize,
                ).values
                for _ in [None]
            )
            arrays = itertools.chain([h], index_iter)
            h = _combine_hash_arrays(arrays, 2)

        h = Series(h, index=obj.index, dtype="uint64", copy=False)

    elif isinstance(obj, ABCDataFrame):
        # Forward encoding/hash_key/categorize to every column so that the
        # caller's settings are honoured for DataFrames exactly as they are
        # for Series and Index inputs.
        hashes = (
            hash_array(series.values, encoding, hash_key, categorize)
            for _, series in obj.items()
        )
        num_items = len(obj.columns)
        if index:
            index_hash_generator = (
                hash_pandas_object(
                    obj.index,
                    index=False,
                    encoding=encoding,
                    hash_key=hash_key,
                    categorize=categorize,
                ).values  # noqa
                for _ in [None]
            )
            num_items += 1

            # keep `hashes` specifically a generator to keep mypy happy
            _hashes = itertools.chain(hashes, index_hash_generator)
            hashes = (x for x in _hashes)
        h = _combine_hash_arrays(hashes, num_items)

        h = Series(h, index=obj.index, dtype="uint64", copy=False)
    else:
        raise TypeError(f"Unexpected type for hashing {type(obj)}")
    return h
Example #6
0
 def test_float_index_at_iat(self):
     s = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
     for el, item in s.items():
         assert s.at[el] == item
     for i in range(len(s)):
         assert s.iat[i] == i + 1
Example #7
0
    def test_coercion_with_setitem(self, start_data, expected_result):
        start_series = Series(start_data)
        start_series[0] = None

        expected_series = Series(expected_result)
        tm.assert_series_equal(start_series, expected_series)
Example #8
0
def test_string_slice_out_of_bounds(any_string_dtype):
    ser = Series(["foo", "b", "ba"], dtype=any_string_dtype)
    result = ser.str[1]
    expected = Series(["o", np.nan, "a"], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Example #9
0
def test_encode_decode(any_string_dtype):
    ser = Series(["a", "b", "a\xe4"],
                 dtype=any_string_dtype).str.encode("utf-8")
    result = ser.str.decode("utf-8")
    expected = ser.map(lambda x: x.decode("utf-8"))
    tm.assert_series_equal(result, expected)
Example #10
0
def test_removesuffix(any_string_dtype, suffix, expected):
    ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
    result = ser.str.removesuffix(suffix)
    ser_expected = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(result, ser_expected)
Example #11
0
def test_string_slice_out_of_bounds_nested():
    """``.str[1]`` on a Series of tuples yields NaN where the tuple is too short."""
    tuples = Series([(1, 2), (1, ), (3, 4, 5)])
    second_items = tuples.str[1]
    tm.assert_series_equal(second_items, Series([2, np.nan, 4]))
Example #12
0
def test_strip_lstrip_rstrip(any_string_dtype, method, exp):
    ser = Series(["  aa   ", " bb \n", np.nan, "cc  "], dtype=any_string_dtype)

    result = getattr(ser.str, method)()
    expected = Series(exp, dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Example #13
0
def test_slice(start, stop, step, expected, any_string_dtype):
    ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"],
                 dtype=any_string_dtype)
    result = ser.str.slice(start, stop, step)
    expected = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
Example #14
0
def test_spilt_join_roundtrip(any_string_dtype):
    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
    result = ser.str.split("_").str.join("_")
    expected = ser.astype(object)
    tm.assert_series_equal(result, expected)
Example #15
0
    def test_multi_assign(self):
        """Assign a masked sub-frame, an ndarray and a Series to ``.loc[mask, cols]``.

        Each assignment is carried out twice in a row and compared against
        the same expected frame both times.
        """

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame(
            {
                "FC": ["a", "b", "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": list(range(6)),
                "col2": list(range(6, 12)),
            }
        )
        df.iloc[1, 0] = np.nan
        df2 = df.copy()

        # Rows whose "FC" is not missing (row 1 was set to NaN above).
        mask = ~df2.FC.isna()
        cols = ["col1", "col2"]

        # Source of the assigned values: doubled frame with one cell blanked.
        dft = df2 * 2
        dft.iloc[3, 3] = np.nan

        expected = DataFrame(
            {
                "FC": ["a", np.nan, "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": Series([0, 1, 4, 6, 8, 10]),
                "col2": [12, 7, 16, np.nan, 20, 22],
            }
        )

        # frame on rhs
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # same assignment a second time; the frame must still match
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame(
            {
                "FC": ["a", np.nan, "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0],
                "col2": [12, 7, 16, np.nan, 20, 22],
            }
        )
        df2 = df.copy()
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)
        # same ndarray assignment a second time
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

        # broadcasting on the rhs is required
        df = DataFrame(
            dict(
                A=[1, 2, 0, 0, 0],
                B=[0, 0, 0, 10, 11],
                C=[0, 0, 0, 10, 11],
                D=[3, 4, 5, 6, 7],
            )
        )

        # Build the expected frame one column at a time ...
        expected = df.copy()
        mask = expected["A"] == 0
        for col in ["A", "B"]:
            expected.loc[mask, col] = df["D"]

        # ... and compare with assigning the Series to both columns at once.
        df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
        tm.assert_frame_equal(df, expected)
Example #16
0
def test_str_accessor_no_new_attributes(any_string_dtype):
    """Setting an unknown attribute on the ``.str`` accessor raises AttributeError."""
    # https://github.com/pandas-dev/pandas/issues/10673
    letters = Series(list("aabbcde"), dtype=any_string_dtype)
    expected_msg = "You cannot add any new attribute"
    with pytest.raises(AttributeError, match=expected_msg):
        letters.str.xlabel = "a"
Example #17
0
class TestMisc:
    """Miscellaneous indexing tests: float/mixed indexes, alignment, slicing helpers."""

    def test_float_index_to_mixed(self):
        # Adding a string-labelled column to a frame with float column labels.
        df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
        df["a"] = 10
        tm.assert_frame_equal(
            DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}), df
        )

    def test_float_index_non_scalar_assignment(self):
        df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        df.loc[df.index[:2]] = 1
        expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index)
        tm.assert_frame_equal(expected, df)

        # Assigning a frame's own rows back to itself must leave it unchanged.
        df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        df2 = df.copy()
        df.loc[df.index] = df.loc[df.index]
        tm.assert_frame_equal(df, df2)

    def test_float_index_at_iat(self):
        s = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
        for el, item in s.items():
            assert s.at[el] == item
        for i in range(len(s)):
            assert s.iat[i] == i + 1

    def test_mixed_index_assignment(self):
        # GH 19860
        s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
        s.at["a"] = 11
        assert s.iat[0] == 11
        s.at[1] = 22
        assert s.iat[3] == 22

    def test_mixed_index_no_fallback(self):
        # GH 19860
        # .at must look up labels only; 0 and 4 are valid positions but not labels.
        s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
        with pytest.raises(KeyError, match="^0$"):
            s.at[0]
        with pytest.raises(KeyError, match="^4$"):
            s.at[4]

    def test_rhs_alignment(self):
        # GH8258, tests that both rows & columns are aligned to what is
        # assigned to. covers both uniform data-type & multi-type cases
        def run_tests(df, rhs, right):
            # label, index, slice
            lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
            lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

            left = df.copy()
            left.loc[lbl_one, lbl_two] = rhs
            tm.assert_frame_equal(left, right)

            left = df.copy()
            left.iloc[idx_one, idx_two] = rhs
            tm.assert_frame_equal(left, right)

            left = df.copy()
            left.iloc[slice_one, slice_two] = rhs
            tm.assert_frame_equal(left, right)

        xs = np.arange(20).reshape(5, 4)
        cols = ["jim", "joe", "jolie", "joline"]
        df = DataFrame(xs, columns=cols, index=list("abcde"))

        # right hand side; permute the indices and multiply by -2
        rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

        # expected `right` result; just multiply by -2
        right = df.copy()
        right.iloc[1:4, 1:3] *= -2

        # run tests with uniform dtypes
        run_tests(df, rhs, right)

        # make frames multi-type & re-run tests
        for frame in [df, rhs, right]:
            frame["joe"] = frame["joe"].astype("float64")
            frame["jolie"] = frame["jolie"].map("@{}".format)

        run_tests(df, rhs, right)

    def test_str_label_slicing_with_negative_step(self):
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])

            # ``is_integer`` is a method and has to be called: the bare bound
            # method is always truthy, which would skip this check for every
            # index type.
            if not idx.is_integer():
                # Plain getitem with an integer slice on an integer index is
                # not compared here; only non-integer indexes are checked.
                tm.assert_series_equal(s[l_slc], s.iloc[i_slc])

        for idx in [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]:
            idx = Index(idx)
            s = Series(np.arange(20), index=idx)
            assert_slices_equivalent(SLC[idx[9] :: -1], SLC[9::-1])
            assert_slices_equivalent(SLC[: idx[9] : -1], SLC[:8:-1])
            assert_slices_equivalent(SLC[idx[13] : idx[9] : -1], SLC[13:8:-1])
            assert_slices_equivalent(SLC[idx[9] : idx[13] : -1], SLC[:0])

    def test_slice_with_zero_step_raises(self):
        s = Series(np.arange(20), index=_mklbl("A", 20))
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            s[::0]
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            s.loc[::0]

    def test_indexing_assignment_dict_already_exists(self):
        # Assigning a dict to an existing row behaves like assigning its values.
        df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}).set_index("z")
        expected = df.copy()
        rhs = dict(x=9, y=99)
        df.loc[5] = rhs
        expected.loc[5] = [9, 99]
        tm.assert_frame_equal(df, expected)

    def test_indexing_dtypes_on_empty(self):
        # Check that .iloc returns correct dtypes GH9983
        df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]})
        df2 = df.iloc[[], :]

        assert df2.loc[:, "a"].dtype == np.int64
        tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0])

    @pytest.mark.parametrize("size", [5, 999999, 1000000])
    def test_range_in_series_indexing(self, size):
        # range can cause an indexing error
        # GH 11652
        s = Series(index=range(size), dtype=np.float64)
        s.loc[range(1)] = 42
        tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0]))

        s.loc[range(2)] = 43
        tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1]))

    @pytest.mark.parametrize(
        "slc",
        [
            pd.IndexSlice[:, :],
            pd.IndexSlice[:, 1],
            pd.IndexSlice[1, :],
            pd.IndexSlice[[1], [1]],
            pd.IndexSlice[1, [1]],
            pd.IndexSlice[[1], 1],
            pd.IndexSlice[1],
            pd.IndexSlice[1, 1],
            slice(None, None, None),
            [0, 1],
            np.array([0, 1]),
            Series([0, 1]),
        ],
    )
    def test_non_reducing_slice(self, slc):
        # Whatever the input indexer, .loc with the converted slice keeps a DataFrame.
        df = DataFrame([[0, 1], [2, 3]])

        tslice_ = non_reducing_slice(slc)
        assert isinstance(df.loc[tslice_], DataFrame)

    def test_list_slice(self):
        # like dataframe getitem
        slices = [["A"], Series(["A"]), np.array(["A"])]
        df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"])
        expected = pd.IndexSlice[:, ["A"]]
        for subset in slices:
            result = non_reducing_slice(subset)
            tm.assert_frame_equal(df.loc[result], df.loc[expected])

    def test_maybe_numeric_slice(self):
        df = DataFrame({"A": [1, 2], "B": ["c", "d"], "C": [True, False]})
        result = maybe_numeric_slice(df, slice_=None)
        expected = pd.IndexSlice[:, ["A"]]
        assert result == expected

        # include_bool=True: compare element-wise, because a direct tuple
        # comparison is ambiguous if the column selection comes back as an
        # array-like with more than one entry.
        result = maybe_numeric_slice(df, None, include_bool=True)
        expected = pd.IndexSlice[:, ["A", "C"]]
        assert result[0] == expected[0]
        assert list(result[1]) == expected[1]

        # An explicit slice_ is passed through unchanged.
        result = maybe_numeric_slice(df, [1])
        expected = [1]
        assert result == expected

    def test_partial_boolean_frame_indexing(self):
        # GH 17170
        df = DataFrame(
            np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC")
        )
        index_df = DataFrame(1, index=list("ab"), columns=list("AB"))
        result = df[index_df.notnull()]
        expected = DataFrame(
            np.array([[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]),
            index=list("abc"),
            columns=list("ABC"),
        )
        tm.assert_frame_equal(result, expected)

    def test_no_reference_cycle(self):
        # Touching the indexers must not keep the frame alive after ``del``.
        df = DataFrame({"a": [0, 1], "b": [2, 3]})
        for name in ("loc", "iloc", "at", "iat"):
            getattr(df, name)
        wr = weakref.ref(df)
        del df
        assert wr() is None
Example #18
0
def test_cat_on_bytes_raises():
    """``.str.cat`` on object Series holding bytes raises TypeError."""

    def bytes_series(chars):
        # One-byte strings ("S1") stored in an object-dtype Series.
        return Series(np.array(list(chars), "S1").astype(object))

    lhs = bytes_series("abc")
    rhs = bytes_series("def")
    with pytest.raises(
        TypeError,
        match="Cannot use .str.cat with values of inferred dtype 'bytes'",
    ):
        lhs.str.cat(rhs)
Example #19
0
 def test_slice_with_zero_step_raises(self):
     """A zero slice step raises ValueError for both plain getitem and ``.loc``."""
     ser = Series(np.arange(20), index=_mklbl("A", 20))
     expected_msg = "slice step cannot be zero"
     with pytest.raises(ValueError, match=expected_msg):
         ser[::0]
     with pytest.raises(ValueError, match=expected_msg):
         ser.loc[::0]
Example #20
0
def test_str_accessor_in_apply_func():
    """The ``.str`` accessor is usable on each row passed to ``DataFrame.apply``."""
    # https://github.com/pandas-dev/pandas/issues/38979

    def upper_joined(row):
        return "/".join(row.str.upper())

    frame = DataFrame(zip("abc", "def"))
    joined = frame.apply(upper_joined, axis=1)
    tm.assert_series_equal(joined, Series(["A/D", "B/E", "C/F"]))
Example #21
0
    def test_coercion_with_loc_and_series(self, start_data, expected_result):
        start_series = Series(start_data)
        start_series.loc[start_series == start_series[0]] = None

        expected_series = Series(expected_result)
        tm.assert_series_equal(start_series, expected_series)
Example #22
0
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 25 08:20:27 2017 by Dhiraj Upadhyaya
"""

#Categories
import pandas as pd
from pandas import Series
import numpy as np

# Build a Series that is categorical from the start.
# The bare expressions below only show a value in an interactive session;
# they have no effect when the file is run as a script.
s = Series(["a", "b", "c", "a"], dtype="category")
s
s.cat.categories
s.cat.ordered
type(s)

# Convert an existing DataFrame column to categorical and store it as column 'B'.
df = pd.DataFrame({'A': ['a', 'b', 'c', 'a']})
df
df['B'] = df['A'].astype('category')
df
type(df['B'])

# Disabled check, kept for reference:
#pd.core.common.is_categorical_dtype(df.A.dtype)
# `df` is rebound to a frame of 20 random integers drawn from [0, 100).
df = pd.DataFrame({'value': np.random.randint(0, 100, 20)})
df
# Range labels "0 - 9", "10 - 19", ..., "90 - 99".
# NOTE(review): `labels` is not used in the visible part of this file.
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]

# `df` is rebound once more to a small frame with a numeric and a string column.
df = pd.DataFrame({
    'a': [1, 2, 3, 4, 5],
    'b': ['yes', 'no', 'yes', 'no', 'absent']
})
Example #23
0
    def test_replace_mixed(self):
        """Check ``DataFrame.replace`` on mixed-dtype frames, including dtype upcasting.

        The first part mutates ``self.mixed_frame`` in place (``mf`` is the
        same object); the later parts use small frames built locally.
        """
        mf = self.mixed_frame
        mf.iloc[5:20, mf.columns.get_loc('foo')] = np.nan
        mf.iloc[-10:, mf.columns.get_loc('A')] = np.nan

        # replace(np.nan, x) must match fillna(x), and replacing back must
        # restore the original frame.
        result = self.mixed_frame.replace(np.nan, -18)
        expected = self.mixed_frame.fillna(value=-18)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-18, np.nan), self.mixed_frame)

        # same round trip with a float replacement value
        result = self.mixed_frame.replace(np.nan, -1e8)
        expected = self.mixed_frame.fillna(value=-1e8)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-1e8, np.nan), self.mixed_frame)

        # int block upcasting
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0.5, 1], dtype='float64')
        })
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # the in-place form must give the same frame
        df.replace(0, 0.5, inplace=True)
        assert_frame_equal(df, expected)

        # int block splitting
        # (only column 'B' contains the replaced value; 'C' stays int64)
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64'),
            'C': Series([1, 2], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0.5, 1], dtype='float64'),
            'C': Series([1, 2], dtype='int64')
        })
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # to object block upcasting
        df = DataFrame({
            'A': Series([1.0, 2.0], dtype='float64'),
            'B': Series([0, 1], dtype='int64')
        })
        expected = DataFrame({
            'A': Series([1, 'foo'], dtype='object'),
            'B': Series([0, 1], dtype='int64')
        })
        result = df.replace(2, 'foo')
        assert_frame_equal(result, expected)

        # list-to-list replacement touching both columns
        expected = DataFrame({
            'A': Series(['foo', 'bar'], dtype='object'),
            'B': Series([0, 'foo'], dtype='object')
        })
        result = df.replace([1, 2], ['foo', 'bar'])
        assert_frame_equal(result, expected)

        # replacement values supplied as a dict built from the column means
        # (``df.mean().to_dict()``)
        df = DataFrame({
            'A': Series([3, 0], dtype='int64'),
            'B': Series([0, 3], dtype='int64')
        })
        result = df.replace(3, df.mean().to_dict())
        expected = df.copy().astype('float64')
        m = df.mean()
        expected.iloc[0, 0] = m[0]
        expected.iloc[1, 1] = m[1]
        assert_frame_equal(result, expected)
Example #24
0
def test_missing_labels_inside_loc_matched_in_error_message():
    """The KeyError from ``.loc`` with a list names the missing labels in order."""
    # GH34272
    ser = Series({"a": 1, "b": 2, "c": 3})
    requested = ["a", "b", "missing_0", "c", "missing_1", "missing_2"]
    with pytest.raises(KeyError, match="missing_0.*missing_1.*missing_2"):
        ser.loc[requested]
Example #25
0
 def test_get_loc_single_level(self, single_level_multiindex):
     single_level = single_level_multiindex
     s = Series(np.random.randn(len(single_level)), index=single_level)
     for k in single_level.values:
         s[k]
Example #26
0
class TestFancy:
    """ pure get/set item & fancy indexing """

    def test_setitem_ndarray_1d(self):
        """Assigning a 1d ndarray must match the length of the indexer (GH5508)."""

        def make_frame():
            # float64 column "foo" and complex column "bar" over labels 1..10
            frame = DataFrame(index=Index(np.arange(1, 11)))
            frame["foo"] = np.zeros(10, dtype=np.float64)
            frame["bar"] = np.zeros(10, dtype=complex)
            return frame

        payload = [2.33j, 1.23 + 0.1j, 2.2, 1.0]
        frame = make_frame()

        # invalid: three rows selected, four values supplied
        msg_len = (
            "cannot set using a multi-index selection "
            "indexer with a different length than the value"
        )
        with pytest.raises(ValueError, match=msg_len):
            frame.loc[frame.index[2:5], "bar"] = np.array(payload)

        # valid: four rows selected, four values supplied
        frame.loc[frame.index[2:6], "bar"] = np.array(payload)

        result = frame.loc[frame.index[2:6], "bar"]
        expected = Series(payload, index=[3, 4, 5, 6], name="bar")
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        frame = make_frame()

        msg_iter = "Must have equal len keys and value when setting with an iterable"
        with pytest.raises(ValueError, match=msg_iter):
            frame[2:5] = np.arange(1, 4) * 1j

    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "getitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
        ],
    )
    def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Reading through a 3d ndarray key must raise (GH 25567)."""
        container = obj(index)
        accessor = idxr(container)
        key_3d = np.random.randint(5, size=(2, 2, 2))

        # Any one of these messages is accepted, paired with any of the
        # exception types below.
        accepted_messages = [
            r"Buffer has wrong number of dimensions \(expected 1, got 3\)",
            "Cannot index with multidimensional key",
            r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]",
            "Index data must be 1-dimensional",
            "positional indexers are out-of-bounds",
            "Indexing a MultiIndex with a multidimensional key is not implemented",
        ]
        pattern = "|".join(accepted_messages)
        accepted_errors = (IndexError, ValueError, NotImplementedError)

        with pytest.raises(
            accepted_errors, match=pattern
        ), tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
            accessor[key_3d]

    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "setitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
        ],
    )
    def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Assigning through a 3d ndarray key must raise (GH 25567)."""
        container = obj(index)
        accessor = idxr(container)
        key_3d = np.random.randint(5, size=(2, 2, 2))

        via_iloc = idxr_id == "iloc"
        if via_iloc and isinstance(container, pd.DataFrame) and len(index) == 0:
            # gh-32896
            pytest.skip("This is currently failing. There's an xfailed test below.")

        interval_series_setitem = (
            idxr_id == "setitem"
            and container.ndim == 1
            and isinstance(index, pd.IntervalIndex)
        )
        if via_iloc:
            err, msg = ValueError, f"Cannot set values with ndim > {container.ndim}"
        elif interval_series_setitem:
            err = AttributeError
            msg = (
                "'pandas._libs.interval.IntervalTree' object has no attribute 'get_loc'"
            )
        else:
            err = ValueError
            # NOTE(review): the trailing "|" adds an empty alternative, so this
            # pattern matches any message; only the exception type is really
            # checked on this branch. Confirm whether that is intended.
            msg = r"Buffer has wrong number of dimensions \(expected 1, got 3\)|"

        with pytest.raises(err, match=msg):
            accessor[key_3d] = 0

    @pytest.mark.xfail(reason="gh-32896")
    def test_setitem_ndarray_3d_does_not_fail_for_iloc_empty_dataframe(self):
        """Desired ``iloc`` behaviour on an empty DataFrame with a 3d key.

        Marked xfail (gh-32896); when fixing this, please remove the
        pytest.skip in test_setitem_ndarray_3d.
        """
        empty = Index([])
        size = len(empty)
        frame = DataFrame(np.random.randn(size, size), index=empty, columns=empty)
        key_3d = np.random.randint(5, size=(2, 2, 2))

        expected_message = f"Cannot set values with ndim > {frame.ndim}"
        with pytest.raises(ValueError, match=expected_message):
            frame.iloc[key_3d] = 0

    def test_inf_upcast(self):
        """``np.inf`` is usable as a label and turns the axis into a float index.

        GH 16957
        """
        # np.inf as a row label
        by_row = DataFrame(columns=[0])
        for label, value in [(1, 1), (2, 2), (np.inf, 3)]:
            by_row.loc[label] = value

        # make sure we can look up the value
        assert by_row.loc[np.inf, 0] == 3

        tm.assert_index_equal(by_row.index, pd.Float64Index([1, 2, np.inf]))

        # np.inf as a column label
        by_col = DataFrame()
        for row, col, value in [(0, 0, 1), (1, 1, 2), (0, np.inf, 3)]:
            by_col.loc[row, col] = value

        tm.assert_index_equal(by_col.columns, pd.Float64Index([0, 1, np.inf]))

    def test_setitem_dtype_upcast(self):
        """Setting an incompatible scalar leaves the other columns' dtypes alone."""
        # GH3216
        frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        frame["c"] = np.nan
        assert frame["c"].dtype == np.float64

        frame.loc[0, "c"] = "foo"
        expected_rows = [
            {"a": 1, "b": np.nan, "c": "foo"},
            {"a": 3, "b": 2, "c": np.nan},
        ]
        tm.assert_frame_equal(frame, DataFrame(expected_rows))

        # GH10280
        axes = {"index": list("ab"), "columns": ["foo", "bar", "baz"]}
        int_frame = DataFrame(np.arange(6, dtype="int64").reshape(2, 3), **axes)

        for new_value in (3.14, "wxyz"):
            left = int_frame.copy()
            left.loc["a", "bar"] = new_value
            right = DataFrame([[0, new_value, 2], [3, 4, 5]], **axes)

            tm.assert_frame_equal(left, right)
            # the columns that were not assigned to stay integer
            for untouched in ("foo", "baz"):
                assert is_integer_dtype(left[untouched])

        # same check starting from an all-float frame
        left = DataFrame(np.arange(6, dtype="int64").reshape(2, 3) / 10.0, **axes)
        left.loc["a", "bar"] = "wxyz"
        right = DataFrame([[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], **axes)

        tm.assert_frame_equal(left, right)
        for untouched in ("foo", "baz"):
            assert is_float_dtype(left[untouched])

    def test_dups_fancy_indexing(self):
        """Selection on frames whose column or index labels contain duplicates.

        Checks column order when selecting duplicated names, frames whose
        columns all share one name across dtypes, ``.loc`` row selection on
        a duplicated index, and that any missing label in a list-like
        ``.loc`` key raises ``KeyError``.
        """

        # GH 3455

        df = tm.makeCustomDataframe(10, 3)
        df.columns = ["a", "a", "b"]
        result = df[["b", "a"]].columns
        expected = Index(["b", "a", "a"])
        tm.assert_index_equal(result, expected)

        # across dtypes
        df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa"))
        # smoke checks: head() and str() must not raise with all-duplicate names
        df.head()
        str(df)
        result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
        result.columns = list("aaaaaaa")

        # Positional column access must agree whether the duplicate names were
        # given to the constructor or assigned afterwards.
        tm.assert_series_equal(df.iloc[:, 4], result.iloc[:, 4])

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        df = DataFrame(
            {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")},
            index=["A", "A", "B", "C"],
        )
        rows = ["C", "B"]
        expected = DataFrame(
            {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows
        )
        result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        result = df.loc[Index(rows)]
        tm.assert_frame_equal(result, expected)

        rows = ["C", "B", "E"]
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[rows]

        # see GH5553, make sure we use the right indexer
        rows = ["F", "G", "H", "C", "B", "E"]
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[rows]

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
        with pytest.raises(
            KeyError,
            match=re.escape(
                "\"None of [Index(['E'], dtype='object')] are in the [index]\""
            ),
        ):
            dfnu.loc[["E"]]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        df = DataFrame({"A": [0, 1, 2]})
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[[0, 8, 0]]

        df = DataFrame({"A": list("abc")})
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[[0, 8, 0]]

        # non unique with non unique selector
        df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[["A", "A", "E"]]

    def test_dups_fancy_indexing2(self):
        """``.loc`` with duplicated labels: missing keys raise, multi-axis works."""
        # GH 5835
        # dups on index and missing values
        dup_cols = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])

        with pytest.raises(KeyError, match="with any missing labels"):
            dup_cols.loc[:, ["A", "B", "C"]]

        # GH 6504, multi-axis indexing
        dup_rows = DataFrame(
            np.random.randn(9, 2),
            index=[1, 1, 1, 2, 2, 2, 3, 3, 3],
            columns=["a", "b"],
        )
        row_labels = [1, 2]
        col_labels = ["a", "b"]

        # labels 1 and 2 occupy the first six positions
        first_six = dup_rows.iloc[0:6]
        tm.assert_frame_equal(dup_rows.loc[row_labels], first_six)

        tm.assert_frame_equal(dup_rows.loc[:, col_labels], dup_rows)

        first_six_all_cols = dup_rows.iloc[0:6, :]
        tm.assert_frame_equal(dup_rows.loc[row_labels, col_labels], first_six_all_cols)

    @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc])
    def test_duplicate_int_indexing(self, case):
        """List key ``[1]`` selects the same rows as scalar ``1`` on a duplicated index."""
        # GH 17347
        ser = Series(range(3), index=[1, 1, 3])
        by_scalar = ser[1]
        by_list = case(ser)[[1]]
        tm.assert_series_equal(by_list, by_scalar)

    def test_indexing_mixed_frame_bug(self):
        """Masked ``.loc`` assignment into a derived column takes effect (GH3492)."""
        frame = DataFrame(
            {"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 222, 3: 333}}
        )

        def swap_aaa(replacement):
            # element-wise mapper: "aaa" -> replacement, anything else unchanged
            return lambda value: replacement if value == "aaa" else value

        # creating the new column from an apply() result
        frame["test"] = frame["a"].apply(swap_aaa("_"))

        # overwriting only the flagged rows of that column
        flagged = frame["test"] == "_"
        replacement_values = frame.loc[flagged, "a"].apply(swap_aaa("-----"))
        frame.loc[flagged, "test"] = replacement_values
        assert frame.iloc[0, 2] == "-----"

    def test_multitype_list_index_access(self):
        """Mixed str/int column labels: absent ints are named in the KeyError."""
        # GH 10610
        labels = ["a", 20, 21, 22, 23]
        frame = DataFrame(np.random.random((10, 5)), columns=labels)

        missing_message = re.escape("'[-8, 26] not in index'")
        with pytest.raises(KeyError, match=missing_message):
            frame[[22, 26, -8]]

        n_rows = frame.shape[0]
        assert frame[21].shape[0] == n_rows

    def test_set_index_nan(self):
        """``set_index`` then ``reset_index`` round-trips a frame with NaN keys."""
        # GH 3586
        nan = np.nan
        row_labels = range(17, 31)  # 14 rows labelled 17..30
        # one value per row label, in label order
        values_by_column = {
            "PRuid": [
                "nonQC",
                "nonQC",
                "nonQC",
                "10",
                "11",
                "12",
                "13",
                "24",
                "35",
                "46",
                "47",
                "48",
                "59",
                "10",
            ],
            "QC": [0.0, 0.0, 0.0, nan, nan, nan, nan, 1.0, nan, nan, nan, nan, nan, nan],
            "data": [
                7.9544899999999998,
                8.0142609999999994,
                7.8591520000000008,
                0.86140349999999999,
                0.87853110000000001,
                0.8427041999999999,
                0.78587700000000005,
                0.73062459999999996,
                0.81668560000000001,
                0.81927080000000008,
                0.80705009999999999,
                0.81440240000000008,
                0.80140849999999997,
                0.81307740000000006,
            ],
            "year": [2006, 2007, 2008] + [1985] * 10 + [1986],
        }
        nested = {
            name: dict(zip(row_labels, values))
            for name, values in values_by_column.items()
        }
        frame = DataFrame(nested).reset_index()

        keyed = frame.set_index(["year", "PRuid", "QC"])
        round_tripped = keyed.reset_index().reindex(columns=frame.columns)
        tm.assert_frame_equal(round_tripped, frame)

    def test_multi_assign(self):
        """Masked multi-column ``.loc`` assignment from a frame, ndarray and Series."""
        # GH 3626, an assignment of a sub-df to a df
        base = DataFrame(
            {
                "FC": ["a", "b", "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": list(range(6)),
                "col2": list(range(6, 12)),
            }
        )
        base.iloc[1, 0] = np.nan
        target = base.copy()

        has_fc = ~target.FC.isna()
        value_cols = ["col1", "col2"]

        doubled = target * 2
        doubled.iloc[3, 3] = np.nan

        def expected_with(col1):
            # expected frame; only the representation of "col1" varies
            return DataFrame(
                {
                    "FC": ["a", np.nan, "a", "b", "a", "b"],
                    "PF": [0, 0, 0, 0, 1, 1],
                    "col1": col1,
                    "col2": [12, 7, 16, np.nan, 20, 22],
                }
            )

        # frame on rhs; assigned twice, the second pass must change nothing
        expected = expected_with(Series([0, 1, 4, 6, 8, 10]))
        for _ in range(2):
            target.loc[has_fc, value_cols] = doubled.loc[has_fc, value_cols]
            tm.assert_frame_equal(target, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = expected_with([0.0, 1.0, 4.0, 6.0, 8.0, 10.0])
        target = base.copy()
        for _ in range(2):
            target.loc[has_fc, value_cols] = doubled.loc[has_fc, value_cols].values
            tm.assert_frame_equal(target, expected)

        # broadcasting on the rhs is required
        frame = DataFrame(
            {
                "A": [1, 2, 0, 0, 0],
                "B": [0, 0, 0, 10, 11],
                "C": [0, 0, 0, 10, 11],
                "D": [3, 4, 5, 6, 7],
            }
        )

        expected = frame.copy()
        zero_a = expected["A"] == 0
        for col in ["A", "B"]:
            expected.loc[zero_a, col] = frame["D"]

        frame.loc[frame["A"] == 0, ["A", "B"]] = frame["D"]
        tm.assert_frame_equal(frame, expected)

    def test_setitem_list(self):
        """``iloc`` can store a list or an arbitrary object in a single cell."""
        # GH 6043

        def blank_frame():
            # two rows, one column, no data
            return DataFrame(index=[0, 1], columns=[0])

        class TO:
            # minimal value wrapper used as an arbitrary cell object
            def __init__(self, value):
                self.value = value

            def __str__(self) -> str:
                return f"[{self.value}]"

            __repr__ = __str__

            def __eq__(self, other) -> bool:
                return self.value == other.value

            def view(self):
                return self

        # iloc with a list: the second assignment replaces the first
        frame = blank_frame()
        frame.iloc[1, 0] = [1, 2, 3]
        frame.iloc[1, 0] = [1, 2]

        reference = blank_frame()
        reference.iloc[1, 0] = [1, 2]

        tm.assert_frame_equal(reference, frame)

        # iloc with an object
        frame = blank_frame()
        frame.iloc[1, 0] = TO(1)
        frame.iloc[1, 0] = TO(2)

        reference = blank_frame()
        reference.iloc[1, 0] = TO(2)

        tm.assert_frame_equal(reference, frame)

        # remains object dtype even after setting it back
        frame = blank_frame()
        frame.iloc[1, 0] = TO(1)
        frame.iloc[1, 0] = np.nan
        reference = blank_frame()

        tm.assert_frame_equal(reference, frame)

    def test_string_slice(self):
        """String keys on an object-dtype datetimelike index raise KeyError."""
        # GH 14424
        missing_key = "'2011'"

        stamps = Index([pd.Timestamp("2011-01-01")], dtype=object)
        frame = DataFrame([1], stamps)
        assert frame.index._is_all_dates
        with pytest.raises(KeyError, match=missing_key):
            frame["2011"]

        with pytest.raises(KeyError, match=missing_key), tm.assert_produces_warning(
            FutureWarning
        ):
            # This does an is_all_dates check
            frame.loc["2011", 0]

        # same lookups against an empty frame
        frame = DataFrame()
        assert not frame.index._is_all_dates
        with pytest.raises(KeyError, match=missing_key):
            frame["2011"]

        with pytest.raises(KeyError, match=missing_key):
            frame.loc["2011", 0]

    def test_astype_assignment(self):
        """Assigning ``astype`` results back through ``iloc``/``loc`` keeps the new values."""
        letters = list("ABCDEFG")

        def one_row(row):
            # single-row frame with columns A..G
            return DataFrame([row], columns=letters)

        original = one_row(["1", "2", "3", ".4", 5, 6.0, "foo"])

        # GH4312 (iloc)
        frame = original.copy()
        frame.iloc[:, 0:2] = frame.iloc[:, 0:2].astype(np.int64)
        tm.assert_frame_equal(frame, one_row([1, 2, "3", ".4", 5, 6.0, "foo"]))

        frame = original.copy()
        frame.iloc[:, 0:2] = frame.iloc[:, 0:2]._convert(datetime=True, numeric=True)
        tm.assert_frame_equal(frame, one_row([1, 2, "3", ".4", 5, 6.0, "foo"]))

        # GH5702 (loc)
        frame = original.copy()
        frame.loc[:, "A"] = frame.loc[:, "A"].astype(np.int64)
        tm.assert_frame_equal(frame, one_row([1, "2", "3", ".4", 5, 6.0, "foo"]))

        frame = original.copy()
        frame.loc[:, ["B", "C"]] = frame.loc[:, ["B", "C"]].astype(np.int64)
        tm.assert_frame_equal(frame, one_row(["1", 2, 3, ".4", 5, 6.0, "foo"]))

        # full replacements / no nans
        for positional in (True, False):
            frame = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
            as_int = frame["A"].astype(np.int64)
            if positional:
                frame.iloc[:, 0] = as_int
            else:
                frame.loc[:, "A"] = as_int
            tm.assert_frame_equal(frame, DataFrame({"A": [1, 2, 3, 4]}))

    def test_index_type_coercion(self):
        """Setting with a float or str key changes the index type when needed.

        GH 11836: if we have an index type and set it with something that
        looks to numpy like the same, but actually is not (e.g. setting with
        a float or string '0'), then we need to coerce.
        """
        accessors = [lambda x: x.loc, lambda x: x]

        # integer indexes
        for ser in [Series(range(5)), Series(range(5), index=range(1, 6))]:
            assert ser.index.is_integer()

            for access in accessors:
                # a non-integral float key gives a floating index
                changed = ser.copy()
                access(changed)[0.1] = 0
                assert changed.index.is_floating()
                assert access(changed)[0.1] == 0

                # 0.0 is treated as label 0: appended only if 0 was absent
                changed = ser.copy()
                access(changed)[0.0] = 0
                if 0 in ser:
                    expected_index = ser.index
                else:
                    expected_index = Index(ser.index.tolist() + [0])
                tm.assert_index_equal(changed.index, expected_index)

                # a string key gives an object index
                changed = ser.copy()
                access(changed)["0"] = 0
                assert changed.index.is_object()

        # float index
        ser = Series(range(5), index=np.arange(5.0))
        assert ser.index.is_floating()

        for access in accessors:
            changed = ser.copy()
            access(changed)[0.1] = 0
            assert changed.index.is_floating()
            assert access(changed)[0.1] == 0

            # 0.0 already exists, so the index is unchanged
            changed = ser.copy()
            access(changed)[0.0] = 0
            tm.assert_index_equal(changed.index, ser.index)

            changed = ser.copy()
            access(changed)["0"] = 0
            assert changed.index.is_object()
Example #27
0
 def setup(self, dtype):
     N = 10**5
     data = np.array([1] * N + [2] * N + [3] * N).astype(dtype)
     self.s = Series(data)
Example #28
0
 def test_duplicate_int_indexing(self, case):
     # GH 17347
     s = Series(range(3), index=[1, 1, 3])
     expected = s[1]
     result = case(s)[[1]]
     tm.assert_series_equal(result, expected)
Example #29
0
 def setup(self, n):
     self.s = Series(np.random.randn(n))
Example #30
0
def test_repeat_with_null(any_string_dtype, arg, repeat):
    # GH: 31632
    ser = Series(["a", arg], dtype=any_string_dtype)
    result = ser.str.repeat([3, repeat])
    expected = Series(["aaa", np.nan], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)