Esempio n. 1
0
def test_rank_apply():
    """Groupby ``rank`` must agree with ranking every group piece on its own.

    The comparison is run twice: once with the default ranking and once
    with ``pct=True``.
    """
    group_keys = ["key1", "key2"]

    level_a = tm.rands_array(10, 100)
    level_b = tm.rands_array(10, 130)
    codes_a = np.random.randint(0, 100, size=500)
    codes_b = np.random.randint(0, 130, size=500)

    df = DataFrame(
        {
            "value": np.random.randn(500),
            "key1": level_a.take(codes_a),
            "key2": level_b.take(codes_b),
        }
    )

    for rank_kwargs in ({}, {"pct": True}):
        result = df.groupby(group_keys).value.rank(**rank_kwargs)

        # Rank each group independently, then stitch the pieces back
        # together in the order of the groupby result.
        pieces = []
        for _, piece in df.groupby(group_keys):
            pieces.append(piece.value.rank(**rank_kwargs))
        expected = concat(pieces, axis=0).reindex(result.index)

        tm.assert_series_equal(result, expected)
Esempio n. 2
0
def test_getitem_negative_out_of_bounds():
    """Position -11 on a 10-element Series raises IndexError for get and set."""
    values = tm.rands_array(5, 10)
    labels = tm.rands_array(10, 10)
    s = Series(values, index=labels)

    expected_msg = "index -11 is out of bounds for axis 0 with size 10"

    # Reading one position past the start.
    with pytest.raises(IndexError, match=expected_msg):
        s[-11]

    # Writing to the same position.
    with pytest.raises(IndexError, match=expected_msg):
        s[-11] = "foo"
Esempio n. 3
0
def test_long_strings(setup_path):
    """Append a frame of 100-character strings to a store and read it back.

    The frame selected from the store must equal the frame that was appended.
    """
    # GH6166
    column_values = tm.rands_array(100, size=10)
    row_labels = tm.rands_array(100, size=10)
    df = DataFrame({"a": column_values}, index=row_labels)

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])

        roundtripped = store.select("df")
        tm.assert_frame_equal(df, roundtripped)
Esempio n. 4
0
    def test_series_frame_radd_bug(self, fixed_now_ts):
        """Check reflected addition with a string, then with a datetime.

        ``"foo_" + obj`` must prefix every element of a string Series or
        DataFrame; adding a ``datetime`` to the series from
        ``tm.makeTimeSeries()`` must raise ``TypeError`` in either order.
        """
        # GH#353

        def add_prefix(text):
            return "foo_" + text

        vals = Series(tm.rands_array(5, 10))
        prefixed_series = "foo_" + vals
        tm.assert_series_equal(prefixed_series, vals.map(add_prefix))

        frame = pd.DataFrame({"vals": vals})
        prefixed_frame = "foo_" + frame
        tm.assert_frame_equal(
            prefixed_frame, pd.DataFrame({"vals": vals.map(add_prefix)})
        )

        ts = tm.makeTimeSeries()
        ts.name = "ts"

        # really raise this time
        fix_now = fixed_now_ts.to_pydatetime()
        # The second alternative is a wrong error message, see
        # https://github.com/numpy/numpy/issues/18832
        msg = "unsupported operand type|Concatenation operation"

        for left, right in ((fix_now, ts), (ts, fix_now)):
            with pytest.raises(TypeError, match=msg):
                left + right
Esempio n. 5
0
    def test_compress_group_combinations(self):
        """Smoke test: outer-merge two frames keyed on many random string pairs.

        Nothing is asserted about the result; the merge only has to complete.
        """
        # original author's estimate: ~ 40000000 possible unique groups
        base_keys = tm.rands_array(10, 10000)
        key1 = np.tile(base_keys, 2)
        key2 = key1[::-1]

        left = DataFrame(
            {"key1": key1, "key2": key2, "value1": np.random.randn(20000)}
        )
        # The right-hand frame keeps every second key pair.
        right = DataFrame(
            {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)}
        )

        # just to hit the label compression code path
        merge(left, right, how="outer")
Esempio n. 6
0
    def test_series_frame_radd_bug(self):
        """Check reflected addition with a string, then with a datetime.

        ``"foo_" + obj`` must prefix every element of a string Series or
        DataFrame; adding the current time as a ``datetime`` to the series
        from ``tm.makeTimeSeries()`` must raise ``TypeError`` in either order.
        """
        # GH#353

        def add_prefix(text):
            return "foo_" + text

        vals = pd.Series(tm.rands_array(5, 10))
        prefixed_series = "foo_" + vals
        tm.assert_series_equal(prefixed_series, vals.map(add_prefix))

        frame = pd.DataFrame({"vals": vals})
        prefixed_frame = "foo_" + frame
        tm.assert_frame_equal(
            prefixed_frame, pd.DataFrame({"vals": vals.map(add_prefix)})
        )

        ts = tm.makeTimeSeries()
        ts.name = "ts"

        # really raise this time
        now = pd.Timestamp.now().to_pydatetime()
        for left, right in ((now, ts), (ts, now)):
            with pytest.raises(TypeError):
                left + right
Esempio n. 7
0
def test_same_len_hash_collisions(l_exp, l_add):
    """Two random strings of equal length must not receive the same hash.

    The string length is ``2 ** (l_exp + 8) + l_add``.
    """
    power_of_two = 2 ** (l_exp + 8)
    length = power_of_two + l_add

    pair = tm.rands_array(length, 2)
    hashes = hash_array(pair, "utf8")

    first, second = hashes[0], hashes[1]
    assert not (first == second)
Esempio n. 8
0
 def test_very_wide_info_repr(self):
     """Smoke test: ``repr`` of a 10x20 frame with random column names runs."""
     data = np.random.randn(10, 20)
     column_names = tm.rands_array(10, 20)
     wide = DataFrame(data, columns=column_names)
     repr(wide)
Esempio n. 9
0
def test_rands_array_2d():
    """A tuple ``size`` gives an array of that shape holding 7-character items."""
    nchars = 7
    shape = (10, 10)

    arr = tm.rands_array(nchars, size=shape)

    assert arr.shape == shape
    assert len(arr[1, 1]) == nchars
Esempio n. 10
0
def test_rands_array_1d():
    """An integer ``size`` gives a 1-D array holding 5-character items."""
    nchars = 5
    count = 10

    arr = tm.rands_array(nchars, size=count)

    assert arr.shape == (count,)
    assert len(arr[0]) == nchars