def test_clip_to_seconds_series():
    """Check that ``clip_to_seconds`` collapses a nanosecond series to one second.

    The input is 10000 consecutive nanosecond timestamps starting ``stamp``
    seconds after the epoch, so every element lies within the same second and
    the clipped series must contain the single value ``stamp``.
    """
    m = IndexMap()
    stamp = 1234567890
    timestamps = pd.date_range(
        pd.to_datetime(stamp, unit='s'), periods=10000, freq='ns'
    ).to_series()
    # Ask for int64 explicitly: builtin ``int`` resolves to the platform's
    # default NumPy integer, which can be 32 bits wide (e.g. Windows with
    # NumPy < 2) and cannot represent nanosecond epoch values.
    k = timestamps.astype('int64')

    # Clip once and reuse the result for both checks.
    clipped = m.clip_to_seconds(k).unique()
    assert len(clipped) == 1
    assert clipped[0] == stamp
def test_update(mocker):
    """Check that ``IndexMap.update`` gives every key its own distinct value.

    ``hash_`` is replaced by a seeded random stand-in so the test is
    deterministic. The test then verifies that:

    * all keys are present after the first update and no values repeat,
    * updating with keys that are already mapped raises ``KeyError``,
    * a second batch of genuinely new keys is added without duplicates.

    Args:
        mocker: the pytest-mock fixture used to patch ``hash_`` on the map.
    """
    m = IndexMap()
    keys = generate_keys(10000)

    def hash_mock(k, salt=0):
        # Deterministic substitute for ``hash_``: integers drawn from
        # [0, 10 * len(k)) with a seed that changes with ``salt``.
        seed = 123456
        rs = np.random.RandomState(seed=seed + salt)
        return pd.Series(rs.randint(0, len(k) * 10, size=len(k)), index=k)

    # pytest-mock does not support using ``mocker.patch`` as a context manager
    # (older releases raise, newer ones warn). Patch directly instead; the
    # fixture removes the patch automatically when the test finishes.
    mocker.patch.object(m, 'hash_', side_effect=hash_mock)

    m.update(keys)
    assert len(m) == len(keys), "All keys not in mapping"
    assert m._map.index.difference(keys).empty, "All keys not in mapping"
    assert len(m._map.unique()) == len(keys), "Duplicate values in mapping"

    # Can't have duplicate keys.
    with pytest.raises(KeyError):
        m.update(keys)

    new_unique_keys = generate_keys(1000).difference(keys)
    m.update(new_unique_keys)
    expected_size = len(keys) + len(new_unique_keys)
    assert len(m) == expected_size, "All keys not in mapping"
    assert m._map.index.difference(
        keys.union(new_unique_keys)).empty, "All keys not in mapping"
    assert len(m._map.unique()) == expected_size, "Duplicate values in mapping"
def test_convert_to_ten_digit_int():
    """Exercise ``convert_to_ten_digit_int`` on datetime, int, float and string columns.

    Datetime and float columns are expected to yield 1234567890, the integer
    column 4072825790, and a column of strings must raise ``RandomnessError``.
    Only the first unique value of each converted column is inspected.
    """
    index_map = IndexMap()
    seconds = 1234567890

    # Build every input column up front, then check them in turn.
    from_datetimes = pd.date_range(
        pd.to_datetime(seconds, unit='s'), periods=10000, freq='ns'
    ).to_series()
    from_ints = pd.Series(seconds, index=range(10000))
    from_floats = pd.Series(1.1234567890, index=range(10000))
    from_strings = pd.Series('a', index=range(10000))

    expectations = (
        (from_datetimes, seconds),
        (from_ints, 4072825790),
        (from_floats, seconds),
    )
    for column, expected in expectations:
        assert index_map.convert_to_ten_digit_int(column).unique()[0] == expected

    # Non-numeric, non-datetime data is rejected.
    with pytest.raises(RandomnessError):
        index_map.convert_to_ten_digit_int(from_strings)
def test_spread_scalar():
    """Check that ``spread`` maps the scalar 1234567890 to 4072825790."""
    index_map = IndexMap()
    spread_value = index_map.spread(1234567890)
    assert spread_value == 4072825790
def test_clip_to_seconds_scalar():
    """Check ``clip_to_seconds`` on a single integer timestamp value.

    The expected result is the first ten digits of the timestamp's integer
    ``.value``.
    """
    index_map = IndexMap()
    timestamp = pd.to_datetime("2010-01-25 06:25:31.123456789")

    clipped = index_map.clip_to_seconds(timestamp.value)
    leading_ten_digits = int(str(timestamp.value)[:10])
    assert clipped == leading_ten_digits
def test_digit_series():
    """Check ``digit`` on a constant series for positions 0 through 9.

    Every element is 123456789, so each position must produce a single
    distinct value, expected to be ``9 - position`` (9 at position 0 down to
    0 at position 9).
    """
    index_map = IndexMap()
    numbers = pd.Series(123456789, index=range(10000))

    for position in range(10):
        expected_digit = 9 - position
        distinct_count = len(index_map.digit(numbers, position).unique())
        assert distinct_count == 1
        first_entry = index_map.digit(numbers, position)[0]
        assert first_entry == expected_digit
def test_digit_scalar():
    """Check ``digit`` on the scalar 123456789 for positions 0 through 9.

    Position 0 is expected to give 9, position 1 to give 8, and so on down
    to 0 at position 9.
    """
    index_map = IndexMap()
    number = 123456789

    # Pair each position 0..9 with its expected digit 9..0.
    for position, expected_digit in enumerate(range(9, -1, -1)):
        assert index_map.digit(number, position) == expected_digit
def map_size_and_hashed_values(request):
    """Return a fresh ``IndexMap``'s ``map_size`` and the hashes of generated keys.

    ``request.param`` is unpacked as the positional arguments to
    ``generate_keys``.

    NOTE(review): presumably registered as a parametrized pytest fixture; the
    decorator is not visible in this part of the file, so confirm there.

    Returns:
        A ``(map_size, hashed_keys)`` tuple.
    """
    generated_keys = generate_keys(*request.param)
    index_map = IndexMap()
    size = index_map.map_size
    hashed_keys = index_map.hash_(generated_keys)
    return size, hashed_keys
def test_shift_series():
    """Check that ``shift`` turns a constant float series into one integer value.

    Every element is 1.1234567890; the shifted series must contain exactly
    one distinct value, 1234567890.
    """
    index_map = IndexMap()
    floats = pd.Series(1.1234567890, index=range(10000))

    distinct_count = len(index_map.shift(floats).unique())
    assert distinct_count == 1

    first_distinct = index_map.shift(floats).unique()[0]
    assert first_distinct == 1234567890
def test_shift_scalar():
    """Check that ``shift`` maps the float 1.1234567890 to 1234567890."""
    index_map = IndexMap()
    shifted = index_map.shift(1.1234567890)
    assert shifted == 1234567890
def test_spread_series():
    """Check that ``spread`` maps a constant integer series to one value.

    Every element is 1234567890; the spread series must contain exactly one
    distinct value, 4072825790.
    """
    index_map = IndexMap()
    integers = pd.Series(1234567890, index=range(10000))

    distinct_count = len(index_map.spread(integers).unique())
    assert distinct_count == 1

    first_distinct = index_map.spread(integers).unique()[0]
    assert first_distinct == 4072825790