def test_censor():
    """Exercise ``censor`` on numeric, date/time and edge-case inputs.

    Covers plain lists, pandas columns, ``datetime``/``timedelta``,
    ``pd.Timestamp``/``pd.Timedelta``, ``np.datetime64``/``np.timedelta64``,
    the ``only_finite=False`` call, empty input and string input.

    NOTE(review): this file defines ``test_censor`` twice with the same
    checks; the later definition replaces this one when the module is
    imported -- confirm which copy should be kept.
    """
    numbers = list(range(10))
    censored = censor(numbers, (2, 8))
    # 0, 1 and 9 lie outside (2, 8) and are expected to come back as NaN.
    for idx in (0, 1, 9):
        assert np.isnan(censored[idx])

    frame = pd.DataFrame({'x': numbers, 'y': range(10)})
    frame['x'] = censor(frame['x'], (2, 8))
    for idx in (0, 1, 9):
        assert np.isnan(frame['x'][idx])

    # Limits wider than the data: the column must keep an integer dtype.
    frame['y'] = censor(frame['y'], (-2, 18))
    assert issubclass(frame['y'].dtype.type, np.integer)

    # datetime
    bounds = datetime(2010, 1, 1), datetime(2020, 1, 1)
    values = [datetime(year, 1, 1) for year in range(2008, 2023)]
    out = censor(values, bounds)
    assert out[2:-2] == values[2:-2]
    assert out[:2] == ['NaT'] * 2
    assert out[-2:] == ['NaT'] * 2

    # timedelta
    bounds = timedelta(seconds=2010), timedelta(seconds=2020)
    values = [timedelta(seconds=i) for i in range(2008, 2023)]
    out = censor(values, bounds)
    assert out[2:-2] == values[2:-2]
    assert out[:2] == ['NaT'] * 2
    assert out[-2:] == ['NaT'] * 2

    # pd.timestamp
    bounds = pd.Timestamp(200 * 1e16), pd.Timestamp(205 * 1e16)
    values = [pd.Timestamp(i * 1e16) for i in range(198, 208)]
    out = censor(values, bounds)
    assert out[2:-2] == values[2:-2]
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, NaT_type) for item in edge)

    as_array = np.array(values)
    out = censor(as_array, bounds)
    npt.assert_array_equal(out[2:-2], as_array[2:-2])
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, NaT_type) for item in edge)

    as_series = pd.Series(values)
    out = censor(as_series, bounds)
    pdt.assert_series_equal(out[2:-2], as_series[2:-2])
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, NaT_type) for item in edge)

    # np.datetime
    bounds = np.datetime64(200, 'D'), np.datetime64(205, 'D')
    as_array = np.array([np.datetime64(i, 'D') for i in range(198, 208)])
    out = censor(as_array, bounds)
    npt.assert_array_equal(out[2:-2], as_array[2:-2])
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, np.datetime64) for item in edge)

    # pd.Timedelta
    bounds = pd.Timedelta(seconds=2010), pd.Timedelta(seconds=2020)
    values = [pd.Timedelta(seconds=i) for i in range(2008, 2023)]
    out = censor(values, bounds)
    # A list input is expected to yield a list.
    assert isinstance(out, list)
    assert out[2:-2] == values[2:-2]
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, NaT_type) for item in edge)

    as_array = np.array(values)
    out = censor(as_array, bounds)
    npt.assert_array_equal(out[2:-2], as_array[2:-2])
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, NaT_type) for item in edge)

    # np.timedelta64
    bounds = np.timedelta64(200, 'D'), np.timedelta64(205, 'D')
    as_array = np.array([np.timedelta64(i, 'D') for i in range(198, 208)])
    out = censor(as_array, bounds)
    npt.assert_array_equal(out[2:-2], as_array[2:-2])
    for edge in (out[:2], out[-2:]):
        assert all(isinstance(item, np.timedelta64) for item in edge)

    # branches #
    with_inf = np.array([1, 2, np.inf, 3, 4, 11])
    out = censor(with_inf, (0, 10), only_finite=False)
    npt.assert_array_equal(out, np.array([1, 2, np.nan, 3, 4, np.nan]))

    assert len(censor([], (-2, 18))) == 0

    with pytest.raises(ValueError):
        censor(['a', 'b', 'c'], ('a', 'z'))
def test_censor():
    """Check ``censor`` across numeric, temporal and edge-case inputs.

    Each section builds a sequence whose first and last elements lie
    outside the limits, then asserts that the interior is returned
    unchanged and that the out-of-limit ends are replaced by a null
    value of the expected kind.

    NOTE(review): an earlier definition of ``test_censor`` with the same
    checks exists in this file; this later one is what the name refers to
    after import -- confirm whether the earlier copy can be dropped.
    """

    def ends_are(seq, kind):
        # True when the two leading and two trailing items are all `kind`.
        head_ok = all(isinstance(val, kind) for val in seq[:2])
        tail_ok = all(isinstance(val, kind) for val in seq[-2:])
        return head_ok and tail_ok

    nan_positions = (0, 1, 9)

    data = list(range(10))
    res = censor(data, (2, 8))
    for pos in nan_positions:
        assert np.isnan(res[pos])

    table = pd.DataFrame({'x': data, 'y': range(10)})
    table['x'] = censor(table['x'], (2, 8))
    for pos in nan_positions:
        assert np.isnan(table['x'][pos])

    # Nothing in 'y' is outside (-2, 18); dtype should remain integer.
    table['y'] = censor(table['y'], (-2, 18))
    assert issubclass(table['y'].dtype.type, np.integer)

    nat_pair = ['NaT', 'NaT']

    # datetime
    lo, hi = datetime(2010, 1, 1), datetime(2020, 1, 1)
    data = [datetime(year, 1, 1) for year in range(2008, 2023)]
    res = censor(data, (lo, hi))
    assert res[2:-2] == data[2:-2]
    assert res[:2] == nat_pair
    assert res[-2:] == nat_pair

    # timedelta
    lo, hi = timedelta(seconds=2010), timedelta(seconds=2020)
    data = [timedelta(seconds=i) for i in range(2008, 2023)]
    res = censor(data, (lo, hi))
    assert res[2:-2] == data[2:-2]
    assert res[:2] == nat_pair
    assert res[-2:] == nat_pair

    # pd.timestamp
    span = (pd.Timestamp(200*1e16), pd.Timestamp(205*1e16))
    data = [pd.Timestamp(i*1e16) for i in range(198, 208)]
    res = censor(data, span)
    assert res[2:-2] == data[2:-2]
    assert ends_are(res, NaT_type)

    arr = np.array(data)
    res = censor(arr, span)
    npt.assert_array_equal(res[2:-2], arr[2:-2])
    assert ends_are(res, NaT_type)

    ser = pd.Series(data)
    res = censor(ser, span)
    pdt.assert_series_equal(res[2:-2], ser[2:-2])
    assert ends_are(res, NaT_type)

    # np.datetime
    span = (np.datetime64(200, 'D'), np.datetime64(205, 'D'))
    data = [np.datetime64(i, 'D') for i in range(198, 208)]
    arr = np.array(data)
    res = censor(arr, span)
    npt.assert_array_equal(res[2:-2], arr[2:-2])
    assert ends_are(res, np.datetime64)

    # pd.Timedelta
    span = (pd.Timedelta(seconds=2010), pd.Timedelta(seconds=2020))
    data = [pd.Timedelta(seconds=i) for i in range(2008, 2023)]
    res = censor(data, span)
    assert isinstance(res, list)
    assert res[2:-2] == data[2:-2]
    assert ends_are(res, NaT_type)

    arr = np.array(data)
    res = censor(arr, span)
    npt.assert_array_equal(res[2:-2], arr[2:-2])
    assert ends_are(res, NaT_type)

    # np.timedelta64
    span = (np.timedelta64(200, 'D'), np.timedelta64(205, 'D'))
    data = [np.timedelta64(i, 'D') for i in range(198, 208)]
    arr = np.array(data)
    res = censor(arr, span)
    npt.assert_array_equal(res[2:-2], arr[2:-2])
    assert ends_are(res, np.timedelta64)

    # branches #
    arr = np.array([1, 2, np.inf, 3, 4, 11])
    expected = np.array([1, 2, np.nan, 3, 4, np.nan])
    res = censor(arr, (0, 10), only_finite=False)
    npt.assert_array_equal(res, expected)

    # Empty input comes back empty.
    res = censor([], (-2, 18))
    assert len(res) == 0

    # String values are expected to be rejected.
    with pytest.raises(ValueError):
        censor(['a', 'b', 'c'], ('a', 'z'))