Example #1
0
def test_censor():
    """Check ``censor`` against numeric, time-like and degenerate inputs.

    Sections, in order: plain list and DataFrame columns, standard
    library ``datetime``/``timedelta``, pandas and numpy time types,
    the ``only_finite=False`` branch, empty input, and an input that
    must raise ``ValueError``.
    """
    def all_of_type(values, kind):
        # True when every element of ``values`` is an instance of ``kind``
        return all(isinstance(val, kind) for val in values)

    # 0, 1 and 9 lie outside (2, 8) and must come back as NaN
    outside = (0, 1, 9)
    nat_pair = ['NaT', 'NaT']

    numbers = list(range(10))
    censored = censor(numbers, (2, 8))
    for idx in outside:
        assert np.isnan(censored[idx])

    df = pd.DataFrame({'x': numbers, 'y': range(10)})
    df['x'] = censor(df['x'], (2, 8))
    for idx in outside:
        assert np.isnan(df['x'][idx])

    # Nothing falls outside (-2, 18), so the column must stay integer typed
    df['y'] = censor(df['y'], (-2, 18))
    assert issubclass(df['y'].dtype.type, np.integer)

    # datetime
    bounds = datetime(2010, 1, 1), datetime(2020, 1, 1)
    stamps = [datetime(year, 1, 1) for year in range(2008, 2023)]
    out = censor(stamps, bounds)
    assert out[2:-2] == stamps[2:-2]
    assert out[:2] == nat_pair
    assert out[-2:] == nat_pair

    # timedelta
    bounds = timedelta(seconds=2010), timedelta(seconds=2020)
    deltas = [timedelta(seconds=i) for i in range(2008, 2023)]
    out = censor(deltas, bounds)
    assert out[2:-2] == deltas[2:-2]
    assert out[:2] == nat_pair
    assert out[-2:] == nat_pair

    # pd.timestamp
    bounds = pd.Timestamp(200 * 1e16), pd.Timestamp(205 * 1e16)
    pd_stamps = [pd.Timestamp(i * 1e16) for i in range(198, 208)]
    out = censor(pd_stamps, bounds)
    assert out[2:-2] == pd_stamps[2:-2]
    assert all_of_type(out[:2], NaT_type)
    assert all_of_type(out[-2:], NaT_type)

    # same values, held in a numpy array
    stamp_array = np.array(pd_stamps)
    out = censor(stamp_array, bounds)
    npt.assert_array_equal(out[2:-2], stamp_array[2:-2])
    assert all_of_type(out[:2], NaT_type)
    assert all_of_type(out[-2:], NaT_type)

    # same values, held in a pandas series
    stamp_series = pd.Series(pd_stamps)
    out = censor(stamp_series, bounds)
    pdt.assert_series_equal(out[2:-2], stamp_series[2:-2])
    assert all_of_type(out[:2], NaT_type)
    assert all_of_type(out[-2:], NaT_type)

    # np.datetime
    bounds = np.datetime64(200, 'D'), np.datetime64(205, 'D')
    np_days = np.array([np.datetime64(i, 'D') for i in range(198, 208)])
    out = censor(np_days, bounds)
    npt.assert_array_equal(out[2:-2], np_days[2:-2])
    assert all_of_type(out[:2], np.datetime64)
    assert all_of_type(out[-2:], np.datetime64)

    # pd.Timedelta
    bounds = pd.Timedelta(seconds=2010), pd.Timedelta(seconds=2020)
    pd_deltas = [pd.Timedelta(seconds=i) for i in range(2008, 2023)]
    out = censor(pd_deltas, bounds)
    assert isinstance(out, list)
    assert out[2:-2] == pd_deltas[2:-2]
    assert all_of_type(out[:2], NaT_type)
    assert all_of_type(out[-2:], NaT_type)

    delta_array = np.array(pd_deltas)
    out = censor(delta_array, bounds)
    npt.assert_array_equal(out[2:-2], delta_array[2:-2])
    assert all_of_type(out[:2], NaT_type)
    assert all_of_type(out[-2:], NaT_type)

    # np.timedelta64
    bounds = np.timedelta64(200, 'D'), np.timedelta64(205, 'D')
    np_deltas = np.array([np.timedelta64(i, 'D') for i in range(198, 208)])
    out = censor(np_deltas, bounds)
    npt.assert_array_equal(out[2:-2], np_deltas[2:-2])
    assert all_of_type(out[:2], np.timedelta64)
    assert all_of_type(out[-2:], np.timedelta64)

    # branches #
    # With only_finite=False the infinite entry is censored as well
    with_inf = np.array([1, 2, np.inf, 3, 4, 11])
    expected = np.array([1, 2, np.nan, 3, 4, np.nan])
    out = censor(with_inf, (0, 10), only_finite=False)
    npt.assert_array_equal(out, expected)

    # Empty input yields an empty result
    out = censor([], (-2, 18))
    assert len(out) == 0

    # String data is expected to be rejected
    with pytest.raises(ValueError):
        censor(['a', 'b', 'c'], ('a', 'z'))
Example #2
0
def test_censor():
    """Verify ``censor`` on numbers, time-like values and edge cases.

    Each time-like section builds a sequence whose first two and last
    two elements lie outside the limits, then checks that the middle
    is untouched and the ends were replaced.
    """
    def assert_ends_are(seq, kind):
        # Both the leading and the trailing pair must be ``kind`` instances
        for edge in (seq[:2], seq[-2:]):
            assert all(isinstance(val, kind) for val in edge)

    # Plain python list
    data = list(range(10))
    trimmed = censor(data, (2, 8))
    assert np.isnan(trimmed[0])
    assert np.isnan(trimmed[1])
    assert np.isnan(trimmed[9])

    # DataFrame column
    frame = pd.DataFrame({'x': data, 'y': range(10)})
    frame['x'] = censor(frame['x'], (2, 8))
    col_x = frame['x']
    assert np.isnan(col_x[0])
    assert np.isnan(col_x[1])
    assert np.isnan(col_x[9])

    # All of y is inside the limits; the integer dtype must survive
    frame['y'] = censor(frame['y'], (-2, 18))
    assert issubclass(frame['y'].dtype.type, np.integer)

    # datetime
    lims = (datetime(2010, 1, 1), datetime(2020, 1, 1))
    dates = [datetime(year, 1, 1) for year in range(2008, 2023)]
    res = censor(dates, lims)
    assert res[2:-2] == dates[2:-2]
    assert res[:2] == ['NaT', 'NaT']
    assert res[-2:] == ['NaT', 'NaT']

    # timedelta
    lims = (timedelta(seconds=2010), timedelta(seconds=2020))
    spans = [timedelta(seconds=i) for i in range(2008, 2023)]
    res = censor(spans, lims)
    assert res[2:-2] == spans[2:-2]
    assert res[:2] == ['NaT', 'NaT']
    assert res[-2:] == ['NaT', 'NaT']

    # pd.timestamp
    lims = (pd.Timestamp(200*1e16), pd.Timestamp(205*1e16))
    ts_list = [pd.Timestamp(i*1e16) for i in range(198, 208)]
    res = censor(ts_list, lims)
    assert res[2:-2] == ts_list[2:-2]
    assert_ends_are(res, NaT_type)

    ts_array = np.array(ts_list)
    res = censor(ts_array, lims)
    npt.assert_array_equal(res[2:-2], ts_array[2:-2])
    assert_ends_are(res, NaT_type)

    ts_series = pd.Series(ts_list)
    res = censor(ts_series, lims)
    pdt.assert_series_equal(res[2:-2], ts_series[2:-2])
    assert_ends_are(res, NaT_type)

    # np.datetime
    lims = (np.datetime64(200, 'D'), np.datetime64(205, 'D'))
    day_array = np.array(
        [np.datetime64(i, 'D') for i in range(198, 208)])
    res = censor(day_array, lims)
    npt.assert_array_equal(res[2:-2], day_array[2:-2])
    assert_ends_are(res, np.datetime64)

    # pd.Timedelta
    lims = (pd.Timedelta(seconds=2010), pd.Timedelta(seconds=2020))
    td_list = [pd.Timedelta(seconds=i) for i in range(2008, 2023)]
    res = censor(td_list, lims)
    assert isinstance(res, list)
    assert res[2:-2] == td_list[2:-2]
    assert_ends_are(res, NaT_type)

    td_array = np.array(td_list)
    res = censor(td_array, lims)
    npt.assert_array_equal(res[2:-2], td_array[2:-2])
    assert_ends_are(res, NaT_type)

    # np.timedelta64
    lims = (np.timedelta64(200, 'D'), np.timedelta64(205, 'D'))
    np_td_array = np.array(
        [np.timedelta64(i, 'D') for i in range(198, 208)])
    res = censor(np_td_array, lims)
    npt.assert_array_equal(res[2:-2], np_td_array[2:-2])
    assert_ends_are(res, np.timedelta64)

    # branches #
    # only_finite=False: the inf entry is censored along with 11
    mixed = np.array([1, 2, np.inf, 3, 4, 11])
    res = censor(mixed, (0, 10), only_finite=False)
    npt.assert_array_equal(
        res, np.array([1, 2, np.nan, 3, 4, np.nan]))

    # Empty input stays empty
    res = censor([], (-2, 18))
    assert len(res) == 0

    # Strings are expected to raise
    with pytest.raises(ValueError):
        res = censor(['a', 'b', 'c'], ('a', 'z'))