def test_boosting_epochs():
    """Check boosting on epoched data for 1d, 2d and vector responses."""
    # NOTE(review): this module defines `test_boosting_epochs` a second time
    # further down; the later definition rebinds the name at import time, so
    # this version is presumably never collected -- confirm which is intended.
    ds = datasets.get_uts(True, vector3d=True)

    # One impulse predictor per level of A, created first and smoothed after
    impulse_a1 = epoch_impulse_predictor('uts', 'A=="a1"', name='a1', ds=ds)
    impulse_a0 = epoch_impulse_predictor('uts', 'A=="a0"', name='a0', ds=ds)
    p1 = impulse_a1.smooth('time', .05, 'hamming')
    p0 = impulse_a0.smooth('time', .05, 'hamming')

    # Keyword arguments shared by every boosting call in this test
    common = dict(model='A', ds=ds, partitions=10)

    # 1d: all combinations of kernel start time and basis window
    for tstart, basis in product((-0.1, 0.1, 0), (0, 0.05)):
        print(f"tstart={tstart}, basis={basis}")
        res = boosting(
            'uts', [p0, p1], tstart, 0.6, basis=basis, debug=True, **common)
        # Convolving the scaled kernels with the predictors should reproduce
        # the prediction stored on the result (available through debug=True)
        y = convolve(res.h_scaled, [p0, p1])
        assert correlation_coefficient(y, res.y_pred) > .999
        r = correlation_coefficient(y, ds['uts'])
        assert res.r == approx(r, abs=1e-3)
        assert res.partitions == 10

    # 2d: one kernel per predictor, each with the same expected shape
    res = boosting('utsnd', [p0, p1], 0, 0.6, **common)
    assert len(res.h) == 2
    for kernel in res.h:
        assert kernel.shape == (5, 60)
    y = convolve(res.h_scaled, [p0, p1])
    r = correlation_coefficient(y, ds['utsnd'], ('case', 'time'))
    assert_dataobj_equal(res.r, r, decimal=3, name=False)

    # vector: the residual is expected to be 0-dimensional
    res = boosting('v3d', [p0, p1], 0, 0.6, error='l1', **common)
    assert res.residual.ndim == 0
def test_boosting_epochs():
    """Check boosting on epoched data: 1d, prefit, 2d and vector responses."""
    ds = datasets.get_uts(True, vector3d=True)

    # One impulse predictor per level of A, created first and smoothed after
    impulse_a1 = epoch_impulse_predictor('uts', 'A=="a1"', name='a1', ds=ds)
    impulse_a0 = epoch_impulse_predictor('uts', 'A=="a0"', name='a0', ds=ds)
    p1 = impulse_a1.smooth('time', .05, 'hamming')
    p0 = impulse_a0.smooth('time', .05, 'hamming')

    # Keyword arguments shared by every boosting call in this test
    common = dict(model='A', ds=ds, partitions=10)

    # 1d: all combinations of kernel start time and basis window
    for tstart, basis in product((-0.1, 0.1, 0), (0, 0.05)):
        print(f"tstart={tstart}, basis={basis}")
        res = boosting(
            'uts', [p0, p1], tstart, 0.6, basis=basis, debug=True, **common)
        # Convolving the scaled kernels with the predictors should reproduce
        # the prediction stored on the result (available through debug=True)
        y = convolve(res.h_scaled, [p0, p1])
        assert correlation_coefficient(y, res.y_pred) > .999
        r = correlation_coefficient(y, ds['uts'])
        assert res.r == approx(r, abs=1e-3)
        assert res.partitions == 10

    # prefit: nothing is asserted on these results, so this section only
    # verifies that the three calls complete without raising
    res1 = boosting('uts', p1, 0, 0.6, **common)
    res0 = boosting('uts', p0, 0, 0.6, **common)
    res01 = boosting('uts', [p0, p1], 0, 0.6, prefit=res1, **common)

    # 2d: one kernel per predictor, each with the same expected shape
    res = boosting('utsnd', [p0, p1], 0, 0.6, **common)
    assert len(res.h) == 2
    for kernel in res.h:
        assert kernel.shape == (5, 60)
    y = convolve(res.h_scaled, [p0, p1])
    r = correlation_coefficient(y, ds['utsnd'], ('case', 'time'))
    assert_dataobj_equal(res.r, r, decimal=3, name=False)

    # vector: the residual is expected to be 0-dimensional
    res = boosting('v3d', [p0, p1], 0, 0.6, error='l1', **common)
    assert res.residual.ndim == 0
def test_correlation_coefficient():
    """Compare correlation_coefficient() against numpy.corrcoef()."""
    # NOTE(review): `test_correlation_coefficient` is defined again right
    # after this function; the later definition rebinds the name, so this
    # version is presumably never collected -- confirm which is intended.
    ds = datasets.get_uts()
    uts = ds['uts']
    uts2 = uts.copy()
    # Perturb the copy in place so the two signals are correlated, not equal
    noise = np.random.normal(0, 1, uts2.shape)
    uts2.x += noise

    # Over all data: a single coefficient
    expected_all = np.corrcoef(uts.x.ravel(), uts2.x.ravel())[0, 1]
    assert correlation_coefficient(uts, uts2) == pytest.approx(expected_all)

    # Over time: one coefficient for each of the first 10 rows of .x
    expected_rows = [
        np.corrcoef(row, row2)[0, 1]
        for row, row2 in zip(uts.x[:10], uts2.x[:10])
    ]
    by_time = correlation_coefficient(uts[:10], uts2[:10], 'time')
    assert_allclose(by_time.x, expected_rows)

    # Over cases: one coefficient for each of the first 10 columns of .x
    expected_columns = [
        np.corrcoef(column, column2)[0, 1]
        for column, column2 in zip(uts.x.T[:10], uts2.x.T[:10])
    ]
    by_case = correlation_coefficient(uts[:, :-.1], uts2[:, :-.1], 'case')
    assert_allclose(by_case.x, expected_columns)
def test_correlation_coefficient():
    """Compare correlation_coefficient() against numpy.corrcoef()."""
    ds = datasets.get_uts()
    uts = ds['uts']
    uts2 = uts.copy()
    # Perturb the copy in place so the two signals are correlated, not equal
    noise = np.random.normal(0, 1, uts2.shape)
    uts2.x += noise

    # Over all data: a single coefficient
    actual_all = correlation_coefficient(uts, uts2)
    expected_all = np.corrcoef(uts.x.ravel(), uts2.x.ravel())[0, 1]
    assert_almost_equal(actual_all, expected_all)

    # Over time: one coefficient for each of the first 10 rows of .x
    expected_rows = [
        np.corrcoef(row, row2)[0, 1]
        for row, row2 in zip(uts.x[:10], uts2.x[:10])
    ]
    by_time = correlation_coefficient(uts[:10], uts2[:10], 'time')
    assert_allclose(by_time.x, expected_rows)

    # Over cases: one coefficient for each of the first 10 columns of .x
    expected_columns = [
        np.corrcoef(column, column2)[0, 1]
        for column, column2 in zip(uts.x.T[:10], uts2.x.T[:10])
    ]
    by_case = correlation_coefficient(uts[:, :-.1], uts2[:, :-.1], 'case')
    assert_allclose(by_case.x, expected_columns)
def test_result():
    """Test boosting results: convolution, prediction and NaN input checks."""
    ds = datasets._get_continuous()
    x1 = ds['x1']

    # convolve function: kernels applied to predictors should give ds['y']
    y = convolve([ds['h1'], ds['h2']], [ds['x1'], ds['x2']])
    assert_dataobj_equal(y, ds['y'], name=False)

    # Prediction from res.h_scaled (raw predictor) must match the prediction
    # from res.h (scaled predictor) after undoing the response scaling
    res = boosting(ds['y'], ds['x1'], 0, 1)
    y1 = convolve(res.h_scaled, ds['x1'])
    x_scaled = ds['x1'] / res.x_scale
    y2 = convolve(res.h, x_scaled)
    y2 *= res.y_scale
    y2 += y1.mean() - y2.mean()  # mean can't be reconstructed
    assert_dataobj_equal(y1, y2, decimal=12)

    # reconstruction: estimate x1 from y using a kernel from -1 to 0
    res = boosting(x1, y, -1, 0, debug=True)
    x1r = convolve(res.h_scaled, y)
    assert correlation_coefficient(res.y_pred, x1r) > .999
    r_tail = correlation_coefficient(x1r[0.9:], x1[0.9:])
    assert r_tail == approx(res.r, abs=1e-3)

    # test NaN checks (modifies data)
    # Each check runs twice: with the default 5th argument and with False
    trailing_args = ((), (False,))

    # NaN in the predictor
    ds['x2'].x[1, 50] = np.nan
    for extra in trailing_args:
        with pytest.raises(ValueError):
            boosting(ds['y'], ds['x2'], 0, .5, *extra)

    # Constant predictor row; RuntimeWarnings are silenced for this call
    ds['x2'].x[1, :] = 1
    with catch_warnings():
        filterwarnings('ignore', category=RuntimeWarning)
        with pytest.raises(ValueError):
            boosting(ds['y'], ds['x2'], 0, .5)

    # NaN in the response
    ds['y'].x[50] = np.nan
    for extra in trailing_args:
        with pytest.raises(ValueError):
            boosting(ds['y'], ds['x1'], 0, .5, *extra)
def test_boosting(n_workers):
    """Test boosting NDVars.

    ``n_workers`` is forwarded to ``configure(n_workers=...)`` before any
    model is fit.
    """
    ds = datasets._get_continuous(ynd=True)
    configure(n_workers=n_workers)
    y, ynd = ds['y'], ds['ynd']
    x1, x2 = ds['x1'], ds['x2']
    y_mean = y.mean()
    x2_mean = x2.mean('time')

    # test values from running function, not verified independently

    # Unscaled data: no mean is stored and the kernels keep unit 'V'
    res = boosting(y, x1 * 2000, 0, 1, scale_data=False, mindelta=0.0025)
    expected_repr = (
        '<boosting y ~ x1, 0 - 1, scale_data=False, mindelta=0.0025>')
    assert repr(res) == expected_repr
    assert res.r == approx(0.75, abs=0.001)
    assert res.y_mean is None
    assert res.h.info['unit'] == 'V'
    assert res.h_scaled.info['unit'] == 'V'
    with pytest.raises(NotImplementedError):
        res.proportion_explained

    # Default settings: scaling parameters are stored on the result
    res = boosting(y, x1, 0, 1)
    assert repr(res) == '<boosting y ~ x1, 0 - 1>'
    assert res.r == approx(0.83, abs=0.001)
    assert res.y_mean == y_mean
    assert res.y_scale == y.std()
    assert res.x_mean == x1.mean()
    assert res.x_scale == x1.std()
    assert res.h.name == 'x1'
    assert res.h.info['unit'] == 'normalized'
    assert res.h_scaled.name == 'x1'
    assert res.h_scaled.info['unit'] == 'V'
    assert res.proportion_explained == approx(0.506, abs=0.001)

    # inplace: fit on copies so the shared fixtures stay untouched
    res_ip = boosting(y.copy(), x1.copy(), 0, 1, 'inplace')
    assert_res_equal(res_ip, res)

    # persistence: a pickle round-trip must preserve the result
    pickled = pickle.dumps(res, pickle.HIGHEST_PROTOCOL)
    res_p = pickle.loads(pickled)
    assert_res_equal(res_p, res)

    # Second predictor
    res = boosting(y, x2, 0, 1)
    assert res.r == approx(0.601, abs=0.001)
    assert res.proportion_explained == approx(0.273, abs=0.001)

    # l1 error: the stored scale is the mean absolute deviation
    res = boosting(y, x2, 0, 1, error='l1')
    assert res.r == approx(0.553, abs=0.001)
    assert res.y_mean == y.mean()
    assert res.y_scale == (y - y_mean).abs().mean()
    assert_array_equal(res.x_mean.x, x2_mean)
    assert_array_equal(res.x_scale, (x2 - x2_mean).abs().mean('time'))
    assert res.proportion_explained == approx(0.123, abs=0.001)

    # 2 predictors
    res = boosting(y, [x1, x2], 0, 1)
    assert res.r == approx(0.947, abs=0.001)

    # selective stopping
    res = boosting(y, [x1, x2], 0, 1, selective_stopping=1)
    assert res.r == approx(0.967, abs=0.001)
    res = boosting(y, [x1, x2], 0, 1, selective_stopping=2)
    assert res.r == approx(0.992, abs=0.001)

    # prefit, first for y and then for ynd: after prefitting one predictor,
    # the kernel for the other is compared with its kernel in the full model
    prefit_cases = (
        (y, 0.984, 0.995),
        (ynd, 0.978, 0.997),
    )
    for response, r_after_x1, r_after_x2 in prefit_cases:
        res_full = boosting(response, [x1, x2], 0, 1)
        prefit = boosting(response, x1, 0, 1)
        res = boosting(response, [x1, x2], 0, 1, prefit=prefit)
        r_h = correlation_coefficient(res.h, res_full.h[1])
        assert r_h == approx(r_after_x1, 1e-3)
        prefit = boosting(response, x2, 0, 1)
        res = boosting(response, [x1, x2], 0, 1, prefit=prefit)
        r_h = correlation_coefficient(res.h, res_full.h[0])
        assert r_h == approx(r_after_x2, 1e-3)