def test_sir_regularized_2d():
    # With a zero penalty matrix, regularized SIR must reproduce the
    # unregularized SIR solution exactly, for every dimension tried.
    np.random.seed(93482)

    n, p = 1000, 10
    xmat = np.random.normal(size=(n, p))
    lin1 = np.dot(xmat[:, 0:4], np.r_[1, 1, -1, -1])
    lin2 = np.dot(xmat[:, 4:8], np.r_[1, 1, -1, -1])
    y = lin1 + np.arctan(lin2) + np.random.normal(size=n)

    model = SlicedInverseReg(y, xmat)
    base = model.fit()

    # All-zero penalty: regularization has no effect.
    penalty = np.zeros((1, p))
    for ndim in (1, 2, 3, 4):
        if ndim < 3:
            reg = model.fit_regularized(ndim, penalty)
        else:
            # ndim >= 3 is expected to emit a UserWarning from
            # fit_regularized (matched by its message prefix).
            with pytest.warns(UserWarning, match="SIR.fit_regularized did"):
                reg = model.fit_regularized(ndim, penalty)

        # Compare spanned subspaces through the singular values of the
        # cross-product of their bases; the sum is ndim when they agree.
        basis1, _, _ = np.linalg.svd(base.params[:, 0:ndim], 0)
        basis2 = reg.params
        _, sv, _ = np.linalg.svd(np.dot(basis1.T, basis2))
        assert_allclose(np.sum(sv), ndim, atol=1e-1, rtol=1e-1)
def test_sir_regularized_numdiff():
    # Validate the analytic gradient of the regularized SIR objective
    # against a numerical (finite-difference) gradient.
    np.random.seed(93482)

    n, p = 1000, 10
    xmat = np.random.normal(size=(n, p))
    lin = np.dot(xmat, np.linspace(-1, 1, p))
    tot = xmat.sum(1)
    y = tot / (1 + lin**2) + np.random.normal(size=n)

    model = SlicedInverseReg(y, xmat)
    _ = model.fit()

    # Second-difference penalty matrix.
    fmat = np.zeros((p - 2, p))
    for row in range(p - 2):
        fmat[row, row:row + 3] = [1, -2, 1]
    # Fitting once sets up the model's internal regularized objective.
    _ = model.fit_regularized(2, 3 * fmat)

    # Check the gradient at several random orthonormal parameter points.
    for _ in range(5):
        params = np.random.normal(size=(p, 2))
        params, _, _ = np.linalg.svd(params, 0)
        numeric = approx_fprime(params.ravel(),
                                model._regularized_objective, 1e-7)
        analytic = model._regularized_grad(params.ravel())
        assert_allclose(numeric, analytic, atol=1e-5, rtol=1e-4)
def test_sir_regularized_1d():
    # When the penalty is exactly satisfied by the true coefficients
    # (no regularization bias), regularized SIR should recover the truth
    # at least as well as traditional SIR.
    np.random.seed(93482)

    n, p = 1000, 10
    xmat = np.random.normal(size=(n, p))
    y = np.dot(xmat[:, 0:4], np.r_[1, 1, -1, -1]) + np.random.normal(size=n)

    model = SlicedInverseReg(y, xmat)
    rslt = model.fit()

    # The penalty drives p[0] ~ p[1] and p[2] ~ p[3], which the true
    # coefficient vector [1, 1, -1, -1, 0, ...] satisfies exactly.
    fmat = np.zeros((2, p))
    fmat[0, 0:2] = [1, -1]
    fmat[1, 2:4] = [1, -1]
    rslt2 = model.fit_regularized(1, 3 * fmat)

    truth = np.zeros(p)
    truth[0:4] = [1, 1, -1, -1]
    est_plain = rslt.params[:, 0]
    est_reg = rslt2.params[:, 0:2]

    def sim(x, y):
        # Dissimilarity between two 1d subspaces: 0 when they coincide.
        x = x / np.sqrt(np.sum(x * x))
        y = y / np.sqrt(np.sum(y * y))
        return 1 - np.abs(np.dot(x, y))

    # Regularized SIR should be closer to the truth than traditional SIR.
    assert_equal(sim(truth, est_plain) > sim(truth, est_reg), True)

    # Regularized SIR should be close to the truth.
    assert_equal(sim(truth, est_reg) < 1e-3, True)

    # Regularized SIR should incur a smaller penalty value than
    # traditional SIR.
    assert_equal(
        np.sum(np.dot(fmat, est_plain)**2) > np.sum(np.dot(fmat, est_reg)**2),
        True)