def check_classifier_ratio(clf, method, cv):
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
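# A possible nose-style driver for the check above (not from the original
# suite): the classifier choices and calibration methods are illustrative
# assumptions; any sklearn-compatible classifier accepted by
# CalibratedClassifierCV should work here.
def test_classifier_ratio_examples():
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier

    for clf in (LogisticRegression(), DecisionTreeClassifier(max_depth=5)):
        for method in ("isotonic", "sigmoid"):
            yield check_classifier_ratio, clf, method, 3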
def test_mixin_composition():
    # Check composed expressions as parameters
    a = theano.shared(0.0)
    b = theano.shared(-1.0)
    mu = a + b - 1.0
    sigma = T.abs_(a * b)
    p = Normal(mu=mu, sigma=sigma)
    assert a in p.parameters_
    assert b in p.parameters_

    # Compose parameters with observed variables
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix(name="y")
    p = Normal(mu=a * y + b)
    assert len(p.parameters_) == 3
    assert a in p.parameters_
    assert b in p.parameters_
    assert p.sigma in p.parameters_
    assert p.mu not in p.parameters_
    assert len(p.observeds_) == 1
    assert y in p.observeds_

    # Check signatures
    data_X = np.random.rand(10, 1)
    data_y = np.random.rand(10, 1)
    p.pdf(X=data_X, y=data_y)
    p.cdf(X=data_X, y=data_y)
    p.rvs(10, y=data_y)

    # Check error
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix()  # y must be named
    assert_raises(ValueError, Normal, mu=a * y + b)
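# A short illustrative sketch (an assumption, not part of the original
# tests): because mu and sigma are built from theano shared variables,
# updating a shared value should propagate into later pdf evaluations.
def test_mixin_composition_example():
    a = theano.shared(0.0)
    b = theano.shared(-1.0)
    p = Normal(mu=a + b, sigma=T.abs_(b))

    X = np.array([[0.0]])
    before = p.pdf(X)
    a.set_value(1.0)  # mu changes from -1.0 to 0.0
    after = p.pdf(X)
    assert not np.allclose(before, after)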
def check_normal(mu, sigma):
    rng = check_random_state(1)

    p_carl = Normal(mu=mu, sigma=sigma)
    p_scipy = st.norm(loc=mu, scale=sigma)
    X = rng.rand(50, 1)

    assert_array_almost_equal(p_carl.pdf(X), p_scipy.pdf(X.ravel()))
    assert_array_almost_equal(p_carl.cdf(X), p_scipy.cdf(X.ravel()))
    assert_array_almost_equal(-np.log(p_carl.pdf(X)), p_carl.nll(X))
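# A possible nose-style driver for check_normal (not from the original
# suite); the (mu, sigma) grid below is an illustrative assumption.
def test_normal_examples():
    for mu, sigma in [(0.0, 1.0), (1.0, 2.0), (-1.0, 0.5)]:
        yield check_normal, mu, sigma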
def test_calibrated_classifier_ratio_identity():
    p = Normal(mu=0.0)

    ratio = CalibratedClassifierRatio(base_estimator=ElasticNetCV())
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
def test_classifier_ratio_identity():
    p = Normal(mu=0.0)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
def test_linear_transform_1d():
    p0 = Normal()
    pt = LinearTransform(p0, A=np.array([[0.5]]))

    X0 = p0.rvs(10, random_state=0)
    Xt = pt.rvs(10, random_state=0)
    assert X0.shape == Xt.shape
    assert_array_equal(X0 * 0.5, Xt)

    assert_array_equal(p0.pdf(X0), pt.pdf(Xt))
    assert_array_equal(p0.nll(X0), pt.nll(Xt))
def test_parameterized_regressor():
    mu = theano.shared(0.0)
    p = Normal(mu=mu)
    X = p.rvs(100)
    y = p.pdf(X).astype(np.float32)

    tf = ParameterStacker(params=[mu])
    clf = ParameterizedRegressor(DecisionTreeRegressor(), params=[mu])
    clf.fit(tf.transform(X), y)

    assert clf.n_features_ == 1
    assert_array_almost_equal(y, clf.predict(tf.transform(X)), decimal=3)
def test_kde():
    # Test API
    p = Normal(random_state=1)
    X = p.rvs(10000)
    k = KernelDensity()
    k.fit(X)

    reals = np.linspace(-3, 3).reshape(-1, 1)
    assert np.mean(np.abs(p.pdf(reals) - k.pdf(reals))) < 0.05
    assert np.mean(np.abs(p.nll(reals) - k.nll(reals))) < 0.05

    # Test sampling
    X = k.rvs(10000)
    assert np.abs(np.mean(X)) < 0.05