def test_same_zero(self):
    """ checks that two models with the same parameters have zero
    KL divergence """

    rng = np.random.RandomState([1, 2, 3])

    dim = self.dim

    num_trials = 3

    for trial in xrange(num_trials):
        mu = rng.randn(dim).astype(floatX)
        beta = rng.uniform(.1, 10., (dim,)).astype(floatX)

        self.p.mu.set_value(mu)
        self.q.mu.set_value(mu)
        self.p.beta.set_value(beta)
        self.q.beta.set_value(beta)

        kl = kl_divergence(self.q, self.p)
        kl = function([], kl)()

        tol = 1e-7
        if kl > tol:
            raise AssertionError("KL divergence between two "
                                 "equivalent models should be 0 but is " +
                                 str(kl))

        # second check because the above evaluates to False
        # if kl is None, etc.
        assert kl <= tol
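
# A minimal NumPy reference for what kl_divergence(q, p) should evaluate to,
# assuming DiagonalMND parameterizes a diagonal Gaussian N(mu, diag(1 / beta))
# with beta a per-dimension precision. The helper name below is hypothetical
# and not part of the library; it is only a sketch of the closed form
#   KL(q || p) = 0.5 * sum(log beta_q - log beta_p
#                          + beta_p / beta_q
#                          + beta_p * (mu_q - mu_p) ** 2 - 1),
# which is 0 exactly when mu_q == mu_p and beta_q == beta_p, the property
# test_same_zero checks.


def diag_gaussian_kl(mu_q, beta_q, mu_p, beta_p):
    """Closed-form KL(q || p) for diagonal Gaussians given means and precisions."""
    return 0.5 * np.sum(np.log(beta_q) - np.log(beta_p)
                        + beta_p / beta_q
                        + beta_p * np.square(mu_q - mu_p) - 1.)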
def test_nonnegative_samples(self):
    """ checks that the kl divergence is non-negative at sampled
    parameter values for q and p """

    rng = np.random.RandomState([1, 2, 3])

    dim = self.dim

    num_trials = 3

    for trial in xrange(num_trials):
        mu = rng.randn(dim).astype(floatX)
        beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
        self.p.mu.set_value(mu)
        mu = rng.randn(dim).astype(floatX)
        self.q.mu.set_value(mu)
        self.p.beta.set_value(beta)
        beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
        self.q.beta.set_value(beta)

        kl = kl_divergence(self.q, self.p)
        kl = function([], kl)()

        if kl < 0.:
            raise AssertionError("KL divergence should "
                                 "be non-negative but is " +
                                 str(kl))
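
# Sketch of an independent sanity check on the non-negativity property
# (hypothetical helper, not part of the test suite): a Monte Carlo estimate
# of KL(q || p) = E_q[log q(x) - log p(x)] is non-negative in expectation for
# any parameter settings, which is the property sampled by the test above.


def mc_kl_estimate(mu_q, beta_q, mu_p, beta_p, n=100000, seed=0):
    """Monte Carlo estimate of KL(q || p) for diagonal Gaussians with precision beta."""
    rng = np.random.RandomState(seed)

    # Draw samples from q: x = mu_q + eps / sqrt(beta_q)
    x = mu_q + rng.randn(n, mu_q.shape[0]) / np.sqrt(beta_q)

    def log_density(x, mu, beta):
        # log N(x; mu, diag(1 / beta)), summed over dimensions
        return 0.5 * np.sum(np.log(beta) - np.log(2 * np.pi)
                            - beta * np.square(x - mu), axis=1)

    return np.mean(log_density(x, mu_q, beta_q) - log_density(x, mu_p, beta_p))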
def test_zero_optimal(self):
    """ minimizes the kl divergence between q and p using batch
    gradient descent and checks that the result is zero """

    rng = np.random.RandomState([1, 2, 3])

    dim = self.dim

    num_trials = 3

    mu = rng.randn(dim).astype(floatX)
    beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
    self.p.mu.set_value(mu)
    mu = rng.randn(dim).astype(floatX)
    self.q.mu.set_value(mu)
    self.p.beta.set_value(beta)
    beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
    self.q.beta.set_value(beta)

    kl = kl_divergence(self.q, self.p)

    p = self.p
    q = self.q

    optimizer = BatchGradientDescent(
        max_iter=100,
        line_search_mode='exhaustive',
        verbose=True,
        objective=kl,
        conjugate=True,
        params=[p.mu, p.beta, q.mu, q.beta],
        param_constrainers=[p.censor_updates, q.censor_updates])

    # optimizer.verbose = True

    kl = optimizer.minimize()

    if kl < 0.:
        if config.floatX == 'float32':
            neg_tol = 4.8e-7
        else:
            neg_tol = 0.

        if kl < -neg_tol:
            raise AssertionError("KL divergence should "
                                 "be non-negative but is " +
                                 str(kl))

        warnings.warn("KL divergence is not very numerically stable, "
                      "evidently")

    tol = 6e-5
    if kl > tol:
        print 'kl:', kl
        print 'tol:', tol
    assert kl <= tol
    assert not (kl > tol)
def test_zero_optimal(self):
    """ minimizes the kl divergence between q and p using batch
    gradient descent and checks that the result is zero """

    rng = np.random.RandomState([1, 2, 3])

    dim = self.dim

    num_trials = 3

    mu = rng.randn(dim).astype(floatX)
    beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
    self.p.mu.set_value(mu)
    mu = rng.randn(dim).astype(floatX)
    self.q.mu.set_value(mu)
    self.p.beta.set_value(beta)
    beta = rng.uniform(.1, 10., (dim,)).astype(floatX)
    self.q.beta.set_value(beta)

    kl = kl_divergence(self.q, self.p)

    p = self.p
    q = self.q

    optimizer = BatchGradientDescent(
        objective=kl,
        params=[p.mu, p.beta, q.mu, q.beta],
        param_constrainers=[p.censor_updates, q.censor_updates])

    # optimizer.verbose = True

    kl = optimizer.minimize()

    if kl < 0.:
        raise AssertionError("KL divergence should "
                             "be non-negative but is " + str(kl))

    tol = 5.4e-5
    assert kl <= tol
    assert not (kl > tol)
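
# A cross-check sketch of the same property without Theano: minimizing the
# closed-form KL(q || p) over q's parameters (with p fixed) should drive the
# divergence to numerical zero, with q recovering p's mean and precision.
# Assumes scipy is available and reuses the hypothetical diag_gaussian_kl
# sketch above; the function name is illustrative, not part of the library.


def check_zero_optimal_numpy(dim=5, seed=0):
    from scipy.optimize import minimize

    rng = np.random.RandomState(seed)
    mu_p = rng.randn(dim)
    beta_p = rng.uniform(.1, 10., (dim,))

    def objective(x):
        # Optimize over (mu_q, log beta_q); the log keeps beta_q positive,
        # playing the role of the censor_updates constraint.
        mu_q, log_beta_q = x[:dim], x[dim:]
        return diag_gaussian_kl(mu_q, np.exp(log_beta_q), mu_p, beta_p)

    x0 = np.concatenate([rng.randn(dim), np.zeros(dim)])
    result = minimize(objective, x0, method='L-BFGS-B')

    assert result.fun <= 1e-5
    assert np.allclose(result.x[:dim], mu_p, atol=1e-2)
    assert np.allclose(np.exp(result.x[dim:]), beta_p, rtol=5e-2)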
                                 mu=np.zeros((dim,)),
                                 seed=17 * (trial + 1))

    true = DiagonalMND(nvis=dim,
                       init_beta=true_beta,
                       init_mu=0.,
                       min_beta=.1,
                       max_beta=10.)

    X = sharedX(function([], data_distribution.random_design_matrix(m))())

    Xv = X.get_value()

    mu = Xv.mean(axis=0)

    print 'maximum likelihood mu: ', mu

    diff = Xv - mu
    var = np.square(diff).mean(axis=0)

    mlbeta = 1. / var

    print 'maximum likelihood beta: ', mlbeta

    ml_model = DiagonalMND(nvis=dim,
                           init_mu=mu,
                           init_beta=mlbeta,
                           min_beta=0.0,
                           max_beta=1e6)

    ml_kl = kl_divergence(true, ml_model)
    ml_kl = function([], ml_kl)()
    assert ml_kl >= 0.0
    ml_kls[trial] = ml_kl
    print 'maximum likelihood kl divergence:', ml_kl

    best_mse = None

    # Try each noise beta
    for idx1 in xrange(num_beta):
        beta = betas[idx1]

        print 'Running experiment for ', beta

        # Allocate a fresh model
        model = DiagonalMND(nvis=dim,
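
# A plain-NumPy sketch of the maximum-likelihood fit performed above
# (hypothetical helper; assumes, as before, that beta is a per-dimension
# precision): the ML mean is the sample mean, the ML precision is the
# reciprocal of the per-dimension sample variance, and the divergence from
# the true distribution to the fit can be checked against the hypothetical
# diag_gaussian_kl sketch defined earlier.


def ml_fit_and_kl(X, mu_true, beta_true):
    """Fit a diagonal Gaussian by maximum likelihood and return KL(true || fit)."""
    mu_hat = X.mean(axis=0)
    var_hat = np.square(X - mu_hat).mean(axis=0)
    beta_hat = 1. / var_hat
    return mu_hat, beta_hat, diag_gaussian_kl(mu_true, beta_true, mu_hat, beta_hat)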