def __init__(self):
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = (self.masspole + self.masscart)
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = (self.masspole * self.length)
    self.max_force = 20.0
    self.tau = 0.02  # seconds between state updates

    # Angle at which to fail the episode
    self.theta_threshold_radians = 12 * 2 * np.pi / 360
    self.x_threshold = 2.4

    # Angle limit set to 2 * theta_threshold_radians so failing observation
    # is still within bounds
    high = np.array([
        self.x_threshold * 2,
        np.finfo(np.float32).max,
        self.theta_threshold_radians * 2,
        np.finfo(np.float32).max])

    self.action_space = spaces.Box(low=-self.max_force, high=self.max_force, shape=(1,))
    self.observation_space = spaces.Box(-high, high)

    self._seed()
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def test_covariance_symmetry():
    value1 = np.random.normal(5, 10)
    dvalue1 = np.abs(np.random.normal(0, 1))
    test_obs1 = pe.pseudo_Obs(value1, dvalue1, 't')
    test_obs1.gamma_method()
    value2 = np.random.normal(5, 10)
    dvalue2 = np.abs(np.random.normal(0, 1))
    test_obs2 = pe.pseudo_Obs(value2, dvalue2, 't')
    test_obs2.gamma_method()
    cov_ab = pe.covariance(test_obs1, test_obs2)
    cov_ba = pe.covariance(test_obs2, test_obs1)
    assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps
    assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps)

    N = 100
    arr = np.random.normal(1, .2, size=N)
    configs = np.ones_like(arr)
    for i in np.random.uniform(0, len(arr), size=int(.8 * N)):
        configs[int(i)] = 0
    zero_arr = [arr[i] for i in range(len(arr)) if not configs[i] == 0]
    idx = [i + 1 for i in range(len(configs)) if configs[i] == 1]
    a = pe.Obs([zero_arr], ['t'], idl=[idx])
    a.gamma_method()
    assert np.isclose(a.dvalue**2, pe.covariance(a, a), atol=100, rtol=1e-4)

    cov_ab = pe.covariance(test_obs1, a)
    cov_ba = pe.covariance(a, test_obs1)
    assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps
    assert np.abs(cov_ab) < test_obs1.dvalue * a.dvalue * (1 + 10 * np.finfo(np.float64).eps)
def log_beta_function(x):
    """Log multivariate beta function:
    \sum_{i=1}^{N} \ln\Gamma(x_i) - \ln\Gamma(\sum_{i=1}^{N} x_i)
    """
    return agnp.sum(agscipy.gammaln(x + agnp.finfo(agnp.float32).eps)) \
        - agscipy.gammaln(agnp.sum(x + agnp.finfo(agnp.float32).eps))
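# A minimal sanity check for log_beta_function -- a sketch, assuming agnp and
# agscipy are autograd's wrappers of numpy and scipy.special, as in the other
# snippets here; gammaln without the eps jitter gives the reference value:
def example_log_beta_function():
    x = agnp.array([0.5, 1.5, 2.0])
    reference = agnp.sum(agscipy.gammaln(x)) - agscipy.gammaln(agnp.sum(x))
    # Agreement is up to the small eps the function adds for numerical safety:
    assert agnp.abs(log_beta_function(x) - reference) < 1e-5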
def test_derived_observables(n):
    # Construct pseudo Obs with random shape
    test_obs = pe.pseudo_Obs(2, 0.1 * (1 + np.random.rand()), 't',
                             int(1000 * (1 + np.random.rand())))

    # Check if autograd and numgrad give the same result
    d_Obs_ad = pe.derived_observable(
        lambda x, **kwargs: x[0] * x[1] * np.sin(x[0] * x[1]),
        [test_obs, test_obs])
    d_Obs_ad.gamma_method()
    d_Obs_fd = pe.derived_observable(
        lambda x, **kwargs: x[0] * x[1] * np.sin(x[0] * x[1]),
        [test_obs, test_obs], num_grad=True)
    d_Obs_fd.gamma_method()

    assert d_Obs_ad.value == d_Obs_fd.value
    assert np.abs(4.0 * np.sin(4.0) - d_Obs_ad.value) < 1000 * np.finfo(np.float64).eps * np.abs(d_Obs_ad.value)
    assert np.abs(d_Obs_ad.dvalue - d_Obs_fd.dvalue) < 1000 * np.finfo(np.float64).eps * d_Obs_ad.dvalue

    i_am_one = pe.derived_observable(lambda x, **kwargs: x[0] / x[1], [d_Obs_ad, d_Obs_ad])
    i_am_one.gamma_method(e_tag=1)

    assert i_am_one.value == 1.0
    assert i_am_one.dvalue < 2 * np.finfo(np.float64).eps
    assert i_am_one.e_dvalue['t'] <= 2 * np.finfo(np.float64).eps
    assert i_am_one.e_ddvalue['t'] <= 2 * np.finfo(np.float64).eps
def softmax(x):
    """Softmax computation: e^{x_k} / \sum_{i=1}^{K} e^{x_i}, evaluated
    stably by subtracting max(x) before exponentiating."""
    e_x = agnp.exp(x - agnp.max(x))
    return (e_x + agnp.finfo(agnp.float32).eps) / \
        (e_x.sum(axis=0) + agnp.finfo(agnp.float32).eps)
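# A quick check of the softmax above -- a sketch, assuming a 1D input (the
# eps terms keep the ratio finite even when every entry underflows):
def example_softmax():
    p = softmax(agnp.array([1.0, 2.0, 3.0]))
    assert agnp.all(p > 0)
    assert agnp.abs(agnp.sum(p) - 1.0) < 1e-5  # sums to ~1 up to the eps padding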
def _do_backward_pass(self, framelogprob):
    # Based on hmmlearn's _BaseHMM
    safe_startmat = self.startprob_ + np.finfo(float).eps
    safe_transmat = self.transmat_ + np.finfo(float).eps
    n_samples, n_components = framelogprob.shape
    bwdlattice = np.zeros((n_samples, n_components))
    _hmmc._backward(n_samples, n_components,
                    np.log(safe_startmat),
                    np.log(safe_transmat),
                    framelogprob, bwdlattice)
    return bwdlattice
def _do_viterbi_pass(self, framelogprob):
    # Based on hmmlearn's _BaseHMM
    safe_startmat = self.startprob_ + np.finfo(float).eps
    safe_transmat = self.transmat_ + np.finfo(float).eps
    n_samples, n_components = framelogprob.shape
    state_sequence, logprob = _hmmc._viterbi(n_samples, n_components,
                                             np.log(safe_startmat),
                                             np.log(safe_transmat),
                                             framelogprob)
    return logprob, state_sequence
def dirichlet_expectation(alpha):
    """Dirichlet expectation of log probabilities:
    E[\log \theta_k] = \Psi(\alpha_k) - \Psi(\sum_{i=1}^{K} \alpha_i),
    computed per row when alpha is 2D.
    """
    if len(alpha.shape) == 1:
        return agscipy.psi(alpha + agnp.finfo(agnp.float32).eps) \
            - agscipy.psi(agnp.sum(alpha))
    return agscipy.psi(alpha + agnp.finfo(agnp.float32).eps) \
        - agscipy.psi(agnp.sum(alpha, 1))[:, agnp.newaxis]
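# Both branches of dirichlet_expectation exercised -- a sketch, assuming
# agscipy.psi is autograd's digamma wrapper:
def example_dirichlet_expectation():
    e_1d = dirichlet_expectation(agnp.array([1.0, 2.0, 3.0]))  # shape (3,)
    e_2d = dirichlet_expectation(agnp.ones((4, 3)))            # row-wise, shape (4, 3)
    assert e_1d.shape == (3,) and e_2d.shape == (4, 3)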
def test_covariance_is_variance():
    value = np.random.normal(5, 10)
    dvalue = np.abs(np.random.normal(0, 1))
    test_obs = pe.pseudo_Obs(value, dvalue, 't')
    test_obs.gamma_method()
    assert np.abs(test_obs.dvalue**2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps
    test_obs = test_obs + pe.pseudo_Obs(value, dvalue, 'q', 200)
    test_obs.gamma_method()
    assert np.abs(test_obs.dvalue**2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps
def test_fft():
    value = np.random.normal(5, 100)
    dvalue = np.abs(np.random.normal(0, 5))
    test_obs1 = pe.pseudo_Obs(value, dvalue, 't', int(500 + 1000 * np.random.rand()))
    test_obs2 = copy.deepcopy(test_obs1)
    test_obs1.gamma_method()
    test_obs2.gamma_method(fft=False)
    assert max(np.abs(test_obs1.e_rho['t'] - test_obs2.e_rho['t'])) <= 10 * np.finfo(np.float64).eps
    assert np.abs(test_obs1.dvalue - test_obs2.dvalue) <= 10 * max(
        test_obs1.dvalue, test_obs2.dvalue) * np.finfo(np.float64).eps
def test_covariance_symmetry():
    value1 = np.random.normal(5, 10)
    dvalue1 = np.abs(np.random.normal(0, 1))
    test_obs1 = pe.pseudo_Obs(value1, dvalue1, 't')
    test_obs1.gamma_method()
    value2 = np.random.normal(5, 10)
    dvalue2 = np.abs(np.random.normal(0, 1))
    test_obs2 = pe.pseudo_Obs(value2, dvalue2, 't')
    test_obs2.gamma_method()
    cov_ab = pe.covariance(test_obs1, test_obs2)
    cov_ba = pe.covariance(test_obs2, test_obs1)
    assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps
    assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps)
def resample_star(self):
    # Jointly resample fluxes and location.
    def loglike(th):
        u, color = self.constrain_loc(th[:2]), th[2:]  # unpack params
        fluxes = np.exp(star_flux_mog.to_fluxes(color))
        ll = self.log_likelihood(u=u, fluxes=fluxes)
        ll_color = star_flux_mog.logpdf(color)
        return ll + ll_color
    gloglike = grad(loglike)

    # Pack params (make sure we convert to color first).
    lfluxes = np.log(self.params.fluxes)
    th = np.concatenate([self.unconstrain_loc(self.params.u),
                         star_flux_mog.to_colors(lfluxes)])
    print("initial conditional likelihood: %2.4f" % loglike(th))
    from scipy.optimize import minimize
    res = minimize(fun=lambda th: -1. * loglike(th),
                   jac=lambda th: -1. * gloglike(th),
                   x0=th, method='L-BFGS-B',
                   options={'ftol': 1e3 * np.finfo(float).eps})
    print(res)
    print("final conditional likelihood: %2.4f" % loglike(res.x))
    print(gloglike(res.x))
    self.params.u = self.constrain_loc(res.x[:2])
    self.params.fluxes = np.exp(star_flux_mog.to_fluxes(res.x[2:]))
def _my_logistic_regression_1d(df, y):
    # Platt-style sigmoid fit of decision values df to binary labels y.
    df = column_or_1d(df)
    y = column_or_1d(y)
    F = df
    tiny = np.finfo(np.float64).tiny
    #prior0 = float(np.sum(y <= 0))
    prior0 = float(np.sum(y <= 0.5))
    prior1 = y.shape[0] - prior0
    T = y
    T1 = 1. - T

    def objective(AB):
        E = np.exp(AB[0] * F + AB[1])
        P = 1. / (1. + E)
        l = -(T * np.log(P + tiny) + T1 * np.log(1. - P + tiny))
        return l.sum()

    AB0 = np.array([0., np.log((prior0 + 1.) / (prior1 + 1.))])  # not sure whether these starting values are the right ones to choose
    AB_ = fmin_bfgs(objective, AB0, fprime=grad(objective), disp=False)
    return [[AB_[0]]], [AB_[1]]
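# A hedged usage sketch for the sigmoid fit above: the returned A and B map a
# decision value f to a probability via 1 / (1 + exp(A*f + B)), following the
# objective's convention. `decision_values` and `labels` are hypothetical inputs:
def example_platt_fit(decision_values, labels):
    A, B = _my_logistic_regression_1d(decision_values, labels)
    a, b = A[0][0], B[0]
    return 1. / (1. + np.exp(a * np.asarray(decision_values) + b))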
def loss(self, y_hat, y):
    assert len(y_hat) == len(y), "Label vectors differ in size."
    eps = np.finfo(float).eps
    num = 0
    for y_h, y_t in zip(y_hat, y):
        num = num - (y_t * np.log(y_h + eps) + (1 - y_t) * np.log(1 - y_h + eps))
    return num / len(y_hat)
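# A small usage sketch for the loss above (mean binary cross-entropy; eps
# guards against log(0)). It assumes the method lives on a model object
# `model`, which is hypothetical here:
#
#     y_true = np.array([1, 0, 1, 1])
#     y_prob = np.array([0.9, 0.2, 0.8, 0.6])
#     model.loss(y_prob, y_true)  # mean binary cross-entropy, ~0.266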
def do_the_fit(obs, **kwargs):
    global print_output, beta0

    func = kwargs.get('function')
    yerr = kwargs.get('yerr')
    length = len(yerr)
    xerr = kwargs.get('xerr')
    silent = kwargs.get('silent', False)

    if length == len(obs):
        assert 'x_constants' in kwargs
        data = RealData(kwargs.get('x_constants'), obs, sy=yerr)
        fit_type = 2
    elif length == len(obs) // 2:
        data = RealData(obs[:length], obs[length:], sx=xerr, sy=yerr)
        fit_type = 0
    else:
        raise Exception('x and y do not fit together.')

    model = Model(func)
    odr = ODR(data, model, beta0, partol=np.finfo(np.float64).eps)
    odr.set_job(fit_type=fit_type, deriv=1)
    output = odr.run()
    if print_output and not silent:
        print(*output.stopreason)
        print('chisquare/d.o.f.:', output.res_var)
        print_output = 0
    beta0 = output.beta
    return output.beta[kwargs.get('n')]
def plot_dual(dual_fun, opt, elow=0, ehigh=8, logax=True, numdiff=False):
    import matplotlib.pyplot as plt
    import scipy as sc

    fig, ax1 = plt.subplots()
    if logax:
        values = np.logspace(elow, ehigh).flatten()
        ax1.set_xscale('log')
    else:
        values = np.linspace(10**elow, 10**ehigh).flatten()

    grad_fdiff = None
    if numdiff:
        dual_value = lambda alpha: dual_fun(alpha)[0].item()
        eps = 1e-8 * np.sqrt(np.finfo(float).eps)
        grad_fdiff = np.hstack([sc.optimize.approx_fprime(np.array([val]), dual_value, [eps])
                                for val in values])

    obj, grad = zip(*[dual_fun(val) for val in values])
    obj, grad = np.hstack(obj), np.hstack(grad)

    ax1.plot(values, obj, 'b')
    ax1.set_ylabel("objective", color='b')
    ax1.set_xlabel("alpha")
    ax1.axvline(opt, color='k', ls="--")

    ax2 = ax1.twinx()
    ax2.set_ylabel("gradient", color='r')
    ax2.plot(values, grad, 'r')
    if numdiff:
        ax2.plot(values, grad_fdiff, 'r--')
    ax2.axhline(0, color='k')
    plt.show()
def minimize(self):
    [vjp_fun_this, jvp_fun_this, hjvp_fun_this, jloss,
     loss_before_update] = self._matrix_vector_operators()

    if loss_before_update < 10 * np.finfo('float32').eps:
        print('Stopping iteration. Very low loss value:', loss_before_update)
        return self._input_var

    linear_b = -vjp_fun_this(jloss)

    while True:
        linear_ax = lambda h: vjp_fun_this(hjvp_fun_this(jvp_fun_this(h))) + self._damping_factor * h

        # I am planning on trying out both the scipy linear solver
        # and my own conjugate gradient solver.
        # For the initial guess, I am following Martens' recipe,
        # i.e., using the solution from the previous run.
        A = LinearOperator((linear_b.size, linear_b.size), matvec=linear_ax)
        opt_out = scipy.sparse.linalg.cg(A, linear_b, tol=self._cg_tol,
                                         x0=self._update_var,
                                         maxiter=self._max_cg_iter)
        if opt_out[1] < 0:
            raise ValueError("Linear system not correctly solved")
        update_this = opt_out[0]

        x_new = self._input_var + update_this
        loss_new = self._loss_fn(self._predictions_fn(x_new))
        loss_change = loss_new - loss_before_update
        expected_quadratic_change = -0.5 * np.dot(update_this,
                                                  self._damping_factor * update_this + linear_b)
        reduction_ratio = loss_change / expected_quadratic_change
        #print(f'red {reduction_ratio:3.4f} damp {self._damping_factor:3.4f} num {loss_change:3.4f} denom {expected_quadratic_change}')

        if reduction_ratio > self._update_cond_threshold_high:
            self._damping_factor *= self._damping_update_factor
        elif reduction_ratio < self._update_cond_threshold_low:
            self._damping_factor *= 1 / self._damping_update_factor
        self._damping_factor = np.clip(self._damping_factor,
                                       a_min=self._damping_threshold_low,
                                       a_max=self._damping_threshold_high)

        if reduction_ratio > 0:
            self._update_var = update_this
            self._input_var = x_new
            break
    return self._input_var
def __init__(self, X, y, iterators, tuple_iter_neg=False, A_0=None,
             alpha=0.1, epsilon=0.001, verbose=1, init_time=None,
             steps_print=10):
    """
    About parameters:
    * tuple_iter_neg: if True, we use an incomplete tuple-based statistic
      to estimate E[d_A(X, X') | Y \ne Y']. If False, we use a complete
      statistic.
    * A_0: initial value for the "covariance matrix" of the Mahalanobis
      distance.
    * alpha: step size of the gradient descent.
    * epsilon: variation of the objective below which we stop iterating.
    """
    def hermit(x, xp):
        delta = (x - xp).reshape((-1, 1))
        return delta.dot(delta.transpose())

    self.x_p_constr = np.mean(
        [hermit(x, xp) for x, xp in iterators["pos"](X, y)], axis=0)
    self.X = X
    self.y = y
    self.it_neg = iterators["neg"]
    self.tuple_iter_neg = tuple_iter_neg
    self.tol = 0.01  # tolerance parameter for violating the constraint
    self.eps = np.finfo(float).eps  # tolerance parameter for positive eigenvals

    # GD parameters
    self.it = 0  # iteration of gradient descent
    self.steps_print = steps_print
    self.epsilon = epsilon
    self.alpha = alpha
    self.obj = -float("inf")
    self.stop_opt = False
    if A_0 is None:
        self.A = np.eye(X.shape[1])
    else:
        self.A = A_0
    self.verbose = verbose
    if init_time is None:
        self.init_time = time.time()
    else:
        self.init_time = init_time
    self.project_A()
def _do_score_samples(self, data, lengths=None):
    # adapted from hmmlearn
    # TODO: Support lengths argument
    framelogprob = self._compute_log_likelihood(data)
    logprob, fwdlattice = self._do_forward_pass(framelogprob)
    bwdlattice = self._do_backward_pass(framelogprob)
    gamma = fwdlattice + bwdlattice
    # gamma is guaranteed to be correctly normalized by logprob at
    # all frames, unless we do approximate inference using pruning.
    # So, we will normalize each frame explicitly in case we
    # pruned too aggressively.
    posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
    posteriors += np.finfo(np.float64).eps
    posteriors /= np.sum(posteriors, axis=1).reshape((-1, 1))
    return logprob, posteriors
def gauss_filt_2D(shape=(3, 3), sigma=0.5):
    """
    2D gaussian mask - should give the same result as MATLAB's
    fspecial('gaussian',[shape],[sigma])
    """
    import numpy as np
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2. * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    sumh = h.sum()
    if sumh != 0:
        h /= sumh
    return h
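# Usage sketch: the kernel is normalized, so it sums to 1, matching
# MATLAB's fspecial('gaussian', [5 5], 1.0):
def example_gauss_filt_2D():
    h = gauss_filt_2D(shape=(5, 5), sigma=1.0)
    assert h.shape == (5, 5)
    assert abs(h.sum() - 1.0) < 1e-12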
def _decode_map(self, data):
    # adapted from hmmlearn
    framelogprob = self._compute_log_likelihood(data)
    logprob, fwdlattice = self._do_forward_pass(framelogprob)
    bwdlattice = self._do_backward_pass(framelogprob)
    gamma = fwdlattice + bwdlattice
    # gamma is guaranteed to be correctly normalized by logprob at
    # all frames, unless we do approximate inference using pruning.
    # So, we will normalize each frame explicitly in case we
    # pruned too aggressively.
    posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
    posteriors += np.finfo(np.float64).eps
    posteriors /= np.sum(posteriors, axis=1).reshape((-1, 1))
    state_sequence = np.argmax(posteriors, axis=1)
    map_logprob = np.max(posteriors, axis=1).sum()
    return map_logprob, state_sequence
def test_mlp():
    D = [2, 8, 1]
    x_in = np.random.randn(D[0])

    # Write the forward and backward pass for an MLP with one hidden layer.
    w_h = np.random.randn(D[0] * D[1]).reshape([D[1], D[0]])
    b_h = np.random.randn(D[1])
    w_out = np.random.randn(D[1] * D[2]).reshape([D[2], D[1]])
    b_out = np.random.randn(D[2])

    # Forward.
    h_0 = np.matmul(w_h, x_in)
    h_0 += b_h
    y_out = np.matmul(w_out, h_0)
    y_out += b_out
    y_gt = _rosenbrock(x_in)
    y_bar = y_out - y_gt
    loss = 0.5 * y_bar**2

    grad_b_out = y_bar
    y_bar = np.expand_dims(y_bar, -1)
    h_0 = np.expand_dims(h_0, -1)
    grad_w_out = np.matmul(y_bar, h_0.transpose())
    h_0_bar = np.matmul(w_out.transpose(), y_bar)
    grad_b_h = h_0_bar
    h_0_bar = np.expand_dims(h_0_bar, -1)
    x_in = np.expand_dims(x_in, -1)
    grad_w_h = np.matmul(h_0_bar, x_in.transpose())

    loss_grad_fn = grad(_get_loss)
    autograd_grad = loss_grad_fn([w_h, b_h, w_out, b_out], x_in.squeeze(), y_gt)

    eps = np.finfo(np.float32).eps
    assert np.max(np.abs(autograd_grad[0].flatten() - grad_w_h.flatten())) < eps
    assert np.max(np.abs(autograd_grad[1].flatten() - grad_b_h.flatten())) < eps
    assert np.max(np.abs(autograd_grad[2].flatten() - grad_w_out.flatten())) < eps
    assert np.max(np.abs(autograd_grad[3].flatten() - grad_b_out.flatten())) < eps
def gpdfit(ary):
    """Estimate the parameters for the Generalized Pareto Distribution (GPD).

    Empirical Bayes estimate for the parameters of the generalized Pareto
    distribution given the data.

    Parameters
    ----------
    ary : array
        sorted 1D data array

    Returns
    -------
    k : float
        estimated shape parameter
    sigma : float
        estimated scale parameter
    """
    prior_bs = 3
    prior_k = 10
    n = len(ary)
    m_est = 30 + int(n**0.5)

    b_ary = 1 - np.sqrt(m_est / (np.arange(1, m_est + 1, dtype=float) - 0.5))
    b_ary /= prior_bs * ary[int(n / 4 + 0.5) - 1]
    b_ary += 1 / ary[-1]

    k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
    len_scale = n * (np.log(-(b_ary / k_ary)) - k_ary - 1)
    weights = 1 / np.exp(len_scale - len_scale[:, None]).sum(axis=1)

    # remove negligible weights
    real_idxs = weights >= 10 * np.finfo(float).eps
    if not np.all(real_idxs):
        weights = weights[real_idxs]
        b_ary = b_ary[real_idxs]
    # normalise weights
    weights /= weights.sum()

    # posterior mean for b
    b_post = np.sum(b_ary * weights)
    # estimate for k
    k_post = np.log1p(-b_post * ary).mean()  # pylint: disable=invalid-unary-operand-type,no-member
    # add prior for k_post
    k_post = (n * k_post + prior_k * 0.5) / (n + prior_k)
    sigma = -k_post / b_post

    return k_post, sigma
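# A hedged usage sketch for gpdfit: it expects a *sorted* 1D array of tail
# samples (as in PSIS diagnostics). Exponential data is GPD with shape k = 0,
# so the estimate should come out near zero:
def example_gpdfit():
    tail = np.sort(np.random.exponential(size=1000))
    k_hat, sigma_hat = gpdfit(tail)
    assert abs(k_hat) < 0.3  # loose bound; k ~ 0 for exponential tails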
def _update(self, Jep, theta):
    max1, max2 = self.MI_features_selection(Jep, theta)

    eta_start = np.ones(1)
    res = minimize(myREPS_mi._dual_function,
                   eta_start,
                   jac=myREPS_mi._dual_function_diff,
                   bounds=((np.finfo(np.float32).eps, np.inf),),
                   args=(self.eps, Jep, theta))
    eta_opt = res.x.item()

    Jep -= np.max(Jep)
    d = np.exp(Jep / eta_opt)
    #self.distribution.mle(theta, d)
    ##
    eta_in = np.ones(2)
    res = minimize(myREPS_mi._lag_function_constrained,
                   eta_in,
                   method='SLSQP',
                   jac=grad(myREPS_mi._lag_function_constrained),
                   args=(d, theta, self.distribution, self.eps, self.k),
                   bounds=((0.0, np.inf), (0.0, np.inf)))
    eta_opt, omeg_opt = res.x[0], res.x[1]

    mu_pre, std_pre = self.distribution._mu, self.distribution._std
    sigma_pre = std_pre**2
    mu_change = (d @ theta + eta_opt * mu_pre) / (np.sum(d) + eta_opt)
    mu_post = mu_pre.copy()  # copy to avoid mutating mu_pre through aliasing
    mu_post[max1] = mu_change[max1]
    mu_post[max2] = mu_change[max2]
    diff = theta - mu_post
    tmp = np.einsum('nk,n,nh->kh', diff, d, diff)
    sigma_post = (tmp + eta_opt * sigma_pre +
                  eta_opt * np.outer(mu_post - mu_pre, mu_post - mu_pre)
                  ) / (np.sum(d) + eta_opt - omeg_opt)
    std_post = np.sqrt(sigma_post)

    self.distribution._mu = mu_post
    self.distribution._std = std_post

    kl = myREPS_mi._KL_M_Projection(mu_pre, sigma_pre, mu_post, sigma_post)
    entropydiff = myREPS_mi._entropy(sigma_pre) - myREPS_mi._entropy(sigma_post)
def test_odr_fit(n):
    dim = 10 + int(30 * np.random.rand())
    x = np.arange(dim) + np.random.normal(0.0, 0.15, dim)
    xerr = 0.1 + 0.1 * np.random.rand(dim)
    y = 2 * np.exp(-0.06 * x) + np.random.normal(0.0, 0.15, dim)
    yerr = 0.1 + 0.1 * np.random.rand(dim)

    ox = []
    for i, item in enumerate(x):
        ox.append(pe.pseudo_Obs(x[i], xerr[i], str(i)))

    oy = []
    for i, item in enumerate(x):
        oy.append(pe.pseudo_Obs(y[i], yerr[i], str(i)))

    def f(x, a, b):
        return a * np.exp(-b * x)

    def func(a, x):
        y = a[0] * np.exp(-a[1] * x)
        return y

    data = RealData([o.value for o in ox], [o.value for o in oy],
                    sx=[o.dvalue for o in ox], sy=[o.dvalue for o in oy])
    model = Model(func)
    odr = ODR(data, model, [0, 0], partol=np.finfo(np.float64).eps)
    odr.set_job(fit_type=0, deriv=1)
    output = odr.run()

    beta = pe.fits.odr_fit(ox, oy, func)

    pe.Obs.e_tag_global = 5
    for i in range(2):
        beta[i].gamma_method(e_tag=5, S=1.0)
        assert math.isclose(beta[i].value, output.beta[i], rel_tol=1e-5)
        assert math.isclose(output.cov_beta[i, i], beta[i].dvalue**2, rel_tol=2.5e-1), \
            str(output.cov_beta[i, i]) + ' ' + str(beta[i].dvalue**2)
    assert math.isclose(pe.covariance(beta[0], beta[1]), output.cov_beta[0, 1], rel_tol=2.5e-1)
    pe.Obs.e_tag_global = 0
def _update(self, Jep, theta):
    eta_start = np.ones(1)
    res = minimize(myREPS._dual_function,
                   eta_start,
                   jac=myREPS._dual_function_diff,
                   bounds=((np.finfo(np.float32).eps, np.inf),),
                   args=(self.eps, Jep, theta))
    eta_opt = res.x.item()

    Jep -= np.max(Jep)
    d = np.exp(Jep / eta_opt)
    #self.distribution.mle(theta, d)
    ##
    eta_in = np.ones(2)
    res = minimize(myREPS._lag_function_constrained,
                   eta_in,
                   method='SLSQP',
                   jac=grad(myREPS._lag_function_constrained),
                   args=(d, theta, self.distribution, self.eps, self.k),
                   bounds=((0.0, np.inf), (0.0, np.inf)))
    print('the result is:', res.x, 'success is', res.success)
    eta_opt, omeg_opt = res.x[0], res.x[1]

    mu_pre, cholsigma_pre = self.distribution._mu, self.distribution._chol_sigma
    sigma_pre = cholsigma_pre @ cholsigma_pre.T
    mu_post = (d @ theta + eta_opt * mu_pre) / (np.sum(d) + eta_opt)
    diff = theta - mu_post
    tmp = np.einsum('nk,n,nh->kh', diff, d, diff)
    sigma_post = (tmp + eta_opt * sigma_pre +
                  eta_opt * np.outer(mu_post - mu_pre, mu_post - mu_pre)
                  ) / (np.sum(d) + eta_opt - omeg_opt)

    self.distribution._mu = mu_post
    self.distribution._chol_sigma = np.linalg.cholesky(sigma_post)

    kl = myREPS._KL_M_Projection(mu_pre, sigma_pre, mu_post, sigma_post)
    entropydiff = myREPS._entropy(sigma_pre) - myREPS._entropy(sigma_post)
    print('KL is :', kl)
    print('entropy difference is:', entropydiff)
def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                      posteriors, fwdlattice, bwdlattice):
    """Updates sufficient statistics from a given sample.

    Parameters
    ----------
    stats : dict
        Sufficient statistics as returned by
        :meth:`~base._BaseHMM._initialize_sufficient_statistics`.

    X : array, shape (n_samples, n_features)
        Sample sequence.

    framelogprob : array, shape (n_samples, n_components)
        Log-probabilities of each sample under each of the model states.

    posteriors : array, shape (n_samples, n_components)
        Posterior probabilities of each sample being generated by each
        of the model states.

    fwdlattice, bwdlattice : array, shape (n_samples, n_components)
        Log-forward and log-backward probabilities.
    """
    # Based on hmmlearn's _BaseHMM
    safe_transmat = self.transmat_ + np.finfo(float).eps

    stats['nobs'] += 1
    if 's' in self.params:
        stats['start'] += posteriors[0]
    if 't' in self.params:
        n_samples, n_components = framelogprob.shape
        # when the sample is of length 1, it contains no transitions
        # so there is no reason to update our trans. matrix estimate
        if n_samples <= 1:
            return
        lneta = np.zeros((n_samples - 1, n_components, n_components))
        _hmmc._compute_lneta(n_samples, n_components, fwdlattice,
                             np.log(safe_transmat),
                             bwdlattice, framelogprob, lneta)
        stats['trans'] += np.exp(logsumexp(lneta, axis=0))

        # stats['trans'] = np.round(stats['trans'])
        # if np.sum(stats['trans']) != X.shape[0]-1:
        #     warnings.warn("transmat counts != n_samples", RuntimeWarning)
        #     import pdb; pdb.set_trace()
        stats['trans'][np.where(stats['trans'] < 0.01)] = 0.0
def get_mcl_normal_direction_at_chord_fraction(self, chord_fraction):
    # Returns the normal direction of the mean camber line at a specified chord fraction.
    # If you input a single value, returns a 1D numpy array with 2 elements (x, y).
    # If you input a vector of values, returns a 2D numpy array; first index is the
    # point number, second index is (x, y).

    # Right now, does it by finite differencing camber values :(
    # When I'm less lazy I'll make it do it in a proper, more efficient way
    # TODO make this not finite difference
    epsilon = np.sqrt(np.finfo(float).eps)
    cambers = self.get_camber_at_chord_fraction(chord_fraction)
    cambers_incremented = self.get_camber_at_chord_fraction(chord_fraction + epsilon)
    dydx = (cambers_incremented - cambers) / epsilon

    if np.ndim(dydx) == 0:  # single point
        normal = np.hstack((-dydx, 1))
        normal /= np.linalg.norm(normal)
        return normal
    else:  # multiple points, vectorized
        normal = np.column_stack((-dydx, np.ones(dydx.shape)))
        normal /= np.expand_dims(np.linalg.norm(normal, axis=1), axis=1)  # normalize
        return normal
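# If the TODO above were addressed while keeping finite differences, a central
# difference is a standard second-order-accurate drop-in -- a hypothetical
# sketch, not the author's implementation:
#
#     dydx = (self.get_camber_at_chord_fraction(chord_fraction + epsilon)
#             - self.get_camber_at_chord_fraction(chord_fraction - epsilon)) / (2 * epsilon)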
NOTE!!!! This model doesn't work exactly as described in Kruschke's original
paper, primarily because the similarity equation using the Minkowski distance
function isn't differentiable when the distance metric is >= 2.
'''

## std lib

## ext requirements
import autograd.numpy as np
from autograd import grad
from scipy import spatial
np.set_printoptions(suppress=True)

## int requirements
import utils

minfloat = np.finfo(np.double).tiny


def pdist(a1, a2, r, **kwargs):
    attention_weights = kwargs.get('attention_weights',
                                   np.ones([1, a1.shape[1]]) / a1.shape[1])

    # format inputs & exemplars for (I think vectorized) pairwise distance calculations
    a1_tiled = np.tile(a1, a2.shape[0]).reshape(a1.shape[0], a2.shape[0], a1.shape[1])
    a2_tiled = np.repeat([a2], a1.shape[0], axis=0)

    if r > 1:
        # get attention-weighted pairwise distances
        distances = np.sum(
            np.multiply(
                attention_weights,
                np.abs(a1_tiled - a2_tiled) ** r
            ),
import numpy
import autograd
import scipy.linalg
import scipy.stats
import scipy.optimize

import visualisation
from tqdm import tqdm

numpy.set_printoptions(precision=2)

eps = numpy.finfo(numpy.random.randn(1).dtype).eps


class GP_Beta:

    def __init__(self, length_scale=None, std=None, omega=None, kappa=None):
        self.n = None
        self.y = None
        self.mu = None
        self.sigma = None
        self.q = None
def _do_mstep(self, stats, params):
    # M-Step for startprob and transmat
    if 's' in params:
        startprob_ = self.startprob_prior + stats['start']
        normalize(startprob_)
        self.startprob_ = np.where(self.startprob_ <= np.finfo(float).eps,
                                   self.startprob_, startprob_)
    if 't' in params:
        if self.n_tied == 0:
            transmat_ = self.transmat_prior + stats['trans']
            normalize(transmat_, axis=1)
            self.transmat_ = np.where(self.transmat_ <= np.finfo(float).eps,
                                      self.transmat_, transmat_)
        else:
            transmat_ = np.zeros((self.n_components, self.n_components))
            transitionCnts = stats['trans'] + self.transmat_prior
            transition_index = [i * self.n_chain for i in range(self.n_unique)]

            for b in range(self.n_unique):
                block = transitionCnts[self.n_chain * b : self.n_chain * (b + 1)][:] + 0.

                denominator_diagonal = np.sum(block)
                diagonal = 0.0

                index_line = range(0, self.n_chain)
                index_row = range(self.n_chain * b, self.n_chain * (b + 1))

                for l, r in zip(index_line, index_row):
                    diagonal += block[l][r]
                for l, r in zip(index_line, index_row):
                    block[l][r] = diagonal / denominator_diagonal

                self_transition = block[0][self.n_chain * b]
                denominator_off_diagonal = np.sum(block[self.n_chain - 1]) - self_transition
                template = block[self.n_chain - 1] + 0.

                for entry in range(len(template)):
                    template[entry] = (template[entry] * (1 - self_transition)) \
                        / float(denominator_off_diagonal)

                template[(self.n_chain * (b + 1)) - 1] = 0.
                line_value = 1 - self_transition

                for entry in range(len(template)):
                    line_value = line_value - template[entry]

                for index in transition_index:
                    if index != (b * self.n_chain):
                        block[self.n_chain - 1][index] = line_value + template[index]

                line = range(self.n_chain - 1)
                row = [b * self.n_chain + i for i in range(1, self.n_chain)]

                for x, y in zip(line, row):
                    block[x][y] = 1 - self_transition

                transmat_[self.n_chain * b : self.n_chain * (b + 1)][:] = block

            self.transmat_ = np.copy(transmat_)
# test_params, dailyshare, dailyview = test_cases[vid]
# test_predict(test_params, dailyshare, dailyview, vid, idx)

# == == == == == == == == Part 3: Test gradient function == == == == == == == == #
# for tc_idx, vid in enumerate(test_vids):
#     test_params, dailyshare, dailyview = test_cases[vid]
#     print('err value for test case {0}: {1}'.format(tc_idx, optimize.check_grad(cost_function, grad_descent, test_params, dailyshare, dailyview)))

# == == == == == == == == Part 4: Test cost and grad function == == == == == == == == #
# setting parameters
age = 120
iteration = 200
num_train = 75
num_cv = 15
num_test = 30
eps = np.finfo(float).eps
bounds = [(0, None), (0, 100), (0, None), (0, 5), (0, None), (0, None)]

# define auto grad function
auto_grad_func = grad(cost_function)
reg_auto_grad_func = grad(reg_cost_function)

for tc_idx, vid in enumerate(test_vids):
    test_params, dailyshare, dailyview = test_cases[vid]
    dailyshare = dailyshare[:age]
    dailyview = dailyview[:age]
    # if vid == '0VuncLRnRlw':
    #     test_params = [218.9131, 24.36634, get_C(22.27494), 0.1545296, 2869.518, 242.8026]
    # if vid == '4IlZLjmPA2k':
    #     test_params = [20.38386, 1.55089, get_C(0.5068971), 2.220446E-16, 1688.062, 44.46518]
from sklearn.utils import check_random_state

import autograd.numpy as np
from autograd import grad, value_and_grad
from scipy.optimize import minimize
from scipy.special import gamma

import statsmodels.api as smapi
from statsmodels.tsa.tsatools import lagmat

from .ar import ARTHMM

__all__ = ['STUDENT']

ZEROLOGPROB = -1e200
EPS = np.finfo(float).eps
NEGINF = -np.inf
decoder_algorithms = frozenset(("viterbi", "map"))


class STUDENT(ARTHMM):
    """Hidden Markov Model with tied states and autoregressive
    observations drawn from a Student t distribution

    Parameters
    ----------
    n_unique : int
        Number of unique components.

    n_tied : int
        Number of tied states for each component.