def _parameter_initialiser(self, x, c=None, n=None, t=None, offset=False):
    log_x = np.log(x)
    log_x[np.isnan(log_x)] = -np.inf
    if (2 in c) or (-1 in c):
        heuristic = "Turnbull"
    else:
        heuristic = "Fleming-Harrington"

    data = {'x': x, 'c': c, 'n': n, 't': t}
    model = para.Parametric(self, 'MPP', data, offset, False, False)
    fitting_info = {}
    fitting_info['rr'] = 'x'
    fitting_info['heuristic'] = heuristic
    fitting_info['on_d_is_0'] = True
    fitting_info['turnbull_estimator'] = 'Fleming-Harrington'
    fitting_info['init'] = None
    model.fitting_info = fitting_info

    if offset:
        results = mpp(model)
        return (results['gamma'], *results['params'])
    else:
        gumb = para.Gumbel.fit(log_x, c, n, t, how='MLE')
        if not gumb.res.success:
            gumb = para.Gumbel.fit(log_x, c, n, t, how='MPP',
                                   heuristic=heuristic)
        mu, sigma = gumb.params
        alpha, beta = np.exp(mu), 1. / sigma
        if (np.isinf(alpha) | np.isnan(alpha)):
            alpha = np.median(x)
        if (np.isinf(beta) | np.isnan(beta)):
            beta = 1.
        return alpha, beta
def avabc(params, num_samples, num_particles, K, convergence):
    lower_bounds = []
    scaled_lower_bounds = []
    iterating = 1
    i = 0
    m = np.array([0., 0.])
    v = np.array([0., 0.])
    while iterating == 1:
        params, m, v, LB = iterate(params, i, m, v, num_samples, num_particles)
        #LB/=M
        if params[1] <= 0 or np.isnan(params).any():
            params = np.random.uniform(0, 1, 2)
            m = np.array([0., 0.])
            v = np.array([0., 0.])
        i += 1
        lower_bounds.append(-LB)
        if len(lower_bounds) > K + 1:
            lb2 = np.mean(np.array(lower_bounds[-K:]))
            lb1 = np.mean(np.array(lower_bounds[-K - 1:-1]))
            scaled_lower_bounds.append(-LB)
            if abs(lb2 - lb1) < convergence:
                iterating = 0
            if i % 10 == 0:
                print abs(lb2 - lb1)
            if np.isnan(abs(lb2 - lb1)):
                lower_bounds = []
        if i % 10 == 0:
            print params, LB
    return params, scaled_lower_bounds, i
def compute_P(self, freqs, return_df=False, normalize=False):
    '''
    Compute the power spectrum.
    '''
    omega = 2. * pi * freqs
    A_EMG, f_EMG = self.A_EMG, self.f_EMG
    mod_omega = omega / (2 * pi * f_EMG)
    P_EMG = A_EMG * (mod_omega)**2 / (1 + mod_omega**2)**2
    P_EEG = self.compute_P_EEG(freqs)
    P_EEG_EMG = P_EEG + P_EMG
    if normalize:
        P_EEG = P_EEG / P_EEG.max()
        P_EMG = P_EMG / P_EMG.max()
        P_EEG_EMG = P_EEG_EMG / P_EEG_EMG.max()
    P_EEG[isnan(P_EEG)] = 0
    P_EMG[isnan(P_EMG)] = 0
    P_EEG_EMG[isnan(P_EEG_EMG)] = 0
    if return_df == False:
        return P_EEG_EMG
    else:
        df = pd.DataFrame([P_EEG, P_EMG, P_EEG_EMG],
                          columns=freqs,
                          index=['P_EEG', 'P_MG', 'P_EEG_EMG']).T
        df.index.names = ['Hz']
        return df
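# Note on the EMG term above: with omega = 2*pi*freqs, mod_omega reduces to
# f / f_EMG, so the implemented spectrum is
#   P_EMG(f) = A_EMG * (f / f_EMG)**2 / (1 + (f / f_EMG)**2)**2,
# which peaks at f = f_EMG (with value A_EMG / 4) and falls off as 1/f**2 at
# high frequencies.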
def check_bad_values(self):
    # Check for bad x or direction values
    if np.isnan(self.x[self.k]).any() or np.isnan(self.direction[self.k]).any():
        print("\nDescent algorithm diverged.")
        print("x: ", self.x[self.k])
        print("Grad(x): ", self.direction[self.k])
        return self.x[self.k], self.costFunc(self.x[self.k]), self.fevals
def check_bad_values(self):
    # Check if x or f(x) is NaN or inf - symptoms that the algorithm reached
    # the constraint barrier
    if np.isnan(self.x[self.k]).any() or np.isinf(self.x[self.k]).any() or \
       np.isnan(self.costFunc(self.x[self.k])).any() or \
       np.isinf(self.costFunc(self.x[self.k])).any():
        self.alpha[self.k] *= self.betaParam
    return self.alpha[self.k]
def solve_constrained_f(f, c, constraint, x0, y0):
    """
    Solve f(x, y) = c and constraint(x, y) = 0.
    """
    def g(x, y):
        return np.r_[f(x, y) - c, constraint(x, y)]

    x, y = Newton(g, x0, y0)
    if np.isnan(x) or np.isnan(y):
        raise ValueError("divergence: unable to find a root for f.")
    return x, y
def mse(model):
    """
    MSE: Mean Square Error

    This is simply fitting the curve to the best estimate from a
    non-parametric estimate.

    This is slightly different in that it fits to untransformed data on the
    x and y axis. The MPP method fits the curve to the transformed data.

    This is simply fitting the CDF sigmoid to the nonparametric estimate.
    """
    dist = model.dist
    x, c, n, t = (model.data['x'], model.data['c'],
                  model.data['n'], model.data['t'])

    const = model.fitting_info['const']
    inv_trans = model.fitting_info['inv_trans']
    init = model.fitting_info['init']

    if (-1 in c) or (2 in c):
        out = nonp.turnbull(x, c, n, t, estimator='Fleming-Harrington')
    else:
        out = nonp.fleming_harrington(x, c, n, t)

    F = 1 - out['R']
    mask = np.isfinite(out['x'])
    F = F[mask]
    x = out['x'][mask]

    jac = jacobian(mse_fun)
    hess = hessian(mse_fun)

    old_err_state = np.seterr(all='ignore')

    res = minimize(mse_fun, init, method='Newton-CG', jac=jac, hess=hess,
                   args=(dist, x, F, inv_trans, const))

    if (res.success is False) or (np.isnan(res.x).any()):
        res = minimize(mse_fun, init, method='BFGS', jac=jac,
                       args=(dist, x, F, inv_trans, const))

    if (res.success is False) or (np.isnan(res.x).any()):
        res = minimize(mse_fun, init, args=(dist, x, F, inv_trans, const))

    results = {}
    results['res'] = res
    results['params'] = inv_trans(const(res.x))
    np.seterr(**old_err_state)
    return results
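# A minimal, self-contained sketch of the idea described in the docstring
# above (a hypothetical helper, not the surpyval internals used by mse):
# fit a Weibull CDF to a nonparametric CDF estimate by least squares on the
# untransformed axes, with a simple median-rank plotting position standing in
# for the Fleming-Harrington / Turnbull estimates used above. Assumes numpy
# and scipy are available.
import numpy as np
from scipy.stats import weibull_min
from scipy.optimize import minimize

def mse_sketch(x):
    x = np.sort(np.asarray(x, dtype=float))
    F_emp = (np.arange(1, len(x) + 1) - 0.3) / (len(x) + 0.4)  # median ranks

    def sse(p):
        scale, shape = p
        if scale <= 0 or shape <= 0:      # keep the search in the valid region
            return 1e12
        return np.sum((weibull_min.cdf(x, shape, scale=scale) - F_emp) ** 2)

    # returns (alpha, beta) in the (scale, shape) convention used above
    return minimize(sse, x0=[np.mean(x), 1.0], method='Nelder-Mead').x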
def policy_dual_optimization(self, alpha, param, iters=10):
    if self.policy_kl_stepwise:
        min_alpha, max_alpha = 1e-4 * np.ones((self.nb_steps, )), \
                               1e64 * np.ones((self.nb_steps, ))
    else:
        min_alpha, max_alpha = 1e-4 * np.ones((1, )), 1e64 * np.ones((1, ))

    best_alpha = alpha
    best_dual, best_grad = np.inf, np.inf

    for i in range(iters):
        dual, grad = self.policy_dual(alpha, param)
        if not np.isnan(dual) and not np.any(np.isnan(grad)):
            if grad < best_grad:
                best_alpha = alpha
                best_dual = dual
                best_grad = grad

            if self.policy_kl_stepwise:
                for t in range(self.nb_steps):
                    if np.all(abs(grad) < 0.1 * self.policy_kl_bound):
                        return alpha, dual, grad
                    else:
                        if grad[t] > 0:
                            # alpha too large
                            max_alpha[t] = alpha[t]
                            alpha[t] = np.sqrt(min_alpha[t] * max_alpha[t])
                        else:
                            # alpha too small
                            min_alpha[t] = alpha[t]
                            alpha[t] = np.sqrt(min_alpha[t] * max_alpha[t])
            else:
                if abs(grad) < 0.1 * self.policy_kl_bound:
                    LOGGER.debug("Param KL: %.2e, Grad: %f, Beta: %f"
                                 % (self.policy_kl_bound, grad, alpha))
                    return alpha, dual, grad
                else:
                    if grad > 0:
                        # alpha too large
                        max_alpha = alpha
                        alpha = np.sqrt(min_alpha * max_alpha)
                        LOGGER.debug("Param KL: %.1e, Grad: %2.3e, Beta too big, "
                                     "New Beta: %2.3e, Min. Beta: %2.3e, Max. Beta: %2.3e"
                                     % (self.policy_kl_bound, grad, alpha,
                                        min_alpha, max_alpha))
                    else:
                        # alpha too small
                        min_alpha = alpha
                        alpha = np.sqrt(min_alpha * max_alpha)
                        LOGGER.debug("Param KL: %.1e, Grad: %2.3e, Beta too small, "
                                     "New Beta: %2.3e, Min. Beta: %2.3e, Max. Beta: %2.3e"
                                     % (self.policy_kl_bound, grad, alpha,
                                        min_alpha, max_alpha))
        else:
            min_alpha = alpha
            alpha = np.sqrt(min_alpha * max_alpha)

    return best_alpha, best_dual, best_grad
def mps(model):
    """
    MPS: Maximum Product Spacing

    This is the method to get the largest (geometric) average distance
    between all points.

    This method works really well when all points are unique. Some
    complication comes in when using repeated data.

    This method is quite good for offset distributions.
    """
    old_err_state = np.seterr(all='ignore')
    dist = model.dist
    x, c, n = model.data['x'], model.data['c'], model.data['n']

    const = model.fitting_info['const']
    inv_trans = model.fitting_info['inv_trans']
    init = model.fitting_info['init']
    offset = model.offset

    jac = jacobian(mps_fun)
    hess = hessian(mps_fun)

    res = minimize(mps_fun, init, method='Newton-CG', jac=jac, hess=hess,
                   tol=1e-15, args=(dist, x, inv_trans, const, c, n, offset))

    if (res.success is False) or (np.isnan(res.x).any()):
        res = minimize(mps_fun, init, method='BFGS', jac=jac,
                       args=(dist, x, inv_trans, const, c, n, offset))

    if (res.success is False) or (np.isnan(res.x).any()):
        res = minimize(mps_fun, init,
                       args=(dist, x, inv_trans, const, c, n, offset))

    if (res.success is False) or (np.isnan(res.x).any()):
        print("MPS FAILED: Try alternate estimation method", file=sys.stderr)

    results = {}
    params = inv_trans(const(res.x))
    results['res'] = res
    if offset:
        results['gamma'] = params[0]
        results['params'] = params[1::]
    else:
        results['params'] = params

    results['jac'] = jac
    np.seterr(**old_err_state)
    return results
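# A minimal illustration of the product-spacing objective for complete
# (uncensored) data, separate from the surpyval machinery above: evaluate the
# fitted CDF at the sorted observations, pad with 0 and 1, and sum the log
# spacings between consecutive CDF values. Hypothetical helper; assumes numpy
# and scipy, and a Weibull with (scale, shape) = (alpha, beta).
import numpy as np
from scipy.stats import weibull_min

def neg_log_mps_sketch(params, x):
    alpha, beta = params
    F = weibull_min.cdf(np.sort(x), beta, scale=alpha)
    spacings = np.diff(np.concatenate(([0.0], F, [1.0])))
    # the small constant guards log(0) when data points are repeated,
    # which is the complication mentioned in the docstring above
    return -np.sum(np.log(spacings + 1e-300))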
def log_likelihood(self, u=None, fluxes=None, shape=None):
    """ conditional likelihood of source conditioned on photon
        sampled images
    """
    # args passed in or from source's own params
    u = self.params.u if u is None else u
    fluxes = self.params.fluxes if fluxes is None else fluxes
    shape = self.params.shape if shape is None else shape
    assert np.all(~np.isnan(fluxes)), 'passing in NAN fluxes.'

    # for each source image, compute per pixel poisson likelihood term
    ll = 0
    for n, (samp_img, fits_img, pixel_grid) in enumerate(self.sample_image_list):
        # grab the patch the sample_image corresponds to (every other
        # pixel is a zero value)
        ylim, xlim = (samp_img.y0, samp_img.y1), \
                     (samp_img.x0, samp_img.x1)

        # photon scatter image (from psf and galaxy extent) aligned w/ patch
        psf_ns, _, _ = \
            self.compute_scatter_on_pixels(fits_image=fits_img,
                                           u=u, shape=shape,
                                           xlim=xlim, ylim=ylim,
                                           pixel_grid=pixel_grid)

        # convert parameter flux to fits_image specific photon count flux
        band_flux = self.flux_in_image(fits_img, fluxes=fluxes)
        if psf_ns is None:
            ll_img = -band_flux * np.sum(fits_img.weights)
            ll += ll_img
            continue

        # compute model patch means and the sum of means outside patch (should be small...)
        model_patch = band_flux * psf_ns

        # compute poisson likelihood of each pixel - note that
        # the last term would be sum(model_patch) - model_outside, which is
        # just equal to band_flux * sum(fits_img.weights)
        mask = (model_patch > 0.)
        ll_img = np.sum(np.log(model_patch[mask]) * np.array(samp_img.data)[mask]) - \
                 band_flux * np.sum(fits_img.weights)

        ### debug
        if np.isnan(ll_img):
            print "NAN"
            print "model patch zeros: ", np.sum(model_patch == 0)
            print "band flux ", band_flux

        ll += ll_img
    return ll
def marginal(self, kernel):
    """
    Calculates marginal likelihood.
    Args:
        Ks_new: new covariance if needed
    Returns:
        np.array for marginal likelihood
    """
    if kernel.params is not None:
        self.Ks = self.construct_Ks()
        self.alpha = np.zeros([self.X.shape[0]])
        self.W = np.zeros([self.X.shape[0]])
        self.grads = np.zeros([self.X.shape[0]])
        self.f = self.mu
        self.f_pred = self.f
        self.run(10)

    Ks = self.Ks
    eigs = [np.expand_dims(np.linalg.eig(K)[0], 1) for K in Ks]
    eig_K = np.squeeze(kron_list(eigs))
    self.eig_K = eig_K

    if self.obs_idx is not None:
        f_lim = self.f[self.obs_idx]
        alpha_lim = self.alpha[self.obs_idx]
        mu_lim = self.mu[self.obs_idx]
        W_lim = self.W[self.obs_idx]
        eig_k_lim = eig_K[self.obs_idx]

        pen = -0.5 * np.sum(np.multiply(alpha_lim, f_lim - mu_lim))
        pen = np.where(np.isnan(pen), np.zeros_like(pen), pen)
        eigs = 0.5 * np.sum(np.log(1 + np.multiply(eig_k_lim, W_lim)))
        eigs = np.where(np.isnan(eigs), np.zeros_like(eigs), eigs)
        like = np.sum(self.likelihood.log_like(f_lim, self.y))
        like = np.where(np.isnan(like), np.zeros_like(like), like)
        return -(pen + eigs + like)

    pen = -0.5 * np.sum(np.multiply(self.alpha, self.f - self.mu))
    eigs = -0.5 * np.sum(np.log(1 + np.multiply(eig_K, self.W)))
    like = np.sum(self.likelihood.log_like(self.f, self.y))
    return -(pen + eigs + like)
def get_fixed_params(self):
    fixed_params = {}
    for k, v in param_idx_dict.items():
        val = self.model_params[v]
        if not np.isnan(val):
            fixed_params[k] = val
    return fixed_params
def _lop_p(self, theta):
    log_p = 0.0

    for property_type in self._property_types:
        reference_data = self._reference_data[property_type]
        precisions = self._reference_precisions[property_type]

        temperatures = reference_data[:, 0]
        reference_values = reference_data[:, 1]

        surrogate_values = self._surrogate_model.evaluate(
            property_type, theta, temperatures)
        precisions = precisions**-2.0

        if (any(numpy.isnan(surrogate_values)) or
                any(numpy.isinf(surrogate_values)) or
                any(surrogate_values > 1e10)):
            return -numpy.inf

        # Compute likelihood based on gaussian penalty function
        log_p += autograd.numpy.sum(
            distributions.Normal(surrogate_values,
                                 precisions).log_pdf(reference_values))

    return log_p
def sgd(funs, gfuns, y, big_eval=None, evals=10000):
    step_size = 0.0001
    no_funs = len(funs)
    randomized_order = np.random.permutation(no_funs)
    count = 1
    while count < evals:
        i = randomized_order[count % no_funs]
        grad_val = gfuns[i](y)
        l_before = funs[i](y)
        new_y = y + grad_val * step_size
        l_after = funs[i](new_y)
        if (not np.isnan(l_after)) and l_after > l_before and check_symmetry(new_y):
            step_size *= 1.0 + 1.0 / (count + 1)
            y = new_y
        else:
            step_size *= 1.0 - 1.0 / (count + 1)**0.5
            print 'step_size', step_size
        if count % no_funs == 0:
            # reshuffle each cycle to prevent repetition
            randomized_order = np.random.permutation(no_funs)
            print 'finished one round'
            if big_eval is not None:
                print 'l(', str(y), ')=', big_eval(y)
            else:
                print 'y=', str(y)
        count += 1
def preprocessData(self, data_graphs):
    super().updateGraphs(data_graphs)

    self.possible_latent_states = {}

    total_nodes = 0
    for data_graph, fbs in data_graphs:
        for node, state in data_graph.possible_latent_states.items():
            self.possible_latent_states[total_nodes + node] = state
        total_nodes += len(data_graph.nodes)

    ys = []
    for graph, fbs in data_graphs:
        ys.extend([graph.data[node] if graph.data[node] is not None else np.nan
                   for node in graph.nodes])
    self.ys = np.array(ys)

    if (hasattr(self, 'emission_dist')):
        self.L_set = True
        ys = np.array(ys).T
        assert ys.ndim == 2, 'If there is only 1 measurement, add an extra dim!'
        self.L = np.array([
            self.emission_dist[:, y]
            if not np.any(np.isnan(y))
            else np.zeros_like(self.emission_dist[:, 0])
            for y in ys
        ]).sum(axis=0).T
def preprocessData(self, data_graphs):
    super(_graphHMMMixin, self).updateGraphs(data_graphs)

    self.possible_latent_states = {}

    total_nodes = 0
    for data_graph in data_graphs:
        for node, state in data_graph.possible_latent_states.items():
            self.possible_latent_states[total_nodes + node] = state
        total_nodes += len(data_graph.nodes)

    ys = []
    for graph in data_graphs:
        ys.extend([graph.data[node] if graph.data[node] is not None else np.nan
                   for node in graph.nodes])
    self.ys = ys

    if (hasattr(self, 'emission_dist')):
        self.L_set = True
        ys = np.array(ys).T
        self.L = np.array([
            self.emission_dist[:, y]
            if not np.any(np.isnan(y))
            else np.zeros_like(self.emission_dist[:, 0])
            for y in ys
        ]).sum(axis=0).T
def is_real_num(x):
    """return true if x is a real number"""
    try:
        float(x)
        return not (np.isnan(x) or np.isinf(x))
    except ValueError:
        return False
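# Example behaviour of is_real_num (assuming numpy imported as np):
#   is_real_num(3.5)     -> True
#   is_real_num(np.nan)  -> False
#   is_real_num(np.inf)  -> False
#   is_real_num('abc')   -> False  (float('abc') raises ValueError)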
def standard_normalizer(self, x):
    # compute the mean and standard deviation of the input
    x_means = np.nanmean(x, axis=1)[:, np.newaxis]
    x_stds = np.nanstd(x, axis=1)[:, np.newaxis]

    # check that x_stds > small threshold; where they are not,
    # divide by 1 instead of the original standard deviation
    ind = np.argwhere(x_stds < 10**(-2))
    if len(ind) > 0:
        ind = [v[0] for v in ind]
        adjust = np.zeros((x_stds.shape))
        adjust[ind] = 1.0
        x_stds += adjust

    # fill in any nan values with means
    ind = np.argwhere(np.isnan(x) == True)
    for i in ind:
        x[i[0], i[1]] = x_means[i[0]]

    # create standard normalizer function
    normalizer = lambda data: (data - x_means) / x_stds

    # create inverse standard normalizer
    inverse_normalizer = lambda data: data * x_stds + x_means

    # return normalizer
    return normalizer, inverse_normalizer
def grad_like(self, r, eps):
    """
    Gradient of likelihood w.r.t variational parameters
    Args:
        r (): Transformed random sample
        eps (): Random sample
    Returns:
        gradient w.r.t covariance, gradient w.r.t mean
    """
    if self.obs_idx is not None:
        r_obs = r[self.obs_idx]
    else:
        r_obs = r
    dr = self.likelihood_grad(r_obs, self.y)
    dr[np.isnan(dr)] = 0.
    self.dr = dr

    grads_R = []
    for d in range(len(self.Rs)):
        Rs_copy = deepcopy(self.Rs)
        n = Rs_copy[d].shape[0]
        grad_R = np.zeros((n, n))
        for i, j in zip(*np.triu_indices(n)):
            R_d = np.zeros((n, n))
            R_d[i, j] = 1.
            Rs_copy[d] = R_d
            dR_eps = kron_mvp(Rs_copy, eps)
            if self.obs_idx is not None:
                dR_eps = dR_eps[self.obs_idx]
            grad_R[i, j] = np.sum(np.multiply(dr, dR_eps))
        grads_R.append(grad_R)

    grad_mu = np.zeros(self.n)
    grad_mu[self.obs_idx] = dr
    return grads_R, grad_mu
def grad_like(self, r, eps):
    """
    Gradient of likelihood w.r.t variational parameters
    Args:
        r (): Transformed random sample
        eps (): Random sample
    Returns:
        gradient w.r.t variances, gradient w.r.t mean
    """
    if self.obs_idx is not None:
        r_obs = r[self.obs_idx]
    else:
        r_obs = r
    dr = self.likelihood_grad(r_obs, self.y)
    dr[np.isnan(dr)] = 0.

    if self.obs_idx is not None:
        grad_mu = np.zeros(self.m)
        grad_mu[self.obs_idx] = dr
    else:
        grad_mu = dr

    grad_S = np.multiply(
        grad_mu,
        np.multiply(eps,
                    np.multiply(0.5 / np.sqrt(np.exp(self.q_S)),
                                np.exp(self.q_S))))
    return grad_S, grad_mu
def test_joint_probability(self, x):
    # A differentiable function to compute the joint probability for a given
    # latent state sequence
    import autograd.numpy as anp

    T = self.T
    ll = 0

    # Initial likelihood
    mu_init, sigma_init = self.mu_init, self.sigma_init
    ll += -0.5 * anp.dot(x[0] - mu_init,
                         anp.linalg.solve(sigma_init, x[0] - mu_init))

    # Transition likelihoods
    A, B, Q = self.A, self.B, self.sigma_states
    xpred = anp.dot(x[:T - 1], A.T) + anp.dot(self.inputs[:T - 1], B.T)
    dx = x[1:] - xpred
    ll += -0.5 * (dx.T * anp.linalg.solve(Q, dx.T)).sum()

    # Observation likelihoods
    y = self.data
    C, D = self.C, self.D
    psi = (anp.dot(x, C.T) + anp.dot(self.inputs, D.T))
    ll += anp.sum(y * psi)
    ll -= anp.sum(np.log(1 + np.exp(psi)))

    if anp.isnan(ll):
        ll = -anp.inf

    return ll
def train(self, scale=1.0):
    theta = self.rand_theta(scale)
    self.loss = np.inf
    theta0 = np.copy(theta)
    self.theta = np.copy(theta)

    def loss(theta):
        nlz = self.neg_likelihood(theta)
        return nlz

    gloss = grad(loss)

    try:
        fmin_l_bfgs_b(loss, theta0, gloss,
                      maxiter=self.bfgs_iter, m=100, iprint=self.debug)
    except np.linalg.LinAlgError:
        print('Increase noise term and re-optimization')
        theta0 = np.copy(self.theta)
        theta0[1] += np.log(10)
        theta0[2] += np.log(10)
        try:
            fmin_l_bfgs_b(loss, theta0, gloss,
                          maxiter=self.bfgs_iter, m=10, iprint=self.debug)
        except:
            print('Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())
    except:
        print('Exception caught, L-BFGS early stopping...')
        if self.debug:
            print(traceback.format_exc())

    if np.isnan(self.loss) or np.isinf(self.loss):
        print('Fail to build GP model')
        sys.exit(1)

    self.alpha = chol_inv(self.L, self.y.T)
def Fit(self, X, Y, **kwargs):
    self.cov = np.cov(Y.T)
    if not self.cov.shape:
        # you could be supplied with a 1-feature data set, in which case
        # self.cov is just a number
        self.eigval = self.cov
        self.eigvec = np.eye(1)
        self.cov = self.cov.reshape(-1, 1)
    else:
        self.eigval, self.eigvec = np.linalg.eigh(self.cov)
        idx = self.eigval.argsort()[::-1]
        self.eigval = self.eigval[idx]
        self.eigvec = self.eigvec[:, idx]

    if self.percentage is not None:
        total_val = sum(self.eigval)
        running_fraction = np.cumsum(self.eigval) / total_val
        self.component = np.searchsorted(running_fraction, self.percentage)
        if self.component == 0:
            self.component = 1

    assert (self.component <= Y.shape[1]), \
        "number of components cannot exceed number of variables"

    self.reconstruction_error = np.sum(
        self.eigval[self.component:]) / self.cov.shape[0]
    if self.reconstruction_error is None or np.isnan(self.reconstruction_error):
        self.reconstruction_error = 0

    self.eigval = self.eigval[0:self.component]
    self.eigvec = self.eigvec[:, 0:self.component]
def extract_examples(self, df, filter_first_ixns=True):
    """
    Get delays, memory strengths, module indices, and outcomes for a set of
    interactions

    :param pd.DataFrame df: Interaction log data
    :param bool filter_first_ixns: True if the first interaction in a
        user-item history should be removed, False otherwise. These
        interactions are marked by tlast = np.nan.

    :rtype: (np.array,np.array,np.array,np.array)
    :return: A tuple of (delays, memory strengths, module indices, outcomes)
    """
    if self.using_delay:
        if filter_first_ixns:
            df = df[~np.isnan(df['tlast'])]
        timestamps = np.array(df['timestamp'].values)
        previous_review_timestamps = np.array(df['tlast'].values)
        delays = 1 + (timestamps - previous_review_timestamps) / 86400
    else:
        delays = 1

    strengths = 1 if self.strength_model is None else np.array(
        df[self.strength_model].values)
    module_idxes = np.array(df['module_id'].map(self.idx_of_module_id).values)
    outcomes = np.array(df['outcome'].apply(lambda x: 1 if x else 0).values)

    return delays, strengths, module_idxes, outcomes
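# Worked example of the delay term above, assuming timestamps are in seconds
# (the division by 86400 converts to days): an interaction 172800 s (2 days)
# after the previous review of the same item gets
#   delay = 1 + 172800 / 86400 = 3,
# i.e. delays are measured in days and offset by 1 so that a zero-gap review
# still has delay 1.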
def backtracking_line_search(x0, dx, obj, g, stepsize=1.0, min_stepsize=1e-8,
                             alpha=0.2, beta=0.7):
    """
    A backtracking line search for the step size in Newton's method.
    Algorithm 9.2, Boyd & Vandenberghe, 2004.

    - dx is the descent direction
    - g is the gradient evaluated at x0
    - alpha in (0, 0.5) is the fraction of decrease in the objective predicted
      by a linear extrapolation that we will accept
    - beta in (0, 1) is the step size reduction factor
    """
    x = x0

    # criterion: stop when f(x + stepsize * dx) < f(x) + alpha * stepsize * f'(x)^T dx
    f_term = obj(x)
    grad_term = alpha * np.dot(g.ravel(), dx.ravel())

    # decrease stepsize until criterion is met
    # or stop at minimum step size
    while stepsize > min_stepsize:
        fx = obj(x + stepsize * dx)
        if np.isnan(fx) or fx > f_term + grad_term * stepsize:
            stepsize *= beta
        else:
            break

    return stepsize
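# A minimal usage sketch for the line search above, with a hypothetical
# quadratic objective (not part of the original code):
import numpy as np

obj = lambda x: 0.5 * np.sum(x ** 2)   # f(x) = 0.5 * ||x||^2
x0 = np.array([1.0, -2.0])
g = x0                                  # gradient of f at x0
dx = -g                                 # steepest-descent direction
step = backtracking_line_search(x0, dx, obj, g)
# for this convex quadratic the sufficient-decrease condition already holds
# at the initial step, so step == 1.0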
def logl(q):
    z = q[0]
    omega = q[1:(B_mle.shape[0] + 1)]
    mu = q[B_mle.shape[0] + 1]
    ll = pixel_likelihood(z, ru.softmax(omega), np.exp(mu),
                          y_flux, y_flux_ivar, lam0, B_mle)
    if np.isnan(ll):
        #print "NAN NOOOOO:", z, omega, mu
        ll = -np.inf
    return ll
def _set_transmat(self, transmat_val):
    if transmat_val is None:
        transmat = np.tile(1.0 / self.n_components,
                           (self.n_components, self.n_components))
    else:
        transmat_val[np.isnan(transmat_val)] = 0.0
        normalize(transmat_val, axis=1)

        if (np.asarray(transmat_val).shape == (self.n_components,
                                               self.n_components)):
            transmat = np.copy(transmat_val)
        elif transmat_val.shape[0] == self.n_unique:
            transmat = self._ntied_transmat(transmat_val)
        else:
            raise ValueError("cannot match shape of transmat")

    if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
        raise ValueError('Rows of transmat must sum to 1.0')

    self._log_transmat = np.log(np.asarray(transmat).copy())
    underflow_idx = np.isnan(self._log_transmat)
    self._log_transmat[underflow_idx] = NEGINF
def log_likelihood_isolated(self, u=None, fluxes=None, shape=None):
    """ log likelihood of this source if this is the only patch in the
        local area. This is the same as saying the pixel for lambda_m is
        only determined by this source's model and the image noise
    """
    # args passed in or from source's own params
    u = self.params.u if u is None else u
    fluxes = self.params.fluxes if fluxes is None else fluxes
    shape = self.params.shape if shape is None else shape

    # for each source image, compute per pixel poisson likelihood term
    ll = 0
    for n, (samp_img, fits_img, pixel_grid) in enumerate(self.sample_image_list):
        # grab the patch the sample_image corresponds to
        ylim, xlim = (samp_img.y0, samp_img.y1), \
                     (samp_img.x0, samp_img.x1)
        data_patch = fits_img.nelec[ylim[0]:ylim[1], xlim[0]:xlim[1]]

        # photon scatter image (from psf and galaxy extent) aligned w/ patch
        psf_ns, _, _ = \
            self.compute_scatter_on_pixels(fits_image=fits_img,
                                           u=u, shape=shape,
                                           pixel_grid=pixel_grid,
                                           ylim=ylim, xlim=xlim)

        # convert parameter flux to fits_image specific photon count flux
        band_flux = self.flux_in_image(fits_img, fluxes=fluxes)
        assert psf_ns is not None, 'source not overlapping - this is a bug fix me.'
        #if psf_ns is None:
        #    ll_img = - band_flux * np.sum(fits_img.weights)
        #    ll += ll_img
        #    continue

        # compute model patch means and the sum of means outside patch (should be small...)
        model_patch = band_flux * psf_ns + fits_img.epsilon

        # compute poisson likelihood of each pixel - note that
        # the last term would be sum(model_patch) - model_outside, which is
        # just equal to band_flux * sum(fits_img.weights)
        mask = (model_patch > 0.)
        ll_img = np.sum(np.log(model_patch) * np.array(data_patch)) - np.sum(model_patch)

        ### debug
        if np.isnan(ll_img):
            print "NAN"
            print "model patch zeros: ", np.sum(model_patch == 0)
            print "band flux ", band_flux

        ll += ll_img
    return ll
def log_likelihood(beta, y, X, z, u, tauParams, N):
    ll = 0
    # generate N*n particles
    inv_lognormal = 1. / generate_lognormal(tauParams, u)
    if np.isnan(inv_lognormal).any():
        print 'some nans'
        print 5 / 0
    alpha = np.zeros(len(inv_lognormal))  # np.sqrt(inv_lognormal)*z
    print 'mean inv lognormal'
    print np.mean(inv_lognormal)
    count = 0
    ll = 0
    t = np.shape(y)[1]
    # iterate over participants
    for i in range(y.shape[0]):
        l_individual = likelihood_individual(beta, y[i, :], X[i, :, :],
                                             alpha[i * N:(i + 1) * N])
        ll += np.log(l_individual)
    return ll
def test_optimize_locs_width(self):
    """
    Test the function optimize_locs_width(..). Make sure it does not return
    unusual results.
    """
    # sample source
    n = 600
    dim = 2
    seed = 17

    ss = data.SSGaussMeanDiff(dim, my=1.0)
    #ss = data.SSGaussVarDiff(dim)
    #ss = data.SSSameGauss(dim)
    #ss = data.SSBlobs()
    dim = ss.dim()

    dat = ss.sample(n, seed=seed)
    tr, te = dat.split_tr_te(tr_proportion=0.5, seed=10)
    xy_tr = tr.stack_xy()

    # initialize test_locs by drawing from a Gaussian fitted to the data
    # number of test locations
    J = 3
    V0 = util.fit_gaussian_draw(xy_tr, J, seed=seed + 1)
    med = util.meddistance(xy_tr, subsample=1000)
    gwidth0 = med**2
    assert gwidth0 > 0

    # optimize
    V_opt, gw2_opt, opt_info = tst.GaussUMETest.optimize_locs_width(
        tr, V0, gwidth0, reg=1e-2, max_iter=100, tol_fun=1e-5, disp=False,
        locs_bounds_frac=100, gwidth_lb=None, gwidth_ub=None)

    # perform the test using the optimized parameters on the test set
    alpha = 0.01
    ume_opt = tst.GaussUMETest(V_opt, gw2_opt, n_simulate=2000, alpha=alpha)
    test_result = ume_opt.perform_test(te)

    assert test_result['h0_rejected']
    assert util.is_real_num(gw2_opt)
    assert gw2_opt > 0
    assert np.all(np.logical_not(np.isnan(V_opt)))
    assert np.all(np.logical_not(np.isinf(V_opt)))
def sgd(learner, numEpochs, mkTrainingData, devData, testData, weights,
        computeLosses=None, batchSize=1, outputFrequency=1,
        outputExpDelay=False, eta0=0.01, initial_t=0, power_t=0.5,
        extraObjective=None, adaptive=False, clipping=False,
        targetDict=None, senseIsMinimize=True):
    global globalEpoch, globalBestWeights
    globalEpoch, globalBestWeights = 0, None

    printUpdate = makePrintUpdate(learner, mkTrainingData, devData, testData,
                                  computeLosses, targetDict=targetDict,
                                  senseIsMinimize=senseIsMinimize)
    globalEpoch = 0
    sum_grad_squared = None
    totalExamples = 0
    for epoch in range(1, numEpochs + 1):
        trainingData = mkTrainingData()
        for start in range(0, len(trainingData), batchSize):
            data = trainingData[start:start + batchSize]
            learner.set_weights_copy(weights.copy())
            obj_and_grad = value_and_grad(learner, data, weights, extraObjective)
            _, gradient = obj_and_grad(weights)
            eta = eta0 / (1 if power_t == 0 else ((epoch + initial_t) ** power_t))
            gradient[np.isnan(gradient)] = 0
            gradient[np.isinf(gradient)] = 0
            gradient *= eta
            if clipping:
                numBig = sum(gradient < -1) + sum(gradient > 1)
                if numBig > 0:
                    print 'clipping %d / %d gradient terms, avg|grad| %g' % \
                        (numBig, len(gradient), np.mean(np.abs(gradient)))
                    gradient[gradient > 1] = 1
                    gradient[gradient < -1] = -1
            if adaptive:
                if sum_grad_squared is None:
                    sum_grad_squared = 1e-4 + gradient * gradient
                else:
                    gradient /= np.sqrt(sum_grad_squared)
                    sum_grad_squared += gradient * gradient
            weights -= gradient
            if outputExpDelay and log2(totalExamples) != log2(totalExamples + len(data)):
                printUpdate(weights, totalExamples)
            totalExamples += len(data)
        if epoch % outputFrequency == 0:
            printUpdate(weights)
    return globalBestWeights
def load_stamps_and_samps(gstamps):
    # gather all stamp files!
    print "loading available stamps"
    gstamps.sort()
    stamp_ids = extract_stamp_ids(gstamps)
    stamps = stamps2array(gstamps)

    # gather all samps!
    print "loading MCMC sample files"
    gal_chain_template = 'samp_cache/run5/gal_samps_stamp_%s_chain_0.bin'
    gal_chain_files = [gal_chain_template % sid for sid in stamp_ids]
    # keep track of the ones that actually have samples
    chain_mask = np.zeros(len(stamp_ids), dtype=np.bool)
    Nselect = 500
    Nskip = 5
    samps = []
    for i, chain in enumerate(gal_chain_files):
        print "Galaxy", os.path.basename(chain)

        ## 0. load four chains from disk
        src_samp_chains, ll_samp_chains, eps_samp_chains = \
            io.load_mcmc_chains(chain, num_chains=4)

        if len(src_samp_chains) > 0:
            th = rec2matrix(np.concatenate(src_samp_chains))
            # make sure there are no infinite samples
            if np.any(np.isinf(th)) or np.any(np.isnan(th)):
                continue
            chain_mask[i] = True
            samps.append(th[-Nselect * Nskip:-1:Nskip, :])
    print "There are %d chains with either missing, zeros, or otherwise unsuitable samples" % ((~chain_mask).sum())

    # samps and stamps now aligned
    stamps = stamps[chain_mask, :, :, :]
    samps = np.array(samps)
    return stamps, samps
# Fragment: the line below is the tail of a truncated function definition; it
# returns the Bernoulli likelihood pi**yi * (1 - pi)**(1 - yi).
    return (pi**yi) * ((1 - pi)**(1 - yi))


def logistic(x):
    return 1 / (1 + np.exp(-x))


if __name__ == '__main__':
    # create some data with beta = 2
    beta = np.array([-1.5, 2])
    d = len(beta)
    params = np.random.normal(0, 1, 2 * d + 2)
    # generate_data(beta, tau2, n, num_times)
    params[-2:] = 0
    X, y = generate_data(beta, 1.5, 500, 4)  # 537

    # test likelihood for several beta values, beta = 2 should give high likelihood
    m = np.zeros(2 * d + 2)
    v = np.zeros(2 * d + 2)
    for i in range(150):
        params, m, v = iterate(params, y, X, i, m, v, 5)
        mu = params[0:(len(params) - 2) / 2]
        print mu
        Sigma = params[(len(params) - 2) / 2:-2]
        #print np.exp(Sigma)
        if np.isnan(params).any():
            params = np.random.normal(0, 1, 2 * d + 2)
            m = np.zeros(2 * d + 2)
            v = np.zeros(2 * d + 2)
        #print 1/np.exp(params[-2])

    # eps = np.random.rand(50)
    # print lower_bound(params, y, X, eps)
    load_data_clean_split(spec_fits_file='quasar_data.fits', Ntrain=400)
N = qtrain['spectra'].shape[0]

## resample to lam0 => rest frame basis
lam0, lam0_delta = get_lam0(lam_subsample=10)
print " resampling de-redshifted data"
spectra_resampled, spectra_ivar_resampled, lam_mat = \
    resample_rest_frame(qtrain['spectra'],
                        qtrain['spectra_ivar'],
                        qtrain['Z'],
                        lam_obs,
                        lam0)

# clean nans
X = spectra_resampled
X[np.isnan(X)] = 0
Lam = spectra_ivar_resampled
Lam[np.isnan(Lam)] = 0

###########################################################################
## Set prior variables (K_chol, sig2_omega, sig2_mu)
###########################################################################
sig2_omega = 1.
sig2_mu = 500.
beta_kern = GPy.kern.Matern52(input_dim=1, variance=1.,
                              lengthscale=length_scale)
K_beta = beta_kern.K(lam0.reshape((-1, 1)))
K_chol = np.linalg.cholesky(K_beta)
K_inv = np.linalg.inv(K_beta)

##########################################################################
## set up the likelihood and prior functions and generate a sample
def isLegal(v):
    return np.sum(np.any(np.imag(v))) == 0 and np.sum(np.isnan(v)) == 0 and \
           np.sum(np.isinf(v)) == 0