def _space_constraint(self, x_in, min_dist):
    x = np.nan_to_num(x_in[0:self.nturbs])
    y = np.nan_to_num(x_in[self.nturbs:])

    dist = [np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2)
            for i in range(self.nturbs)
            for j in range(self.nturbs) if i != j]

    return np.min(dist) - self._norm(min_dist, self.bndx_min, self.bndx_max)
def mle_batch(self, data, batch, k):
    """
    Calculates LID values of data w.r.t batch
    Args:
        data: samples to calculate LIDs of
        batch: samples to calculate LIDs against
        k: the number of nearest neighbors to consider
    Returns: the calculated LID values
    """
    k = min(k, len(data) - 1)
    f = lambda v: -k / np.sum(np.log(v / v[-1]))

    gamma = self.classifier.kernel.gamma
    if gamma is None:
        gamma = 1.0 / self.training_data_ndarray.shape[1]

    if batch is None:
        # K = cdist(data, data)
        K = rbf_kernel(data, Y=data, gamma=gamma)
        K = np.reciprocal(K)
        # get the closest k neighbours
        a = np.apply_along_axis(np.sort, axis=1, arr=K)[:, 1:k + 1]
    else:
        batch = np.asarray(batch, dtype=np.float32)
        # K = cdist(data, batch)
        K = rbf_kernel(data, Y=batch, gamma=gamma)
        K = np.reciprocal(K)
        # get the closest k neighbours
        a = np.apply_along_axis(np.sort, axis=1, arr=K)[:, 0:k]

    a = np.apply_along_axis(f, axis=1, arr=a)
    return np.nan_to_num(a)
def mle_batch_euclidean(self, data, k):
    """
    Calculates LID values of data w.r.t. the stored training batch
    Args:
        data: samples to calculate LIDs of
        k: the number of nearest neighbors to consider
    Returns: the calculated LID values
    """
    batch = self.training_data_ndarray
    f = lambda v: -k / np.sum(np.log((v / v[-1]) + 1e-9))

    gamma = self.classifier.kernel.gamma
    if gamma is None:
        gamma = 1.0 / self.training_data_ndarray.shape[1]

    K = rbf_kernel(data, Y=batch, gamma=gamma)
    K = np.reciprocal(K)
    # K = cdist(data, batch)

    # get the closest k neighbours
    if self.xc is not None and self.xc.shape[0] == 1:
        # only one attack sample
        sorted_distances = np.sort(K)[0, 1:1 + k]
    else:
        sorted_distances = np.sort(K)[0, 0:k]

    a = np.apply_along_axis(f, axis=0, arr=sorted_distances)
    return np.nan_to_num(a)
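# Hedged standalone sketch of the same Levina-Bickel MLE LID estimator used in the two
# methods above, but on the Euclidean distances that their commented-out cdist calls hint at.
# The function name `lid_mle` and the random data are illustrative, not part of the source.
import numpy as np
from scipy.spatial.distance import cdist

def lid_mle(data, reference, k=10):
    k = min(k, len(reference) - 1)
    D = cdist(data, reference)                          # pairwise Euclidean distances
    a = np.sort(D, axis=1)[:, 1:k + 1]                  # k nearest neighbours, excluding self
    lids = -k / np.sum(np.log(a / a[:, -1:]), axis=1)   # MLE of local intrinsic dimension
    return np.nan_to_num(lids)

X = np.random.randn(100, 5)
print(lid_mle(X, X, k=10))   # estimates are typically of the order of the true dimension (5 here)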
def get_L(X):
    W = 1 - squareform(pdist(X, 'cosine'))
    W = np.nan_to_num(W)
    n_sample, n_feature = W.shape
    K = np.dot(W, np.ones((n_sample, n_sample)))
    D = np.diag(np.diag(K))
    return D - W
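# A quick hedged check of get_L above: for a Laplacian D - W built from a similarity matrix,
# every row should sum to zero. The random data is illustrative only, and pdist/squareform
# are assumed to be in scope for get_L (imported here from scipy).
import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.random.rand(6, 4)
Lap = get_L(X)
print(np.allclose(Lap.sum(axis=1), 0))   # expected: True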
def D_KL(test_spectrum, apriori_spectrum):
    """ Calculates the Kullback-Leibler divergence. """
    fDEF = apriori_spectrum / sum(apriori_spectrum)
    f = test_spectrum / sum(test_spectrum)
    from autograd import numpy as ag_np
    log_ratio = ag_np.nan_to_num(ag_np.log(fDEF / f))
    return ag_np.dot(fDEF, log_ratio)
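# Hedged usage sketch for D_KL above, assuming autograd is installed. The spectra are
# made up for illustration; the divergence is ~0 when the two spectra are proportional,
# since both are normalised to unit sum before comparison.
import numpy as np

apriori = np.array([4.0, 3.0, 2.0, 1.0])
test = np.array([3.5, 3.0, 2.5, 1.0])
print(D_KL(test, apriori))           # small positive number
print(D_KL(2 * apriori, apriori))    # ~0.0, scale is removed by normalisation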
def GradAckleyProblem(xs):
    """del H/del xi = -20 * -0.2 * (xi * 1/n) / sqrt(1/n sum_j xj^2) * a + 2 pi sin(2 pi xi)/n * b"""
    out_shape = xs.shape
    a = np.exp(-0.2 * np.sqrt(1. / len(xs) * np.square(np.linalg.norm(xs, axis=0))))
    b = -np.exp(1. / len(xs) * np.sum(np.cos(2 * np.pi * xs), axis=0))
    a_p = -0.2 * (xs * 1. / len(xs)) / np.sqrt(1. / len(xs) * np.square(np.linalg.norm(xs, axis=0)))
    b_p = -2 * np.pi * np.sin(2 * np.pi * xs) / len(xs)
    # only when norm(x) == 0 do we have nan and we know the grad is zero there
    return np.nan_to_num(-20 * a_p * a + b_p * b).reshape(out_shape)
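# A hedged finite-difference check of GradAckleyProblem above. `ackley_core` is an
# illustrative assumption: the Ackley surface without its additive constants (20 + e),
# which do not affect the gradient. numpy is assumed as np.
import numpy as np

def ackley_core(xs):
    n = len(xs)
    return (-20 * np.exp(-0.2 * np.sqrt(np.sum(xs**2, axis=0) / n))
            - np.exp(np.sum(np.cos(2 * np.pi * xs), axis=0) / n))

x = np.array([0.3, -1.2, 0.7])
eps = 1e-6
fd = np.array([(ackley_core(x + eps * e) - ackley_core(x - eps * e)) / (2 * eps)
               for e in np.eye(len(x))])        # central finite differences
print(np.allclose(fd, GradAckleyProblem(x), atol=1e-5))   # expected: True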
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    K = self.K
    P = sum([np.sum(Ezzp1, axis=0) for _, Ezzp1, _ in expectations]) + 1e-32
    P = np.nan_to_num(P / P.sum(axis=-1, keepdims=True))

    # Set rows that are all zero to uniform
    P = np.where(P.sum(axis=-1, keepdims=True) == 0, 1.0 / K, P)
    log_P = np.log(P)
    self.log_Ps = log_P - logsumexp(log_P, axis=-1, keepdims=True)
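# A hedged toy illustration of the normalisation in m_step above: expected transition
# counts are row-normalised, and the log-probabilities are re-normalised with logsumexp.
# The counts below are made up; K = 2.
import numpy as np
from scipy.special import logsumexp

Ezzp1 = np.array([[8.0, 2.0], [1.0, 9.0]])
P = Ezzp1 / Ezzp1.sum(axis=-1, keepdims=True)
log_Ps = np.log(P) - logsumexp(np.log(P), axis=-1, keepdims=True)
print(np.exp(log_Ps))   # rows sum to 1: [[0.8, 0.2], [0.1, 0.9]]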
def theta_sample(alpha, batch, lr=.2, EM_iter=EM_iter, SGD_iter=SGD_iter):
    """
    Returns mc random samples of the posterior mean estimator, for doing Monte Carlo approx.
    This uses the EM algorithm with EM_iter iterations to converge to the approximate posterior.
    The output is a matrix of d sampled means, times mc samples. Shape = (d, mc)
    lr must be at most 0.25, otherwise the reparameterisation explodes.
    """
    if model_name == "jaakkola":
        # POSITIVE initial value of lambda(v) before maximization, same shape as Y
        lv1 = np.abs(np.random.normal(0, 1, batch[1].shape))
        for i in range(EM_iter):
            # EM algorithm updating (mu_P, S_P) and v in turn.
            S_P1 = S_P(alpha, batch, lv1)       # Cov matrix (d x d)
            mu_P1 = mu_P(alpha, batch, S_P1)    # Mean vector (d x 1)
            lv1 = lambda_v(batch, mu_P1, S_P1)  # lambda(v) vector (n x 1)
        return np.random.multivariate_normal(mu_P1.reshape(-1), S_P1, mc).T

    if model_name == "SVI":
        # Prior MUST BE N(0,I)
        mu_P1 = np.copy(mu_0)                        # init posterior = prior N(0,I)
        rho_P1 = reparam_bwd(sigma_0)                # init posterior = prior N(0,I)
        gradients_mu = np.empty((d, SGD_iter - 1))   # init plotting
        for j in range(1, SGD_iter):
            # generate noise ~ N(0,I)
            epsilon = np.random.multivariate_normal(np.zeros(d), np.identity(d)).reshape(-1, 1)
            # nan_to_num is used to fix an Autograd bug with sqrt(0)
            theta = np.nan_to_num(mu_P1 + reparam_fwd(rho_P1.reshape(-1, 1)) * epsilon)
            df_dtheta1 = np.nan_to_num(df_dtheta(theta, alpha, batch, mu_P1, rho_P1))
            grad_mu_P = np.nan_to_num(df_dtheta1 + df_dmu_P(theta, alpha, batch, mu_P1, rho_P1))
            grad_rho_P = np.nan_to_num(
                df_dtheta1 * (epsilon / (1 + np.exp(-rho_P1.reshape(-1, 1))))
                + df_drho_P(theta, alpha, batch, mu_P1, rho_P1).reshape(-1, 1))
            mu_P1 -= lr / np.sqrt(j) * grad_mu_P   # gradient descent
            rho_P1 -= lr / np.sqrt(j) * np.squeeze(grad_rho_P)
            # Plotting the SGD
            gradients_mu[:, j - 1] = np.squeeze(mu_P1)
        if False and alpha == 0:
            plt.plot(gradients_mu.T)
            plt.xlabel("iteration")
            plt.ylabel("mu_P's values")
            plt.savefig("../plots/SGD_SVI.png")
            plt.show()
            plt.clf()
        return np.random.multivariate_normal(mu_P1.reshape(-1), np.diag(reparam_fwd(rho_P1)**2), mc).T
def elbo(y, phi, lam, pi, psi, sigma2s, mus, Sigmas, kernel_params):
    """
    phi [N, K] sample membership (cell line cluster)
    lam [G, L] feature membership (expression cluster)
    pi [K] sample mixture weight
    psi [L] feature mixture weights
    y [N, G, T] data
    mus [K, L, T] means
    """
    """
    conditional = np.array([list(map(
        lambda f, s: norm.logpdf(y, f, s).sum(axis=-1),
        Q[:, :-1], Q[:, -1]))
        for Q in np.concatenate([mus, sigma2s[:, :, np.newaxis]], 2)])
    conditional = conditional + np.log(mix)[:, :, np.newaxis, np.newaxis]
    assignments = np.einsum('nk, gl->klng', phi, lam)
    likelihood = np.sum(conditional * assignments)
    """
    likelihood = 0

    # data likelihood
    for l in range(L):
        for k in range(K):
            ll = np.sum(np.nan_to_num(norm.logpdf(
                y, mus[k, l], np.sqrt(sigma2s[k, l]))), axis=-1)
            ll = ll - 0.5 * (np.trace(Sigmas[k, l] / sigma2s[k, l]))
            ll = ll * phi[:, k][:, np.newaxis]
            ll = ll * lam[:, l]
            likelihood = likelihood + np.sum(ll)

    # assignment likelihood
    likelihood = likelihood + np.sum(np.log(pi) * phi)
    likelihood = likelihood + np.sum(np.log(psi) * lam)

    # function likelihood
    for k in range(K):
        for l in range(L):
            Ker = cov_func(kernel_params[k, l], inputs, inputs)
            likelihood = likelihood \
                + mvn.logpdf(mus[k, l], np.zeros(T), Ker) \
                - 0.5 * np.trace(solve(Ker, Sigmas[k, l]))

    entropy = np.sum(list(map(multinomial_entropy, phi))
                     + list(map(multinomial_entropy, lam)))
    for k in range(K):
        for l in range(L):
            entropy = entropy + mvn.entropy(mus[k, l], Sigmas[k, l])

    return likelihood + entropy
def log_like(self, s, t):
    """
    Calculates log likelihood based on the LIF likelihood
    Args:
        s: estimated gain of stimulation in space
        t: spike timings
    Returns: the log likelihood, with non-finite values replaced by zero
    """
    v = np.einsum('i,ij->ij', np.exp(s), self.const_mat)
    p = expit(v - self.v_thresh)
    logp = np.sum(np.log(1 - p), 1)
    logp = logp + np.multiply(
        t < self.t_max,
        -np.log(1 - p[self.t_idx]) + np.log(p[self.t_idx]))
    return np.nan_to_num(logp)
def decaycos_int(w, tau, phi=0.0, L=128.0, verbose=0, **kwargs):
    r"""
    Integral from 0 to L. Using Euler identities we find the antiderivative
    $$\begin{aligned}
    \int^L \cos(\omega \xi + \phi) \exp(-\tau \xi)\, d\xi
    &= \frac{e^{-L \tau} (\omega \sin(L \omega + \phi) - \tau \cos(L \omega + \phi))}{\tau^2 + \omega^2}
    \end{aligned}$$
    and thus
    $$\begin{aligned}
    \int_0^L \cos(\omega \xi + \phi) \exp(-\tau \xi)\, d\xi
    &= \frac{1}{\tau^2 + \omega^2}
       \left. e^{-\xi \tau} (\omega \sin(\xi \omega + \phi) - \tau \cos(\xi \omega + \phi))
       \right|_{\xi=0}^{\xi=L} \\
    &= \frac{1}{\tau^2 + \omega^2}
       \left( e^{-L \tau} (\omega \sin(L \omega + \phi) - \tau \cos(L \omega + \phi))
       - \omega \sin \phi + \tau \cos \phi \right).
    \end{aligned}$$
    """
    comps = (
        np.exp(-L * tau) * (
            -tau * np.cos(L * w + phi)
            + w * np.sin(L * w + phi)
        ) + (
            tau * np.cos(phi)
            - w * np.sin(phi)
        )
    ) / (np.square(tau) + np.square(w))
    # This approximation is numerically stable as tau and w go to 0,
    # and autograd can (usually) differentiate it correctly;
    # there are issues with the gradient if tau is 0 but no other terms are
    explode_mask = np.isfinite(comps) < 1
    comps = np.nan_to_num(comps) + L * explode_mask
    return comps
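# A hedged numerical check of decaycos_int above against quadrature. The parameter
# values are arbitrary and scipy.integrate.quad is assumed to be available; numpy is
# assumed as np.
import numpy as np
from scipy.integrate import quad

w_, tau_, phi_, L_ = 0.7, 0.05, 0.3, 32.0
numeric, _ = quad(lambda xi: np.cos(w_ * xi + phi_) * np.exp(-tau_ * xi), 0.0, L_)
print(np.isclose(numeric, decaycos_int(w_, tau_, phi=phi_, L=L_)))   # expected: True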
def grad_KL_R(self):
    """
    Gradient of KL divergence w.r.t. variational covariance
    Returns: list of gradients, one per covariance factor
    """
    grad_Rs = []
    for d in range(len(self.Rs)):
        R_d = self.Rs[d]
        n = R_d.shape[0]
        grad_R = np.zeros((n, n))
        R_inv = np.linalg.inv(R_d)
        K_inv_R = self.K_invs[d].dot(R_d)
        for i, j in zip(*np.triu_indices(n)):
            grad_R[i, j] = - R_inv[i, j] + \
                np.prod(self.traces) / self.traces[d] * \
                K_inv_R[i, j]
        grad_Rs.append(np.nan_to_num(grad_R))
    return grad_Rs
def sqrt_eig(self):
    """
    Calculates the square root of the kernel matrix using the fast Kronecker eigendecomposition.
    This is used in stochastic approximations of the predictive variance.
    Returns: square root of the kernel matrix
    """
    res = []
    for e, v in self.K_eigs:
        e_root_diag = np.sqrt(e)
        e_root = np.diag(np.real(np.nan_to_num(e_root_diag)))
        res.append(np.real(np.dot(np.dot(v, e_root), np.transpose(v))))
    res = np.squeeze(kron_list(res))
    self.root_eigdecomp = res
    return res
def test_nan_to_num():
    y = np.array([0., np.nan, np.inf, -np.inf])
    fun = lambda x: np.sum(np.sin(np.nan_to_num(x + y)))
    x = np.random.randn(4)
    check_grads(fun, x)
def training_loss_W(W):
    out = W @ self.A + self.b  # fast_mul(W, self.A) + self.b
    softmax = np.nan_to_num(
        np.nan_to_num(np.exp(out / self.T)) /
        np.nan_to_num(np.sum(np.exp(out / self.T), axis=0)))
    return ce(y, softmax)
def training_loss_b(b):
    out = self.W @ self.A + b  # fast_mul(self.W, self.A) + b
    softmax = np.nan_to_num(
        np.nan_to_num(np.exp(out / self.T)) /
        np.nan_to_num(np.sum(np.exp(out / self.T), axis=0)))
    return ce(y, softmax)
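# The stacked nan_to_num calls in the two losses above guard exp() against overflow and
# a zero denominator. A common alternative (a sketch, not the source's implementation) is
# to subtract the column-wise max before exponentiating, which keeps the softmax finite
# by construction. `stable_softmax` is a hypothetical helper name.
import numpy as np

def stable_softmax(out, T=1.0, axis=0):
    z = out / T
    z = z - np.max(z, axis=axis, keepdims=True)   # shift so the largest logit is 0
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)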
def choose_molecule_pitch_opt(target,
                              code_coef,
                              maxiter=5,
                              t=None,
                              lr=0.01,
                              low_pitch=0.5**0.5,
                              high_pitch=2.0**0.5,
                              n_starts=65,
                              trace=False,
                              pdb=False,
                              verbose=0,
                              norm_method='analytic',
                              **molecule_args):
    """
    choose pitch for one molecule and return inner product at that pitch
    """
    if t is None:
        t = np.arange(target.size)
    rates = np.exp(
        np.linspace(np.log(low_pitch), np.log(high_pitch), n_starts + 2, endpoint=True)[1:-1])
    max_step = (high_pitch - low_pitch) / n_starts

    def multi_objective(rates):
        """
        normalised inner product for each rate
        """
        molecules = [molecular_scale(code_coef, 1, rate) for rate in rates]
        normecules = np.array([
            molecular_eval_norm(t, *molecule, norm_method=norm_method, verbose=verbose)
            for molecule in molecules
        ])
        if not np.all(np.isfinite(normecules)) and verbose >= 1:
            exploded = np.isfinite(normecules.sum(1))
            warnings.warn(
                "{} normed molecules {} exploded with\n{} at rates\n{}".format(
                    np.sum(exploded), normecules.shape, code_coef, rates[exploded]))
        obj = np.array([np.dot(normecule, target) for normecule in normecules])
        return np.nan_to_num(obj)

    grad = elementwise_grad(multi_objective)
    # f, axarr = plt.subplots(2, 1)
    if trace:
        trace_list = []
    for step_i in range(maxiter):
        # gradient ascent
        jac = grad(rates)
        if not np.all(np.isfinite(jac)) and verbose >= 1:
            warnings.warn(
                "jac exploded {}\nfor coefs \n{}\nat rate {}\nwith obj {}".format(
                    jac, code_coef, rates, multi_objective(rates)))
        jac = np.nan_to_num(jac)
        step = np.clip(lr * jac, -max_step, max_step)
        if pdb:
            print(step_i, "jac", np.sqrt((jac**2).mean()), "step", np.sqrt((step**2).mean()))
            from IPython.core.debugger import set_trace
            set_trace()
        # val = multi_objective(rates)
        # best = np.argmax(val)
        # stepsize = jac[best]
        # print('stepsize', stepsize)
        # axarr[0].quiver(
        #     rates,                                   # X
        #     val,                                     # Y
        #     step,                                    # U
        #     np.zeros_like(step),                     # V
        #     np.full_like(step, step_i/(maxiter-1)),  # C
        #     cmap="magma",
        #     angles='xy',
        #     label="step {}".format(step_i))
        # axarr[1].scatter(
        #     rates,  # X
        #     val,    # Y
        #     cmap="magma",
        #     label="step {}".format(step_i))
        rates = rates + step  # gradient ascent step
        rates = np.clip(rates, low_pitch, high_pitch)
        if trace:
            trace_list.append((multi_objective(rates), rates, jac, step))
        if verbose >= 21:
            max_goodness = np.amax(multi_objective(rates))
            print("max_goodness at ", step_i)
            if not np.isfinite(max_goodness):
                from IPython.core.debugger import set_trace
                set_trace()

    if trace:
        return trace_list

    goodnesses = multi_objective(rates)
    best_idx = np.argmax(goodnesses)
    if verbose >= 11:
        print("choose_molecule_pitch_opt", best_idx, rates[best_idx], "@", goodnesses[best_idx])
    return rates[best_idx], goodnesses[best_idx]
print(neighbors)
data = np.array(data)
# gew = elementwise_grad(entropy_loss)
g = grad(entropy_loss_indices)
# g = elementwise_grad(entropy_loss_indices)
print('\tCurrent point:\n', '\n'.join([str(s) for s in sorted(data)]))
print('\tCurrent avg. dist:', get_avg_dist(data, neighbors))
curr_loss = entropy_loss_indices(data, neighbors)
print('\tCurrent loss', curr_loss)
print('\tGradient:\n', '\n'.join([str(s) for s in g(data, neighbors)]))
print('\tUnsorted data:\n', '\n'.join([str(s) for s in data]))
stepsize = 1 / max(abs(np.nan_to_num(g(data, neighbors))))
prop_accepted = 0
for iter in range(1000):
    print(iter)
    proposal = data - stepsize * np.nan_to_num(g(data, neighbors))
    print('\tProposal:\n', '\n'.join([str(s) for s in sorted(proposal)]))
    proposed_neighbors = make_neighbors_indexes(proposal)
    print('\tProposed avg. dist:', get_avg_dist(proposal, proposed_neighbors))
    proposed_loss = entropy_loss_indices(proposal, proposed_neighbors)
    print('\tProposed loss:', proposed_loss)
    # import code; code.interact(local=dict(globals(), **locals()))
    if stepsize < 0.0001:
        break
def test_nan_to_num():
    y = np.array([0., np.nan, np.inf, -np.inf])
    fun = lambda x: np.sum(np.sin(np.nan_to_num(x + y)))
    x = np.random.randn(4)
    combo_check(fun, [0], [x])
def normalize(data):
    return numpy.nan_to_num((data - mean) / std)
def arnet_cost_function(params, model_input, model_output):
    yhat = arnet_predict(params, model_input)[0]
    # minimize SMAPE
    return np.mean(200 * np.nan_to_num(
        np.abs(model_output - yhat) / (np.abs(model_output) + np.abs(yhat))))
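# A hedged worked example of the SMAPE term in arnet_cost_function above, with made-up
# values: |y - yhat| / (|y| + |yhat|) = 2/12, so 200 * 2/12 ≈ 33.3 for the first point,
# and nan_to_num maps the 0/0 case (y = yhat = 0) to 0 instead of NaN (numpy may still
# emit a RuntimeWarning for the invalid division).
import numpy as np

y = np.array([5.0, 0.0])
yhat = np.array([7.0, 0.0])
smape = np.mean(200 * np.nan_to_num(np.abs(y - yhat) / (np.abs(y) + np.abs(yhat))))
print(smape)   # (33.33... + 0) / 2 ≈ 16.67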