def p1(): """ Generates the plots for problem 1, which show estimated loss and gradients for different numbers of Monte Carlo samples. """ sample_sizes = [1, 10, 100, 1000] std_w = np.log(0.1 * np.ones(1)) mean_b, std_b = np.zeros(1), np.log(0.1 * np.ones(1)) grad_q = autograd.grad(log_entropy, argnum=2) for samples in sample_sizes: loss_results = [] grad_results = [] for m in mean_grid: mean_w = m * np.ones(1) losses, grads = np.zeros(samples), np.zeros(samples) for s in range(samples): w, b = sample_gaussian(mean_w, std_w, mean_b, std_b) loss = loss_function(w, b, mean_w, std_w, mean_b, std_b, bnn=simple_predict) losses[s] = loss grads[s] = grad_q(w, b, mean_w, std_w, mean_b, std_b) * loss loss_results.append(np.average(losses)) grad_results.append(np.average(grads)) plot_lines(mean_grid, loss_results) plt.savefig('bbvi_loss_' + str((plt.gcf().number) // 2 + 1) + '.png', bbox_inches='tight') plot_lines(mean_grid, grad_results) plt.savefig('bbvi_grad_' + str((plt.gcf().number) // 2) + '.png', bbox_inches='tight') plt.show()
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    x = np.concatenate(datas)
    weights = np.concatenate([Ez for Ez, _, _ in expectations])
    for k in range(self.K):
        self.mus[k] = np.average(x, axis=0, weights=weights[:, k])
        sqerr = (x - self.mus[k])**2
        self.inv_sigmas[k] = np.log(np.average(sqerr, weights=weights[:, k], axis=0))
def runSB(psf, psf_k, imageArray):
    nImages = np.shape(imageArray)[2]
    results = imageArray * 0
    for imageIdx in range(0, nImages):
        if imageIdx < start:
            continue
        grndpath = '/home/moss/SMLM/data/fluorophores/frames/' + str(
            imageIdx + 1).zfill(5) + '.csv'
        grnd = Table.read(grndpath, format='ascii')
        no_source = len(grnd['xnano'])
        img = imageArray[:, :, imageIdx]
        sub = img - np.average(img[img < np.average(img) + 3 * np.std(img)])
        subnorm = sub / np.max(sub)
        mock = makeMock(grnd)
        #mocknorm = mock/np.max(mock);
        sb = SparseBayes(subnorm, psf, psf_k, no_source)
        #sb = SparseBayes_alpha(mock,psf,psf_k,no_source);
        #sb = SparseBayes_nofft(mock,psf,psf_k,sig_psf,no_source);
        #sb = SparseBayes_gaussian(subnorm,psf,psf_k);
        results[:, :, imageIdx] = sb.res
        s = 'nopri' + str(imageIdx + 1).zfill(5) + '.out'
        np.savetxt(s, sb.res)
        plt.imshow(results[:, :, imageIdx])
        plt.show()
    return results
def leave_one_out(data, model, cols):
    target = 'formation_energy_per_atom'
    loo = LeaveOneOut()
    rmse = []
    cv_scores = []
    start = time.time()
    for train_index, test_index in loo.split(data[cols]):
        # Split dataset
        x_train, x_test = data[cols].loc[train_index], data[cols].loc[test_index]
        y_train, y_test = data[target].loc[train_index], data[target].loc[test_index]
        # Fit model
        model.fit(x_train, y_train)
        # Get metrics
        pred_y = model.predict(x_test)
        rmse.append((pred_y[0] - y_test.values[0])**2)
        #cv_scores.append(cross_val_score(model, x_test, y_test, cv=1))
    print("\nAVERAGE RMSE: {}, IN {} SECONDS".format(
        np.sqrt(np.average(rmse)), time.time() - start))
    #print(np.average(cv_scores))
    #exit(1)
    return np.sqrt(np.average(rmse))
def get_orthogonality_score(C_matrix, verbose=True):
    """
    Gets the angle between each subspace and the other ones.
    Note that we leave the diagonal as zeros (the angle of a subspace with
    itself is 0 anyway), and it helps to have a more representative mean.
    """
    in_degree = True
    len_1, len_2 = C_matrix.shape
    orthogonality_matrix = np.zeros((len_2, len_2))
    for lat_i in range(0, len_2):
        for lat_j in range(lat_i + 1, len_2):
            angle = np.dot(C_matrix[:, lat_i], C_matrix[:, lat_j]) / (np.dot(
                np.linalg.norm(C_matrix[:, lat_i]),
                np.linalg.norm(C_matrix[:, lat_j])))
            orthogonality_matrix[lat_i, lat_j] = np.arccos(np.abs(angle))
            orthogonality_matrix[lat_j, lat_i] = np.arccos(np.abs(angle))
    if in_degree:
        orthogonality_matrix = 180 * orthogonality_matrix / np.pi
    mean_per_sub_space = np.sum(np.abs(orthogonality_matrix), 1) / (len_2 - 1)
    glob_mean = np.mean(mean_per_sub_space)
    try:
        all_non_diag = orthogonality_matrix.flatten()
        all_non_diag = all_non_diag[np.nonzero(all_non_diag)]
        # lower and upper quartiles of the off-diagonal angles
        tenth_percentil = np.percentile(all_non_diag, 25)
        ninetith_percentil = np.percentile(all_non_diag, 75)
        small_avr = np.average(
            all_non_diag,
            weights=(all_non_diag <= tenth_percentil).astype(int))
        high_avr = np.average(
            all_non_diag,
            weights=(all_non_diag >= ninetith_percentil).astype(int))
    except:
        small_avr = glob_mean
        high_avr = glob_mean
    if verbose:
        print(np.around(orthogonality_matrix, 2))
        print("Mean abs angle per subspace: ", mean_per_sub_space)
        print("Mean abs angle overall: ", glob_mean)
        #print("Std abs angle overall: ", np.std(mean_per_sub_space))
        # print(small_avr, high_avr)
    if len_2 <= 1:
        glob_mean = small_avr = high_avr = 0
    return glob_mean, small_avr, high_avr
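# Minimal usage sketch for get_orthogonality_score above. It assumes numpy is
# already imported as np in this module; the random 8x4 matrix C_demo is purely
# illustrative (its columns play the role of latent directions).
np.random.seed(0)
C_demo = np.random.randn(8, 4)
mean_angle, low_avg, high_avg = get_orthogonality_score(C_demo, verbose=False)
print(mean_angle, low_avg, high_avg)  # mean / lower-quartile / upper-quartile angles, in degrees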
def bbvi(n_iters, step_size, grad_samples, post_samples=50):
    """
    Performs black-box variational inference and returns the average loss
    over iterations and predictions from post_samples posteriors.

    Arguments:
    - n_iters: Number of iterations to run variational inference, int
    - step_size: Learning rate, float
    - grad_samples: Number of MC samples to take for the gradient, int
    - post_samples: Number of samples from posterior to return, int
    """
    params = [
        np.random.normal(size=20),
        np.zeros(20),
        np.random.normal(size=11),
        np.zeros(11)
    ]
    grad_q = autograd.grad(log_entropy, argnum=[2, 3, 4, 5])
    avg_losses = []
    for i in range(n_iters):
        gradients, losses = estimate_gradient(sample_gaussian, loss_function,
                                              grad_q, grad_samples, *params)
        avg_loss = np.average(losses)
        avg_losses.append(avg_loss)
        params = update_params(params, gradients, -1 * losses, step_size)
        if i % 50 == 0:
            print("Iteration " + str(i) + "/" + str(n_iters) +
                  " ---- Loss: " + str(avg_loss))
    w, b = sample_gaussian(*params, samples=post_samples)
    preds = [nn_predict(x_grid, w[i], b[i]) for i in range(post_samples)]
    return avg_losses, np.array(preds)
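# Hypothetical driver for bbvi() above, left commented out because it depends
# on the module-level x_grid, x_train, y_train and on the helpers it calls
# (sample_gaussian, loss_function, estimate_gradient, update_params, nn_predict).
# The argument values are illustrative, not the author's settings.
# avg_losses, preds = bbvi(n_iters=1000, step_size=0.01, grad_samples=100)
# plot_posterior(preds, suffix="(lr 0.01, 100 gradient samples)")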
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    from sklearn.linear_model import LinearRegression

    D, M = self.D, self.M
    for k in range(self.K):
        xs, ys, weights = [], [], []
        for (Ez, _), data, input in zip(expectations, datas, inputs):
            xs.append(
                np.hstack([
                    data[self.lags - l - 1:-l - 1] for l in range(self.lags)
                ] + [input[self.lags:]]))
            ys.append(data[self.lags:])
            weights.append(Ez[self.lags:, k])
        xs = np.concatenate(xs)
        ys = np.concatenate(ys)
        weights = np.concatenate(weights)

        # Fit a weighted linear regression
        lr = LinearRegression()
        lr.fit(xs, ys, sample_weight=weights)
        self.As[k], self.Vs[k], self.bs[k] = \
            lr.coef_[:, :D * self.lags], lr.coef_[:, D * self.lags:], lr.intercept_

        assert np.all(np.isfinite(self.As))
        assert np.all(np.isfinite(self.Vs))
        assert np.all(np.isfinite(self.bs))

        # Update the variances
        yhats = lr.predict(xs)
        sqerr = (ys - yhats)**2
        self.inv_sigmas[k] = np.log(np.average(sqerr, weights=weights, axis=0))
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    x = np.concatenate(datas)
    weights = np.concatenate([Ez for Ez, _, _ in expectations])
    for k in range(self.K):
        ps = np.clip(np.average(x, axis=0, weights=weights[:, k]), 1e-3, 1 - 1e-3)
        self.logit_ps[k] = logit(ps)
def plot_posterior(preds, suffix):
    """
    Creates two plots of the posterior, one with 10 samples, and the other
    with error bars.

    Arguments:
    - preds: Array of predictions on x_grid (samples x grid_size)
    - suffix: Title for the plot, with info on learning rate and sample number
    """
    plot_lines(x_grid, preds[np.random.choice(len(preds), 10, replace=False)])
    plt.title("Converged Posterior Samples " + suffix)
    plt.plot(x_train, y_train, 'rx')
    plt.savefig('final_posterior_' + str(plt.gcf().number) + '.png',
                bbox_inches='tight')
    plt.figure()
    plt.title("Posterior Samples' Uncertainty " + suffix)
    plt.plot(x_train, y_train, 'rx')
    mean, std = np.average(preds, axis=0), np.std(preds, axis=0)
    plt.plot(x_grid, mean, 'k-')
    plt.gca().fill_between(x_grid.flat, mean - 2 * std, mean + 2 * std,
                           color="#dddddd")
    plt.savefig('final_uncertainty_' + str(plt.gcf().number) + '.png',
                bbox_inches='tight')
def getNoise(self):
    #estimate based off of vals less than 2 sigma above the mean
    #did a couple of spot checks and this works pretty well, need more rigor in future
    noi = np.std(self.data[self.data < np.average(self.data) + 2 * np.std(self.data)])
    print('noise is:')
    print(noi)
    return noi
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    x = np.concatenate(datas)
    weights = np.concatenate([Ez for Ez, _, _ in expectations])
    for k in range(self.K):
        # compute weighted histogram of the class assignments
        xoh = one_hot(x, self.C)                                    # T x D x C
        ps = np.average(xoh, axis=0, weights=weights[:, k]) + 1e-3  # D x C
        ps /= np.sum(ps, axis=-1, keepdims=True)
        self.logits[k] = np.log(ps)
def run(epochs):
    np.random.seed(1)
    lr = 0.01
    cost = []
    x = np.array([[0, 0], [0, 1.], [1., 0], [1., 1.]])
    y = np.array([[0, 1., 0, 1.]]).T
    layer_0 = x
    layer_1 = LinearLayer(
        2, 4, sigmoid,
        weight_initialization_function=Initializer.random_normal)
    layer_1n = BatchNorm(4)
    layer_2 = LinearLayer(
        4, 4, relu,
        weight_initialization_function=Initializer.relu_uniform,
        num_layers=3)
    layer_2n = BatchNorm(4)
    layer_3 = LinearLayer(
        4, 1, sigmoid,
        weight_initialization_function=Initializer.sigmoid_uniform)
    for i in range(epochs):
        hl3 = layer_3(layer_2n(layer_2(layer_1n(layer_1(layer_0)))))
        loss = np.average(bce_loss(hl3, y))
        dloss = hl3 - y
        _ = layer_1.backward_pass(
            layer_1n.backward_pass(
                layer_2.backward_pass(
                    layer_2n.backward_pass(layer_3.backward_pass(dloss, lr), lr),
                    lr), lr), lr)
        if i % 1000 == 0:
            print(loss)
        cost.append(loss)
    print(hl3)
    print(y)
    plt.plot(cost)
    plt.ylabel('Loss')
    plt.title('{}'.format(loss))
    plt.show()
def accuracies(params):
    # unpack logged parameters
    optimized_c_l_r, optimized_w = unpack_params(params)
    predicted_train_classes = np.argmax(bayespredict(
        optimized_c_l_r, optimized_w, train_images), axis=0)
    predicted_test_classes = np.argmax(bayespredict(
        optimized_c_l_r, optimized_w, test_images), axis=0)
    # compute real classes
    real_train_classes = np.argmax(train_labels, axis=1)
    real_test_classes = np.argmax(test_labels, axis=1)
    # compute accuracy
    train_accuracy = np.average(
        np.equal(predicted_train_classes, real_train_classes).astype(float))
    test_accuracy = np.average(
        np.equal(predicted_test_classes, real_test_classes).astype(float))
    # output accuracy
    return train_accuracy, test_accuracy
def backward_pass(self, grad, lr):
    #calculate the gradient
    if self.activation_function and self.d_activation_function:
        ds = self.d_activation_function(self.s) * grad
    else:
        ds = self.s * grad
    db = np.average(grad, axis=0)
    dw = self.inputs.T @ ds
    dh = ds @ self.weights.T
    #update layer parameters to be more accurate!
    self.weights = self.weights - dw * lr
    self.bias = self.bias - db * lr
    return dh
def predict(self, X):
    """
    Predict on X using the fitted training data.

    :param X: array-like, shape=(n_samples, n_columns) data to predict on.
    :return: Returns an array of predictions shape=(n_samples,)
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["X_", "y_"])

    results = np.zeros(X.shape[0])
    for idx in range(X.shape[0]):
        results[idx] = np.average(self.y_, weights=self._calc_wts(x_i=X[idx, :]))
    return results
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    from sklearn.linear_model import LinearRegression

    D, M = self.D, self.M
    for d in range(self.D):
        # Collect data for this dimension
        xs, ys, weights = [], [], []
        for (Ez, _, _), data, input, mask in zip(expectations, datas, inputs, masks):
            # Only use data if it is complete
            if np.all(mask[:, d]):
                xs.append(
                    np.hstack([data[self.lags - l - 1:-l - 1, d:d + 1]
                               for l in range(self.lags)] +
                              [input[self.lags:, :M],
                               np.ones((data.shape[0] - self.lags, 1))]))
                ys.append(data[self.lags:, d])
                weights.append(Ez[self.lags:])

        xs = np.concatenate(xs)
        ys = np.concatenate(ys)
        weights = np.concatenate(weights)

        # If there was no data for this dimension then skip it
        if len(xs) == 0:
            self.As[:, d, :] = 0
            self.Vs[:, d, :] = 0
            self.bs[:, d] = 0
            continue

        # Otherwise, fit a weighted linear regression for each discrete state
        for k in range(self.K):
            # Check for zero weights (singular matrix)
            if np.sum(weights[:, k]) < self.lags + M + 1:
                self.As[k, d] = 1.0
                self.Vs[k, d] = 0
                self.bs[k, d] = 0
                self.inv_sigmas[k, d] = 0
                continue

            # Solve for the most likely A,V,b (no prior)
            Jk = np.sum(weights[:, k][:, None, None] * xs[:, :, None] * xs[:, None, :],
                        axis=0)
            hk = np.sum(weights[:, k][:, None] * xs * ys[:, None], axis=0)
            muk = np.linalg.solve(Jk, hk)
            self.As[k, d] = muk[:self.lags]
            self.Vs[k, d] = muk[self.lags:self.lags + M]
            self.bs[k, d] = muk[-1]

            # Update the variances
            yhats = xs.dot(np.concatenate((self.As[k, d], self.Vs[k, d], [self.bs[k, d]])))
            sqerr = (ys - yhats)**2
            sigma = np.average(sqerr, weights=weights[:, k], axis=0) + 1e-16
            self.inv_sigmas[k, d] = np.log(sigma)
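# Self-contained illustration (numpy only) of the weighted normal-equation solve
# used in the m_step above: the weighted least-squares solution of y ~ X mu
# satisfies J mu = h with J = sum_i w_i x_i x_i^T and h = sum_i w_i x_i y_i.
# The toy design matrix, coefficients, and weights below are illustrative.
rng = np.random.RandomState(0)
X_demo = np.column_stack([rng.randn(50), np.ones(50)])   # slope + intercept columns
y_demo = X_demo.dot(np.array([2.0, -1.0])) + 0.01 * rng.randn(50)
w_demo = rng.rand(50)
J_demo = np.sum(w_demo[:, None, None] * X_demo[:, :, None] * X_demo[:, None, :], axis=0)
h_demo = np.sum(w_demo[:, None] * X_demo * y_demo[:, None], axis=0)
print(np.linalg.solve(J_demo, h_demo))                   # approximately [2, -1]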
def get_results(self, var_name, result_name="default", result="bias",
                KL_results="all"):
    xy_vars = ["xm", "ym", "xp", "yp", "x_xi", "y_xi"]
    physics_param_vars = ["gamma", "r", "d", "r_xi", "d_xi"]
    if (not (var_name in xy_vars)) and (not (var_name in physics_param_vars)):
        raise ValueError(
            "'{}' not an xy parameter (xm, ym, xp, yp, x_xi, y_xi) or physics parameter (gamma, r, d, r_xi, d_xi)"
            .format(var_name))

    # Pick out the right parameters, and choose fit values or bias
    if var_name in xy_vars:
        idx = xy_vars.index(var_name)
        var_type = "xy"
    else:
        idx = physics_param_vars.index(var_name)
        var_type = "physics_param"

    # select results where both fits succeeded
    array = self.results["{}-{}-{}".format(result_name, var_type, result)][:, :, idx]
    success = self.results["{}-fit-success".format(result_name)]
    phys_success = self.results["{}-phys-fit-success".format(result_name)]
    weights = np.minimum(success, phys_success)
    avg_weights = np.average(weights, axis=0)
    avg_obs_suc = np.average(success, axis=0)
    avg_phy_suc = np.average(phys_success, axis=0)
    for i, aw in enumerate(avg_weights):
        if aw == 0:
            weights[1, i] = 1
            if self.no_printed_warning:
                print("NO FITS converged for param set number '", i,
                      "' (zero indexed)", " name:", result_name)
                print("setting 1 weight to 1 (but fix!)")
                print("weight :", avg_weights)
                print("obs fits :", avg_obs_suc)
                print("phys fits:", avg_phy_suc)
                self.no_printed_warning = False

    # Then determine what should be done with the different KL amplitudes
    if KL_results == "all":
        # Return a (# KL amplitudes) x (# input params) array with all results
        return array
    elif KL_results == "avg":
        return np.average(array, axis=0, weights=weights)
    elif KL_results == "std":
        # calculate weighted std (not in numpy)
        avg = np.average(array, axis=0, weights=weights)
        var = np.average((array - avg)**2, axis=0, weights=weights)
        return np.sqrt(var)
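# Small self-contained check (numpy only) of the weighted-std trick used in the
# KL_results == "std" branch above: np.std has no weights argument, so the
# weighted variance is built from two calls to np.average. Values are illustrative.
vals = np.array([1.0, 2.0, 10.0])
wts = np.array([1.0, 1.0, 0.0])          # the zero weight drops the third point
w_avg = np.average(vals, weights=wts)
w_std = np.sqrt(np.average((vals - w_avg)**2, weights=wts))
print(w_avg, w_std)                       # 1.5 and 0.5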
def estimate(data, num=1, tol=0.01, maxiter=100):
    fit_params = np.zeros(3 * num - 1)
    a = np.average(data)
    s = np.log(np.std(data))
    for i in range(num):
        fit_params[2 * i] = np.random.normal(loc=a, scale=np.exp(s), size=1)
        fit_params[2 * i + 1] = np.random.normal(loc=s - np.log(num), scale=1, size=1)

    def training_likelihood(params):
        return log_likelihood_logistic(data, params)

    def training_loss(params):
        return -log_likelihood_logistic(data, params)

    training_likelihood_jac = grad(training_likelihood)
    training_loss_jac = grad(training_loss)
    res = minimize(training_loss,
                   jac=training_loss_jac,
                   x0=fit_params,
                   method="BFGS",
                   options={
                       "maxiter": maxiter,
                       "gtol": tol
                   })
    print(res)
    final_params = res.x
    for i in range(num):
        final_params[2 * i + 1] = np.exp(final_params[2 * i + 1])
    results = []
    for i in range(num):
        results.append(final_params[2 * i])
        results.append(
            logistic.isf(
                0.25, loc=final_params[2 * i], scale=final_params[2 * i + 1]) -
            final_params[2 * i])
    for i in range(num - 1):
        results.append(final_params[2 * num + i])
    return results
def update_params(params, gradients, obj, step_size):
    """
    Updates parameters based on gradients, the objective function, and the
    learning rate. Returns a list of arrays the same size as params.

    Arguments:
    - params: A list of arrays (parameter values)
    - gradients: A list of arrays like params, but with as many rows as there
      were samples of the gradient
    - obj: An array of loss values with size samples
    - step_size: The learning rate, a float
    """
    updates = [np.zeros(g.shape) for g in gradients]
    num_params, num_samples = len(params), len(gradients[0])
    for p in range(num_params):
        for s in range(num_samples):
            updates[p][s] = step_size * gradients[p][s] * obj[s]
    avg_update = [np.average(update, axis=0) for update in updates]
    new_params = [p + u for (p, u) in zip(params, avg_update)]
    return new_params
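# Self-contained sketch (numpy only) of the score-function update that
# update_params implements: maximize E_{z~N(mu,1)}[-(z - 3)^2] over mu, so the
# per-sample "gradient" is the score (z - mu) and obj holds the per-sample
# objective values. The toy target of 3, sample count, and step size are illustrative.
mu = [np.array([0.0])]
for _ in range(2000):
    z = np.random.normal(loc=mu[0], scale=1.0, size=(100, 1))
    score = z - mu[0]                  # gradient of log N(z; mu, 1) w.r.t. mu
    objective = -(z - 3.0)**2          # per-sample objective values
    mu = update_params(mu, [score], objective.ravel(), step_size=0.01)
print(mu[0])                           # should end up close to 3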
def getNoise(self, data):
    #estimate based off of vals less than 3 sigma above mean
    #did a couple of spot checks and this works pretty well, need more rigor in future
    return np.std(data[data < np.average(data) + 3 * np.std(data)])
def quadratic_intensity(self, X):
    intensity = self.lamb_bar * np.average(X**2, axis=1, weights=[self.a, 1])
    return intensity
layer_2 = LinearLayer(10, 10, relu,
                      weight_initialization_function=Initializer.relu_uniform,
                      num_layers=3)
layer_3 = LinearLayer(10, 3, sigmoid,
                      weight_initialization_function=Initializer.sigmoid_uniform)
cost = []
for i in range(epochs):
    for idx, mb in enumerate(range(mini_batches_per_epoch)):
        mb = next(data.mini_batch)
        layer_0 = mb.train
        hl3 = layer_3(layer_2(layer_1(layer_0)))
        loss = np.average(bce_loss(hl3, mb.target))
        dloss = hl3 - mb.target
        _ = layer_1.backward_pass(
            layer_2.backward_pass(layer_3.backward_pass(dloss, lr), lr), lr)
    if i % 1000 == 0:
        print(loss)
    cost.append(loss)
preds = layer_3(layer_2(layer_1(x)))
print(preds)
plt.plot(cost)
plt.ylabel('Loss')
plt.title('{}'.format(loss))
def m_step(self, expectations, datas, inputs, masks, tags, **kwargs):
    x = np.concatenate(datas)
    weights = np.concatenate([Ez for Ez, _, _ in expectations])
    for k in range(self.K):
        self.log_lambdas[k] = np.log(
            np.average(x, axis=0, weights=weights[:, k]) + 1e-16)
def standardization(data):
    data_avg = np.average(data)
    data_std = np.std(data)
    return (data - data_avg) / data_std, data_avg, data_std
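# Quick usage sketch for standardization(), assuming numpy is imported as np;
# the sample values are illustrative.
vals = np.array([1.0, 2.0, 3.0, 4.0])
scaled, avg, std = standardization(vals)
print(scaled.mean(), scaled.std())   # roughly 0 and 1
print(scaled * std + avg)            # recovers the original values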
MODE = "BFGS"
qn_H2_soln, qn_H2_iterates = general_rank_2_QN_H(max_iter, f, dfdx, MODE, x_start,
                                                 np.linalg.inv(TEMP_B0))
# FOR NOISE USE
noise = lambda s: np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]])
print("rank 2 H method \"{}\" returns".format(MODE))
print(qn_H2_soln)
ax.plot(qn_H2_iterates[:, 0], qn_H2_iterates[:, 1], marker='o', ls='-',
        label="QN-H R2 (1973)")
ax.legend()

# Run bound noise
qn_H2_bnd_noise = np.zeros((trials, max_iter + 1, 2)) + opt_x
for i in range(trials):
    _, qn_H2_bnd_noise[i] = general_rank_2_QN_H(max_iter, f, dfdx, MODE, x_start,
                                                np.linalg.inv(TEMP_B0),
                                                noise=bnd_noise)
avg_qn_H2_bnd_noise = np.average(qn_H2_bnd_noise, axis=0)
print("avg bounded noise rank 2 H method \"{}\" returns".format(MODE))
print(avg_qn_H2_bnd_noise[-1, :])
ax.plot(avg_qn_H2_bnd_noise[:, 0], avg_qn_H2_bnd_noise[:, 1], marker='o', ls='-',
        label="Avg QN-H R2 Bound Noise")
ax.legend()

# Run unbound noise
qn_H2_unbnd_noise = np.zeros((trials, max_iter + 1, 2)) + opt_x
for i in range(trials):
    _, qn_H2_unbnd_noise[i] = general_rank_2_QN_H(max_iter, f, dfdx, MODE, x_start,
                                                  np.linalg.inv(TEMP_B0),
                                                  noise=noise)
avg_qn_H2_unbnd_noise = np.average(qn_H2_unbnd_noise, axis=0)
print("avg unbounded noise rank 2 H method \"{}\" returns".format(MODE))
print(avg_qn_H2_unbnd_noise[-1, :])
ax.plot(avg_qn_H2_unbnd_noise[:, 0], avg_qn_H2_unbnd_noise[:, 1], marker='o', ls='-',
        label="Avg QN-H R2 Unbound Noise")
def rmse(y_1, y_2):
    return np.sqrt(np.average((y_1 - y_2)**2))
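# Quick check of rmse() above, assuming numpy is imported as np; the arrays are
# illustrative.
print(rmse(np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 5.0])))  # sqrt(4/3) ~= 1.155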
NA = 1.4  #numerical aperture;
FWHM = lam/(2*NA);  #fwhm in nm of gaussian psf
sig_nm = FWHM/(2*np.log(2.0));
sig_psf = sig_nm/100/64;  #gaussian sigma in pix
sig_sq = sig_psf**2  #so we don't have to compute
sig_noise = back_std;

#create our psf
mid = int(n_grid/2);
x,y = np.meshgrid(pix_1d,pix_1d);
psf = np.exp(-((y-pix_1d[mid])**2 + (x - pix_1d[mid])**2)/2/sig_psf**2);
#keep in mind difference between x and y position and indices! Here, you are given indices, but meshgrid is in x-y coords

#fourier transform of psf
psf_k = fft.fft2(psf);

img = plt.imread('/home/moss/SMLM/data/sequence/00002.tif');
img = img-np.average(img[img<np.average(img)+3*np.std(img)]);
data = img/np.max(img);
xi = data + 0.5;
f = 7/(64**2);
sig_noise = np.std(data[data<np.average(data)+3*np.std(data)]);
print('noise is');
print(sig_noise);

norm_sig = 0.75;
norm_mean = -5;
wlim = (0.01,5);

def roll_fft(f):
    r,c = np.shape(f);
    f2 = np.roll(f,(c//2));
    f3 = np.roll(f2,(r//2),axis=-2);
    return f3;

def lognorm(ws):
def mixture_of_gaussian_em(data, Q, init_params=None, weights=None, num_iters=100):
    """
    Use expectation-maximization (EM) to compute the maximum likelihood
    estimate of the parameters of a Gaussian mixture model. The datapoints
    x_i are assumed to come from the following model:

        z_i ~ Cate(pi)
        x_i | z_i ~ N(mu_{z_i}, Sigma_{z_i})

    the parameters are {pi_q, mu_q, Sigma_q} for q = 1...Q

    Assume:
    - data x_i are vectors in R^M
    - covariance is diagonal S_q = diag([S_{q1}, .., S_{qm}])
    """
    N, M = data.shape  ### concatenate all marks; N = # of spikes, M = # of mark dim

    if init_params is not None:
        pi, mus, inv_sigmas = init_params
        assert pi.shape == (Q, )
        assert np.all(pi >= 0) and np.allclose(pi.sum(), 1)
        assert mus.shape == (M, Q)
        assert inv_sigmas.shape == (M, Q)
    else:
        pi = np.ones(Q) / Q
        mus = npr.randn(M, Q)
        inv_sigmas = -2 + npr.randn(M, Q)

    if weights is not None:
        assert weights.shape == (N, ) and np.all(weights >= 0)
    else:
        weights = np.ones(N)

    for itr in range(num_iters):
        ## E-step:
        ## output: number of spikes by number of mixture
        ## attribute spikes to each Q element
        sigmas = np.exp(inv_sigmas)
        responsibilities = np.zeros((N, Q))
        responsibilities += np.log(pi)
        for q in range(Q):
            responsibilities[:, q] += np.sum(
                -0.5 * (data - mus[None, :, q])**2 / sigmas[None, :, q] -
                0.5 * np.log(2 * np.pi * sigmas[None, :, q]),
                axis=1)  # norm.logpdf(...)
        responsibilities -= logsumexp(responsibilities, axis=1, keepdims=True)
        responsibilities = np.exp(responsibilities)

        ## M-step:
        ## take in responsibilities (output of e-step)
        ## compute MLE of Gaussian parameters
        ## mean/std is weighted means/std of mix
        for q in range(Q):
            pi[q] = np.average(responsibilities[:, q])
            mus[:, q] = np.average(data, weights=responsibilities[:, q] * weights, axis=0)
            sqerr = (data - mus[None, :, q])**2
            inv_sigmas[:, q] = np.log(1e-8 + np.average(
                sqerr, weights=responsibilities[:, q] * weights, axis=0))

    return mus, inv_sigmas, pi
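# Usage sketch for mixture_of_gaussian_em above. It assumes this module already
# imports numpy as np and numpy.random as npr (the function itself relies on npr
# and logsumexp). The two-cluster toy data are purely illustrative.
npr.seed(0)
toy = np.vstack([npr.randn(200, 2) - 3.0, npr.randn(200, 2) + 3.0])
mus_hat, inv_sigmas_hat, pi_hat = mixture_of_gaussian_em(toy, Q=2, num_iters=50)
print(pi_hat)      # mixture weights, roughly [0.5, 0.5]
print(mus_hat.T)   # component means, typically near (-3, -3) and (3, 3)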
# output remaining accuracy info
if args.fullaccuracy:
    for i in range(0, NUM_ITERATIONS):
        print("Iteration {} log-likelihood {}".format(i, liklog[i]))
        train_accuracy, test_accuracy = accuracies(param_log[i])
        print("Iteration {} train_accuracy: {}".format(i, train_accuracy))
        print("Iteration {} test_accuracy: {}".format(i, test_accuracy))
else:
    train_accuracy, test_accuracy = accuracies(optimized_params)
    print("train_accuracy: {}".format(train_accuracy))
    print("test_accuracy: {}".format(test_accuracy))

# return estimated convergence point (first log-likelihood in the log within
# <margin> of the average of the last <finalsamples> log-likelihoods)
finalsamples = 5
margin = 10
rejectmargin = 50
goal = np.average(liklog[-finalsamples:])
convergenceindex = np.argmax(
    liklog > (goal - margin))  # argmax returns first index of a True in this case
if (np.abs(goal - liklog[-1]) > rejectmargin):
    print("log-likelihood has not yet converged")
else:
    print("log-likelihood converges by iteration " + str(convergenceindex))

# Plot weights
optimized_c_l_r, optimized_w = unpack_params(optimized_params)
print(softmax(optimized_c_l_r))
weights = optimized_w.reshape(C, 28, 28)
save_images(weights, "weights.jpg")
af_pri = Agrad.grad(lambda tt: -1*lnprior(tt,f_curr,a_curr,sig_delta));
aval_like = af_like(tt0);
aval_pri = af_pri(tt0);
aval = aval_like+aval_pri
tt0 = tt0.reshape((n_grid,n_grid));
gval = np.array([sgrad_lnpost(tt0,index,f_curr,a_curr,sig_delta) for (index,w) in np.ndenumerate(tt0)]);
#lsis = lsis.reshape((n_grid,n_grid));
print(np.absolute(aval-gval));

#now test hessian
'''
'''
hval = hess_lnpost(tt0,f_curr,a_curr,sig_delta);
afunc = Agrad.grad(lambda tt: sgrad_lnpost(tt,f_curr,a_curr,sig_delta));
tt0 = tt0.reshape((n_grid,n_grid));
aval = np.array([afunc(tt0,index) for (index,w) in np.ndenumerate(tt0)]);
print(np.absolute(aval-hval));
'''

hval = hess_lnpost(tt0, f_curr, a_curr, sig_delta)
#af_like = Agrad.hessian(lambda tt: -1*lnlike(tt));
af_pri = Agrad.hessian(lambda tt: -1 * lnprior(tt, f_curr, a_curr, sig_delta))
#aval_like = af_like(tt0);
aval_pri = af_pri(tt0)
#aval = aval_like+aval_pri;
#print(np.diagonal(aval,axis1=1,axis2=2)-np.diagonal(hval))
print(np.average(aval_pri[0][:][:] - hval))
#print(hval_like - np.diagonal(aval_like));
#print(hval_pri - np.diagonal(aval_pri))
#print(np.absolute(aval-hval));