import numpy as np
from scipy.stats import entropy


def KL_symmetric_divergence(p1, p2, x_range, delta):
    """Symmetrized KL divergence between two samples, estimated via histograms."""
    # np.linspace requires an integer sample count.
    n_edges = int(round((x_range[1] - x_range[0]) / float(delta))) + 1
    bins = np.linspace(x_range[0], x_range[1], n_edges)
    p1_hist, p1_bin_edges = np.histogram(p1, bins)
    p2_hist, p2_bin_edges = np.histogram(p2, bins)
    p1_hist = p1_hist.astype('float64')
    p2_hist = p2_hist.astype('float64')
    # Replace zero counts so the KL divergence stays finite.
    p1_hist[p1_hist == 0] = 1e-25
    p2_hist[p2_hist == 0] = 1e-25
    return entropy(p1_hist, p2_hist, base=2) + entropy(p2_hist, p1_hist, base=2)
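# Hypothetical usage sketch (added; the sample data below is illustrative, not
# from the original source): compare two Gaussian samples on [-5, 5] with
# 0.1-wide bins. A shifted mean should yield a clearly positive divergence.
np.random.seed(0)
sample_a = np.random.normal(0.0, 1.0, 10000)
sample_b = np.random.normal(0.5, 1.0, 10000)
print(KL_symmetric_divergence(sample_a, sample_b, (-5, 5), 0.1))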
def testDirichletCategorical(self):
    def log_joint(p, x, alpha):
        log_prior = np.sum((alpha - 1) * np.log(p))
        log_prior += -special.gammaln(alpha).sum() + special.gammaln(alpha.sum())
        # TODO(mhoffman): We should make it possible to only use one-hot
        # when necessary.
        one_hot_x = one_hot(x, alpha.shape[0])
        log_likelihood = np.sum(np.dot(one_hot_x, np.log(p)))
        return log_prior + log_likelihood

    vocab_size = 5
    n_examples = 11
    alpha = 1.3 * np.ones(vocab_size)
    p = np.random.gamma(alpha, 1.)
    p /= p.sum(-1, keepdims=True)
    x = np.random.choice(np.arange(vocab_size), n_examples, p=p)

    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.SIMPLEX,
                                   p, x, alpha))

    new_alpha = alpha + np.histogram(x, np.arange(vocab_size + 1))[0]
    correct_marginalized_value = (
        -special.gammaln(alpha).sum() + special.gammaln(alpha.sum())
        + special.gammaln(new_alpha).sum() - special.gammaln(new_alpha.sum()))
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertTrue(np.allclose(new_alpha, conditional.alpha))
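# Background note (added): the test above exercises Dirichlet-categorical
# conjugacy. With a Dirichlet(alpha) prior on p and per-category counts n_k
# from the observations, the exact posterior is Dirichlet(alpha + n); the
# counts are obtained via a histogram over the integer labels:
#
#     counts = np.histogram(x, np.arange(vocab_size + 1))[0]
#     posterior_alpha = alpha + counts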
def print_perf(gen_trainable_params, dsc_trainable_params, init_params_max,
               init_params_min, iter):
    if iter % 10 == 0:
        ability = np.mean(objective(gen_trainable_params,
                                    dsc_trainable_params, iter))
        subspace_training = init_params_max[3]
        gen_nn_params = get_params_from_subspace(gen_trainable_params,
                                                 init_params_max[1],
                                                 init_params_max[2])
        dsc_nn_params = get_params_from_subspace(dsc_trainable_params,
                                                 init_params_min[1],
                                                 init_params_min[2])
        input_params = (gen_trainable_params if subspace_training
                        else gen_nn_params)
        # TODO(sorenmind): REMOVE!
        # dsc_trainable_params = np.zeros(len(dsc_trainable_params))
        fake_data = generate_from_noise(gen_nn_params, 1000, latent_dim, seed,
                                        dsc_trainable_params)
        real_data = sample_true_data_dist(100, seed)
        probs_fake = np.mean(np.exp(disc(dsc_nn_params, input_params,
                                         fake_data)))
        probs_real = np.mean(np.exp(disc(dsc_nn_params, input_params,
                                         real_data)))
        print("{:15}|{:20}|{:20}|{:20}".format(iter, ability, probs_fake,
                                               probs_real))

        # Plot data and functions.
        figrange = (-1, 3)
        plot_inputs = np.expand_dims(np.linspace(*figrange, num=400), 1)
        outputs = np.exp(true_data_dist_logprob(plot_inputs))
        discvals = sigmoid(disc(dsc_nn_params, input_params, plot_inputs))
        h, b = np.histogram(fake_data, bins=100, range=figrange, density=True)
        plt.cla()
        ax.plot(plot_inputs, outputs, 'g-')
        ax.plot(plot_inputs, discvals, 'r-')
        ax.plot(b[:-1], h, 'b-')
        ax.set_ylim([0, 3])
        plt.draw()
        plt.pause(1.0 / 60.0)
def print_perf(gen_params, gen_layer_sizes, subspace_params, dsc_params, iter):
    if iter % 10 == 0:
        ability = np.mean(objective(gen_params, gen_layer_sizes,
                                    dsc_params, dsc_layer_sizes, iter))
        fake_data = generate_from_noise(gen_params, gen_layer_sizes,
                                        subspace_params, 1000, latent_dim,
                                        seed)
        real_data = sample_true_data_dist(100, seed)
        probs_fake = np.mean(np.exp(disc(dsc_params, gen_params, fake_data)))
        probs_real = np.mean(np.exp(disc(dsc_params, gen_params, real_data)))
        print("{:15}|{:20}|{:20}|{:20}".format(iter, ability, probs_fake,
                                               probs_real))

        # Plot data and functions.
        figrange = (-1, 3)
        plot_inputs = np.expand_dims(np.linspace(*figrange, num=400), 1)
        outputs = np.exp(true_data_dist_logprob(plot_inputs))
        discvals = sigmoid(disc(dsc_params, gen_params, plot_inputs))
        h, b = np.histogram(fake_data, bins=100, range=figrange, density=True)
        plt.cla()
        ax.plot(plot_inputs, outputs, 'g-')
        ax.plot(plot_inputs, discvals, 'r-')
        ax.plot(b[:-1], h, 'b-')
        ax.set_ylim([0, 3])
        plt.draw()
        plt.pause(1.0 / 60.0)
def feature_distributions(self, x):
    # create figure
    fig = plt.figure(figsize=(10, 4))

    # create subplots
    N = x.shape[0]
    if N <= 5:
        gs = gridspec.GridSpec(1, N)
    else:
        gs = gridspec.GridSpec(2, 5)

    # remove whitespace from figure
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)

    # loop over input and collect the bin edges for each input dimension
    all_bins = []
    for n in range(N):
        hist, bins = np.histogram(x[n, :], bins=30)
        all_bins.append(bins.ravel())

    # determine range for all subplots
    maxview = np.max(all_bins)
    minview = np.min(all_bins)
    viewrange = (maxview - minview) * 0.1
    maxview += viewrange
    minview -= viewrange

    for n in range(N):
        # make subplot
        ax = plt.subplot(gs[n])
        hist, bins = np.histogram(x[n, :], bins=30)
        width = 0.7 * (bins[1] - bins[0])
        center = (bins[:-1] + bins[1:]) / 2
        ax.barh(center, hist, width)
        ax.set_title(r'$x_' + str(n + 1) + '$', fontsize=14)
        ax.set_ylim([minview, maxview])
    plt.show()
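# Hypothetical usage sketch (added; 'plotter' stands in for an instance of the
# enclosing class, which is not shown here):
# data = np.random.randn(4, 500)       # 4 features, 500 samples each
# plotter.feature_distributions(data)  # one histogram subplot per feature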
def single_layer_distributions(self, u, x, axs):
    # loop over input and collect the bin edges for each input dimension
    all_bins = []
    N = x.shape[0]
    for n in range(N):
        hist, bins = np.histogram(x[n, :], bins=30)
        all_bins.append(bins.ravel())

    # determine range for all subplots
    maxview = np.max(all_bins)
    minview = np.min(all_bins)
    viewrange = (maxview - minview) * 0.1
    maxview += viewrange
    minview -= viewrange

    for n in range(N):
        ax = axs[n]
        hist, bins = np.histogram(x[n, :], bins=30)
        width = 0.7 * (bins[1] - bins[0])
        center = (bins[:-1] + bins[1:]) / 2
        ax.barh(center, hist, width)
        ax.set_title(r'$f_' + str(n + 1) + '^{(' + str(u + 1) + ')}$',
                     fontsize=14)
        ax.set_ylim([minview, maxview])
def _threshold(morph):
    """Find the threshold value for a given morphology."""
    _morph = morph[morph > 0]
    _bins = 50
    # Decrease the bin size for sources with a small number of pixels
    # (np.int is removed in recent NumPy; use the builtin int).
    if _morph.size < 500:
        _bins = max(int(_morph.size / 10), 1)
        if _bins == 1:
            return 0, _bins
    hist, bins = np.histogram(np.log10(_morph).reshape(-1), _bins)
    cutoff = np.where(hist == 0)[0]
    # If all of the pixels are used there is no need to threshold
    if len(cutoff) == 0:
        return 0, _bins
    return 10 ** bins[cutoff[-1]], _bins
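# Hypothetical usage sketch (added; synthetic input, not from the original
# source): threshold a small image whose positive pixels span several decades,
# so the log10 histogram may contain an empty bin below the cutoff.
import numpy as np
np.random.seed(0)
morph = np.zeros((20, 20))
morph[5:15, 5:15] = 10 ** np.random.normal(0.0, 1.0, (10, 10))
threshold, n_bins = _threshold(morph)
print(threshold, n_bins)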
def plot_observed_spectrum(rm, ax=None):
    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 8))
    spectrum = rm.eigvals()
    nonsingular_spectrum = spectrum[np.abs(spectrum) > EPS]
    observed_nonsingular_mass = len(nonsingular_spectrum) / len(spectrum)
    hist, edges = np.histogram(nonsingular_spectrum, density=True)
    # Rescale so the histogram integrates to the non-singular mass fraction.
    hist = hist * observed_nonsingular_mass
    # Pad with zeros so the step plot closes at both ends.
    extended_hist = [0] + list(hist) + [0]
    extended_edges = list(edges) + [edges[-1] + EPS]
    ax.step(
        extended_edges, extended_hist,
        lw=format.LINEWIDTH, label="Empirical", color=EMPIRICAL_COLOR)
    ax.set_ylabel(r"$\rho$", fontdict={"size": "large"})
    ax.set_xlabel(r"$\lambda$", fontdict={"size": "large"})
    return ax
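# Hypothetical usage sketch (added; 'rm' is assumed to expose an eigvals()
# method, and EPS, EMPIRICAL_COLOR, and format.LINEWIDTH are module-level
# constants in the original source):
# ax = plot_observed_spectrum(random_matrix)  # 'random_matrix' is assumed
# ax.legend()
# plt.show()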
ax.set_xlim(X.min(), X.max())
ax.set_ylim(Y.min(), Y.max())
ax.set_xlabel("period [years]")
ax.set_ylabel(r"$R_\mathrm{P} / R_\mathrm{J}$")
ax.set_xticks([3, 5, 10, 20])
ax.get_xaxis().set_major_formatter(pl.ScalarFormatter())
ax.set_yticks([0.2, 0.5, 1, 2])
ax.get_yaxis().set_major_formatter(pl.ScalarFormatter())

# Histograms.
# Top:
ax = pl.axes([0.1, 0.71, 0.6, 0.15])
x = np.exp(ln_period_bins) / 365.25
# Detection efficiency: recovered counts divided by injected counts per bin.
y = (
    np.histogram(np.log(rec.period), ln_period_bins)[0]
    / np.histogram(np.log(inj.period), ln_period_bins)[0]
)
# Duplicate bin edges and values to trace a step-style outline.
x = np.array(list(zip(x[:-1], x[1:]))).flatten()
y = np.array(list(zip(y, y))).flatten()
ax.plot(x, y, lw=1, color=COLORS["DATA"])
ax.fill_between(x, y, np.zeros_like(y), color=COLORS["DATA"], alpha=0.2)
ax.set_xlim(X.min(), X.max())
ax.set_ylim(0, 0.8)
ax.set_xscale("log")
ax.set_xticks([3, 5, 10, 20])
ax.set_xticklabels([])
ax.yaxis.set_major_locator(pl.MaxNLocator(3))

# Right:
ax = pl.axes([0.71, 0.1, 0.15, 0.6])
def count(self, t, *args, **kwargs):
    tau = self.get_param('tau', **kwargs)
    # np.histogram returns (counts, bin_edges); 'tau' supplies the bin edges.
    return np.histogram(t, tau, density=False)
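# Hypothetical usage sketch (added; assumes the enclosing class resolves
# 'tau' to an array of bin edges via get_param):
# counts, edges = model.count(event_times)  # 'model', 'event_times' assumed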
for i in range(n_samples):
    start = (i * bsize) % (x.shape[0] - bsize)
    xmb = np.copy(x[start:start + bsize])
    sample = sampler.step(xmb)
    samples.append(np.random.randn() * np.sqrt(np.exp(sample[1]) + 1e-16)
                   + sample[0])

true_step = 0.001
xs = np.arange(-6, 6, true_step)
nlls = np.zeros_like(xs)
# Use a distinct loop variable so we don't clobber the data matrix 'x' above.
for i, xi in enumerate(xs):
    nlls[i] = neg_log_like(np.array([mean, np.log(std ** 2)]), xi)

# compute likelihood
lls = np.exp(-nlls)
# approximately compute z via Euler integration
z = np.sum(lls) * true_step

# approximate density from the samples
step_sample = 0.1
xgrid = np.arange(-6, 6, step_sample)
ygrid = np.asarray(np.histogram(samples, xgrid, density=True)[0])

plt.plot(xs, lls / z, color='red')
plt.plot(xgrid[:len(ygrid)], ygrid)
plt.figure()
# Scatter the fine-grained (1500-bin) histogram counts on a second figure.
ygrid2 = np.histogram(samples, 1500)[0]
plt.scatter(np.arange(len(ygrid2)), ygrid2)
# plt.scatter(samples, samples)
plt.show()
def binVariable(var, binwidth=0.001):
    # Bin 'var' from time 0 to the end of the recording; 'b.lastBehaviorTime'
    # is assumed to be defined in the enclosing module.
    return np.histogram(var, bins=np.arange(0, b.lastBehaviorTime, binwidth))[0]
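# Hypothetical usage sketch (added; 'spike_times' and the module-level 'b'
# object are assumed, not from the original source):
# counts = binVariable(spike_times, binwidth=0.01)  # 10 ms bins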
def testMixtureOfGaussians(self):
    def log_joint(x, pi, z, mu, sigma_sq, alpha, sigma_sq_mu):
        log_p_pi = log_probs.dirichlet_gen_log_prob(pi, alpha)
        log_p_mu = log_probs.norm_gen_log_prob(mu, 0, np.sqrt(sigma_sq_mu))

        z_one_hot = one_hot(z, len(pi))
        log_p_z = np.einsum('ij,j->', z_one_hot, np.log(pi))

        mu_z = np.einsum('ij,jk->ik', z_one_hot, mu)
        log_p_x = log_probs.norm_gen_log_prob(x, mu_z, np.sqrt(sigma_sq))

        return log_p_pi + log_p_z + log_p_mu + log_p_x

    n_clusters = 5
    n_dimensions = 2
    n_observations = 200

    alpha = 3.3 * np.ones(n_clusters)
    sigma_sq_mu = 1.5 ** 2
    sigma_sq = 0.5 ** 2

    np.random.seed(10001)

    pi = np.random.gamma(alpha)
    pi /= pi.sum()
    mu = np.random.normal(0, np.sqrt(sigma_sq_mu), [n_clusters, n_dimensions])
    z = np.random.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = np.random.normal(mu[z, :], sigma_sq)

    pi_est = np.ones(n_clusters) / n_clusters
    z_est = np.random.choice(np.arange(n_clusters), size=n_observations,
                             p=pi_est)
    mu_est = np.random.normal(0., 0.01, [n_clusters, n_dimensions])

    all_args = [x, pi_est, z_est, mu_est, sigma_sq, alpha, sigma_sq_mu]
    pi_posterior_args = all_args[:1] + all_args[2:]
    z_posterior_args = all_args[:2] + all_args[3:]
    mu_posterior_args = all_args[:3] + all_args[4:]

    pi_posterior = complete_conditional(log_joint, 1, SupportTypes.SIMPLEX,
                                        *all_args)
    z_posterior = complete_conditional(log_joint, 2, SupportTypes.INTEGER,
                                       *all_args)
    mu_posterior = complete_conditional(log_joint, 3, SupportTypes.REAL,
                                        *all_args)

    self.assertTrue(np.allclose(
        pi_posterior(*pi_posterior_args).alpha,
        alpha + np.histogram(z_est, np.arange(n_clusters + 1))[0]))

    correct_z_logits = -0.5 / sigma_sq * np.square(
        x[:, :, None] - mu_est.T[None, :, :]).sum(1)
    correct_z_logits += np.log(pi_est)
    correct_z_posterior = np.exp(
        correct_z_logits - misc.logsumexp(correct_z_logits, 1, keepdims=True))
    self.assertTrue(np.allclose(correct_z_posterior,
                                z_posterior(*z_posterior_args).p))

    correct_mu_posterior_mean = np.zeros_like(mu_est)
    correct_mu_posterior_var = np.zeros_like(mu_est)
    for k in range(n_clusters):
        n_k = (z_est == k).sum()
        correct_mu_posterior_var[k] = 1. / (1. / sigma_sq_mu + n_k / sigma_sq)
        correct_mu_posterior_mean[k] = (
            x[z_est == k].sum(0) / sigma_sq * correct_mu_posterior_var[k])
    mu_posterior_val = mu_posterior(*mu_posterior_args)
    self.assertTrue(np.allclose(correct_mu_posterior_mean,
                                mu_posterior_val.args[0]))
    self.assertTrue(np.allclose(correct_mu_posterior_var,
                                mu_posterior_val.args[1] ** 2))
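# Background note (added): the three checks above mirror the standard
# conjugate updates for a Gaussian mixture. With a Dirichlet(alpha) prior,
# the complete conditional for pi is Dirichlet(alpha + counts), where the
# counts come from a histogram over the integer assignments z_est; for each
# cluster mean, the Gaussian complete conditional has
#
#     var  = 1 / (1 / sigma_sq_mu + n_k / sigma_sq)
#     mean = (sum of assigned x) / sigma_sq * var
#
# which is exactly what the loop over clusters computes.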