Example #1
import numpy as np
from scipy.stats import entropy


def KL_symmetric_divergence(p1, p2, x_range, delta):
    # Bin edges of width `delta` spanning x_range (np.linspace needs an integer count).
    n_bins = int(round((x_range[1] - x_range[0]) / float(delta))) + 1
    bins = np.linspace(x_range[0], x_range[1], n_bins)
    p1_hist, p1_bin_edges = np.histogram(p1, bins)
    p2_hist, p2_bin_edges = np.histogram(p2, bins)

    p1_hist = p1_hist.astype('float64')
    p2_hist = p2_hist.astype('float64')

    # Give empty bins a tiny mass so the KL terms stay finite.
    p1_hist[p1_hist == 0] = 1e-25
    p2_hist[p2_hist == 0] = 1e-25
    return entropy(p1_hist, p2_hist, base=2) + entropy(p2_hist, p1_hist, base=2)
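A minimal usage sketch, not part of the original snippet; the sample arrays, range, and bin width below are made up for illustration:

# Hypothetical usage: symmetric KL between two sets of Gaussian draws,
# histogrammed on [-5, 5] with 0.1-wide bins.
samples_a = np.random.normal(0.0, 1.0, size=10000)
samples_b = np.random.normal(0.5, 1.2, size=10000)
print(KL_symmetric_divergence(samples_a, samples_b, (-5, 5), 0.1))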
  def testDirichletCategorical(self):
    def log_joint(p, x, alpha):
      log_prior = np.sum((alpha - 1) * np.log(p))
      log_prior += -special.gammaln(alpha).sum() + special.gammaln(alpha.sum())
      # TODO(mhoffman): We should make it possible to only use one-hot
      # when necessary.
      one_hot_x = one_hot(x, alpha.shape[0])
      log_likelihood = np.sum(np.dot(one_hot_x, np.log(p)))
      return log_prior + log_likelihood
    vocab_size = 5
    n_examples = 11
    alpha = 1.3 * np.ones(vocab_size)
    p = np.random.gamma(alpha, 1.)
    p /= p.sum(-1, keepdims=True)
    x = np.random.choice(np.arange(vocab_size), n_examples, p=p)

    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.SIMPLEX,
                                   p, x, alpha))

    new_alpha = alpha + np.histogram(x, np.arange(vocab_size + 1))[0]
    correct_marginalized_value = (
        -special.gammaln(alpha).sum() + special.gammaln(alpha.sum()) +
        special.gammaln(new_alpha).sum() - special.gammaln(new_alpha.sum()))
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)
    self.assertTrue(np.allclose(new_alpha, conditional.alpha))
    def print_perf(gen_trainable_params, dsc_trainable_params, init_params_max, init_params_min, iter):
        if iter % 10 == 0:

            ability = np.mean(objective(gen_trainable_params, dsc_trainable_params, iter))

            subspace_training = init_params_max[3]
            gen_nn_params = get_params_from_subspace(gen_trainable_params, init_params_max[1], init_params_max[2])
            dsc_nn_params = get_params_from_subspace(dsc_trainable_params, init_params_min[1], init_params_min[2])
            input_params = gen_trainable_params if subspace_training else gen_nn_params

            # TODO(sorenmind): REMOVE!
            # dsc_trainable_params = np.zeros(len(dsc_trainable_params))

            fake_data = generate_from_noise(gen_nn_params, 1000, latent_dim, seed, dsc_trainable_params)
            real_data = sample_true_data_dist(100, seed)
            probs_fake = np.mean(np.exp(disc(dsc_nn_params, input_params, fake_data)))
            probs_real = np.mean(np.exp(disc(dsc_nn_params, input_params, real_data)))
            print("{:15}|{:20}|{:20}|{:20}".format(iter, ability, probs_fake, probs_real))

            # Plot data and functions.
            figrange = (-1, 3)
            plot_inputs = np.expand_dims(np.linspace(*figrange, num=400), 1)
            outputs = np.exp(true_data_dist_logprob(plot_inputs))
            discvals = sigmoid(disc(dsc_nn_params, input_params, plot_inputs))

            h, b = np.histogram(fake_data, bins=100, range=figrange, density=True)

            plt.cla()
            ax.plot(plot_inputs, outputs, 'g-')
            ax.plot(plot_inputs, discvals, 'r-')
            ax.plot(b[:-1], h, 'b-')
            ax.set_ylim([0, 3])
            plt.draw()
            plt.pause(1.0 / 60.0)
    def print_perf(gen_params, gen_layer_sizes, subspace_params, dsc_params,
                   iter):
        if iter % 10 == 0:
            ability = np.mean(
                objective(gen_params, gen_layer_sizes, dsc_params,
                          dsc_layer_sizes, iter))
            fake_data = generate_from_noise(gen_params, gen_layer_sizes,
                                            subspace_params, 1000, latent_dim,
                                            seed)
            real_data = sample_true_data_dist(100, seed)
            probs_fake = np.mean(
                np.exp(disc(dsc_params, gen_params, fake_data)))
            probs_real = np.mean(
                np.exp(disc(dsc_params, gen_params, real_data)))
            print("{:15}|{:20}|{:20}|{:20}".format(iter, ability, probs_fake,
                                                   probs_real))

            # Plot data and functions.
            figrange = (-1, 3)
            plot_inputs = np.expand_dims(np.linspace(*figrange, num=400), 1)
            outputs = np.exp(true_data_dist_logprob(plot_inputs))
            discvals = sigmoid(disc(dsc_params, gen_params, plot_inputs))

            h, b = np.histogram(fake_data,
                                bins=100,
                                range=figrange,
                                density=True)

            plt.cla()
            ax.plot(plot_inputs, outputs, 'g-')
            ax.plot(plot_inputs, discvals, 'r-')
            ax.plot(b[:-1], h, 'b-')
            ax.set_ylim([0, 3])
            plt.draw()
            plt.pause(1.0 / 60.0)
Example #5
    def feature_distributions(self, x):
        # create figure
        fig = plt.figure(figsize=(10, 4))

        # create subplots
        N = x.shape[0]
        gs = 0
        if N <= 5:
            gs = gridspec.GridSpec(1, N)
        else:
            gs = gridspec.GridSpec(2, 5)

        # remove whitespace from figure
        fig.subplots_adjust(left=0, right=1, bottom=0,
                            top=1)  # remove whitespace
        fig.subplots_adjust(wspace=0.01, hspace=0.01)

        # loop over input and plot each individual input dimension value
        all_bins = []
        for n in range(N):
            hist, bins = np.histogram(x[n, :], bins=30)
            all_bins.append(bins.ravel())

        # determine range for all subplots
        maxview = np.max(all_bins)
        minview = np.min(all_bins)
        viewrange = (maxview - minview) * 0.1
        maxview += viewrange
        minview -= viewrange

        for n in range(N):
            # make subplot
            ax = plt.subplot(gs[n])
            hist, bins = np.histogram(x[n, :], bins=30)
            width = 0.7 * (bins[1] - bins[0])
            center = (bins[:-1] + bins[1:]) / 2
            ax.barh(center, hist, width)
            ax.set_title(r'$x_' + str(n + 1) + '$', fontsize=14)
            ax.set_ylim([minview, maxview])
        plt.show()
    def single_layer_distributions(self, u, x, axs):
        # loop over input and plot each individual input dimension value
        all_bins = []
        N = x.shape[0]
        for n in range(N):
            hist, bins = np.histogram(x[n, :], bins=30)
            all_bins.append(bins.ravel())

        # determine range for all subplots
        maxview = np.max(all_bins)
        minview = np.min(all_bins)
        viewrange = (maxview - minview) * 0.1
        maxview += viewrange
        minview -= viewrange

        for n in range(N):
            ax = axs[n]
            hist, bins = np.histogram(x[n, :], bins=30)
            width = 0.7 * (bins[1] - bins[0])
            center = (bins[:-1] + bins[1:]) / 2
            ax.barh(center, hist, width)
            ax.set_title(r'$f_' + str(n + 1) + '^{(' + str(u + 1) + ')}$',
                         fontsize=14)
            ax.set_ylim([minview, maxview])
Example #7
def _threshold(morph):
    """Find the threshold value for a given morphology
    """
    _morph = morph[morph > 0]
    _bins = 50
    # Decrease the bin size for sources with a small number of pixels
    if _morph.size < 500:
        _bins = max(int(_morph.size / 10), 1)
        if _bins == 1:
            return 0, _bins
    hist, bins = np.histogram(np.log10(_morph).reshape(-1), _bins)
    cutoff = np.where(hist == 0)[0]
    # If all of the pixels are used there is no need to threshold
    if len(cutoff) == 0:
        return 0, _bins
    return 10**bins[cutoff[-1]], _bins
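A minimal usage sketch, not from the original source; the synthetic morphology array below is just for illustration:

import numpy as np

# Hypothetical input: a smooth, centrally peaked 51x51 "morphology".
yy, xx = np.mgrid[-25:26, -25:26]
morph = np.exp(-(xx ** 2 + yy ** 2) / 50.0)

thresh, n_bins = _threshold(morph)
print(thresh, n_bins)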
Example #8
def plot_observed_spectrum(rm, ax=None):
    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 8))

    spectrum = rm.eigvals()
    nonsingular_spectrum = spectrum[np.abs(spectrum) > EPS]
    observed_nonsingular_mass = len(nonsingular_spectrum) / len(spectrum)
    hist, edges = np.histogram(
        nonsingular_spectrum, density=True)
    hist = hist * observed_nonsingular_mass

    extended_hist = [0] + list(hist) + [0]
    extended_edges = list(edges) + [edges[-1] + EPS]

    ax.step(
        extended_edges, extended_hist,
        lw=format.LINEWIDTH,
        label="Empirical",
        color=EMPIRICAL_COLOR)

    ax.set_ylabel(r"$\rho$", fontdict={"size": "large"})
    ax.set_xlabel(r"$\lambda$", fontdict={"size": "large"})

    return ax
Example #9
ax.set_xlim(X.min(), X.max())
ax.set_ylim(Y.min(), Y.max())
ax.set_xlabel("period [years]")
ax.set_ylabel("$R_\mathrm{P} / R_\mathrm{J}$")
ax.set_xticks([3, 5, 10, 20])
ax.get_xaxis().set_major_formatter(pl.ScalarFormatter())
ax.set_yticks([0.2, 0.5, 1, 2])
ax.get_yaxis().set_major_formatter(pl.ScalarFormatter())

# Histograms.

# Top:
ax = pl.axes([0.1, 0.71, 0.6, 0.15])
x = np.exp(ln_period_bins) / 365.25
y = (
    np.histogram(np.log(rec.period), ln_period_bins)[0] /
    np.histogram(np.log(inj.period), ln_period_bins)[0]
)
x = np.array(list(zip(x[:-1], x[1:]))).flatten()
y = np.array(list(zip(y, y))).flatten()
ax.plot(x, y, lw=1, color=COLORS["DATA"])
ax.fill_between(x, y, np.zeros_like(y), color=COLORS["DATA"], alpha=0.2)
ax.set_xlim(X.min(), X.max())
ax.set_ylim(0, 0.8)
ax.set_xscale("log")
ax.set_xticks([3, 5, 10, 20])
ax.set_xticklabels([])
ax.yaxis.set_major_locator(pl.MaxNLocator(3))

# Right:
ax = pl.axes([0.71, 0.1, 0.15, 0.6])
    def count(self, t, *args, **kwargs):
        tau = self.get_param('tau', **kwargs)
        return np.histogram(t, tau, density=False)
Example #11
for i in range(n_samples):
    start = (i * bsize) % (x.shape[0] - bsize)
    xmb = np.copy(x[start:start + bsize])
    sample = sampler.step(xmb)

    samples.append(np.random.randn() * np.sqrt(np.exp(sample[1]) + 1e-16) +
                   sample[0])

true_step = 0.001
xs = np.arange(-6, 6, true_step)
nlls = np.zeros_like(xs)
for i, x in enumerate(xs):
    nlls[i] = neg_log_like(np.array([mean, np.log(std**2)]), x)

# compute likelihood
lls = np.exp(-nlls)
# approximately compute z via euler integration
z = np.sum(lls) * true_step

# approximate density from the samples
step_sample = 0.1
xgrid = np.arange(-6, 6, step_sample)
ygrid = np.asarray(np.histogram(samples, xgrid, density=True)[0])
plt.plot(xs, lls / z, color='red')
plt.plot(xgrid[:len(ygrid)], ygrid)
plt.figure()
ygrid2 = np.histogram(samples, 1500)[0]
plt.scatter(np.arange(len(ygrid2)), ygrid2)
#plt.scatter(samples, samples)
plt.show()
def binVariable(var, binwidth=0.001):
    return np.histogram(var, bins=np.arange(0, b.lastBehaviorTime,
                                            binwidth))[0]
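A minimal usage sketch, with a hypothetical stand-in for the module-level `b` object the function reads; only a `lastBehaviorTime` attribute (in seconds) is assumed:

import numpy as np
from types import SimpleNamespace

# Hypothetical recording object; the real `b` comes from the surrounding module.
b = SimpleNamespace(lastBehaviorTime=10.0)

# Bin 1000 random event times into 1 ms bins over [0, lastBehaviorTime).
event_times = np.random.uniform(0, b.lastBehaviorTime, size=1000)
counts = binVariable(event_times)
print(counts.sum(), len(counts))  # total events counted and number of bins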
  def testMixtureOfGaussians(self):
    def log_joint(x, pi, z, mu, sigma_sq, alpha, sigma_sq_mu):
      log_p_pi = log_probs.dirichlet_gen_log_prob(pi, alpha)
      log_p_mu = log_probs.norm_gen_log_prob(mu, 0, np.sqrt(sigma_sq_mu))

      z_one_hot = one_hot(z, len(pi))
      log_p_z = np.einsum('ij,j->', z_one_hot, np.log(pi))

      mu_z = np.einsum('ij,jk->ik', z_one_hot, mu)
      log_p_x = log_probs.norm_gen_log_prob(x, mu_z, np.sqrt(sigma_sq))

      return log_p_pi + log_p_z + log_p_mu + log_p_x

    n_clusters = 5
    n_dimensions = 2
    n_observations = 200

    alpha = 3.3 * np.ones(n_clusters)
    sigma_sq_mu = 1.5 ** 2
    sigma_sq = 0.5 ** 2

    np.random.seed(10001)

    pi = np.random.gamma(alpha)
    pi /= pi.sum()
    mu = np.random.normal(0, np.sqrt(sigma_sq_mu), [n_clusters, n_dimensions])
    z = np.random.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = np.random.normal(mu[z, :], np.sqrt(sigma_sq))

    pi_est = np.ones(n_clusters) / n_clusters
    z_est = np.random.choice(np.arange(n_clusters), size=n_observations,
                             p=pi_est)
    mu_est = np.random.normal(0., 0.01, [n_clusters, n_dimensions])

    all_args = [x, pi_est, z_est, mu_est, sigma_sq, alpha, sigma_sq_mu]
    pi_posterior_args = all_args[:1] + all_args[2:]
    z_posterior_args = all_args[:2] + all_args[3:]
    mu_posterior_args = all_args[:3] + all_args[4:]

    pi_posterior = complete_conditional(log_joint, 1, SupportTypes.SIMPLEX,
                                        *all_args)
    z_posterior = complete_conditional(log_joint, 2, SupportTypes.INTEGER,
                                       *all_args)
    mu_posterior = complete_conditional(log_joint, 3, SupportTypes.REAL,
                                        *all_args)

    self.assertTrue(np.allclose(
        pi_posterior(*pi_posterior_args).alpha,
        alpha + np.histogram(z_est, np.arange(n_clusters+1))[0]))

    correct_z_logits = -0.5 / sigma_sq * np.square(x[:, :, None] -
                                                   mu_est.T[None, :, :]).sum(1)
    correct_z_logits += np.log(pi_est)
    correct_z_posterior = np.exp(correct_z_logits -
                                 special.logsumexp(correct_z_logits, 1,
                                                   keepdims=True))
    self.assertTrue(np.allclose(correct_z_posterior,
                                z_posterior(*z_posterior_args).p))

    correct_mu_posterior_mean = np.zeros_like(mu_est)
    correct_mu_posterior_var = np.zeros_like(mu_est)
    for k in range(n_clusters):
      n_k = (z_est == k).sum()
      correct_mu_posterior_var[k] = 1. / (1. / sigma_sq_mu + n_k / sigma_sq)
      correct_mu_posterior_mean[k] = (
          x[z_est == k].sum(0) / sigma_sq * correct_mu_posterior_var[k])
    mu_posterior_val = mu_posterior(*mu_posterior_args)
    self.assertTrue(np.allclose(correct_mu_posterior_mean,
                                mu_posterior_val.args[0]))
    self.assertTrue(np.allclose(correct_mu_posterior_var,
                                mu_posterior_val.args[1] ** 2))