def make_pinwheel_data(num_classes, num_per_class, rate=2.0, noise_std=0.001): spoke_angles = np.linspace(0, 2*np.pi, num_classes+1)[:-1] rs = npr.RandomState(0) x = np.linspace(0.1, 1, num_per_class) xs = np.concatenate([rate *x * np.cos(angle + x * rate) + noise_std * rs.randn(num_per_class) for angle in spoke_angles]) ys = np.concatenate([rate *x * np.sin(angle + x * rate) + noise_std * rs.randn(num_per_class) for angle in spoke_angles]) return np.concatenate([np.expand_dims(xs, 1), np.expand_dims(ys,1)], axis=1)
def make_pinwheel_data(num_spokes=5, points_per_spoke=40, rate=1.0, noise_std=0.005): """Make synthetic data in the shape of a pinwheel.""" spoke_angles = np.linspace(0, 2 * np.pi, num_spokes + 1)[:-1] rs = npr.RandomState(0) x = np.linspace(0.1, 1, points_per_spoke) xs = np.concatenate([x * np.cos(angle + x * rate) + noise_std * rs.randn(len(x)) for angle in spoke_angles]) ys = np.concatenate([x * np.sin(angle + x * rate) + noise_std * rs.randn(len(x)) for angle in spoke_angles]) return np.concatenate([np.expand_dims(xs, 1), np.expand_dims(ys,1)], axis=1)
def plot_gmm(params, ax, num_points=100):
    """Draw one ellipse per mixture component onto ``ax``.

    Each component's Cholesky factor maps a radius-2 circle to the
    corresponding covariance ellipse centered at the component mean.
    """
    thetas = np.expand_dims(np.linspace(0, 2 * np.pi, num_points), 1)
    unit_circle = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=1)
    circle = unit_circle * 2.0  # 2-sigma-ish contour radius
    for log_prop, mu, chol in zip(*unpack_params(params)):
        ellipse = mu + np.dot(circle, chol)
        ax.plot(ellipse[:, 0], ellipse[:, 1], '-')
def predictions(weights, inputs):
    """weights is shape (num_weight_samples x num_weights)
    inputs is shape (num_datapoints x D)"""
    # Broadcast the inputs across the leading weight-sample axis, then run
    # a forward pass for every weight sample at once via einsum.
    acts = np.expand_dims(inputs, 0)
    for W, b in unpack_layers(weights):
        linear = np.einsum('mnd,mdo->mno', acts, W) + b
        acts = nonlinearity(linear)
    # NOTE: returns the final layer's pre-activation, not `acts`.
    return linear
def log_marginal_likelihood(params, data):
    """Sum over datapoints of log sum_k pi_k N(x | mu_k, Sigma_k)."""
    component_lls = []
    for log_weight, mu, chol in zip(*unpack_params(params)):
        # Rebuild the covariance from its Cholesky factor; the small
        # diagonal jitter keeps it numerically positive definite.
        cov = np.dot(chol.T, chol) + 0.000001 * np.eye(D)
        lls = log_weight + mvn.logpdf(data, mu, cov)
        component_lls.append(np.expand_dims(lls, axis=0))
    stacked = np.concatenate(component_lls, axis=0)
    # logsumexp over components = log of the mixture density per point.
    return np.sum(logsumexp(stacked, axis=0))
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances
    of each mixture component.  The covariance matrices are parametrized by
    their Cholesky decompositions."""
    raw_log_props = parser.get(params, 'log proportions')
    # Normalize so the proportions sum to one in log space.
    log_props = raw_log_props - logsumexp(raw_log_props)
    means = parser.get(params, 'means')
    # Strictly-lower-triangular part plus an exponentiated (hence positive)
    # diagonal gives a valid Cholesky factor per component.
    strict_lower = np.tril(parser.get(params, 'lower triangles'), k=-1)
    diags = np.exp(parser.get(params, 'log diagonals'))
    chols = np.concatenate(
        [np.expand_dims(tri + np.diag(d), 0)
         for tri, d in zip(strict_lower, diags)],
        axis=0)
    return log_props, means, chols
def callback(params, t, g):
    """Optimization callback: report the bound and plot posterior samples."""
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    # Sample functions from posterior.  The seed is fixed so the same set
    # of weight samples is drawn every iteration, which keeps the plotted
    # curves visually comparable across frames.
    rng = npr.RandomState(0)
    post_mean, post_log_std = unpack_params(params)
    weight_samples = rng.randn(10, num_weights) * np.exp(post_log_std) + post_mean
    grid = np.linspace(-8, 8, num=400)
    curves = predictions(weight_samples, np.expand_dims(grid, 1))

    # Plot data and functions.
    plt.cla()
    ax.plot(inputs.ravel(), targets.ravel(), 'bx')
    ax.plot(grid, curves[:, :, 0].T)
    ax.set_ylim([-2, 3])
    plt.draw()
    plt.pause(1.0/60.0)
def rbf_covariance(kernel_params, x, xp):
    """RBF (squared-exponential) kernel matrix between x and xp.

    kernel_params[0] is the log output scale; kernel_params[1:] are log
    lengthscales, one per input dimension (ARD).
    """
    amplitude = np.exp(kernel_params[0])
    lengthscales = np.exp(kernel_params[1:])
    # Pairwise differences in lengthscale-normalized coordinates:
    # shape (len(x), len(xp), D) via broadcasting.
    scaled_x = x / lengthscales
    scaled_xp = xp / lengthscales
    deltas = np.expand_dims(scaled_x, 1) - np.expand_dims(scaled_xp, 0)
    return amplitude * np.exp(-0.5 * np.sum(deltas ** 2, axis=2))
# NOTE(review): this chunk begins mid-function -- the `return`/`elif` lines
# below are the tail of a symmetrizing helper (presumably `lower_half`,
# referenced by `covgrad`) whose `def` line is outside the visible source.
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        # Batched case: swap the last two axes to transpose each matrix.
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    # Outer product of a vector with itself; for a 2-D input, a batch of
    # row-wise outer products (one per row).
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    # Gradient of the multivariate-normal logpdf w.r.t. `cov`, restricted to
    # the lower-triangular half (cov is symmetric, so only one half is free).
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

# Gradient registrations for the multivariate-normal logpdf using the old
# autograd `defgrad` API: each lambda returns a vector-Jacobian product,
# and `unbroadcast` sums the result back to the argument's original shape.
# d/dx logpdf = -inv(cov)(x - mean); d/dmean is its negation.
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x, lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g: np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov, lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x, lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g: g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov, lambda g: -g * ans * covgrad(x, mean, cov)), argnum=2)

# Entropy of a multivariate normal does not depend on the mean (argnum 0);
# w.r.t. the covariance it is 0.5 * inv(cov)^T.
entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g: 0.5 * g * np.linalg.inv(cov).T), argnum=1)
# Wrap scipy.special gamma-family functions as autograd primitives and
# register their derivatives via the old `defgrad` API.  `polygamma`, `psi`
# and `digamma` are assumed wrapped earlier in this file (outside this view).
gamma = primitive(scipy.special.gamma)
gammaln = primitive(scipy.special.gammaln)
gammasgn = primitive(scipy.special.gammasgn)
rgamma = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

# gammasgn is piecewise constant, so its derivative is zero wherever defined.
gammasgn.defgrad_is_zero()
# polygamma's order argument n is an integer -- no gradient w.r.t. argnum 0;
# d/dx polygamma(n, x) = polygamma(n + 1, x).
polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
# psi and digamma are the same function; psi'(x) = polygamma(1, x).
psi.defgrad(    lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
# Gamma'(x) = Gamma(x) * psi(x); note `ans` is Gamma(x) here.
gamma.defgrad(  lambda ans, x: lambda g: g * ans * psi(x))
# d/dx log Gamma(x) = psi(x).
gammaln.defgrad(lambda ans, x: lambda g: g * psi(x))
# d/dx 1/Gamma(x) = -psi(x) / Gamma(x).
rgamma.defgrad( lambda ans, x: lambda g: g * psi(x) / -gamma(x))
# d/da log Gamma_d(a) = sum_{j=0..d-1} digamma(a - j/2); d is discrete.
multigammaln.defgrad(lambda ans, a, d: lambda g: g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d)/2.), -1))
multigammaln.defgrad_is_zero(argnums=(1,))

### Bessel functions ###
j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

# Standard Bessel recurrences: J0' = -J1, Y0' = -Y1,
# J1' = (J0 - J2)/2.
j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)