def _run(self):
    """
    Estimate the probability density from data using the DEFT algorithm.

    Builds the Laplacian for the configured grid, histograms the data on
    ``self.bin_edges``, runs ``deft_core.run`` and stores its return value
    in ``self.results``.  The density-valued fields of the results are then
    divided by the grid spacing ``self.step``.  The number of grid points
    (formerly called ``G``) is ``self.npts``.

    NOTE(review): ``data``, ``Z_eval``, ``num_Z_samples``, ``print_t``,
    ``tollerance``, ``resolution``, ``num_pt_samples``, ``fix_t_at_t_star``
    and ``max_log_evidence_ratio_drop`` are never bound inside this method.
    Presumably they are meant to come from ``self`` or from module scope --
    confirm and bind them explicitly.
    """
    laplace_operator = laplacian.Laplacian(self.periodic, self.alpha,
                                           self.npts)

    # Get histogram counts on the precomputed bin edges; the edges that
    # np.histogram returns are not needed.
    counts, _ = np.histogram(data, self.bin_edges)
    counts_total = sum(counts)

    # Compute initial t, capped at 0.
    t_start = min(0.0,
                  np.log(counts_total)
                  - 2 * self.alpha * np.log(self.alpha / self.step))

    # Do DEFT density estimation.
    # NOTE(review): the original call had no comma between ``self.npts``
    # and ``t_start`` (a syntax error).  The comma is restored here;
    # confirm the positional order against deft_core.run's signature.
    self.results = deft_core.run(
        counts, laplace_operator, Z_eval, num_Z_samples, self.npts,
        t_start, print_t, tollerance, resolution, num_pt_samples,
        fix_t_at_t_star, max_log_evidence_ratio_drop)

    # Normalize densities properly: divide by the grid spacing.
    self.results.step = self.step
    self.results.L = self.npts * self.step
    self.results.R /= self.step
    self.results.M /= self.step
    self.results.Q_star /= self.step
    # np.exp replaces the scipy-namespace alias sp.exp (same function).
    self.results.l_star = self.step * (
        np.exp(-self.results.t_star) * counts_total) ** (1 / (2. * self.alpha))
    for p in self.results.map_curve.points:
        p.Q /= self.step
    if num_pt_samples != 0:
        # The original referenced a bare, undefined ``results`` here.
        self.results.Q_samples /= self.step
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt

# Add parent directory to path so the deft modules can be imported
import sys
sys.path.append('../')

# Import deft modules
import laplacian

# Build and save one 2D Laplacian for every combination of smoothness
# order (alpha), boundary type and square grid size.
alphas = [1, 2, 3]
op_types = ['2d_bilateral', '2d_periodic']
Gs_per_side = [10, 20]
h = 1.0
directory = 'laplacians/'
for alpha in alphas:
    for op_type in op_types:
        for G_per_side in Gs_per_side:
            # Square grid: the same number of points along x and y.
            Gx = G_per_side
            Gy = G_per_side
            file_name = '%s_alpha_%d_G_%dx%d.pickle' % (op_type, alpha,
                                                        Gx, Gy)
            # Parenthesised single-argument print behaves identically on
            # Python 2 and Python 3.
            print('creating operator %s...' % file_name)
            op = laplacian.Laplacian(op_type, alpha, [Gx, Gy], [h, h])
            op.save(directory + file_name)
# Fragment of a 2D mutual-information benchmark.  Gx, Gy, x_centers,
# y_centers, settings, data, box, num_bins, alpha, N, data_type, resolution
# and tollerance are all bound outside the visible code.

# Tabulate the configured density expression on the grid and normalise the
# table so that it sums to one.
ps = sp.zeros([Gx, Gy])
for i, x in enumerate(x_centers):
    for j, y in enumerate(y_centers):
        # NOTE(review): eval() executes settings['pdf_py'] as Python code,
        # so it must never come from an untrusted source.  Presumably the
        # expression refers to the loop variables x and y -- do not rename
        # locals here without checking the settings files.
        ps[i, j] = eval(settings['pdf_py'])
Q_true = ps / ps.sum()

# Compute the true mutual information
mi_true = mutual_information(Q_true)

# Histogram data (raw counts, not normalised), then flatten to 1D
counts_2d, xs, ys = utils.histogram_2d(data, box, num_bins, \
    normalized=False)
counts = counts_2d.ravel()

# Get laplacian to use
Delta = laplacian.Laplacian('2d_bilateral', alpha, num_bins, [1., 1.])

# Compute maxent distribution for histogram, timing the deft_core.run call
print '\ndata_type=%s, alpha=%d, N=%d' % (data_type, alpha, N)
start_time = time.clock()
results = \
    deft_core.run(counts, Delta, resolution=resolution, tollerance=tollerance, details=True, num_samples=100, print_t = True, errorbars=False)
end_time = time.clock()
print 'compute_maxent_prob took %f sec' % (end_time - start_time)

# Compute mutual information estimate in bits: one slot per sample that
# deft_core.run reports in results.num_samples
entropy_start_time = time.clock()
num_samples = results.num_samples
mis = sp.zeros(num_samples)
def sample_from_deft_1d_prior(template_data, ell, G=100, alpha=3,
                              bbox=[-np.inf, np.inf], periodic=False):
    """
    Draw one density from the DEFT 1D prior on the grid of a template.

    The template data are histogrammed into ``G`` bins to fix the grid and
    the normalised histogram ``R``.  Gaussian coefficients are drawn for the
    non-kernel eigenvectors of the Laplacian; unless the kernel is
    one-dimensional, the kernel component is then obtained from
    ``maxent.compute_maxent_field`` using ``R``.  Sampling is repeated until
    that call reports success.

    Parameters
    ----------
    template_data : data passed to ``utils.histogram_counts_1d``.
    ell : length scale entering the eigenvalue scaling as
        ``ell**(2*alpha) * h**(-2*alpha)``.
    G : number of grid points.
    alpha : order of the Laplacian.
    bbox : bounding box forwarded to ``utils.histogram_counts_1d``.  The
        default list is only rebound, never mutated, by this function.
    periodic : if true use the '1d_periodic' operator, otherwise
        '1d_bilateral'.

    Returns
    -------
    (bin_centers, Q, R) where ``Q`` is ``utils.field_to_prob(phi) / h`` for
    the sampled field and ``R`` is the normalised histogram divided by the
    grid spacing ``h``.
    """
    # Create Laplacian
    op_type = '1d_periodic' if periodic else '1d_bilateral'
    Delta = laplacian.Laplacian(op_type, alpha, G, 1.0)

    # Get histogram counts and grid centers
    counts, bin_centers = utils.histogram_counts_1d(template_data, G,
                                                    bbox=bbox)
    R = 1. * counts / np.sum(counts)

    # Only the grid spacing is needed from the grid information
    _, h, _ = utils.grid_info_from_bin_centers_1d(bin_centers)

    # Split the eigenbasis into kernel and row-space parts
    kernel_dim = Delta._kernel_dim
    kernel_basis = Delta._eigenbasis[:, :kernel_dim]
    rowspace_basis = Delta._eigenbasis[:, kernel_dim:]
    rowspace_eigenvalues = ell**(2 * alpha) * h**(-2 * alpha) * \
        np.array(Delta._eigenvalues[kernel_dim:])

    # Loop-invariant pieces.  np.asmatrix is what np.mat aliased; the
    # alias itself no longer exists in NumPy 2.0.
    rowspace_basis_mat = np.asmatrix(rowspace_basis)
    coeff_scale = np.sqrt(2. * rowspace_eigenvalues)

    # Redraw until the maxent step succeeds.  The original also carried a
    # (commented-out) rejection test requiring phi_rowspace not to be
    # minimised at either end of the grid; it remains disabled, so each
    # iteration makes exactly one draw.
    while True:
        rowspace_coeffs = np.random.randn(G - kernel_dim) / coeff_scale

        # Construct rowspace phi as a column matrix
        phi_rowspace = rowspace_basis_mat * np.asmatrix(rowspace_coeffs).T

        if kernel_dim == 1:
            phi_kernel = np.zeros(phi_rowspace.shape)
            break

        # Construct full phi so that distribution matches moments of R
        phi_kernel, success = maxent.compute_maxent_field(
            R, kernel_basis, phi0=phi_rowspace, geo_dist_tollerance=1.0E-10)
        if success:
            break
        print('Maxent failure! Trying to sample again.')

    phi_rowspace = np.array(phi_rowspace).ravel()
    phi_kernel = np.array(phi_kernel).ravel()
    phi = phi_kernel + phi_rowspace

    # Return Q and R as densities (per unit length)
    Q = utils.field_to_prob(phi) / h
    R = R / h
    return bin_centers, Q, R
def run(data, G=100, alpha=3, bbox=[-np.inf, np.inf], periodic=False,
        resolution=3.14E-2, tollerance=1E-3, num_samples=100,
        errorbars=False, print_t=False, ell_guess=False,
        laplacian_dir='/Users/jkinney/github/15_deft/laplacians/'):
    """
    Run DEFT density estimation on 1D data.

    Parameters
    ----------
    data : data passed to ``utils.histogram_counts_1d``.
    G : number of grid points.
    alpha : order of the Laplacian.
    bbox : bounding box forwarded to ``utils.histogram_counts_1d``.  The
        default list is only rebound, never mutated, by this function.
    periodic : if true use the '1d_periodic' operator, otherwise
        '1d_bilateral'.
    resolution, tollerance, num_samples, errorbars, print_t : forwarded to
        ``deft_core.run``; ``print_t`` also enables progress messages here.
    ell_guess : if truthy, used in place of the default ``G/4`` (in grid
        units) to compute the starting value of t.
    laplacian_dir : directory prefix searched for a pre-computed Laplacian
        pickle.  It is concatenated directly with the file name, so it must
        end with a path separator.

    Returns
    -------
    The object returned by ``deft_core.run``, with densities divided by the
    grid spacing and grid, entropy and timing attributes added.
    """
    # time.clock() was removed in Python 3.8; use perf_counter when present.
    clock = getattr(time, 'perf_counter', None) or time.clock

    # Start clock
    start_time = clock()

    # Create Laplacian
    laplacian_start_time = clock()
    op_type = '1d_periodic' if periodic else '1d_bilateral'

    # Check for Laplacian on disk. Otherwise, create de novo.
    # NOTE(review): the file is a pickle; presumably laplacian.load
    # unpickles it, so laplacian_dir must only contain trusted files.
    file_name = '%s%s_alpha_%d_G_%d.pickle' % (laplacian_dir, op_type,
                                               alpha, G)
    if os.path.isfile(file_name):
        Delta = laplacian.load(file_name)
        if print_t:
            print('Laplacian loaded from disk')
    else:
        Delta = laplacian.Laplacian(op_type, alpha, G, 1.0)
        if print_t:
            print('Laplacian computed de novo')
    laplacian_compute_time = clock() - laplacian_start_time

    # Get histogram counts and grid centers
    counts, bin_centers = utils.histogram_counts_1d(data, G, bbox=bbox)
    N = sum(counts)

    # Get other information about grid
    bbox, h, bin_edges = utils.grid_info_from_bin_centers_1d(bin_centers)

    # Compute initial t.  np.log replaces the scipy-namespace alias; the two
    # agree for the positive arguments used here.
    if ell_guess:
        t_start = np.log(N) - 2.0 * alpha * np.log(ell_guess / h)
    else:
        t_start = np.log(N) - 2.0 * alpha * np.log(G / 4.0)
    if print_t:
        print('t_start == %0.2f' % t_start)

    # Do DEFT density estimation
    results = deft_core.run(counts, Delta, resolution=resolution,
                            tollerance=tollerance, details=True,
                            errorbars=errorbars, num_samples=num_samples,
                            t_start=t_start, print_t=print_t)

    # Fill in results
    copy_start_time = clock()

    # Normalize densities properly (divide by the grid spacing)
    results.h = h
    results.L = G * h
    results.R /= h
    results.Q_star /= h
    if results.errorbars:
        results.Q_lb /= h
        results.Q_ub /= h
    if results.num_samples > 0:
        results.Q_samples /= h
    for p in results.map_curve.points:
        p.Q /= h
        # NOTE(review): t_start above is built from N and h, but this
        # conversion uses G and omits h -- confirm which is intended.
        p.ell = (np.exp(-p.t) * G) ** (1 / (2. * alpha))

    # Get 1D-specific information
    results.bin_centers = bin_centers
    results.bin_edges = bin_edges
    results.periodic = periodic
    results.alpha = alpha
    results.bbox = bbox
    results.Delta = Delta
    copy_compute_time = clock() - copy_start_time

    # Compute differential entropy in bits
    entropy_start_time = clock()
    if results.num_samples > 1:
        # Size the array by the samples actually present so that a mismatch
        # with the num_samples argument cannot overflow it or leave
        # trailing zeros that would bias the mean and standard deviation.
        num_drawn = results.Q_samples.shape[1]
        entropies = np.zeros(num_drawn)
        for i in range(num_drawn):
            Q = results.Q_samples[:, i].ravel()
            # TINY_FLOAT64 keeps log2 finite where Q is exactly zero.
            entropies[i] = -np.sum(h * Q * np.log2(Q + utils.TINY_FLOAT64))

        # Compute mean and variance of the differential entropy
        results.entropies = entropies
        results.e_mean = np.mean(entropies)
        results.e_std = np.std(entropies)
    results.entropy_compute_time = clock() - entropy_start_time

    # Record execution time
    results.copy_compute_time = copy_compute_time
    results.laplacian_compute_time = laplacian_compute_time
    results.deft_1d_compute_time = clock() - start_time

    return results
#!/usr/local/bin/python
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt

# Add code and simulation directories to path so the deft modules can be
# imported
import sys
sys.path.append('../code/')
sys.path.append('../sim/')

# Import deft modules
import laplacian

# Build and save one 1D Laplacian for every combination of smoothness
# order (alpha), boundary type and grid size.
alphas = [1, 2, 3]
op_types = ['1d_bilateral', '1d_periodic']
Gs = [20, 50, 100, 200, 500, 1000, 2000]
h = 1.0
directory = '../laplacians/'
for alpha in alphas:
    for op_type in op_types:
        for G in Gs:
            file_name = '%s_alpha_%d_G_%d.pickle' % (op_type, alpha, G)
            # Parenthesised single-argument print behaves identically on
            # Python 2 and Python 3.
            print('creating operator %s...' % file_name)
            op = laplacian.Laplacian(op_type, alpha, G, h)
            op.save(directory + file_name)