Exemple #1
0
    def _run(self):
        """
        Estimates the probability density from data using the DEFT algorithm.

        Builds a Laplacian from ``self.periodic``, ``self.alpha`` and
        ``self.npts``, histograms ``data`` on ``self.bin_edges``, passes the
        counts to ``deft_core.run`` and stores the returned object in
        ``self.results`` after dividing its density-valued fields by
        ``self.step``.

        NOTE(review): ``data``, ``Z_eval``, ``num_Z_samples``, ``print_t``,
        ``tollerance``, ``resolution``, ``num_pt_samples``,
        ``fix_t_at_t_star`` and ``max_log_evidence_ratio_drop`` are not bound
        anywhere in this method. They must be supplied by an enclosing scope
        or are leftovers of an unfinished refactor -- TODO confirm.
        """

        laplace_operator = laplacian.Laplacian(self.periodic, self.alpha,
                                               self.npts)

        # Histogram the data on the pre-computed bin edges
        counts, _ = np.histogram(data, self.bin_edges)
        counts_total = counts.sum()

        # Compute initial t (never allowed to start above zero)
        t_start = min(0.0,
                      np.log(counts_total)
                      - 2 * self.alpha * np.log(self.alpha / self.step))

        # Do DEFT density estimation
        self.results = deft_core.run(
            counts,
            laplace_operator,
            Z_eval,
            num_Z_samples,
            # NOTE(review): this argument line was garbled in the previous
            # revision (it read "self.npts       t_start," with no comma,
            # which does not parse). The stray ``self.npts`` token was
            # dropped as an apparent editing artifact; verify the positional
            # signature of deft_core.run before relying on this call.
            t_start,
            print_t,
            tollerance,
            resolution,
            num_pt_samples,
            fix_t_at_t_star,
            max_log_evidence_ratio_drop)

        # Normalize densities properly: results are divided by the grid
        # spacing here, lengths are multiplied by it.
        self.results.step = self.step
        self.results.L = self.npts * self.step
        self.results.R /= self.step
        self.results.M /= self.step
        self.results.Q_star /= self.step
        self.results.l_star = self.step * (
            np.exp(-self.results.t_star) * counts_total)**(1/(2.*self.alpha))
        for p in self.results.map_curve.points:
            p.Q /= self.step
        if num_pt_samples != 0:
            # Previously referenced a bare ``results`` name, which is never
            # defined in this method; the samples live on self.results.
            self.results.Q_samples /= self.step
Exemple #2
0
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt

# Add parent directory to path
import sys

sys.path.append('../')

# Import deft modules
import laplacian

# Make 2d laplacians: one pickled operator for every combination of
# alpha, boundary type and (square) grid size listed below.
alphas = [1, 2, 3]
op_types = ['2d_bilateral', '2d_periodic']
Gs_per_side = [10, 20]
h = 1.0
directory = 'laplacians/'

for alpha in alphas:
    for op_type in op_types:
        for G_per_side in Gs_per_side:
            # Square grid: same number of points along x and y
            Gx = G_per_side
            Gy = G_per_side
            file_name = '%s_alpha_%d_G_%dx%d.pickle' % (op_type, alpha, Gx, Gy)
            # Parenthesized single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('creating operator %s...' % file_name)
            op = laplacian.Laplacian(op_type, alpha, [Gx, Gy], [h, h])
            op.save(directory + file_name)
Exemple #3
0
# Evaluate the configured density expression at every grid point.
# NOTE(review): eval() executes whatever Python text is stored in
# settings['pdf_py']; this is only acceptable if the settings come from a
# trusted source. The expression presumably refers to the loop variables
# x and y, so those names are kept as they are -- confirm before renaming.
ps = sp.zeros([Gx, Gy])
for i, x in enumerate(x_centers):
    for j, y in enumerate(y_centers):
        ps[i, j] = eval(settings['pdf_py'])
Q_true = ps / ps.sum()

# Compute the true mutual information
mi_true = mutual_information(Q_true)

# Histogram data
counts_2d, xs, ys = utils.histogram_2d(data, box, num_bins,
                                       normalized=False)
counts = counts_2d.ravel()

# Get laplacian to use
Delta = laplacian.Laplacian('2d_bilateral', alpha, num_bins, [1., 1.])

# Compute maxent distribution for histogram
# (print calls use the parenthesized single-argument form, which behaves
# the same on Python 2 and parses on Python 3)
# NOTE(review): time.clock() was removed in Python 3.8. It is left in place
# here because entropy_start_time is presumably compared against another
# time.clock() reading further down the script; switch every timing call in
# the script to a single clock (e.g. time.perf_counter) in one change.
print('\ndata_type=%s, alpha=%d, N=%d' % (data_type, alpha, N))
start_time = time.clock()
results = deft_core.run(counts, Delta, resolution=resolution,
                        tollerance=tollerance, details=True, num_samples=100,
                        print_t=True, errorbars=False)
end_time = time.clock()
print('compute_maxent_prob took %f sec' % (end_time - start_time))

# Compute mutual information estimate in bits
entropy_start_time = time.clock()
num_samples = results.num_samples
mis = sp.zeros(num_samples)
Exemple #4
0
def sample_from_deft_1d_prior(template_data, ell, G=100, alpha=3,
    bbox=None, periodic=False):
    """
    Draw one random density on a 1D grid from a DEFT prior.

    The component of the field outside the Laplacian's kernel is drawn at
    random with per-mode scale 1/sqrt(2*eigenvalue). Unless the kernel is
    one-dimensional, the kernel component is then obtained from
    ``maxent.compute_maxent_field`` using the histogram of
    ``template_data``; the draw is repeated until that call reports success.

    Parameters:
        template_data: data passed to ``utils.histogram_counts_1d``.
        ell: length scale entering the eigenvalue scaling
            ``ell**(2*alpha) * h**(-2*alpha)``.
        G: number of grid points.
        alpha: order passed to the Laplacian.
        bbox: bounding box forwarded to ``utils.histogram_counts_1d``;
            ``None`` (the default) means ``[-inf, inf]``.
        periodic: use the '1d_periodic' operator instead of '1d_bilateral'.

    Returns:
        (bin_centers, Q, R): the grid centers, the sampled field converted
        with ``utils.field_to_prob`` and divided by the grid spacing, and
        the normalized histogram divided by the grid spacing.
    """
    # A fresh list per call avoids sharing one mutable default; np.inf is
    # used because the np.Inf alias no longer exists in NumPy 2.0.
    if bbox is None:
        bbox = [-np.inf, np.inf]

    # Create Laplacian
    op_type = '1d_periodic' if periodic else '1d_bilateral'
    Delta = laplacian.Laplacian(op_type, alpha, G, 1.0)

    # Get histogram counts and grid centers
    counts, bin_centers = utils.histogram_counts_1d(template_data, G,
        bbox=bbox)
    R = 1.*counts/np.sum(counts)

    # Only the grid spacing is needed from the grid information
    _, h, _ = utils.grid_info_from_bin_centers_1d(bin_centers)

    # Split the eigenbasis into kernel and row-space parts
    kernel_dim = Delta._kernel_dim
    kernel_basis = Delta._eigenbasis[:,:kernel_dim]
    # np.asmatrix is what the removed np.mat alias pointed to; the matrix
    # type is kept so phi_rowspace has the same column-matrix form as before.
    rowspace_basis = np.asmatrix(Delta._eigenbasis[:,kernel_dim:])
    rowspace_eigenvalues = ell**(2*alpha) * h**(-2*alpha) * \
        np.array(Delta._eigenvalues[kernel_dim:])

    # Keep drawing until a kernel component can be determined.
    # (An earlier rejection test on where phi_rowspace attains its minimum
    # was already disabled, so every draw is accepted at this stage.)
    while True:

        # Draw coefficients for the row-space components of phi
        rowspace_coeffs = \
            np.random.randn(G-kernel_dim)/np.sqrt(2.*rowspace_eigenvalues)

        # Construct rowspace phi as a column matrix
        phi_rowspace = rowspace_basis*np.asmatrix(rowspace_coeffs).T

        # With a one-dimensional kernel no maxent step is run and the
        # kernel component is simply zero.
        if kernel_dim == 1:
            phi_kernel = np.zeros(phi_rowspace.shape)
            break

        # Construct full phi so that distribution matches moments of R
        phi_kernel, success = maxent.compute_maxent_field(R, kernel_basis,
            phi0=phi_rowspace, geo_dist_tollerance=1.0E-10)

        if success:
            break
        print('Maxent failure! Trying to sample again.')

    phi_rowspace = np.array(phi_rowspace).ravel()
    phi_kernel = np.array(phi_kernel).ravel()
    phi = phi_kernel + phi_rowspace

    # Return Q
    Q = utils.field_to_prob(phi)/h
    R = R/h
    return bin_centers, Q, R
Exemple #5
0
def run(data, G=100, alpha=3, bbox=None, periodic=False,
        resolution=3.14E-2, tollerance=1E-3, num_samples=100,
        errorbars=False, print_t=False, ell_guess=False,
        laplacian_dir='/Users/jkinney/github/15_deft/laplacians/'):
    """
    Run 1D DEFT density estimation on ``data`` and return the results object.

    Parameters:
        data: data passed to ``utils.histogram_counts_1d``.
        G: number of grid points.
        alpha: order passed to the Laplacian.
        bbox: bounding box forwarded to ``utils.histogram_counts_1d``;
            ``None`` (the default) means ``[-inf, inf]``.
        periodic: use the '1d_periodic' operator instead of '1d_bilateral'.
        resolution, tollerance, num_samples, errorbars: forwarded unchanged
            to ``deft_core.run``.
        print_t: print progress messages; also forwarded to
            ``deft_core.run``.
        ell_guess: if truthy, used to compute the starting value of t;
            otherwise G/4 grid spacings is used.
        laplacian_dir: directory searched for a pickled Laplacian before one
            is computed from scratch. Defaults to the previously hard-coded
            location.

    Returns:
        The object returned by ``deft_core.run``, with density fields divided
        by the grid spacing ``h`` and extra attributes attached: grid
        information, the inputs ``periodic``/``alpha``, the Laplacian,
        timing figures and, when more than one sample is available,
        differential-entropy statistics (``entropies``, ``e_mean``,
        ``e_std``).
    """
    # time.clock() was removed in Python 3.8; use perf_counter where it
    # exists and fall back to clock only on interpreters without it.
    clock = getattr(time, 'perf_counter', None) or time.clock

    # A fresh list per call avoids sharing one mutable default; np.inf is
    # used because the np.Inf alias no longer exists in NumPy 2.0.
    if bbox is None:
        bbox = [-np.inf, np.inf]

    # Start clock
    start_time = clock()

    # Create Laplacian
    laplacian_start_time = clock()
    op_type = '1d_periodic' if periodic else '1d_bilateral'

    # Check for Laplacian on disk. Otherwise, create de novo
    file_name = os.path.join(
        laplacian_dir, '%s_alpha_%d_G_%d.pickle' % (op_type, alpha, G))
    if os.path.isfile(file_name):
        Delta = laplacian.load(file_name)
        if print_t:
            print('Laplacian loaded from disk')
    else:
        Delta = laplacian.Laplacian(op_type, alpha, G, 1.0)
        if print_t:
            print('Laplacian computed de novo')
    laplacian_compute_time = clock() - laplacian_start_time

    # Get histogram counts and grid centers
    counts, bin_centers = utils.histogram_counts_1d(data, G, bbox=bbox)
    N = np.sum(counts)

    # Get other information about grid
    bbox, h, bin_edges = utils.grid_info_from_bin_centers_1d(bin_centers)

    # Compute initial t. The np.* functions replace the deprecated scipy
    # top-level aliases; for the positive arguments used here they agree.
    if ell_guess:
        t_start = np.log(N) - 2.0*alpha*np.log(ell_guess/h)
    else:
        t_start = np.log(N) - 2.0*alpha*np.log(G/4.0)
    if print_t:
        print('t_start == %0.2f' % t_start)

    # Do DEFT density estimation
    core_results = deft_core.run(
        counts, Delta,
        resolution=resolution,
        tollerance=tollerance,
        details=True,
        errorbars=errorbars,
        num_samples=num_samples,
        t_start=t_start,
        print_t=print_t)

    # Fill in results
    copy_start_time = clock()
    results = core_results  # Get all results from deft_core

    # Normalize densities properly
    results.h = h
    results.L = G*h
    results.R /= h
    results.Q_star /= h
    if results.errorbars:
        results.Q_lb /= h
        results.Q_ub /= h
    if results.num_samples > 0:
        results.Q_samples /= h
    for p in results.map_curve.points:
        p.Q /= h
        # NOTE(review): t_start above is built from N (total counts), which
        # would suggest N rather than G inside this expression. Left
        # unchanged because the intended convention cannot be confirmed
        # from this function alone.
        p.ell = (np.exp(-p.t)*G)**(1/(2.*alpha))

    # Get 1D-specific information
    results.bin_centers = bin_centers
    results.bin_edges = bin_edges
    results.periodic = periodic
    results.alpha = alpha
    results.bbox = bbox
    results.Delta = Delta
    copy_compute_time = clock() - copy_start_time

    # Compute differential entropy in bits
    entropy_start_time = clock()
    if results.num_samples > 1:
        # Size the output from the samples actually present rather than
        # from the num_samples argument, so a mismatch can neither overrun
        # the array nor leave spurious zero entries in the statistics.
        num_drawn = results.Q_samples.shape[1]
        entropies = np.zeros(num_drawn)
        for i in range(num_drawn):
            Q = results.Q_samples[:,i].ravel()
            # TINY_FLOAT64 keeps log2 finite where Q is exactly zero
            entropies[i] = -np.sum(h*Q*np.log2(Q + utils.TINY_FLOAT64))

        # Compute mean and variance of the differential entropy
        results.entropies = entropies
        results.e_mean = np.mean(entropies)
        results.e_std = np.std(entropies)
        results.entropy_compute_time = clock() - entropy_start_time

    # Record execution time
    results.copy_compute_time = copy_compute_time
    results.laplacian_compute_time = laplacian_compute_time
    results.deft_1d_compute_time = clock()-start_time

    return results
Exemple #6
0
#!/usr/local/bin/python
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt

# Add parent directory to path
import sys
sys.path.append('../code/')
sys.path.append('../sim/')

# Import deft modules
import laplacian

# Make 1d laplacians: one pickled operator for every combination of
# alpha, boundary type and grid size listed below.
alphas = [1, 2, 3]
op_types = ['1d_bilateral', '1d_periodic']
Gs = [20, 50, 100, 200, 500, 1000, 2000]
h = 1.0
directory = '../laplacians/'

for alpha in alphas:
    for op_type in op_types:
        for G in Gs:
            file_name = '%s_alpha_%d_G_%d.pickle' % (op_type, alpha, G)
            # Parenthesized single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('creating operator %s...' % file_name)
            op = laplacian.Laplacian(op_type, alpha, G, h)
            op.save(directory + file_name)