Example #1
def hessian(phi, R, Delta, t, N, regularized=False):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError('/hessian/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError('/hessian/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError('/hessian/ t is not real: t = %s' % t)
    if not np.isfinite(t):
        raise ControlledError('/hessian/ t is not finite: t = %s' % t)
    # Make sure regularized is valid
    if not isinstance(regularized, bool):
        raise ControlledError(
            '/hessian/ regularized must be a boolean: regularized = %s' %
            type(regularized))

    G = 1. * len(R)
    quasiQ = utils.field_to_quasiprob(phi)
    Delta_sparse = Delta.get_sparse_matrix()
    H = sp.exp(-t) * Delta_sparse + G * diags(quasiQ, 0)

    if regularized:
        H += diags(np.ones(int(G)), 0) / (N * PHI_STD_REG**2)

    # Return H; validity checks on H are left to the caller
    return H
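
For orientation, the matrix assembled above is H = exp(-t)·Delta + G·diag(quasiQ), plus an optional ridge term when regularized. Below is a minimal self-contained sketch, with a toy tridiagonal operator standing in for Delta.get_sparse_matrix() and exp(-phi)/G standing in for utils.field_to_quasiprob (both stand-ins are assumptions, not the package's actual implementations):

import numpy as np
from scipy.sparse import diags

G = 5
phi = np.linspace(-1.0, 1.0, G)   # toy field values
t = 0.0                           # smoothing parameter

# Stand-in for Delta.get_sparse_matrix(): a 1d second-difference operator
Delta_sparse = diags([-1.0, 2.0, -1.0], [-1, 0, 1], shape=(G, G))

# Stand-in for utils.field_to_quasiprob(phi)
quasiQ = np.exp(-phi) / G

# Same structure as hessian() above, without the optional regularizer
H = np.exp(-t) * Delta_sparse + G * diags(quasiQ, 0)
print(H.toarray())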
Example #2
def laplacian_1d(G, alpha, grid_spacing, periodic, sparse=True, report_kernel=True):
    """ Returns a G x G sized 1d bilateral laplacian matrix of order alpha """

    # Make sure sparse is valid
    if not isinstance(sparse, bool):
        raise ControlledError('/laplacian_1d/ sparse must be a boolean: sparse = %s' % type(sparse))
    # Make sure report_kernel is valid
    if not isinstance(report_kernel, bool):
        raise ControlledError('/laplacian_1d/ report_kernel must be a boolean: report_kernel = %s' % type(report_kernel))
    
    x_grid = (sp.arange(G) - (G-1)/2.)/(G/2.)

    # If periodic boundary conditions, construct regular laplacian
    if periodic:
        tmp_mat = 2*sp.diag(sp.ones(G),0) - sp.diag(sp.ones(G-1),-1) - sp.diag(sp.ones(G-1),+1)
        tmp_mat[G-1,0] = -1.0
        tmp_mat[0,G-1] = -1.0
        Delta = (sp.mat(tmp_mat)/(grid_spacing**2))**alpha
        
        # Get kernel, which is just the constant vector v = sp.ones([G,1])
        # kernel_basis = utils.normalize(v, grid_spacing)
        kernel_basis = utils.legendre_basis_1d(G, 1, grid_spacing)

    # Otherwise, construct bilateral laplacian
    else:
    
        # Initialize to G x G identity matrix
        right_side = sp.diag(sp.ones(G),0)
        
        # Multiply alpha derivative matrices together, reducing dimension from the left
        for a in range(alpha):
            right_side = derivative_matrix_1d(G-a, grid_spacing)*right_side 
        
        # Construct final bilateral laplacian
        Delta = right_side.T*right_side

        # Make sure Delta is valid
        if not (Delta.shape[0] == Delta.shape[1] == G):
            raise ControlledError('/laplacian_1d/ Delta must have shape (%d, %d): Delta.shape = %s' % (G, G, Delta.shape))

        # Construct a basis for the kernel from legendre polynomials
        kernel_basis = utils.legendre_basis_1d(G, alpha, grid_spacing)

        # Make sure kernel_basis is valid
        if not ((kernel_basis.shape[0] == G) and (kernel_basis.shape[1] == alpha)):
            raise ControlledError('/laplacian_1d/ kernel_basis must have shape (%d, %d): kernel_basis.shape = %s' %
                                  (G,alpha,kernel_basis.shape))
        
    # Sparsify matrix if requested
    if sparse:  
        Delta = csr_matrix(Delta)

    # Report kernel if requested
    if report_kernel:
        return Delta, kernel_basis

    # Otherwise, just report matrix
    else:
        return Delta
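
As a quick sanity check on the periodic branch: the order-alpha periodic Laplacian should annihilate the constant vector, which spans its kernel. A numpy-only sketch (np.linalg.matrix_power replaces the deprecated sp.mat power used above):

import numpy as np

G, alpha, grid_spacing = 8, 2, 0.5
tmp_mat = 2*np.diag(np.ones(G)) - np.diag(np.ones(G-1), -1) - np.diag(np.ones(G-1), +1)
tmp_mat[G-1, 0] = -1.0
tmp_mat[0, G-1] = -1.0
Delta = np.linalg.matrix_power(tmp_mat / grid_spacing**2, alpha)

v = np.ones(G)
print(np.allclose(Delta @ v, 0.0))   # True: constants lie in the kernel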
Example #3
    def _clean_data(self):
        """
        Sanitize the assigned data
        :param: self
        :return: None
        """
        data = self.data

        # if data is a list-like, convert to 1D np.array
        if isinstance(data, LISTLIKE):
            data = np.array(data).ravel()
        elif isinstance(data, set):
            data = np.array(list(data)).ravel()
        else:
            raise ControlledError(
                "Error: could not cast data into an np.array")

        # Check that entries are numbers
        check(all([isinstance(n, numbers.Real) for n in data]),
              'not all entries in data are real numbers')

        # Cast as 1D np.array of floats
        data = data.astype(float)

        # Keep only finite numbers
        data = data[np.isfinite(data)]

        try:
            if not (len(data) > 0):
                raise ControlledError(
                    'Input check failed, data must have length > 0: data = %s'
                    % data)
        except ControlledError as e:
            print(e)
            sys.exit(1)

        try:
            data_spread = max(data) - min(data)
            if not np.isfinite(data_spread):
                raise ControlledError(
                    'Input check failed. Data[max]-Data[min] is not finite: Data spread = %s'
                    % data_spread)
        except ControlledError as e:
            print(e)
            sys.exit(1)

        try:
            if not (data_spread > 0):
                raise ControlledError(
                    'Input check failed. Data[max]-Data[min] must be > 0: data_spread = %s'
                    % data_spread)
        except ControlledError as e:
            print(e)
            sys.exit(1)

        # Set cleaned data
        self.data = data
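
The cleaning pipeline above reduces to: flatten to a float array, drop non-finite entries, and demand a positive spread. A compact stand-alone illustration of those steps (LISTLIKE is assumed to be a tuple of array-like types such as (list, tuple, np.ndarray)):

import numpy as np

raw = [0.3, 1.2, float('nan'), 4.5, float('inf')]

data = np.array(raw).ravel().astype(float)
data = data[np.isfinite(data)]           # keeps only [0.3, 1.2, 4.5]

assert len(data) > 0
data_spread = data.max() - data.min()
assert np.isfinite(data_spread) and data_spread > 0
print(data, data_spread)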
Example #4
    def get_histogram_point(self):
        if not self._is_sorted:
            self.sort()
        p = self.points[-1]
        if not (p.t == sp.Inf):
            raise ControlledError(
                '/MAP_curve/ Not getting histogram point: t = %f' % p.t)
        return p
Example #5
    def get_maxent_point(self):
        if not self._is_sorted:
            self.sort()
        p = self.points[0]
        if not (p.t == -sp.Inf):
            raise ControlledError(
                '/MAP_curve/ Not getting MaxEnt point: t = %f' % p.t)
        return p
Example #6
def action(phi, R, Delta, t, N, phi_in_kernel=False, regularized=False):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError('/action/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError('/action/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError('/action/ t is not real: t = %s' % t)
    # if not np.isfinite(t):
    #    raise ControlledError('/action/ t is not finite: t = %s' % t)
    # Make sure phi_in_kernel is valid
    if not isinstance(phi_in_kernel, bool):
        raise ControlledError(
            '/action/ phi_in_kernel must be a boolean: phi_in_kernel = %s' %
            type(phi_in_kernel))
    # Make sure regularized is valid
    if not isinstance(regularized, bool):
        raise ControlledError(
            '/action/ regularized must be a boolean: regularized = %s' %
            type(regularized))

    G = 1. * len(R)
    quasiQ = utils.field_to_quasiprob(phi)
    quasiQ_col = sp.mat(quasiQ).T
    Delta_sparse = Delta.get_sparse_matrix()
    phi_col = sp.mat(phi).T
    R_col = sp.mat(R).T
    ones_col = sp.mat(sp.ones(int(G))).T

    if phi_in_kernel:
        S_mat = G * R_col.T * phi_col + G * ones_col.T * quasiQ_col
    else:
        S_mat = 0.5 * sp.exp(-t) * phi_col.T * Delta_sparse * phi_col \
            + G * R_col.T * phi_col + G * ones_col.T * quasiQ_col

    if regularized:
        S_mat += 0.5 * (phi_col.T * phi_col) / (N * PHI_STD_REG**2)

    S = S_mat[0, 0]

    # Make sure S is valid
    if not np.isreal(S):
        raise ControlledError('/action/ S is not real at t = %s: S = %s' %
                              (t, S))
    if not np.isfinite(S):
        raise ControlledError('/action/ S is not finite at t = %s: S = %s' %
                              (t, S))

    return S
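
Written with plain numpy arrays instead of the deprecated sp.mat, the action is a one-liner. A sketch assuming utils.field_to_quasiprob(phi) behaves like exp(-phi)/G (an assumption about that helper, not its verbatim definition):

import numpy as np

def action_sketch(phi, R, Delta_dense, t):
    # S = (e^-t / 2) phi' Delta phi + G R' phi + G 1' quasiQ
    G = len(R)
    quasiQ = np.exp(-phi) / G          # stand-in for utils.field_to_quasiprob
    return (0.5 * np.exp(-t) * phi @ Delta_dense @ phi
            + G * R @ phi + G * quasiQ.sum())

G = 4
Delta_dense = 2*np.eye(G) - np.eye(G, k=1) - np.eye(G, k=-1)
print(action_sketch(np.zeros(G), np.full(G, 0.25), Delta_dense, t=0.0))  # 4.0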
Example #7
    def _load_dataset(self, file_name):
        # Load data
        self.data = np.genfromtxt(file_name)

        # Fill in details from data file header
        details = {}
        with open(file_name, 'r') as f:
            header_lines = [
                line.strip()[1:] for line in f
                if line.strip().startswith('#')
            ]
        for line in header_lines:
            key = eval(line.split(':')[0])
            value = eval(line.split(':')[1])
            try:
                setattr(self, key, value)
            except Exception:
                raise ControlledError(
                    'Error loading example data. Either key or value '
                    'of metadata is invalid. key = %s, value = %s' %
                    (key, value))
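
Since eval on file contents is risky, the same metadata parse can be done with ast.literal_eval. This is a safer alternative shown for illustration, not the package's implementation; the header line format assumed here is e.g. "# 'distribution': 'gaussian'":

import ast

def parse_header_line(line):
    # `line` has its leading '#' already stripped, e.g. "'distribution': 'gaussian'"
    key_str, value_str = line.split(':', 1)
    return ast.literal_eval(key_str.strip()), ast.literal_eval(value_str.strip())

print(parse_header_line("'distribution': 'gaussian'"))   # ('distribution', 'gaussian')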
Example #8
def gradient(phi, R, Delta, t, N, regularized=False):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError('/gradient/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError('/gradient/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError('/gradient/ t is not real: t = %s' % t)
    if not np.isfinite(t):
        raise ControlledError('/gradient/ t is not finite: t = %s' % t)
    # Make sure regularized is valid
    if not isinstance(regularized, bool):
        raise ControlledError(
            '/gradient/ regularized must be a boolean: regularized = %s' %
            type(regularized))

    G = 1. * len(R)
    quasiQ = utils.field_to_quasiprob(phi)
    quasiQ_col = sp.mat(quasiQ).T
    Delta_sparse = Delta.get_sparse_matrix()
    phi_col = sp.mat(phi).T
    R_col = sp.mat(R).T
    grad_col = sp.exp(-t) * Delta_sparse * phi_col + G * R_col - G * quasiQ_col

    if regularized:
        grad_col += phi_col / (N * PHI_STD_REG**2)

    grad = sp.array(grad_col).ravel()

    # Make sure grad is valid
    if not all(np.isreal(grad)):
        raise ControlledError(
            '/gradient/ grad is not real at t = %s: grad = %s' % (t, grad))
    if not all(np.isfinite(grad)):
        raise ControlledError(
            '/gradient/ grad is not finite at t = %s: grad = %s' % (t, grad))

    return grad
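
A quick self-consistency check of the formulas above: the gradient should match a numerical derivative of the action. This sketch uses dense toy inputs and exp(-phi)/G as a stand-in for utils.field_to_quasiprob (an assumption):

import numpy as np

G, t = 4, 0.0
rng = np.random.default_rng(0)
phi = rng.normal(size=G)
R = np.full(G, 1.0 / G)
Delta = 2*np.eye(G) - np.eye(G, k=1) - np.eye(G, k=-1)   # toy smoothness operator

def S(phi):
    quasiQ = np.exp(-phi) / G
    return 0.5*np.exp(-t) * phi @ Delta @ phi + G * R @ phi + G * quasiQ.sum()

# Analytic gradient, mirroring gradient() above
grad = np.exp(-t) * Delta @ phi + G * R - G * (np.exp(-phi) / G)

# Central finite differences
eps = 1e-6
e = np.eye(G)
num = np.array([(S(phi + eps*e[i]) - S(phi - eps*e[i])) / (2*eps) for i in range(G)])
print(np.allclose(grad, num, atol=1e-5))   # True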
Example #9
    def _inputs_check(self):
        """
        Check all inputs NOT having to do with the choice of grid
        :param self:
        :return: None
        """

        if self.grid_spacing is not None:

            # grid_spacing is a number
            check(
                isinstance(self.grid_spacing, numbers.Real),
                'type(grid_spacing) = %s; must be a number' %
                type(self.grid_spacing))

            # grid_spacing is positive
            check(self.grid_spacing > 0,
                  'grid_spacing = %f; must be > 0.' % self.grid_spacing)

        if self.grid is not None:

            # grid is a list or np.array
            types = (list, np.ndarray, np.matrix)
            check(
                isinstance(self.grid, types),
                'type(grid) = %s; must be a list or np.ndarray' %
                type(self.grid))

            # cast grid as a 1D np.array of floats
            try:
                self.grid = np.array(self.grid).ravel().astype(float)
            except (TypeError, ValueError):
                raise ControlledError(
                    'Cannot cast grid as 1D np.array of floats.')

            # grid has appropriate number of points
            check(
                2 * self.alpha <= len(self.grid) <= MAX_NUM_GRID_POINTS,
                'len(grid) = %d; must have %d <= len(grid) <= %d.' %
                (len(self.grid), 2 * self.alpha, MAX_NUM_GRID_POINTS))

            # grid is ordered
            diffs = np.diff(self.grid)
            check(all(diffs > 0), 'grid is not monotonically increasing.')

            # grid is evenly spaced
            check(
                all(np.isclose(diffs, diffs.mean())),
                'grid is not evenly spaced; grid spacing = %f +- %f' %
                (diffs.mean(), diffs.std()))

        # alpha is int
        check(isinstance(self.alpha, int),
              'type(alpha) = %s; must be int.' % type(self.alpha))

        # alpha in range
        check(1 <= self.alpha <= 4,
              'alpha = %d; must have 1 <= alpha <= 4' % self.alpha)

        if self.num_grid_points is not None:

            # num_grid_points is an integer
            check(
                isinstance(self.num_grid_points,
                           int), 'type(num_grid_points) = %s; must be int.' %
                type(self.num_grid_points))

            # num_grid_points is in the right range
            check(
                2 * self.alpha <= self.num_grid_points <= MAX_NUM_GRID_POINTS,
                'num_grid_points = %d; must have %d <= num_grid_points <= %d.'
                % (self.num_grid_points, 2 * self.alpha, MAX_NUM_GRID_POINTS))

        # bounding_box
        if self.bounding_box is not None:

            # bounding_box is right type
            box_types = (list, tuple, np.ndarray)
            check(
                isinstance(self.bounding_box, box_types),
                'type(bounding_box) = %s; must be one of %s' %
                (type(self.bounding_box), box_types))

            # bounding_box has right length
            check(
                len(self.bounding_box) == 2,
                'len(bounding_box) = %d; must be %d' %
                (len(self.bounding_box), 2))

            # bounding_box entries must be numbers
            check(
                isinstance(self.bounding_box[0], numbers.Real)
                and isinstance(self.bounding_box[1], numbers.Real),
                'bounding_box = %s; entries must be numbers' %
                repr(self.bounding_box))

            # bounding_box entries must be sorted
            check(
                self.bounding_box[0] < self.bounding_box[1],
                'bounding_box = %s; entries must be sorted' %
                repr(self.bounding_box))

            # reset bounding_box as tuple
            self.bounding_box = (float(self.bounding_box[0]),
                                 float(self.bounding_box[1]))

        # periodic is bool
        check(isinstance(self.periodic, bool),
              'type(periodic) = %s; must be bool' % type(self.periodic))

        # Z_evaluation_method is valid
        Z_evals = ['Lap', 'Lap+Imp', 'Lap+Fey']
        check(
            self.Z_evaluation_method in Z_evals,
            'Z_eval = %s; must be in %s' % (self.Z_evaluation_method, Z_evals))

        # num_samples_for_Z is an integer
        check(
            isinstance(self.num_samples_for_Z, numbers.Integral),
            'type(self.num_samples_for_Z) = %s; ' %
            type(self.num_samples_for_Z) + 'must be integer.')
        self.num_samples_for_Z = int(self.num_samples_for_Z)

        # num_samples_for_Z is in range
        check(
            0 <= self.num_samples_for_Z <= MAX_NUM_SAMPLES_FOR_Z,
            'self.num_samples_for_Z = %d; ' % self.num_samples_for_Z +
            ' must satisfy 0 <= num_samples_for_Z <= %d.' %
            MAX_NUM_SAMPLES_FOR_Z)

        # max_t_step is a number
        check(
            isinstance(self.max_t_step, numbers.Real),
            'type(max_t_step) = %s; must be a number' % type(self.max_t_step))

        # max_t_step is positive
        check(self.max_t_step > 0,
              'max_t_step = %f; must be > 0.' % self.max_t_step)

        # print_t is bool
        check(isinstance(self.print_t, bool),
              'type(print_t) = %s; must be bool.' % type(self.print_t))

        # tolerance is float
        check(isinstance(self.tolerance, numbers.Real),
              'type(tolerance) = %s; must be number' % type(self.tolerance))

        # tolerance is positive
        check(self.tolerance > 0,
              'tolerance = %f; must be > 0' % self.tolerance)

        # resolution is number
        check(isinstance(self.resolution, numbers.Real),
              'type(resolution) = %s; must be number' % type(self.resolution))

        # resolution is positive
        check(self.resolution > 0,
              'resolution = %f; must be > 0' % self.resolution)

        if self.seed is not None:

            # seed is int
            check(isinstance(self.seed, int),
                  'type(seed) = %s; must be int' % type(self.seed))

            # seed is in range
            check(0 <= self.seed <= 2**32 - 1,
                  'seed = %d; must have 0 <= seed <= 2**32 - 1' % self.seed)

        # sample_only_at_l_star is bool
        check(
            isinstance(self.sample_only_at_l_star, bool),
            'type(sample_only_at_l_star) = %s; must be bool.' %
            type(self.sample_only_at_l_star))

        # num_posterior_samples is int
        check(
            isinstance(self.num_posterior_samples, numbers.Integral),
            'type(num_posterior_samples) = %s; must be integer' %
            type(self.num_posterior_samples))
        self.num_posterior_samples = int(self.num_posterior_samples)

        # num_posterior_samples is nonnegative
        check(
            0 <= self.num_posterior_samples <= MAX_NUM_POSTERIOR_SAMPLES,
            'num_posterior_samples = %f; need ' % self.num_posterior_samples +
            '0 <= num_posterior_samples <= %d.' % MAX_NUM_POSTERIOR_SAMPLES)

        # max_log_evidence_ratio_drop is number
        check(
            isinstance(self.max_log_evidence_ratio_drop, numbers.Real),
            'type(max_log_evidence_ratio_drop) = %s; must be number' %
            type(self.max_log_evidence_ratio_drop))

        # max_log_evidence_ratio_drop is positive
        check(
            self.max_log_evidence_ratio_drop > 0,
            'max_log_evidence_ratio_drop = %f; must be > 0' %
            self.max_log_evidence_ratio_drop)
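
All of these validations go through a check helper. A minimal sketch of what such a helper presumably looks like, consistent with its usage in these examples but not necessarily the package's verbatim code:

class ControlledError(Exception):
    """Exception type raised throughout these examples on invalid input."""

def check(condition, message):
    """Raise ControlledError(message) if condition is falsy."""
    if not condition:
        raise ControlledError(message)

check(isinstance(1.5, float), 'this passes silently')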
Example #10
def compute_corrector_step(phi,
                           R,
                           Delta,
                           t,
                           N,
                           tollerance,
                           report_num_steps=False):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError(
            '/compute_corrector_step/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError(
            '/compute_corrector_step/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError(
            '/compute_corrector_step/ t is not real: t = %s' % t)
    if not np.isfinite(t):
        raise ControlledError(
            '/compute_corrector_step/ t is not finite: t = %s' % t)
    # Make sure report_num_steps is valid
    if not isinstance(report_num_steps, bool):
        raise ControlledError(
            '/compute_corrector_step/ report_num_steps must be a boolean: report_num_steps = %s'
            % type(report_num_steps))

    # Evaluate the probability distribution
    Q = utils.field_to_prob(phi)

    # Evaluate action
    S = action(phi, R, Delta, t, N)

    # Perform corrector steps until phi converges
    num_corrector_steps = 0
    num_backtracks = 0
    while True:

        # Compute the gradient
        v = gradient(phi, R, Delta, t, N)

        # Compute the hessian
        H = hessian(phi, R, Delta, t, N)

        # Solve linear equation to get change in field
        dphi = -spsolve(H, v)

        # Make sure dphi is valid
        if not all(np.isreal(dphi)):
            raise ControlledError(
                '/compute_corrector_step/ dphi is not real at t = %s: dphi = %s'
                % (t, dphi))
        if not all(np.isfinite(dphi)):
            raise ControlledError(
                '/compute_corrector_step/ dphi is not finite at t = %s: dphi = %s'
                % (t, dphi))

        # Compute corresponding change in action
        dS = sp.sum(dphi * v)

        # dphi is a descent direction for S, so dS should be negative. If dS
        # exceeds the small negative threshold MAX_DS, phi has essentially
        # converged and we are done
        if dS > MAX_DS:
            break

        # Reduce step size until in linear regime
        beta = 1.0
        while True:

            # Make sure beta is valid
            if beta < 1E-50:
                raise ControlledError(
                    '/compute_corrector_step/ phi is not converging at t = %s: beta = %s'
                    % (t, beta))

            # Compute new phi
            phi_new = phi + beta * dphi

            # If new phi is insane, decrease beta
            if any(phi_new < PHI_MIN) or any(phi_new > PHI_MAX):
                num_backtracks += 1
                beta *= 0.5
                continue

            # Compute new action
            S_new = action(phi_new, R, Delta, t, N)

            # Check for linear regime
            if S_new - S <= 0.5 * beta * dS:
                break

            # If not in linear regime, backtrack value of beta
            else:
                num_backtracks += 1
                beta *= 0.5
                continue

        # Make sure phi_new is valid
        if not all(np.isreal(phi_new)):
            raise ControlledError(
                '/compute_corrector_step/ phi_new is not real at t = %s: phi_new = %s'
                % (t, phi_new))
        if not all(np.isfinite(phi_new)):
            raise ControlledError(
                '/compute_corrector_step/ phi_new is not finite at t = %s: phi_new = %s'
                % (t, phi_new))

        # Compute new Q
        Q_new = utils.field_to_prob(phi_new)

        # Break out of loop if Q_new is close enough to Q
        gd = utils.geo_dist(Q_new, Q)
        if gd < tollerance:
            break

        # The backtracking loop guarantees S_new <= S + 0.5*beta*dS < S,
        # so S_new > S should never happen; treat it as a fatal error
        elif S_new - S > 0:
            raise ControlledError(
                '/compute_corrector_step/ S_new > S at t = %s: terminating corrector steps'
                % t)

        # Otherwise, continue with corrector step
        else:
            # New phi, Q, and S values have already been computed
            phi = phi_new
            Q = Q_new
            S = S_new
            num_corrector_steps += 1

    # After corrector loop has finished, return field
    if report_num_steps:
        return phi, num_corrector_steps, num_backtracks
    else:
        return phi
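
The loop above is a damped Newton method with a backtracking (Armijo) line search. Stripped of the DEFT-specific quantities, the core pattern looks like this (a generic sketch for a smooth convex objective, not the package's code):

import numpy as np

def damped_newton(f, grad, hess, x, tol=1e-12, max_iter=100):
    """Minimize smooth convex f via Newton steps with backtracking."""
    S = f(x)
    for _ in range(max_iter):
        v = grad(x)
        dx = -np.linalg.solve(hess(x), v)
        dS = dx @ v                      # directional derivative; negative when descending
        if dS > -tol:                    # effectively converged
            break
        beta = 1.0
        while f(x + beta * dx) - S > 0.5 * beta * dS:
            beta *= 0.5                  # backtrack until sufficient decrease
            if beta < 1e-50:
                raise RuntimeError('line search failed to make progress')
        x = x + beta * dx
        S = f(x)
    return x

# Toy usage: minimize a quadratic with minimum at x = [1, 2]
A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, 2.0])
f = lambda x: 0.5 * (x - b) @ A @ (x - b)
g = lambda x: A @ (x - b)
h = lambda x: A
print(damped_newton(f, g, h, np.zeros(2)))   # ~[1. 2.]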
Example #11
def compute_predictor_step(phi, R, Delta, t, N, direction, resolution, DT_MAX):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError(
            '/compute_predictor_step/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError(
            '/compute_predictor_step/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError(
            '/compute_predictor_step/ t is not real: t = %s' % t)
    if not np.isfinite(t):
        raise ControlledError(
            '/compute_predictor_step/ t is not finite: t = %s' % t)
    # Make sure direction is valid
    if not ((direction == 1) or (direction == -1)):
        raise ControlledError(
            '/compute_predictor_step/ direction must be just a sign: direction = %s'
            % direction)

    # Get current probability distribution
    Q = utils.field_to_prob(phi)
    G = 1. * len(Q)

    # Get hessian
    H = hessian(phi, R, Delta, t, N)

    # Compute rho, which indicates direction of step
    rho = G * spsolve(H, Q - R)

    # Make sure rho is valid
    if not all(np.isreal(rho)):
        raise ControlledError(
            '/compute_predictor_step/ rho is not real at t = %s: rho = %s' %
            (t, rho))
    if not all(np.isfinite(rho)):
        raise ControlledError(
            '/compute_predictor_step/ rho is not finite at t = %s: rho = %s' %
            (t, rho))

    denom = sp.sqrt(sp.sum(rho * Q * rho))

    # Make sure denom is valid
    if not np.isreal(denom):
        raise ControlledError(
            '/compute_predictor_step/ denom is not real at t = %s: denom = %s'
            % (t, denom))
    if not np.isfinite(denom):
        raise ControlledError(
            '/compute_predictor_step/ denom is not finite at t = %s: denom = %s'
            % (t, denom))
    if not (denom > 0):
        raise ControlledError(
            '/compute_predictor_step/ denom is not positive at t = %s: denom = %s'
            % (t, denom))

    # Compute dt based on value of epsilon (the resolution)
    dt = direction * resolution / denom
    while abs(dt) > DT_MAX:
        dt /= 2.0

    # Return phi_new and t_new. WARNING: IT IS NOT YET CLEAR THAT PHI_NEW ISN'T INSANE
    phi_new = phi + rho * dt
    t_new = t + dt

    # Make sure phi_new is valid
    if not all(np.isreal(phi_new)):
        raise ControlledError(
            '/compute_predictor_step/ phi_new is not real at t_new = %s: phi_new = %s'
            % (t_new, phi_new))
    if not all(np.isfinite(phi_new)):
        raise ControlledError(
            '/compute_predictor_step/ phi_new is not finite at t_new = %s: phi_new = %s'
            % (t_new, phi_new))
    # Make sure t_new is valid
    if not np.isreal(t_new):
        raise ControlledError(
            '/compute_predictor_step/ t_new is not real: t_new = %s' % t_new)
    if not np.isfinite(t_new):
        raise ControlledError(
            '/compute_predictor_step/ t_new is not finite: t_new = %s' % t_new)

    return phi_new, t_new
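
The step size here moves the predictor a fixed distance (the resolution) in the metric defined by Q: dt = ±resolution / sqrt(rho' diag(Q) rho), halved until it respects DT_MAX. A compact sketch of just that normalization, with toy values:

import numpy as np

resolution, DT_MAX, direction = 0.1, 0.5, +1
Q = np.array([0.2, 0.3, 0.5])
rho = np.array([1.0, -2.0, 0.5])

denom = np.sqrt(np.sum(rho * Q * rho))   # length of rho in the Q-metric
dt = direction * resolution / denom
while abs(dt) > DT_MAX:
    dt /= 2.0
print(dt)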
Example #12
def log_ptgd(phi, R, Delta, t, N, Z_eval, num_Z_samples):
    # Make sure phi is valid
    if not all(np.isreal(phi)):
        raise ControlledError('/log_ptgd/ phi is not real: phi = %s' % phi)
    if not all(np.isfinite(phi)):
        raise ControlledError('/log_ptgd/ phi is not finite: phi = %s' % phi)
    # Make sure t is valid
    if not np.isreal(t):
        raise ControlledError('/log_ptgd/ t is not real: t = %s' % t)
    if not np.isfinite(t):
        raise ControlledError('/log_ptgd/ t is not finite: t = %s' % t)

    G = 1. * len(phi)
    alpha = 1. * Delta._alpha
    kernel_dim = 1. * Delta._kernel_dim
    H = hessian(phi, R, Delta, t, N)
    H_prime = H.todense() * sp.exp(t)

    S = action(phi, R, Delta, t, N)

    # First try computing log determinant straight away
    log_det = sp.log(det(H_prime))

    # If failed, try computing the sum of eigenvalues, forcing the eigenvalues to be real and non-negative
    if not (np.isreal(log_det) and np.isfinite(log_det)):
        lambdas = abs(eigvalsh(H_prime))
        log_det = sp.sum(sp.log(lambdas))

    # Make sure log_det is valid
    if not np.isreal(log_det):
        raise ControlledError(
            '/log_ptgd/ log_det is not real at t = %s: log_det = %s' %
            (t, log_det))
    if not np.isfinite(log_det):
        raise ControlledError(
            '/log_ptgd/ log_det is not finite at t = %s: log_det = %s' %
            (t, log_det))

    # Compute contribution from finite t
    log_ptgd = -(N / G) * S + 0.5 * kernel_dim * t - 0.5 * log_det

    # Make sure log_ptgd is valid
    if not np.isreal(log_ptgd):
        raise ControlledError(
            '/log_ptgd/ log_ptgd is not real at t = %s: log_ptgd = %s' %
            (t, log_ptgd))
    if not np.isfinite(log_ptgd):
        raise ControlledError(
            '/log_ptgd/ log_ptgd is not finite at t = %s: log_ptgd = %s' %
            (t, log_ptgd))

    # If requested, incorporate corrections to the partition function
    num_samples = num_Z_samples
    if Z_eval == 'Lap':
        correction, w_sample_mean, w_sample_mean_std = \
            0.0, 1.0, 0.0
    elif Z_eval == 'Lap+Imp':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Laplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=False)
    elif Z_eval == 'Lap+Imp+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Laplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=True)
    elif Z_eval == 'GLap':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=False, sampling=False)
    elif Z_eval == 'GLap+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=True, sampling=False)
    elif Z_eval == 'GLap+Sam':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=False, sampling=True)
    elif Z_eval == 'GLap+Sam+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi, R, Delta, t, N, num_samples, go_parallel=True, sampling=True)
    elif Z_eval == 'Lap+Fey':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Feynman_diagrams(phi, R, Delta, t, N)
    else:
        raise ControlledError(
            '/log_ptgd/ Z_eval is unrecognized: Z_eval = %s' % Z_eval)

    # Make sure correction is valid
    if not np.isreal(correction):
        raise ControlledError(
            '/log_ptgd/ correction is not real at t = %s: correction = %s' %
            (t, correction))
    if not np.isfinite(correction):
        raise ControlledError(
            '/log_ptgd/ correction is not finite at t = %s: correction = %s' %
            (t, correction))

    log_ptgd += correction

    details = Results()
    details.S = S
    details.N = N
    details.G = G
    details.kernel_dim = kernel_dim
    details.t = t
    details.log_det = log_det
    details.phi = phi

    return log_ptgd, w_sample_mean, w_sample_mean_std
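
The determinant-then-eigenvalues fallback above can overflow for even moderately sized matrices, since det is computed before the log. numpy's slogdet computes the log-determinant directly; here is a sketch of the same two-stage strategy built on it (an alternative shown for illustration, not the package's code):

import numpy as np
from numpy.linalg import slogdet, eigvalsh

def robust_log_det(A):
    """log|det A|, falling back to summed log|eigenvalues| if needed."""
    sign, log_det = slogdet(A)
    if sign > 0 and np.isfinite(log_det):
        return log_det
    # Fallback: force eigenvalues real and positive, as in log_ptgd above
    lambdas = np.abs(eigvalsh(A))
    return np.sum(np.log(lambdas))

A = np.diag(np.full(400, 10.0))   # det(A) = 10**400 would overflow a float
print(robust_log_det(A))          # ~921.03 = 400 * ln(10)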
Example #13
def log_ptgd_at_maxent(phi_M, R, Delta, N, Z_eval, num_Z_samples):
    # Make sure phi_M is valid
    if not all(np.isreal(phi_M)):
        raise ControlledError(
            '/log_ptgd_at_maxent/ phi_M is not real: phi_M = %s' % phi_M)
    if not all(np.isfinite(phi_M)):
        raise ControlledError(
            '/log_ptgd_at_maxent/ phi_M is not finite: phi_M = %s' % phi_M)

    kernel_dim = Delta._kernel_dim
    M = utils.field_to_prob(phi_M)
    M_on_kernel = sp.zeros([kernel_dim, kernel_dim])
    kernel_basis = Delta._kernel_basis
    lambdas = Delta._eigenvalues
    for a in range(int(kernel_dim)):
        for b in range(int(kernel_dim)):
            psi_a = sp.ravel(kernel_basis[:, a])
            psi_b = sp.ravel(kernel_basis[:, b])
            M_on_kernel[a, b] = sp.sum(psi_a * psi_b * M)

    # Compute log Occam factor at infinity
    log_Occam_at_infty = -0.5 * sp.log(det(M_on_kernel)) - 0.5 * sp.sum(
        sp.log(lambdas[kernel_dim:]))

    # Make sure log_Occam_at_infty is valid
    if not np.isreal(log_Occam_at_infty):
        raise ControlledError(
            '/log_ptgd_at_maxent/ log_Occam_at_infty is not real: log_Occam_at_infty = %s'
            % log_Occam_at_infty)
    if not np.isfinite(log_Occam_at_infty):
        raise ControlledError(
            '/log_ptgd_at_maxent/ log_Occam_at_infty is not finite: log_Occam_at_infty = %s'
            % log_Occam_at_infty)

    # Compute the log likelihood at infinity
    log_likelihood_at_infty = -N * sp.sum(phi_M * R) - N

    # Make sure log_likelihood_at_infty is valid
    if not np.isreal(log_likelihood_at_infty):
        raise ControlledError(
            '/log_ptgd_at_maxent/ log_likelihood_at_infty is not real: log_likelihood_at_infty = %s'
            % log_likelihood_at_infty)
    if not np.isfinite(log_likelihood_at_infty):
        raise ControlledError(
            '/log_ptgd_at_maxent/ log_likelihood_at_infty is not finite: log_likelihood_at_infty = %s'
            % log_likelihood_at_infty)

    # Compute the log posterior (not sure this is right)
    log_ptgd_at_maxent = log_likelihood_at_infty + log_Occam_at_infty

    # If requested, incorporate corrections to the partition function
    t = -np.inf
    num_samples = num_Z_samples
    if Z_eval == 'Lap':
        correction, w_sample_mean, w_sample_mean_std = \
            0.0, 1.0, 0.0
    elif Z_eval == 'Lap+Imp':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Laplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=False)
    elif Z_eval == 'Lap+Imp+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Laplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=True)
    elif Z_eval == 'GLap':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=False, sampling=False)
    elif Z_eval == 'GLap+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=True, sampling=False)
    elif Z_eval == 'GLap+Sam':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=False, sampling=True)
    elif Z_eval == 'GLap+Sam+P':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.GLaplace_approach(phi_M, R, Delta, t, N, num_samples, go_parallel=True, sampling=True)
    elif Z_eval == 'Lap+Fey':
        correction, w_sample_mean, w_sample_mean_std = \
            supplements.Feynman_diagrams(phi_M, R, Delta, t, N)
    else:
        raise ControlledError(
            '/log_ptgd_at_maxent/ Z_eval is unrecognized: Z_eval = %s' % Z_eval)

    # Make sure correction is valid
    if not np.isreal(correction):
        raise ControlledError(
            '/log_ptgd_at_maxent/ correction is not real: correction = %s' %
            correction)
    if not np.isfinite(correction):
        raise ControlledError(
            '/log_ptgd_at_maxent/ correction is not finite: correction = %s' %
            correction)

    log_ptgd_at_maxent += correction

    return log_ptgd_at_maxent, w_sample_mean, w_sample_mean_std
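
The double loop that builds M_on_kernel computes a weighted Gram matrix, psi' diag(M) psi, and can be written in one vectorized line. A sketch showing the equivalence on toy inputs:

import numpy as np

G, kernel_dim = 6, 2
rng = np.random.default_rng(1)
kernel_basis = rng.normal(size=(G, kernel_dim))
M = np.full(G, 1.0 / G)                  # a probability vector

# Loop version, as in log_ptgd_at_maxent above
M_on_kernel = np.zeros((kernel_dim, kernel_dim))
for a in range(kernel_dim):
    for b in range(kernel_dim):
        M_on_kernel[a, b] = np.sum(kernel_basis[:, a] * kernel_basis[:, b] * M)

# Vectorized equivalent
vectorized = kernel_basis.T @ (M[:, None] * kernel_basis)
print(np.allclose(M_on_kernel, vectorized))   # True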
Example #14
def run(counts_array,
        Delta,
        Z_eval,
        num_Z_samples,
        t_start,
        DT_MAX,
        print_t,
        tollerance,
        resolution,
        num_pt_samples,
        fix_t_at_t_star,
        max_log_evidence_ratio_drop,
        details=False):
    """
    The core algorithm of DEFT, used for both 1D and 2D density estimation.

    Args:
        counts_array (numpy.ndarray):
            An array of counts. All counts must be nonnegative.

        Delta (Smoothness_operator instance):
            An operator providing the definition of 'smoothness' used by DEFT
    """

    # Make sure details is valid
    if not isinstance(details, bool):
        raise ControlledError(
            '/deft_core._run/ details must be a boolean: details = %s' %
            type(details))

    # Get number of gridpoints and kernel dimension from smoothness operator
    G = Delta.get_G()
    kernel_dim = Delta.get_kernel_dim()

    # Make sure counts_array is valid
    if not (len(counts_array) == G):
        raise ControlledError(
            '/deft_core._run/ counts_array must have length %d: len(counts_array) = %d'
            % (G, len(counts_array)))
    if not all(counts_array >= 0):
        raise ControlledError(
            '/deft_core._run/ counts_array is not non-negative: counts_array = %s'
            % counts_array)
    if not (sum(counts_array > 0) > kernel_dim):
        raise ControlledError(
            '/deft_core._run/ Only %d elements of counts_array contain data; must exceed kernel dimension %d'
            % (sum(counts_array > 0), kernel_dim))

    # Get number of data points and normalize histogram
    N = sum(counts_array)
    R = 1.0 * counts_array / N

    #
    # Compute the MAP curve
    #

    start_time = time.perf_counter()
    map_curve = compute_map_curve(N, R, Delta, Z_eval, num_Z_samples, t_start,
                                  DT_MAX, print_t, tollerance, resolution,
                                  max_log_evidence_ratio_drop)
    end_time = time.perf_counter()
    map_curve_compute_time = end_time - start_time
    if print_t:
        print('MAP curve computation took %.2f sec' % map_curve_compute_time)

    # Identify the optimal density estimate
    points = map_curve.points
    log_Es = sp.array([p.log_E for p in points])
    log_E_max = log_Es.max()
    ibest = log_Es.argmax()
    star = points[ibest]
    Q_star = np.copy(star.Q)
    t_star = star.t
    phi_star = np.copy(star.phi)
    map_curve.i_star = ibest

    #
    # Do posterior sampling
    #

    if not (num_pt_samples == 0):
        Q_samples, phi_samples, phi_weights = \
            supplements.posterior_sampling(points, R, Delta, N, G,
                                           num_pt_samples, fix_t_at_t_star)

    #
    # Package results
    #

    # Create a container
    results = Results()

    # Fill in info that's guaranteed to be there
    results.phi_star = phi_star
    results.Q_star = Q_star
    results.R = R
    results.map_curve = map_curve
    results.map_curve_compute_time = map_curve_compute_time
    results.G = G
    results.N = N
    results.t_star = t_star
    results.i_star = ibest
    results.counts = counts_array
    results.tollerance = tollerance
    results.resolution = resolution
    results.points = points

    # Get maxent point
    maxent_point = results.map_curve.get_maxent_point()
    results.M = maxent_point.Q / np.sum(maxent_point.Q)

    # Include posterior sampling info if any sampling was performed
    if not (num_pt_samples == 0):
        results.num_pt_samples = num_pt_samples
        results.Q_samples = Q_samples
        results.phi_samples = phi_samples
        results.phi_weights = phi_weights

    # Return density estimate along with histogram on which it is based
    return results
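
The selection step at the heart of run() is just an argmax over the per-point log evidence along the MAP curve. In miniature (a generic sketch, not package code):

import numpy as np

class Point:
    def __init__(self, t, log_E):
        self.t, self.log_E = t, log_E

points = [Point(t, -(t - 1.0)**2) for t in np.linspace(-3, 3, 7)]
log_Es = np.array([p.log_E for p in points])
i_star = log_Es.argmax()
print(points[i_star].t)    # 1.0, the t with maximal log evidence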
Example #15
    def __init__(self, operator_type, operator_order, num_gridpoints, grid_spacing=1.0 ):
        """
        Constructor for Smoothness_operator class

        Args:
            operator_type (str): 
                The type of operator. Accepts one of the following values:
                    '1d_bilateral'
                    '1d_periodic'
                    '2d_bilateral'
                    '2d_periodic'

            operator_order (int): 
                The order of the operator.

            num_gridpoints: 
                The number of gridpoints in each dimension of the domain.
        """
        
        if '1d' in operator_type:
            self._coordinate_dim = 1

            # Make sure grid_spacing is valid; in 1d it must be a single
            # positive float (the 2d branch validates a pair of spacings below)
            if not isinstance(grid_spacing, float):
                raise ControlledError('/Laplacian/ grid_spacing must be a float: grid_spacing = %s' % type(grid_spacing))
            if not (grid_spacing > 0):
                raise ControlledError('/Laplacian/ grid_spacing must be > 0: grid_spacing = %s' % grid_spacing)

            # Make sure operator_type is valid
            if operator_type == '1d_bilateral':
                periodic = False
            elif operator_type == '1d_periodic':
                periodic = True
            else:
                raise ControlledError('/Laplacian/ Cannot identify operator_type: operator_type = %s' % operator_type)
                
            self._type = operator_type
            
            self._sparse_matrix, self._kernel_basis = \
                laplacian_1d(num_gridpoints, operator_order, grid_spacing, periodic)
            
            self._G = self._kernel_basis.shape[0]
            self._kernel_dim = self._kernel_basis.shape[1]
            self._alpha = operator_order

        elif '2d' in operator_type:
            self._coordinate_dim = 2

            assert( len(num_gridpoints)==2 )
            assert( all([isinstance(n,utils.NUMBER) for n in num_gridpoints]) )

            assert( len(grid_spacing)==2 )
            assert( all([isinstance(n,utils.NUMBER) for n in grid_spacing]) )

            if operator_type == '2d_bilateral':
                periodic = False
            elif operator_type == '2d_periodic':
                periodic = True
            else:
                raise ControlledError('/Laplacian/ Cannot identify operator_type: operator_type = %s' % operator_type)

            
            self._type = operator_type
            
            self._sparse_matrix, self._kernel_basis = \
                laplacian_2d( num_gridpoints, 
                              operator_order, 
                              grid_spacing, 
                              periodic=periodic, 
                              sparse=True,
                              report_kernel=True)

            self._Gx = int(num_gridpoints[0])
            self._Gy = int(num_gridpoints[1])
            self._G = self._Gx * self._Gy
            self._alpha = operator_order
            assert( self._G == self._kernel_basis.shape[0] )
            self._kernel_dim = self._kernel_basis.shape[1]

        else:
            raise ControlledError('/Laplacian/ Cannot identify operator_type: operator_type = %s' % operator_type)

        # Compute spectrum, and set lowest rank eigenvectors as kernel
        self._dense_matrix = self._sparse_matrix.todense()
        eigenvalues, eigenvectors = eigh(self._dense_matrix)
        self._eigenvalues = eigenvalues
        self._eigenbasis = utils.normalize(eigenvectors)
        #self._kernel_basis = self._eigenbasis[:,:self._kernel_dim]

        # Set kernel eigenvalues and eigenvectors
        self._eigenvalues[:self._kernel_dim] = 0.0
        self._eigenbasis[:,:self._kernel_dim] = self._kernel_basis
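
The final spectral step, in isolation: diagonalize the dense operator, then overwrite the lowest eigenvalues and eigenvectors with the analytic kernel. A numpy-only toy sketch of that pattern:

import numpy as np
from numpy.linalg import eigh

G, kernel_dim = 5, 1
Delta = 2*np.eye(G) - np.eye(G, k=1) - np.eye(G, k=-1)
Delta[0, 0] = Delta[-1, -1] = 1.0        # free ends: constants lie in the kernel

eigenvalues, eigenbasis = eigh(Delta)    # eigenvalues in ascending order
kernel_basis = np.ones((G, 1)) / np.sqrt(G)

eigenvalues[:kernel_dim] = 0.0           # declare kernel modes exactly zero
eigenbasis[:, :kernel_dim] = kernel_basis
print(np.round(eigenvalues, 6))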
Example #16
    def __init__(self,
                 distribution='gaussian',
                 num_data_points=100,
                 seed=None):

        # Check that distribution is valid
        check(distribution in self.list(),
              'distribution = %s is not valid' % distribution)

        # Check num_data_points is integral
        check(isinstance(num_data_points, numbers.Integral),
              'num_data_points = %s is not an integer.' % num_data_points)

        # Cast num_data_points as an integer
        num_data_points = int(num_data_points)

        # Check value
        check(
            0 < num_data_points <= MAX_DATASET_SIZE,
            'num_data_points = %d; must have 0 < num_data_points <= %d.' %
            (num_data_points, MAX_DATASET_SIZE))

        # Run seed and catch errors
        try:
            np.random.seed(seed)
        except TypeError:
            raise ControlledError('type(seed) = %s; invalid type.' %
                                  type(seed))
        except ValueError:
            raise ControlledError('seed = %s; invalid value.' % seed)

        # Set default value for periodic
        periodic = False

        # If gaussian distribution
        if distribution == 'gaussian':
            description = 'Gaussian distribution'
            mus = [0.]
            sigmas = [1.]
            weights = [1.]
            bounding_box = [-5, 5]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        # If mixture of two gaussian distributions
        elif distribution == 'narrow':
            description = 'Gaussian mixture, narrow separation'
            mus = [-1.25, 1.25]
            sigmas = [1., 1.]
            weights = [1., 1.]
            bounding_box = [-6, 6]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        # If mixture of two gaussian distributions
        elif distribution == 'wide':
            description = 'Gaussian mixture, wide separation'
            mus = [-2.0, 2.0]
            sigmas = [1.0, 1.0]
            weights = [1.0, 0.5]
            bounding_box = [-6.0, 6.0]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        elif distribution == 'foothills':
            description = 'Foothills (Gaussian mixture)'
            mus = [0., 5., 8., 10, 11]
            sigmas = [2., 1., 0.5, 0.25, 0.125]
            weights = [1., 1., 1., 1., 1.]
            bounding_box = [-5, 12]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        elif distribution == 'accordian':
            description = 'Accordion (Gaussian mixture)'
            mus = [0., 5., 8., 10, 11, 11.5]
            sigmas = [2., 1., 0.5, 0.25, 0.125, 0.0625]
            weights = [16., 8., 4., 2., 1., 0.5]
            bounding_box = [-5, 13]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        elif distribution == 'goalposts':
            description = 'Goalposts (Gaussian mixture)'
            mus = [-20, 20]
            sigmas = [1., 1.]
            weights = [1., 1.]
            bounding_box = [-25, 25]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        elif distribution == 'towers':
            description = 'Towers (Gaussian mixture)'
            mus = [-20, -15, -10, -5, 0, 5, 10, 15, 20]
            sigmas = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            weights = [1., 1., 1., 1., 1., 1., 1., 1., 1.]
            bounding_box = [-25, 25]
            data, pdf_py, pdf_js = gaussian_mixture(num_data_points, weights,
                                                    mus, sigmas, bounding_box)

        # If uniform distribution
        elif distribution == 'uniform':
            data = stats.uniform.rvs(size=num_data_points)
            bounding_box = [0, 1]
            description = 'Uniform distribution'
            pdf_js = "1.0"
            pdf_py = "1.0"

        # Convex beta distribution
        elif distribution == 'beta_convex':
            data = stats.beta.rvs(a=0.5, b=0.5, size=num_data_points)
            bounding_box = [0, 1]
            description = 'Convex beta distribution'
            pdf_js = "Math.pow(x,-0.5)*Math.pow(1-x,-0.5)*math.gamma(1)/(math.gamma(0.5)*math.gamma(0.5))"
            pdf_py = "np.power(x,-0.5)*np.power(1-x,-0.5)*math.gamma(1)/(math.gamma(0.5)*math.gamma(0.5))"

        # Concave beta distribution
        elif distribution == 'beta_concave':
            data = stats.beta.rvs(a=2, b=2, size=num_data_points)
            bounding_box = [0, 1]
            description = 'Concave beta distribution'
            pdf_js = "Math.pow(x,1)*Math.pow(1-x,1)*math.gamma(4)/(math.gamma(2)*math.gamma(2))"
            pdf_py = "np.power(x,1)*np.power(1-x,1)*math.gamma(4)/(math.gamma(2)*math.gamma(2))"

        # Exponential distribution
        elif distribution == 'exponential':
            data = stats.expon.rvs(size=num_data_points)
            bounding_box = [0, 5]
            description = 'Exponential distribution'
            pdf_js = "Math.exp(-x)"
            pdf_py = "np.exp(-x)"

        # Gamma distribution
        elif distribution == 'gamma':
            data = stats.gamma.rvs(a=3, size=num_data_points)
            bounding_box = [0, 10]
            description = 'Gamma distribution'
            pdf_js = "Math.pow(x,2)*Math.exp(-x)/math.gamma(3)"
            pdf_py = "np.power(x,2)*np.exp(-x)/math.gamma(3)"

        # Triangular distribution
        elif distribution == 'triangular':
            data = stats.triang.rvs(c=0.5, size=num_data_points)
            bounding_box = [0, 1]
            description = 'Triangular distribution'
            pdf_js = "2-4*Math.abs(x - 0.5)"
            pdf_py = "2-4*np.abs(x - 0.5)"

        # Laplace distribution
        elif distribution == 'laplace':
            data = stats.laplace.rvs(size=num_data_points)
            bounding_box = [-5, 5]
            description = "Laplace distribution"
            pdf_js = "0.5*Math.exp(- Math.abs(x))"
            pdf_py = "0.5*np.exp(- np.abs(x))"

        # von Mises distribution
        elif distribution == 'vonmises':
            data = stats.vonmises.rvs(1, size=num_data_points)
            bounding_box = [-3.14159, 3.14159]
            periodic = True
            description = 'von Mises distribution'
            pdf_js = "Math.exp(Math.cos(x))/7.95493"
            pdf_py = "np.exp(np.cos(x))/7.95493"

        else:
            raise ControlledError('Distribution type "%s" not recognized.' %
                                  distribution)

        # Set these
        attributes = {
            'data': data,
            'bounding_box': bounding_box,
            'distribution': distribution,
            'pdf_js': pdf_js,
            'pdf_py': pdf_py,
            'periodic': periodic
        }
        for key, value in attributes.items():
            setattr(self, key, value)
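
gaussian_mixture is assumed here to draw num_data_points samples from the weighted mixture, keeping only draws inside the bounding box, and to return the data along with pdf strings. A minimal stand-in for the sampling part (an assumption about its behavior, not the package's implementation):

import numpy as np

def sample_gaussian_mixture(n, weights, mus, sigmas, bounding_box, seed=0):
    rng = np.random.default_rng(seed)
    w = np.asarray(weights, dtype=float)
    w /= w.sum()
    samples = []
    while len(samples) < n:
        k = rng.choice(len(w), p=w)                  # pick a mixture component
        x = rng.normal(mus[k], sigmas[k])
        if bounding_box[0] <= x <= bounding_box[1]:  # keep in-box draws only
            samples.append(x)
    return np.array(samples)

data = sample_gaussian_mixture(100, [1., 1.], [-2., 2.], [1., 1.], [-6, 6])
print(data.min(), data.max())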