Exemple #1
0
    def integrate_norm(self, params, sel_dist, theta):
        """
        Integrate the cached spectra over a DFE, then renormalize.

        params: DFE parameters, forwarded to sel_dist after the selection
                coefficient. Any iterable works (e.g. a numpy array).
        sel_dist: DFE function, evaluated as sel_dist(gamma, *params).
        theta: Factor by which the normalized spectrum is multiplied.

        Returns the DFE-weighted spectrum divided by the total DFE weight
        that the integration covered, multiplied by theta.

        Raises IndexError if self.neu_spec does not have one, two or
        three dimensions.
        """
        # tuple() so that params may be a numpy array
        dfe_params = tuple(params)
        grid = self.gammas

        # DFE weight at every cached gamma
        weights = sel_dist(grid, *dfe_params)

        # Weight of the effectively neutral portion (last cached gamma up
        # to 0). Integrated numerically rather than through a CDF so that
        # arbitrary mass functions can be used. quad's error estimate is
        # not needed.
        neutral_weight, _ = scipy.integrate.quad(sel_dist,
                                                 grid[-1],
                                                 0,
                                                 args=dfe_params)

        # Works for demographic models with 1-3 populations. Original
        # author's note: this assumes the selection coefficient is the
        # same in both populations.
        n_pops = len(self.neu_spec.shape)
        if n_pops not in (1, 2, 3):
            raise IndexError("Must have one to three populations")

        # One trailing axis per population, so that the weights broadcast
        # against the spectra stacked along axis 0.
        expand = (slice(None), ) + (numpy.newaxis, ) * n_pops
        neutral_part = self.neu_spec * neutral_weight
        selected_part = Numerics.trapz(weights[expand] * self.spectra,
                                       grid,
                                       axis=0)
        integrated_fs = Spectrum(neutral_part + selected_part,
                                 extrap_x=self.extrap_x)

        # normalization: total DFE weight covered by the integration
        covered_weight = Numerics.trapz(weights, grid) + neutral_weight
        return integrated_fs / covered_weight * theta
Exemple #2
0
    def integrate(self, params, sel_dist, theta):
        """
        Integrate the cached spectra over a DFE without re-normalizing.

        The portion of the DFE that is not integrated is assumed not to
        be seen in your sample.

        params: DFE parameters, forwarded to sel_dist after the selection
                coefficient. Any iterable works (e.g. a numpy array).
        sel_dist: DFE function, evaluated as sel_dist(gamma, *params).
        theta: Factor by which the integrated spectrum is multiplied.

        Returns the DFE-weighted spectrum multiplied by theta.

        Raises IndexError if self.neu_spec does not have one, two or
        three dimensions.
        """
        # tuple() so that params may be a numpy array
        dfe_params = tuple(params)
        grid = self.gammas

        # DFE weight at every cached gamma
        weights = sel_dist(grid, *dfe_params)

        # Weight of the effectively neutral portion (last cached gamma up
        # to 0). Integrated numerically rather than through a CDF so that
        # arbitrary mass functions can be used. quad's error estimate is
        # not needed.
        neutral_weight, _ = scipy.integrate.quad(sel_dist,
                                                 grid[-1],
                                                 0,
                                                 args=dfe_params)

        # Works for demographic models with 1-3 populations.
        n_pops = len(self.neu_spec.shape)
        if n_pops not in (1, 2, 3):
            raise IndexError("Must have one to three populations")

        # One trailing axis per population, so that the weights broadcast
        # against the spectra stacked along axis 0.
        expand = (slice(None), ) + (numpy.newaxis, ) * n_pops
        neutral_part = self.neu_spec * neutral_weight
        selected_part = Numerics.trapz(weights[expand] * self.spectra,
                                       grid,
                                       axis=0)
        integrated_fs = Spectrum(neutral_part + selected_part,
                                 extrap_x=self.extrap_x)

        # no normalization, allow lethal mutations to fall out
        return integrated_fs * theta
def demo_selection_distINV(params, ns, sel_dist, theta, cache):
    """
    Integrate cached spectra over a DFE, picking for every gamma the
    cached spectrum whose h is nearest to the h computed for that gamma.

    params: DFE parameters followed by two trailing entries (h_intercept
            and h_rate according to the original notes). The trailing
            pair is passed to comp_h_from_s_INV, the rest to sel_dist.
    ns: Not used by this function.
    sel_dist: should be a function that is evaluated
              PDF(X) = func(x, param1, param2 ..., paramn)
    theta: Factor by which the integrated spectrum is multiplied.
           (Original note: required for now, just going to use Poisson ll.)
    cache: Mapping of precomputed objects. The keys read here are
           'gammas', 'hlist', 'spectra', 'neu_spec' and 'extrap_x';
           'spectra' is indexed as [h index, gamma index, ...].

    Returns the integrated spectrum multiplied by theta. The DFE is not
    re-normalized.

    Raises IndexError if cache['neu_spec'] does not have one, two or
    three dimensions.

    Note that first and last entry of SFS are meaningless!
    """
    gammas = cache['gammas']

    # The last two entries of params describe h, the others the DFE.
    dfe_params = tuple(params[:-2])
    h_params = params[-2:]

    h_per_gamma = comp_h_from_s_INV(gammas, *h_params)

    # For every gamma, index of the cached h closest to the computed h.
    hlist = cache['hlist']
    nearest_h = [find_nearest_idx(hlist, h) for h in h_per_gamma]

    # Pick the matching spectrum for each gamma column of the cache.
    # Original author's note: spectra used to contain NaN for large
    # 2Neas*h and were patched with numpy.nan_to_num; that problem
    # (negative values in the SFS) is reported as fixed.
    spectra = cache['spectra']
    spectra_interp = numpy.array([
        spectra[nearest_h[col], col, :] for col in range(spectra.shape[1])
    ])

    # DFE weight for each selected spectrum
    weights = sel_dist(gammas, *dfe_params)

    # Weights of the parts of the DFE outside the cached gamma range.
    # Integrated numerically rather than through a CDF so that arbitrary
    # mass functions can be used. quad's error estimates are not needed.
    weight_neu, _ = scipy.integrate.quad(sel_dist,
                                         gammas[-1],
                                         0,
                                         args=dfe_params)
    weight_lethal, _ = scipy.integrate.quad(sel_dist,
                                            -numpy.inf,
                                            gammas[0],
                                            args=dfe_params)

    # Works for demographic models with 1-3 populations.
    neu_spec = cache['neu_spec']
    n_pops = len(neu_spec.shape)
    if n_pops not in (1, 2, 3):
        raise IndexError("Must have one to three populations")

    # One trailing axis per population, so that the weights broadcast
    # against the spectra stacked along axis 0.
    expand = (slice(None), ) + (numpy.newaxis, ) * n_pops
    neutral_part = neu_spec * weight_neu
    selected_part = Numerics.trapz(weights[expand] * spectra_interp,
                                   gammas,
                                   axis=0)
    # Lethal mutations do not fall out: everything below gammas[0]
    # contributes the spectrum of the first cached gamma. Original
    # author's note: this assumes the gamma range reaches lethal values.
    lethal_part = spectra_interp[0] * weight_lethal
    integrated = neutral_part + selected_part + lethal_part

    integrated_fs = Spectrum(integrated, extrap_x=cache['extrap_x'])
    return integrated_fs * theta
def get_godambe(func_ex,
                grid_pts,
                all_boot,
                p0,
                data,
                eps,
                log=False,
                just_hess=False):
    """
    Godambe information and Hessian matrices

    NOTE: Assumes that last parameter in p0 is theta.

    func_ex: Model function, called as func_ex(params, ns, grid_pts)
    grid_pts: Grid points to evaluate the model function with. Must be
              iterable, since it is turned into a tuple for caching.
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex.
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
    log: If True, calculate derivatives in terms of log-parameters
    just_hess: If True, only evaluate and return the Hessian matrix

    Returns the Hessian alone when just_hess is True, otherwise the
    tuple (godambe, hess, J).
    """
    ns = data.sample_sizes

    # Model spectra already evaluated, so that the Hessian and gradient
    # passes do not recompute the model at the same parameter values.
    model_cache = {}

    def loglik(params, data):
        key = (tuple(params), tuple(ns), tuple(grid_pts))
        try:
            fs = model_cache[key]
        except KeyError:
            fs = model_cache[key] = func_ex(params, ns, grid_pts)
        return Inference.ll(fs, data)

    def loglik_of_log(logparams, data):
        return loglik(numpy.exp(logparams), data)

    def target_and_point():
        # Function to differentiate and the point to differentiate at.
        # Built anew on every call so each differentiation receives its
        # own log(p0) array.
        if log:
            return loglik_of_log, numpy.log(p0)
        return loglik, p0

    # Observed Hessian, sign-flipped
    target, point = target_and_point()
    hess = -get_hess(target, point, eps, args=[data])
    if just_hess:
        return hess

    # J: outer product of the gradient, averaged over the bootstraps
    n_params = len(p0)
    J = numpy.zeros((n_params, n_params))
    for boot in all_boot:
        boot_fs = Spectrum(boot)
        target, point = target_and_point()
        grad = get_grad(target, point, eps, args=[boot_fs])
        J = J + numpy.outer(grad, grad)
    J = J / len(all_boot)

    # G = H*J^-1*H
    hess_J_inv = numpy.dot(hess, numpy.linalg.inv(J))
    godambe = numpy.dot(hess_J_inv, hess)
    return godambe, hess, J
Exemple #5
0
def get_godambe(func_ex,
                grid_pts,
                all_boot,
                p0,
                data,
                eps,
                log=False,
                just_hess=False,
                boot_theta_adjusts=None):
    """
    Godambe information and Hessian matrices

    func_ex: Model function, called as func_ex(params, ns, grid_pts)
    grid_pts: Grid points to evaluate the model function with. Must be
              iterable, since it is turned into a tuple for caching.
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex.
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    log: If True, calculate derivatives in terms of log-parameters
    just_hess: If True, only evaluate and return the Hessian matrix
    boot_theta_adjusts: Sequence (list, tuple or numpy array) of factors by
                        which to adjust theta for each bootstrap sample,
                        relative to full data theta. None or an empty
                        sequence means no adjustment (all factors 1).

    Returns the Hessian alone when just_hess is True, otherwise the
    tuple (godambe, hess, J, cU), where cU is the gradient averaged over
    the bootstrap samples.
    """
    ns = data.sample_sizes
    # None (the default) and an empty sequence both mean "no adjustment".
    # Emptiness is tested with len() rather than truthiness because the
    # truth value of a multi-element numpy array is ambiguous and raises
    # ValueError. A None default also avoids a shared mutable default.
    if boot_theta_adjusts is None or len(boot_theta_adjusts) == 0:
        boot_theta_adjusts = numpy.ones(len(all_boot))

    # Cache evaluations of the frequency spectrum inside our hessian/J
    # evaluation function
    cache = {}

    def func(params, data, theta_adjust=1):
        key = (tuple(params), tuple(ns), tuple(grid_pts))
        if key not in cache:
            cache[key] = func_ex(params, ns, grid_pts)
        # theta_adjust deals with bootstraps that need different thetas.
        # It is applied after the cache lookup, so one model evaluation
        # serves every bootstrap sample.
        fs = theta_adjust * cache[key]
        return Inference.ll(fs, data)

    def log_func(logparams, data, theta_adjust=1):
        return func(numpy.exp(logparams), data, theta_adjust)

    # First calculate the observed hessian.
    # theta_adjust defaults to 1.
    if not log:
        hess = -get_hess(func, p0, eps, args=[data])
    else:
        hess = -get_hess(log_func, numpy.log(p0), eps, args=[data])

    if just_hess:
        return hess

    # Now the expectation of J over the bootstrap data
    J = numpy.zeros((len(p0), len(p0)))
    # cU is a column vector
    cU = numpy.zeros((len(p0), 1))
    for boot, theta_adjust in zip(all_boot, boot_theta_adjusts):
        boot = Spectrum(boot)
        if not log:
            grad_temp = get_grad(func, p0, eps, args=[boot, theta_adjust])
        else:
            grad_temp = get_grad(log_func,
                                 numpy.log(p0),
                                 eps,
                                 args=[boot, theta_adjust])
        J_temp = numpy.outer(grad_temp, grad_temp)
        J = J + J_temp
        # NOTE(review): assumes get_grad returns a (len(p0), 1) column so
        # that this sum keeps cU's shape - confirm against get_grad.
        cU = cU + grad_temp
    J = J / len(all_boot)
    cU = cU / len(all_boot)

    # G = H*J^-1*H
    J_inv = numpy.linalg.inv(J)
    godambe = numpy.dot(numpy.dot(hess, J_inv), hess)
    return godambe, hess, J, cU