def integrate_norm(self, params, sel_dist, theta):
    """
    Integrate the cached spectra against sel_dist and re-normalize.

    params: Parameters forwarded to sel_dist after its first argument.
    sel_dist: Function evaluated as sel_dist(gamma, param1, ..., paramn).
              It is called once on the whole self.gammas array and is also
              handed to scipy.integrate.quad.
    theta: Factor the normalized spectrum is multiplied by.

    The result is self.neu_spec weighted by the integral of sel_dist from
    self.gammas[-1] to 0, plus the trapezoid-rule integral over self.gammas
    of the weighted self.spectra. That sum is divided by the total weight
    that was integrated and multiplied by theta.

    Raises IndexError unless self.neu_spec has one to three dimensions.
    """
    # tuple() is needed here so that params may be a numpy array
    dfe_params = tuple(params)
    grid = self.gammas

    # weight of each cached spectrum
    dfe_on_grid = sel_dist(grid, *dfe_params)

    # Weight of the effectively neutral portion. quad is used instead of a
    # CDF so that arbitrary mass functions are supported.
    mass_neu, _ = scipy.integrate.quad(sel_dist, grid[-1], 0,
                                       args=dfe_params)

    # Works for demographic models with 1-3 populations, but assumes the
    # selection coefficient is the same in every population.
    ndim = len(self.neu_spec.shape)
    if ndim not in (1, 2, 3):
        raise IndexError("Must have one to three populations")

    # Same as weights[:, newaxis, ...] with one newaxis per population.
    expand = (slice(None),) + (numpy.newaxis,) * ndim
    neutral_part = self.neu_spec * mass_neu
    selected_part = Numerics.trapz(dfe_on_grid[expand] * self.spectra,
                                   grid, axis=0)
    combined_fs = Spectrum(neutral_part + selected_part,
                           extrap_x=self.extrap_x)

    # normalization: total weight covered by the two integrals above
    total_mass = Numerics.trapz(dfe_on_grid, grid) + mass_neu
    return combined_fs / total_mass * theta
def integrate(self, params, sel_dist, theta):
    """
    Integrate the cached spectra against sel_dist without re-normalizing.

    This assumes the portion of the DFE that is not integrated is not seen
    in your sample.

    params: Parameters forwarded to sel_dist after its first argument.
    sel_dist: Function evaluated as sel_dist(gamma, param1, ..., paramn).
              It is called once on the whole self.gammas array and is also
              handed to scipy.integrate.quad.
    theta: Factor the integrated spectrum is multiplied by.

    Raises IndexError unless self.neu_spec has one to three dimensions.
    """
    # tuple() is needed here so that params may be a numpy array
    dfe_params = tuple(params)
    grid = self.gammas

    # weight of each cached spectrum
    dfe_on_grid = sel_dist(grid, *dfe_params)

    # Weight of the effectively neutral portion. quad is used instead of a
    # CDF so that arbitrary mass functions are supported.
    mass_neu, _ = scipy.integrate.quad(sel_dist, grid[-1], 0,
                                       args=dfe_params)

    # Works for demographic models with 1-3 populations.
    ndim = len(self.neu_spec.shape)
    if ndim not in (1, 2, 3):
        raise IndexError("Must have one to three populations")

    # Same as weights[:, newaxis, ...] with one newaxis per population.
    expand = (slice(None),) + (numpy.newaxis,) * ndim
    neutral_part = self.neu_spec * mass_neu
    selected_part = Numerics.trapz(dfe_on_grid[expand] * self.spectra,
                                   grid, axis=0)
    combined_fs = Spectrum(neutral_part + selected_part,
                           extrap_x=self.extrap_x)

    # no normalization, so lethal mutations are allowed to fall out
    return combined_fs * theta
def demo_selection_distINV(params, ns, sel_dist, theta, cache):
    """
    Integrate cached spectra against sel_dist, with h computed from gamma.

    params: The last two entries are h_intercept and h_rate and are passed
            to comp_h_from_s_INV. All earlier entries are forwarded to
            sel_dist.
    ns: Not used inside this function.
    sel_dist: Should be a function that is evaluated as
              PDF(X) = func(x, param1, param2 ..., paramn)
    theta: Required for now, just going to use Poisson ll. The integrated
           spectrum is multiplied by it.
    cache: Mapping read with the keys 'gammas', 'hlist', 'spectra',
           'neu_spec' and 'extrap_x'.

    Note that first and last entry of SFS are meaningless!

    Raises IndexError unless cache['neu_spec'] has one to three dimensions.
    """
    spectra_obj = cache
    gammas = spectra_obj['gammas']

    # Split off the two dominance parameters from the DFE parameters.
    h_params = params[-2:]
    dfe_params = tuple(params[:-2])

    # One h value per gamma, then the index of the closest cached h.
    hvalues = comp_h_from_s_INV(gammas, *h_params)
    hlist_idx = []
    for h in hvalues:
        hlist_idx.append(find_nearest_idx(spectra_obj['hlist'], h))

    # For every gamma index pick the spectrum cached for its nearest h.
    cached = spectra_obj['spectra']
    picked = []
    for i in range(cached.shape[1]):
        picked.append(cached[hlist_idx[i], i, :])
    spectra_interp = numpy.array(picked)
    # Historical note: some SFS used to contain nan when 2Neas*h was large.
    # That was traced to negative values in the SFS and has been fixed, so
    # no nan_to_num clean-up is applied here.

    # weight of each SFS
    weights = sel_dist(gammas, *dfe_params)

    # Weights of the effectively neutral portion and of the portion below
    # the first gamma. quad is used instead of a CDF so that arbitrary mass
    # functions are supported.
    weight_neu, _ = scipy.integrate.quad(sel_dist, gammas[-1], 0,
                                         args=dfe_params)
    weight_lethal, _ = scipy.integrate.quad(sel_dist, -numpy.inf, gammas[0],
                                            args=dfe_params)

    # Works for demographic models with 1-3 populations.
    neu_spec = spectra_obj['neu_spec']
    ndim = len(neu_spec.shape)
    if ndim not in (1, 2, 3):
        raise IndexError("Must have one to three populations")

    # Same as weights[:, newaxis, ...] with one newaxis per population.
    expand = (slice(None),) + (numpy.newaxis,) * ndim
    neutral_part = neu_spec * weight_neu
    selected_part = Numerics.trapz(weights[expand] * spectra_interp,
                                   gammas, axis=0)
    # Lethal mutations do not fall out: all of them contribute the
    # spectrum at the first gamma. This assumes that the range of gamma
    # goes from small to lethal!
    lethal_part = spectra_interp[0] * weight_lethal

    integrated = neutral_part + selected_part + lethal_part
    integrated_fs = Spectrum(integrated, extrap_x=spectra_obj['extrap_x'])
    return integrated_fs * theta
def get_godambe(func_ex, grid_pts, all_boot, p0, data, eps, log=False,
                just_hess=False):
    """
    Godambe information and Hessian matrices

    NOTE: Assumes that last parameter in p0 is theta.

    func_ex: Model function
    grid_pts: Number of grid points to evaluate the model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex.
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
    log: If True, calculate derivatives in terms of log-parameters
    just_hess: If True, only evaluate and return the Hessian matrix

    Returns hess alone when just_hess is True, otherwise the tuple
    (godambe, hess, J).
    """
    ns = data.sample_sizes

    # Model spectra are memoized so that the finite-difference routines do
    # not re-evaluate func_ex for a parameter set they have already visited.
    fs_cache = {}

    def loglik(params, data):
        key = (tuple(params), tuple(ns), tuple(grid_pts))
        if key in fs_cache:
            model = fs_cache[key]
        else:
            model = func_ex(params, ns, grid_pts)
            fs_cache[key] = model
        return Inference.ll(model, data)

    def loglik_logspace(logparams, data):
        return loglik(numpy.exp(logparams), data)

    # Observed hessian, evaluated against the original data.
    if log:
        hess = -get_hess(loglik_logspace, numpy.log(p0), eps, args=[data])
    else:
        hess = -get_hess(loglik, p0, eps, args=[data])

    if just_hess:
        return hess

    # Expectation of J over the bootstrap data: the mean outer product of
    # the per-bootstrap gradient with itself.
    n_params = len(p0)
    J = numpy.zeros((n_params, n_params))
    for boot in all_boot:
        boot = Spectrum(boot)
        if log:
            grad = get_grad(loglik_logspace, numpy.log(p0), eps, args=[boot])
        else:
            grad = get_grad(loglik, p0, eps, args=[boot])
        J = J + numpy.outer(grad, grad)
    J = J / len(all_boot)

    # G = H*J^-1*H
    J_inv = numpy.linalg.inv(J)
    hess_J_inv = numpy.dot(hess, J_inv)
    godambe = numpy.dot(hess_J_inv, hess)
    return godambe, hess, J
def get_godambe(func_ex, grid_pts, all_boot, p0, data, eps, log=False,
                just_hess=False, boot_theta_adjusts=None):
    """
    Godambe information and Hessian matrices

    func_ex: Model function
    grid_pts: Number of grid points to evaluate the model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex.
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that
         parameter will simply be eps, to avoid numerical issues with small
         parameter perturbations.
    log: If True, calculate derivatives in terms of log-parameters
    just_hess: If True, only evaluate and return the Hessian matrix
    boot_theta_adjusts: Factors by which to adjust theta for each bootstrap
                        sample, relative to full data theta. May be any
                        iterable (list, numpy array, ...). None or an
                        empty iterable means a factor of 1 for every
                        bootstrap.

    Returns hess alone when just_hess is True, otherwise the tuple
    (godambe, hess, J, cU), where J is the mean outer product of the
    bootstrap gradients and cU is their mean as a column vector.

    Raises ValueError if fewer theta adjustments than bootstrap spectra are
    supplied.
    """
    ns = data.sample_sizes

    # Materialize the adjustments as a list before testing for emptiness:
    # the truth value of a multi-element numpy array is ambiguous, so the
    # previous `not boot_theta_adjusts` raised ValueError for array input.
    if boot_theta_adjusts is None:
        boot_theta_adjusts = []
    else:
        boot_theta_adjusts = list(boot_theta_adjusts)
    if not boot_theta_adjusts:
        boot_theta_adjusts = numpy.ones(len(all_boot))

    # Cache evaluations of the frequency spectrum inside our hessian/J
    # evaluation function
    cache = {}

    def func(params, data, theta_adjust=1):
        key = (tuple(params), tuple(ns), tuple(grid_pts))
        if key not in cache:
            cache[key] = func_ex(params, ns, grid_pts)
        # theta_adjust deals with bootstraps that need different thetas
        fs = theta_adjust * cache[key]
        return Inference.ll(fs, data)

    def log_func(logparams, data, theta_adjust=1):
        return func(numpy.exp(logparams), data, theta_adjust)

    # First calculate the observed hessian.
    # theta_adjust defaults to 1.
    if not log:
        hess = -get_hess(func, p0, eps, args=[data])
    else:
        hess = -get_hess(log_func, numpy.log(p0), eps, args=[data])

    if just_hess:
        return hess

    # zip() below would silently drop bootstraps that have no matching
    # adjustment, while J and cU are still divided by len(all_boot); refuse
    # that instead of returning wrongly scaled averages.
    if len(boot_theta_adjusts) < len(all_boot):
        raise ValueError(
            "boot_theta_adjusts has {0} entries but all_boot has {1}".format(
                len(boot_theta_adjusts), len(all_boot)))

    # Now the expectation of J over the bootstrap data
    J = numpy.zeros((len(p0), len(p0)))
    # cU is a column vector
    cU = numpy.zeros((len(p0), 1))
    for boot, theta_adjust in zip(all_boot, boot_theta_adjusts):
        boot = Spectrum(boot)
        if not log:
            grad_temp = get_grad(func, p0, eps, args=[boot, theta_adjust])
        else:
            grad_temp = get_grad(log_func, numpy.log(p0), eps,
                                 args=[boot, theta_adjust])

        J_temp = numpy.outer(grad_temp, grad_temp)
        J = J + J_temp
        # Reshape so cU stays (len(p0), 1) even if get_grad hands back a
        # flat gradient; a flat array would broadcast the sum to a square
        # matrix. This is a no-op for a gradient that is already a column.
        cU = cU + numpy.reshape(grad_temp, (-1, 1))
    J = J / len(all_boot)
    cU = cU / len(all_boot)

    # G = H*J^-1*H
    J_inv = numpy.linalg.inv(J)
    godambe = numpy.dot(numpy.dot(hess, J_inv), hess)
    return godambe, hess, J, cU